Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 63 additions & 45 deletions R/accessory.R
Original file line number Diff line number Diff line change
Expand Up @@ -950,50 +950,68 @@ read_contrasts <-

# Parse YAML contrasts into a data frame
contrasts <- do.call(rbind, lapply(contrasts_yaml$contrasts, function(x) {
if (is.null(x$id)) {
stop("Missing contrast id in YAML contrasts.")
stopifnot(!is.null(x$id)) # Require that each contrast has an 'id'

row <- data.frame(
id = x$id,
variable = NA, reference = NA, target = NA,
blocking = NA,
formula = NA,
make_contrasts_str = NA,
stringsAsFactors = FALSE
)

# A contrast can be defined in two ways:
#
# 1 . Specifying a column in the sample sheet, and two values from that column which define two groups of samples to compare. Tools using a contrast defined in this way would need to generate a model and contrast using that information.
# 2. Specifying the formula and contrast string explicitly. Tools using those contrasts can then pass these directly to function of suites such as DESeq2.

if (!is.null(x$comparison)) {
if (!is.null(x$formula) || !is.null(x$make_contrasts_str)) {
stop(sprintf("Contrast id '%s' with 'comparison' must not have 'formula' or 'make_contrasts_str'.", x$id))
}
# Extract blocking factors from 'formula' if available
blocking <- NA
formula <- NA
make_contrasts_str <- NA
if (!is.null(x$formula)) {
formula_obj <- as.formula(x$formula)
terms_obj <- terms(formula_obj)
terms <- attr(terms_obj, "term.labels")
blocking_vars <- setdiff(terms, x$comparison[1])
formula <- x$formula
if (!is.null(x$make_contrasts_str)) make_contrasts_str <- x$make_contrasts_str
if (length(blocking_vars) > 0) blocking <- paste(blocking_vars, collapse = ";")
} else if (!is.null(x$blocking_factors)) {
blocking <- paste(x$blocking_factors, collapse = ";")

fields <- setNames(rep(NA, 3), c("variable", "reference", "target"))
fields[names(fields)[seq_along(x$comparison)]] <- x$comparison

if (any(is.na(fields)) || any(fields == "")) {
stop(sprintf("Contrast id '%s' must provide non-empty 'variable', 'reference', and 'target' fields.", x$id))
}

data.frame(
id = x$id,
variable = x$comparison[1],
reference = x$comparison[2],
target = x$comparison[3],
blocking = blocking,
formula = formula,
make_contrasts_str = make_contrasts_str,
stringsAsFactors = FALSE
)
}))
row$variable <- fields["variable"]
row$reference <- fields["reference"]
row$target <- fields["target"]

if (!is.null(x$blocking_factors)) {
row$blocking <- paste(x$blocking_factors, collapse = ";")
}

} else if (!is.null(x$formula)) {
if (is.null(x$make_contrasts_str) || !is.null(x$blocking_factors)) {
stop(sprintf("Contrast id '%s' with 'formula' must have 'make_contrasts_str' and no 'blocking_factors'.", x$id))
}

row$formula <- x$formula
row$make_contrasts_str <- x$make_contrasts_str

} else {
stop(sprintf("Contrast id '%s' must provide either 'comparison' or 'formula' + 'make_contrasts_str'.", x$id))
}

row
}))
if (any(duplicated(contrasts$id))) {
stop("Duplicate contrast ids found in YAML contrasts file.")
}
# Check for missing fields
if (any(is.na(contrasts$variable) | is.na(contrasts$reference) | is.na(contrasts$target))) {
stop("Contrasts file has missing values in key columns (variable, reference, target).")
}
} else {
stop("Invalid file format. Please provide a CSV or YAML file.")
}

# Check contrast content is appropriate to sample sheet
success <- checkListIsSubset(contrasts$variable, colnames(samples), "contrast variables", "sample metadata")

variables_without_na <- na.omit(contrasts$variable)
if (length(variables_without_na) > 0) {
success <- checkListIsSubset(variables_without_na, colnames(samples), "contrast variables", "sample metadata")
}
# Check blocking variables, where supplied
blocking <- unlist(lapply(contrasts[[blocking_column]], function(x) simpleSplit(x, ";")))
blocking <- blocking[!is.na(blocking)]
Expand All @@ -1002,7 +1020,7 @@ read_contrasts <-
}

# Extract design matrix columns from contrasts: the variable column plus any blocking factors.
design_cols <- unique(c(contrasts[[variable_column]], blocking))
design_cols <- unique(na.omit(c(contrasts[[variable_column]], blocking)))
design_matrix <- samples[, design_cols, drop = FALSE]

# Ensure there are no NA values in the design matrix.
Expand Down Expand Up @@ -1034,21 +1052,21 @@ read_contrasts <-
}
}

# Ensure reference, target, and blocking values are valid for their variable
# Ensure reference and target are valid for their variable
for (i in 1:nrow(contrasts)) {
var <- contrasts[i, variable_column]
for (col in c(reference_column, target_column)) {
val <- contrasts[i, col]
if (is.na(val) || val == "") {
stop(paste("Missing value for", col, "in sample sheet"))
} else {
success <- checkListIsSubset(val, samples[[var]], "contrast levels", "sample metadata variable")
ref <- contrasts[i, reference_column]
tgt <- contrasts[i, target_column]

# Only check values if both reference and target are present
if (!is.na(ref) && !is.na(tgt)) {
success <- checkListIsSubset(ref, samples[[var]], "contrast levels", "sample metadata variable")
success <- checkListIsSubset(tgt, samples[[var]], "contrast levels", "sample metadata variable")

if (ref == tgt) {
warning(sprintf("Contrast id '%s' has identical reference and target levels.", contrasts[i, "id"]))
}
}
# Warn if reference and target are identical
if (contrasts[i, reference_column] == contrasts[i, target_column]) {
warning(sprintf("Contrast id '%s' has identical reference and target levels.", contrasts[i, "id"]))
}
}

# Convert contrasts to a list if requested
Expand Down
54 changes: 54 additions & 0 deletions tests/testthat/test-accessory.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,57 @@ test_that("nlines works", {
test_html <- "<input type='text' id='myid' value='foo' style='display: none;'>"
expect_equal(as.character(hiddenInput("myid", "foo")), test_html)
})

# read_contrasts()

test_that("read_contrasts parses YAML correctly", {
samples <- data.frame(
sample = c("Sample1", "Sample7", "Sample13", "Sample19", "Sample16"),
genotype = c("WT", "WT", "KO", "KO", "KO"),
treatment = c("Control", "Treated", "Control", "Treated", "Control"),
time = c(1, 1, 1, 1, 16),
batch = c("b1", "b1", "b1", "b1", "b3"),
stringsAsFactors = FALSE
)

yaml_content <- "
contrasts:
- id: treatment_mCherry_hND6_
comparison: [\"treatment\", \"Control\", \"Treated\"]
- id: treatment_mCherry_hND6_batch
comparison: [\"treatment\", \"Control\", \"Treated\"]
blocking_factors: [\"batch\"]
- id: treatment_plus_genotype
formula: \"~ treatment + genotype\"
make_contrasts_str: \"treatmentTreated\"
- id: interaction_genotype_treatment
formula: \"~ genotype * treatment\"
make_contrasts_str: \"genotypeWT.treatmentTreated\"
- id: full_model_with_interactions
formula: \"~ genotype * treatment * time\"
make_contrasts_str: \"genotypeWT.treatmentTreated.time\"
"

yaml_file <- tempfile(fileext = ".yaml")
writeLines(yaml_content, yaml_file)

contrasts <- read_contrasts(yaml_file, samples)

# Test basic structure
expect_true(is.data.frame(contrasts))
expect_equal(nrow(contrasts), 5)
expect_true(all(c("id", "variable", "reference", "target", "blocking", "formula", "make_contrasts_str") %in% colnames(contrasts)))

# Test specific rows
expect_equal(contrasts$variable[1], "treatment")
expect_equal(contrasts$reference[1], "Control")
expect_equal(contrasts$target[1], "Treated")
expect_true(is.na(contrasts$make_contrasts_str[1]))

expect_equal(contrasts$blocking[2], "batch")
expect_equal(contrasts$formula[3], "~ treatment + genotype")
expect_equal(contrasts$make_contrasts_str[4], "genotypeWT.treatmentTreated")
expect_equal(contrasts$make_contrasts_str[5], "genotypeWT.treatmentTreated.time")

unlink(yaml_file)
})
Loading