Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
96c4682
Allow `dream` to process non-formula contrasts
delfiterradas Dec 11, 2025
bf244eb
Merge branch 'nf-core:master' into dream
delfiterradas Dec 11, 2025
61e52dd
Add test case without formula
delfiterradas Dec 12, 2025
2c821d0
Update `meta.yml`
delfiterradas Dec 12, 2025
4bfc16c
Merge branch 'master' into dream
delfiterradas Dec 12, 2025
37794a6
Fix test with unstable file
delfiterradas Dec 12, 2025
e331c89
Update subworkflow to allow non-formula contrasts with DREAM
delfiterradas Dec 12, 2025
c7f4774
Update modules/nf-core/variancepartition/dream/templates/dream.R
delfiterradas Dec 15, 2025
eaf9b0d
Update modules/nf-core/variancepartition/dream/templates/dream.R
delfiterradas Dec 15, 2025
c76a769
Correct format of function
delfiterradas Dec 16, 2025
46dc129
Fix contrast building and code flow
delfiterradas Dec 16, 2025
2d29303
Update tests
delfiterradas Dec 16, 2025
3770858
Merge branch 'master' into dream
delfiterradas Dec 16, 2025
907fd1b
Modify logic for contrast_string building
delfiterradas Dec 17, 2025
e37219c
Merge branch 'master' into dream
delfiterradas Dec 17, 2025
19e537b
Make contrast_string compulsory with formula
delfiterradas Dec 18, 2025
c66c7b3
Update tests
delfiterradas Dec 18, 2025
8668152
Merge branch 'master' into dream
delfiterradas Dec 18, 2025
c689577
Refactor contrast_string construction logic
delfiterradas Dec 19, 2025
5958081
Make formula mutually exclusive to contrast tuple
delfiterradas Dec 23, 2025
910c313
Merge branch 'master' into dream
delfiterradas Dec 23, 2025
e1a6a9b
Modify tests with mutually exclusive varaibles
delfiterradas Dec 23, 2025
789cb9e
Revert changes in first test case
delfiterradas Dec 23, 2025
df0e9ec
Update tests
delfiterradas Dec 23, 2025
dca0f74
Fix tests
delfiterradas Dec 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion modules/nf-core/variancepartition/dream/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ input:
should be used to derive the target samples
- formula:
type: string
description: (Mandatory) R formula string used for modeling, e.g. '~ treatment
description: (Optional) R formula string used for modeling, e.g. '~ treatment
+ (1 | sample_number)'.
- comparison:
type: string
Expand Down
190 changes: 144 additions & 46 deletions modules/nf-core/variancepartition/dream/templates/dream.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,21 @@ library("limma")

# Load auxiliary helping functions

#' Check for Non-Empty, Non-Whitespace String
#'
#' This function checks if the input is non-NULL and contains more than just whitespace.
#' It returns TRUE if the input is a non-empty, non-whitespace string, and FALSE otherwise.
#'
#' @param input A variable to check.
#' @return A logical value: TRUE if the input is a valid, non-empty, non-whitespace string; FALSE otherwise.
#' @examples
#' is_valid_string("Hello World") # Returns TRUE
#' is_valid_string(" ") # Returns FALSE
#' is_valid_string(NULL) # Returns FALSE
is_valid_string <- function(input) {
!is.null(input) && nzchar(trimws(input))
}

#' Parse out options from a string without recourse to optparse
#'
#' @param x Long-form argument list like --opt1 val1 --opt2 val2
Expand Down Expand Up @@ -57,32 +72,33 @@ nullify <- function(x) {

# Options list
opt <- list(
output_prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'),
count_file = "$counts", # File containing raw counts
sample_file = "$samplesheet", # File containing sample information
contrast_variable = "$contrast_variable", # Variable for contrast (e.g., "treatment")
contrast_reference = "$reference", # Reference level for the contrast
contrast_target = "$target", # Target level for the contrast (e.g., "mCherry")
contrast_string = "$comparison", # Optional full (complex) contrast expression comparison
sample_id_col = "sample", # Column name for sample IDs
threads = "$task.cpus", # Number of threads for multithreading
subset_to_contrast_samples = FALSE, # Whether to subset to contrast samples
exclude_samples_col = NULL, # Column for excluding samples
exclude_samples_values = NULL, # Values for excluding samples
adjust.method = "BH", # Adjustment method for topTable
p.value = 1, # P-value threshold for topTable
lfc = 0, # Log fold-change threshold for topTable
confint = FALSE, # Whether to compute confidence intervals in topTable
proportion = 0.01, # Proportion for eBayes
stdev_coef_lim = "0.1,4", # Standard deviation coefficient limits for eBayes
trend = FALSE, # Whether to use trend in eBayes
robust = FALSE, # Whether to use robust method in eBayes
winsor_tail_p = "0.05,0.1", # Winsor tail probabilities for eBayes
ddf = "adaptive", # 'Satterthwaite', 'Kenward-Roger', or 'adaptive'
reml = FALSE,
round_digits = NULL,
formula = "$formula", # User-specified formula (e.g. "~ + (1 | sample_number)")
apply_voom = FALSE # Whether to apply `voomWithDreamWeights`
output_prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'),
count_file = "$counts", # File containing raw counts
sample_file = "$samplesheet", # File containing sample information
blocking_variables = NULL,
contrast_variable = "$contrast_variable", # Variable for contrast (e.g., "treatment")
contrast_reference = "$reference", # Reference level for the contrast
contrast_target = "$target", # Target level for the contrast (e.g., "mCherry")
contrast_string = "$comparison", # Full (complex) contrast expression comparison needed if using formula
sample_id_col = "sample", # Column name for sample IDs
threads = "$task.cpus", # Number of threads for multithreading
subset_to_contrast_samples = FALSE, # Whether to subset to contrast samples
exclude_samples_col = NULL, # Column for excluding samples
exclude_samples_values = NULL, # Values for excluding samples
adjust.method = "BH", # Adjustment method for topTable
p.value = 1, # P-value threshold for topTable
lfc = 0, # Log fold-change threshold for topTable
confint = FALSE, # Whether to compute confidence intervals in topTable
proportion = 0.01, # Proportion for eBayes
stdev_coef_lim = "0.1,4", # Standard deviation coefficient limits for eBayes
trend = FALSE, # Whether to use trend in eBayes
robust = FALSE, # Whether to use robust method in eBayes
winsor_tail_p = "0.05,0.1", # Winsor tail probabilities for eBayes
ddf = "adaptive", # 'Satterthwaite', 'Kenward-Roger', or 'adaptive'
reml = FALSE,
round_digits = NULL,
formula = "$formula", # User-specified formula (e.g. "~ + (1 | sample_number)")
apply_voom = FALSE # Whether to apply `voomWithDreamWeights`
)

# Load external arguments to opt list
Expand All @@ -95,7 +111,7 @@ for (ao in names(args_opt)) {
}

# If there is no option supplied, convert string "null" to NULL
keys <- c("formula", "contrast_string", "contrast_variable", "contrast_reference")
keys <- c("formula", "contrast_string", "contrast_target", "contrast_variable", "blocking_variables", "contrast_reference")
opt[keys] <- lapply(opt[keys], nullify)

opt\$threads <- as.numeric(opt\$threads)
Expand All @@ -116,6 +132,64 @@ if (!is.null(opt\$round_digits)){
metadata <- read_delim_flexible(opt\$sample_file, header = TRUE, stringsAsFactors = TRUE)
rownames(metadata) <- metadata[[opt\$sample_id_col]]

# Check if required parameters have been provided
if (is_valid_string(opt\$formula)) {
contrast_tuple <- c('contrast_variable', 'contrast_reference', 'contrast_target', 'blocking_variables')
offending <- vapply(
opt[contrast_tuple],
is_valid_string,
logical(1)
)
offending_opts <- contrast_tuple[offending]
if (length(offending_opts) > 0) {
stop(paste("When 'formula' is provided, contrasts must be specified only via 'contrast_string'.\n",
"The following options should not be set:",
paste(offending_opts, collapse = ', ')))
}
required_opts <- c('output_prefix', 'contrast_string')
} else {
required_opts <- c('contrast_variable', 'contrast_reference', 'contrast_target', 'output_prefix')
}
missing <- required_opts[!unlist(lapply(opt[required_opts], is_valid_string)) | !required_opts %in% names(opt)]

if (length(missing) > 0) {
stop(paste("Missing required options:", paste(missing, collapse=', ')))
}

if (!is_valid_string(opt\$formula)) {
contrast_variable <- make.names(opt\$contrast_variable)
blocking.vars <- c()

if (!contrast_variable %in% colnames(metadata)) {
stop(
paste0(
'Chosen contrast variable \"',
contrast_variable,
'\" not in sample sheet'
)
)
} else if (any(!c(opt\$contrast_reference, opt\$contrast_target) %in% metadata[[contrast_variable]])) {
stop(
paste(
'Please choose reference and treatment levels that are present in the',
contrast_variable,
'column of the sample sheet'
)
)
} else if (is_valid_string(opt\$blocking_variables)) {
blocking.vars = make.names(unlist(strsplit(opt\$blocking_variables, split = ';')))
if (!all(blocking.vars %in% colnames(metadata))) {
missing_block <- paste(blocking.vars[! blocking.vars %in% colnames(metadata)], collapse = ',')
stop(
paste(
'Blocking variables', missing_block,
'do not correspond to sample sheet columns.'
)
)
}
}
}

# Ensure contrast variable is factor, then relevel
if (!is.null(opt\$contrast_reference) && opt\$contrast_reference != "") {
metadata[[opt\$contrast_variable]] <- factor(metadata[[opt\$contrast_variable]])
Expand All @@ -128,15 +202,37 @@ if (!is.null(opt\$exclude_samples_col) && !is.null(opt\$exclude_samples_values))
}

# Load count data
countMatrix <- read_delim_flexible(opt\$count_file, header = TRUE, stringsAsFactors = FALSE)
countMatrix <- read_delim_flexible(opt\$count_file, header = TRUE, stringsAsFactors = FALSE, check.names = FALSE)
rownames(countMatrix) <- countMatrix\$gene_id # count_file/matrix must have a gene_id column.
countMatrix <- countMatrix[, rownames(metadata), drop = FALSE]

# Construct model formula using user-provided formula if available; if not, default to contrast variable only
if (!is.null(opt\$formula) && opt\$formula != "") {
if (opt\$subset_to_contrast_samples) {
sample_selector <- metadata[[contrast_variable]] %in%
c(opt\$contrast_target, opt\$contrast_reference)

selected_samples <- rownames(metadata)[sample_selector]

metadata <- metadata[selected_samples, , drop = FALSE]
countMatrix <- countMatrix[, selected_samples, drop = FALSE]
}

# Construct model formula using user-provided formula if available; if not, generate formula from variables
if (is_valid_string(opt\$formula)) {
form <- as.formula(opt\$formula)
} else {
stop(paste("Invalid or absent formula:", opt\$formula))
form <- '~ 0'

if (is_valid_string(opt\$blocking_variables)) {
form <- paste(form, paste(blocking.vars, collapse = ' + '), sep=' + ')
}

# Make sure all the appropriate variables are factors
for (v in c(blocking.vars, contrast_variable)) {
metadata[[v]] <- as.factor(metadata[[v]])
}

# Variable of interest goes last
form <- as.formula(paste(form, contrast_variable, sep = ' + '))
}
print(form)

Expand All @@ -152,7 +248,7 @@ if (as.logical(opt\$apply_voom)) {
vobjDream <- voomWithDreamWeights(dge, form, metadata, BPPARAM = param)
} else {
# Assume countMatrix roughly follows a normal distribution
vobjDream<- countMatrix
vobjDream <- countMatrix
}

# Fit the DREAM model with ddf and reml options
Expand All @@ -174,26 +270,28 @@ print(colnames(fitmm\$design))

# If contrast_string is provided, use that for makeContrast
if (!is.null(opt\$contrast_string)) {
cat("Using contrast string:", opt\$contrast_string, "\n")
contrast_string <- opt\$contrast_string
} else {
# Construct the contrast_string
treatment_target <- paste0(opt\$contrast_variable, opt\$contrast_target)
treatment_reference <- paste0(opt\$contrast_variable, opt\$contrast_reference)
contrast_string <- paste0(treatment_target, "-", treatment_reference)
}

# Use makeContrasts if contrast_string exists
if (is_valid_string(contrast_string)) {
cat("Using contrast string:", contrast_string, "\n")

colnames(fitmm\$design) <- make.names(colnames(fitmm\$design))
# Use makeContrasts
contrast_matrix <- makeContrasts(contrast = opt\$contrast_string, levels = colnames(fitmm\$design))
contrast_matrix <- makeContrasts(contrast = contrast_string, levels = colnames(fitmm\$design))
fit2 <- contrasts.fit(fitmm, contrast_matrix)
fit2 <- eBayes(fit2, proportion = opt\$proportion,
stdev.coef.lim = stdev_coef_lim_vals,
trend = opt\$trend, robust = opt\$robust,
winsor.tail.p = winsor_tail_p_vals)
stdev.coef.lim = stdev_coef_lim_vals,
trend = opt\$trend, robust = opt\$robust,
winsor.tail.p = winsor_tail_p_vals)
results <- topTable(fit2, number = Inf,
adjust.method = opt\$adjust.method,
p.value = opt\$p.value, lfc = opt\$lfc, confint = opt\$confint)

} else {
coef_name <- paste0(opt\$contrast_variable, opt\$contrast_target)
cat("Using default contrast matrix:", coef_name, "\n")

results <- topTable(fitmm, coef = coef_name, number = Inf,
adjust.method = opt\$adjust.method, p.value = opt\$p.value,
lfc = opt\$lfc, confint = opt\$confint)
}

results\$gene_id <- rownames(results)
Expand Down
Loading
Loading