nf-core · grst · Dec 27, 2025 · Dec 11, 2025 · Dec 11, 2025 · Dec 12, 2025
diff --git a/modules/nf-core/variancepartition/dream/meta.yml b/modules/nf-core/variancepartition/dream/meta.yml
@@ -36,7 +36,7 @@ input:
           should be used to derive the target samples
     - formula:
         type: string
-        description: (Mandatory) R formula string used for modeling, e.g. '~ treatment
+        description: (Optional) R formula string used for modeling, e.g. '~ treatment
           + (1 | sample_number)'.
     - comparison:
         type: string

diff --git a/modules/nf-core/variancepartition/dream/templates/dream.R b/modules/nf-core/variancepartition/dream/templates/dream.R
@@ -8,6 +8,21 @@ library("limma")
 
 # Load auxiliary helping functions
 
+#' Check for Non-Empty, Non-Whitespace String
+#'
+#' This function checks if the input is non-NULL and contains more than just whitespace.
+#' It returns TRUE if the input is a non-empty, non-whitespace string, and FALSE otherwise.
+#'
+#' @param input A variable to check.
+#' @return A logical value: TRUE if the input is a valid, non-empty, non-whitespace string; FALSE otherwise.
+#' @examples
+#' is_valid_string("Hello World") # Returns TRUE
+#' is_valid_string("   ")         # Returns FALSE
+#' is_valid_string(NULL)          # Returns FALSE
+is_valid_string <- function(input) {
+  !is.null(input) && nzchar(trimws(input))
+}
+
 #' Parse out options from a string without recourse to optparse
 #'
 #' @param x Long-form argument list like --opt1 val1 --opt2 val2
@@ -57,32 +72,33 @@ nullify <- function(x) {
 
 # Options list
 opt <- list(
-    output_prefix      = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'),
-    count_file         = "$counts",                 # File containing raw counts
-    sample_file        = "$samplesheet",            # File containing sample information
-    contrast_variable  = "$contrast_variable",      # Variable for contrast (e.g., "treatment")
-    contrast_reference = "$reference",              # Reference level for the contrast
-    contrast_target    = "$target",                 # Target level for the contrast (e.g., "mCherry")
-    contrast_string    = "$comparison",             # Optional full (complex) contrast expression comparison
-    sample_id_col      = "sample",                  # Column name for sample IDs
-    threads            = "$task.cpus",              # Number of threads for multithreading
+    output_prefix              = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'),
+    count_file                 = "$counts",                 # File containing raw counts
+    sample_file                = "$samplesheet",            # File containing sample information
+    blocking_variables         = NULL,
+    contrast_variable          = "$contrast_variable",      # Variable for contrast (e.g., "treatment")
+    contrast_reference         = "$reference",              # Reference level for the contrast
+    contrast_target            = "$target",                 # Target level for the contrast (e.g., "mCherry")
+    contrast_string            = "$comparison",             # Optional full (complex) contrast expression comparison
+    sample_id_col              = "sample",                  # Column name for sample IDs
+    threads                    = "$task.cpus",              # Number of threads for multithreading
     subset_to_contrast_samples = FALSE,            # Whether to subset to contrast samples
-    exclude_samples_col = NULL,                    # Column for excluding samples
-    exclude_samples_values = NULL,                 # Values for excluding samples
-    adjust.method      = "BH",                    # Adjustment method for topTable
-    p.value            = 1,                       # P-value threshold for topTable
-    lfc                = 0,                       # Log fold-change threshold for topTable
-    confint            = FALSE,                   # Whether to compute confidence intervals in topTable
-    proportion         = 0.01,                    # Proportion for eBayes
-    stdev_coef_lim     = "0.1,4",                 # Standard deviation coefficient limits for eBayes
-    trend              = FALSE,                   # Whether to use trend in eBayes
-    robust             = FALSE,                   # Whether to use robust method in eBayes
-    winsor_tail_p      = "0.05,0.1",              # Winsor tail probabilities for eBayes
-    ddf                = "adaptive",              # 'Satterthwaite', 'Kenward-Roger', or 'adaptive'
-    reml               = FALSE,
-    round_digits       = NULL,
-    formula            = "$formula",              # User-specified formula (e.g. "~ + (1 | sample_number)")
-    apply_voom         = FALSE                    # Whether to apply `voomWithDreamWeights`
+    exclude_samples_col        = NULL,                    # Column for excluding samples
+    exclude_samples_values     = NULL,                 # Values for excluding samples
+    adjust.method              = "BH",                    # Adjustment method for topTable
+    p.value                    = 1,                       # P-value threshold for topTable
+    lfc                        = 0,                       # Log fold-change threshold for topTable
+    confint                    = FALSE,                   # Whether to compute confidence intervals in topTable
+    proportion                 = 0.01,                    # Proportion for eBayes
+    stdev_coef_lim             = "0.1,4",                 # Standard deviation coefficient limits for eBayes
+    trend                      = FALSE,                   # Whether to use trend in eBayes
+    robust                     = FALSE,                   # Whether to use robust method in eBayes
+    winsor_tail_p              = "0.05,0.1",              # Winsor tail probabilities for eBayes
+    ddf                        = "adaptive",              # 'Satterthwaite', 'Kenward-Roger', or 'adaptive'
+    reml                       = FALSE,
+    round_digits               = NULL,
+    formula                    = "$formula",              # User-specified formula (e.g. "~ + (1 | sample_number)")
+    apply_voom                 = FALSE                    # Whether to apply `voomWithDreamWeights`
 )
 
 # Load external arguments to opt list
@@ -95,7 +111,7 @@ for (ao in names(args_opt)) {
 }
 
 # If there is no option supplied, convert string "null" to NULL
-keys <- c("formula", "contrast_string", "contrast_variable", "contrast_reference")
+keys <- c("formula", "contrast_string", "contrast_variable", "blocking_variables", "contrast_reference")
 opt[keys] <- lapply(opt[keys], nullify)
 
 opt\$threads      <- as.numeric(opt\$threads)
@@ -116,6 +132,40 @@ if (!is.null(opt\$round_digits)){
 metadata <- read_delim_flexible(opt\$sample_file, header = TRUE, stringsAsFactors = TRUE)
 rownames(metadata) <- metadata[[opt\$sample_id_col]]
 
+if (!is_valid_string(opt\$formula)) {
+  contrast_variable <- make.names(opt\$contrast_variable)
+  blocking.vars <- c()
+
+  if (!contrast_variable %in% colnames(metadata)) {
+    stop(
+      paste0(
+        'Chosen contrast variable \"',
+        contrast_variable,
+        '\" not in sample sheet'
+      )
+    )
+  } else if (any(!c(opt\$contrast_reference, opt\$contrast_target) %in% metadata[[contrast_variable]])) {
+    stop(
+      paste(
+        'Please choose reference and treatment levels that are present in the',
+        contrast_variable,
+        'column of the sample sheet'
+      )
+    )
+  } else if (is_valid_string(opt\$blocking_variables)) {
+    blocking.vars = make.names(unlist(strsplit(opt\$blocking_variables, split = ';')))
+    if (!all(blocking.vars %in% colnames(metadata))) {
+      missing_block <- paste(blocking.vars[! blocking.vars %in% colnames(metadata)], collapse = ',')
+      stop(
+        paste(
+          'Blocking variables', missing_block,
+          'do not correspond to sample sheet columns.'
+        )
+      )
+    }
+  }
+}
+
 # Ensure contrast variable is factor, then relevel
 if (!is.null(opt\$contrast_reference) && opt\$contrast_reference != "") {
     metadata[[opt\$contrast_variable]] <- factor(metadata[[opt\$contrast_variable]])
@@ -128,15 +178,37 @@ if (!is.null(opt\$exclude_samples_col) && !is.null(opt\$exclude_samples_values))
 }
 
 # Load count data
-countMatrix <- read_delim_flexible(opt\$count_file, header = TRUE, stringsAsFactors = FALSE)
+countMatrix <- read_delim_flexible(opt\$count_file, header = TRUE, stringsAsFactors = FALSE, check.names = FALSE)
 rownames(countMatrix) <- countMatrix\$gene_id # count_file/matrix must have a gene_id column.
 countMatrix <- countMatrix[, rownames(metadata), drop = FALSE]
 
-# Construct model formula using user-provided formula if available; if not, default to contrast variable only
-if (!is.null(opt\$formula) && opt\$formula != "") {
+if (opt\$subset_to_contrast_samples) {
+  sample_selector <- metadata[[contrast_variable]] %in%
+    c(opt\$contrast_target, opt\$contrast_reference)
+
+  selected_samples <- rownames(metadata)[sample_selector]
+
+  metadata <- metadata[selected_samples, , drop = FALSE]
+  countMatrix <- countMatrix[, selected_samples, drop = FALSE]
+}
+
+# Construct model formula using user-provided formula if available; if not, generate formula from variables
+if (is_valid_string(opt\$formula)) {
     form <- as.formula(opt\$formula)
 } else {
-        stop(paste("Invalid or absent formula:", opt\$formula))
+    form <- '~ 0'
+
+    if (is_valid_string(opt\$blocking_variables)) {
+        form <- paste(form, paste(blocking.vars, collapse = ' + '), sep=' + ')
+    }
+
+    # Make sure all the appropriate variables are factors
+    for (v in c(blocking.vars, contrast_variable)) {
+        metadata[[v]] <- as.factor(metadata[[v]])
+    }
+
+    # Variable of interest goes last
+    form <- as.formula(paste(form, contrast_variable, sep = ' + '))
 }
 print(form)
 
@@ -152,7 +224,7 @@ if (as.logical(opt\$apply_voom)) {
     vobjDream <- voomWithDreamWeights(dge, form, metadata, BPPARAM = param)
 } else {
     # Assume countMatrix roughly follows a normal distribution
-    vobjDream<- countMatrix
+    vobjDream <- countMatrix
 }
 
 # Fit the DREAM model with ddf and reml options
@@ -174,26 +246,49 @@ print(colnames(fitmm\$design))
 
 # If contrast_string is provided, use that for makeContrast
 if (!is.null(opt\$contrast_string)) {
-    cat("Using contrast string:", opt\$contrast_string, "\n")
+    contrast_string <- opt\$contrast_string
+} else {
+    # Construct the expected column names for the target and reference levels in the design matrix
+    treatment_target <- paste0(opt\$contrast_variable, opt\$contrast_target)
+    treatment_reference <- paste0(opt\$contrast_variable, opt\$contrast_reference)
+
+    # Determine how to construct the contrast string based on which levels are present in the design matrix
+    if ((treatment_target %in% colnames(fitmm\$design)) && (treatment_reference %in% colnames(fitmm\$design))) {
+        # Both target and reference levels are present in the design matrix
+        # We can directly compare the two levels
+        contrast_string <- paste0(treatment_target, "-", treatment_reference)
+    } else if (treatment_target %in% colnames(fitmm\$design)) {
+        # Only the target level is present in the design matrix
+        # The reference level may have been omitted due to collinearity or being set as the baseline
+        # We compare the target level to zero (implicit reference)
+        contrast_string <- ""
+        coef_name <- paste0(opt\$contrast_variable, opt\$contrast_target)
+        cat("Using default contrast matrix:", coef_name, "\n")
+
+        results <- topTable(fitmm, coef = coef_name, number = Inf,
+                            adjust.method = opt\$adjust.method, p.value = opt\$p.value,
+                            lfc = opt\$lfc, confint = opt\$confint)
+    } else {
+        # Neither level is present in the design matrix
+        # This indicates an error; the specified levels are not found
+        stop(paste0(treatment_target, " not found in design matrix"))
+    }
+}
+
+# Use makeContrasts if contrast_string exists
+if (is_valid_string(contrast_string)) {
+    cat("Using contrast string:", contrast_string, "\n")
+
     colnames(fitmm\$design) <- make.names(colnames(fitmm\$design))
-    # Use makeContrasts
-    contrast_matrix <- makeContrasts(contrast = opt\$contrast_string, levels = colnames(fitmm\$design))
+    contrast_matrix <- makeContrasts(contrast = contrast_string, levels = colnames(fitmm\$design))
     fit2 <- contrasts.fit(fitmm, contrast_matrix)
     fit2 <- eBayes(fit2, proportion = opt\$proportion,
-                   stdev.coef.lim = stdev_coef_lim_vals,
-                   trend = opt\$trend, robust = opt\$robust,
-                   winsor.tail.p = winsor_tail_p_vals)
+                  stdev.coef.lim = stdev_coef_lim_vals,
+                  trend = opt\$trend, robust = opt\$robust,
+                  winsor.tail.p = winsor_tail_p_vals)
     results <- topTable(fit2, number = Inf,
                         adjust.method = opt\$adjust.method,
                         p.value = opt\$p.value, lfc = opt\$lfc, confint = opt\$confint)
-
-} else {
-    coef_name <- paste0(opt\$contrast_variable, opt\$contrast_target)
-    cat("Using default contrast matrix:", coef_name, "\n")
-
-    results <- topTable(fitmm, coef = coef_name, number = Inf,
-                        adjust.method = opt\$adjust.method, p.value = opt\$p.value,
-                        lfc = opt\$lfc, confint = opt\$confint)
 }
 
 results\$gene_id <- rownames(results)