-
Notifications
You must be signed in to change notification settings - Fork 955
Allow dream to process non-formula contrasts
#9562
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 15 commits
96c4682
bf244eb
61e52dd
2c821d0
4bfc16c
37794a6
e331c89
c7f4774
eaf9b0d
c76a769
46dc129
2d29303
3770858
907fd1b
e37219c
19e537b
c66c7b3
8668152
c689577
5958081
910c313
e1a6a9b
789cb9e
df0e9ec
dca0f74
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,21 @@ library("limma") | |
|
|
||
| # Load auxiliary helping functions | ||
|
|
||
| #' Check for Non-Empty, Non-Whitespace String | ||
| #' | ||
| #' This function checks if the input is non-NULL and contains more than just whitespace. | ||
| #' It returns TRUE if the input is a non-empty, non-whitespace string, and FALSE otherwise. | ||
| #' | ||
| #' @param input A variable to check. | ||
| #' @return A logical value: TRUE if the input is a valid, non-empty, non-whitespace string; FALSE otherwise. | ||
| #' @examples | ||
| #' is_valid_string("Hello World") # Returns TRUE | ||
| #' is_valid_string(" ") # Returns FALSE | ||
| #' is_valid_string(NULL) # Returns FALSE | ||
| is_valid_string <- function(input) { | ||
| !is.null(input) && nzchar(trimws(input)) | ||
| } | ||
|
|
||
| #' Parse out options from a string without recourse to optparse | ||
| #' | ||
| #' @param x Long-form argument list like --opt1 val1 --opt2 val2 | ||
|
|
@@ -57,32 +72,33 @@ nullify <- function(x) { | |
|
|
||
| # Options list | ||
| opt <- list( | ||
| output_prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'), | ||
| count_file = "$counts", # File containing raw counts | ||
| sample_file = "$samplesheet", # File containing sample information | ||
| contrast_variable = "$contrast_variable", # Variable for contrast (e.g., "treatment") | ||
| contrast_reference = "$reference", # Reference level for the contrast | ||
| contrast_target = "$target", # Target level for the contrast (e.g., "mCherry") | ||
| contrast_string = "$comparison", # Optional full (complex) contrast expression comparison | ||
| sample_id_col = "sample", # Column name for sample IDs | ||
| threads = "$task.cpus", # Number of threads for multithreading | ||
| output_prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'), | ||
| count_file = "$counts", # File containing raw counts | ||
| sample_file = "$samplesheet", # File containing sample information | ||
| blocking_variables = NULL, | ||
| contrast_variable = "$contrast_variable", # Variable for contrast (e.g., "treatment") | ||
| contrast_reference = "$reference", # Reference level for the contrast | ||
| contrast_target = "$target", # Target level for the contrast (e.g., "mCherry") | ||
| contrast_string = "$comparison", # Optional full (complex) contrast expression comparison | ||
| sample_id_col = "sample", # Column name for sample IDs | ||
| threads = "$task.cpus", # Number of threads for multithreading | ||
| subset_to_contrast_samples = FALSE, # Whether to subset to contrast samples | ||
| exclude_samples_col = NULL, # Column for excluding samples | ||
| exclude_samples_values = NULL, # Values for excluding samples | ||
| adjust.method = "BH", # Adjustment method for topTable | ||
| p.value = 1, # P-value threshold for topTable | ||
| lfc = 0, # Log fold-change threshold for topTable | ||
| confint = FALSE, # Whether to compute confidence intervals in topTable | ||
| proportion = 0.01, # Proportion for eBayes | ||
| stdev_coef_lim = "0.1,4", # Standard deviation coefficient limits for eBayes | ||
| trend = FALSE, # Whether to use trend in eBayes | ||
| robust = FALSE, # Whether to use robust method in eBayes | ||
| winsor_tail_p = "0.05,0.1", # Winsor tail probabilities for eBayes | ||
| ddf = "adaptive", # 'Satterthwaite', 'Kenward-Roger', or 'adaptive' | ||
| reml = FALSE, | ||
| round_digits = NULL, | ||
| formula = "$formula", # User-specified formula (e.g. "~ + (1 | sample_number)") | ||
| apply_voom = FALSE # Whether to apply `voomWithDreamWeights` | ||
| exclude_samples_col = NULL, # Column for excluding samples | ||
| exclude_samples_values = NULL, # Values for excluding samples | ||
| adjust.method = "BH", # Adjustment method for topTable | ||
| p.value = 1, # P-value threshold for topTable | ||
| lfc = 0, # Log fold-change threshold for topTable | ||
| confint = FALSE, # Whether to compute confidence intervals in topTable | ||
| proportion = 0.01, # Proportion for eBayes | ||
| stdev_coef_lim = "0.1,4", # Standard deviation coefficient limits for eBayes | ||
| trend = FALSE, # Whether to use trend in eBayes | ||
| robust = FALSE, # Whether to use robust method in eBayes | ||
| winsor_tail_p = "0.05,0.1", # Winsor tail probabilities for eBayes | ||
| ddf = "adaptive", # 'Satterthwaite', 'Kenward-Roger', or 'adaptive' | ||
| reml = FALSE, | ||
| round_digits = NULL, | ||
| formula = "$formula", # User-specified formula (e.g. "~ + (1 | sample_number)") | ||
| apply_voom = FALSE # Whether to apply `voomWithDreamWeights` | ||
| ) | ||
|
|
||
| # Load external arguments to opt list | ||
|
|
@@ -95,7 +111,7 @@ for (ao in names(args_opt)) { | |
| } | ||
|
|
||
| # If there is no option supplied, convert string "null" to NULL | ||
| keys <- c("formula", "contrast_string", "contrast_variable", "contrast_reference") | ||
| keys <- c("formula", "contrast_string", "contrast_variable", "blocking_variables", "contrast_reference") | ||
| opt[keys] <- lapply(opt[keys], nullify) | ||
|
|
||
| opt\$threads <- as.numeric(opt\$threads) | ||
|
|
@@ -116,6 +132,40 @@ if (!is.null(opt\$round_digits)){ | |
| metadata <- read_delim_flexible(opt\$sample_file, header = TRUE, stringsAsFactors = TRUE) | ||
| rownames(metadata) <- metadata[[opt\$sample_id_col]] | ||
|
|
||
| if (!is_valid_string(opt\$formula)) { | ||
| contrast_variable <- make.names(opt\$contrast_variable) | ||
| blocking.vars <- c() | ||
|
|
||
| if (!contrast_variable %in% colnames(metadata)) { | ||
| stop( | ||
| paste0( | ||
| 'Chosen contrast variable \"', | ||
| contrast_variable, | ||
| '\" not in sample sheet' | ||
| ) | ||
| ) | ||
| } else if (any(!c(opt\$contrast_reference, opt\$contrast_target) %in% metadata[[contrast_variable]])) { | ||
| stop( | ||
| paste( | ||
| 'Please choose reference and treatment levels that are present in the', | ||
| contrast_variable, | ||
| 'column of the sample sheet' | ||
| ) | ||
| ) | ||
| } else if (is_valid_string(opt\$blocking_variables)) { | ||
| blocking.vars = make.names(unlist(strsplit(opt\$blocking_variables, split = ';'))) | ||
| if (!all(blocking.vars %in% colnames(metadata))) { | ||
| missing_block <- paste(blocking.vars[! blocking.vars %in% colnames(metadata)], collapse = ',') | ||
| stop( | ||
| paste( | ||
| 'Blocking variables', missing_block, | ||
| 'do not correspond to sample sheet columns.' | ||
| ) | ||
| ) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| # Ensure contrast variable is factor, then relevel | ||
| if (!is.null(opt\$contrast_reference) && opt\$contrast_reference != "") { | ||
| metadata[[opt\$contrast_variable]] <- factor(metadata[[opt\$contrast_variable]]) | ||
|
|
@@ -128,15 +178,37 @@ if (!is.null(opt\$exclude_samples_col) && !is.null(opt\$exclude_samples_values)) | |
| } | ||
|
|
||
| # Load count data | ||
| countMatrix <- read_delim_flexible(opt\$count_file, header = TRUE, stringsAsFactors = FALSE) | ||
| countMatrix <- read_delim_flexible(opt\$count_file, header = TRUE, stringsAsFactors = FALSE, check.names = FALSE) | ||
| rownames(countMatrix) <- countMatrix\$gene_id # count_file/matrix must have a gene_id column. | ||
| countMatrix <- countMatrix[, rownames(metadata), drop = FALSE] | ||
|
|
||
| # Construct model formula using user-provided formula if available; if not, default to contrast variable only | ||
| if (!is.null(opt\$formula) && opt\$formula != "") { | ||
| if (opt\$subset_to_contrast_samples) { | ||
| sample_selector <- metadata[[contrast_variable]] %in% | ||
| c(opt\$contrast_target, opt\$contrast_reference) | ||
|
|
||
| selected_samples <- rownames(metadata)[sample_selector] | ||
|
|
||
| metadata <- metadata[selected_samples, , drop = FALSE] | ||
| countMatrix <- countMatrix[, selected_samples, drop = FALSE] | ||
| } | ||
|
|
||
| # Construct model formula using user-provided formula if available; if not, generate formula from variables | ||
delfiterradas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if (is_valid_string(opt\$formula)) { | ||
| form <- as.formula(opt\$formula) | ||
| } else { | ||
| stop(paste("Invalid or absent formula:", opt\$formula)) | ||
| form <- '~ 0' | ||
|
|
||
| if (is_valid_string(opt\$blocking_variables)) { | ||
| form <- paste(form, paste(blocking.vars, collapse = ' + '), sep=' + ') | ||
| } | ||
|
|
||
| # Make sure all the appropriate variables are factors | ||
| for (v in c(blocking.vars, contrast_variable)) { | ||
| metadata[[v]] <- as.factor(metadata[[v]]) | ||
| } | ||
|
|
||
| # Variable of interest goes last | ||
| form <- as.formula(paste(form, contrast_variable, sep = ' + ')) | ||
| } | ||
| print(form) | ||
|
|
||
|
|
@@ -152,7 +224,7 @@ if (as.logical(opt\$apply_voom)) { | |
| vobjDream <- voomWithDreamWeights(dge, form, metadata, BPPARAM = param) | ||
| } else { | ||
| # Assume countMatrix roughly follows a normal distribution | ||
| vobjDream<- countMatrix | ||
| vobjDream <- countMatrix | ||
| } | ||
|
|
||
| # Fit the DREAM model with ddf and reml options | ||
|
|
@@ -174,26 +246,49 @@ print(colnames(fitmm\$design)) | |
|
|
||
| # If contrast_string is provided, use that for makeContrast | ||
| if (!is.null(opt\$contrast_string)) { | ||
| cat("Using contrast string:", opt\$contrast_string, "\n") | ||
| contrast_string <- opt\$contrast_string | ||
| } else { | ||
| # Construct the expected column names for the target and reference levels in the design matrix | ||
| treatment_target <- paste0(opt\$contrast_variable, opt\$contrast_target) | ||
| treatment_reference <- paste0(opt\$contrast_variable, opt\$contrast_reference) | ||
|
|
||
| # Determine how to construct the contrast string based on which levels are present in the design matrix | ||
| if ((treatment_target %in% colnames(fitmm\$design)) && (treatment_reference %in% colnames(fitmm\$design))) { | ||
| # Both target and reference levels are present in the design matrix | ||
| # We can directly compare the two levels | ||
| contrast_string <- paste0(treatment_target, "-", treatment_reference) | ||
| } else if (treatment_target %in% colnames(fitmm\$design)) { | ||
| # Only the target level is present in the design matrix | ||
| # The reference level may have been omitted due to collinearity or being set as the baseline | ||
| # We compare the target level to zero (implicit reference) | ||
| contrast_string <- "" | ||
| coef_name <- paste0(opt\$contrast_variable, opt\$contrast_target) | ||
| cat("Using default contrast matrix:", coef_name, "\n") | ||
|
|
||
| results <- topTable(fitmm, coef = coef_name, number = Inf, | ||
| adjust.method = opt\$adjust.method, p.value = opt\$p.value, | ||
| lfc = opt\$lfc, confint = opt\$confint) | ||
|
||
| } else { | ||
| # Neither level is present in the design matrix | ||
| # This indicates an error; the specified levels are not found | ||
| stop(paste0(treatment_target, " not found in design matrix")) | ||
| } | ||
| } | ||
|
|
||
| # Use makeContrasts if contrast_string exists | ||
| if (is_valid_string(contrast_string)) { | ||
| cat("Using contrast string:", contrast_string, "\n") | ||
|
|
||
| colnames(fitmm\$design) <- make.names(colnames(fitmm\$design)) | ||
| # Use makeContrasts | ||
| contrast_matrix <- makeContrasts(contrast = opt\$contrast_string, levels = colnames(fitmm\$design)) | ||
| contrast_matrix <- makeContrasts(contrast = contrast_string, levels = colnames(fitmm\$design)) | ||
| fit2 <- contrasts.fit(fitmm, contrast_matrix) | ||
| fit2 <- eBayes(fit2, proportion = opt\$proportion, | ||
| stdev.coef.lim = stdev_coef_lim_vals, | ||
| trend = opt\$trend, robust = opt\$robust, | ||
| winsor.tail.p = winsor_tail_p_vals) | ||
| stdev.coef.lim = stdev_coef_lim_vals, | ||
| trend = opt\$trend, robust = opt\$robust, | ||
| winsor.tail.p = winsor_tail_p_vals) | ||
| results <- topTable(fit2, number = Inf, | ||
| adjust.method = opt\$adjust.method, | ||
| p.value = opt\$p.value, lfc = opt\$lfc, confint = opt\$confint) | ||
|
|
||
| } else { | ||
| coef_name <- paste0(opt\$contrast_variable, opt\$contrast_target) | ||
| cat("Using default contrast matrix:", coef_name, "\n") | ||
|
|
||
| results <- topTable(fitmm, coef = coef_name, number = Inf, | ||
| adjust.method = opt\$adjust.method, p.value = opt\$p.value, | ||
| lfc = opt\$lfc, confint = opt\$confint) | ||
| } | ||
|
|
||
| results\$gene_id <- rownames(results) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.