Skip to content

Commit 2f6184e

Browse files
Merge pull request #459 from datashield/v6.3.5-dev
V6.3.5 dev
2 parents ea12b73 + 1aa5c13 commit 2f6184e

File tree

159 files changed

+873
-352
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the searchbox below for content that may be hidden.

159 files changed

+873
-352
lines changed

.github/workflows/dsBase_test_suite.yaml

100644100755
Lines changed: 23 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ on:
1515
push:
1616
schedule:
1717
- cron: '0 0 * * 0' # Weekly
18-
- cron: '0 1 * * *' # Nightly
1918

2019
jobs:
2120
dsBase_test_suite:
@@ -37,6 +36,7 @@ jobs:
3736
BRANCH_NAME: ${{ github.ref_name }}
3837
REPO_OWNER: ${{ github.repository_owner }}
3938
R_KEEP_PKG_SOURCE: yes
39+
GITHUB_TOKEN: ${{ github.token || 'placeholder-token' }}
4040

4141
steps:
4242
- name: Checkout dsBase
@@ -45,12 +45,14 @@ jobs:
4545
path: dsBase
4646

4747
- name: Checkout testStatus
48+
if: ${{ github.actor != 'nektos/act' }} # for local deployment only
4849
uses: actions/checkout@v4
4950
with:
5051
repository: ${{ env.REPO_OWNER }}/testStatus
51-
token: ${{ secrets.GH_TOKEN }}
5252
ref: master
5353
path: testStatus
54+
persist-credentials: false
55+
token: ${{ env.GITHUB_TOKEN }}
5456

5557
- uses: r-lib/actions/setup-pandoc@v2
5658

@@ -150,50 +152,41 @@ jobs:
150152
echo "branch:${{ env.BRANCH_NAME }}" > ${{ env.WORKFLOW_ID }}.txt
151153
echo "os:$(lsb_release -ds)" >> ${{ env.WORKFLOW_ID }}.txt
152154
echo "R:$(R --version | head -n1)" >> ${{ env.WORKFLOW_ID }}.txt
155+
Rscript --vanilla -e 'sessionInfo()' >> session_info_${{ env.WORKFLOW_ID }}.txt
153156
working-directory: dsBase/logs
154157

155158
- name: Parse results from testthat and covr
156159
run: |
157-
Rscript --verbose --vanilla ../testStatus/source/parse_test_report.R logs/
160+
Rscript --verbose --vanilla ../testStatus/source/parse_test_report.R logs/ logs/ https://github.com/datashield/${{ env.PROJECT_NAME }}/blob/${{ env.BRANCH_NAME }} '[^-:.]+' '(?<=::)[^:]+(?=::)'
158161
working-directory: dsBase
159-
160-
- name: Commit results to testStatus
162+
env:
163+
PROJECT_NAME: ${{ env.PROJECT_NAME }}
164+
BRANCH_NAME: ${{ env.BRANCH_NAME }}
165+
166+
- name: Render report
161167
run: |
162-
git config --global user.email "github-actions[bot]@users.noreply.github.com"
163-
git config --global user.name "github-actions[bot]"
164168
cd testStatus
165169
166-
# Reconfigure remote to use GitHub token for authentication
167-
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ env.REPO_OWNER }}/testStatus.git
168-
git checkout master
169-
git pull origin master
170-
171-
mkdir -p logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
172-
mkdir -p docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
173-
mkdir -p docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/
174-
# clear the latest directory
175-
rm -rf docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/*
170+
mkdir -p new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
171+
mkdir -p new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/
176172
177173
# Copy logs to new logs directory location
178-
cp -rv ../dsBase/logs/* logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
179-
cp -rv ../dsBase/logs/${{ env.WORKFLOW_ID }}.txt logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
180-
181-
# Create symbolic links
182-
ln -sf ${{ env.WORKFLOW_ID }}/ logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/.LATEST
183-
# ln -sf docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/ docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/latest
174+
cp -rv ../${{ env.PROJECT_NAME }}/logs/* new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
175+
cp -rv ../${{ env.PROJECT_NAME }}/logs/${{ env.WORKFLOW_ID }}.txt new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
184176
185-
R -e 'input_dir <- file.path("../logs", Sys.getenv("PROJECT_NAME"), Sys.getenv("BRANCH_NAME"), Sys.getenv("WORKFLOW_ID")); quarto::quarto_render("source/test_report.qmd", execute_params = list(input_dir = input_dir))'
186-
mv source/test_report.html docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/index.html
187-
cp -r docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/* docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest
188-
189-
git add .
190-
git commit -m "Auto test for ${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }} @ ${{ env.WORKFLOW_ID }}" || echo "No changes to commit"
191-
git push origin master
177+
R -e 'input_dir <- file.path("../new/logs", Sys.getenv("PROJECT_NAME"), Sys.getenv("BRANCH_NAME"), Sys.getenv("WORKFLOW_ID")); quarto::quarto_render("source/test_report.qmd", execute_params = list(input_dir = input_dir))'
178+
mv source/test_report.html new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/index.html
192179
193180
env:
194181
PROJECT_NAME: ${{ env.PROJECT_NAME }}
195182
BRANCH_NAME: ${{ env.BRANCH_NAME }}
196183
WORKFLOW_ID: ${{ env.WORKFLOW_ID }}
184+
185+
- name: Upload test logs
186+
uses: actions/upload-artifact@v4
187+
with:
188+
name: dsbase-logs
189+
path: testStatus/new
197190

198191
- name: Dump environment info
199192
run: |

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Description: Base 'DataSHIELD' functions for the server side. 'DataSHIELD' is a
55
been designed to only share non disclosive summary statistics, with built in automated output
66
checking based on statistical disclosure control. With data sites setting the threshold values for
77
the automated output checks. For more details, see 'citation("dsBase")'.
8-
Version: 6.3.4
8+
Version: 6.3.5
99
Authors@R: c(person(given = "Paul",
1010
family = "Burton",
1111
role = c("aut"),

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ export(matrixDimnamesDS)
8282
export(matrixInvertDS)
8383
export(matrixMultDS)
8484
export(matrixTransposeDS)
85+
export(mdPatternDS)
8586
export(meanDS)
8687
export(meanSdGpDS)
8788
export(mergeDS)

R/glmSLMADS.assign.R

Lines changed: 13 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -18,40 +18,25 @@
1818
#' @export
1919
glmSLMADS.assign <- function(formula, family, offsetName, weightsName, dataName){
2020

21-
#############################################################
22-
#MODULE 1: CAPTURE THE nfilter SETTINGS #
23-
thr <- dsBase::listDisclosureSettingsDS() #
24-
nfilter.tab <- as.numeric(thr$nfilter.tab) #
25-
nfilter.glm <- as.numeric(thr$nfilter.glm) #
26-
#nfilter.subset<-as.numeric(thr$nfilter.subset) #
27-
#nfilter.string<-as.numeric(thr$nfilter.string) #
28-
#############################################################
21+
# Convert transmittable text for special link variance combinations back to full representation
22+
if(family=="quasigamma.link_log")
23+
{family<-"quasi(link=log,variance=mu^2)"}
2924

30-
########################################
31-
############
32-
#Convert transmitable text for special link variance combinations back to full representation
33-
if(family=="quasigamma.link_log")
34-
{family<-"quasi(link=log,variance=mu^2)"}
25+
if(family=="Gamma.link_log")
26+
{family<-"Gamma(link=log)"}
3527

36-
if(family=="Gamma.link_log")
37-
{family<-"Gamma(link=log)"}
38-
#############
28+
# Correctly name offset, weights and data objects in function call
29+
# (to allow glmPredict to work correctly later)
30+
calltext <- paste0("mg<-glm(formula,family=",family,",offset=",
31+
offsetName,",weights=",weightsName,",data=", dataName,",x=TRUE)")
3932

40-
#Activate family object (this may not be necessary as character string may already be OK
41-
#but just checking
42-
final.family.object<-eval(parse(text=family))
33+
eval(parse(text=calltext))
4334

35+
# update the call object to include the actual formula
36+
mg$call$formula <- formula
4437

45-
#Correctly name offset, weights and data objects in function call
46-
#(to allow glmPredict to work correctly later)
47-
calltext<-paste0("mg<-glm(formula,family=",family,",offset=",
48-
offsetName,",weights=",weightsName,",data=", dataName,",x=TRUE)")
49-
50-
eval(parse(text=calltext))
51-
52-
return(mg)
38+
return(mg)
5339

5440
}
55-
5641
# ASSIGN FUNCTION
5742
# glmSLMADS.assign

R/mdPatternDS.R

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
#'
2+
#' @title Missing data pattern with disclosure control
3+
#' @description This function is a serverside aggregate function that computes the
4+
#' missing data pattern using mice::md.pattern and applies disclosure control to
5+
#' prevent revealing small cell counts.
6+
#' @details This function calls the mice::md.pattern function to generate a matrix
7+
#' showing the missing data patterns in the input data. To ensure disclosure control,
8+
#' any pattern counts that are below the threshold (nfilter.tab, default=3) are
9+
#' suppressed.
10+
#'
11+
#' \strong{Suppression Method:}
12+
#'
13+
#' When a pattern count is below threshold:
14+
#' - Row name is changed to "suppressed(<N>)" where N is the threshold
15+
#' - All pattern values in that row are set to NA
16+
#' - Summary row is also set to NA (prevents back-calculation)
17+
#'
18+
#' \strong{Output Matrix Structure:}
19+
#'
20+
#' - Rows represent different missing data patterns (plus a summary row at the bottom)
21+
#' - Row names contain pattern counts (or "suppressed(<N>)" for invalid patterns)
22+
#' - Columns show 1 if variable is observed, 0 if missing
23+
#' - Last column shows total number of missing values per pattern
24+
#' - Last row shows total number of missing values per variable
25+
#'
26+
#' \strong{Note for Pooling:}
27+
#'
28+
#' When this function is called from ds.mdPattern with type='combine', suppressed
29+
#' patterns are excluded from pooling to prevent disclosure through subtraction.
30+
#' This means pooled counts may underestimate the true total when patterns are
31+
#' suppressed in some studies.
32+
#'
33+
#' @param x a character string specifying the name of a data frame or matrix
34+
#' containing the data to analyze for missing patterns.
35+
#' @return A list containing:
36+
#' \item{pattern}{The missing data pattern matrix with disclosure control applied}
37+
#' \item{valid}{Logical indicating if all patterns meet disclosure requirements}
38+
#' \item{message}{A message describing the validity status}
39+
#' @author Xavier Escribà Montagut for DataSHIELD Development Team
40+
#' @import mice
41+
#' @export
42+
#'
43+
mdPatternDS <- function(x){
44+
45+
#############################################################
46+
# MODULE 1: CAPTURE THE nfilter SETTINGS
47+
thr <- dsBase::listDisclosureSettingsDS()
48+
nfilter.tab <- as.numeric(thr$nfilter.tab)
49+
#############################################################
50+
51+
# Parse the input data name with error handling
52+
x.val <- tryCatch(
53+
{
54+
eval(parse(text=x), envir = parent.frame())
55+
},
56+
error = function(e) {
57+
stop(paste0("Object '", x, "' does not exist on the server"), call. = FALSE)
58+
}
59+
)
60+
61+
# Check object class
62+
typ <- class(x.val)
63+
64+
# Check that input is a data frame or matrix
65+
if(!("data.frame" %in% typ || "matrix" %in% typ)){
66+
stop(paste0("The input object must be of type 'data.frame' or 'matrix'. Current type: ",
67+
paste(typ, collapse = ", ")), call. = FALSE)
68+
}
69+
70+
# Use x.val for further processing
71+
x <- x.val
72+
73+
# Call mice::md.pattern with plot=FALSE
74+
pattern <- mice::md.pattern(x, plot = FALSE)
75+
76+
# Apply disclosure control
77+
# Pattern counts are stored in row names (except last row which is empty/summary)
78+
# The last row contains variable-level missing counts
79+
80+
validity <- "valid"
81+
n_patterns <- nrow(pattern) - 1 # exclude the summary row
82+
83+
if(n_patterns > 0){
84+
# Check pattern counts (stored in row names, excluding last row)
85+
pattern_counts <- as.numeric(rownames(pattern)[1:n_patterns])
86+
87+
# Find patterns with counts below threshold
88+
invalid_idx <- which(pattern_counts > 0 & pattern_counts < nfilter.tab)
89+
90+
if(length(invalid_idx) > 0){
91+
validity <- "invalid"
92+
93+
# For invalid patterns, suppress by:
94+
# - Setting row name to "suppressed"
95+
# - Setting all pattern values to NA
96+
rnames <- rownames(pattern)
97+
for(idx in invalid_idx){
98+
rnames[idx] <- paste0("suppressed(<", nfilter.tab, ")")
99+
pattern[idx, ] <- NA
100+
}
101+
rownames(pattern) <- rnames
102+
103+
# Also need to recalculate the last row (summary) if patterns were suppressed
104+
# Set to NA to avoid disclosures
105+
pattern[nrow(pattern), seq_len(ncol(pattern))] <- NA
106+
}
107+
}
108+
109+
# Return the pattern with validity information
110+
return(list(
111+
pattern = pattern,
112+
valid = (validity == "valid"),
113+
message = ifelse(validity == "valid",
114+
"Valid: all pattern counts meet disclosure requirements",
115+
paste0("Invalid: some pattern counts below threshold (",
116+
nfilter.tab, ") have been suppressed"))
117+
))
118+
}
119+
120+
#AGGREGATE FUNCTION
121+
# mdPatternDS

azure-pipelines.yml

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ variables:
2727
branchName: $(Build.SourceBranchName)
2828
test_filter: '*'
2929
_r_check_system_clock_: 0
30+
PERF_PROFILE: 'azure-pipeline'
3031

3132

3233

@@ -46,17 +47,18 @@ resources:
4647
# When and under what condition to run the pipeline.
4748
schedules:
4849
- cron: "0 0 * * 0"
49-
displayName: Weekly build - master
50+
displayName: Weekly build - latest release
5051
branches:
5152
include:
52-
- master
53-
- 6.3.0
53+
- 6.3.4
5454
always: true
5555
- cron: "0 1 * * *"
56-
displayName: Nightly build - v6.3.1-dev
56+
displayName: Nightly build - development branches
5757
branches:
5858
include:
59-
- v6.3.1-dev
59+
- v6.3.5-dev
60+
- v6.4.0-dev
61+
- v7.0.0-dev
6062
always: true
6163

6264
jobs:
@@ -188,6 +190,7 @@ jobs:
188190
# testthat::testpackage uses a MultiReporter, comprised of a ProgressReporter and JunitReporter
189191
# R output and messages are redirected by sink() to test_console_output.txt
190192
# junit reporter output is to test_results.xml
193+
191194
sudo R -q -e '
192195
library(covr);
193196
write.csv(
@@ -250,7 +253,6 @@ jobs:
250253
echo 'branch:'$(branchName) >> $(datetime).txt
251254
echo 'os:'$(lsb_release -ds) >> $(datetime).txt
252255
echo 'R:'$(R --version | head -n 1) >> $(datetime).txt
253-
echo 'opal:'$(opal system --opal localhost:8080 --user administrator --password "datashield_test&" --version) >> $(datetime).txt
254256
255257
workingDirectory: $(Pipeline.Workspace)/logs
256258
displayName: 'Write versions to file'

docs/404.html

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/LICENSE.html

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)