Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions .vscode/settings.json

This file was deleted.

2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ Imports:
utils,
tools,
tidyr,
glue,
methods,
R.utils,
rlang,
docxtractr,
Expand Down
7 changes: 5 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Generated by roxygen2: do not edit by hand

export()
export(DTAColumnSpec)
export(DTAColumnSpecCollection)
export(DTAContainer)
Expand All @@ -19,6 +18,7 @@ export(container)
export(convert_yaml_to_json)
export(export_column_value_table)
export(export_specs_table)
export(get_arrow_schema_type)
export(get_column_ids)
export(get_metadata)
export(get_rules)
Expand All @@ -29,6 +29,7 @@ export(matches_filename)
export(new_DTAContainer)
export(number_of_files)
export(read_file)
export(rule_check_condition)
export(rule_check_range)
export(rule_check_unique)
export(specs_from_list)
Expand All @@ -42,7 +43,6 @@ export(write_table_to_file)
import(S7)
importFrom(R.utils,gzip)
importFrom(arrow,arrow_table)
importFrom(arrow,read_csv_arrow)
importFrom(arrow,read_delim_arrow)
importFrom(cli,cli_abort)
importFrom(cli,cli_alert_danger)
Expand Down Expand Up @@ -77,13 +77,16 @@ importFrom(flextable,padding)
importFrom(flextable,save_as_docx)
importFrom(flextable,valign)
importFrom(flextable,width)
importFrom(glue,glue)
importFrom(jsonlite,fromJSON)
importFrom(jsonlite,toJSON)
importFrom(jsonlite,write_json)
importFrom(jsonvalidate,json_schema)
importFrom(magrittr,"%>%")
importFrom(methods,is)
importFrom(purrr,map)
importFrom(purrr,set_names)
importFrom(stats,setNames)
importFrom(tidyr,separate_wider_delim)
importFrom(tools,md5sum)
importFrom(utils,setTxtProgressBar)
Expand Down
2 changes: 1 addition & 1 deletion R/00_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ class_DTAColumnSpecCollection <- S7::new_class(
"DTAColumnSpecCollection"
)
class_DTAContainer <- S7::new_class("DTAContainer")
class_DTAMetadata <- S7::new_class("DTAMetadata")
class_DTAMetadata <- S7::new_class("DTAMetaData")
class_character_or_null <- class_character | class_null
class_numeric_or_null <- class_numeric | class_null
class_character_or_numeric_or_null <- class_character |
Expand Down
1 change: 1 addition & 0 deletions R/DTA-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ DTA <- new_class(
#' get_metadata(DTA)
#' }
#' @name get_metadata
#' @rdname get_metadata-DTA
#' @export
if (!exists("get_metadata", mode = "function")) {
get_metadata <- new_generic("get_metadata", "x")
Expand Down
6 changes: 4 additions & 2 deletions R/DTAColumnSpec-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,20 @@ DTAColumnSpec <- new_class(
#' @description
#' Returns the corresponding Arrow schema type for a given DTAColumnSpec
#' object based on its `type` property.
#' @importFrom glue glue
#' @param x A DTAColumnSpec object.
#' @return A character string representing the Arrow schema type.
#' @examples
#' col <- DTAColumnSpec(id = "AGE", type = "Char")
#' get_arrow_schema_type(col)
#' @export
get_arrow_schema_type <- function(x) {
if (!inherits(x, "DTAColumnSpec")) {
if (!inherits(x, "DTAtools::DTAColumnSpec")) {
stop("Input must be a DTAColumnSpec object.")
}
type <- x@type
if (is.null(type)) {
stop(str_glue("Type is not set for {x$id}."))
stop(glue::glue("Type is not set for {x$id}."))
}
switch(
type,
Expand Down
7 changes: 5 additions & 2 deletions R/DTAColumnSpecCollection-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,12 @@ DTAColumnSpecCollection <- new_class(
#' \dontrun{
#' names <- get_column_ids(collection)
#' }
#' @name get_column_ids-DTAColumnSpecCollection
#' @name get_column_ids
#' @rdname get_column_ids-DTAColumnSpecCollection
#' @export
get_column_ids <- new_generic("get_column_ids", "x")
if (!exists("get_column_ids", mode = "function")) {
get_column_ids <- new_generic("get_column_ids", "x")
}
method(get_column_ids, DTAColumnSpecCollection) <- function(x) {
return(names(x@columns))
}
Expand Down
20 changes: 6 additions & 14 deletions R/DTAContainer-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -315,29 +315,20 @@ write_table_to_file <- function(
#' \dontrun{
#' get_metadata(DTAContainer)
#' }
#' @name metadata-DTAContainer
#' @name get_metadata
#' @rdname get_metadata-DTAContainer
#' @export
if (!exists("get_metadata", mode = "function")) {
get_metadata <- new_generic("get_metadata", "x")
}
#' @export
method(get_metadata, DTAContainer) <- function(x) {
return(x@specs@metadata)
}

#' @title Get Rules
#' @description
#' Method to get Rules from DTAContainer.
#' @param x An object of class DTAContainer
#' @return A list with rules information
#' @examples
#' \dontrun{
#' get_rules(DTAContainer)
#' }
#' @name get_rules-DTAContainer
#' @export
if (!exists("get_rules", mode = "function")) {
get_rules <- new_generic("get_rules", "x")
}
#' @export
method(get_rules, DTAContainer) <- function(x) {
return(x@specs@rules)
}
Expand All @@ -353,7 +344,8 @@ method(get_rules, DTAContainer) <- function(x) {
#' column_format <- number_of_files(dtafileinfo)
#' }
# Define the generic only if it doesn't already exist
#' @name get_max_number_of_files-DTAContainer
#' @name get_max_number_of_files
#' @rdname get_max_number_of_files-DTAContainer
if (!exists("get_max_number_of_files", mode = "function")) {
get_max_number_of_files <- new_generic("get_max_number_of_files", "x")
}
Expand Down
6 changes: 4 additions & 2 deletions R/evaluateSchemaRules.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ rule_check_unique <- function(rule, df) {
id = rule@id,
valid = FALSE,
message = sprintf(
"Rule '%s' violated: %d duplicate values found in column %s",
"Rule '%s' violated: %d duplicate row found when selecting column(s): %s",
rule@id,
sum(duplicated_rows, na.rm = TRUE),
col
paste(cols, collapse = ", ")
)
)
} else {
Expand Down Expand Up @@ -236,6 +236,7 @@ apply_schema_rules <- function(rules, df) {
#' Aborts with a CLI error if any rule fails.
#' @param DTAColumnSpecCollection A `DTAColumnSpecCollection` with rules defined.
#' @param table A data.frame to validate.
#' @importFrom stats setNames
#' @return (Invisibly) the list of rule results from `apply_schema_rules()`.
#' @export
validate_rules <- function(DTAColumnSpecCollection, table) {
Expand All @@ -257,6 +258,7 @@ validate_rules <- function(DTAColumnSpecCollection, table) {
}

#' @keywords internal
#' @importFrom methods is
check_rule_class <- function(x) {
if (methods::is(x, "DTAtools::DTARule")) {
invisible(TRUE)
Expand Down
18 changes: 9 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ Specification contain definitions of:

- **columns:** defines column names, format specification and optionally patterns or selection for values in the column. More details in section [YAML Column Format](#yaml-column-format).
- **rules:** defines rules for columns, e.g. if column A is empty, then columns B must contain a value. More details in section [YAML Schema Rule Specification](#yaml-schema-rule-specification).
- **metadata:** DTA metadata such as version and author. More details in section [YAML Metadata].
- **metadata:** DTA metadata such as version and author. More details in section [YAML Metadata](#yaml-metadata).

First, you import the specifications from a YAML file.

Expand Down Expand Up @@ -105,19 +105,19 @@ writeTableToFile(

To export the specifications stored in the YAML file as a table in a Word file, use this function:

``` r
```r
export_specs_table(specs, "dta_spec_table.docx")
```

### Export Column Values Table

Export all values defined for a column to a Word table.

``` r
```r
export_column_value_table(specs, "column_value_table.docx", id = "VISIT")
```

## YAML Column Format {#yaml-column-format}
## YAML Column Format

Column specifications can contain

Expand Down Expand Up @@ -158,7 +158,7 @@ columns:
- "other"
```

## YAML Schema Rule Specification {#yaml-schema-rule-specification}
## YAML Schema Rule Specification

The `DTAtools` package supports schema-based validation of tabular data using declarative rules defined in YAML. These rules are evaluated after column-level validation and allow for complex inter-column logic enforcement.

Expand Down Expand Up @@ -332,13 +332,13 @@ metadata:

### Validation Functions

- `validate_table()`: Validates a data frame against the specs
- `validate_table()`: Validates a data frame against the specs

### Export Functions

- `write_table_to_file()`: Write validated tables to disk with optional compression and metadata
- `export_specs_table()`: Export full spec documentation to Word
- `export_column_value_table()`: Export allowed values of a column to Word
- `write_table_to_file()`: Write validated tables to disk with optional compression and metadata
- `export_specs_table()`: Export full spec documentation to Word
- `export_column_value_table()`: Export allowed values of a column to Word

#### Rules Engine

Expand Down
4 changes: 3 additions & 1 deletion inst/extdata/params_spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ columns:
label: Study Identifier
type: Char
nullable: false
length: 10
values:
- 1234-5678
pattern: ^[0-9]{4}-[0-9]{4}$
description: Unique study ID
- id: VISIT
label: Visit
type: Char
length: 12
nullable: false
values:
- V01
Expand All @@ -38,6 +39,7 @@ columns:
- id: INCLUDE
label: Include in analysis
type: Char
length: 1
nullable: true
values:
- "Y"
Expand Down
6 changes: 1 addition & 5 deletions man/get_column_ids-DTAColumnSpecCollection.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/get_max_number_of_files-DTAContainer.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

File renamed without changes.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 0 additions & 19 deletions man/get_rules-DTAContainer.Rd

This file was deleted.

2 changes: 1 addition & 1 deletion tests/testthat/test-DTAColumnSpecCollection.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ test_that("specs_from_list constructs valid object", {
)

# check metadata method
expect_equal(metadata(collection), list())
expect_equal(get_metadata(collection), list())

# Test specs_to_list

Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-DTAContainer.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@ test_that("DTAContainer object is created and tables are accessible", {
expect_equal(get_data(data_obj, "test")$STUDYID[1], "1234")

# check metadata method
expect_equal(metadata(data_obj), list())
expect_equal(get_metadata(data_obj), list())
})
Loading
Loading