diff --git a/utilities/R/README.md b/utilities/R/README.md
new file mode 100644
index 0000000..d0badc2
--- /dev/null
+++ b/utilities/R/README.md
@@ -0,0 +1,126 @@
+# ARS R Utilities
+
+This directory contains R utilities for processing Analysis Results Standard (ARS) JSON files, providing R equivalents of the SAS macros with SAS compatibility.
+
+## Main Orchestration Function
+
+### `create_class_datasets_for_json_reportingevent.R`
+
+The main entry point that orchestrates the ARS JSON processing pipeline. This R implementation matches SAS structure and content.
+
+#### Usage
+
+**R Function:**
+```r
+source("utilities/R/create_class_datasets_for_json_reportingevent.R")
+
+result <- create_class_datasets_for_json_reportingevent(
+  json_schema_file = "model/ars_ldm.json",
+  reporting_event_json_file = "workfiles/examples/ARS v1/FDA Standard Safety Tables and Figures.json",
+  output_directory = tempdir(),  # or specify custom directory: "./output"
+  temp_directory = tempdir(),    # or specify custom directory: "./temp" 
+  verbose = TRUE
+)
+```
+
+**Command Line:**
+```bash
+# Process FDA example reporting event using temporary directory
+TEMP_DIR=$(Rscript -e "cat(tempdir())") && \
+Rscript utilities/R/create_class_datasets_for_json_reportingevent.R \
+  model/ars_ldm.json \
+  "workfiles/examples/ARS v1/FDA Standard Safety Tables and Figures.json" \
+  "$TEMP_DIR"
+
+# Or specify a custom output directory
+Rscript utilities/R/create_class_datasets_for_json_reportingevent.R \
+  model/ars_ldm.json \
+  "workfiles/examples/ARS v1/FDA Standard Safety Tables and Figures.json" \
+  ./output
+```
+
+
+#### Processing Pipeline
+
+The pipeline uses a **schema-driven approach** with **class-focused extraction**:
+
+1. **Read Schema Definitions** - Extract ARS class/slot definitions from JSON schema
+2. **Extract & Format Class Datasets** - Create SAS-compatible class datasets with:
+   - Schema-driven field extraction (only scalar attributes per class)
+- **26+ Class datasets** saved as CSV files (Analysis, Operation, OperationResult, etc.)
+- **SAS-compatible structure** with proper TABLEPATH and DATAPATH variables
+- **Processing summary** with statistics and timing
+- **Record counts** matching SAS output
+- **Output location**: Files saved to specified directory (temporary by default, or custom path)
+#### Output
+
+- **26+ Class datasets** saved as CSV files (Analysis, Operation, OperationResult, etc.)
+- **SAS-compatible structure** with proper TABLEPATH and DATAPATH variables
+- **Processing summary** with statistics and timing
+- **Record counts** matching SAS output
+
+## Core Helper Functions
+
+Located in `helpers/` subdirectory:
+
+### `get_class_slots.R`
+Extracts ARS class and slot definitions from the JSON schema file (`ars_ldm.json`). Creates the foundation for schema-driven processing.
+
+### `schema_driven_extractor.R`
+Core extraction engine that:
+- Processes JSON based on ARS schema class definitions
+- Extracts only scalar attributes for each class (no nested objects)
+- Applies SAS-compatible formatting (tablepath, array normalization)
+- Ensures normalized dataset structure
+
+## Key Features
+
+### ✅ SAS Compatibility
+- **Structure match** - Same column names, order, and data types as SAS
+- **Record counts** - Analysis: 20, Operation: 14, OperationResult: 93, etc.
+- **Path format** - `/root/analyses`, `/root/methods/operations` format
+- **Array normalization** - `categoryIds` → `categoryIds1`, `categoryIds2`
+
+### ✅ Schema-Driven Architecture
+- **Class-focused extraction** - Each class extracts only its own scalar attributes
+- **No over-extraction** - Complex nested objects handled by their respective classes
+- **Relational separation** - Normalized datasets, not flattened structures
+
+```bash
+# Process FDA example reporting event (using temporary directory)
+TEMP_DIR=$(Rscript -e "cat(tempdir())") && \
+Rscript utilities/R/create_class_datasets_for_json_reportingevent.R \
+  model/ars_ldm.json \
+  "workfiles/examples/ARS v1/FDA Standard Safety Tables and Figures.json" \
+  "$TEMP_DIR"
+
+# Creates: Analysis.csv, Operation.csv, OperationResult.csv, ResultGroup.csv, etc.
+```P_DIR=$(Rscript -e "cat(tempdir())") && \
+Rscript utilities/R/create_class_datasets_for_json_reportingevent.R \
+  model/ars_ldm.json \
+  "workfiles/examples/ARS v1/FDA Standard Safety Tables and Figures.json" \
+  "$TEMP_DIR"
+
+# Creates: Analysis.csv, Operation.csv, OperationResult.csv, ResultGroup.csv, etc.
+```
+
+## Requirements
+
+- R packages: `jsonlite`, `dplyr`
+- ARS JSON schema file (`ars_ldm.json`)
+- Valid ARS reporting event JSON file
+
+## Data Reassembly
+
+The DATAPATH variable uses JSON Pointer notation enabling reassembly of related data:
+
+```r
+# Example: Find ResultGroups that belong to each OperationResult
+combined_results <- result$class_datasets$OperationResult %>%
+  cross_join(result$class_datasets$ResultGroup) %>%
+  filter(startsWith(datapath.y, datapath.x)) %>%
+  select(-datapath.y) %>%
+  rename(datapath = datapath.x)
+```
+
+This works because child object DATAPATH values (e.g., `/analyses/0/results/0/resultGroups/0`) contain their parent DATAPATH as a prefix (e.g., `/analyses/0/results/0`).
diff --git a/utilities/R/create_class_datasets_for_json_reportingevent.R b/utilities/R/create_class_datasets_for_json_reportingevent.R
new file mode 100644
index 0000000..e832e91
--- /dev/null
+++ b/utilities/R/create_class_datasets_for_json_reportingevent.R
@@ -0,0 +1,313 @@
+#' Create Class Datasets for JSON ReportingEvent
+#'
+#' Reads in a JSON ReportEvent to create a separate dataset for each ARS class
+#' referenced in the reporting event. This is the main orchestration function
+#' that replicates the functionality of create_class_datasets_for_json_reportingevent.sas
+#'
+#' @description
+#' In the created class datasets:
+#' - There is one observation for each instance of the class.
+#' - On each observation:
+#'   * The TABLEPATH variable contains the path for source dataset mapped in the JSON file.
+#'   * The DATAPATH variable contains a value (specified using JSON Pointer notation)
+#'     indicating the location of the corresponding data in the input JSON reporting event file.
+#' - Multiple atomic values (e.g., multiple strings or multiple numeric values) for a
+#'   single attribute/slot are transposed into separate variables (e.g., value1, value2, etc.).
+#'
+#' @param json_schema_file Character string. Path to the JSON-Schema definition file
+#'   for the ARS model (ars_ldm.json)
+#' @param reporting_event_json_file Character string. Path to the JSON file containing
+#'   the reporting event information to be converted to datasets
+#' @param output_directory Character string. Path to the directory that will contain
+#'   the class-specific datasets created from the JSON reporting event file
+#' @param temp_directory Character string. Path to temporary directory for intermediate files.
+#'   Defaults to "temp/class_datasets_pipeline"
+#' @param class_library_name Character string. Name prefix for the class dataset library.
+#'   Defaults to "classds"
+#' @param verbose Logical. Whether to print detailed progress messages. Defaults to TRUE
+#' @param clean_temp Logical. Whether to clean up temporary files after processing. Defaults to FALSE
+#'
+#' @return List containing:
+#'   - class_datasets: List of data frames, one per class
+#'   - classes_processed: Character vector of class names created
+#'   - output_files: Character vector of output file paths
+#'   - summary: List with processing statistics
+#'   - temp_directory: Path to temporary files directory
+#'
+#' @details
+#' This function orchestrates the complete ARS JSON processing pipeline using the modular
+#' schema-driven extractor architecture:
+#' 1. **get_class_slots**: Create dataset containing class/slot definitions from JSON-Schema file
+#' 2. **extract_schema_driven_datasets**: Extract class-specific datasets using modular extractors
+#' 3. **apply_sas_compatible_formatting**: Apply SAS-compatible formatting and structure
+#'
+#' The modular extractor uses specialized modules for different ARS class types:
+#' - extractor_analysis.R: Analysis, Operation, AnalysisSet, DataSubset classes
+#' - extractor_output.R: Output, DisplaySection, ListOfContents classes
+#' - extractor_reference.R: DocumentReference, PageRef classes
+#' - extractor_metadata.R: TerminologyExtension, SponsorTerm classes
+#'
+#' The DATAPATH values are '/' delimited lists of attribute/slot names and, for repeating
+#' objects, zero-indexed ordinal values. For example, a DATAPATH value of
+#' "/methods/1/operations/0" indicates the first item (ordinal = 0) in the list of items
+#' specified for the "operations" attribute of the second item (ordinal = 1) in the list
+#' of items specified for the "methods" attribute of the reporting event.
+#'
+#' The DATAPATH variable may be used to reassemble information that is split across
+#' multiple class datasets. For example, OperationResult records can be combined with
+#' corresponding ResultGroup records using data manipulation:
+#'
+#' ```r
+#' combined_results <- operation_result %>%
+#'   left_join(result_group,
+#'             by = join_by(substr(result_group$datapath, 1, nchar(operation_result$datapath)) ==
+#'                         operation_result$datapath))
+#' ```
+#'
+#' This works because the first part of a child object's DATAPATH value matches the
+#' DATAPATH value of its parent.
+#'
+#' @examples
+#' \dontrun{
+#' # Basic usage
+#' result <- create_class_datasets_for_json_reportingevent(
+#'   json_schema_file = "model/ars_ldm.json",
+#'   reporting_event_json_file = "workfiles/examples/ARS v1/FDA Standard Safety Tables and Figures.json",
+#'   output_directory = tempdir()
+#' )
+#'
+#' # Access created class datasets
+#' analysis_data <- result$class_datasets$Analysis
+#' output_data <- result$class_datasets$Output
+#'
+#' result <- create_class_datasets_for_json_reportingevent(
+#'   json_schema_file = "model/ars_ldm.json",
+#'   reporting_event_json_file = "workfiles/examples/ARS v1/Common Safety Displays.json",
+#'   output_directory = tempdir()
+#' )
+#'
+#' # View processing summary
+#' print(result$summary)
+#' }
+#'
+#' @export
+create_class_datasets_for_json_reportingevent <- function(
+  json_schema_file,
+  reporting_event_json_file,
+  output_directory,
+  temp_directory = tempdir(),
+  class_library_name = "classds",
+  verbose = TRUE,
+  clean_temp = FALSE
+) {
+  # Input validation
+  if (!file.exists(json_schema_file)) {
+    stop("JSON schema file not found: ", json_schema_file)
+  }
+
+  if (!file.exists(reporting_event_json_file)) {
+    stop("Reporting event JSON file not found: ", reporting_event_json_file)
+  }
+
+  if (!dir.exists(dirname(output_directory))) {
+    stop("Output directory parent does not exist: ", dirname(output_directory))
+  }
+
+  # Create output and temp directories if they don't exist
+  if (!dir.exists(output_directory)) {
+    dir.create(output_directory, recursive = TRUE)
+  }
+
+  if (!dir.exists(temp_directory)) {
+    dir.create(temp_directory, recursive = TRUE)
+  }
+
+  # Load required libraries
+  if (!requireNamespace("jsonlite", quietly = TRUE)) {
+    stop("Package 'jsonlite' is required but not installed")
+  }
+
+  if (!requireNamespace("dplyr", quietly = TRUE)) {
+    stop("Package 'dplyr' is required but not installed")
+  }
+
+  # Load libraries for use by helper functions
+  library(jsonlite)
+  library(dplyr)
+
+  # Source required helper functions
+  helpers_dir <- file.path("utilities", "R", "helpers")
+
+  required_helpers <- c(
+    "get_class_slots.R",
+    "schema_driven_extractor_modular.R"
+  )
+
+  for (helper in required_helpers) {
+    helper_path <- file.path(helpers_dir, helper)
+    if (file.exists(helper_path)) {
+      source(helper_path)
+    } else {
+      stop("Required helper function not found: ", helper_path)
+    }
+  }
+
+  if (verbose) {
+    cat("=== ARS JSON REPORTING EVENT PROCESSING ===\n")
+    cat("Schema file:", json_schema_file, "\n")
+    cat("JSON file:", reporting_event_json_file, "\n")
+    cat("Output directory:", output_directory, "\n")
+    cat("Temp directory:", temp_directory, "\n\n")
+  }
+
+  start_time <- Sys.time()
+
+  # =========================================================================
+  # STEP 1: Create dataset containing class/slot definitions from JSON-Schema
+  # =========================================================================
+
+  if (verbose) {
+    cat("Step 1: Reading class/slot definitions from JSON schema...\n")
+  }
+
+  class_slots <- get_class_slots(json_schema_file)
+
+  if (verbose) {
+    cat("  ✓ Found", nrow(class_slots), "class/slot definitions\n\n")
+  }
+
+  # =========================================================================
+  # STEP 2: Extract and format class datasets from JSON using modular schema extractor
+  # =========================================================================
+
+  if (verbose) {
+    cat("Step 2: Extracting and formatting class datasets using modular schema extractor...\n")
+  }
+
+  # Extract class datasets using modular schema-driven approach
+  # This loads and uses specialized extraction modules
+  class_datasets <- extract_schema_driven_datasets(reporting_event_json_file, class_slots)
+
+  # Apply SAS-compatible formatting (tablepath, array normalization, column cleanup)
+  class_datasets <- apply_sas_compatible_formatting(class_datasets, class_slots)
+
+  # Write datasets to output directory
+  output_files <- character(0)
+  for (class_name in names(class_datasets)) {
+    dataset <- class_datasets[[class_name]]
+    if (nrow(dataset) > 0) {
+      output_file <- file.path(output_directory, paste0(class_name, ".csv"))
+      write.csv(dataset, output_file, row.names = FALSE)
+      output_files <- c(output_files, output_file)
+    }
+  }
+
+  class_result <- list(
+    class_datasets = class_datasets,
+    classes_processed = names(class_datasets),
+    output_files = output_files
+  )
+
+  if (verbose) {
+    cat("  ✓ Created", length(class_result$class_datasets), "ARS class datasets\n")
+    cat("  ✓ Classes processed:", paste(class_result$classes_processed, collapse = ", "), "\n\n")
+  }
+
+  # =========================================================================
+  # STEP 3: Generate summary and cleanup
+  # =========================================================================
+
+  end_time <- Sys.time()
+  processing_time <- as.numeric(difftime(end_time, start_time, units = "secs"))
+
+  # Create comprehensive summary
+  summary_info <- list(
+    processing_time_seconds = processing_time,
+    json_schema_file = json_schema_file,
+    reporting_event_json_file = reporting_event_json_file,
+    output_directory = output_directory,
+    temp_directory = temp_directory,
+    class_datasets_created = length(class_result$class_datasets),
+    classes_processed = class_result$classes_processed,
+    total_observations = if (length(class_result$class_datasets) > 0) sum(sapply(class_result$class_datasets, nrow)) else 0,
+    total_variables = if (length(class_result$class_datasets) > 0) sum(sapply(class_result$class_datasets, ncol)) else 0
+  )
+
+  if (verbose) {
+    cat("=== PROCESSING COMPLETE ===\n")
+    cat("Total processing time:", round(processing_time, 2), "seconds\n")
+    cat("Classes created:", summary_info$class_datasets_created, "\n")
+    cat("Total observations:", summary_info$total_observations, "\n")
+    cat("Output saved to:", output_directory, "\n")
+
+    if (!clean_temp) {
+      cat("Temporary files retained in:", temp_directory, "\n")
+    }
+  }
+
+  # Optional cleanup
+  if (clean_temp && dir.exists(temp_directory)) {
+    if (verbose) {
+      cat("Cleaning up temporary files...\n")
+    }
+    unlink(temp_directory, recursive = TRUE)
+  }
+
+  # Return comprehensive results
+  result <- list(
+    class_datasets = class_result$class_datasets,
+    classes_processed = class_result$classes_processed,
+    output_files = class_result$output_files,
+    summary = summary_info,
+    temp_directory = if (!clean_temp) temp_directory else NULL
+  )
+
+  return(result)
+}
+
+# ============================================================================
+# COMMAND LINE INTERFACE
+# ============================================================================
+
+#' Command Line Interface for ARS JSON Processing
+#'
+#' Provides a command-line interface similar to the SAS version
+process_json_reportingevent_cli <- function() {
+  # Parse command line arguments
+  args <- commandArgs(trailingOnly = TRUE)
+
+  if (length(args) == 0) {
+    return(invisible())
+  }
+
+  if (length(args) < 3) {
+    stop("Error: At least 3 arguments required. Run with no arguments for usage.")
+  }
+
+  schema_file <- args[1]
+  json_file <- args[2]
+  output_dir <- args[3]
+  temp_dir <- if (length(args) >= 4) args[4] else "temp/class_datasets_pipeline"
+
+  # Run the processing
+  cat("Starting ARS JSON ReportingEvent processing...\n")
+
+  result <- create_class_datasets_for_json_reportingevent(
+    json_schema_file = schema_file,
+    reporting_event_json_file = json_file,
+    output_directory = output_dir,
+    temp_directory = temp_dir,
+    verbose = TRUE
+  )
+
+  cat("\nProcessing completed successfully!\n")
+  cat("Class datasets created:", length(result$class_datasets), "\n")
+  cat("Output location:", output_dir, "\n")
+
+  return(invisible(result))
+}
+
+# Run CLI if script is executed directly
+if (!interactive()) {
+  process_json_reportingevent_cli()
+}
diff --git a/utilities/R/helpers/README_modules.md b/utilities/R/helpers/README_modules.md
new file mode 100644
index 0000000..7c570b2
--- /dev/null
+++ b/utilities/R/helpers/README_modules.md
@@ -0,0 +1,337 @@
+# Schema-Driven ARS Class Dataset Generation Framework
+
+Uses ARS JSON schema to map JSON reporting events into class-specific datasets. Modular architecture for maintainability and collaborative development.
+
+## Framework Overview
+
+ARS schema defines data structure. System extracts JSON into normalized, SAS-compatible datasets for each class:
+
+```
+┌─────────────────────────────────────────┐
+│         ARS JSON Schema                 │  ← Schema defines classes/slots
+│      (ars_ldm.json)                     │
+└─────────────────┬───────────────────────┘
+                  │
+                  ▼ get_class_slots()
+┌─────────────────────────────────────────┐
+│      Class/Slot Definitions             │  ← Schema parsed into workable format
+│   (parent_class, slot, range, is_array) │
+└─────────────────┬───────────────────────┘
+                  │
+                  ▼ extract_schema_driven_datasets()
+┌─────────────────────────────────────────┐
+│    JSON Reporting Event Data            │  ← Input JSON data file
+│       (reporting_event.json)           │
+└─────────────────┬───────────────────────┘
+                  │
+    ┌─────────────┼─────────────┐
+    │             │             │
+    ▼             ▼             ▼
+┌─────────┐ ┌─────────────┐ ┌─────────────┐
+│Analysis │ │   Output    │ │ Reference   │  ← Specialized extractors
+│ Module  │ │   Module    │ │   Module    │
+└─────────┘ └─────────────┘ └─────────────┘
+    │             │             │
+    ▼             ▼             ▼
+┌─────────────────────────────────────────┐
+│        Class Datasets                   │  ← Multiple separate CSV files
+│   (Analysis.csv, Operation.csv, etc.)   │
+└─────────────────────────────────────────┘
+```
+
+## Schema-to-Dataset Mapping Process
+
+### 1. Schema Analysis Phase
+Parse ARS JSON schema:
+
+```r
+# Parse ARS schema into class/slot definitions
+class_slots <- get_class_slots('model/ars_ldm.json')
+```
+
+Extracts:
+- Classes: Analysis, Operation, Output, DocumentReference, etc. (36 total)
+- Slots: Properties of each class (id, name, description, etc.)
+- Types: Data types and array indicators
+- Relationships: How classes reference each other
+
+### 2. JSON Data Mapping Phase
+Map JSON paths to class instances:
+
+```r
+# Extract data for each schema-defined class
+result <- extract_schema_driven_datasets('<reporting_event_json_file>', class_slots)
+```
+
+Mapping Examples:
+- `/methods/0/operations/1` → Operation class record (`tablepath: /root/methods/operations`)
+- `/analyses/0` → Analysis class record (`tablepath: /root/analyses`)
+- `/outputs/0/displays/0` → OutputDisplay class record (`tablepath: /root/outputs/displays`)
+- `/analyses/0/documentRefs/0` → DocumentReference class record (`tablepath: /root/analyses/documentRefs`)
+
+### 3. Dataset Generation Phase
+Each class becomes a separate dataset:
+
+| Column | Purpose | Example |
+|--------|---------|---------|
+| `tablepath` | Schema location (indices removed) | `/root/methods/operations` |
+| `datapath` | Exact JSON instance path | `/methods/0/operations/1` |
+| `id` | Class instance ID | `OP_001_1` |
+| `name` | Instance name | `Count of Subjects` |
+| *...other slots* | Class-specific fields | `resultPattern`, `order`, etc. |
+
+> 💡 **Traceability**: Every record can be traced from its `datapath` (exact JSON location) back to its `tablepath` (schema class definition). See examples in [`SCHEMA_MAPPING_EXAMPLES.md`](SCHEMA_MAPPING_EXAMPLES.md)
+
+## Architecture Overview
+
+Modular system with specialized extraction modules:
+
+```
+┌─────────────────────────────────────────┐
+│     schema_driven_extractor_modular     │  ← Main orchestrator
+│              (Entry Point)              │
+└─────────────────┬───────────────────────┘
+                  │
+    ┌─────────────┼─────────────┐
+    │             │             │
+    ▼             ▼             ▼
+┌─────────┐ ┌─────────────┐ ┌─────────────┐
+│Analysis │ │   Output    │ │ Reference   │  ← Specialized modules
+│ Module  │ │   Module    │ │   Module    │
+└─────────┘ └─────────────┘ └─────────────┘
+    │             │             │
+    ▼             ▼             ▼
+┌─────────────────────────────────────────┐
+│        extractor_record_utils           │  ← Core utilities
+│     (Record creation & formatting)      │
+└─────────────────────────────────────────┘
+```
+
+## Module Structure
+
+### Main Entry Point
+- `schema_driven_extractor_modular.R` - Orchestrates extraction workflow and loads modules
+
+### Core Utilities
+- `extractor_record_utils.R` - Record creation, formatting, and SAS compatibility functions
+
+### Specialized Extraction Modules
+- `extractor_analysis.R` - Analysis, Operation, AnalysisSet, DataSubset, Group, WhereClause
+- `extractor_output.R` - Output, DisplaySection, ListOfContents, OrderedListItem
+- `extractor_reference.R` - DocumentReference, PageNameRef, PageNumberListRef
+- `extractor_metadata.R` - TerminologyExtension, SponsorTerm, AnalysisOutputCategorization
+
+
+## Workflow: Schema → Datasets
+
+### End-to-End Process
+
+```r
+# 1. SCHEMA ANALYSIS: Parse ARS schema structure
+source('utilities/R/helpers/get_class_slots.R')
+class_slots <- get_class_slots('model/ars_ldm.json')
+# Result: 150+ class/slot definitions from schema
+
+# 2. JSON MAPPING: Map JSON data using schema knowledge
+source('utilities/R/helpers/schema_driven_extractor_modular.R')
+class_datasets <- extract_schema_driven_datasets('<reporting_event_json_file>', class_slots)
+# Result: Multiple class datasets with extracted records
+
+# 3. SAS FORMATTING: Apply SAS-compatible formatting
+formatted_datasets <- apply_sas_compatible_formatting(class_datasets, class_slots)
+# Result: Datasets ready for statistical analysis
+```
+
+### Standard Usage
+
+```r
+# Load the complete pipeline function
+source('utilities/R/create_class_datasets_for_json_reportingevent.R')
+
+# Generate all class datasets from schema + JSON
+result <- create_class_datasets_for_json_reportingevent(
+  json_schema_file = 'model/ars_ldm.json',
+  reporting_event_json_file = '<reporting_event_json_file>',
+  output_directory = 'output/class_datasets'
+)
+
+# Output: Multiple CSV files (Analysis.csv, Operation.csv, etc.)
+```
+
+Process:
+1. Schema Analysis - Parse ARS schema into class definitions
+2. Module Loading - Load specialized extraction modules
+3. JSON Mapping - Map JSON data to schema classes
+4. Record Creation - Generate records with tablepath/datapath
+5. SAS Formatting - Apply SAS-compatible formatting
+6. Dataset Output - Write CSV files for each class
+
+### Framework Results
+
+| Dataset Type | Example Classes | Record Count | Purpose |
+|--------------|-----------------|--------------|---------|
+| **Analysis** | Analysis, Operation, AnalysisSet | Variable records | Statistical analysis definitions |
+| **Output** | Output, DisplaySection, OrderedDisplay | Variable records | Report structure and formatting |
+| **Reference** | DocumentReference, PageRef | Variable records | Citations and cross-references |
+| **Metadata** | TerminologyExtension, SponsorTerm | Variable records | Controlled terminology and categorization |
+
+**Total: Multiple datasets, variable record counts, processing time <1 second**
+
+> 📋 **Class Examples**: See [`SCHEMA_MAPPING_EXAMPLES.md`](SCHEMA_MAPPING_EXAMPLES.md) for detailed walk-throughs of Analysis, Output, and DocumentReference classes showing the complete schema → JSON → dataset process.
+
+## How to Work with Modules
+
+### Module Development
+
+#### Adding New Classes
+1. Choose appropriate module based on class type
+2. Add extraction function in chosen module
+3. Register class in `schema_driven_extractor_modular.R`
+
+#### Testing Individual Modules
+```r
+# Load utilities first
+source('utilities/R/helpers/extractor_record_utils.R')
+
+# Load specific module
+source('utilities/R/helpers/extractor_analysis.R')
+
+# Test specific extraction function
+result <- extract_operations(json_data, class_definition)
+```
+
+### Standard Integration
+```r
+# Load the modular extractor
+source('utilities/R/helpers/schema_driven_extractor_modular.R')
+
+# Extract datasets with automatic module loading
+result <- extract_schema_driven_datasets(json_file, class_slots)
+```
+
+## Schema-Driven Approach Benefits
+
+### Framework Advantages
+- Schema-Driven - Uses ARS JSON schema as single source of truth
+- Schema Mapping - No manual coding required for new ARS classes
+- Standardized Output - All datasets have consistent tablepath/datapath structure
+- Type Safety - Schema enforces data types and array handling
+- Traceability - Every record links back to exact JSON location
+
+> See [`SCHEMA_MAPPING_EXAMPLES.md`](SCHEMA_MAPPING_EXAMPLES.md) for detailed examples
+
+### Schema-to-Code Translation
+
+```r
+# Schema Definition (ars_ldm.json)
+"Operation": {
+  "properties": {
+    "id": {"type": "string"},
+    "name": {"type": "string"},
+    "resultPattern": {"type": "string"}
+  }
+}
+
+# Becomes Class Definition
+class_definition <- data.frame(
+  parent_class = "Operation",
+  slot = c("id", "name", "resultPattern"),
+  range = c("string", "string", "string"),
+  is_array = c(0, 0, 0)
+)
+
+# Becomes Dataset Records
+# tablepath: /root/methods/operations
+# datapath: /methods/0/operations/1
+# id: OP_001_1
+# name: Count of Subjects
+# resultPattern: N=XX
+```
+
+> 📖 **For walk-through examples** showing schema-to-dataset mapping for Analysis, Output, and DocumentReference classes, see [`SCHEMA_MAPPING_EXAMPLES.md`](SCHEMA_MAPPING_EXAMPLES.md)
+
+### Development Benefits
+- Maintainability - Each module focuses on related functionality (largest module: 28KB)
+- Readability - Clear separation of concerns
+- Efficiency - Modify only relevant modules
+- Collaboration - Multiple developers can work on different modules
+- Testing - Test individual modules independently
+
+### Architecture Benefits
+- Separation of Concerns - Analysis, Output, Reference, and Metadata concerns isolated
+- Extensibility - Easy to add new ARS classes
+- Debugging - Problems isolated to specific functional areas
+- Scalability - System grows cleanly as new classes are added
+
+## Module Responsibilities
+
+| Module | Size | Classes | Purpose |
+|--------|------|---------|---------|
+| `extractor_analysis.R` | 27KB | Analysis, Operation, AnalysisSet, DataSubset, Group, WhereClause, etc. | Core analysis data extraction |
+| `extractor_output.R` | 17KB | Output, DisplaySection, ListOfContents, OrderedListItem, etc. | Output and display data extraction |
+| `extractor_reference.R` | 12KB | DocumentReference, PageNameRef, PageNumberListRef, etc. | Reference and citation data extraction |
+| `extractor_metadata.R` | 8KB | TerminologyExtension, SponsorTerm, AnalysisOutputCategorization, etc. | Metadata and categorization extraction |
+| `extractor_record_utils.R` | 7KB | create_record_from_object, apply_sas_compatible_formatting, etc. | Core record creation utilities |
+
+Total modular size: ~84KB across 5 focused modules
+
+## Verification & Testing
+
+### Production Results
+Framework processes real ARS data:
+- Multiple ARS class datasets extracted from JSON reporting event
+- Variable record counts across all classes
+- Operation class: Variable records (depends on JSON content)
+- Analysis class: Variable records (depends on JSON content)
+- Processing time: <1 second
+- Same data structure and SAS-compatible formatting as original system
+
+### Test Suite
+Test suite in `utilities/R/tests/`:
+
+```r
+# Run all tests
+Rscript utilities/R/tests/run_tests.R
+
+# Test Results: 38/38 tests passing (100%)
+# Unit Tests: 31/31 passing
+# Integration Tests: 7/7 passing
+```
+
+Test Coverage:
+- Schema parsing (`get_class_slots.R`) - 6/6 tests
+- Record utilities (`extractor_record_utils.R`) - 6/6 tests
+- Modular extraction (`schema_driven_extractor_modular.R`) - 8/8 tests
+- Complete pipeline (`create_class_datasets_for_json_reportingevent.R`) - 7/7 tests
+- Infrastructure (paths, JSON handling) - 4/4 tests
+
+### Development Testing
+
+```r
+# Test schema parsing
+source('utilities/R/helpers/get_class_slots.R')
+class_slots <- get_class_slots('model/ars_ldm.json')
+
+# Test modular extraction
+source('utilities/R/helpers/schema_driven_extractor_modular.R')
+result <- extract_schema_driven_datasets('data.json', class_slots)
+
+# Test individual module
+source('utilities/R/helpers/extractor_analysis.R')
+ops <- extract_operations(json_data, class_definition)
+```
+
+### Schema Alignment Verification
+Test fixtures use real ARS schema structure:
+- Operations under `methods[].operations[]` (not `analyses[].operations[]`)
+- Analysis references methods via `methodId`
+- All class relationships match `ars_ldm.json` schema
+
+> See [`SCHEMA_MAPPING_EXAMPLES.md`](SCHEMA_MAPPING_EXAMPLES.md) for mapping examples
+
+## System Status
+
+- Production Ready - `create_class_datasets_for_json_reportingevent.R` uses modular architecture
+- Fully Tested - 38/38 tests passing with comprehensive coverage
+- Schema Compliant - Test fixtures align with real ARS schema structure
+- Clean Architecture - Modular design with clear separation of concerns
diff --git a/utilities/R/helpers/SCHEMA_MAPPING_EXAMPLES.md b/utilities/R/helpers/SCHEMA_MAPPING_EXAMPLES.md
new file mode 100644
index 0000000..d696644
--- /dev/null
+++ b/utilities/R/helpers/SCHEMA_MAPPING_EXAMPLES.md
@@ -0,0 +1,319 @@
+# Schema-to-Dataset Mapping Examples
+
+This document provides walk-through examples showing how the ARS JSON schema definitions are translated into class datasets, with traceability from schema → JSON instance → dataset record.
+
+## Framework Process Overview
+
+```
+ARS Schema Definition → Class Definition → JSON Instance → Dataset Record
+      (ars_ldm.json)   (get_class_slots)   (JSON data)    (tablepath/datapath)
+```
+
+---
+
+## Example 1: Analysis Class
+
+### 📋 **1. Schema Definition** (from `ars_ldm.json`)
+```json
+"Analysis": {
+  "type": "object",
+  "properties": {
+    "id": {
+      "description": "The assigned identifying value for the instance of the class.",
+      "type": "string"
+    },
+    "name": {
+      "description": "The name for the instance of the class.",
+      "type": "string"
+    },
+    "description": {
+      "description": "A textual description of the instance of the class.",
+      "type": "string"
+    },
+    "methodId": {
+      "description": "A reference to the set of one or more statistical operations performed for the analysis.",
+      "type": "string"
+    },
+    "dataset": {
+      "description": "The name of the analysis dataset.",
+      "type": "string"
+    },
+    "variable": {
+      "description": "The name of the variable.",
+      "type": "string"
+    }
+  },
+  "required": ["id", "name"]
+}
+```
+
+### 🔄 **2. Automatic Class Definition** (via `get_class_slots()`)
+```r
+class_definition <- data.frame(
+  parent_class = "Analysis",
+  slot = c("id", "name", "description", "methodId", "dataset", "variable"),
+  range = c("string", "string", "string", "string", "string", "string"),
+  is_array = c(0, 0, 0, 0, 0, 0),
+  stringsAsFactors = FALSE
+)
+```
+
+### 📊 **3. JSON Instance** (from reporting event JSON)
+```json
+{
+  "analyses": [
+    {
+      "id": "An_01_SAF_Summ_Age",
+      "name": "Summary of Age",
+      "description": "Summary statistics for age by treatment group",
+      "methodId": "Mth_01_Summ_ByTrt",
+      "dataset": "ADSL",
+      "variable": "AGE"
+    }
+  ]
+}
+```
+
+### 📈 **4. Generated Dataset Record**
+```r
+# Extraction Process:
+# JSON Path: /analyses/0
+# Class: Analysis
+# Record Creation: create_record_from_object()
+
+# Final Dataset Record:
+Analysis_record <- data.frame(
+  tablepath = "/root/analyses",           # Schema location (indices removed)
+  datapath = "/analyses/0",               # Exact JSON instance path
+  id = "An_01_SAF_Summ_Age",             # From JSON: analyses[0].id
+  name = "Summary of Age",                # From JSON: analyses[0].name
+  description = "Summary statistics for age by treatment group",  # From JSON
+  methodId = "Mth_01_Summ_ByTrt",        # From JSON: analyses[0].methodId
+  dataset = "ADSL",                       # From JSON: analyses[0].dataset
+  variable = "AGE",                       # From JSON: analyses[0].variable
+  stringsAsFactors = FALSE
+)
+```
+
+**Traceability:** `tablepath` shows this is an Analysis class record, `datapath` shows it came from the first analysis in the JSON array.
+
+---
+
+## Example 2: Output Class
+
+### 📋 **1. Schema Definition** (from `ars_ldm.json`)
+```json
+"Output": {
+  "type": "object",
+  "properties": {
+    "id": {
+      "description": "The assigned identifying value for the instance of the class.",
+      "type": "string"
+    },
+    "name": {
+      "description": "The name for the instance of the class.",
+      "type": "string"
+    },
+    "description": {
+      "description": "A textual description of the instance of the class.",
+      "type": "string"
+    },
+    "fileSpecifications": {
+      "description": "Specifications for the file containing the output.",
+      "items": {
+        "$ref": "#/$defs/OutputFileSpecification"
+      },
+      "type": "array"
+    },
+    "displays": {
+      "description": "Tabular displays included in the output.",
+      "items": {
+        "$ref": "#/$defs/OutputDisplay"
+      },
+      "type": "array"
+    }
+  },
+  "required": ["id", "name"]
+}
+```
+
+### 🔄 **2. Automatic Class Definition** (via `get_class_slots()`)
+```r
+class_definition <- data.frame(
+  parent_class = "Output",
+  slot = c("id", "name", "description", "fileSpecifications", "displays"),
+  range = c("string", "string", "string", "OutputFileSpecification", "OutputDisplay"),
+  is_array = c(0, 0, 0, 1, 1),  # fileSpecifications and displays are arrays
+  stringsAsFactors = FALSE
+)
+```
+
+### 📊 **3. JSON Instance** (from reporting event JSON)
+```json
+{
+  "outputs": [
+    {
+      "id": "Out_14_Summary",
+      "name": "Table 14.1.1 Summary of Demographics",
+      "description": "Demographic characteristics by treatment group",
+      "fileSpecifications": [
+        {
+          "name": "demographics_summary.rtf",
+          "fileType": {"controlledTerm": "rtf"}
+        }
+      ],
+      "displays": [
+        {
+          "id": "Disp_14_1_Demographics",
+          "name": "Demographics Display"
+        }
+      ]
+    }
+  ]
+}
+```
+
+### 📈 **4. Generated Dataset Record**
+```r
+# Extraction Process:
+# JSON Path: /outputs/0
+# Class: Output
+# Record Creation: create_record_from_object()
+
+# Final Dataset Record:
+Output_record <- data.frame(
+  tablepath = "/root/outputs",                    # Schema location
+  datapath = "/outputs/0",                        # Exact JSON instance path
+  id = "Out_14_Summary",                          # From JSON: outputs[0].id
+  name = "Table 14.1.1 Summary of Demographics", # From JSON: outputs[0].name
+  description = "Demographic characteristics by treatment group", # From JSON
+  fileSpecifications = "[{\"name\":\"demographics_summary.rtf\",\"fileType\":{\"controlledTerm\":\"rtf\"}}]", # Array as JSON string
+  displays = "[{\"id\":\"Disp_14_1_Demographics\",\"name\":\"Demographics Display\"}]", # Array as JSON string
+  stringsAsFactors = FALSE
+)
+```
+
+**Note:** Array fields (`fileSpecifications`, `displays`) are stored as JSON strings in the main Output dataset, but also extracted into separate class datasets (`OutputFileSpecification.csv`, `OutputDisplay.csv`).
+
+---
+
+## Example 3: DocumentReference Class
+
+### 📋 **1. Schema Definition** (from `ars_ldm.json`)
+```json
+"DocumentReference": {
+  "type": "object",
+  "properties": {
+    "id": {
+      "description": "The assigned identifying value for the instance of the class.",
+      "type": "string"
+    },
+    "referenceDocumentId": {
+      "description": "The identifier of the referenced document.",
+      "type": "string"
+    },
+    "pageRefs": {
+      "description": "One or more references to specific parts of a document.",
+      "items": {
+        "$ref": "#/$defs/PageRef"
+      },
+      "type": "array"
+    }
+  },
+  "required": ["id", "referenceDocumentId"]
+}
+```
+
+### 🔄 **2. Automatic Class Definition** (via `get_class_slots()`)
+```r
+class_definition <- data.frame(
+  parent_class = "DocumentReference",
+  slot = c("id", "referenceDocumentId", "pageRefs"),
+  range = c("string", "string", "PageRef"),
+  is_array = c(0, 0, 1),  # pageRefs is an array
+  stringsAsFactors = FALSE
+)
+```
+
+### 📊 **3. JSON Instance** (from reporting event JSON)
+```json
+{
+  "analyses": [
+    {
+      "id": "An_01_SAF_Summ_Age",
+      "documentRefs": [
+        {
+          "id": "DocRef_SAP_Age",
+          "referenceDocumentId": "SAP_v2.0",
+          "pageRefs": [
+            {
+              "refType": "PhysicalRef",
+              "label": "Section 9.1",
+              "pageNames": ["Section_9_1"]
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
+```
+
+### 📈 **4. Generated Dataset Record**
+```r
+# Extraction Process:
+# JSON Path: /analyses/0/documentRefs/0
+# Class: DocumentReference
+# Record Creation: create_record_from_object()
+
+# Final Dataset Record:
+DocumentReference_record <- data.frame(
+  tablepath = "/root/analyses/documentRefs",      # Schema location (indices removed)
+  datapath = "/analyses/0/documentRefs/0",        # Exact JSON instance path
+  id = "DocRef_SAP_Age",                          # From JSON: documentRefs[0].id
+  referenceDocumentId = "SAP_v2.0",               # From JSON: documentRefs[0].referenceDocumentId
+  pageRefs = "[{\"refType\":\"PhysicalRef\",\"label\":\"Section 9.1\",\"pageNames\":[\"Section_9_1\"]}]", # Array as JSON string
+  stringsAsFactors = FALSE
+)
+```
+
+**Path Traceability:** The `datapath` `/analyses/0/documentRefs/0` shows this DocumentReference came from the first documentRef of the first analysis, providing traceability through nested JSON structures.
+
+---
+
+## Schema-to-Dataset Mapping Summary
+
+### 🎯 **Key Mapping Principles**
+
+1. **Schema Definition** → **Class Definition**
+   - JSON schema properties become class slots
+   - Data types are preserved (`string`, `integer`, etc.)
+   - Array indicators are detected (`"type": "array"`)
+
+2. **JSON Instance** → **Dataset Record**
+   - Each JSON object becomes one dataset record
+   - All schema-defined properties are extracted
+   - Missing properties become `NA` values
+
+3. **Path Generation**
+   - **`tablepath`**: Schema location with numeric indices removed (e.g., `/root/analyses`)
+   - **`datapath`**: Exact JSON path to the instance (e.g., `/analyses/0/documentRefs/0`)
+
+### 📊 **Array Handling**
+
+Arrays in JSON are handled in two ways:
+1. **Stored as JSON strings** in the parent class dataset (for reference)
+2. **Extracted into separate class datasets** (for detailed analysis)
+
+Example: `outputs[0].displays[]` creates records in both:
+- `Output.csv` (with displays as JSON string)
+- `OutputDisplay.csv` (with individual display records)
+
+### 🔍 **Traceability**
+
+Every generated record maintains traceability:
+- **Which schema class** it represents (`tablepath`)
+- **Where in the JSON** it originated (`datapath`)
+- **What values** were extracted (all schema-defined slots)
+
+This enables analysts to trace any dataset record back to its exact location in the original JSON reporting event and understand its structure from the ARS schema.
\ No newline at end of file
diff --git a/utilities/R/helpers/extractor_analysis.R b/utilities/R/helpers/extractor_analysis.R
new file mode 100644
index 0000000..a9829cd
--- /dev/null
+++ b/utilities/R/helpers/extractor_analysis.R
@@ -0,0 +1,685 @@
+#' Analysis-Related Data Extractors
+#'
+#' Extraction functions for analysis-related ARS classes including Analysis,
+#' AnalysisMethod, Operation, AnalysisSet, DataSubset, and related classes.
+#'
+#' @export
+
+# Core Analysis Classes
+
+extract_analyses <- function(json_data, class_definition) {
+  if (is.null(json_data$analyses) || length(json_data$analyses) == 0) return(NULL)
+
+  records <- list()
+  for (i in seq_along(json_data$analyses)) {
+    analysis <- json_data$analyses[[i]]
+    datapath <- paste0("/analyses/", i-1)
+    record <- create_record_from_object(analysis, class_definition, datapath, i)
+    records[[i]] <- record
+  }
+
+  do.call(rbind, records)
+}
+
+extract_analysis_methods <- function(json_data, class_definition) {
+  if (is.null(json_data$methods) || length(json_data$methods) == 0) return(NULL)
+
+  records <- list()
+  record_idx <- 1
+
+  for (i in seq_along(json_data$methods)) {
+    method <- json_data$methods[[i]]
+    datapath <- paste0("/methods/", i-1)
+    record <- create_record_from_object(method, class_definition, datapath, record_idx)
+    records[[record_idx]] <- record
+    record_idx <- record_idx + 1
+  }
+
+  if (length(records) > 0) {
+    do.call(rbind, records)
+  } else {
+    NULL
+  }
+}
+
+extract_operations <- function(json_data, class_definition) {
+  if (is.null(json_data$methods) || length(json_data$methods) == 0) return(NULL)
+
+  records <- list()
+  record_idx <- 1
+
+  # Extract all operations from all methods
+  for (method_idx in seq_along(json_data$methods)) {
+    method <- json_data$methods[[method_idx]]
+
+    if (!is.null(method$operations) && length(method$operations) > 0) {
+      for (op_idx in seq_along(method$operations)) {
+        operation <- method$operations[[op_idx]]
+        datapath <- paste0("/methods/", method_idx-1, "/operations/", op_idx-1)
+        tablepath <- "/root/methods/operations"
+
+        record <- create_record_from_object(operation, class_definition, datapath, record_idx)
+
+        records[[record_idx]] <- record
+        record_idx <- record_idx + 1
+      }
+    }
+  }
+
+  if (length(records) > 0) {
+    do.call(rbind, records)
+  } else {
+    NULL
+  }
+}
+
+extract_operation_results <- function(json_data, class_definition) {
+  if (is.null(json_data$analyses) || length(json_data$analyses) == 0) return(NULL)
+
+  records <- list()
+  record_idx <- 1
+
+  # Extract results from all analyses
+  for (analysis_idx in seq_along(json_data$analyses)) {
+    analysis <- json_data$analyses[[analysis_idx]]
+
+    if (!is.null(analysis$results) && length(analysis$results) > 0) {
+      for (result_idx in seq_along(analysis$results)) {
+        result <- analysis$results[[result_idx]]
+        datapath <- paste0("/analyses/", analysis_idx-1, "/results/", result_idx-1)
+        tablepath <- "/root/analyses/results"
+
+        record <- create_record_from_object(result, class_definition, datapath, record_idx)
+
+        records[[record_idx]] <- record
+        record_idx <- record_idx + 1
+      }
+    }
+  }
+
+  if (length(records) > 0) {
+    do.call(rbind, records)
+  } else {
+    NULL
+  }
+}
+
+extract_result_groups <- function(json_data, class_definition) {
+  if (is.null(json_data$analyses) || length(json_data$analyses) == 0) return(NULL)
+
+  records <- list()
+  record_idx <- 1
+
+  # Extract result groups from all analyses/results
+  for (analysis_idx in seq_along(json_data$analyses)) {
+    analysis <- json_data$analyses[[analysis_idx]]
+
+    if (!is.null(analysis$results) && length(analysis$results) > 0) {
+      for (result_idx in seq_along(analysis$results)) {
+        result <- analysis$results[[result_idx]]
+
+        if (!is.null(result$resultGroups) && length(result$resultGroups) > 0) {
+          for (group_idx in seq_along(result$resultGroups)) {
+            group <- result$resultGroups[[group_idx]]
+            datapath <- paste0("/analyses/", analysis_idx-1, "/results/", result_idx-1, "/resultGroups/", group_idx-1)
+            tablepath <- "/root/analyses/results/resultGroups"
+
+            record <- create_record_from_object(group, class_definition, datapath, record_idx)
+
+            records[[record_idx]] <- record
+            record_idx <- record_idx + 1
+          }
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) {
+    do.call(rbind, records)
+  } else {
+    NULL
+  }
+}
+
+# Analysis Sets and Data Subsets
+
+extract_analysis_sets <- function(json_data, class_definition) {
+  if (is.null(json_data$analysisSets)) return(NULL)
+  extract_array_objects(json_data$analysisSets, class_definition, "/analysisSets")
+}
+
+extract_data_subsets <- function(json_data, class_definition) {
+  if (is.null(json_data$dataSubsets)) return(NULL)
+  extract_array_objects(json_data$dataSubsets, class_definition, "/dataSubsets")
+}
+
+# Grouping-related classes
+
+extract_groups <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in analysisGroupings/groups
+  if (!is.null(json_data$analysisGroupings) && length(json_data$analysisGroupings) > 0) {
+    for (grouping_idx in seq_along(json_data$analysisGroupings)) {
+      grouping <- json_data$analysisGroupings[[grouping_idx]]
+      if (!is.null(grouping$groups) && length(grouping$groups) > 0) {
+        for (group_idx in seq_along(grouping$groups)) {
+          group <- grouping$groups[[group_idx]]
+          datapath <- paste0("/analysisGroupings/", grouping_idx-1, "/groups/", group_idx-1)
+
+          record <- create_record_from_object(group, class_definition, datapath, record_idx)
+          records[[record_idx]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_ordered_grouping_factors <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in analyses/orderedGroupings
+  if (!is.null(json_data$analyses) && length(json_data$analyses) > 0) {
+    for (analysis_idx in seq_along(json_data$analyses)) {
+      analysis <- json_data$analyses[[analysis_idx]]
+      if (!is.null(analysis$orderedGroupings) && length(analysis$orderedGroupings) > 0) {
+        for (grouping_idx in seq_along(analysis$orderedGroupings)) {
+          grouping <- analysis$orderedGroupings[[grouping_idx]]
+          datapath <- paste0("/analyses/", analysis_idx-1, "/orderedGroupings/", grouping_idx-1)
+
+          record <- create_record_from_object(grouping, class_definition, datapath, record_idx)
+          records[[record_idx]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_grouping_factors <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in analysisGroupings (root level grouping factors)
+  if (!is.null(json_data$analysisGroupings) && length(json_data$analysisGroupings) > 0) {
+    for (grouping_idx in seq_along(json_data$analysisGroupings)) {
+      grouping <- json_data$analysisGroupings[[grouping_idx]]
+      datapath <- paste0("/analysisGroupings/", grouping_idx-1)
+
+      record <- create_record_from_object(grouping, class_definition, datapath, record_idx)
+      records[[record_idx]] <- record
+      record_idx <- record_idx + 1
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+# Where Clause and Condition extractors
+
+extract_where_clauses <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in dataSubsets/compoundExpression/whereClauses
+  if (!is.null(json_data$dataSubsets) && length(json_data$dataSubsets) > 0) {
+    for (subset_idx in seq_along(json_data$dataSubsets)) {
+      subset <- json_data$dataSubsets[[subset_idx]]
+      if (!is.null(subset$compoundExpression) && !is.null(subset$compoundExpression$whereClauses) && length(subset$compoundExpression$whereClauses) > 0) {
+        for (clause_idx in seq_along(subset$compoundExpression$whereClauses)) {
+          clause <- subset$compoundExpression$whereClauses[[clause_idx]]
+          datapath <- paste0("/dataSubsets/", subset_idx-1, "/compoundExpression/whereClauses/", clause_idx-1)
+
+          record <- create_record_from_object(clause, class_definition, datapath, record_idx)
+          records[[record_idx]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_where_clause_conditions <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Helper function to extract conditions from an object
+  extract_conditions_from_object <- function(obj, base_path) {
+    conditions <- list()
+
+    if (!is.null(obj$condition) && is.list(obj$condition)) {
+      if (is.null(names(obj$condition))) {
+        # Array of conditions
+        for (i in seq_along(obj$condition)) {
+          condition <- obj$condition[[i]]
+          datapath <- paste0(base_path, "/condition/", i-1)
+          record <- create_record_from_object(condition, class_definition, datapath, record_idx)
+          conditions[[length(conditions) + 1]] <- record
+          record_idx <<- record_idx + 1
+        }
+      } else {
+        # Single condition object
+        datapath <- paste0(base_path, "/condition")
+        record <- create_record_from_object(obj$condition, class_definition, datapath, record_idx)
+        conditions[[1]] <- record
+        record_idx <<- record_idx + 1
+      }
+    }
+
+    conditions
+  }
+
+  # Search in analysisSets
+  if (!is.null(json_data$analysisSets) && length(json_data$analysisSets) > 0) {
+    for (i in seq_along(json_data$analysisSets)) {
+      set_conditions <- extract_conditions_from_object(json_data$analysisSets[[i]], paste0("/analysisSets/", i-1))
+      records <- c(records, set_conditions)
+    }
+  }
+
+  # Search in dataSubsets
+  if (!is.null(json_data$dataSubsets) && length(json_data$dataSubsets) > 0) {
+    for (i in seq_along(json_data$dataSubsets)) {
+      # Direct condition in dataSubsets
+      subset_conditions <- extract_conditions_from_object(json_data$dataSubsets[[i]], paste0("/dataSubsets/", i-1))
+      records <- c(records, subset_conditions)
+
+      # Conditions within compoundExpression/whereClauses
+      subset <- json_data$dataSubsets[[i]]
+      if (!is.null(subset$compoundExpression) && !is.null(subset$compoundExpression$whereClauses) && length(subset$compoundExpression$whereClauses) > 0) {
+        for (clause_idx in seq_along(subset$compoundExpression$whereClauses)) {
+          clause <- subset$compoundExpression$whereClauses[[clause_idx]]
+          clause_conditions <- extract_conditions_from_object(clause, paste0("/dataSubsets/", i-1, "/compoundExpression/whereClauses/", clause_idx-1))
+          records <- c(records, clause_conditions)
+
+          # Also check for nested compound expressions within whereClauses
+          if (!is.null(clause$compoundExpression) && !is.null(clause$compoundExpression$whereClauses)) {
+            for (nested_idx in seq_along(clause$compoundExpression$whereClauses)) {
+              nested_clause <- clause$compoundExpression$whereClauses[[nested_idx]]
+              nested_conditions <- extract_conditions_from_object(nested_clause,
+                paste0("/dataSubsets/", i-1, "/compoundExpression/whereClauses/", clause_idx-1, "/compoundExpression/whereClauses/", nested_idx-1))
+              records <- c(records, nested_conditions)
+            }
+          }
+        }
+      }
+    }
+  }
+
+  # Search in analysisGroupings/groups
+  if (!is.null(json_data$analysisGroupings) && length(json_data$analysisGroupings) > 0) {
+    for (grouping_idx in seq_along(json_data$analysisGroupings)) {
+      grouping <- json_data$analysisGroupings[[grouping_idx]]
+      if (!is.null(grouping$groups) && length(grouping$groups) > 0) {
+        for (group_idx in seq_along(grouping$groups)) {
+          group_conditions <- extract_conditions_from_object(grouping$groups[[group_idx]],
+                                                            paste0("/analysisGroupings/", grouping_idx-1, "/groups/", group_idx-1))
+          records <- c(records, group_conditions)
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+# Compound Expression extractors
+
+extract_compound_subset_expressions <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in dataSubsets/compoundExpression
+  if (!is.null(json_data$dataSubsets) && length(json_data$dataSubsets) > 0) {
+    for (subset_idx in seq_along(json_data$dataSubsets)) {
+      subset <- json_data$dataSubsets[[subset_idx]]
+      if (!is.null(subset$compoundExpression)) {
+        datapath <- paste0("/dataSubsets/", subset_idx-1, "/compoundExpression")
+
+        record <- create_record_from_object(subset$compoundExpression, class_definition, datapath, record_idx)
+        records[[record_idx]] <- record
+        record_idx <- record_idx + 1
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_compound_set_expression <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Extract compound expressions from analysis sets
+  if (!is.null(json_data$analysisSets)) {
+    for (set_idx in seq_along(json_data$analysisSets)) {
+      analysis_set <- json_data$analysisSets[[set_idx]]
+      if (!is.null(analysis_set$compoundExpression)) {
+        datapath <- sprintf("/analysisSets/%d/compoundExpression", set_idx - 1)
+        record <- create_record_from_object(analysis_set$compoundExpression, class_definition, datapath, record_idx)
+        if (!is.null(record)) {
+          records[[length(records) + 1]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  # Also search nested compound expressions in data subsets
+  if (!is.null(json_data$dataSubsets)) {
+    for (subset_idx in seq_along(json_data$dataSubsets)) {
+      subset <- json_data$dataSubsets[[subset_idx]]
+      if (!is.null(subset$compoundExpression) && !is.null(subset$compoundExpression$whereClauses)) {
+        for (clause_idx in seq_along(subset$compoundExpression$whereClauses)) {
+          clause <- subset$compoundExpression$whereClauses[[clause_idx]]
+          if (!is.null(clause$compoundExpression)) {
+            datapath <- sprintf("/dataSubsets/%d/compoundExpression/whereClauses/%d/compoundExpression", subset_idx - 1, clause_idx - 1)
+            record <- create_record_from_object(clause$compoundExpression, class_definition, datapath, record_idx)
+            if (!is.null(record)) {
+              records[[length(records) + 1]] <- record
+              record_idx <- record_idx + 1
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
+
+extract_compound_group_expression <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Extract compound expressions from grouping factors/groups
+  if (!is.null(json_data$analysisGroupings)) {
+    for (grouping_idx in seq_along(json_data$analysisGroupings)) {
+      grouping <- json_data$analysisGroupings[[grouping_idx]]
+      if (!is.null(grouping$groups)) {
+        for (group_idx in seq_along(grouping$groups)) {
+          group <- grouping$groups[[group_idx]]
+          if (!is.null(group$compoundExpression)) {
+            datapath <- sprintf("/analysisGroupings/%d/groups/%d/compoundExpression", grouping_idx - 1, group_idx - 1)
+            record <- create_record_from_object(group$compoundExpression, class_definition, datapath, record_idx)
+            if (!is.null(record)) {
+              records[[length(records) + 1]] <- record
+              record_idx <- record_idx + 1
+            }
+          }
+        }
+      }
+    }
+  }
+
+  # Also search nested compound expressions in data subsets (same location as CompoundSetExpression)
+  if (!is.null(json_data$dataSubsets)) {
+    for (subset_idx in seq_along(json_data$dataSubsets)) {
+      subset <- json_data$dataSubsets[[subset_idx]]
+      if (!is.null(subset$compoundExpression) && !is.null(subset$compoundExpression$whereClauses)) {
+        for (clause_idx in seq_along(subset$compoundExpression$whereClauses)) {
+          clause <- subset$compoundExpression$whereClauses[[clause_idx]]
+          if (!is.null(clause$compoundExpression)) {
+            datapath <- sprintf("/dataSubsets/%d/compoundExpression/whereClauses/%d/compoundExpression", subset_idx - 1, clause_idx - 1)
+            record <- create_record_from_object(clause$compoundExpression, class_definition, datapath, record_idx)
+            if (!is.null(record)) {
+              records[[length(records) + 1]] <- record
+              record_idx <- record_idx + 1
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
+
+# Analysis Purpose and Reason extractors
+
+extract_analysis_reasons <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in analyses/reason - extract ONLY the controlledTerm reasons for AnalysisReason
+  if (!is.null(json_data$analyses) && length(json_data$analyses) > 0) {
+    for (analysis_idx in seq_along(json_data$analyses)) {
+      analysis <- json_data$analyses[[analysis_idx]]
+      if (!is.null(analysis$reason) && !is.null(analysis$reason$controlledTerm)) {
+        # Only extract if this is a controlledTerm reason (not sponsorTermId)
+        datapath <- paste0("/analyses/", analysis_idx-1, "/reason")
+        record <- create_record_from_object(analysis$reason, class_definition, datapath, record_idx)
+        records[[record_idx]] <- record
+        record_idx <- record_idx + 1
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_analysis_purpose <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Extract purposes from analyses
+  if (!is.null(json_data$analyses)) {
+    for (analysis_idx in seq_along(json_data$analyses)) {
+      analysis <- json_data$analyses[[analysis_idx]]
+      if (!is.null(analysis$purpose)) {
+        datapath <- sprintf("/analyses/%d/purpose", analysis_idx - 1)
+        record <- create_record_from_object(analysis$purpose, class_definition, datapath, record_idx)
+        if (!is.null(record)) {
+          records[[length(records) + 1]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
+
+extract_sponsor_analysis_purposes <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in analyses/purpose - but ONLY extract purposes that have sponsorTermId
+  if (!is.null(json_data$analyses) && length(json_data$analyses) > 0) {
+    for (analysis_idx in seq_along(json_data$analyses)) {
+      analysis <- json_data$analyses[[analysis_idx]]
+      if (!is.null(analysis$purpose) && !is.null(analysis$purpose$sponsorTermId)) {
+        # Only extract if this is a sponsor purpose (not controlledTerm)
+        datapath <- paste0("/analyses/", analysis_idx-1, "/purpose")
+        record <- create_record_from_object(analysis$purpose, class_definition, datapath, record_idx)
+        records[[record_idx]] <- record
+        record_idx <- record_idx + 1
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_sponsor_analysis_reasons <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Extract sponsor reasons from analyses
+  if (!is.null(json_data$analyses)) {
+    for (analysis_idx in seq_along(json_data$analyses)) {
+      analysis <- json_data$analyses[[analysis_idx]]
+      if (!is.null(analysis$reason) && !is.null(analysis$reason$sponsorTermId)) {
+        datapath <- sprintf("/analyses/%d/reason", analysis_idx - 1)
+        record <- create_record_from_object(analysis$reason, class_definition, datapath, record_idx)
+        if (!is.null(record)) {
+          records[[length(records) + 1]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
+
+# Operation-related extractors
+
+extract_operation_roles <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in methods/operations/referencedOperationRelationships/referencedOperationRole
+  if (!is.null(json_data$methods) && length(json_data$methods) > 0) {
+    for (method_idx in seq_along(json_data$methods)) {
+      method <- json_data$methods[[method_idx]]
+      if (!is.null(method$operations) && length(method$operations) > 0) {
+        for (op_idx in seq_along(method$operations)) {
+          operation <- method$operations[[op_idx]]
+          if (!is.null(operation$referencedOperationRelationships) && length(operation$referencedOperationRelationships) > 0) {
+            for (rel_idx in seq_along(operation$referencedOperationRelationships)) {
+              relationship <- operation$referencedOperationRelationships[[rel_idx]]
+              if (!is.null(relationship$referencedOperationRole)) {
+                datapath <- paste0("/methods/", method_idx-1, "/operations/", op_idx-1, "/referencedOperationRelationships/", rel_idx-1, "/referencedOperationRole")
+                record <- create_record_from_object(relationship$referencedOperationRole, class_definition, datapath, record_idx)
+                records[[record_idx]] <- record
+                record_idx <- record_idx + 1
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_referenced_analysis_operations <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in analyses/referencedAnalysisOperations
+  if (!is.null(json_data$analyses) && length(json_data$analyses) > 0) {
+    for (analysis_idx in seq_along(json_data$analyses)) {
+      analysis <- json_data$analyses[[analysis_idx]]
+      if (!is.null(analysis$referencedAnalysisOperations) && length(analysis$referencedAnalysisOperations) > 0) {
+        for (ref_idx in seq_along(analysis$referencedAnalysisOperations)) {
+          ref_op <- analysis$referencedAnalysisOperations[[ref_idx]]
+          datapath <- paste0("/analyses/", analysis_idx-1, "/referencedAnalysisOperations/", ref_idx-1)
+
+          record <- create_record_from_object(ref_op, class_definition, datapath, record_idx)
+          records[[record_idx]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_referenced_data_subsets <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in dataSubsets/compoundExpression/whereClauses (same as WhereClause based on SAS paths)
+  if (!is.null(json_data$dataSubsets) && length(json_data$dataSubsets) > 0) {
+    for (subset_idx in seq_along(json_data$dataSubsets)) {
+      subset <- json_data$dataSubsets[[subset_idx]]
+      if (!is.null(subset$compoundExpression) && !is.null(subset$compoundExpression$whereClauses) && length(subset$compoundExpression$whereClauses) > 0) {
+        for (clause_idx in seq_along(subset$compoundExpression$whereClauses)) {
+          clause <- subset$compoundExpression$whereClauses[[clause_idx]]
+          datapath <- paste0("/dataSubsets/", subset_idx-1, "/compoundExpression/whereClauses/", clause_idx-1)
+
+          record <- create_record_from_object(clause, class_definition, datapath, record_idx)
+          records[[record_idx]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_referenced_operation_relationships <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in methods/operations/referencedOperationRelationships
+  if (!is.null(json_data$methods) && length(json_data$methods) > 0) {
+    for (method_idx in seq_along(json_data$methods)) {
+      method <- json_data$methods[[method_idx]]
+      if (!is.null(method$operations) && length(method$operations) > 0) {
+        for (op_idx in seq_along(method$operations)) {
+          operation <- method$operations[[op_idx]]
+          if (!is.null(operation$referencedOperationRelationships) && length(operation$referencedOperationRelationships) > 0) {
+            for (rel_idx in seq_along(operation$referencedOperationRelationships)) {
+              relationship <- operation$referencedOperationRelationships[[rel_idx]]
+              datapath <- paste0("/methods/", method_idx-1, "/operations/", op_idx-1, "/referencedOperationRelationships/", rel_idx-1)
+
+              record <- create_record_from_object(relationship, class_definition, datapath, record_idx)
+              records[[record_idx]] <- record
+              record_idx <- record_idx + 1
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_referenced_analysis_sets <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Look for subClauseId references in data subsets compound expressions
+  if (!is.null(json_data$dataSubsets)) {
+    for (subset_idx in seq_along(json_data$dataSubsets)) {
+      subset <- json_data$dataSubsets[[subset_idx]]
+      if (!is.null(subset$compoundExpression) && !is.null(subset$compoundExpression$whereClauses)) {
+        for (clause_idx in seq_along(subset$compoundExpression$whereClauses)) {
+          clause <- subset$compoundExpression$whereClauses[[clause_idx]]
+
+          # Check for direct subClauseId
+          if (!is.null(clause$subClauseId)) {
+            datapath <- sprintf("/dataSubsets/%d/compoundExpression/whereClauses/%d", subset_idx - 1, clause_idx - 1)
+            record <- create_record_from_object(clause, class_definition, datapath, record_idx)
+            if (!is.null(record)) {
+              records[[length(records) + 1]] <- record
+              record_idx <- record_idx + 1
+            }
+          }
+
+          # Also check nested compound expression's whereClauses for subClauseId
+          if (!is.null(clause$compoundExpression) && !is.null(clause$compoundExpression$whereClauses)) {
+            for (nested_idx in seq_along(clause$compoundExpression$whereClauses)) {
+              nested_clause <- clause$compoundExpression$whereClauses[[nested_idx]]
+              datapath <- sprintf("/dataSubsets/%d/compoundExpression/whereClauses/%d/compoundExpression/whereClauses/%d",
+                                subset_idx - 1, clause_idx - 1, nested_idx - 1)
+              record <- create_record_from_object(nested_clause, class_definition, datapath, record_idx)
+              if (!is.null(record)) {
+                records[[length(records) + 1]] <- record
+                record_idx <- record_idx + 1
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
\ No newline at end of file
diff --git a/utilities/R/helpers/extractor_metadata.R b/utilities/R/helpers/extractor_metadata.R
new file mode 100644
index 0000000..1b67b6c
--- /dev/null
+++ b/utilities/R/helpers/extractor_metadata.R
@@ -0,0 +1,202 @@
+#' Metadata-Related Data Extractors
+#'
+#' Extraction functions for metadata-related ARS classes including TerminologyExtension,
+#' SponsorTerm, AnalysisOutputCategorization, and programming code classes.
+#'
+#' @export
+
+# Terminology and Sponsor Classes
+
+extract_terminology_extensions <- function(json_data, class_definition) {
+  if (is.null(json_data$terminologyExtensions)) return(NULL)
+  extract_array_objects(json_data$terminologyExtensions, class_definition, "/terminologyExtensions")
+}
+
+extract_sponsor_terms <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in terminologyExtensions/sponsorTerms
+  if (!is.null(json_data$terminologyExtensions) && length(json_data$terminologyExtensions) > 0) {
+    for (ext_idx in seq_along(json_data$terminologyExtensions)) {
+      extension <- json_data$terminologyExtensions[[ext_idx]]
+      if (!is.null(extension$sponsorTerms) && length(extension$sponsorTerms) > 0) {
+        for (term_idx in seq_along(extension$sponsorTerms)) {
+          term <- extension$sponsorTerms[[term_idx]]
+          datapath <- paste0("/terminologyExtensions/", ext_idx-1, "/sponsorTerms/", term_idx-1)
+
+          record <- create_record_from_object(term, class_definition, datapath, record_idx)
+          records[[record_idx]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+# Analysis Output Categorization Classes
+
+extract_analysis_output_categorizations <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Extract top-level categorizations
+  if (!is.null(json_data$analysisOutputCategorizations) && length(json_data$analysisOutputCategorizations) > 0) {
+    for (cat_idx in seq_along(json_data$analysisOutputCategorizations)) {
+      categorization <- json_data$analysisOutputCategorizations[[cat_idx]]
+      datapath <- paste0("/analysisOutputCategorizations/", cat_idx-1)
+
+      record <- create_record_from_object(categorization, class_definition, datapath, record_idx)
+      records[[record_idx]] <- record
+      record_idx <- record_idx + 1
+
+      # Also extract nested subCategorizations as AnalysisOutputCategorization records
+      if (!is.null(categorization$categories) && length(categorization$categories) > 0) {
+        for (category_idx in seq_along(categorization$categories)) {
+          category <- categorization$categories[[category_idx]]
+          if (!is.null(category$subCategorizations) && length(category$subCategorizations) > 0) {
+            for (sub_idx in seq_along(category$subCategorizations)) {
+              sub_cat <- category$subCategorizations[[sub_idx]]
+              sub_datapath <- paste0("/analysisOutputCategorizations/", cat_idx-1, "/categories/", category_idx-1, "/subCategorizations/", sub_idx-1)
+
+              record <- create_record_from_object(sub_cat, class_definition, sub_datapath, record_idx)
+              records[[record_idx]] <- record
+              record_idx <- record_idx + 1
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_analysis_output_categories <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in analysisOutputCategorizations/categories
+  if (!is.null(json_data$analysisOutputCategorizations) && length(json_data$analysisOutputCategorizations) > 0) {
+    for (cat_idx in seq_along(json_data$analysisOutputCategorizations)) {
+      categorization <- json_data$analysisOutputCategorizations[[cat_idx]]
+      if (!is.null(categorization$categories) && length(categorization$categories) > 0) {
+        for (category_idx in seq_along(categorization$categories)) {
+          category <- categorization$categories[[category_idx]]
+          datapath <- paste0("/analysisOutputCategorizations/", cat_idx-1, "/categories/", category_idx-1)
+
+          record <- create_record_from_object(category, class_definition, datapath, record_idx)
+          records[[record_idx]] <- record
+          record_idx <- record_idx + 1
+
+          # Also extract categories within subCategorizations
+          if (!is.null(category$subCategorizations) && length(category$subCategorizations) > 0) {
+            for (sub_idx in seq_along(category$subCategorizations)) {
+              sub_cat <- category$subCategorizations[[sub_idx]]
+              if (!is.null(sub_cat$categories) && length(sub_cat$categories) > 0) {
+                for (sub_category_idx in seq_along(sub_cat$categories)) {
+                  sub_category <- sub_cat$categories[[sub_category_idx]]
+                  sub_datapath <- paste0("/analysisOutputCategorizations/", cat_idx-1, "/categories/", category_idx-1, "/subCategorizations/", sub_idx-1, "/categories/", sub_category_idx-1)
+
+                  record <- create_record_from_object(sub_category, class_definition, sub_datapath, record_idx)
+                  records[[record_idx]] <- record
+                  record_idx <- record_idx + 1
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+# Programming Code Classes
+
+extract_analysis_output_programming_code <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Extract programming code from analyses
+  if (!is.null(json_data$analyses)) {
+    for (analysis_idx in seq_along(json_data$analyses)) {
+      analysis <- json_data$analyses[[analysis_idx]]
+      if (!is.null(analysis$programmingCode)) {
+        datapath <- sprintf("/analyses/%d/programmingCode", analysis_idx - 1)
+        record <- create_record_from_object(analysis$programmingCode, class_definition, datapath, record_idx)
+        if (!is.null(record)) {
+          records[[length(records) + 1]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  # Extract programming code from outputs
+  if (!is.null(json_data$outputs)) {
+    for (output_idx in seq_along(json_data$outputs)) {
+      output <- json_data$outputs[[output_idx]]
+      if (!is.null(output$programmingCode)) {
+        datapath <- sprintf("/outputs/%d/programmingCode", output_idx - 1)
+        record <- create_record_from_object(output$programmingCode, class_definition, datapath, record_idx)
+        if (!is.null(record)) {
+          records[[length(records) + 1]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
+
+extract_analysis_programming_code_template <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Extract code templates from methods
+  if (!is.null(json_data$methods)) {
+    for (method_idx in seq_along(json_data$methods)) {
+      method <- json_data$methods[[method_idx]]
+      if (!is.null(method$codeTemplate)) {
+        datapath <- sprintf("/methods/%d/codeTemplate", method_idx - 1)
+        record <- create_record_from_object(method$codeTemplate, class_definition, datapath, record_idx)
+        if (!is.null(record)) {
+          records[[length(records) + 1]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
+
+extract_template_code_parameter <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Extract template code parameters from methods
+  if (!is.null(json_data$methods)) {
+    for (method_idx in seq_along(json_data$methods)) {
+      method <- json_data$methods[[method_idx]]
+      if (!is.null(method$codeTemplate) && !is.null(method$codeTemplate$parameters)) {
+        for (param_idx in seq_along(method$codeTemplate$parameters)) {
+          param <- method$codeTemplate$parameters[[param_idx]]
+          datapath <- sprintf("/methods/%d/codeTemplate/parameters/%d", method_idx - 1, param_idx - 1)
+          record <- create_record_from_object(param, class_definition, datapath, record_idx)
+          if (!is.null(record)) {
+            records[[length(records) + 1]] <- record
+            record_idx <- record_idx + 1
+          }
+        }
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
\ No newline at end of file
diff --git a/utilities/R/helpers/extractor_output.R b/utilities/R/helpers/extractor_output.R
new file mode 100644
index 0000000..adab4c7
--- /dev/null
+++ b/utilities/R/helpers/extractor_output.R
@@ -0,0 +1,425 @@
+#' Output-Related Data Extractors
+#'
+#' Extraction functions for output-related ARS classes including Output,
+#' OutputDisplay, DisplaySection, ListOfContents, and related classes.
+#'
+#' @export
+
+# Core Output Classes
+
+extract_outputs <- function(json_data, class_definition) {
+  if (is.null(json_data$outputs)) return(NULL)
+  extract_array_objects(json_data$outputs, class_definition, "/outputs")
+}
+
+extract_output_displays <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  if (!is.null(json_data$outputs) && length(json_data$outputs) > 0) {
+    for (output_idx in seq_along(json_data$outputs)) {
+      output <- json_data$outputs[[output_idx]]
+
+      if (!is.null(output$displays) && length(output$displays) > 0) {
+        for (display_idx in seq_along(output$displays)) {
+          display <- output$displays[[display_idx]]
+          datapath <- paste0("/outputs/", output_idx-1, "/displays/", display_idx-1)
+
+          record <- create_record_from_object(display, class_definition, datapath, record_idx)
+          records[[record_idx]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_output_file <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Extract file specifications from outputs
+  if (!is.null(json_data$outputs)) {
+    for (output_idx in seq_along(json_data$outputs)) {
+      output <- json_data$outputs[[output_idx]]
+      if (!is.null(output$fileSpecifications)) {
+        for (file_idx in seq_along(output$fileSpecifications)) {
+          file_spec <- output$fileSpecifications[[file_idx]]
+          datapath <- sprintf("/outputs/%d/fileSpecifications/%d", output_idx - 1, file_idx - 1)
+          record <- create_record_from_object(file_spec, class_definition, datapath, record_idx)
+          if (!is.null(record)) {
+            records[[length(records) + 1]] <- record
+            record_idx <- record_idx + 1
+          }
+        }
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
+
+extract_output_file_type <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Extract file types from output file specifications
+  if (!is.null(json_data$outputs)) {
+    for (output_idx in seq_along(json_data$outputs)) {
+      output <- json_data$outputs[[output_idx]]
+      if (!is.null(output$fileSpecifications)) {
+        for (file_idx in seq_along(output$fileSpecifications)) {
+          file_spec <- output$fileSpecifications[[file_idx]]
+          if (!is.null(file_spec$fileType)) {
+            datapath <- sprintf("/outputs/%d/fileSpecifications/%d/fileType", output_idx - 1, file_idx - 1)
+            record <- create_record_from_object(file_spec$fileType, class_definition, datapath, record_idx)
+            if (!is.null(record)) {
+              records[[length(records) + 1]] <- record
+              record_idx <- record_idx + 1
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
+
+# Display Section Classes
+
+extract_display_sections <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in outputs/displays/display/displaySections
+  if (!is.null(json_data$outputs)) {
+    for (output_idx in seq_along(json_data$outputs)) {
+      output <- json_data$outputs[[output_idx]]
+      if (!is.null(output$displays)) {
+        for (display_idx in seq_along(output$displays)) {
+          display_obj <- output$displays[[display_idx]]
+          if (!is.null(display_obj$display) && !is.null(display_obj$display$displaySections)) {
+            for (section_idx in seq_along(display_obj$display$displaySections)) {
+              section <- display_obj$display$displaySections[[section_idx]]
+              datapath <- paste0("/outputs/", output_idx-1, "/displays/", display_idx-1, "/display/displaySections/", section_idx-1)
+
+              record <- create_record_from_object(section, class_definition, datapath, record_idx)
+              records[[record_idx]] <- record
+              record_idx <- record_idx + 1
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_display_subsections <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in globalDisplaySections/subSections
+  if (!is.null(json_data$globalDisplaySections)) {
+    for (global_idx in seq_along(json_data$globalDisplaySections)) {
+      global_section <- json_data$globalDisplaySections[[global_idx]]
+      if (!is.null(global_section$subSections)) {
+        for (sub_idx in seq_along(global_section$subSections)) {
+          subsection <- global_section$subSections[[sub_idx]]
+          datapath <- paste0("/globalDisplaySections/", global_idx-1, "/subSections/", sub_idx-1)
+
+          record <- create_record_from_object(subsection, class_definition, datapath, record_idx)
+          records[[record_idx]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  # Also search in outputs/displays/display/displaySections/orderedSubSections/subSection
+  if (!is.null(json_data$outputs)) {
+    for (output_idx in seq_along(json_data$outputs)) {
+      output <- json_data$outputs[[output_idx]]
+      if (!is.null(output$displays)) {
+        for (display_idx in seq_along(output$displays)) {
+          display_obj <- output$displays[[display_idx]]
+          if (!is.null(display_obj$display) && !is.null(display_obj$display$displaySections)) {
+            for (section_idx in seq_along(display_obj$display$displaySections)) {
+              section <- display_obj$display$displaySections[[section_idx]]
+              if (!is.null(section$orderedSubSections)) {
+                for (ordered_idx in seq_along(section$orderedSubSections)) {
+                  ordered_sub <- section$orderedSubSections[[ordered_idx]]
+                  if (!is.null(ordered_sub$subSection)) {
+                    datapath <- paste0("/outputs/", output_idx-1, "/displays/", display_idx-1, "/display/displaySections/", section_idx-1, "/orderedSubSections/", ordered_idx-1, "/subSection")
+
+                    # Create the record first, then check if it has meaningful content
+                    record <- create_record_from_object(ordered_sub$subSection, class_definition, datapath, record_idx)
+
+                    # Only add if the record has meaningful content (not all NA except tablepath/datapath)
+                    meaningful_cols <- setdiff(names(record), c("tablepath", "datapath"))
+                    has_content <- any(!is.na(record[meaningful_cols]))
+
+                    if (has_content) {
+                      records[[record_idx]] <- record
+                      record_idx <- record_idx + 1
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_global_display_sections <- function(json_data, class_definition) {
+  if (is.null(json_data$globalDisplaySections)) return(NULL)
+  extract_array_objects(json_data$globalDisplaySections, class_definition, "/globalDisplaySections")
+}
+
+extract_ordered_subsections <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in various locations for orderedSubSections, but exclude ones with subSectionId (those are OrderedSubSectionRef)
+  if (!is.null(json_data$outputs)) {
+    for (output_idx in seq_along(json_data$outputs)) {
+      output <- json_data$outputs[[output_idx]]
+      if (!is.null(output$displays)) {
+        for (display_idx in seq_along(output$displays)) {
+          display_obj <- output$displays[[display_idx]]
+          if (!is.null(display_obj$display) && !is.null(display_obj$display$displaySections)) {
+            for (section_idx in seq_along(display_obj$display$displaySections)) {
+              section <- display_obj$display$displaySections[[section_idx]]
+              if (!is.null(section$orderedSubSections)) {
+                for (ordered_idx in seq_along(section$orderedSubSections)) {
+                  ordered_sub <- section$orderedSubSections[[ordered_idx]]
+
+                  # Only extract if this orderedSubSection does NOT have a subSectionId (those are OrderedSubSectionRef)
+                  if (is.null(ordered_sub$subSectionId)) {
+                    datapath <- paste0("/outputs/", output_idx-1, "/displays/", display_idx-1, "/display/displaySections/", section_idx-1, "/orderedSubSections/", ordered_idx-1)
+
+                    record <- create_record_from_object(ordered_sub, class_definition, datapath, record_idx)
+                    records[[record_idx]] <- record
+                    record_idx <- record_idx + 1
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_ordered_displays <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in outputs/displays
+  if (!is.null(json_data$outputs) && length(json_data$outputs) > 0) {
+    for (output_idx in seq_along(json_data$outputs)) {
+      output <- json_data$outputs[[output_idx]]
+      if (!is.null(output$displays) && length(output$displays) > 0) {
+        display <- output$displays[[1]]  # Assuming first display
+        datapath <- paste0("/outputs/", output_idx-1, "/displays/0")
+
+        record <- create_record_from_object(display, class_definition, datapath, record_idx)
+        records[[record_idx]] <- record
+        record_idx <- record_idx + 1
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_ordered_subsection_refs <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Search in outputs/displays/display/displaySections/orderedSubSections, but only for refs with subSectionId
+  if (!is.null(json_data$outputs) && length(json_data$outputs) > 0) {
+    for (output_idx in seq_along(json_data$outputs)) {
+      output <- json_data$outputs[[output_idx]]
+      if (!is.null(output$displays) && length(output$displays) > 0) {
+        for (display_idx in seq_along(output$displays)) {
+          display_obj <- output$displays[[display_idx]]
+          if (!is.null(display_obj$display) && !is.null(display_obj$display$displaySections) && length(display_obj$display$displaySections) > 0) {
+            for (section_idx in seq_along(display_obj$display$displaySections)) {
+              section <- display_obj$display$displaySections[[section_idx]]
+              if (!is.null(section$orderedSubSections) && length(section$orderedSubSections) > 0) {
+                for (ordered_idx in seq_along(section$orderedSubSections)) {
+                  ordered_sub <- section$orderedSubSections[[ordered_idx]]
+
+                  # Only extract if this orderedSubSection has a subSectionId (making it a reference)
+                  if (!is.null(ordered_sub$subSectionId)) {
+                    datapath <- paste0("/outputs/", output_idx-1, "/displays/", display_idx-1, "/display/displaySections/", section_idx-1, "/orderedSubSections/", ordered_idx-1)
+
+                    record <- create_record_from_object(ordered_sub, class_definition, datapath, record_idx)
+                    records[[record_idx]] <- record
+                    record_idx <- record_idx + 1
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+# List of Contents Classes
+
+extract_list_of_contents <- function(json_data, class_definition) {
+  records <- list()
+
+  # mainListOfContents
+  if (!is.null(json_data$mainListOfContents)) {
+    record <- create_record_from_object(json_data$mainListOfContents, class_definition, "/mainListOfContents", 1)
+    records[[1]] <- record
+  }
+
+  # otherListsOfContents
+  if (!is.null(json_data$otherListsOfContents) && length(json_data$otherListsOfContents) > 0) {
+    for (i in seq_along(json_data$otherListsOfContents)) {
+      other_list <- json_data$otherListsOfContents[[i]]
+      datapath <- paste0("/otherListsOfContents/", i-1)
+      record <- create_record_from_object(other_list, class_definition, datapath, length(records) + 1)
+      records[[length(records) + 1]] <- record
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_nested_lists <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Helper function to recursively find all sublists
+  extract_all_sublists <- function(list_items, base_path) {
+    nested_records <- list()
+
+    if (!is.null(list_items) && length(list_items) > 0) {
+      for (item_idx in seq_along(list_items)) {
+        item <- list_items[[item_idx]]
+
+        # If this item has a sublist, extract it as a NestedList
+        if (!is.null(item$sublist)) {
+          sublist_path <- paste0(base_path, "/", item_idx-1, "/sublist")
+          record <- create_record_from_object(item$sublist, class_definition, sublist_path, record_idx)
+          nested_records[[length(nested_records) + 1]] <- record
+          record_idx <<- record_idx + 1
+
+          # Recursively extract deeper sublists
+          if (!is.null(item$sublist$listItems)) {
+            deeper_sublists <- extract_all_sublists(item$sublist$listItems, paste0(sublist_path, "/listItems"))
+            nested_records <- c(nested_records, deeper_sublists)
+          }
+        }
+      }
+    }
+
+    return(nested_records)
+  }
+
+  # Search in mainListOfContents/contentsList
+  if (!is.null(json_data$mainListOfContents$contentsList)) {
+    record <- create_record_from_object(json_data$mainListOfContents$contentsList, class_definition, "/mainListOfContents/contentsList", record_idx)
+    records[[record_idx]] <- record
+    record_idx <- record_idx + 1
+
+    # Extract all sublists recursively
+    if (!is.null(json_data$mainListOfContents$contentsList$listItems)) {
+      sublist_records <- extract_all_sublists(json_data$mainListOfContents$contentsList$listItems, "/mainListOfContents/contentsList/listItems")
+      records <- c(records, sublist_records)
+    }
+  }
+
+  # Search in otherListsOfContents/contentsList
+  if (!is.null(json_data$otherListsOfContents)) {
+    for (i in seq_along(json_data$otherListsOfContents)) {
+      other_list <- json_data$otherListsOfContents[[i]]
+      if (!is.null(other_list$contentsList)) {
+        datapath <- paste0("/otherListsOfContents/", i-1, "/contentsList")
+        record <- create_record_from_object(other_list$contentsList, class_definition, datapath, record_idx)
+        records[[record_idx]] <- record
+        record_idx <- record_idx + 1
+
+        # Extract all sublists recursively for otherListsOfContents too
+        if (!is.null(other_list$contentsList$listItems)) {
+          sublist_records <- extract_all_sublists(other_list$contentsList$listItems, paste0(datapath, "/listItems"))
+          records <- c(records, sublist_records)
+        }
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
+
+extract_ordered_list_items <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Helper function to recursively extract list items from nested sublists
+  extract_nested_list_items <- function(list_items, base_path) {
+    nested_records <- list()
+
+    if (!is.null(list_items) && length(list_items) > 0) {
+      for (item_idx in seq_along(list_items)) {
+        item <- list_items[[item_idx]]
+        datapath <- paste0(base_path, "/", item_idx-1)
+
+        # Extract this list item
+        record <- create_record_from_object(item, class_definition, datapath, record_idx)
+        nested_records[[length(nested_records) + 1]] <- record
+        record_idx <<- record_idx + 1
+
+        # Recursively extract from sublists
+        if (!is.null(item$sublist) && !is.null(item$sublist$listItems)) {
+          sublist_path <- paste0(datapath, "/sublist/listItems")
+          sublist_records <- extract_nested_list_items(item$sublist$listItems, sublist_path)
+          nested_records <- c(nested_records, sublist_records)
+        }
+      }
+    }
+
+    return(nested_records)
+  }
+
+  # Search in mainListOfContents/contentsList/listItems
+  if (!is.null(json_data$mainListOfContents) && !is.null(json_data$mainListOfContents$contentsList) && !is.null(json_data$mainListOfContents$contentsList$listItems)) {
+    main_records <- extract_nested_list_items(json_data$mainListOfContents$contentsList$listItems, "/mainListOfContents/contentsList/listItems")
+    records <- c(records, main_records)
+  }
+
+  # Also search in otherListsOfContents/contentsList/listItems
+  if (!is.null(json_data$otherListsOfContents) && length(json_data$otherListsOfContents) > 0) {
+    for (other_idx in seq_along(json_data$otherListsOfContents)) {
+      other_list <- json_data$otherListsOfContents[[other_idx]]
+      if (!is.null(other_list$contentsList) && !is.null(other_list$contentsList$listItems)) {
+        other_base_path <- paste0("/otherListsOfContents/", other_idx-1, "/contentsList/listItems")
+        other_records <- extract_nested_list_items(other_list$contentsList$listItems, other_base_path)
+        records <- c(records, other_records)
+      }
+    }
+  }
+
+  if (length(records) > 0) do.call(rbind, records) else NULL
+}
\ No newline at end of file
diff --git a/utilities/R/helpers/extractor_record_utils.R b/utilities/R/helpers/extractor_record_utils.R
new file mode 100644
index 0000000..f34c35b
--- /dev/null
+++ b/utilities/R/helpers/extractor_record_utils.R
@@ -0,0 +1,213 @@
+#' Record Creation and Formatting Utilities
+#'
+#' Core utilities for creating data records from JSON objects and applying
+#' SAS-compatible formatting.
+#'
+#' @export
+
+#' Extract objects from JSON array
+#' @param json_array Array of JSON objects
+#' @param class_definition Class schema definition
+#' @param base_path Base JSON path for this array
+#' @return Data frame with extracted records
+extract_array_objects <- function(json_array, class_definition, base_path) {
+  if (is.null(json_array) || length(json_array) == 0) return(NULL)
+
+  records <- list()
+  for (i in seq_along(json_array)) {
+    obj <- json_array[[i]]
+    datapath <- paste0(base_path, "/", i-1)
+    record <- create_record_from_object(obj, class_definition, datapath, i)
+    records[[i]] <- record
+  }
+
+  do.call(rbind, records)
+}
+
+#' Create a record from JSON object based on class definition
+#' @param json_obj JSON object to extract from
+#' @param class_definition Schema definition for this class
+#' @param datapath JSON path to this object
+#' @param record_id Unique record identifier
+#' @return Data frame with single record
+create_record_from_object <- function(json_obj, class_definition, datapath, record_id) {
+
+  # Initialize record list (safer than data.frame for dynamic creation)
+  record_list <- list(
+    tablepath = gsub("/[0-9]+", "", datapath),  # Remove indices for tablepath
+    datapath = datapath
+  )
+
+  # Extract schema-defined fields
+  for (i in seq_len(nrow(class_definition))) {
+    slot_info <- class_definition[i, ]
+    slot_name <- slot_info$slot
+
+    # Get value from JSON object (with bounds checking)
+    if (!is.null(json_obj) && slot_name %in% names(json_obj) && !is.null(json_obj[[slot_name]])) {
+      value <- json_obj[[slot_name]]
+
+      # Only extract scalar/simple attributes - skip complex nested objects
+      # Include Enum types as they are scalar enumeration values
+      if (slot_info$range %in% c("string", "integer", "number", "boolean") ||
+          grepl("Enum$", slot_info$range)) {
+        # Simple scalar types
+        if (is.list(value) && slot_info$is_array == 1) {
+          # Simple array of scalars - flatten to pipe-separated string
+          tryCatch({
+            flattened <- unlist(value)
+            if (length(flattened) > 1) {
+              record_list[[slot_name]] <- paste(as.character(flattened), collapse = "|")
+            } else if (length(flattened) == 1) {
+              record_list[[slot_name]] <- as.character(flattened)
+            } else {
+              record_list[[slot_name]] <- NA_character_
+            }
+          }, error = function(e) {
+            record_list[[slot_name]] <- NA_character_
+          })
+        } else if (!is.list(value)) {
+          # Simple scalar value
+          tryCatch({
+            if (slot_info$range == "integer" && is.numeric(value)) {
+              record_list[[slot_name]] <- as.integer(value)
+            } else {
+              record_list[[slot_name]] <- as.character(value)[1]
+            }
+          }, error = function(e) {
+            record_list[[slot_name]] <- NA_character_
+          })
+        } else {
+          # Complex object for simple type - skip
+          record_list[[slot_name]] <- NA_character_
+        }
+      } else {
+        # Complex type (like OperationResult, OrderedGroupingFactor, etc.)
+        # These should be handled by their own class datasets - skip here
+        record_list[[slot_name]] <- NA_character_
+      }
+    } else {
+      # Set missing values as NA
+      record_list[[slot_name]] <- NA_character_
+    }
+  }
+
+  # Convert list to data.frame
+  tryCatch({
+    record <- as.data.frame(record_list, stringsAsFactors = FALSE)
+    return(record)
+  }, error = function(e) {
+    # If conversion fails, return a minimal record
+    return(data.frame(
+      tablepath = record_list$tablepath,
+      datapath = record_list$datapath,
+      error_message = paste("Failed to create record:", e$message),
+      stringsAsFactors = FALSE
+    ))
+  })
+}
+
+#' Apply SAS-compatible formatting to extracted datasets
+#'
+#' Fixes tablepath, handles array fields as separate columns, removes extra columns
+#' @param class_datasets List of extracted datasets
+#' @param class_slots Schema class definitions
+#' @return List of formatted datasets
+apply_sas_compatible_formatting <- function(class_datasets, class_slots) {
+
+  cat("Applying SAS-compatible formatting...\n")
+
+  formatted_datasets <- list()
+
+  for (class_name in names(class_datasets)) {
+    dataset <- class_datasets[[class_name]]
+
+    if (nrow(dataset) == 0) {
+      formatted_datasets[[class_name]] <- dataset
+      next
+    }
+
+    # Get class definition for array field handling
+    class_definition <- class_slots[class_slots$parent_class == class_name, ]
+
+    # 1. Fix tablepath - add /root prefix
+    if ("tablepath" %in% names(dataset)) {
+      dataset$tablepath <- ifelse(
+        startsWith(dataset$tablepath, "/root"),
+        dataset$tablepath,
+        paste0("/root", dataset$tablepath)
+      )
+    }
+
+    # 2. Handle array fields - convert pipe-separated to separate columns
+    array_fields <- class_definition[class_definition$is_array == 1, ]
+
+    for (i in seq_len(nrow(array_fields))) {
+      field_name <- array_fields$slot[i]
+
+      if (field_name %in% names(dataset)) {
+        # Split pipe-separated values into separate columns
+        for (row_idx in seq_len(nrow(dataset))) {
+          value <- dataset[[field_name]][row_idx]
+
+          if (!is.na(value) && value != "") {
+            # Split by pipe
+            parts <- strsplit(as.character(value), "\\|")[[1]]
+
+            # Create separate columns
+            for (j in seq_along(parts)) {
+              col_name <- paste0(field_name, j)
+              if (!col_name %in% names(dataset)) {
+                dataset[[col_name]] <- NA_character_
+              }
+              dataset[[col_name]][row_idx] <- parts[j]
+            }
+          }
+        }
+
+        # Remove the original concatenated column
+        dataset[[field_name]] <- NULL
+      }
+    }
+
+    # 3. Remove unwanted columns
+    # Remove NA columns, complex object columns that should be handled by other classes
+    extra_cols <- grep("^NA\\.|^X\\.", names(dataset), value = TRUE)
+
+    # Also remove complex object fields that were set to NA (they belong to other classes)
+    complex_fields <- class_definition[!class_definition$range %in% c("string", "integer", "number", "boolean"), ]
+    complex_col_names <- complex_fields$slot[complex_fields$slot %in% names(dataset)]
+
+    # Check if these columns contain only NA values - if so, remove them
+    for (col_name in complex_col_names) {
+      if (all(is.na(dataset[[col_name]]))) {
+        extra_cols <- c(extra_cols, col_name)
+      }
+    }
+
+    # Remove duplicates and only keep columns that exist
+    extra_cols <- unique(extra_cols)
+    extra_cols <- extra_cols[extra_cols %in% names(dataset)]
+
+    if (length(extra_cols) > 0) {
+      dataset[extra_cols] <- NULL
+    }
+
+    # 4. Reorder columns to match SAS: tablepath, datapath, then alphabetical
+    core_cols <- c("tablepath", "datapath")
+    other_cols <- setdiff(names(dataset), core_cols)
+    other_cols <- sort(other_cols)
+
+    final_cols <- c(
+      core_cols[core_cols %in% names(dataset)],
+      other_cols
+    )
+
+    dataset <- dataset[final_cols]
+
+    formatted_datasets[[class_name]] <- dataset
+    cat("  ✓ Formatted", class_name, ":", nrow(dataset), "records\n")
+  }
+
+  return(formatted_datasets)
+}
\ No newline at end of file
diff --git a/utilities/R/helpers/extractor_reference.R b/utilities/R/helpers/extractor_reference.R
new file mode 100644
index 0000000..d8f2357
--- /dev/null
+++ b/utilities/R/helpers/extractor_reference.R
@@ -0,0 +1,314 @@
+#' Reference-Related Data Extractors
+#'
+#' Extraction functions for reference-related ARS classes including DocumentReference,
+#' ReferenceDocument, PageRef types, and other reference classes.
+#'
+#' @export
+
+# Document Reference Classes
+
+extract_document_reference <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Extract document references from analyses/documentRefs arrays
+  if (!is.null(json_data$analyses)) {
+    for (analysis_idx in seq_along(json_data$analyses)) {
+      analysis <- json_data$analyses[[analysis_idx]]
+      if (!is.null(analysis$documentRefs)) {
+        for (ref_idx in seq_along(analysis$documentRefs)) {
+          doc_ref <- analysis$documentRefs[[ref_idx]]
+          datapath <- sprintf("/analyses/%d/documentRefs/%d", analysis_idx - 1, ref_idx - 1)
+          record <- create_record_from_object(doc_ref, class_definition, datapath, record_idx)
+          if (!is.null(record)) {
+            records[[length(records) + 1]] <- record
+            record_idx <- record_idx + 1
+          }
+        }
+      }
+
+      # Also extract from analyses programmingCode/documentRef
+      if (!is.null(analysis$programmingCode) && !is.null(analysis$programmingCode$documentRef)) {
+        datapath <- sprintf("/analyses/%d/programmingCode/documentRef", analysis_idx - 1)
+        record <- create_record_from_object(analysis$programmingCode$documentRef, class_definition, datapath, record_idx)
+        if (!is.null(record)) {
+          records[[length(records) + 1]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  # Extract document references from methods/documentRefs arrays
+  if (!is.null(json_data$methods)) {
+    for (method_idx in seq_along(json_data$methods)) {
+      method <- json_data$methods[[method_idx]]
+      if (!is.null(method$documentRefs)) {
+        for (ref_idx in seq_along(method$documentRefs)) {
+          doc_ref <- method$documentRefs[[ref_idx]]
+          datapath <- sprintf("/methods/%d/documentRefs/%d", method_idx - 1, ref_idx - 1)
+          record <- create_record_from_object(doc_ref, class_definition, datapath, record_idx)
+          if (!is.null(record)) {
+            records[[length(records) + 1]] <- record
+            record_idx <- record_idx + 1
+          }
+        }
+      }
+    }
+  }
+
+  # Extract document references from outputs/documentRefs arrays
+  if (!is.null(json_data$outputs)) {
+    for (output_idx in seq_along(json_data$outputs)) {
+      output <- json_data$outputs[[output_idx]]
+      if (!is.null(output$documentRefs)) {
+        for (ref_idx in seq_along(output$documentRefs)) {
+          doc_ref <- output$documentRefs[[ref_idx]]
+          datapath <- sprintf("/outputs/%d/documentRefs/%d", output_idx - 1, ref_idx - 1)
+          record <- create_record_from_object(doc_ref, class_definition, datapath, record_idx)
+          if (!is.null(record)) {
+            records[[length(records) + 1]] <- record
+            record_idx <- record_idx + 1
+          }
+        }
+      }
+
+      # Also extract from outputs programmingCode/documentRef
+      if (!is.null(output$programmingCode) && !is.null(output$programmingCode$documentRef)) {
+        datapath <- sprintf("/outputs/%d/programmingCode/documentRef", output_idx - 1)
+        record <- create_record_from_object(output$programmingCode$documentRef, class_definition, datapath, record_idx)
+        if (!is.null(record)) {
+          records[[length(records) + 1]] <- record
+          record_idx <- record_idx + 1
+        }
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
+
+extract_reference_document <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Extract reference documents from root level
+  if (!is.null(json_data$referenceDocuments)) {
+    for (doc_idx in seq_along(json_data$referenceDocuments)) {
+      document <- json_data$referenceDocuments[[doc_idx]]
+      datapath <- sprintf("/referenceDocuments/%d", doc_idx - 1)
+      record <- create_record_from_object(document, class_definition, datapath, record_idx)
+      if (!is.null(record)) {
+        records[[length(records) + 1]] <- record
+        record_idx <- record_idx + 1
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
+
+# Page Reference Classes
+
+extract_page_name_refs <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Helper function to extract page name refs from document refs
+  extract_page_name_refs_from_docrefs <- function(doc_refs, base_path) {
+    page_records <- list()
+
+    for (doc_idx in seq_along(doc_refs)) {
+      doc_ref <- doc_refs[[doc_idx]]
+      if (!is.null(doc_ref$pageRefs)) {
+        for (page_idx in seq_along(doc_ref$pageRefs)) {
+          page_ref <- doc_ref$pageRefs[[page_idx]]
+          if (!is.null(page_ref$refType) && page_ref$refType == "NamedDestination" && !is.null(page_ref$pageNames)) {
+            datapath <- sprintf("%s/%d/pageRefs/%d", base_path, doc_idx - 1, page_idx - 1)
+            record <- create_record_from_object(page_ref, class_definition, datapath, record_idx)
+            if (!is.null(record)) {
+              page_records[[length(page_records) + 1]] <- record
+              record_idx <<- record_idx + 1
+            }
+          }
+        }
+      }
+    }
+
+    return(page_records)
+  }
+
+  # Extract from analyses/documentRefs
+  if (!is.null(json_data$analyses)) {
+    for (analysis_idx in seq_along(json_data$analyses)) {
+      analysis <- json_data$analyses[[analysis_idx]]
+      if (!is.null(analysis$documentRefs)) {
+        base_path <- sprintf("/analyses/%d/documentRefs", analysis_idx - 1)
+        page_records <- extract_page_name_refs_from_docrefs(analysis$documentRefs, base_path)
+        records <- c(records, page_records)
+      }
+    }
+  }
+
+  # Extract from outputs/documentRefs
+  if (!is.null(json_data$outputs)) {
+    for (output_idx in seq_along(json_data$outputs)) {
+      output <- json_data$outputs[[output_idx]]
+      if (!is.null(output$documentRefs)) {
+        base_path <- sprintf("/outputs/%d/documentRefs", output_idx - 1)
+        page_records <- extract_page_name_refs_from_docrefs(output$documentRefs, base_path)
+        records <- c(records, page_records)
+      }
+    }
+  }
+
+  # Extract from methods/documentRefs
+  if (!is.null(json_data$methods)) {
+    for (method_idx in seq_along(json_data$methods)) {
+      method <- json_data$methods[[method_idx]]
+      if (!is.null(method$documentRefs)) {
+        base_path <- sprintf("/methods/%d/documentRefs", method_idx - 1)
+        page_records <- extract_page_name_refs_from_docrefs(method$documentRefs, base_path)
+        records <- c(records, page_records)
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
+
+extract_page_number_list_refs <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Helper function to extract page number list refs from document refs
+  extract_page_number_list_refs_from_docrefs <- function(doc_refs, base_path) {
+    page_records <- list()
+
+    for (doc_idx in seq_along(doc_refs)) {
+      doc_ref <- doc_refs[[doc_idx]]
+      if (!is.null(doc_ref$pageRefs)) {
+        for (page_idx in seq_along(doc_ref$pageRefs)) {
+          page_ref <- doc_ref$pageRefs[[page_idx]]
+          if (!is.null(page_ref$refType) && page_ref$refType == "PhysicalRef" &&
+              !is.null(page_ref$pageNumbers) && is.null(page_ref$firstPage) && is.null(page_ref$lastPage)) {
+            datapath <- sprintf("%s/%d/pageRefs/%d", base_path, doc_idx - 1, page_idx - 1)
+            record <- create_record_from_object(page_ref, class_definition, datapath, record_idx)
+            if (!is.null(record)) {
+              page_records[[length(page_records) + 1]] <- record
+              record_idx <<- record_idx + 1
+            }
+          }
+        }
+      }
+    }
+
+    return(page_records)
+  }
+
+  # Extract from analyses/documentRefs
+  if (!is.null(json_data$analyses)) {
+    for (analysis_idx in seq_along(json_data$analyses)) {
+      analysis <- json_data$analyses[[analysis_idx]]
+      if (!is.null(analysis$documentRefs)) {
+        base_path <- sprintf("/analyses/%d/documentRefs", analysis_idx - 1)
+        page_records <- extract_page_number_list_refs_from_docrefs(analysis$documentRefs, base_path)
+        records <- c(records, page_records)
+      }
+    }
+  }
+
+  # Extract from outputs/documentRefs
+  if (!is.null(json_data$outputs)) {
+    for (output_idx in seq_along(json_data$outputs)) {
+      output <- json_data$outputs[[output_idx]]
+      if (!is.null(output$documentRefs)) {
+        base_path <- sprintf("/outputs/%d/documentRefs", output_idx - 1)
+        page_records <- extract_page_number_list_refs_from_docrefs(output$documentRefs, base_path)
+        records <- c(records, page_records)
+      }
+    }
+  }
+
+  # Extract from methods/documentRefs
+  if (!is.null(json_data$methods)) {
+    for (method_idx in seq_along(json_data$methods)) {
+      method <- json_data$methods[[method_idx]]
+      if (!is.null(method$documentRefs)) {
+        base_path <- sprintf("/methods/%d/documentRefs", method_idx - 1)
+        page_records <- extract_page_number_list_refs_from_docrefs(method$documentRefs, base_path)
+        records <- c(records, page_records)
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
+
+extract_page_number_range_refs <- function(json_data, class_definition) {
+  records <- list()
+  record_idx <- 1
+
+  # Helper function to extract page number range refs from document refs
+  extract_page_number_range_refs_from_docrefs <- function(doc_refs, base_path) {
+    page_records <- list()
+
+    for (doc_idx in seq_along(doc_refs)) {
+      doc_ref <- doc_refs[[doc_idx]]
+      if (!is.null(doc_ref$pageRefs)) {
+        for (page_idx in seq_along(doc_ref$pageRefs)) {
+          page_ref <- doc_ref$pageRefs[[page_idx]]
+          if (!is.null(page_ref$refType) && page_ref$refType == "PhysicalRef" &&
+              !is.null(page_ref$firstPage) && !is.null(page_ref$lastPage)) {
+            datapath <- sprintf("%s/%d/pageRefs/%d", base_path, doc_idx - 1, page_idx - 1)
+            record <- create_record_from_object(page_ref, class_definition, datapath, record_idx)
+            if (!is.null(record)) {
+              page_records[[length(page_records) + 1]] <- record
+              record_idx <<- record_idx + 1
+            }
+          }
+        }
+      }
+    }
+
+    return(page_records)
+  }
+
+  # Extract from analyses/documentRefs
+  if (!is.null(json_data$analyses)) {
+    for (analysis_idx in seq_along(json_data$analyses)) {
+      analysis <- json_data$analyses[[analysis_idx]]
+      if (!is.null(analysis$documentRefs)) {
+        base_path <- sprintf("/analyses/%d/documentRefs", analysis_idx - 1)
+        page_records <- extract_page_number_range_refs_from_docrefs(analysis$documentRefs, base_path)
+        records <- c(records, page_records)
+      }
+    }
+  }
+
+  # Extract from outputs/documentRefs
+  if (!is.null(json_data$outputs)) {
+    for (output_idx in seq_along(json_data$outputs)) {
+      output <- json_data$outputs[[output_idx]]
+      if (!is.null(output$documentRefs)) {
+        base_path <- sprintf("/outputs/%d/documentRefs", output_idx - 1)
+        page_records <- extract_page_number_range_refs_from_docrefs(output$documentRefs, base_path)
+        records <- c(records, page_records)
+      }
+    }
+  }
+
+  # Extract from methods/documentRefs
+  if (!is.null(json_data$methods)) {
+    for (method_idx in seq_along(json_data$methods)) {
+      method <- json_data$methods[[method_idx]]
+      if (!is.null(method$documentRefs)) {
+        base_path <- sprintf("/methods/%d/documentRefs", method_idx - 1)
+        page_records <- extract_page_number_range_refs_from_docrefs(method$documentRefs, base_path)
+        records <- c(records, page_records)
+      }
+    }
+  }
+
+  return(if (length(records) > 0) do.call(rbind, records) else data.frame())
+}
\ No newline at end of file
diff --git a/utilities/R/helpers/get_class_slots.R b/utilities/R/helpers/get_class_slots.R
new file mode 100644
index 0000000..a5d5acc
--- /dev/null
+++ b/utilities/R/helpers/get_class_slots.R
@@ -0,0 +1,373 @@
+#' Get Class Slots from ARS JSON Schema
+#'
+#' Reads a JSON-Schema representation of the ARS Model to create a data frame
+#' containing class attribute/slot properties. This is the R equivalent of the
+#' SAS macro get_class_slots.sas.
+#'
+#' @param json_schema_file Path to the local copy of the JSON-Schema definition
+#'   file for the ARS model (ars_ldm.json), which can be downloaded from the
+#'   model folder of the ARS GitHub repository.
+#' @param return_type Character string specifying the return format. Options are:
+#'   "data.frame" (default) for a standard data frame, or "list" for a named list
+#'   structure for easier programmatic access.
+#' @param include_unused Logical indicating whether to include slots marked as
+#'   "NOT USED" in the schema (default: FALSE).
+#'
+#' @return A data frame (or list if return_type="list") containing class/slot
+#'   definitions with the following columns:
+#'   \describe{
+#'     \item{parent_class}{Character. The parent class that contains this slot}
+#'     \item{slot}{Character. The name of the attribute/slot}
+#'     \item{range}{Character. The data type or class that this slot references}
+#'     \item{is_reqd}{Logical. Whether this slot is required in the parent class}
+#'     \item{is_array}{Logical. Whether this slot can contain multiple values}
+#'     \item{is_anyOf}{Logical. Whether this slot uses anyOf schema construct}
+#'   }
+#'
+#' @details
+#' This function processes the ARS JSON Schema to extract class and slot
+#' definitions that describe the structure of ARS objects. It identifies:
+#' \itemize{
+#'   \item Parent-child relationships between classes
+#'   \item Required vs. optional attributes
+#'   \item Data types and references for each slot
+#'   \item Array/list properties
+#'   \item Unused slots (excluded by default)
+#' }
+#'
+#' The function mimics the behavior of the SAS get_class_slots macro, providing
+#' equivalent functionality for R-based ARS processing workflows.
+#'
+#' @examples
+#' \dontrun{
+#' # Basic usage
+#' schema_file <- "model/ars_ldm.json"
+#' class_slots <- get_class_slots(schema_file)
+#'
+#' # Include unused slots
+#' all_slots <- get_class_slots(schema_file, include_unused = TRUE)
+#'
+#' # Return as list for programmatic access
+#' slots_list <- get_class_slots(schema_file, return_type = "list")
+#' }
+#'
+#' @export
+get_class_slots <- function(json_schema_file, return_type = "data.frame", include_unused = FALSE) {
+  # Input validation
+  if (!file.exists(json_schema_file)) {
+    stop("JSON schema file not found: ", json_schema_file)
+  }
+
+  if (!return_type %in% c("data.frame", "list")) {
+    stop("return_type must be 'data.frame' or 'list'")
+  }
+
+  # Load required packages
+  if (!requireNamespace("jsonlite", quietly = TRUE)) {
+    stop("Package 'jsonlite' is required but not installed.")
+  }
+
+  # Read the JSON schema
+  tryCatch(
+    {
+      schema <- jsonlite::fromJSON(json_schema_file, simplifyVector = FALSE)
+    },
+    error = function(e) {
+      stop("Failed to read JSON schema file: ", e$message)
+    }
+  )
+
+  # Check for required schema structure
+  if (!"$defs" %in% names(schema)) {
+    stop("Invalid ARS schema: Missing '$defs' section")
+  }
+
+  # Initialize result data frame with numeric flags (SAS compatibility: 1/0 instead of TRUE/FALSE)
+  # Column order matches SAS CSV: parent_class, slot, is_reqd, is_array, is_anyOf, range
+  result_df <- data.frame(
+    parent_class = character(0),
+    slot = character(0),
+    is_reqd = integer(0),
+    is_array = integer(0),
+    is_anyOf = integer(0),
+    range = character(0),
+    stringsAsFactors = FALSE
+  )
+
+  # Extract unused slots (marked as "NOT USED")
+  unused_slots <- character(0)
+  if (!include_unused) {
+    unused_slots <- find_unused_slots(schema)
+  }
+
+  # Process each class definition in $defs
+  defs <- schema[["$defs"]]
+  required_slots <- extract_required_slots(defs)
+
+  for (class_name in names(defs)) {
+    class_def <- defs[[class_name]]
+
+    if (!"properties" %in% names(class_def)) {
+      next
+    }
+
+    properties <- class_def[["properties"]]
+
+    for (slot_name in names(properties)) {
+      # Skip unused slots if not including them
+      slot_key <- paste(class_name, slot_name, sep = ".")
+      if (!include_unused && slot_key %in% unused_slots) {
+        next
+      }
+
+      slot_def <- properties[[slot_name]]
+
+      # Extract slot information
+      slot_info <- parse_slot_definition(slot_def, class_name, slot_name)
+
+      # Check if required
+      is_required <- slot_key %in% required_slots
+
+      # Handle anyOf options - create separate rows like SAS
+      if (slot_info$is_anyOf && "anyOf_options" %in% names(slot_info) && length(slot_info$anyOf_options) > 0) {
+        for (range_option in slot_info$anyOf_options) {
+          result_df <- rbind(
+            result_df,
+            data.frame(
+              parent_class = class_name,
+              slot = slot_name,
+              is_reqd = as.integer(is_required),
+              is_array = as.integer(slot_info$is_array),
+              is_anyOf = as.integer(slot_info$is_anyOf),
+              range = range_option,
+              stringsAsFactors = FALSE
+            )
+          )
+        }
+      } else {
+        # Add single entry for non-anyOf slots
+        result_df <- rbind(
+          result_df,
+          data.frame(
+            parent_class = class_name,
+            slot = slot_name,
+            is_reqd = as.integer(is_required),
+            is_array = as.integer(slot_info$is_array),
+            is_anyOf = as.integer(slot_info$is_anyOf),
+            range = slot_info$range,
+            stringsAsFactors = FALSE
+          )
+        )
+      }
+    }
+  }
+
+  # Add the "root" entry (equivalent to SAS version)
+  result_df <- rbind(
+    result_df,
+    data.frame(
+      parent_class = NA_character_,
+      slot = "root",
+      is_reqd = 1L,
+      is_array = 0L,
+      is_anyOf = 0L,
+      range = "ReportingEvent",
+      stringsAsFactors = FALSE
+    )
+  )
+
+  # Return in requested format
+  if (return_type == "list") {
+    return(convert_to_list_format(result_df))
+  } else {
+    return(result_df)
+  }
+}
+
+#' Find slots marked as "NOT USED" in the schema
+#' @param schema The parsed JSON schema
+#' @return Character vector of unused slot keys in format "class.slot"
+#' @keywords internal
+find_unused_slots <- function(schema) {
+  unused <- character(0)
+
+  if (!"$defs" %in% names(schema)) {
+    return(unused)
+  }
+
+  defs <- schema[["$defs"]]
+
+  for (class_name in names(defs)) {
+    class_def <- defs[[class_name]]
+
+    if (!"properties" %in% names(class_def)) {
+      next
+    }
+
+    properties <- class_def[["properties"]]
+
+    for (slot_name in names(properties)) {
+      slot_def <- properties[[slot_name]]
+
+      # Check if description indicates "NOT USED"
+      if ("description" %in% names(slot_def)) {
+        if (grepl("NOT USED", slot_def[["description"]], ignore.case = TRUE)) {
+          unused <- c(unused, paste(class_name, slot_name, sep = "."))
+        }
+      }
+    }
+  }
+
+  return(unused)
+}
+
+#' Extract required slots from schema definitions
+#' @param defs The $defs section of the JSON schema
+#' @return Character vector of required slot keys in format "class.slot"
+#' @keywords internal
+extract_required_slots <- function(defs) {
+  required <- character(0)
+
+  for (class_name in names(defs)) {
+    class_def <- defs[[class_name]]
+
+    if ("required" %in% names(class_def)) {
+      required_props <- class_def[["required"]]
+      # Convert list to character vector if needed (jsonlite parsing)
+      if (is.list(required_props)) {
+        required_props <- unlist(required_props)
+      }
+      if (is.character(required_props)) {
+        required_keys <- paste(class_name, required_props, sep = ".")
+        required <- c(required, required_keys)
+      }
+    }
+  }
+
+  return(required)
+}
+
+#' Parse individual slot definition from schema
+#' @param slot_def The slot definition from the schema
+#' @param class_name The parent class name
+#' @param slot_name The slot name
+#' @return List with parsed slot information
+#' @keywords internal
+parse_slot_definition <- function(slot_def, class_name, slot_name) {
+  result <- list(
+    range = "string", # default
+    is_array = 0L,
+    is_anyOf = 0L
+  )
+
+  # Check for $ref (reference to another class)
+  if ("$ref" %in% names(slot_def)) {
+    ref_path <- slot_def[["$ref"]]
+    result$range <- extract_class_from_ref(ref_path)
+  }
+
+  # Check for type
+  if ("type" %in% names(slot_def)) {
+    slot_type <- slot_def[["type"]]
+    if (slot_type == "array") {
+      result$is_array <- 1L
+      # For arrays, check the items definition
+      if ("items" %in% names(slot_def)) {
+        items_def <- slot_def[["items"]]
+        if ("$ref" %in% names(items_def)) {
+          result$range <- extract_class_from_ref(items_def[["$ref"]])
+        } else if ("anyOf" %in% names(items_def)) {
+          # Handle anyOf within array items
+          result$is_anyOf <- 1L
+          result$anyOf_options <- list()
+          any_of_options <- items_def[["anyOf"]]
+          for (option in any_of_options) {
+            if ("$ref" %in% names(option)) {
+              result$anyOf_options <- append(result$anyOf_options, extract_class_from_ref(option[["$ref"]]))
+            } else if ("type" %in% names(option) && option[["type"]] != "null") {
+              result$anyOf_options <- append(result$anyOf_options, option[["type"]])
+            }
+          }
+          if (length(result$anyOf_options) > 0) {
+            result$range <- result$anyOf_options[[1]]
+          }
+        } else if ("type" %in% names(items_def)) {
+          result$range <- items_def[["type"]]
+        }
+      }
+    } else {
+      result$range <- slot_type
+    }
+  }
+
+  # Check for anyOf - create separate entries for each option like SAS
+  if ("anyOf" %in% names(slot_def)) {
+    result$is_anyOf <- 1L
+    result$anyOf_options <- list()
+    any_of_options <- slot_def[["anyOf"]]
+    for (option in any_of_options) {
+      if ("$ref" %in% names(option)) {
+        result$anyOf_options <- append(result$anyOf_options, extract_class_from_ref(option[["$ref"]]))
+      } else if ("type" %in% names(option) && option[["type"]] != "null") {
+        result$anyOf_options <- append(result$anyOf_options, option[["type"]])
+      }
+    }
+    # Set range to first option for backward compatibility
+    if (length(result$anyOf_options) > 0) {
+      result$range <- result$anyOf_options[[1]]
+    }
+  }
+
+  return(result)
+}
+
+#' Extract class name from JSON schema $ref path
+#' @param ref_path The $ref path (e.g., "#/$defs/Analysis")
+#' @return Character string with the class name
+#' @keywords internal
+extract_class_from_ref <- function(ref_path) {
+  # Extract the last part after the final "/"
+  parts <- strsplit(ref_path, "/")[[1]]
+  return(parts[length(parts)])
+}
+
+#' Convert data frame result to list format
+#' @param df The data frame to convert
+#' @return Named list organized by parent class
+#' @keywords internal
+convert_to_list_format <- function(df) {
+  result_list <- list()
+
+  # Group by parent class
+  classes <- unique(df$parent_class[!is.na(df$parent_class)])
+
+  for (class_name in classes) {
+    class_slots <- df[!is.na(df$parent_class) & df$parent_class == class_name, ]
+
+    result_list[[class_name]] <- list(
+      properties = setNames(
+        lapply(seq_len(nrow(class_slots)), function(i) {
+          list(
+            range = class_slots$range[i],
+            is_reqd = class_slots$is_reqd[i],
+            is_array = class_slots$is_array[i],
+            is_anyOf = class_slots$is_anyOf[i]
+          )
+        }),
+        class_slots$slot
+      )
+    )
+  }
+
+  # Add root entry
+  root_row <- df[!is.na(df$slot) & df$slot == "root", ]
+  if (nrow(root_row) > 0) {
+    result_list[["_root"]] <- list(
+      slot = root_row$slot[1],
+      range = root_row$range[1],
+      is_reqd = root_row$is_reqd[1]
+    )
+  }
+
+  return(result_list)
+}
diff --git a/utilities/R/helpers/schema_driven_extractor_modular.R b/utilities/R/helpers/schema_driven_extractor_modular.R
new file mode 100644
index 0000000..2c43338
--- /dev/null
+++ b/utilities/R/helpers/schema_driven_extractor_modular.R
@@ -0,0 +1,192 @@
+#' Modular Schema-Driven JSON Data Extractor
+#'
+#' This is the refactored modular version of the schema-driven extractor.
+#' It loads and coordinates extraction modules for cleaner code organization.
+#'
+#' @export
+
+# Get the directory where this script is located
+get_script_dir <- function() {
+  if (exists("sys.frame") && !is.null(sys.frame(1)$ofile)) {
+    return(dirname(sys.frame(1)$ofile))
+  } else {
+    # Fallback for interactive mode or when sys.frame is not available
+    return("utilities/R/helpers")
+  }
+}
+
+#' Load all extraction modules
+load_extractor_modules <- function() {
+  script_dir <- get_script_dir()
+
+  # Load all modules
+  modules <- c(
+    "extractor_record_utils.R",
+    "extractor_analysis.R",
+    "extractor_output.R",
+    "extractor_reference.R",
+    "extractor_metadata.R"
+  )
+
+  for (module in modules) {
+    module_path <- file.path(script_dir, module)
+    if (file.exists(module_path)) {
+      source(module_path)
+      cat("✓ Loaded module:", module, "\n")
+    } else {
+      warning("Module not found:", module_path)
+    }
+  }
+}
+
+#' Schema-Driven JSON Data Extractor (Modular Version)
+#'
+#' Extracts data from JSON based on ARS schema class definitions.
+#' This modular version loads extraction functions from separate modules
+#' for better code organization and maintainability.
+#'
+#' @param json_file Path to JSON file
+#' @param class_slots Data frame with class/slot definitions from get_class_slots
+#' @return List containing class datasets with actual data
+#' @export
+extract_schema_driven_datasets <- function(json_file, class_slots) {
+
+  # Load required libraries
+  library(jsonlite)
+  library(dplyr)
+
+  # Load extraction modules
+  cat("=== Loading Extraction Modules ===\n")
+  load_extractor_modules()
+
+  # Load JSON
+  json_data <- fromJSON(json_file, simplifyVector = FALSE)
+
+  cat("=== Schema-Driven Data Extraction ===\n")
+
+  # Initialize results
+  class_datasets <- list()
+
+  # Get all unique classes from schema
+  all_classes <- unique(class_slots$parent_class[!is.na(class_slots$parent_class)])
+
+  cat("Extracting data for", length(all_classes), "schema-defined classes...\n")
+
+  # Extract data for each class based on known JSON paths and schema
+  for (class_name in all_classes) {
+
+    # Get slots for this class
+    class_definition <- class_slots[class_slots$parent_class == class_name, ]
+
+    # Extract data based on class type and expected JSON locations
+    extracted_data <- extract_class_data(json_data, class_name, class_definition)
+
+    if (!is.null(extracted_data) && nrow(extracted_data) > 0) {
+      class_datasets[[class_name]] <- extracted_data
+      cat("  ✓", class_name, ":", nrow(extracted_data), "records\n")
+    } else {
+      cat("  -", class_name, ": no data\n")
+    }
+  }
+
+  cat("Schema extraction complete:", length(class_datasets), "classes with data\n")
+
+  return(class_datasets)
+}
+
+#' Extract Data for Specific Class (Modular Router)
+#'
+#' Routes extraction to appropriate module based on class type.
+#' This function coordinates between the different extraction modules.
+#'
+#' @param json_data Parsed JSON object
+#' @param class_name Name of the class to extract
+#' @param class_definition Data frame with slot definitions for this class
+#' @return Data frame with extracted records for this class
+extract_class_data <- function(json_data, class_name, class_definition) {
+
+  # Route to appropriate extraction module based on class type
+  result <- switch(class_name,
+
+    # CORE REPORTING
+    "ReportingEvent" = extract_reporting_event(json_data, class_definition),
+
+    # ANALYSIS MODULE (extractor_analysis.R)
+    "Analysis" = extract_analyses(json_data, class_definition),
+    "AnalysisMethod" = extract_analysis_methods(json_data, class_definition),
+    "Operation" = extract_operations(json_data, class_definition),
+    "OperationResult" = extract_operation_results(json_data, class_definition),
+    "ResultGroup" = extract_result_groups(json_data, class_definition),
+    "AnalysisSet" = extract_analysis_sets(json_data, class_definition),
+    "DataSubset" = extract_data_subsets(json_data, class_definition),
+    "Group" = extract_groups(json_data, class_definition),
+    "OrderedGroupingFactor" = extract_ordered_grouping_factors(json_data, class_definition),
+    "GroupingFactor" = extract_grouping_factors(json_data, class_definition),
+    "WhereClause" = extract_where_clauses(json_data, class_definition),
+    "WhereClauseCondition" = extract_where_clause_conditions(json_data, class_definition),
+    "CompoundSetExpression" = extract_compound_set_expression(json_data, class_definition),
+    "CompoundGroupExpression" = extract_compound_group_expression(json_data, class_definition),
+    "CompoundSubsetExpression" = extract_compound_subset_expressions(json_data, class_definition),
+    "AnalysisReason" = extract_analysis_reasons(json_data, class_definition),
+    "AnalysisPurpose" = extract_analysis_purpose(json_data, class_definition),
+    "ReferencedAnalysisOperation" = extract_referenced_analysis_operations(json_data, class_definition),
+    "ReferencedDataSubset" = extract_referenced_data_subsets(json_data, class_definition),
+    "ReferencedOperationRelationship" = extract_referenced_operation_relationships(json_data, class_definition),
+    "OperationRole" = extract_operation_roles(json_data, class_definition),
+    "ReferencedAnalysisSet" = extract_referenced_analysis_sets(json_data, class_definition),
+    "SponsorAnalysisReason" = extract_sponsor_analysis_reasons(json_data, class_definition),
+    "SponsorAnalysisPurpose" = extract_sponsor_analysis_purposes(json_data, class_definition),
+
+    # OUTPUT MODULE (extractor_output.R)
+    "Output" = extract_outputs(json_data, class_definition),
+    "OutputDisplay" = extract_output_displays(json_data, class_definition),
+    "OutputFile" = extract_output_file(json_data, class_definition),
+    "OutputFileType" = extract_output_file_type(json_data, class_definition),
+    "DisplaySection" = extract_display_sections(json_data, class_definition),
+    "DisplaySubSection" = extract_display_subsections(json_data, class_definition),
+    "GlobalDisplaySection" = extract_global_display_sections(json_data, class_definition),
+    "OrderedSubSection" = extract_ordered_subsections(json_data, class_definition),
+    "OrderedDisplay" = extract_ordered_displays(json_data, class_definition),
+    "OrderedSubSectionRef" = extract_ordered_subsection_refs(json_data, class_definition),
+    "ListOfContents" = extract_list_of_contents(json_data, class_definition),
+    "NestedList" = extract_nested_lists(json_data, class_definition),
+    "OrderedListItem" = extract_ordered_list_items(json_data, class_definition),
+
+    # REFERENCE MODULE (extractor_reference.R)
+    "DocumentReference" = extract_document_reference(json_data, class_definition),
+    "ReferenceDocument" = extract_reference_document(json_data, class_definition),
+    "PageNameRef" = extract_page_name_refs(json_data, class_definition),
+    "PageNumberListRef" = extract_page_number_list_refs(json_data, class_definition),
+    "PageNumberRangeRef" = extract_page_number_range_refs(json_data, class_definition),
+
+    # METADATA MODULE (extractor_metadata.R)
+    "TerminologyExtension" = extract_terminology_extensions(json_data, class_definition),
+    "SponsorTerm" = extract_sponsor_terms(json_data, class_definition),
+    "AnalysisOutputCategorization" = extract_analysis_output_categorizations(json_data, class_definition),
+    "AnalysisOutputCategory" = extract_analysis_output_categories(json_data, class_definition),
+    "AnalysisOutputProgrammingCode" = extract_analysis_output_programming_code(json_data, class_definition),
+    "AnalysisProgrammingCodeTemplate" = extract_analysis_programming_code_template(json_data, class_definition),
+    "TemplateCodeParameter" = extract_template_code_parameter(json_data, class_definition),
+
+    # Default: return NULL for unknown classes
+    NULL
+  )
+
+  return(result)
+}
+
+#' Extract root-level ReportingEvent
+#' @param json_data Parsed JSON object
+#' @param class_definition Schema definition for ReportingEvent
+#' @return Data frame with ReportingEvent record
+extract_reporting_event <- function(json_data, class_definition) {
+  create_record_from_object(json_data, class_definition, "/", 1)
+}
+
+#' Apply SAS-compatible formatting wrapper
+#' @param class_datasets List of extracted datasets
+#' @param class_slots Schema class definitions
+#' @return List of formatted datasets
+apply_formatting <- function(class_datasets, class_slots) {
+  apply_sas_compatible_formatting(class_datasets, class_slots)
+}
\ No newline at end of file
diff --git a/utilities/R/tests/README.md b/utilities/R/tests/README.md
new file mode 100644
index 0000000..0d5ab97
--- /dev/null
+++ b/utilities/R/tests/README.md
@@ -0,0 +1,196 @@
+# R Utilities Test Suite
+
+Test suite for the R utilities in the OARS project, covering both unit tests for individual components and integration tests for complete workflows.
+
+## Test Structure
+
+```
+tests/
+├── run_tests.R                    # Main test runner script
+├── README.md                      # This documentation
+├── fixtures/                      # Test data and schemas
+│   ├── test_schema.json          # Minimal ARS schema for testing
+│   └── test_data.json            # Sample JSON data matching schema
+├── unit/                         # Unit tests for individual components
+│   ├── test_get_class_slots.R    # Tests for schema parsing
+│   ├── test_extractor_record_utils.R  # Tests for record utilities
+│   └── test_modular_extractor.R  # Tests for modular extractor system
+└── integration/                  # Integration tests for complete workflows
+    └── test_create_class_datasets.R  # Tests for main pipeline function
+```
+
+## Running Tests
+
+### Quick Start
+```bash
+# Run all tests
+Rscript tests/run_tests.R
+
+# Run only unit tests
+Rscript tests/run_tests.R unit
+
+# Run only integration tests
+Rscript tests/run_tests.R integration
+```
+
+### Individual Test Files
+```bash
+# Always run from project root directory (utilities/R/tests/run_tests.R expects this)
+cd /path/to/oars  # Project root where dev/ directory is located
+
+# Run specific test file
+Rscript utilities/R/tests/unit/test_get_class_slots.R
+
+# Or run via test runner (recommended)
+Rscript utilities/R/tests/run_tests.R unit
+```
+
+## Test Coverage
+
+### Unit Tests
+
+#### `test_get_class_slots.R`
+- ✅ Function exists and can be called
+- ✅ Handles invalid file paths gracefully
+- ✅ Processes test schema correctly
+- ✅ Processes real ARS schema (if available)
+- ✅ Extracts slots properly
+- ✅ Detects array fields correctly
+
+#### `test_extractor_record_utils.R`
+- ✅ `create_record_from_object` basic functionality
+- ✅ Array handling in record creation
+- ✅ Missing field handling
+- ✅ `extract_array_objects` functionality
+- ✅ SAS-compatible formatting
+- ✅ Array splitting in formatting
+
+#### `test_modular_extractor.R`
+- ✅ Main extractor function exists
+- ✅ Module loading works
+- ✅ Class data routing works
+- ✅ Extraction with test fixtures
+- ✅ Extraction with real data (if available)
+- ✅ Data structure validation
+- ✅ Analysis class extraction
+- ✅ Operation extraction
+
+### Integration Tests
+
+#### `test_create_class_datasets.R`
+- ✅ Function exists
+- ✅ Invalid input handling
+- ✅ Complete pipeline with test fixtures
+- ✅ Real data processing (if available)
+- ✅ Output file validation
+- ✅ Summary information validation
+- ✅ Verbose mode functionality
+
+## Test Fixtures
+
+The test suite includes minimal test fixtures to ensure tests can run independently:
+
+### `test_schema.json`
+- Simplified ARS schema with core classes: Analysis, Operation, AnalysisReason
+- Includes proper type definitions and relationships
+- Designed for fast, predictable testing
+
+### `test_data.json`
+- Sample reporting event with 2 analyses and 3 operations
+- Matches the test schema structure
+- Provides known data for validation
+
+## Test Philosophy
+
+### Unit Tests
+- **Fast**: Each test runs in milliseconds
+- **Isolated**: Tests individual functions without dependencies
+- **Deterministic**: Same input always produces same output
+- **Coverage**: Cover both success and error cases
+
+### Integration Tests
+- **Realistic**: Use actual workflow patterns
+- **End-to-End**: Test complete pipelines
+- **File I/O**: Validate actual file creation and content
+- **Error Handling**: Test failure modes and recovery
+
+## Expected Test Results
+
+When running against the full OARS dataset:
+
+- **36 ARS classes** extracted successfully
+- **461 total observations** across all classes
+- **Operation class**: 14 records (from 4 methods with different operation counts)
+- **Analysis class**: 20 records
+- **Processing time**: < 1 second
+
+## Continuous Testing
+
+### Before Commits
+```bash
+# Always run tests before committing changes
+Rscript tests/run_tests.R
+```
+
+### After Refactoring
+```bash
+# Run full test suite after major changes
+Rscript tests/run_tests.R all
+```
+
+### Development Workflow
+```bash
+# Run relevant unit tests during development
+Rscript tests/run_tests.R unit
+
+# Run integration tests before finalizing features
+Rscript tests/run_tests.R integration
+```
+
+## Adding New Tests
+
+### New Unit Test
+1. Create `test_new_feature.R` in `unit/`
+2. Follow existing patterns:
+   - Use `run_test()` helper function
+   - Include multiple test cases
+   - Handle both success and error scenarios
+3. Update this README
+
+### New Integration Test
+1. Create test in `integration/`
+2. Include setup/cleanup functions
+3. Test complete workflows
+4. Validate file outputs
+
+### Test Fixtures
+- Add new fixtures to `fixtures/` directory
+- Keep fixtures minimal but realistic
+- Document expected behavior
+
+## Troubleshooting
+
+### Common Issues
+
+#### Tests Skip with "fixtures not found"
+- Ensure `fixtures/test_schema.json` and `fixtures/test_data.json` exist
+- Check file paths are relative to test script location
+
+#### Tests Skip with "real data not found"
+- This is expected if `model/ars_ldm.json` or `workfiles/examples/ARS v1/FDA Standard Safety Tables and Figures.json` don't exist
+- Tests will skip gracefully and still pass
+
+#### Permission Errors
+- Ensure test runner is executable: `chmod +x run_tests.R`
+- Check write permissions for temporary directories
+
+#### Module Loading Errors
+- Verify all helper files exist in `../helpers/` relative to test
+- Check that working directory is correctly set
+
+## Test Maintenance
+
+- **Review test coverage** when adding new features
+- **Update fixtures** if schema changes significantly
+- **Benchmark performance** to catch regressions
+- **Document breaking changes** that affect test expectations
diff --git a/utilities/R/tests/fixtures/test_data.json b/utilities/R/tests/fixtures/test_data.json
new file mode 100644
index 0000000..4bc808e
--- /dev/null
+++ b/utilities/R/tests/fixtures/test_data.json
@@ -0,0 +1,66 @@
+{
+  "id": "TEST_RE_001",
+  "name": "Test Reporting Event",
+  "version": "1.0.0",
+  "analyses": [
+    {
+      "id": "ANA_001",
+      "name": "Analysis 1",
+      "description": "First test analysis",
+      "order": 1,
+      "methodId": "METHOD_001",
+      "reason": {
+        "controlledTerm": "SPECIFIED_IN_SAP",
+        "description": "Analysis specified in statistical analysis plan"
+      }
+    },
+    {
+      "id": "ANA_002",
+      "name": "Analysis 2",
+      "description": "Second test analysis",
+      "order": 2,
+      "methodId": "METHOD_002",
+      "reason": {
+        "controlledTerm": "POST_HOC",
+        "description": "Post-hoc analysis"
+      }
+    }
+  ],
+  "methods": [
+    {
+      "id": "METHOD_001",
+      "name": "Count and Percentage Method",
+      "description": "Method for counting and calculating percentages",
+      "operations": [
+        {
+          "id": "OP_001_1",
+          "name": "Count",
+          "description": "Count of subjects",
+          "order": 1,
+          "resultPattern": "N=XX"
+        },
+        {
+          "id": "OP_001_2",
+          "name": "Percentage",
+          "description": "Percentage of subjects",
+          "order": 2,
+          "resultPattern": "XX%"
+        }
+      ]
+    },
+    {
+      "id": "METHOD_002",
+      "name": "Mean Calculation Method",
+      "description": "Method for calculating mean values",
+      "operations": [
+        {
+          "id": "OP_002_1",
+          "name": "Mean",
+          "description": "Mean value",
+          "order": 1,
+          "resultPattern": "XX.X"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/utilities/R/tests/fixtures/test_schema.json b/utilities/R/tests/fixtures/test_schema.json
new file mode 100644
index 0000000..cad0902
--- /dev/null
+++ b/utilities/R/tests/fixtures/test_schema.json
@@ -0,0 +1,110 @@
+{
+  "$schema": "https://json-schema.org/draft/2019-09/schema",
+  "$id": "https://www.cdisc.org/ars/1-0",
+  "title": "Test ARS Schema",
+  "type": "object",
+  "properties": {
+    "ReportingEvent": {
+      "type": "object",
+      "properties": {
+        "id": {
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        },
+        "version": {
+          "type": "string"
+        },
+        "analyses": {
+          "type": "array",
+          "items": {
+            "$ref": "#/$defs/Analysis"
+          }
+        },
+        "methods": {
+          "type": "array",
+          "items": {
+            "$ref": "#/$defs/AnalysisMethod"
+          }
+        }
+      }
+    }
+  },
+  "$defs": {
+    "Analysis": {
+      "type": "object",
+      "properties": {
+        "id": {
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "order": {
+          "type": "integer"
+        },
+        "methodId": {
+          "type": "string"
+        },
+        "reason": {
+          "$ref": "#/$defs/AnalysisReason"
+        }
+      }
+    },
+    "AnalysisReason": {
+      "type": "object",
+      "properties": {
+        "controlledTerm": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        }
+      }
+    },
+    "AnalysisMethod": {
+      "type": "object",
+      "properties": {
+        "id": {
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "operations": {
+          "type": "array",
+          "items": {
+            "$ref": "#/$defs/Operation"
+          }
+        }
+      }
+    },
+    "Operation": {
+      "type": "object",
+      "properties": {
+        "id": {
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "order": {
+          "type": "integer"
+        },
+        "resultPattern": {
+          "type": "string"
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/utilities/R/tests/integration/test_create_class_datasets.R b/utilities/R/tests/integration/test_create_class_datasets.R
new file mode 100644
index 0000000..2879c28
--- /dev/null
+++ b/utilities/R/tests/integration/test_create_class_datasets.R
@@ -0,0 +1,370 @@
+#!/usr/bin/env Rscript
+
+#' Integration Tests for create_class_datasets_for_json_reportingevent.R
+#'
+#' Tests the complete pipeline from JSON schema and data to class datasets
+#'
+#' Usage: Rscript test_create_class_datasets.R
+
+# Load required libraries
+library(jsonlite)
+library(dplyr)
+
+# Declare paths
+main_script_path <- file.path("utilities/R/create_class_datasets_for_json_reportingevent.R")
+fixtures_path <- file.path("utilities/R/tests/fixtures")
+model_path <- file.path("model")
+temp_base <- file.path("utilities/R/tests")
+
+# Source the function under test
+source(main_script_path)
+
+# Test configuration
+test_schema_file <- file.path(fixtures_path, "test_schema.json")
+test_data_file <- file.path(fixtures_path, "test_data.json")
+real_schema_file <- file.path(model_path, "ars_ldm.json")
+real_data_file <- file.path("workfiles/examples/ARS v1/FDA Standard Safety Tables and Figures.json")
+test_output_dir <- file.path(temp_base, "temp_output")
+test_temp_dir <- file.path(temp_base, "temp_temp")
+
+#' Test helper function to run a test and report results
+run_test <- function(test_name, test_func) {
+  cat("Testing:", test_name, "... ")
+
+  tryCatch({
+    result <- test_func()
+    if (result) {
+      cat("✅ PASS\n")
+      return(TRUE)
+    } else {
+      cat("❌ FAIL\n")
+      return(FALSE)
+    }
+  }, error = function(e) {
+    cat("❌ ERROR:", e$message, "\n")
+    return(FALSE)
+  })
+}
+
+#' Setup function to create test directories
+setup_test_dirs <- function() {
+  if (!dir.exists(test_output_dir)) {
+    dir.create(test_output_dir, recursive = TRUE)
+  }
+  if (!dir.exists(test_temp_dir)) {
+    dir.create(test_temp_dir, recursive = TRUE)
+  }
+}
+
+#' Cleanup function to remove test directories
+cleanup_test_dirs <- function() {
+  if (dir.exists(test_output_dir)) {
+    unlink(test_output_dir, recursive = TRUE)
+  }
+  if (dir.exists(test_temp_dir)) {
+    unlink(test_temp_dir, recursive = TRUE)
+  }
+}
+
+#' Test 1: Function exists
+test_function_exists <- function() {
+  return(exists("create_class_datasets_for_json_reportingevent"))
+}
+
+#' Test 2: Invalid input handling
+test_invalid_inputs <- function() {
+  setup_test_dirs()
+
+  # Test with non-existent schema file
+  result1 <- tryCatch({
+    create_class_datasets_for_json_reportingevent(
+      json_schema_file = "nonexistent.json",
+      reporting_event_json_file = test_data_file,
+      output_directory = test_output_dir,
+      verbose = FALSE
+    )
+    return(FALSE)  # Should have thrown an error
+  }, error = function(e) {
+    return(TRUE)   # Expected error
+  })
+
+  # Test with non-existent data file
+  result2 <- tryCatch({
+    create_class_datasets_for_json_reportingevent(
+      json_schema_file = test_schema_file,
+      reporting_event_json_file = "nonexistent.json",
+      output_directory = test_output_dir,
+      verbose = FALSE
+    )
+    return(FALSE)  # Should have thrown an error
+  }, error = function(e) {
+    return(TRUE)   # Expected error
+  })
+
+  cleanup_test_dirs()
+  return(result1 && result2)
+}
+
+#' Test 3: Complete pipeline with test fixtures
+test_complete_pipeline_fixtures <- function() {
+  if (!file.exists(test_schema_file) || !file.exists(test_data_file)) {
+    cat("(SKIP - test fixtures not found) ")
+    return(TRUE)
+  }
+
+  setup_test_dirs()
+
+  result <- create_class_datasets_for_json_reportingevent(
+    json_schema_file = test_schema_file,
+    reporting_event_json_file = test_data_file,
+    output_directory = test_output_dir,
+    temp_directory = test_temp_dir,
+    verbose = FALSE
+  )
+
+  # Should return a list with expected structure
+  if (!is.list(result)) {
+    cleanup_test_dirs()
+    return(FALSE)
+  }
+
+  expected_elements <- c("class_datasets", "classes_processed", "output_files", "summary")
+  if (!all(expected_elements %in% names(result))) {
+    cleanup_test_dirs()
+    return(FALSE)
+  }
+
+  # Should have extracted some classes
+  if (length(result$class_datasets) == 0) {
+    cleanup_test_dirs()
+    return(FALSE)
+  }
+
+  # Should have created output files
+  if (length(result$output_files) == 0) {
+    cleanup_test_dirs()
+    return(FALSE)
+  }
+
+  # Files should actually exist
+  for (file_path in result$output_files) {
+    if (!file.exists(file_path)) {
+      cleanup_test_dirs()
+      return(FALSE)
+    }
+  }
+
+  cleanup_test_dirs()
+  return(TRUE)
+}
+
+#' Test 4: Real data processing (if available)
+test_real_data_processing <- function() {
+  if (!file.exists(real_schema_file) || !file.exists(real_data_file)) {
+    cat("(SKIP - real data not found) ")
+    return(TRUE)
+  }
+
+  setup_test_dirs()
+
+  result <- create_class_datasets_for_json_reportingevent(
+    json_schema_file = real_schema_file,
+    reporting_event_json_file = real_data_file,
+    output_directory = test_output_dir,
+    temp_directory = test_temp_dir,
+    verbose = FALSE
+  )
+
+  # Should have processed many classes
+  if (length(result$class_datasets) < 10) {
+    cleanup_test_dirs()
+    return(FALSE)
+  }
+
+  # Should include key ARS classes
+  expected_classes <- c("Analysis", "Operation", "ReportingEvent")
+  found_classes <- names(result$class_datasets)
+
+  if (!all(expected_classes %in% found_classes)) {
+    cleanup_test_dirs()
+    return(FALSE)
+  }
+
+  # Should have meaningful data
+  if (result$summary$total_observations < 100) {
+    cleanup_test_dirs()
+    return(FALSE)
+  }
+
+  cleanup_test_dirs()
+  return(TRUE)
+}
+
+#' Test 5: Output file validation
+test_output_file_validation <- function() {
+  if (!file.exists(test_schema_file) || !file.exists(test_data_file)) {
+    cat("(SKIP - test fixtures not found) ")
+    return(TRUE)
+  }
+
+  setup_test_dirs()
+
+  result <- create_class_datasets_for_json_reportingevent(
+    json_schema_file = test_schema_file,
+    reporting_event_json_file = test_data_file,
+    output_directory = test_output_dir,
+    verbose = FALSE
+  )
+
+  # Check that output files have correct structure
+  for (file_path in result$output_files) {
+    # Read the CSV file
+    tryCatch({
+      data <- read.csv(file_path, stringsAsFactors = FALSE)
+
+      # Should have tablepath and datapath columns
+      required_cols <- c("tablepath", "datapath")
+      if (!all(required_cols %in% names(data))) {
+        cleanup_test_dirs()
+        return(FALSE)
+      }
+
+      # Should have at least one row
+      if (nrow(data) == 0) {
+        cleanup_test_dirs()
+        return(FALSE)
+      }
+
+    }, error = function(e) {
+      cleanup_test_dirs()
+      return(FALSE)
+    })
+  }
+
+  cleanup_test_dirs()
+  return(TRUE)
+}
+
+#' Test 6: Summary information validation
+test_summary_validation <- function() {
+  if (!file.exists(test_schema_file) || !file.exists(test_data_file)) {
+    cat("(SKIP - test fixtures not found) ")
+    return(TRUE)
+  }
+
+  setup_test_dirs()
+
+  result <- create_class_datasets_for_json_reportingevent(
+    json_schema_file = test_schema_file,
+    reporting_event_json_file = test_data_file,
+    output_directory = test_output_dir,
+    verbose = FALSE
+  )
+
+  summary_info <- result$summary
+
+  # Should have expected summary fields
+  expected_fields <- c(
+    "processing_time_seconds",
+    "json_schema_file",
+    "reporting_event_json_file",
+    "output_directory",
+    "class_datasets_created",
+    "total_observations",
+    "total_variables"
+  )
+
+  if (!all(expected_fields %in% names(summary_info))) {
+    cleanup_test_dirs()
+    return(FALSE)
+  }
+
+  # Processing time should be positive
+  if (summary_info$processing_time_seconds <= 0) {
+    cleanup_test_dirs()
+    return(FALSE)
+  }
+
+  # Should have created some datasets
+  if (summary_info$class_datasets_created == 0) {
+    cleanup_test_dirs()
+    return(FALSE)
+  }
+
+  cleanup_test_dirs()
+  return(TRUE)
+}
+
+#' Test 7: Verbose mode functionality
+test_verbose_mode <- function() {
+  if (!file.exists(test_schema_file) || !file.exists(test_data_file)) {
+    cat("(SKIP - test fixtures not found) ")
+    return(TRUE)
+  }
+
+  setup_test_dirs()
+
+  # Capture output from verbose mode
+  output <- capture.output({
+    result <- create_class_datasets_for_json_reportingevent(
+      json_schema_file = test_schema_file,
+      reporting_event_json_file = test_data_file,
+      output_directory = test_output_dir,
+      verbose = TRUE
+    )
+  })
+
+  # Should have produced some output
+  if (length(output) == 0) {
+    cleanup_test_dirs()
+    return(FALSE)
+  }
+
+  # Should mention key steps
+  output_text <- paste(output, collapse = " ")
+  expected_phrases <- c("Step 1", "Step 2", "PROCESSING COMPLETE")
+
+  if (!all(sapply(expected_phrases, function(phrase) grepl(phrase, output_text)))) {
+    cleanup_test_dirs()
+    return(FALSE)
+  }
+
+  cleanup_test_dirs()
+  return(TRUE)
+}
+
+# Run all tests
+cat("=== Integration Tests for create_class_datasets_for_json_reportingevent.R ===\n\n")
+
+tests <- list(
+  "Function exists" = test_function_exists,
+  "Invalid input handling" = test_invalid_inputs,
+  "Complete pipeline (fixtures)" = test_complete_pipeline_fixtures,
+  "Real data processing" = test_real_data_processing,
+  "Output file validation" = test_output_file_validation,
+  "Summary validation" = test_summary_validation,
+  "Verbose mode" = test_verbose_mode
+)
+
+# Run tests and collect results
+results <- list()
+for (test_name in names(tests)) {
+  results[[test_name]] <- run_test(test_name, tests[[test_name]])
+}
+
+# Final cleanup
+cleanup_test_dirs()
+
+# Summary
+cat("\n=== Test Summary ===\n")
+passed <- sum(unlist(results))
+total <- length(results)
+cat("Passed:", passed, "/", total, "tests\n")
+
+if (passed == total) {
+  cat("✅ All tests PASSED!\n")
+  quit(status = 0)
+} else {
+  cat("❌ Some tests FAILED!\n")
+  quit(status = 1)
+}
\ No newline at end of file
diff --git a/utilities/R/tests/run_tests.R b/utilities/R/tests/run_tests.R
new file mode 100644
index 0000000..3d5d63e
--- /dev/null
+++ b/utilities/R/tests/run_tests.R
@@ -0,0 +1,162 @@
+#' Test Runner for R Utilities
+#'
+#' Runs all unit and integration tests for the R utilities
+#'
+#' Usage: Rscript run_tests.R [test_type]
+#'   test_type: "unit", "integration", or "all" (default: "all")
+
+# Parse command line arguments
+args <- commandArgs(trailingOnly = TRUE)
+test_type <- if (length(args) > 0) args[1] else "all"
+
+# Validate test type
+valid_types <- c("unit", "integration", "all")
+if (!test_type %in% valid_types) {
+  cat("Error: Invalid test type '", test_type, "'\n", sep = "")
+  cat("Valid options: ", paste(valid_types, collapse = ", "), "\n")
+  quit(status = 1)
+}
+
+# Configuration - simple relative paths
+test_dir <- paste0(getwd(), "/utilities/R/tests")
+unit_test_dir <- file.path(test_dir, "unit")
+integration_test_dir <- file.path(test_dir, "integration")
+
+#' Run a test script and capture results
+run_test_script <- function(script_path, script_name) {
+  cat("Running", script_name, "...\n")
+
+  script_dir <- dirname(script_path)
+
+  tryCatch({
+    # Run the test script
+    result <- system2("Rscript", args = script_path,
+                     stdout = TRUE, stderr = TRUE)
+
+    # Check return status
+    exit_status <- attr(result, "status")
+    if (is.null(exit_status)) {
+      exit_status <- 0  # No status means success
+    }
+
+    # Print output
+    if (length(result) > 0) {
+      cat(paste(result, collapse = "\n"), "\n")
+    }
+
+    return(list(
+      name = script_name,
+      status = exit_status,
+      output = result
+    ))
+
+  }, error = function(e) {
+    cat("ERROR running", script_name, ":", e$message, "\n")
+    return(list(
+      name = script_name,
+      status = 1,
+      output = paste("ERROR:", e$message)
+    ))
+  })
+}
+
+#' Discover test scripts in a directory
+discover_tests <- function(test_directory) {
+  if (!dir.exists(test_directory)) {
+    return(character(0))
+  }
+
+  test_files <- list.files(test_directory, pattern = "^test_.*\\.R$", full.names = TRUE)
+  return(test_files)
+}
+
+#' Main test execution
+main <- function() {
+  cat("=== R Utilities Test Runner ===\n")
+  cat("Test type:", test_type, "\n")
+  cat("Test directory:", test_dir, "\n\n")
+
+  # Initialize results
+  all_results <- list()
+
+  # Run unit tests
+  if (test_type %in% c("unit", "all")) {
+    cat("=== UNIT TESTS ===\n")
+    unit_tests <- discover_tests(unit_test_dir)
+
+    if (length(unit_tests) == 0) {
+      cat("No unit tests found in", unit_test_dir, "\n")
+    } else {
+      for (test_file in unit_tests) {
+        test_name <- paste("Unit:", basename(test_file))
+        result <- run_test_script(test_file, test_name)
+        all_results[[test_name]] <- result
+        cat("\n")
+      }
+    }
+    cat("\n")
+  }
+
+  # Run integration tests
+  if (test_type %in% c("integration", "all")) {
+    cat("=== INTEGRATION TESTS ===\n")
+    integration_tests <- discover_tests(integration_test_dir)
+
+    if (length(integration_tests) == 0) {
+      cat("No integration tests found in", integration_test_dir, "\n")
+    } else {
+      for (test_file in integration_tests) {
+        test_name <- paste("Integration:", basename(test_file))
+        result <- run_test_script(test_file, test_name)
+        all_results[[test_name]] <- result
+        cat("\n")
+      }
+    }
+    cat("\n")
+  }
+
+  # Summary
+  cat("=== TEST SUMMARY ===\n")
+
+  if (length(all_results) == 0) {
+    cat("No tests were run.\n")
+    quit(status = 0)
+  }
+
+  # Count results
+  total_tests <- length(all_results)
+  passed_tests <- sum(sapply(all_results, function(r) r$status == 0))
+  failed_tests <- total_tests - passed_tests
+
+  # Print individual results
+  for (result in all_results) {
+    status_symbol <- if (result$status == 0) "✅" else "❌"
+    cat(status_symbol, result$name, "\n")
+  }
+
+  cat("\n")
+  cat("Total tests:", total_tests, "\n")
+  cat("Passed:", passed_tests, "\n")
+  cat("Failed:", failed_tests, "\n")
+
+  # Overall result
+  if (failed_tests == 0) {
+    cat("\n🎉 ALL TESTS PASSED! 🎉\n")
+    quit(status = 0)
+  } else {
+    cat("\n💥 SOME TESTS FAILED 💥\n")
+
+    # List failed tests
+    cat("\nFailed tests:\n")
+    for (result in all_results) {
+      if (result$status != 0) {
+        cat("  ❌", result$name, "\n")
+      }
+    }
+
+    quit(status = 1)
+  }
+}
+
+# Run main function
+main()
\ No newline at end of file
diff --git a/utilities/R/tests/unit/test_basic.R b/utilities/R/tests/unit/test_basic.R
new file mode 100644
index 0000000..fb03b6a
--- /dev/null
+++ b/utilities/R/tests/unit/test_basic.R
@@ -0,0 +1,101 @@
+#!/usr/bin/env Rscript
+
+#' Basic Test to Validate Test Infrastructure
+#'
+#' Simple test to ensure the test framework is working
+#'
+#' Usage: Rscript test_basic.R
+
+#' Test helper function to run a test and report results
+run_test <- function(test_name, test_func) {
+  cat("Testing:", test_name, "... ")
+
+  tryCatch({
+    result <- test_func()
+    if (result) {
+      cat("✅ PASS\n")
+      return(TRUE)
+    } else {
+      cat("❌ FAIL\n")
+      return(FALSE)
+    }
+  }, error = function(e) {
+    cat("❌ ERROR:", e$message, "\n")
+    return(FALSE)
+  })
+}
+
+#' Test 1: Basic R functionality
+test_basic_r <- function() {
+  # Simple arithmetic
+  result <- 2 + 2
+  return(result == 4)
+}
+
+#' Test 2: Data frame creation
+test_data_frame <- function() {
+  df <- data.frame(
+    id = c(1, 2, 3),
+    name = c("A", "B", "C"),
+    stringsAsFactors = FALSE
+  )
+
+  return(nrow(df) == 3 && ncol(df) == 2)
+}
+
+#' Test 3: File path checking
+test_file_paths <- function() {
+  # Check if we can access relative paths from project root
+  current_dir <- getwd()
+
+  # Should have dev directory (indicating we're at project root)
+  dev_exists <- dir.exists("utilities")
+  unit_test_dir_exists <- dir.exists("utilities/R/tests/unit")
+
+  return(dev_exists && unit_test_dir_exists)
+}
+
+#' Test 4: JSON handling
+test_json_basic <- function() {
+  tryCatch({
+    library(jsonlite, quietly = TRUE)
+
+    # Simple JSON parsing
+    json_text <- '{"id": 1, "name": "test"}'
+    parsed <- fromJSON(json_text)
+
+    return(parsed$id == 1 && parsed$name == "test")
+  }, error = function(e) {
+    return(FALSE)
+  })
+}
+
+# Run all tests
+cat("=== Basic Infrastructure Tests ===\n\n")
+
+tests <- list(
+  "Basic R functionality" = test_basic_r,
+  "Data frame creation" = test_data_frame,
+  "File path checking" = test_file_paths,
+  "JSON handling" = test_json_basic
+)
+
+# Run tests and collect results
+results <- list()
+for (test_name in names(tests)) {
+  results[[test_name]] <- run_test(test_name, tests[[test_name]])
+}
+
+# Summary
+cat("\n=== Test Summary ===\n")
+passed <- sum(unlist(results))
+total <- length(results)
+cat("Passed:", passed, "/", total, "tests\n")
+
+if (passed == total) {
+  cat("✅ All tests PASSED!\n")
+  quit(status = 0)
+} else {
+  cat("❌ Some tests FAILED!\n")
+  quit(status = 1)
+}
\ No newline at end of file
diff --git a/utilities/R/tests/unit/test_extractor_record_utils.R b/utilities/R/tests/unit/test_extractor_record_utils.R
new file mode 100644
index 0000000..7e8856e
--- /dev/null
+++ b/utilities/R/tests/unit/test_extractor_record_utils.R
@@ -0,0 +1,306 @@
+#!/usr/bin/env Rscript
+
+#' Unit Tests for extractor_record_utils.R
+#'
+#' Tests the core record creation and formatting utilities
+#'
+#' Usage: Rscript test_extractor_record_utils.R
+
+# Load required libraries
+library(jsonlite)
+library(dplyr)
+
+# Specify the helpers path
+  helpers_path <- file.path("utilities/R/helpers/")
+
+
+# Source the functions under test
+source(file.path(helpers_path, "extractor_record_utils.R"))
+
+#' Test helper function to run a test and report results
+run_test <- function(test_name, test_func) {
+  cat("Testing:", test_name, "... ")
+
+  tryCatch({
+    result <- test_func()
+    if (result) {
+      cat("✅ PASS\n")
+      return(TRUE)
+    } else {
+      cat("❌ FAIL\n")
+      return(FALSE)
+    }
+  }, error = function(e) {
+    cat("❌ ERROR:", e$message, "\n")
+    return(FALSE)
+  })
+}
+
+#' Test 1: create_record_from_object basic functionality
+test_create_record_basic <- function() {
+  # Mock class definition
+  class_def <- data.frame(
+    slot = c("id", "name", "description"),
+    range = c("string", "string", "string"),
+    is_array = c(0, 0, 0),
+    stringsAsFactors = FALSE
+  )
+
+  # Mock JSON object
+  json_obj <- list(
+    id = "TEST_001",
+    name = "Test Object",
+    description = "A test object"
+  )
+
+  result <- create_record_from_object(json_obj, class_def, "/test/0", 1)
+
+  # Should return a data frame
+  if (!is.data.frame(result)) {
+    return(FALSE)
+  }
+
+  # Should have expected columns
+  expected_cols <- c("tablepath", "datapath", "id", "name", "description")
+  if (!all(expected_cols %in% names(result))) {
+    return(FALSE)
+  }
+
+  # Should have correct values
+  if (result$id != "TEST_001" || result$name != "Test Object") {
+    return(FALSE)
+  }
+
+  # Should have correct paths (tablepath removes numeric indices)
+  if (result$tablepath != "/test" || result$datapath != "/test/0") {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+#' Test 2: Array handling in create_record_from_object
+test_create_record_arrays <- function() {
+  # Mock class definition with array field
+  class_def <- data.frame(
+    slot = c("id", "tags"),
+    range = c("string", "string"),
+    is_array = c(0, 1),
+    stringsAsFactors = FALSE
+  )
+
+  # Mock JSON object with array
+  json_obj <- list(
+    id = "TEST_002",
+    tags = list("tag1", "tag2", "tag3")
+  )
+
+  result <- create_record_from_object(json_obj, class_def, "/test/path", 1)
+
+  # Should return a data frame
+  if (!is.data.frame(result)) {
+    return(FALSE)
+  }
+
+  # Array should be pipe-separated
+  if (result$tags != "tag1|tag2|tag3") {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+#' Test 3: Missing fields handling
+test_create_record_missing_fields <- function() {
+  # Mock class definition
+  class_def <- data.frame(
+    slot = c("id", "name", "missing_field"),
+    range = c("string", "string", "string"),
+    is_array = c(0, 0, 0),
+    stringsAsFactors = FALSE
+  )
+
+  # Mock JSON object (missing 'missing_field')
+  json_obj <- list(
+    id = "TEST_003",
+    name = "Test Object"
+  )
+
+  result <- create_record_from_object(json_obj, class_def, "/test/path", 1)
+
+  # Should return a data frame
+  if (!is.data.frame(result)) {
+    return(FALSE)
+  }
+
+  # Missing field should be NA
+  if (!is.na(result$missing_field)) {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+#' Test 4: extract_array_objects functionality
+test_extract_array_objects <- function() {
+  # Mock class definition
+  class_def <- data.frame(
+    slot = c("id", "name", "order"),
+    range = c("string", "string", "integer"),
+    is_array = c(0, 0, 0),
+    stringsAsFactors = FALSE
+  )
+
+  # Mock JSON array
+  json_array <- list(
+    list(id = "OBJ_001", name = "Object 1", order = 1),
+    list(id = "OBJ_002", name = "Object 2", order = 2),
+    list(id = "OBJ_003", name = "Object 3", order = 3)
+  )
+
+  result <- extract_array_objects(json_array, class_def, "/test/objects")
+
+  # Should return a data frame
+  if (!is.data.frame(result)) {
+    return(FALSE)
+  }
+
+  # Should have 3 rows
+  if (nrow(result) != 3) {
+    return(FALSE)
+  }
+
+  # Should have correct paths
+  expected_paths <- c("/test/objects/0", "/test/objects/1", "/test/objects/2")
+  if (!all(result$datapath %in% expected_paths)) {
+    return(FALSE)
+  }
+
+  # Should have correct values
+  if (result$id[1] != "OBJ_001" || result$name[2] != "Object 2" || result$order[3] != 3) {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+#' Test 5: apply_sas_compatible_formatting basic functionality
+test_sas_formatting_basic <- function() {
+  # Create mock datasets
+  dataset1 <- data.frame(
+    tablepath = c("/methods/operations", "/methods/operations"),
+    datapath = c("/methods/0/operations/0", "/methods/0/operations/1"),
+    id = c("OP_001", "OP_002"),
+    name = c("Operation 1", "Operation 2"),
+    stringsAsFactors = FALSE
+  )
+
+  class_datasets <- list(Operation = dataset1)
+
+  # Mock class slots
+  class_slots <- data.frame(
+    parent_class = c("Operation", "Operation", "Operation", "Operation"),
+    slot = c("id", "name", "description", "tags"),
+    range = c("string", "string", "string", "string"),
+    is_array = c(0, 0, 0, 1),
+    stringsAsFactors = FALSE
+  )
+
+  result <- apply_sas_compatible_formatting(class_datasets, class_slots)
+
+  # Should return a list
+  if (!is.list(result)) {
+    return(FALSE)
+  }
+
+  # Should have Operation dataset
+  if (!"Operation" %in% names(result)) {
+    return(FALSE)
+  }
+
+  formatted_dataset <- result$Operation
+
+  # Should have /root prefix in tablepath
+  if (!all(startsWith(formatted_dataset$tablepath, "/root"))) {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+#' Test 6: Array splitting in SAS formatting
+test_sas_formatting_arrays <- function() {
+  # Create mock dataset with pipe-separated array
+  dataset1 <- data.frame(
+    tablepath = c("/test", "/test"),
+    datapath = c("/test/0", "/test/1"),
+    id = c("TEST_001", "TEST_002"),
+    tags = c("tag1|tag2|tag3", "tagA|tagB"),
+    stringsAsFactors = FALSE
+  )
+
+  class_datasets <- list(TestClass = dataset1)
+
+  # Mock class slots with array field
+  class_slots <- data.frame(
+    parent_class = c("TestClass", "TestClass", "TestClass"),
+    slot = c("id", "tags", "description"),
+    range = c("string", "string", "string"),
+    is_array = c(0, 1, 0),
+    stringsAsFactors = FALSE
+  )
+
+  result <- apply_sas_compatible_formatting(class_datasets, class_slots)
+
+  formatted_dataset <- result$TestClass
+
+  # Should have split array columns
+  expected_cols <- c("tablepath", "datapath", "id", "tags1", "tags2", "tags3")
+  if (!all(expected_cols %in% names(formatted_dataset))) {
+    return(FALSE)
+  }
+
+  # Should have correct values in split columns
+  if (formatted_dataset$tags1[1] != "tag1" || formatted_dataset$tags2[1] != "tag2") {
+    return(FALSE)
+  }
+
+  # Original tags column should be removed
+  if ("tags" %in% names(formatted_dataset)) {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+# Run all tests
+cat("=== Unit Tests for extractor_record_utils.R ===\n\n")
+
+tests <- list(
+  "create_record_from_object basic" = test_create_record_basic,
+  "create_record_from_object arrays" = test_create_record_arrays,
+  "create_record_from_object missing fields" = test_create_record_missing_fields,
+  "extract_array_objects" = test_extract_array_objects,
+  "SAS formatting basic" = test_sas_formatting_basic,
+  "SAS formatting arrays" = test_sas_formatting_arrays
+)
+
+# Run tests and collect results
+results <- list()
+for (test_name in names(tests)) {
+  results[[test_name]] <- run_test(test_name, tests[[test_name]])
+}
+
+# Summary
+cat("\n=== Test Summary ===\n")
+passed <- sum(unlist(results))
+total <- length(results)
+cat("Passed:", passed, "/", total, "tests\n")
+
+if (passed == total) {
+  cat("✅ All tests PASSED!\n")
+  quit(status = 0)
+} else {
+  cat("❌ Some tests FAILED!\n")
+  quit(status = 1)
+}
\ No newline at end of file
diff --git a/utilities/R/tests/unit/test_get_class_slots.R b/utilities/R/tests/unit/test_get_class_slots.R
new file mode 100644
index 0000000..0c8a635
--- /dev/null
+++ b/utilities/R/tests/unit/test_get_class_slots.R
@@ -0,0 +1,202 @@
+#' Unit Tests for get_class_slots.R
+#'
+#' Tests the functionality of extracting class/slot definitions from JSON schema
+#'
+#' Usage: Rscript test_get_class_slots.R
+
+# Load required libraries
+library(jsonlite)
+
+# Set up relative paths based on current working directory
+# This script should be run from the tests/unit directory or via the test runner
+
+# Declare paths
+  helpers_path <- file.path("utilities/R/helpers/")
+  fixtures_path <- file.path("utilities/R/tests/fixtures")
+  model_path <- file.path("model")
+
+# Source the function under test
+source(file.path(helpers_path, "get_class_slots.R"))
+
+# Test configuration
+test_schema_file <- file.path(fixtures_path, "test_schema.json")
+real_schema_file <- file.path(model_path, "ars_ldm.json")
+
+#' Test helper function to run a test and report results
+run_test <- function(test_name, test_func) {
+  cat("Testing:", test_name, "... ")
+
+  tryCatch({
+    result <- test_func()
+    if (result) {
+      cat("✅ PASS\n")
+      return(TRUE)
+    } else {
+      cat("❌ FAIL\n")
+      return(FALSE)
+    }
+  }, error = function(e) {
+    cat("❌ ERROR:", e$message, "\n")
+    return(FALSE)
+  })
+}
+
+#' Test 1: Function exists and can be called
+test_function_exists <- function() {
+  return(exists("get_class_slots"))
+}
+
+#' Test 2: Function handles invalid file path
+test_invalid_file <- function() {
+  result <- tryCatch({
+    get_class_slots("nonexistent_file.json")
+    return(FALSE)  # Should have thrown an error
+  }, error = function(e) {
+    return(TRUE)   # Expected error
+  })
+  return(result)
+}
+
+#' Test 3: Function processes test schema correctly
+test_test_schema <- function() {
+  if (!file.exists(test_schema_file)) {
+    cat("(SKIP - test schema not found) ")
+    return(TRUE)
+  }
+
+  result <- get_class_slots(test_schema_file)
+
+  # Should return a data frame
+  if (!is.data.frame(result)) {
+    return(FALSE)
+  }
+
+  # Should have required columns
+  required_cols <- c("parent_class", "slot", "range", "is_array")
+  if (!all(required_cols %in% names(result))) {
+    return(FALSE)
+  }
+
+  # Should have rows for our test classes
+  classes <- unique(result$parent_class[!is.na(result$parent_class)])
+  expected_classes <- c("Analysis", "AnalysisReason", "Operation")
+
+  if (!all(expected_classes %in% classes)) {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+#' Test 4: Function processes real ARS schema
+test_real_schema <- function() {
+  if (!file.exists(real_schema_file)) {
+    cat("(SKIP - real schema not found) ")
+    return(TRUE)
+  }
+
+  result <- get_class_slots(real_schema_file)
+
+  # Should return a data frame
+  if (!is.data.frame(result)) {
+    return(FALSE)
+  }
+
+  # Should have many rows (real schema is complex)
+  if (nrow(result) < 100) {
+    return(FALSE)
+  }
+
+  # Should include known ARS classes
+  classes <- unique(result$parent_class[!is.na(result$parent_class)])
+  expected_classes <- c("Analysis", "Operation", "ReportingEvent", "Output")
+
+  if (!all(expected_classes %in% classes)) {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+#' Test 5: Slot extraction works correctly
+test_slot_extraction <- function() {
+  if (!file.exists(test_schema_file)) {
+    cat("(SKIP - test schema not found) ")
+    return(TRUE)
+  }
+
+  result <- get_class_slots(test_schema_file)
+
+  # Check Analysis class slots
+  analysis_slots <- result[result$parent_class == "Analysis" & !is.na(result$parent_class), ]
+
+  if (nrow(analysis_slots) == 0) {
+    return(FALSE)
+  }
+
+  # Should include known slots
+  expected_slots <- c("id", "name", "description", "order")
+  found_slots <- analysis_slots$slot
+
+  if (!all(expected_slots %in% found_slots)) {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+#' Test 6: Array detection works
+test_array_detection <- function() {
+  if (!file.exists(test_schema_file)) {
+    cat("(SKIP - test schema not found) ")
+    return(TRUE)
+  }
+
+  result <- get_class_slots(test_schema_file)
+
+  # The 'operations' field in AnalysisMethod should be detected as an array
+  operations_entry <- result[result$slot == "operations" & !is.na(result$slot), ]
+
+  if (nrow(operations_entry) == 0) {
+    return(FALSE)
+  }
+
+  # Should be marked as array
+  if (operations_entry$is_array[1] != 1) {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+# Run all tests
+cat("=== Unit Tests for get_class_slots.R ===\n\n")
+
+tests <- list(
+  "Function exists" = test_function_exists,
+  "Invalid file handling" = test_invalid_file,
+  "Test schema processing" = test_test_schema,
+  "Real schema processing" = test_real_schema,
+  "Slot extraction" = test_slot_extraction,
+  "Array detection" = test_array_detection
+)
+
+# Run tests and collect results
+results <- list()
+for (test_name in names(tests)) {
+  results[[test_name]] <- run_test(test_name, tests[[test_name]])
+}
+
+# Summary
+cat("\n=== Test Summary ===\n")
+passed <- sum(unlist(results))
+total <- length(results)
+cat("Passed:", passed, "/", total, "tests\n")
+
+if (passed == total) {
+  cat("✅ All tests PASSED!\n")
+  quit(status = 0)
+} else {
+  cat("❌ Some tests FAILED!\n")
+  quit(status = 1)
+}
\ No newline at end of file
diff --git a/utilities/R/tests/unit/test_modular_extractor.R b/utilities/R/tests/unit/test_modular_extractor.R
new file mode 100644
index 0000000..a876d67
--- /dev/null
+++ b/utilities/R/tests/unit/test_modular_extractor.R
@@ -0,0 +1,268 @@
+#' Unit Tests for Modular Extractor System
+#'
+#' Tests the functionality of the modular schema-driven extractor
+#'
+#' Usage: Rscript test_modular_extractor.R
+
+# Load required libraries
+library(jsonlite)
+library(dplyr)
+
+# Declare paths
+helpers_path <- file.path("utilities/R/helpers/")
+fixtures_path <- file.path("utilities/R/tests/fixtures")
+model_path <- file.path("model")
+
+# Source the functions under test
+source(file.path(helpers_path, "schema_driven_extractor_modular.R"))
+source(file.path(helpers_path, "get_class_slots.R"))
+
+# Test configuration
+test_schema_file <- file.path(fixtures_path, "test_schema.json")
+test_data_file <- file.path(fixtures_path, "test_data.json")
+real_schema_file <- file.path(model_path, "ars_ldm.json")
+real_data_file <- file.path("workfiles/examples/ARS v1/FDA Standard Safety Tables and Figures.json")
+
+#' Test helper function to run a test and report results
+run_test <- function(test_name, test_func) {
+  cat("Testing:", test_name, "... ")
+
+  tryCatch({
+    result <- test_func()
+    if (result) {
+      cat("✅ PASS\n")
+      return(TRUE)
+    } else {
+      cat("❌ FAIL\n")
+      return(FALSE)
+    }
+  }, error = function(e) {
+    cat("❌ ERROR:", e$message, "\n")
+    return(FALSE)
+  })
+}
+
+#' Test 1: Main extractor function exists
+test_extractor_exists <- function() {
+  return(exists("extract_schema_driven_datasets"))
+}
+
+#' Test 2: Module loading works
+test_module_loading <- function() {
+  # The function should exist after sourcing
+  return(exists("load_extractor_modules"))
+}
+
+#' Test 3: Class data routing works
+test_class_routing <- function() {
+  return(exists("extract_class_data"))
+}
+
+#' Test 4: Extract with test fixtures
+test_extract_test_fixtures <- function() {
+  if (!file.exists(test_schema_file) || !file.exists(test_data_file)) {
+    cat("(SKIP - test fixtures not found) ")
+    return(TRUE)
+  }
+
+  # Get class slots from test schema
+  class_slots <- get_class_slots(test_schema_file)
+
+  # Extract datasets
+  result <- extract_schema_driven_datasets(test_data_file, class_slots)
+
+  # Should return a list
+  if (!is.list(result)) {
+    return(FALSE)
+  }
+
+  # Should have extracted some classes
+  if (length(result) == 0) {
+    return(FALSE)
+  }
+
+  # Should include Analysis class (from our test data)
+  if (!"Analysis" %in% names(result)) {
+    return(FALSE)
+  }
+
+  # Analysis dataset should have records
+  if (nrow(result$Analysis) != 2) {  # We have 2 analyses in test data
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+#' Test 5: Extract with real ARS data (if available)
+test_extract_real_data <- function() {
+  if (!file.exists(real_schema_file) || !file.exists(real_data_file)) {
+    cat("(SKIP - real data not found) ")
+    return(TRUE)
+  }
+
+  # Get class slots from real schema
+  class_slots <- get_class_slots(real_schema_file)
+
+  # Extract datasets
+  result <- extract_schema_driven_datasets(real_data_file, class_slots)
+
+  # Should return a list
+  if (!is.list(result)) {
+    return(FALSE)
+  }
+
+  # Should have extracted many classes
+  if (length(result) < 10) {
+    return(FALSE)
+  }
+
+  # Should include key ARS classes
+  expected_classes <- c("Analysis", "Operation", "ReportingEvent")
+  found_classes <- names(result)
+
+  if (!all(expected_classes %in% found_classes)) {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+#' Test 6: Data structure validation
+test_data_structure <- function() {
+  if (!file.exists(test_schema_file) || !file.exists(test_data_file)) {
+    cat("(SKIP - test fixtures not found) ")
+    return(TRUE)
+  }
+
+  # Get class slots and extract data
+  class_slots <- get_class_slots(test_schema_file)
+  result <- extract_schema_driven_datasets(test_data_file, class_slots)
+
+  # Check that datasets have required structure
+  for (class_name in names(result)) {
+    dataset <- result[[class_name]]
+
+    # Should be a data frame
+    if (!is.data.frame(dataset)) {
+      return(FALSE)
+    }
+
+    # Should have tablepath and datapath columns
+    required_cols <- c("tablepath", "datapath")
+    if (!all(required_cols %in% names(dataset))) {
+      return(FALSE)
+    }
+
+    # tablepath should start with / (before SAS formatting adds /root)
+    if (any(!startsWith(dataset$tablepath, "/"))) {
+      return(FALSE)
+    }
+  }
+
+  return(TRUE)
+}
+
+#' Test 7: Analysis class extraction validation
+test_analysis_extraction <- function() {
+  if (!file.exists(test_schema_file) || !file.exists(test_data_file)) {
+    cat("(SKIP - test fixtures not found) ")
+    return(TRUE)
+  }
+
+  class_slots <- get_class_slots(test_schema_file)
+  result <- extract_schema_driven_datasets(test_data_file, class_slots)
+
+  if (!"Analysis" %in% names(result)) {
+    return(FALSE)
+  }
+
+  analysis_data <- result$Analysis
+
+  # Should have the analyses from our test data
+  if (nrow(analysis_data) != 2) {
+    return(FALSE)
+  }
+
+  # Should have expected fields
+  expected_fields <- c("id", "name", "description", "order")
+  if (!all(expected_fields %in% names(analysis_data))) {
+    return(FALSE)
+  }
+
+  # Should have correct values
+  if (analysis_data$id[1] != "ANA_001" || analysis_data$name[1] != "Analysis 1") {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+#' Test 8: Operation extraction validation
+test_operation_extraction <- function() {
+  if (!file.exists(test_schema_file) || !file.exists(test_data_file)) {
+    cat("(SKIP - test fixtures not found) ")
+    return(TRUE)
+  }
+
+  class_slots <- get_class_slots(test_schema_file)
+  result <- extract_schema_driven_datasets(test_data_file, class_slots)
+
+  if (!"Operation" %in% names(result)) {
+    return(FALSE)
+  }
+
+  operation_data <- result$Operation
+
+  # Should have 3 operations total (2 from first analysis + 1 from second)
+  if (nrow(operation_data) != 3) {
+    return(FALSE)
+  }
+
+  # Should have expected fields
+  expected_fields <- c("id", "name", "description", "order", "resultPattern")
+  if (!all(expected_fields %in% names(operation_data))) {
+    return(FALSE)
+  }
+
+  # Should have correct values
+  if (operation_data$id[1] != "OP_001_1" || operation_data$resultPattern[1] != "N=XX") {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+# Run all tests
+cat("=== Unit Tests for Modular Extractor System ===\n\n")
+
+tests <- list(
+  "Extractor function exists" = test_extractor_exists,
+  "Module loading works" = test_module_loading,
+  "Class routing works" = test_class_routing,
+  "Extract test fixtures" = test_extract_test_fixtures,
+  "Extract real data" = test_extract_real_data,
+  "Data structure validation" = test_data_structure,
+  "Analysis extraction" = test_analysis_extraction,
+  "Operation extraction" = test_operation_extraction
+)
+
+# Run tests and collect results
+results <- list()
+for (test_name in names(tests)) {
+  results[[test_name]] <- run_test(test_name, tests[[test_name]])
+}
+
+# Summary
+cat("\n=== Test Summary ===\n")
+passed <- sum(unlist(results))
+total <- length(results)
+cat("Passed:", passed, "/", total, "tests\n")
+
+if (passed == total) {
+  cat("✅ All tests PASSED!\n")
+  quit(status = 0)
+} else {
+  cat("❌ Some tests FAILED!\n")
+  quit(status = 1)
+}
\ No newline at end of file