diff --git a/performance/compression-review/compression-review.py b/performance/compression-review/compression-review.py index eaec577..4eb2d86 100644 --- a/performance/compression-review/compression-review.py +++ b/performance/compression-review/compression-review.py @@ -47,7 +47,7 @@ def getData(appConfig): logFileHandle.write("\n") # output header to csv - logFileHandle.write("{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format('dbName','collName','numDocs','avgDocSize','sizeGB','storageGB','compRatio','compEnabled','minSample','maxSample','avgSample','minComp','maxComp','avgComp','compRatio','exceptions','compTime(ms)')) + logFileHandle.write("{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format('dbName','collName','numDocs','avgDocSize','sizeGB','storageGB','existingCompRatio','compEnabled','minSample','maxSample','avgSample','minComp','maxComp','avgComp','projectedCompRatio','exceptions','compTime(ms)')) # get databases - filter out admin, config, local, and system dbDict = client.admin.command("listDatabases",nameOnly=True,filter={"name":{"$nin":['admin','config','local','system']}})['databases'] diff --git a/sizing-tool/README.md b/sizing-tool/README.md new file mode 100644 index 0000000..5d23b3d --- /dev/null +++ b/sizing-tool/README.md @@ -0,0 +1,129 @@ +# Amazon DocumentDB Sizing Tool + +The sizing tool analyzes your MongoDB database and generates a CSV file for use with the [DocumentDB Cost Estimator](https://aws.improving.com/documentdb/cost-estimator/). The tool automatically measures compression ratios using zstd-3-dict (matching Amazon DocumentDB 8.0), collects database statistics, and produces a properly formatted CSV file ready for upload to the cost estimator. 
+
+**Note:** The tool automatically excludes:
+- System databases: `admin`, `config`, `local`, and `system`
+- Views (only collections are analyzed)
+- The `system.profile` collection
+- Collections with no documents
+
+# Requirements
+ - Python 3.7+
+ - pymongo Python package
+ - MongoDB 2.6 - 3.4 | pymongo 3.10 - 3.12
+ - MongoDB 3.6 - 5.0 | pymongo 3.12 - 4.0
+ - MongoDB 5.1+ | pymongo 4.0+
+ - DocumentDB | pymongo 3.10+
+ - If not installed - "$ pip3 install pymongo"
+ - lz4 Python package
+ - If not installed - "$ pip3 install lz4"
+ - zstandard Python package
+ - If not installed - "$ pip3 install zstandard"
+ - compression-review.py script (/performance/compression-review/compression-review.py)
+
+**Quick Install**: `pip3 install -r requirements.txt`
+
+## Using the Sizing Tool
+`python3 sizing.py --uri <uri>`
+
+- Automatically uses zstd-3-dict compression (matching DocumentDB 8.0)
+- Samples 1000 documents per collection by default
+- Run on any instance in the replica set
+- Creates a single CSV file per execution: `sizing-<timestamp>.csv`
+- The \<uri\> options can be found at https://www.mongodb.com/docs/manual/reference/connection-string/
+  - If your URI contains ampersand (&) characters they must be escaped with a backslash, or enclose your URI in double quotes
+- For DocumentDB use either the cluster endpoint or any of the instance endpoints
+
+### Optional Parameters
+
+| Parameter | Default | Description |
+| ----------- | ----------- | ----------- |
+| --sample-size | 1000 | Number of documents to sample per collection |
+| --dictionary-sample-size | 100 | Number of documents for dictionary creation |
+
+### Example Usage
+
+Localhost (no authentication):
+```
+python3 sizing.py --uri "mongodb://localhost:27017"
+```
+
+Remote server with authentication:
+```
+python3 sizing.py --uri "mongodb://username:password@hostname:27017"
+```
+
+With custom sample size:
+```
+python3 sizing.py --uri "mongodb://username:password@hostname:27017" --sample-size 2000
+```
+
+## 
Output + +The tool generates a CSV file named: `sizing-.csv` in your current working directory (where you run the command). + +Example: `sizing-20260204123045.csv` + +### CSV Columns +- **SLNo** - Serial number +- **Database_Name** - Name of the database +- **Collection_Name** - Name of the collection +- **Document_Count** - Number of documents +- **Average_Document_Size** - Average document size (bytes) +- **Total_Indexes** - Number of indexes +- **Index_Size** - Total index size (GB) +- **Index_Working_Set** - Percentage of indexes in memory (%) +- **Data_Working_Set** - Percentage of data in memory (%) +- **Inserts_Per_Day** - Daily insert operations (count) +- **Updates_Per_Day** - Daily update operations (count) +- **Deletes_Per_Day** - Daily delete operations (count) +- **Reads_Per_Day** - Daily read operations (count) +- **Compression_Ratio** - Compression ratio + +### Important Note: Manual Updates Required + +The generated CSV includes default placeholder values for workload metrics that **MUST be manually updated** in a text editor: + +| Field | Default Value | Description | +|-------|---------------|-------------| +| **Index_Working_Set** | 100 | Percentage of indexes that need to be in memory | +| **Data_Working_Set** | 10 | Percentage of data that needs to be in memory | +| **Inserts_Per_Day** | 0 | Number of insert operations per day | +| **Updates_Per_Day** | 0 | Number of update operations per day | +| **Deletes_Per_Day** | 0 | Number of delete operations per day | +| **Reads_Per_Day** | 0 | Number of read operations per day | + +**Why manual updates are required:** +- These statistics cannot be calculated automatically from database metadata +- They require knowledge of your application's workload patterns +- Accurate values are critical for proper instance sizing and cost estimation + +**How to update:** +1. Locate the generated CSV file in your current working directory (where you ran the command) +2. 
Open the CSV file in a text editor (not Excel, which may corrupt the format) +3. Locate the columns for the fields above +4. Update each row with values based on your workload knowledge +5. Save the file +6. Upload to the [DocumentDB Cost Estimator](https://aws.improving.com/documentdb/cost-estimator/) + +**Tips for determining values:** +- **Working Sets**: Use MongoDB monitoring tools or `db.serverStatus()` to understand memory usage patterns +- **Daily Operations**: Check application logs, MongoDB profiler, or monitoring dashboards for operation counts +- **Conservative estimates**: If unsure, use higher working set percentages and operation counts for safer sizing + +## How It Works +1. Runs compression-review.py to analyze compression ratios using zstd-3-dict +2. Connects to MongoDB to gather collection statistics (document counts, sizes, indexes) +3. Combines compression data with collection metadata +4. Generates a CSV file formatted for the [DocumentDB Cost Estimator](https://aws.improving.com/documentdb/cost-estimator/) +5. Cleans up temporary files + +## Next Steps +1. Run the sizing tool to generate your CSV file +2. Open the CSV and update workload metrics (working sets and daily operations) with your actual values +3. Upload the CSV to the [DocumentDB Cost Estimator](https://aws.improving.com/documentdb/cost-estimator/) +4. Review the sizing recommendations + +## License +This tool is licensed under the Apache 2.0 License. 
diff --git a/sizing-tool/requirements.txt b/sizing-tool/requirements.txt new file mode 100644 index 0000000..220379f --- /dev/null +++ b/sizing-tool/requirements.txt @@ -0,0 +1,3 @@ +pymongo +lz4 +zstandard diff --git a/sizing-tool/sizing.py b/sizing-tool/sizing.py new file mode 100644 index 0000000..9775850 --- /dev/null +++ b/sizing-tool/sizing.py @@ -0,0 +1,395 @@ +import argparse +import sys +import csv +import glob +import os +import datetime as dt +import pymongo +import importlib.util + +# Compressor to use for compression analysis +# zstd-3-dict matches Amazon DocumentDB 8.0 dictionary-based compression +COMPRESSOR = 'zstd-3-dict' + +# Fixed dictionary size in Amazon DocumentDB 8.0 dictionary-based compression +DICTIONARY_SIZE_BYTES = 4096 + +# Server alias base for output file naming +SERVER_ALIAS_BASE = 'temp' + + +def load_compression_module(): + """ + Load the compression-review.py module dynamically. + + Returns: + module: The loaded compression_review module + + Raises: + RuntimeError: If the compression-review.py file does not exist or cannot be loaded + """ + script_dir = os.path.dirname(os.path.abspath(__file__)) + compression_script = os.path.join( + script_dir, '..', 'performance', 'compression-review', 'compression-review.py' + ) + + # Check if the file exists + if not os.path.exists(compression_script): + raise RuntimeError( + f"Compression module not found at: {compression_script}\n" + f"Expected location: ../performance/compression-review/compression-review.py\n" + f"Please ensure the compression-review tool is available in the correct directory." + ) + + # Check if it's a file (not a directory) + if not os.path.isfile(compression_script): + raise RuntimeError( + f"Path exists but is not a file: {compression_script}\n" + f"Expected a Python script at this location." 
+ ) + + try: + spec = importlib.util.spec_from_file_location("compression_review", compression_script) + if spec is None or spec.loader is None: + raise RuntimeError( + f"Failed to create module spec for: {compression_script}\n" + f"The file may not be a valid Python module." + ) + + compression_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(compression_module) + + # Verify the module has the required getData function + if not hasattr(compression_module, 'getData'): + raise RuntimeError( + f"Compression module loaded but missing required 'getData' function.\n" + f"The compression-review.py file may be corrupted or outdated." + ) + + return compression_module + + except Exception as e: + if isinstance(e, RuntimeError): + raise + raise RuntimeError( + f"Error loading compression module from {compression_script}: {e}" + ) + + +def cleanup_csv_files(csv_files): + """ + Remove CSV files and log any errors. + + Args: + csv_files: Iterable of CSV file paths to remove + """ + for csv_file in csv_files: + try: + os.remove(csv_file) + print(f"Cleaned up partial file: {csv_file}", file=sys.stderr) + except Exception as e: + print(f"Warning: Could not remove file {csv_file}: {e}", file=sys.stderr) + + +def run_compression_and_get_output(uri, sample_size, dictionary_sample_size): + """ + Run compression analysis and return the path to the generated CSV file. 
+ + Args: + uri: MongoDB connection URI + sample_size: Number of documents to sample per collection + dictionary_sample_size: Number of documents for dictionary creation + + Returns: + str: Path to the generated compression CSV file + + Raises: + RuntimeError: If compression analysis fails or no CSV file is created + """ + print("Running compression analysis...") + + # Load the compression module + compression_module = load_compression_module() + + # Create server alias with PID for concurrency safety + server_alias = f"{SERVER_ALIAS_BASE}-{os.getpid()}" + + # Get list of existing CSV files before running compression analysis + csv_pattern = f"{server_alias}-*-compression-review.csv" + existing_csv_files = set(glob.glob(csv_pattern)) + + # Configure and run compression analysis + app_config = { + 'uri': uri, + 'serverAlias': server_alias, + 'sampleSize': sample_size, + 'compressor': COMPRESSOR, + 'dictionarySampleSize': dictionary_sample_size, + 'dictionarySize': DICTIONARY_SIZE_BYTES + } + + try: + compression_module.getData(app_config) + except Exception as e: + # Clean up any partial CSV files that may have been created + current_csv_files = set(glob.glob(csv_pattern)) + new_csv_files = current_csv_files - existing_csv_files + if new_csv_files: + cleanup_csv_files(new_csv_files) + raise RuntimeError(f"Error running compression analysis: {e}") + + # Find the newly created CSV file by comparing before and after + current_csv_files = set(glob.glob(csv_pattern)) + new_csv_files = current_csv_files - existing_csv_files + + if not new_csv_files: + raise RuntimeError(f"No new CSV file created. 
Expected pattern: {csv_pattern}") + + if len(new_csv_files) > 1: + print(f"Warning: Multiple new CSV files found: {new_csv_files}", file=sys.stderr) + # Use the most recent one + latest_csv = max(new_csv_files, key=os.path.getmtime) + else: + latest_csv = new_csv_files.pop() + + print(f"Parsing results from: {latest_csv}") + return latest_csv + + +def parse_compression_csv(csv_filepath): + """ + Parse compression review CSV and extract collection data. + + Args: + csv_filepath: Path to the compression review CSV file + + Returns: + dict: Dictionary mapping 'db.collection' to compression data + + Raises: + RuntimeError: If CSV header cannot be found or file is invalid + """ + comp_data = {} + + with open(csv_filepath, 'r') as f: + # Read all lines to find where the actual data starts + lines = f.readlines() + + # Find the header line (starts with dbName) + header_idx = None + for i, line in enumerate(lines): + if line.startswith('dbName'): + header_idx = i + break + + if header_idx is None: + raise RuntimeError("Could not find data header in CSV") + + # Use DictReader for named column access + reader = csv.DictReader(lines[header_idx:]) + + for row in reader: + try: + # Access columns by name instead of index + db_name = row['dbName'] + coll_name = row['collName'] + num_docs = int(row['numDocs']) + avg_doc_size = int(row['avgDocSize']) + comp_ratio = float(row['projectedCompRatio']) + + key = f"{db_name}.{coll_name}" + comp_data[key] = { + 'db_name': db_name, + 'coll_name': coll_name, + 'num_docs': num_docs, + 'avg_doc_size': avg_doc_size, + 'comp_ratio': comp_ratio + } + except (KeyError, ValueError) as e: + # Skip rows with missing columns or invalid data + print(f"Warning: Skipping row due to error: {e}", file=sys.stderr) + continue + + return comp_data + + +def generate_sizing_csv(comp_data, uri): + """ + Generate cost estimator CSV by combining compression data with MongoDB stats. 
+ + Args: + comp_data: Dictionary of compression data from parse_compression_csv() + uri: MongoDB connection URI + + Returns: + str: Path to the generated sizing CSV file + """ + print("Connecting to MongoDB to gather additional stats...") + + # Create output CSV file + log_timestamp = dt.datetime.now(dt.timezone.utc).strftime('%Y%m%d%H%M%S') + output_filename = f"sizing-{log_timestamp}.csv" + + with pymongo.MongoClient(host=uri, appname='workload-calc', serverSelectionTimeoutMS=5000) as client: + with open(output_filename, 'w', newline='') as csvfile: + csvwriter = csv.writer(csvfile) + + # Write header + csvwriter.writerow([ + 'SLNo', 'Database_Name', 'Collection_Name', 'Document_Count', + 'Average_Document_Size', 'Total_Indexes', 'Index_Size', + 'Index_Working_Set', 'Data_Working_Set', 'Inserts_Per_Day', + 'Updates_Per_Day', 'Deletes_Per_Day', 'Reads_Per_Day', + 'Compression_Ratio' + ]) + + sl_no = 1 + + # Iterate through collections from compression data + for key, data in comp_data.items(): + db_name = data['db_name'] + coll_name = data['coll_name'] + + try: + # Get collection stats from MongoDB + stats = client[db_name].command("collStats", coll_name) + + doc_count = data['num_docs'] + avg_doc_size = data['avg_doc_size'] + total_indexes = stats.get('nindexes', 0) + index_size_bytes = stats.get('totalIndexSize', 0) + index_size_gb = index_size_bytes / (1024 * 1024 * 1024) + comp_ratio = data['comp_ratio'] + + # Default estimates for workload metrics + index_working_set = 100 + data_working_set = 10 + inserts_per_day = 0 + updates_per_day = 0 + deletes_per_day = 0 + reads_per_day = 0 + + # Write row + csvwriter.writerow([ + sl_no, + db_name, + coll_name, + doc_count, + avg_doc_size, + total_indexes, + f"{index_size_gb:.4f}", + index_working_set, + data_working_set, + inserts_per_day, + updates_per_day, + deletes_per_day, + reads_per_day, + f"{comp_ratio:.4f}" + ]) + + sl_no += 1 + + except Exception as e: + print(f"Error processing {db_name}.{coll_name}: {e}", 
file=sys.stderr) + continue + + return output_filename + + +def validate_args(args): + """ + Validate command-line arguments. + + Args: + args: Parsed arguments from argparse + + Raises: + ValueError: If any argument is invalid + """ + # Validate URI format + if not args.uri: + raise ValueError("MongoDB URI cannot be empty") + + if not (args.uri.startswith('mongodb://') or args.uri.startswith('mongodb+srv://')): + raise ValueError("MongoDB URI must start with 'mongodb://' or 'mongodb+srv://'") + + # Validate sample size (only check lower bound) + if args.sample_size <= 0: + raise ValueError(f"Sample size must be positive, got: {args.sample_size}") + + # Validate dictionary sample size (only check lower bound) + if args.dictionary_sample_size <= 0: + raise ValueError(f"Dictionary sample size must be positive, got: {args.dictionary_sample_size}") + + +def main(): + parser = argparse.ArgumentParser(description='Run compression review and analyze results') + + parser.add_argument('--uri', + required=True, + type=str, + help='MongoDB Connection URI') + + parser.add_argument('--sample-size', + required=False, + type=int, + default=1000, + help='Number of documents to sample in each collection, default 1000') + + parser.add_argument('--dictionary-sample-size', + required=False, + type=int, + default=100, + help='Number of documents to sample for dictionary creation') + + args = parser.parse_args() + + # Validate arguments + try: + validate_args(args) + except ValueError as e: + parser.error(str(e)) + + compression_csv = None # Initialize to handle cleanup in finally + + try: + # Run compression analysis and get the output CSV file + compression_csv = run_compression_and_get_output( + uri=args.uri, + sample_size=args.sample_size, + dictionary_sample_size=args.dictionary_sample_size + ) + + # Parse compression CSV to extract collection data + comp_data = parse_compression_csv(compression_csv) + + # Generate sizing CSV by combining compression data with MongoDB stats + 
output_filename = generate_sizing_csv(comp_data, args.uri) + + print(f"\nSizing CSV generated: {output_filename}") + print("\n" + "="*80) + print("IMPORTANT: Manual Updates Required") + print("="*80) + print("\nThe following fields have been set to default values and MUST be updated") + print("manually in a text editor based on your workload knowledge:\n") + print(" • Index_Working_Set (default: 100) - Percentage of indexes in memory") + print(" • Data_Working_Set (default: 10) - Percentage of data in memory") + print(" • Inserts_Per_Day (default: 0) - Daily insert operations") + print(" • Updates_Per_Day (default: 0) - Daily update operations") + print(" • Deletes_Per_Day (default: 0) - Daily delete operations") + print(" • Reads_Per_Day (default: 0) - Daily read operations") + print("\nThese statistics cannot be calculated automatically and require knowledge") + print("of your existing workload patterns. Open the CSV file in a text editor") + print("and update these values for accurate sizing recommendations.") + print("="*80 + "\n") + + except RuntimeError as e: + print(str(e), file=sys.stderr) + sys.exit(1) + finally: + # Clean up the compression-review CSV file if it was created + if compression_csv is not None: + cleanup_csv_files([compression_csv]) + +if __name__ == "__main__": + main() diff --git a/sizing-tool/test/README.md b/sizing-tool/test/README.md new file mode 100644 index 0000000..ea992d3 --- /dev/null +++ b/sizing-tool/test/README.md @@ -0,0 +1,77 @@ +# Sizing Tool Tests + +This directory contains unit tests for the sizing tool. 
+ +## Prerequisites + +- Python 3.7+ +- No external dependencies required (tests use `unittest.mock` for all external calls) +- Tests do not require MongoDB connection or the compression-review.py script + +## Running Tests + +### Run all tests +```bash +# From the test directory +python -m unittest test_sizing + +# With verbose output +python -m unittest test_sizing -v +``` + +### Run specific test class +```bash +python -m unittest test_sizing.TestValidateArgs +``` + +### Run specific test +```bash +python -m unittest test_sizing.TestValidateArgs.test_valid_args +``` + +## Test Coverage + +The test suite includes unit tests for: + +- **Argument validation** - URI format, sample sizes, parameter bounds +- **CSV parsing** - Valid data, missing headers, invalid rows, empty files +- **Compression module loading** - File existence, module validation, error handling +- **Compression execution** - Successful runs, file creation, error scenarios, cleanup +- **Sizing CSV generation** - MongoDB stats collection, multiple collections, error handling + +## Test Structure + +All tests use mocks to avoid external dependencies: +- MongoDB connections are mocked using `unittest.mock` +- File system operations use temporary files +- The compression-review.py module is mocked for isolation + +This ensures tests run quickly and don't require any external services or configuration. + +## Adding New Tests + +When adding new functionality to sizing.py: + +1. Create a new test class or add to an existing one +2. Use descriptive test names that explain what is being tested +3. Mock all external dependencies (MongoDB, file system, external modules) +4. Test both success and failure scenarios +5. 
Include edge cases and boundary conditions
+
+Example test structure:
+```python
+class TestNewFeature(unittest.TestCase):
+    """Tests for new_feature function"""
+
+    @patch('sizing.external_dependency')
+    def test_success_case(self, mock_dependency):
+        """Test successful execution"""
+        # Setup mocks
+        mock_dependency.return_value = expected_value
+
+        # Execute
+        result = new_feature()
+
+        # Assert
+        self.assertEqual(result, expected_value)
+```
diff --git a/sizing-tool/test/test_sizing.py b/sizing-tool/test/test_sizing.py
new file mode 100644
index 0000000..1cbe26d
--- /dev/null
+++ b/sizing-tool/test/test_sizing.py
@@ -0,0 +1,534 @@
+import unittest
+import os
+import csv
+import tempfile
+from unittest.mock import Mock, patch, MagicMock
+from argparse import Namespace
+import sys
+
+# Import functions from sizing.py (parent directory)
+sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
+from sizing import (
+    validate_args,
+    parse_compression_csv,
+    run_compression_and_get_output,
+    generate_sizing_csv,
+    load_compression_module
+)
+
+
+class TestValidateArgs(unittest.TestCase):
+    """Tests for validate_args function"""
+
+    def test_valid_args(self):
+        """Test that valid arguments pass validation"""
+        args = Namespace(
+            uri='mongodb://localhost:27017',
+            sample_size=1000,
+            dictionary_sample_size=100
+        )
+        # Should not raise any exception
+        validate_args(args)
+
+    def test_valid_args_with_srv(self):
+        """Test that mongodb+srv:// URI is valid"""
+        args = Namespace(
+            uri='mongodb+srv://cluster.mongodb.net',
+            sample_size=1000,
+            dictionary_sample_size=100
+        )
+        validate_args(args)
+
+    def test_empty_uri(self):
+        """Test that empty URI raises ValueError"""
+        args = Namespace(
+            uri='',
+            sample_size=1000,
+            dictionary_sample_size=100
+        )
+        with self.assertRaisesRegex(ValueError, "MongoDB URI cannot be empty"):
+            validate_args(args)
+
+    def test_invalid_uri_format(self):
+        """Test that invalid URI format raises ValueError"""
+        args = Namespace(
+            uri='http://localhost:27017',
+            sample_size=1000,
+            dictionary_sample_size=100
+        )
+        with self.assertRaisesRegex(ValueError, "must start with 'mongodb://' or 'mongodb\\+srv://'"):
+            validate_args(args)
+
+    def test_negative_sample_size(self):
+        """Test that negative sample size raises ValueError"""
+        args = Namespace(
+            uri='mongodb://localhost:27017',
+            sample_size=-100,
+            dictionary_sample_size=100
+        )
+        with self.assertRaisesRegex(ValueError, "Sample size must be positive"):
+            validate_args(args)
+
+    def test_zero_sample_size(self):
+        """Test that zero sample size raises ValueError"""
+        args = Namespace(
+            uri='mongodb://localhost:27017',
+            sample_size=0,
+            dictionary_sample_size=100
+        )
+        with self.assertRaisesRegex(ValueError, "Sample size must be positive"):
+            validate_args(args)
+
+    def test_negative_dictionary_sample_size(self):
+        """Test that negative dictionary sample size raises ValueError"""
+        args = Namespace(
+            uri='mongodb://localhost:27017',
+            sample_size=1000,
+            dictionary_sample_size=-10
+        )
+        with self.assertRaisesRegex(ValueError, "Dictionary sample size must be positive"):
+            validate_args(args)
+
+    def test_large_values_accepted(self):
+        """Test that large values are accepted (no upper limits)"""
+        args = Namespace(
+            uri='mongodb://localhost:27017',
+            sample_size=10000000,  # 10 million
+            dictionary_sample_size=5000000  # 5 million
+        )
+        # Should not raise any exception
+        validate_args(args)
+
+
+class TestParseCompressionCsv(unittest.TestCase):
+    """Tests for parse_compression_csv function"""
+
+    def test_parse_valid_csv(self):
+        """Test parsing a valid compression CSV"""
+        # The preamble lines mimic the compression-review summary header that
+        # parse_compression_csv must skip before the 'dbName' data header.
+        csv_content = """compressor,docsSampled,dictDocsSampled,dictBytes
+zstd-3-dict,1000,100,4096
+
+dbName,collName,numDocs,avgDocSize,sizeGB,storageGB,existingCompRatio,compEnabled,minSample,maxSample,avgSample,minComp,maxComp,avgComp,projectedCompRatio,exceptions,compTime(ms)
+testdb,users,10000,512,5.0,2.5,2.0,Y/1024,256,1024,512,128,512,256,2.0,0,123.45
+testdb,orders,5000,1024,5.0,2.0,2.5,Y/1024,512,2048,1024,256,1024,512,2.0,0,234.56
+"""
+        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv') as f:
+            f.write(csv_content)
+            temp_file = f.name
+
+        try:
+            result = parse_compression_csv(temp_file)
+
+            self.assertEqual(len(result), 2)
+            self.assertIn('testdb.users', result)
+            self.assertIn('testdb.orders', result)
+
+            users_data = result['testdb.users']
+            self.assertEqual(users_data['db_name'], 'testdb')
+            self.assertEqual(users_data['coll_name'], 'users')
+            self.assertEqual(users_data['num_docs'], 10000)
+            self.assertEqual(users_data['avg_doc_size'], 512)
+            self.assertEqual(users_data['comp_ratio'], 2.0)
+
+            orders_data = result['testdb.orders']
+            self.assertEqual(orders_data['db_name'], 'testdb')
+            self.assertEqual(orders_data['coll_name'], 'orders')
+            self.assertEqual(orders_data['num_docs'], 5000)
+        finally:
+            os.unlink(temp_file)
+
+    def test_parse_csv_missing_header(self):
+        """Test that missing header raises RuntimeError"""
+        csv_content = """compressor,docsSampled,dictDocsSampled,dictBytes
+zstd-3-dict,1000,100,4096
+
+testdb,users,10000,512,5.0,2.5,2.0,Y/1024,256,1024,512,128,512,256,2.0,0,123.45
+"""
+        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv') as f:
+            f.write(csv_content)
+            temp_file = f.name
+
+        try:
+            with self.assertRaisesRegex(RuntimeError, "Could not find data header in CSV"):
+                parse_compression_csv(temp_file)
+        finally:
+            os.unlink(temp_file)
+
+    def test_parse_csv_with_invalid_row(self):
+        """Test that invalid rows are skipped with warning"""
+        csv_content = """compressor,docsSampled,dictDocsSampled,dictBytes
+zstd-3-dict,1000,100,4096
+
+dbName,collName,numDocs,avgDocSize,sizeGB,storageGB,existingCompRatio,compEnabled,minSample,maxSample,avgSample,minComp,maxComp,avgComp,projectedCompRatio,exceptions,compTime(ms)
+testdb,users,10000,512,5.0,2.5,2.0,Y/1024,256,1024,512,128,512,256,2.0,0,123.45
+testdb,invalid,not_a_number,512,5.0,2.5,2.0,Y/1024,256,1024,512,128,512,256,2.0,0,123.45
+testdb,orders,5000,1024,5.0,2.0,2.5,Y/1024,512,2048,1024,256,1024,512,2.0,0,234.56
+"""
+        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv') as f:
+            f.write(csv_content)
+            temp_file = f.name
+
+        try:
+            result = parse_compression_csv(temp_file)
+
+            # Should have 2 valid rows (invalid row skipped)
+            self.assertEqual(len(result), 2)
+            self.assertIn('testdb.users', result)
+            self.assertIn('testdb.orders', result)
+            self.assertNotIn('testdb.invalid', result)
+        finally:
+            os.unlink(temp_file)
+
+    def test_parse_empty_csv(self):
+        """Test parsing an empty CSV"""
+        csv_content = """compressor,docsSampled,dictDocsSampled,dictBytes
+zstd-3-dict,1000,100,4096
+
+dbName,collName,numDocs,avgDocSize,sizeGB,storageGB,existingCompRatio,compEnabled,minSample,maxSample,avgSample,minComp,maxComp,avgComp,projectedCompRatio,exceptions,compTime(ms)
+"""
+        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv') as f:
+            f.write(csv_content)
+            temp_file = f.name
+
+        try:
+            result = parse_compression_csv(temp_file)
+            self.assertEqual(len(result), 0)
+        finally:
+            os.unlink(temp_file)
+
+
+class TestLoadCompressionModule(unittest.TestCase):
+    """Tests for load_compression_module function"""
+
+    def test_load_module_file_not_found(self):
+        """Test that missing compression module raises RuntimeError"""
+        with patch('sizing.os.path.exists', return_value=False):
+            with self.assertRaisesRegex(RuntimeError, "Compression module not found"):
+                load_compression_module()
+
+    def test_load_module_path_is_directory(self):
+        """Test that directory path raises RuntimeError"""
+        with patch('sizing.os.path.exists', return_value=True):
+            with patch('sizing.os.path.isfile', return_value=False):
+                with self.assertRaisesRegex(RuntimeError, "Path exists but is not a file"):
+                    load_compression_module()
+
+    def test_load_module_invalid_spec(self):
+        """Test that invalid module spec raises RuntimeError"""
+        with patch('sizing.os.path.exists', return_value=True):
+            with patch('sizing.os.path.isfile', return_value=True):
+                with patch('sizing.importlib.util.spec_from_file_location', return_value=None):
+                    with self.assertRaisesRegex(RuntimeError, "Failed to create module spec"):
+                        load_compression_module()
+
+    def test_load_module_missing_getdata_function(self):
+        """Test that module without getData function raises RuntimeError"""
+        mock_module = MagicMock()
+        del mock_module.getData  # Remove the getData attribute
+
+        with patch('sizing.os.path.exists', return_value=True):
+            with patch('sizing.os.path.isfile', return_value=True):
+                with patch('sizing.importlib.util.spec_from_file_location') as mock_spec_from_file:
+                    mock_spec = MagicMock()
+                    mock_spec_from_file.return_value = mock_spec
+                    with patch('sizing.importlib.util.module_from_spec', return_value=mock_module):
+                        with self.assertRaisesRegex(RuntimeError, "missing required 'getData' function"):
+                            load_compression_module()
+
+    def test_load_module_success(self):
+        """Test successful module loading"""
+        mock_module = MagicMock()
+        mock_module.getData = MagicMock()
+
+        with patch('sizing.os.path.exists', return_value=True):
+            with patch('sizing.os.path.isfile', return_value=True):
+                with patch('sizing.importlib.util.spec_from_file_location') as mock_spec_from_file:
+                    mock_spec = MagicMock()
+                    mock_spec_from_file.return_value = mock_spec
+                    with patch('sizing.importlib.util.module_from_spec', return_value=mock_module):
+                        result = load_compression_module()
+                        self.assertEqual(result, mock_module)
+                        self.assertTrue(hasattr(result, 'getData'))
+
+
+class TestRunCompressionAndGetOutput(unittest.TestCase):
+    """Tests for run_compression_and_get_output function"""
+
+    @patch('sizing.load_compression_module')
+    @patch('sizing.glob.glob')
+    def test_successful_compression_run(self, mock_glob, mock_load_compression):
+        """Test successful compression analysis run"""
+        # Setup mocks
+        mock_compression_module = MagicMock()
+        mock_load_compression.return_value = mock_compression_module
+
+        # glob is called once before getData and once after; side_effect
+        # supplies the before/after snapshots in order.
+        mock_glob.side_effect = [
+            [],  # No existing files
+            ['temp-20260209120000-compression-review.csv']  # New file created
+        ]
+
+        result = run_compression_and_get_output(
+            uri='mongodb://localhost:27017',
+            sample_size=1000,
+            dictionary_sample_size=100
+        )
+
+        self.assertEqual(result, 'temp-20260209120000-compression-review.csv')
+        mock_compression_module.getData.assert_called_once()
+        mock_load_compression.assert_called_once()
+
+    @patch('sizing.load_compression_module')
+    @patch('sizing.glob.glob')
+    def test_compression_run_with_existing_files(self, mock_glob, mock_load_compression):
+        """Test compression run when old files exist"""
+        # Setup mocks
+        mock_compression_module = MagicMock()
+        mock_load_compression.return_value = mock_compression_module
+
+        mock_glob.side_effect = [
+            ['temp-20260209110000-compression-review.csv'],  # Existing file
+            [
+                'temp-20260209110000-compression-review.csv',
+                'temp-20260209120000-compression-review.csv'
+            ]  # Old + new file
+        ]
+
+        result = run_compression_and_get_output(
+            uri='mongodb://localhost:27017',
+            sample_size=1000,
+            dictionary_sample_size=100
+        )
+
+        self.assertEqual(result, 'temp-20260209120000-compression-review.csv')
+
+    @patch('sizing.load_compression_module')
+    @patch('sizing.glob.glob')
+    def test_compression_run_no_file_created(self, mock_glob, mock_load_compression):
+        """Test error when no CSV file is created"""
+        # Setup mocks
+        mock_compression_module = MagicMock()
+        mock_load_compression.return_value = mock_compression_module
+
+        mock_glob.side_effect = [[], []]
+
+        with self.assertRaisesRegex(RuntimeError, "No new CSV file created"):
+            run_compression_and_get_output(
+                uri='mongodb://localhost:27017',
+                sample_size=1000,
+                dictionary_sample_size=100
+            )
+
+    @patch('sizing.load_compression_module')
+    @patch('sizing.glob.glob')
+    def test_compression_run_failure(self, mock_glob, mock_load_compression):
+        """Test error handling when compression analysis fails"""
+        mock_compression_module = MagicMock()
+        mock_compression_module.getData.side_effect = Exception("Connection failed")
+        mock_load_compression.return_value = mock_compression_module
+
+        mock_glob.return_value = []
+
+        with self.assertRaisesRegex(RuntimeError, "Error running compression analysis"):
+            run_compression_and_get_output(
+                uri='mongodb://localhost:27017',
+                sample_size=1000,
+                dictionary_sample_size=100
+            )
+
+    @patch('sizing.load_compression_module')
+    @patch('sizing.glob.glob')
+    @patch('sizing.os.path.getmtime')
+    def test_multiple_new_files_created(self, mock_getmtime, mock_glob, mock_load_compression):
+        """Test handling when multiple new files are created"""
+        # Setup mocks
+        mock_compression_module = MagicMock()
+        mock_load_compression.return_value = mock_compression_module
+
+        mock_glob.side_effect = [
+            [],  # No existing files
+            [
+                'temp-20260209120000-compression-review.csv',
+                'temp-20260209120001-compression-review.csv'
+            ]  # Two new files
+        ]
+
+        # Mock getmtime to return different times based on filename
+        def getmtime_side_effect(filename):
+            if '120001' in filename:
+                return 2000  # Newer file
+            else:
+                return 1000  # Older file
+
+        mock_getmtime.side_effect = getmtime_side_effect
+
+        result = run_compression_and_get_output(
+            uri='mongodb://localhost:27017',
+            sample_size=1000,
+            dictionary_sample_size=100
+        )
+
+        # Should return the most recent file
+        self.assertEqual(result, 'temp-20260209120001-compression-review.csv')
+
+    @patch('sizing.load_compression_module')
+    def test_compression_module_load_failure(self, mock_load_compression):
+        """Test error handling when compression module fails to load"""
+        mock_load_compression.side_effect = RuntimeError("Compression module not found")
+
+        with self.assertRaisesRegex(RuntimeError, "Compression module not found"):
+            run_compression_and_get_output(
+                uri='mongodb://localhost:27017',
+                sample_size=1000,
+                dictionary_sample_size=100
+            )
+
+
+class TestGenerateSizingCsv(unittest.TestCase):
+    """Tests for generate_sizing_csv function"""
+
+    @patch('sizing.pymongo.MongoClient')
+    @patch('sizing.dt.datetime')
+    def test_generate_sizing_csv_success(self, mock_datetime, mock_mongo_client):
+        """Test successful sizing CSV generation"""
+        # Setup mocks
+        mock_datetime.now.return_value.strftime.return_value = '20260209120000'
+
+        mock_client = MagicMock()
+        mock_mongo_client.return_value.__enter__.return_value = mock_client
+
+        # Mock MongoDB collStats response
+        mock_client.__getitem__.return_value.command.return_value = {
+            'nindexes': 3,
+            'totalIndexSize': 1073741824  # 1GB
+        }
+
+        comp_data = {
+            'testdb.users': {
+                'db_name': 'testdb',
+                'coll_name': 'users',
+                'num_docs': 10000,
+                'avg_doc_size': 512,
+                'comp_ratio': 2.0
+            }
+        }
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # NOTE(review): cwd is changed here and never restored, so later
+            # tests run inside a deleted tmpdir — consider registering
+            # self.addCleanup(os.chdir, original_cwd) to restore it.
+            os.chdir(tmpdir)
+
+            result = generate_sizing_csv(
+                comp_data=comp_data,
+                uri='mongodb://localhost:27017'
+            )
+
+            self.assertEqual(result, 'sizing-20260209120000.csv')
+            self.assertTrue(os.path.exists(result))
+
+            # Verify CSV content
+            with open(result, 'r') as f:
+                reader = csv.reader(f)
+                rows = list(reader)
+
+            # Check header
+            self.assertEqual(rows[0][0], 'SLNo')
+            self.assertEqual(rows[0][1], 'Database_Name')
+
+            # Check data row
+            self.assertEqual(rows[1][0], '1')
+            self.assertEqual(rows[1][1], 'testdb')
+            self.assertEqual(rows[1][2], 'users')
+            self.assertEqual(rows[1][3], '10000')
+
+    @patch('sizing.pymongo.MongoClient')
+    @patch('sizing.dt.datetime')
+    def test_generate_sizing_csv_with_error(self, mock_datetime, mock_mongo_client):
+        """Test sizing CSV generation with collection error"""
+        # Setup mocks
+        mock_datetime.now.return_value.strftime.return_value = '20260209120000'
+
+        mock_client = MagicMock()
+        mock_mongo_client.return_value.__enter__.return_value = mock_client
+
+        # Mock MongoDB collStats to raise exception
+        mock_client.__getitem__.return_value.command.side_effect = Exception("Collection not found")
+
comp_data = { + 'testdb.users': { + 'db_name': 'testdb', + 'coll_name': 'users', + 'num_docs': 10000, + 'avg_doc_size': 512, + 'comp_ratio': 2.0 + } + } + + with tempfile.TemporaryDirectory() as tmpdir: + os.chdir(tmpdir) + + result = generate_sizing_csv( + comp_data=comp_data, + uri='mongodb://localhost:27017' + ) + + # Should still create file, but with no data rows + self.assertTrue(os.path.exists(result)) + + with open(result, 'r') as f: + reader = csv.reader(f) + rows = list(reader) + + # Only header, no data rows + self.assertEqual(len(rows), 1) + + @patch('sizing.pymongo.MongoClient') + @patch('sizing.dt.datetime') + def test_generate_sizing_csv_multiple_collections(self, mock_datetime, mock_mongo_client): + """Test sizing CSV generation with multiple collections""" + # Setup mocks + mock_datetime.now.return_value.strftime.return_value = '20260209120000' + + mock_client = MagicMock() + mock_mongo_client.return_value.__enter__.return_value = mock_client + + # Mock MongoDB collStats response + mock_client.__getitem__.return_value.command.return_value = { + 'nindexes': 2, + 'totalIndexSize': 536870912 # 512MB + } + + comp_data = { + 'testdb.users': { + 'db_name': 'testdb', + 'coll_name': 'users', + 'num_docs': 10000, + 'avg_doc_size': 512, + 'comp_ratio': 2.0 + }, + 'testdb.orders': { + 'db_name': 'testdb', + 'coll_name': 'orders', + 'num_docs': 5000, + 'avg_doc_size': 1024, + 'comp_ratio': 2.5 + } + } + + with tempfile.TemporaryDirectory() as tmpdir: + os.chdir(tmpdir) + + result = generate_sizing_csv( + comp_data=comp_data, + uri='mongodb://localhost:27017' + ) + + with open(result, 'r') as f: + reader = csv.reader(f) + rows = list(reader) + + # Header + 2 data rows + self.assertEqual(len(rows), 3) + self.assertEqual(rows[1][2], 'users') + self.assertEqual(rows[2][2], 'orders') + + +if __name__ == '__main__': + unittest.main()