51 changes: 50 additions & 1 deletion rdagent/app/finetune/data_science/conf.py
@@ -1,5 +1,5 @@
import os

import torch
from pydantic_settings import SettingsConfigDict

from rdagent.app.data_science.conf import DS_RD_SETTING
@@ -38,3 +38,52 @@ def update_settings(competition: str):
if hasattr(DS_RD_SETTING, field_name):
setattr(DS_RD_SETTING, field_name, new_value)
DS_RD_SETTING.competition = competition

def get_training_config():
    cuda_available = torch.cuda.is_available()
    return {
        "device": "cuda" if cuda_available else "cpu",
        "batch_size": 32 if cuda_available else 16,
        "use_mixed_precision": cuda_available,
        "num_workers": 4 if cuda_available else 2,
        "pin_memory": cuda_available,
    }

class GPUConfig:
@staticmethod
def setup_cuda_optimizations():
if torch.cuda.is_available():
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False

@staticmethod
def get_optimized_batch_size(base_batch_size=32):
if torch.cuda.is_available():
# Adjust based on available GPU memory
gpu_memory = torch.cuda.get_device_properties(0).total_memory
if gpu_memory > 8e9: # 8GB
return base_batch_size * 4
elif gpu_memory > 4e9: # 4GB
return base_batch_size * 2
return base_batch_size

def get_gpu_enhanced_config():
"""Get configuration optimized for GPU if available"""
gpu_available = torch.cuda.is_available()

return {
"training": {
"device": "cuda" if gpu_available else "cpu",
"use_amp": gpu_available,
"gradient_accumulation_steps": 1,
"max_grad_norm": 1.0
},
"data": {
"num_workers": 4 if gpu_available else 2,
"pin_memory": gpu_available,
"prefetch_factor": 2 if gpu_available else 1
},
"model": {
"use_compile": gpu_available,
"optimize_for_inference": gpu_available
}
}
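
A minimal sketch of how these helpers might be wired together at startup; the call sequence below is illustrative and not part of this diff:

# Hypothetical usage: apply CUDA tweaks once, then read device/batch choices.
from rdagent.app.finetune.data_science.conf import (
    GPUConfig,
    get_gpu_enhanced_config,
)

GPUConfig.setup_cuda_optimizations()
config = get_gpu_enhanced_config()

device = config["training"]["device"]
batch_size = GPUConfig.get_optimized_batch_size(base_batch_size=32)
print(f"Training on {device} with batch size {batch_size}")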
23 changes: 21 additions & 2 deletions rdagent/app/finetune/data_science/loop.py
@@ -1,14 +1,33 @@
import asyncio
from pathlib import Path

import fire

from rdagent.app.data_science.conf import DS_RD_SETTING
from rdagent.app.finetune.data_science.conf import update_settings
from rdagent.core.utils import import_class
from rdagent.log import rdagent_logger as logger
from rdagent.scenarios.data_science.loop import DataScienceRDLoop

import torch.nn as nn

from ...utils.gpu_utils import setup_gpu

class EnhancedTrainingLoop:
    def __init__(self, criterion=None):
        self.device = setup_gpu()
        # Default to MSE loss, a common choice for time-series regression.
        self.criterion = criterion if criterion is not None else nn.MSELoss()

    def train_time_series_model(self, model, data_loader, optimizer):
        model = model.to(self.device)
        model.train()

        for batch in data_loader:
            # Move data to the selected device
            inputs, targets = batch
            inputs = inputs.to(self.device)
            targets = targets.to(self.device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = self.criterion(outputs, targets)

            loss.backward()
            optimizer.step()

def main(
model: str | None = None,
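
A hedged usage sketch for EnhancedTrainingLoop; the dataset, model, and optimizer below are placeholders invented for illustration, not values from this PR:

# Hypothetical wiring: random tensors stand in for a real time-series dataset.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(256, 20, 10), torch.randn(256, 1))
loader = DataLoader(dataset, batch_size=32, shuffle=True)

model = nn.Sequential(nn.Flatten(), nn.Linear(20 * 10, 1))
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

trainer = EnhancedTrainingLoop(criterion=nn.MSELoss())
trainer.train_time_series_model(model, loader, optimizer)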
37 changes: 36 additions & 1 deletion rdagent/app/general_model/general_model.py
@@ -1,5 +1,7 @@
import fire

import torch
import torch.nn as nn
from ..utils.gpu_utils import setup_gpu
from rdagent.components.coder.model_coder.task_loader import (
ModelExperimentLoaderFromPDFfiles,
)
@@ -10,7 +12,40 @@
from rdagent.scenarios.general_model.scenario import GeneralModelScenario
from rdagent.scenarios.qlib.developer.model_coder import QlibModelCoSTEER

class GPUEnhancedLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.device = setup_gpu(verbose=False)

        # Dropout is only applied between stacked LSTM layers (num_layers > 1).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Initialize hidden states on the input's device so the model keeps
        # working after it is moved with .to() or .cuda().
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # take the last time step only
        return out

class TimeSeriesModelFactory:
def create_model(self, model_type, **kwargs):
model = None
if model_type == "lstm":
model = GPUEnhancedLSTM(
input_size=kwargs.get('input_size', 10),
hidden_size=kwargs.get('hidden_size', 50),
num_layers=kwargs.get('num_layers', 2),
output_size=kwargs.get('output_size', 1)
)
if model:
        model = model.to(setup_gpu(verbose=False))
return model

def extract_models_and_implement(report_file_path: str) -> None:
"""
This is a research copilot to automatically implement models from a report file or paper.
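
A small usage sketch for the factory; the shapes and hyperparameters are illustrative assumptions, not values used elsewhere in the repo:

# Hypothetical example: build the LSTM via the factory and run one forward pass
# on random data shaped (batch, sequence_length, input_size).
import torch

factory = TimeSeriesModelFactory()
model = factory.create_model(
    "lstm", input_size=10, hidden_size=50, num_layers=2, output_size=1
)

x = torch.randn(8, 30, 10).to(next(model.parameters()).device)
predictions = model(x)  # shape: (8, 1)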
33 changes: 33 additions & 0 deletions rdagent/app/utils/dl.py
@@ -0,0 +1,33 @@
import torch
from .gpu_utils import setup_gpu

class GPUDataLoader:
    def __init__(self, dataset, batch_size=32, shuffle=True):
        self.device = setup_gpu()
        cuda_available = torch.cuda.is_available()
        self.loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=4 if cuda_available else 2,
            pin_memory=cuda_available
        )

def __iter__(self):
for data, target in self.loader:
yield data.to(self.device), target.to(self.device)

def create_gpu_optimized_loader(dataset, config=None):
"""Create data loader optimized for GPU"""
if config is None:
config = {}

batch_size = config.get('batch_size', 32)
if torch.cuda.is_available():
# Increase batch size for GPU
batch_size = batch_size * 2

return GPUDataLoader(
dataset,
batch_size=batch_size,
shuffle=config.get('shuffle', True)
)
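
A brief usage sketch; the TensorDataset contents are made up for illustration:

# Hypothetical example: wrap a dataset and iterate over device-resident batches.
import torch
from torch.utils.data import TensorDataset

dataset = TensorDataset(torch.randn(128, 10), torch.randint(0, 2, (128,)))
loader = create_gpu_optimized_loader(dataset, config={"batch_size": 16})

for data, target in loader:
    # Both tensors arrive already on the device chosen by setup_gpu().
    print(data.device, data.shape, target.shape)
    break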
177 changes: 177 additions & 0 deletions rdagent/app/utils/gpu_utils.py
@@ -0,0 +1,177 @@
import gc
import logging
import subprocess

import torch

logger = logging.getLogger(__name__)

def check_nvidia_drivers():
try:
result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
return result.returncode == 0
except FileNotFoundError:
return False

def is_cuda_built():
    try:
        if hasattr(torch.backends, 'cuda') and hasattr(torch.backends.cuda, 'is_built'):
            return torch.backends.cuda.is_built()
        # Fall back to runtime availability on older PyTorch builds.
        return torch.cuda.is_available()
    except Exception:
        return False

def setup_gpu(verbose=True):
if verbose:
print("Initializing GPU support...")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA built with PyTorch: {is_cuda_built()}")
print(f"CUDA available: {torch.cuda.is_available()}")

if not torch.cuda.is_available():
if verbose:
print("CUDA not available in PyTorch")
print("Possible solutions:")
print("1. Install PyTorch with CUDA support")
print("2. Update NVIDIA drivers")
print("3. Check CUDA toolkit installation")
return torch.device("cpu")

num_gpus = torch.cuda.device_count()
if num_gpus == 0:
if verbose:
print("No GPUs detected")
return torch.device("cpu")

if verbose:
print(f"Found {num_gpus} GPU(s)")

device = torch.device("cuda:0")

try:
test_tensor = torch.tensor([1.0, 2.0, 3.0]).cuda()
del test_tensor
if hasattr(torch.cuda, 'synchronize'):
torch.cuda.synchronize()

if verbose:
gpu_name = torch.cuda.get_device_name(0)
memory = torch.cuda.get_device_properties(0).total_memory / 1e9
print(f"Using GPU: {gpu_name}")
print(f"GPU Memory: {memory:.1f} GB")
            cuda_version = getattr(torch.version, 'cuda', None)
            print(f"CUDA version: {cuda_version or 'Unknown'}")

if hasattr(torch.backends, 'cudnn'):
if hasattr(torch.backends.cudnn, 'benchmark'):
torch.backends.cudnn.benchmark = True
if hasattr(torch.backends.cudnn, 'deterministic'):
torch.backends.cudnn.deterministic = False

return device

except Exception as e:
if verbose:
print(f"GPU initialization failed: {e}")
print("Falling back to CPU")
return torch.device("cpu")

def force_cuda_initialization():
    if torch.cuda.is_available():
        try:
            # Allocating a tensor on the GPU forces lazy CUDA context creation.
            x = torch.empty(1, device="cuda")
            del x
            torch.cuda.synchronize()
            return True
        except Exception as e:
            print(f"CUDA forced initialization failed: {e}")
            return False
    return False

def get_gpu_info():
info = {
"pytorch_version": torch.__version__,
"cuda_available": torch.cuda.is_available(),
"cuda_built": is_cuda_built(),
"gpu_count": torch.cuda.device_count() if torch.cuda.is_available() else 0,
"gpus": []
}

    info["cuda_version"] = getattr(torch.version, 'cuda', None) or "Unknown"

if torch.cuda.is_available():
for i in range(torch.cuda.device_count()):
try:
gpu_info = {
"name": torch.cuda.get_device_name(i),
"memory_total_gb": torch.cuda.get_device_properties(i).total_memory / 1e9,
}
try:
gpu_info["memory_allocated_gb"] = torch.cuda.memory_allocated(i) / 1e9
gpu_info["memory_reserved_gb"] = torch.cuda.memory_reserved(i) / 1e9
                except Exception:
gpu_info["memory_allocated_gb"] = 0
gpu_info["memory_reserved_gb"] = 0
info["gpus"].append(gpu_info)
except Exception as e:
print(f"Could not get info for GPU {i}: {e}")

return info

def print_gpu_memory():
if torch.cuda.is_available():
for i in range(torch.cuda.device_count()):
try:
allocated = torch.cuda.memory_allocated(i) / 1e9
reserved = torch.cuda.memory_reserved(i) / 1e9
total = torch.cuda.get_device_properties(i).total_memory / 1e9
print(f"GPU {i} - Allocated: {allocated:.2f}GB, Reserved: {reserved:.2f}GB, Total: {total:.1f}GB")
except Exception as e:
print(f"Could not get memory info for GPU {i}: {e}")

def clear_gpu_cache():
if torch.cuda.is_available():
try:
torch.cuda.empty_cache()
gc.collect()
except Exception as e:
print(f"Could not clear GPU cache: {e}")

def optimize_model_for_gpu(model):
if torch.cuda.is_available():
try:
model = model.cuda()
if hasattr(torch, 'compile'):
try:
model = torch.compile(model)
print("Model compilation enabled")
except Exception as e:
print(f"Model compilation failed: {e}")
except Exception as e:
print(f"Failed to move model to GPU: {e}")
return model

def check_pytorch_installation():
print("PyTorch Installation Check")
print("=" * 40)
print(f"Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"Built with CUDA: {is_cuda_built()}")

if not torch.cuda.is_available():
print("\nRECOMMENDATION:")
print("To enable GPU support, install PyTorch with CUDA:")
print("For CUDA 11.8: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118")
print("For CUDA 12.1: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121")

return torch.cuda.is_available()
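
A short sketch of the intended call sequence; the function names come from this file, while the surrounding script is illustrative:

# Hypothetical startup sequence using the helpers above.
from rdagent.app.utils.gpu_utils import (
    check_pytorch_installation,
    clear_gpu_cache,
    get_gpu_info,
    setup_gpu,
)

check_pytorch_installation()
device = setup_gpu(verbose=True)

info = get_gpu_info()
print(f"Detected {info['gpu_count']} GPU(s), CUDA {info['cuda_version']}")

# ... training work happens here ...
clear_gpu_cache()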