From fb2f813d82dbfe60beaf7b4fbbcaf2bcd88d8ab6 Mon Sep 17 00:00:00 2001
From: deep1401 <gandhi0869@gmail.com>
Date: Tue, 10 Mar 2026 15:17:22 -0600
Subject: [PATCH 1/8] Simpler profiling from scratch

---
 .../routers/compute_provider.py               |   5 +
 api/transformerlab/routers/experiment/jobs.py |  28 ++
 .../schemas/compute_providers.py              |   8 +
 lab-sdk/src/lab/profiling.py                  | 385 ++++++++++++++++++
 lab-sdk/src/lab/remote_trap.py                |  38 ++
 .../Tasks/EmbeddableStreamingOutput.tsx       |  57 ++-
 .../Experiment/Tasks/ProfilingReport.tsx      | 205 ++++++++++
 .../Experiment/Tasks/QueueTaskModal.tsx       |  54 +++
 .../components/Experiment/Tasks/Tasks.tsx     |   8 +
 .../Tasks/ViewOutputModalStreaming.tsx        |   7 +-
 src/renderer/lib/api-client/endpoints.ts      |   2 +
 11 files changed, 778 insertions(+), 19 deletions(-)
 create mode 100644 lab-sdk/src/lab/profiling.py
 create mode 100644 src/renderer/components/Experiment/Tasks/ProfilingReport.tsx

diff --git a/api/transformerlab/routers/compute_provider.py b/api/transformerlab/routers/compute_provider.py
index f160ddc13..151685909 100644
--- a/api/transformerlab/routers/compute_provider.py
+++ b/api/transformerlab/routers/compute_provider.py
@@ -1713,6 +1713,11 @@ async def launch_template_on_provider(
     if request.enable_trackio:
         env_vars["TLAB_TRACKIO_AUTO_INIT"] = "true"
 
+    if request.enable_profiling:
+        env_vars["_TFL_PROFILING"] = "1"
+        if request.enable_profiling_torch:
+            env_vars["_TFL_PROFILING_TORCH"] = "1"
+
     # Get TFL_STORAGE_URI from storage context
     tfl_storage_uri = None
     try:
diff --git a/api/transformerlab/routers/experiment/jobs.py b/api/transformerlab/routers/experiment/jobs.py
index 296388306..d8bde0607 100644
--- a/api/transformerlab/routers/experiment/jobs.py
+++ b/api/transformerlab/routers/experiment/jobs.py
@@ -1691,3 +1691,31 @@ async def generate():
     return StreamingResponse(
         generate(), media_type=media_type, headers={"Content-Disposition": f'inline; filename="{filename}"'}
     )
+
+
+@router.get("/{job_id}/profiling_report")
+async def get_profiling_report(
+    job_id: str,
+    experimentId: str,
+    session: AsyncSession = Depends(get_async_session),
+    user_and_team: dict = Depends(get_user_and_team),
+):
+    """
+    Return the parsed contents of profiling_report.json from the job directory of job_id.
+    NOTE(review): experimentId, session and user_and_team are not used in the body — confirm intended.
+    Raises HTTP 404 when the report file does not exist and HTTP 500 when reading or parsing fails.
+    """
+    from lab.dirs import get_job_dir
+
+    job_dir = await get_job_dir(job_id)
+    report_path = storage.join(job_dir, "profiling_report.json")
+
+    if not await storage.exists(report_path):
+        raise HTTPException(status_code=404, detail="Profiling report not found for this job")
+
+    try:
+        async with await storage.open(report_path, "r", encoding="utf-8") as f:
+            content = await f.read()
+        return json.loads(content)
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"Failed to read profiling report: {exc}") from exc
diff --git a/api/transformerlab/schemas/compute_providers.py b/api/transformerlab/schemas/compute_providers.py
index 61dcc48ff..26b8830ca 100644
--- a/api/transformerlab/schemas/compute_providers.py
+++ b/api/transformerlab/schemas/compute_providers.py
@@ -166,6 +166,14 @@ class ProviderTemplateLaunchRequest(BaseModel):
         default=False,
         description="When True, set TLAB_TRACKIO_AUTO_INIT=true in the job environment so lab SDK can auto-integrate with Trackio.",
     )
+    enable_profiling: Optional[bool] = Field(
+        default=False,
+        description="When True, set _TFL_PROFILING=1 to enable system-level CPU/GPU/memory sampling via tfl-remote-trap.",
+    )
+    enable_profiling_torch: Optional[bool] = Field(
+        default=False,
+        description="When True (requires enable_profiling), also set _TFL_PROFILING_TORCH=1 to inject torch.profiler and export a Chrome trace.",
+    )
 
 
 class ProviderTemplateFileUploadResponse(BaseModel):
diff --git a/lab-sdk/src/lab/profiling.py b/lab-sdk/src/lab/profiling.py
new file mode 100644
index 000000000..d04921a0d
--- /dev/null
+++ b/lab-sdk/src/lab/profiling.py
@@ -0,0 +1,385 @@
+"""
+Job profiling: background sampler for CPU, memory, and GPU resource usage.
+
+Usage in tfl-remote-trap (or any process wrapper):
+
+    import subprocess
+    from lab.profiling import maybe_start_profiling, finalize_profiling
+
+    proc = subprocess.Popen(...)
+    profiling_thread = maybe_start_profiling(proc.pid, job_dir)
+
+    exit_code = proc.wait()
+    wall_time = time.monotonic() - start_time
+
+    finalize_profiling(profiling_thread, job_dir, wall_time)
+
+Activation:
+    Set _TFL_PROFILING=1 in the job environment.
+    Set _TFL_PROFILING_INTERVAL=<seconds> to change sampling interval (default 5).
+    Set _TFL_PROFILING_TORCH=1 to also inject torch.profiler tracing.
+"""
+from __future__ import annotations
+
+import json
+import os
+import subprocess
+import tempfile
+import threading
+import time
+from typing import Any, Dict, List, Optional
+
+_PROFILING_SAMPLES_FILE = "profiling_samples.jsonl"
+_PROFILING_REPORT_FILE = "profiling_report.json"
+_TORCH_PROFILE_DIR = "torch_profile"
+_DEFAULT_INTERVAL_SEC = 5.0
+
+
+def _sample_cpu_memory(pid: int) -> Dict[str, Any]:
+    """Return summed CPU percent and RSS memory (MB) for pid and its children ({} on failure)."""
+    try:
+        import psutil  # type: ignore[import-not-found]
+    except ImportError:
+        return {}
+
+    try:
+        # cpu_percent(interval=None) measures since the previous call on the *same* Process
+        # object, so reuse the instances psutil.process_iter() caches instead of fresh ones.
+        wanted = {pid} | {c.pid for c in psutil.Process(pid).children(recursive=True)}
+        cpu_total = rss_total = 0.0
+        for p in (q for q in psutil.process_iter() if q.pid in wanted):
+            try:
+                cpu_total += p.cpu_percent(interval=None)
+                rss_total += p.memory_info().rss
+            except (psutil.NoSuchProcess, psutil.AccessDenied):
+                pass
+        return {
+            "cpu_percent": round(cpu_total, 2),
+            "memory_rss_mb": round(rss_total / (1024 * 1024), 2),
+        }
+    except Exception:
+        return {}
+
+
+def _sample_gpus_nvidia() -> List[Dict[str, Any]]:
+    """Sample NVIDIA GPUs via pynvml, falling back to the nvidia-smi CLI; returns [] on failure."""
+    # Try pynvml. NOTE(review): nvmlInit() runs on every sample with no matching nvmlShutdown().
+    try:
+        import pynvml  # type: ignore[import-not-found]
+
+        pynvml.nvmlInit()
+        count = pynvml.nvmlDeviceGetCount()
+        gpus = []
+        for i in range(count):
+            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+            util = pynvml.nvmlDeviceGetUtilizationRates(handle)
+            mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
+            gpus.append(
+                {
+                    "index": i,
+                    "util_percent": util.gpu,
+                    "mem_used_mb": round(mem.used / (1024 * 1024), 2),
+                    "mem_total_mb": round(mem.total / (1024 * 1024), 2),
+                }
+            )
+        return gpus
+    except Exception:
+        pass  # pynvml missing or any NVML error: fall through to the CLI path below
+
+    # Fall back to parsing nvidia-smi CSV output (one row per GPU: index, util, mem used, mem total)
+    try:
+        result = subprocess.run(
+            [
+                "nvidia-smi",
+                "--query-gpu=index,utilization.gpu,memory.used,memory.total",
+                "--format=csv,noheader,nounits",
+            ],
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
+        if result.returncode != 0 or not result.stdout.strip():
+            return []
+        gpus = []
+        for line in result.stdout.strip().splitlines():
+            parts = [p.strip() for p in line.split(",")]
+            if len(parts) < 4:
+                continue
+            try:
+                gpus.append(
+                    {
+                        "index": int(parts[0]),
+                        "util_percent": float(parts[1]),
+                        "mem_used_mb": float(parts[2]),
+                        "mem_total_mb": float(parts[3]),
+                    }
+                )
+            except (ValueError, IndexError):
+                pass  # skip rows whose fields are not numeric
+        return gpus
+    except Exception:
+        return []
+
+
+def _sample_gpus_amd() -> List[Dict[str, Any]]:
+    """Sample AMD GPU utilisation via rocm-smi; memory fields are always None; [] on failure."""
+    try:
+        result = subprocess.run(
+            ["rocm-smi", "--showuse", "--showmemuse", "--csv"],
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
+        if result.returncode != 0 or not result.stdout.strip():
+            return []
+        lines = result.stdout.strip().splitlines()
+        # rocm-smi CSV format can vary; best-effort parse: column 1 is taken as the utilisation
+        gpus = []
+        for i, line in enumerate(lines[1:]):  # skip header
+            parts = [p.strip() for p in line.split(",")]
+            if len(parts) < 2:
+                continue
+            try:
+                gpus.append(
+                    {
+                        "index": i,  # row position after the header (column 0 is not parsed)
+                        "util_percent": float(parts[1].rstrip("%")),
+                        "mem_used_mb": None,
+                        "mem_total_mb": None,
+                    }
+                )
+            except (ValueError, IndexError):
+                pass  # skip rows whose utilisation column is not numeric
+        return gpus
+    except Exception:
+        return []
+
+
+def _sample_gpus() -> List[Dict[str, Any]]:
+    """Sample GPUs with the first vendor CLI found on PATH (nvidia-smi, then rocm-smi); [] if neither."""
+    import shutil
+
+    if shutil.which("nvidia-smi"):
+        return _sample_gpus_nvidia()
+    if shutil.which("rocm-smi"):
+        return _sample_gpus_amd()
+    return []
+
+
+class _ProfilingThread(threading.Thread):
+    """Daemon thread that samples CPU/memory/GPU stats every interval_sec and writes them as JSONL."""
+
+    def __init__(self, pid: int, job_dir: str, interval_sec: float = _DEFAULT_INTERVAL_SEC) -> None:
+        super().__init__(daemon=True, name="tfl-profiler")
+        self.pid = pid
+        self.job_dir = job_dir
+        self.interval_sec = interval_sec
+        self._stop_event = threading.Event()
+        self.samples: List[Dict[str, Any]] = []  # in-memory copy of every sample taken
+
+    def stop(self) -> None:
+        self._stop_event.set()  # wakes the wait() in run() so the sampling loop exits
+
+    def run(self) -> None:
+        samples_path = os.path.join(self.job_dir, _PROFILING_SAMPLES_FILE)
+        # Prime cpu_percent. NOTE(review): this is a throwaway Process object; confirm samples benefit.
+        try:
+            import psutil  # type: ignore[import-not-found]
+
+            psutil.Process(self.pid).cpu_percent(interval=None)
+        except Exception:
+            pass
+
+        try:
+            f = open(samples_path, "w", encoding="utf-8")
+        except OSError:
+            return  # cannot create the samples file: the thread exits without sampling
+
+        try:
+            while not self._stop_event.wait(self.interval_sec):  # waits first: no sample at t=0
+                sample: Dict[str, Any] = {"timestamp": time.time()}
+                sample.update(_sample_cpu_memory(self.pid))
+                gpus = _sample_gpus()
+                if gpus:
+                    sample["gpus"] = gpus
+                self.samples.append(sample)
+                try:
+                    f.write(json.dumps(sample) + "\n")
+                    f.flush()
+                except OSError:
+                    pass  # the sample stays in self.samples even if the disk write fails
+        finally:
+            try:
+                f.close()
+            except OSError:
+                pass
+
+
+def _aggregate_samples(samples: List[Dict[str, Any]], wall_time_sec: float, interval_sec: float) -> Dict[str, Any]:
+    """Build the report dict: wall time, sample count, interval, plus peak/avg CPU, RSS and per-GPU stats."""
+    report: Dict[str, Any] = {
+        "wall_time_sec": round(wall_time_sec, 2),
+        "sample_count": len(samples),
+        "interval_sec": interval_sec,
+    }
+
+    if not samples:
+        return report  # no samples: the report carries only the three metadata fields
+
+    cpu_values = [s["cpu_percent"] for s in samples if "cpu_percent" in s]
+    mem_values = [s["memory_rss_mb"] for s in samples if "memory_rss_mb" in s]
+
+    if cpu_values:
+        report["cpu"] = {
+            "peak_percent": round(max(cpu_values), 2),
+            "avg_percent": round(sum(cpu_values) / len(cpu_values), 2),
+        }
+    if mem_values:
+        report["memory"] = {
+            "peak_rss_mb": round(max(mem_values), 2),
+            "avg_rss_mb": round(sum(mem_values) / len(mem_values), 2),
+        }
+
+    # GPU aggregation: group every per-GPU reading by its index (missing index counts as 0)
+    gpu_samples: Dict[int, List[Dict[str, Any]]] = {}
+    for s in samples:
+        for g in s.get("gpus", []):
+            idx = g.get("index", 0)
+            gpu_samples.setdefault(idx, []).append(g)
+
+    if gpu_samples:
+        gpu_summaries = []
+        for idx in sorted(gpu_samples.keys()):
+            gs = gpu_samples[idx]
+            utils = [g["util_percent"] for g in gs if g.get("util_percent") is not None]
+            mems_used = [g["mem_used_mb"] for g in gs if g.get("mem_used_mb") is not None]
+            mem_total = next((g["mem_total_mb"] for g in reversed(gs) if g.get("mem_total_mb") is not None), None)
+            entry: Dict[str, Any] = {"index": idx}
+            if utils:
+                entry["peak_util_percent"] = round(max(utils), 2)
+                entry["avg_util_percent"] = round(sum(utils) / len(utils), 2)
+            if mems_used:
+                entry["peak_mem_used_mb"] = round(max(mems_used), 2)
+                entry["avg_mem_used_mb"] = round(sum(mems_used) / len(mems_used), 2)
+            if mem_total is not None:
+                entry["mem_total_mb"] = round(mem_total, 2)  # most recent non-null total
+            gpu_summaries.append(entry)
+        report["gpus"] = gpu_summaries
+
+    return report
+
+
+def maybe_start_profiling(pid: int, job_dir: str) -> Optional[_ProfilingThread]:
+    """
+    Start a profiling thread if _TFL_PROFILING=1 is set in the environment.
+
+    Returns the started thread (caller must call finalize_profiling later), or None if profiling
+    is disabled or job_dir is unavailable. An unusable _TFL_PROFILING_INTERVAL uses the default.
+    """
+    if os.environ.get("_TFL_PROFILING") != "1":
+        return None
+    if not job_dir or not os.path.isdir(job_dir):
+        return None
+    try:
+        interval = float(os.environ.get("_TFL_PROFILING_INTERVAL", str(_DEFAULT_INTERVAL_SEC)))
+    except ValueError:
+        interval = _DEFAULT_INTERVAL_SEC
+    interval = interval if 0 < interval < float("inf") else _DEFAULT_INTERVAL_SEC  # no 0/neg/nan/inf
+    thread = _ProfilingThread(pid=pid, job_dir=job_dir, interval_sec=interval)
+    thread.start()
+    return thread
+
+
+def finalize_profiling(
+    thread: Optional[_ProfilingThread],
+    job_dir: str,
+    wall_time_sec: float,
+) -> None:
+    """
+    Stop the profiling thread and write profiling_report.json to job_dir.
+    Best-effort: join waits at most 10s and any error (including a failed write) is swallowed.
+    Safe to call even when thread is None (profiling disabled).
+    """
+    if thread is None:
+        return
+    try:
+        thread.stop()
+        thread.join(timeout=10)
+    except Exception:
+        pass
+
+    try:
+        report = _aggregate_samples(thread.samples, wall_time_sec, thread.interval_sec)
+        report_path = os.path.join(job_dir, _PROFILING_REPORT_FILE)
+        with open(report_path, "w", encoding="utf-8") as f:
+            json.dump(report, f, indent=2)
+    except Exception:
+        pass
+
+
+# ---------------------------------------------------------------------------
+# Optional PyTorch profiler injection via sitecustomize.py
+# ---------------------------------------------------------------------------
+
+_SITECUSTOMIZE_TEMPLATE = """\
+# Auto-injected by tfl-remote-trap (lab-sdk profiling).
+# Activates torch.profiler.profile() and exports a Chrome trace to the job dir.
+import os as _os
+import atexit as _atexit
+
+_TFL_TORCH_PROFILE_DIR = _os.environ.get("_TFL_TORCH_PROFILE_DIR", "")
+if _TFL_TORCH_PROFILE_DIR:
+    try:
+        import torch  # noqa: F401
+        from torch.profiler import profile as _profile, ProfilerActivity as _PA
+
+        _prof = _profile(
+            activities=[_PA.CPU, _PA.CUDA],
+            with_stack=False,
+            record_shapes=False,
+        )
+        _prof.__enter__()
+
+        def _export_trace():
+            try:
+                _prof.__exit__(None, None, None)
+                import pathlib as _pl
+                _pl.Path(_TFL_TORCH_PROFILE_DIR).mkdir(parents=True, exist_ok=True)
+                _trace_path = _pl.Path(_TFL_TORCH_PROFILE_DIR) / "trace.json"
+                _prof.export_chrome_trace(str(_trace_path))
+            except Exception:
+                pass
+
+        _atexit.register(_export_trace)
+    except Exception:
+        pass
+"""
+
+
+def inject_torch_profiler(job_dir: str, env: dict) -> str:
+    """
+    If _TFL_PROFILING_TORCH=1 (read from os.environ, not from env), write a sitecustomize.py to a
+    temp dir and prepend that dir to PYTHONPATH in env; also sets _TFL_TORCH_PROFILE_DIR in env.
+
+    Returns the temp dir path (caller should clean up after the job exits), or "" if disabled/failed.
+    """
+    if os.environ.get("_TFL_PROFILING_TORCH") != "1":
+        return ""
+
+    try:
+        torch_profile_dir = os.path.join(job_dir, _TORCH_PROFILE_DIR)
+        os.makedirs(torch_profile_dir, exist_ok=True)
+
+        tmp_dir = tempfile.mkdtemp(prefix="tfl_sitecustomize_")
+        sitecustomize_path = os.path.join(tmp_dir, "sitecustomize.py")
+        with open(sitecustomize_path, "w", encoding="utf-8") as f:
+            f.write(_SITECUSTOMIZE_TEMPLATE)
+
+        env["_TFL_TORCH_PROFILE_DIR"] = torch_profile_dir
+        existing_pythonpath = env.get("PYTHONPATH", "")
+        env["PYTHONPATH"] = (
+            f"{tmp_dir}{os.pathsep}{existing_pythonpath}" if existing_pythonpath else tmp_dir
+        )
+        return tmp_dir
+    except Exception:
+        return ""
diff --git a/lab-sdk/src/lab/remote_trap.py b/lab-sdk/src/lab/remote_trap.py
index 503753d51..596994824 100644
--- a/lab-sdk/src/lab/remote_trap.py
+++ b/lab-sdk/src/lab/remote_trap.py
@@ -4,10 +4,12 @@
 import os
 import subprocess
 import sys
+import time
 from typing import List
 
 from lab import Job, storage
 from lab.job_status import JobStatus
+from lab.profiling import finalize_profiling, inject_torch_profiler, maybe_start_profiling
 
 
 async def _set_live_status_async(job_id: str, status: str) -> None:
@@ -171,20 +173,43 @@ def main(argv: List[str] | None = None) -> int:
     _set_live_status("started")
     _set_status(JobStatus.RUNNING)
 
+    # Resolve job directory for profiling output (same path used by _write_provider_logs).
+    job_id = os.environ.get("_TFL_JOB_ID")
+    job_dir: str = ""
+    if job_id:
+        try:
+            from lab.dirs import get_job_dir
+
+            async def _get_job_dir() -> str:
+                return await get_job_dir(job_id)
+
+            job_dir = asyncio.run(_get_job_dir())
+        except Exception:
+            job_dir = ""
+
+    # Optionally inject torch.profiler via sitecustomize.py before spawning the process.
+    proc_env = os.environ.copy()
+    torch_tmp_dir = inject_torch_profiler(job_dir, proc_env) if job_dir else ""
+
     # Run the original command in the shell so it behaves exactly as submitted.
     # Stream output line-by-line to avoid buffering large logs in memory (training
     # jobs can produce GBs of output). stdout and stderr are merged into a single
     # stream (stderr redirected to stdout) so we can tee to both the console and
     # the provider_logs.txt file.
     log_lines: List[str] = []
+    start_time = time.monotonic()
     proc = subprocess.Popen(
         command_str,
         shell=True,
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         text=True,
+        env=proc_env if torch_tmp_dir else None,
     )
 
+    # Start profiling sidecar thread (no-op if _TFL_PROFILING is not set).
+    profiling_thread = maybe_start_profiling(proc.pid, job_dir) if job_dir else None
+
     assert proc.stdout is not None
     for line in proc.stdout:
         try:
@@ -195,10 +220,23 @@ def main(argv: List[str] | None = None) -> int:
         log_lines.append(line)
 
     exit_code = proc.wait()
+    wall_time = time.monotonic() - start_time
 
     combined_logs = "".join(log_lines)
     _write_provider_logs(combined_logs)
 
+    # Finalise profiling: stop sampler thread and write profiling_report.json.
+    finalize_profiling(profiling_thread, job_dir, wall_time)
+
+    # Clean up torch sitecustomize temp dir (best-effort).
+    if torch_tmp_dir:
+        try:
+            import shutil
+
+            shutil.rmtree(torch_tmp_dir, ignore_errors=True)
+        except Exception:
+            pass
+
     # Update live_status based on outcome (best-effort).
     if exit_code == 0:
         _set_live_status("finished")
diff --git a/src/renderer/components/Experiment/Tasks/EmbeddableStreamingOutput.tsx b/src/renderer/components/Experiment/Tasks/EmbeddableStreamingOutput.tsx
index d6ae69c67..c3e2ee4b5 100644
--- a/src/renderer/components/Experiment/Tasks/EmbeddableStreamingOutput.tsx
+++ b/src/renderer/components/Experiment/Tasks/EmbeddableStreamingOutput.tsx
@@ -26,6 +26,7 @@ import * as chatAPI from 'renderer/lib/transformerlab-api-sdk';
 import { useExperimentInfo } from 'renderer/lib/ExperimentInfoContext';
 import { jobChipColor } from 'renderer/lib/utils';
 import PollingOutputTerminal from './PollingOutputTerminal';
+import ProfilingReport from './ProfilingReport';
 
 interface ProviderLogsTerminalProps {
   logsText: string;
@@ -198,30 +199,33 @@ function RefreshIndicator({
   );
 }
 
-const TAB_OPTIONS: { value: 'output' | 'provider'; label: string }[] = [
+type TabValue = 'output' | 'provider' | 'profiling';
+
+const TAB_OPTIONS: { value: TabValue; label: string }[] = [
   { value: 'output', label: 'Lab SDK Output' },
   { value: 'provider', label: 'Machine Logs' },
+  { value: 'profiling', label: 'Profiling' },
 ];
 
 export interface EmbeddableStreamingOutputProps {
   jobId: number;
   /** Which tabs to show, in order. e.g. ['output', 'provider'] or ['provider'] for interactive tasks. */
-  tabs?: ('output' | 'provider')[];
+  tabs?: TabValue[];
   /** Current job status string (e.g. 'RUNNING', 'COMPLETE'). Passed from the parent to avoid extra polling. */
   jobStatus?: string;
 }
 
 export default function EmbeddableStreamingOutput({
   jobId,
-  tabs: tabsProp = ['output', 'provider'],
+  tabs: tabsProp = ['output', 'provider', 'profiling'],
   jobStatus = '',
 }: EmbeddableStreamingOutputProps) {
   const { experimentInfo } = useExperimentInfo();
-  const [activeTab, setActiveTab] = useState<'output' | 'provider'>('output');
+  const [activeTab, setActiveTab] = useState<TabValue>('output');
   const [viewLiveProviderLogs, setViewLiveProviderLogs] =
     useState<boolean>(false);
 
-  const tabs = tabsProp.length > 0 ? tabsProp : ['output', 'provider'];
+  const tabs = tabsProp.length > 0 ? tabsProp : ['output', 'provider', 'profiling'];
   const showTabList = tabs.length > 1;
   const tabsKey = tabs.join(',');
 
@@ -229,7 +233,7 @@ export default function EmbeddableStreamingOutput({
     setActiveTab((current) =>
       tabs.includes(current)
         ? current
-        : ((tabs[0] ?? 'output') as 'output' | 'provider'),
+        : ((tabs[0] ?? 'output') as TabValue),
     );
     setViewLiveProviderLogs(false);
     // tabsKey is a stable serialization of tabs to avoid array reference churn
@@ -332,9 +336,11 @@ export default function EmbeddableStreamingOutput({
           onChange={(_event, value) => {
             if (
               typeof value === 'string' &&
-              (value === 'output' || value === 'provider')
+              (value === 'output' ||
+                value === 'provider' ||
+                value === 'profiling')
             ) {
-              setActiveTab(value);
+              setActiveTab(value as TabValue);
             }
           }}
         >
@@ -391,13 +397,19 @@ export default function EmbeddableStreamingOutput({
             </>
           )}
         </Box>
-        <RefreshIndicator
-          seconds={activeTab === 'output' ? outputCountdown : providerCountdown}
-          isRefreshing={
-            activeTab === 'output' ? outputIsValidating : providerIsValidating
-          }
-          onRefresh={handleManualRefresh}
-        />
+        {activeTab !== 'profiling' && (
+          <RefreshIndicator
+            seconds={
+              activeTab === 'output' ? outputCountdown : providerCountdown
+            }
+            isRefreshing={
+              activeTab === 'output'
+                ? outputIsValidating
+                : providerIsValidating
+            }
+            onRefresh={handleManualRefresh}
+          />
+        )}
       </Box>
       <Box
         sx={{
@@ -433,6 +445,19 @@ export default function EmbeddableStreamingOutput({
               onMutateReady={handleOutputMutateReady}
             />
           </Box>
+        ) : activeTab === 'profiling' ? (
+          <Box
+            sx={{
+              flex: 1,
+              minHeight: 0,
+              width: '100%',
+              overflowY: 'auto',
+              borderRadius: '8px',
+              border: '1px solid var(--joy-palette-divider)',
+            }}
+          >
+            <ProfilingReport jobId={jobId} />
+          </Box>
         ) : (
           <Box
             sx={{
@@ -512,6 +537,6 @@ export default function EmbeddableStreamingOutput({
 }
 
 EmbeddableStreamingOutput.defaultProps = {
-  tabs: ['output', 'provider'],
+  tabs: ['output', 'provider', 'profiling'],
   jobStatus: '',
 };
diff --git a/src/renderer/components/Experiment/Tasks/ProfilingReport.tsx b/src/renderer/components/Experiment/Tasks/ProfilingReport.tsx
new file mode 100644
index 000000000..d51cec587
--- /dev/null
+++ b/src/renderer/components/Experiment/Tasks/ProfilingReport.tsx
@@ -0,0 +1,205 @@
+import React from 'react';
+import {
+  Box,
+  Card,
+  CardContent,
+  Divider,
+  Stack,
+  Table,
+  Typography,
+} from '@mui/joy';
+import { useSWRWithAuth as useSWR } from 'renderer/lib/authContext';
+import * as chatAPI from 'renderer/lib/transformerlab-api-sdk';
+import { useExperimentInfo } from 'renderer/lib/ExperimentInfoContext';
+
+interface GpuSummary {
+  index: number;
+  peak_util_percent?: number;
+  avg_util_percent?: number;
+  peak_mem_used_mb?: number;
+  avg_mem_used_mb?: number;
+  mem_total_mb?: number;
+}
+
+interface ProfilingData {
+  wall_time_sec?: number;
+  sample_count?: number;
+  interval_sec?: number;
+  cpu?: {
+    peak_percent?: number;
+    avg_percent?: number;
+  };
+  memory?: {
+    peak_rss_mb?: number;
+    avg_rss_mb?: number;
+  };
+  gpus?: GpuSummary[];
+}
+
+function formatMb(mb: number | undefined): string {
+  if (mb == null) return '—';
+  if (mb >= 1024) return `${(mb / 1024).toFixed(1)} GB`;
+  return `${mb.toFixed(0)} MB`;
+}
+
+function formatPct(v: number | undefined): string {
+  if (v == null) return '—';
+  return `${v.toFixed(1)}%`;
+}
+
+function formatSec(sec: number | undefined): string {
+  if (sec == null) return '—';
+  if (sec < 60) return `${sec.toFixed(1)}s`;
+  const total = Math.round(sec); // round first so the seconds part can never show as 60
+  const m = Math.floor(total / 60);
+  return `${m}m ${total % 60}s`;
+}
+
+interface StatCardProps {
+  label: string;
+  peak: string;
+  avg: string;
+}
+
+function StatCard({ label, peak, avg }: StatCardProps) {
+  return (
+    <Card variant="soft" sx={{ minWidth: 140 }}>
+      <CardContent>
+        <Typography level="body-xs" textColor="neutral.500" sx={{ mb: 0.5 }}>
+          {label}
+        </Typography>
+        <Typography level="title-md">{peak}</Typography>
+        <Typography level="body-xs" textColor="neutral.500">
+          avg {avg}
+        </Typography>
+      </CardContent>
+    </Card>
+  );
+}
+
+interface ProfilingReportProps {
+  jobId: number;
+}
+
+export default function ProfilingReport({ jobId }: ProfilingReportProps) {
+  const { experimentInfo } = useExperimentInfo();
+
+  const url =
+    jobId !== -1 && experimentInfo?.id
+      ? chatAPI.Endpoints.Experiment.GetProfilingReport(
+          experimentInfo.id,
+          String(jobId),
+        )
+      : null;
+
+  const { data, isLoading, isError } = useSWR(url);
+
+  if (!url || isLoading) {
+    return (
+      <Box sx={{ p: 2 }}>
+        <Typography level="body-sm" color="neutral">
+          Loading profiling report…
+        </Typography>
+      </Box>
+    );
+  }
+
+  if (isError || !data) {
+    return (
+      <Box sx={{ p: 2 }}>
+        <Typography level="body-sm" color="neutral">
+          No profiling report available. Enable profiling when launching the job
+          to capture CPU and GPU metrics.
+        </Typography>
+      </Box>
+    );
+  }
+
+  const report = data as ProfilingData;
+
+  return (
+    <Box sx={{ p: 2, overflowY: 'auto' }}>
+      <Stack spacing={2}>
+        {/* Summary row */}
+        <Stack direction="row" spacing={1.5} flexWrap="wrap">
+          <Card variant="soft" sx={{ minWidth: 140 }}>
+            <CardContent>
+              <Typography
+                level="body-xs"
+                textColor="neutral.500"
+                sx={{ mb: 0.5 }}
+              >
+                Wall Time
+              </Typography>
+              <Typography level="title-md">
+                {formatSec(report.wall_time_sec)}
+              </Typography>
+              <Typography level="body-xs" textColor="neutral.500">
+                {report.sample_count ?? 0} samples /{' '}
+                {report.interval_sec ?? 5}s
+              </Typography>
+            </CardContent>
+          </Card>
+
+          {report.cpu && (
+            <StatCard
+              label="CPU"
+              peak={formatPct(report.cpu.peak_percent)}
+              avg={formatPct(report.cpu.avg_percent)}
+            />
+          )}
+
+          {report.memory && (
+            <StatCard
+              label="Memory (RSS)"
+              peak={formatMb(report.memory.peak_rss_mb)}
+              avg={formatMb(report.memory.avg_rss_mb)}
+            />
+          )}
+        </Stack>
+
+        {/* GPU table */}
+        {report.gpus && report.gpus.length > 0 && (
+          <>
+            <Divider />
+            <Typography level="title-sm">GPU Summary</Typography>
+            <Box sx={{ overflowX: 'auto' }}>
+              <Table size="sm" borderAxis="xBetween" hoverRow>
+                <thead>
+                  <tr>
+                    <th>GPU</th>
+                    <th>Peak Util</th>
+                    <th>Avg Util</th>
+                    <th>Peak Mem</th>
+                    <th>Avg Mem</th>
+                    <th>Total Mem</th>
+                  </tr>
+                </thead>
+                <tbody>
+                  {report.gpus.map((g) => (
+                    <tr key={g.index}>
+                      <td>GPU {g.index}</td>
+                      <td>{formatPct(g.peak_util_percent)}</td>
+                      <td>{formatPct(g.avg_util_percent)}</td>
+                      <td>{formatMb(g.peak_mem_used_mb)}</td>
+                      <td>{formatMb(g.avg_mem_used_mb)}</td>
+                      <td>{formatMb(g.mem_total_mb)}</td>
+                    </tr>
+                  ))}
+                </tbody>
+              </Table>
+            </Box>
+          </>
+        )}
+
+        {/* Empty-state note shown when neither CPU nor GPU samples were collected */}
+        {report.gpus === undefined && !report.cpu && (
+          <Typography level="body-xs" color="neutral">
+            No resource samples were collected. The job may have been too short
+            to capture data.
+          </Typography>
+        )}
+      </Stack>
+    </Box>
+  );
+}
diff --git a/src/renderer/components/Experiment/Tasks/QueueTaskModal.tsx b/src/renderer/components/Experiment/Tasks/QueueTaskModal.tsx
index 54a8bbbc3..35308aaeb 100644
--- a/src/renderer/components/Experiment/Tasks/QueueTaskModal.tsx
+++ b/src/renderer/components/Experiment/Tasks/QueueTaskModal.tsx
@@ -106,6 +106,8 @@ export default function QueueTaskModal({
   const [lowerIsBetter, setLowerIsBetter] = React.useState(true);
   const [jobSlurmFlags, setJobSlurmFlags] = React.useState<string[]>(['']);
   const [useTrackio, setUseTrackio] = React.useState(false);
+  const [useProfiling, setUseProfiling] = React.useState(false);
+  const [useProfilingTorch, setUseProfilingTorch] = React.useState(false);
   const loadingMessages = React.useMemo(
     () => [
       'Contacting compute provider…',
@@ -612,6 +614,15 @@ export default function QueueTaskModal({
       config.enable_trackio = true;
     }
 
+    // Profiling: when enabled, backend will set _TFL_PROFILING=1 so tfl-remote-trap
+    // samples CPU/GPU/memory during the job and writes profiling_report.json.
+    if (useProfiling) {
+      config.enable_profiling = true;
+      if (useProfilingTorch) {
+        config.enable_profiling_torch = true;
+      }
+    }
+
     onSubmit(config);
   };
 
@@ -1313,6 +1324,49 @@ export default function QueueTaskModal({
 
             <Divider />
 
+            {/* Profiling Section */}
+            <Stack spacing={2}>
+              <Typography level="title-sm">Profiling</Typography>
+              <FormControl
+                orientation="horizontal"
+                sx={{ alignItems: 'center' }}
+              >
+                <Checkbox
+                  checked={useProfiling}
+                  onChange={(e) => {
+                    setUseProfiling(e.target.checked);
+                    if (!e.target.checked) setUseProfilingTorch(false);
+                  }}
+                  disabled={isSubmitting}
+                />
+                <FormLabel sx={{ ml: 1 }}>
+                  Enable CPU &amp; GPU profiling for this run
+                </FormLabel>
+              </FormControl>
+              <FormHelperText>
+                Samples CPU%, memory, and GPU utilization every few seconds
+                during the job. Results are available in the Profiling tab after
+                the job completes.
+              </FormHelperText>
+              {useProfiling && (
+                <FormControl
+                  orientation="horizontal"
+                  sx={{ alignItems: 'center', ml: 3 }}
+                >
+                  <Checkbox
+                    checked={useProfilingTorch}
+                    onChange={(e) => setUseProfilingTorch(e.target.checked)}
+                    disabled={isSubmitting}
+                  />
+                  <FormLabel sx={{ ml: 1 }}>
+                    Also capture PyTorch op-level trace (Chrome trace format)
+                  </FormLabel>
+                </FormControl>
+              )}
+            </Stack>
+
+            <Divider />
+
             {/* Sweep Configuration Section */}
             <SweepConfigSection
               runSweeps={runSweeps}
diff --git a/src/renderer/components/Experiment/Tasks/Tasks.tsx b/src/renderer/components/Experiment/Tasks/Tasks.tsx
index b32cf92fc..137ccb76a 100644
--- a/src/renderer/components/Experiment/Tasks/Tasks.tsx
+++ b/src/renderer/components/Experiment/Tasks/Tasks.tsx
@@ -871,6 +871,8 @@ export default function Tasks({ subtype }: { subtype?: string }) {
         provider_id: _pid,
         provider_name: _pname,
         enable_trackio,
+        enable_profiling,
+        enable_profiling_torch,
         ...paramConfig
       } = config ?? {};
 
@@ -919,6 +921,12 @@ export default function Tasks({ subtype }: { subtype?: string }) {
           cfg.minutes_requested || task.minutes_requested || undefined,
         enable_trackio:
           typeof enable_trackio === 'boolean' ? enable_trackio : undefined,
+        enable_profiling:
+          typeof enable_profiling === 'boolean' ? enable_profiling : undefined,
+        enable_profiling_torch:
+          typeof enable_profiling_torch === 'boolean'
+            ? enable_profiling_torch
+            : undefined,
       };
 
       const response = await fetchWithAuth(
diff --git a/src/renderer/components/Experiment/Tasks/ViewOutputModalStreaming.tsx b/src/renderer/components/Experiment/Tasks/ViewOutputModalStreaming.tsx
index 12a30c545..06ba1ae84 100644
--- a/src/renderer/components/Experiment/Tasks/ViewOutputModalStreaming.tsx
+++ b/src/renderer/components/Experiment/Tasks/ViewOutputModalStreaming.tsx
@@ -5,13 +5,14 @@ import EmbeddableStreamingOutput from './EmbeddableStreamingOutput';
 const TAB_LABELS: Record<string, string> = {
   output: 'Lab SDK Output',
   provider: 'Machine Logs',
+  profiling: 'Profiling',
 };
 
 interface ViewOutputModalStreamingProps {
   jobId: number;
   setJobId: (jobId: number) => void;
   /** Which tabs to show, in order. e.g. ['output', 'provider'] or ['provider'] for interactive tasks. */
-  tabs?: ('output' | 'provider')[];
+  tabs?: ('output' | 'provider' | 'profiling')[];
   /** Current job status string (e.g. 'RUNNING', 'COMPLETE'). */
   jobStatus?: string;
 }
@@ -19,7 +20,7 @@ interface ViewOutputModalStreamingProps {
 function ViewOutputModalStreaming({
   jobId,
   setJobId,
-  tabs = ['output', 'provider'],
+  tabs = ['output', 'provider', 'profiling'],
   jobStatus = '',
 }: ViewOutputModalStreamingProps) {
   if (jobId === -1) {
@@ -61,7 +62,7 @@ function ViewOutputModalStreaming({
 }
 
 ViewOutputModalStreaming.defaultProps = {
-  tabs: ['output', 'provider'],
+  tabs: ['output', 'provider', 'profiling'],
   jobStatus: '',
 };
 
diff --git a/src/renderer/lib/api-client/endpoints.ts b/src/renderer/lib/api-client/endpoints.ts
index e540d4247..5f54ac514 100644
--- a/src/renderer/lib/api-client/endpoints.ts
+++ b/src/renderer/lib/api-client/endpoints.ts
@@ -468,6 +468,8 @@ Endpoints.Experiment = {
     tailLines: number = 1000,
   ) =>
     `${API_URL()}experiment/${experimentId}/jobs/${jobId}/tunnel_info?tail_lines=${tailLines}`,
+  GetProfilingReport: (experimentId: string, jobId: string) =>
+    `${API_URL()}experiment/${experimentId}/jobs/${jobId}/profiling_report`,
   GetAdditionalDetails: (
     experimentId: string,
     jobId: string,

From 6a3586a6cc47309cd05e32c6f96dfdfad42d6293 Mon Sep 17 00:00:00 2001
From: deep1401 <gandhi0869@gmail.com>
Date: Tue, 10 Mar 2026 15:17:40 -0600
Subject: [PATCH 2/8] prettier

---
 .../Experiment/Tasks/EmbeddableStreamingOutput.tsx    | 11 ++++-------
 .../components/Experiment/Tasks/ProfilingReport.tsx   |  3 +--
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/renderer/components/Experiment/Tasks/EmbeddableStreamingOutput.tsx b/src/renderer/components/Experiment/Tasks/EmbeddableStreamingOutput.tsx
index c3e2ee4b5..68039755c 100644
--- a/src/renderer/components/Experiment/Tasks/EmbeddableStreamingOutput.tsx
+++ b/src/renderer/components/Experiment/Tasks/EmbeddableStreamingOutput.tsx
@@ -225,15 +225,14 @@ export default function EmbeddableStreamingOutput({
   const [viewLiveProviderLogs, setViewLiveProviderLogs] =
     useState<boolean>(false);
 
-  const tabs = tabsProp.length > 0 ? tabsProp : ['output', 'provider', 'profiling'];
+  const tabs =
+    tabsProp.length > 0 ? tabsProp : ['output', 'provider', 'profiling'];
   const showTabList = tabs.length > 1;
   const tabsKey = tabs.join(',');
 
   useEffect(() => {
     setActiveTab((current) =>
-      tabs.includes(current)
-        ? current
-        : ((tabs[0] ?? 'output') as TabValue),
+      tabs.includes(current) ? current : ((tabs[0] ?? 'output') as TabValue),
     );
     setViewLiveProviderLogs(false);
     // tabsKey is a stable serialization of tabs to avoid array reference churn
@@ -403,9 +402,7 @@ export default function EmbeddableStreamingOutput({
               activeTab === 'output' ? outputCountdown : providerCountdown
             }
             isRefreshing={
-              activeTab === 'output'
-                ? outputIsValidating
-                : providerIsValidating
+              activeTab === 'output' ? outputIsValidating : providerIsValidating
             }
             onRefresh={handleManualRefresh}
           />
diff --git a/src/renderer/components/Experiment/Tasks/ProfilingReport.tsx b/src/renderer/components/Experiment/Tasks/ProfilingReport.tsx
index d51cec587..eb2fc751f 100644
--- a/src/renderer/components/Experiment/Tasks/ProfilingReport.tsx
+++ b/src/renderer/components/Experiment/Tasks/ProfilingReport.tsx
@@ -135,8 +135,7 @@ export default function ProfilingReport({ jobId }: ProfilingReportProps) {
                 {formatSec(report.wall_time_sec)}
               </Typography>
               <Typography level="body-xs" textColor="neutral.500">
-                {report.sample_count ?? 0} samples /{' '}
-                {report.interval_sec ?? 5}s
+                {report.sample_count ?? 0} samples / {report.interval_sec ?? 5}s
               </Typography>
             </CardContent>
           </Card>

From 0d2c8338ea06401bcfba15c5be0c80e8bc64ab29 Mon Sep 17 00:00:00 2001
From: deep1401 <gandhi0869@gmail.com>
Date: Fri, 13 Mar 2026 11:55:01 -0600
Subject: [PATCH 3/8] Changes to sync after run

---
 .../routers/compute_provider.py               |  5 +-
 api/transformerlab/routers/experiment/jobs.py | 11 +--
 lab-sdk/src/lab/dirs.py                       | 11 +++
 lab-sdk/src/lab/job.py                        |  6 ++
 lab-sdk/src/lab/lab_facade.py                 | 18 +++++
 lab-sdk/src/lab/profiling.py                  | 76 +++++++++++++------
 lab-sdk/src/lab/remote_trap.py                | 45 ++++++-----
 7 files changed, 124 insertions(+), 48 deletions(-)

diff --git a/api/transformerlab/routers/compute_provider.py b/api/transformerlab/routers/compute_provider.py
index 0bc6ceb6b..869c81a94 100644
--- a/api/transformerlab/routers/compute_provider.py
+++ b/api/transformerlab/routers/compute_provider.py
@@ -1694,7 +1694,10 @@ async def launch_template_on_provider(
     # Ensure transformerlab SDK is available on remote machines for live_status tracking and other helpers.
     # This runs after AWS credentials are configured so we have access to any remote storage if needed.
     if provider.type != ProviderType.LOCAL.value:
-        setup_commands.append("pip install -q transformerlab")
+        # setup_commands.append("pip install -q transformerlab")
+        setup_commands.append(
+            "git clone https://github.com/transformerlab/transformerlab-app; cd transformerlab-app; git checkout add/profiler; pip install -e lab-sdk/; cd ~;"
+        )
 
     # Add GitHub clone setup if enabled
     if request.github_repo_url:
diff --git a/api/transformerlab/routers/experiment/jobs.py b/api/transformerlab/routers/experiment/jobs.py
index c462702f2..3b557a825 100644
--- a/api/transformerlab/routers/experiment/jobs.py
+++ b/api/transformerlab/routers/experiment/jobs.py
@@ -1708,14 +1708,15 @@ async def get_profiling_report(
     user_and_team: dict = Depends(get_user_and_team),
 ):
     """
-    Return the profiling_report.json written by tfl-remote-trap when _TFL_PROFILING=1.
+    Return the profiling_report.json from the job's profiling folder (written when
+    _TFL_PROFILING=1 and copied on lab.finish/error or when the remote trap exits).
 
-    Returns 404 if profiling was not enabled or the job has not yet completed profiling.
+    Returns 404 if profiling was not enabled or the report is not yet available.
     """
-    from lab.dirs import get_job_dir
+    from lab.dirs import get_job_profiling_dir
 
-    job_dir = await get_job_dir(job_id)
-    report_path = storage.join(job_dir, "profiling_report.json")
+    profiling_dir = await get_job_profiling_dir(job_id)
+    report_path = storage.join(profiling_dir, "profiling_report.json")
 
     if not await storage.exists(report_path):
         raise HTTPException(status_code=404, detail="Profiling report not found for this job")
diff --git a/lab-sdk/src/lab/dirs.py b/lab-sdk/src/lab/dirs.py
index bc1929999..b40d7b0c2 100644
--- a/lab-sdk/src/lab/dirs.py
+++ b/lab-sdk/src/lab/dirs.py
@@ -267,6 +267,17 @@ async def get_job_artifacts_dir(job_id: str | int) -> str:
     return path
 
 
+async def get_job_profiling_dir(job_id: str | int) -> str:
+    """
+    Return the profiling directory for a specific job, creating it if needed.
+    Example: ~/.transformerlab/workspace/jobs/<job_id>/profiling
+    """
+    job_dir = await get_job_dir(job_id)
+    path = storage.join(job_dir, "profiling")
+    await storage.makedirs(path, exist_ok=True)
+    return path
+
+
 async def get_job_checkpoints_dir(job_id: str | int) -> str:
     """
     Return the checkpoints directory for a specific job, creating it if needed.
diff --git a/lab-sdk/src/lab/job.py b/lab-sdk/src/lab/job.py
index f749b1eaa..0ac75a7b2 100644
--- a/lab-sdk/src/lab/job.py
+++ b/lab-sdk/src/lab/job.py
@@ -294,6 +294,12 @@ async def get_artifacts_dir(self):
         """
         return await dirs.get_job_artifacts_dir(self.id)
 
+    async def get_profiling_dir(self):
+        """
+        Get the profiling directory path for this job.
+        """
+        return await dirs.get_job_profiling_dir(self.id)
+
     async def get_checkpoint_paths(self):
         """
         Get list of checkpoint paths for this job.
diff --git a/lab-sdk/src/lab/lab_facade.py b/lab-sdk/src/lab/lab_facade.py
index ed1fbc594..86bb5ca3d 100644
--- a/lab-sdk/src/lab/lab_facade.py
+++ b/lab-sdk/src/lab/lab_facade.py
@@ -538,6 +538,15 @@ def finish(
         Mark the job as successfully completed and set completion metadata.
         """
         self._ensure_initialized()
+        # Copy profiling from temp dir into job's profiling folder (when run under remote trap).
+        try:
+            profiling_temp = os.environ.get("_TFL_PROFILING_TEMP_DIR")
+            if profiling_temp and self._job:
+                from lab.profiling import copy_profiling_to_job
+
+                _run_async(copy_profiling_to_job(profiling_temp, str(self._job.id)))  # type: ignore[union-attr]
+        except Exception:
+            pass
         _run_async(self._job.update_progress(100))  # type: ignore[union-attr]
         _run_async(self._job.update_status(JobStatus.COMPLETE))  # type: ignore[union-attr]
         _run_async(self._job.update_job_data_field("completion_status", "success"))  # type: ignore[union-attr]
@@ -1435,6 +1444,15 @@ def error(
         Mark the job as failed and set completion metadata.
         """
         self._ensure_initialized()
+        # Copy profiling from temp dir into job's profiling folder (when run under remote trap).
+        try:
+            profiling_temp = os.environ.get("_TFL_PROFILING_TEMP_DIR")
+            if profiling_temp and self._job:
+                from lab.profiling import copy_profiling_to_job
+
+                _run_async(copy_profiling_to_job(profiling_temp, str(self._job.id)))  # type: ignore[union-attr]
+        except Exception:
+            pass
         _run_async(self._job.update_status(JobStatus.COMPLETE))  # type: ignore[union-attr]
         _run_async(self._job.update_job_data_field("completion_status", "failed"))  # type: ignore[union-attr]
         _run_async(self._job.update_job_data_field("completion_details", message))  # type: ignore[union-attr]
diff --git a/lab-sdk/src/lab/profiling.py b/lab-sdk/src/lab/profiling.py
index d04921a0d..fa6618eef 100644
--- a/lab-sdk/src/lab/profiling.py
+++ b/lab-sdk/src/lab/profiling.py
@@ -1,18 +1,19 @@
 """
 Job profiling: background sampler for CPU, memory, and GPU resource usage.
 
-Usage in tfl-remote-trap (or any process wrapper):
-
-    import subprocess
-    from lab.profiling import maybe_start_profiling, finalize_profiling
+Profiling writes to a temp directory during the run. The contents are copied into
+the job's "profiling" folder (alongside "artifacts") when:
+  - lab.finish() or lab.error() is called (if _TFL_PROFILING_TEMP_DIR is set), or
+  - the remote trap exits after the child process (trap copies then).
 
-    proc = subprocess.Popen(...)
-    profiling_thread = maybe_start_profiling(proc.pid, job_dir)
-
-    exit_code = proc.wait()
-    wall_time = time.monotonic() - start_time
+Usage in tfl-remote-trap (or any process wrapper):
 
-    finalize_profiling(profiling_thread, job_dir, wall_time)
+    output_dir = tempfile.mkdtemp(prefix="tfl_profiling_")
+    os.environ["_TFL_PROFILING_TEMP_DIR"] = output_dir  # so lab.finish/error can copy
+    profiling_thread = maybe_start_profiling(proc.pid, output_dir)
+    ...
+    finalize_profiling(profiling_thread, output_dir, wall_time)
+    await copy_profiling_to_job(output_dir, job_id)  # or call from lab.finish/error
 
 Activation:
     Set _TFL_PROFILING=1 in the job environment.
@@ -169,10 +170,10 @@ def _sample_gpus() -> List[Dict[str, Any]]:
 class _ProfilingThread(threading.Thread):
     """Background thread that periodically samples resource stats and writes to JSONL."""
 
-    def __init__(self, pid: int, job_dir: str, interval_sec: float = _DEFAULT_INTERVAL_SEC) -> None:
+    def __init__(self, pid: int, output_dir: str, interval_sec: float = _DEFAULT_INTERVAL_SEC) -> None:
         super().__init__(daemon=True, name="tfl-profiler")
         self.pid = pid
-        self.job_dir = job_dir
+        self.output_dir = output_dir
         self.interval_sec = interval_sec
         self._stop_event = threading.Event()
         self.samples: List[Dict[str, Any]] = []
@@ -181,7 +182,7 @@ def stop(self) -> None:
         self._stop_event.set()
 
     def run(self) -> None:
-        samples_path = os.path.join(self.job_dir, _PROFILING_SAMPLES_FILE)
+        samples_path = os.path.join(self.output_dir, _PROFILING_SAMPLES_FILE)
         # Initialise cpu_percent (first call always returns 0.0 for psutil)
         try:
             import psutil  # type: ignore[import-not-found]
@@ -269,34 +270,38 @@ def _aggregate_samples(samples: List[Dict[str, Any]], wall_time_sec: float, inte
     return report
 
 
-def maybe_start_profiling(pid: int, job_dir: str) -> Optional[_ProfilingThread]:
+def maybe_start_profiling(pid: int, output_dir: str) -> Optional[_ProfilingThread]:
     """
     Start a profiling thread if _TFL_PROFILING=1 is set in the environment.
 
-    Returns the thread (caller must call finalize_profiling later) or None if profiling
-    is disabled or the job_dir is unavailable.
+    output_dir: temp directory to write profiling_samples.jsonl (and later
+        profiling_report.json). Caller must create it and pass the same path to
+        finalize_profiling and copy_profiling_to_job.
+
+    Returns the thread (caller must call finalize_profiling later) or None if
+    profiling is disabled or output_dir is unavailable.
     """
     if os.environ.get("_TFL_PROFILING") != "1":
         return None
-    if not job_dir or not os.path.isdir(job_dir):
+    if not output_dir or not os.path.isdir(output_dir):
         return None
     try:
         interval = float(os.environ.get("_TFL_PROFILING_INTERVAL", str(_DEFAULT_INTERVAL_SEC)))
     except ValueError:
         interval = _DEFAULT_INTERVAL_SEC
 
-    thread = _ProfilingThread(pid=pid, job_dir=job_dir, interval_sec=interval)
+    thread = _ProfilingThread(pid=pid, output_dir=output_dir, interval_sec=interval)
     thread.start()
     return thread
 
 
 def finalize_profiling(
     thread: Optional[_ProfilingThread],
-    job_dir: str,
+    output_dir: str,
     wall_time_sec: float,
 ) -> None:
     """
-    Stop the profiling thread and write profiling_report.json to job_dir.
+    Stop the profiling thread and write profiling_report.json to output_dir.
 
     Safe to call even when thread is None (profiling disabled).
     """
@@ -310,20 +315,39 @@ def finalize_profiling(
 
     try:
         report = _aggregate_samples(thread.samples, wall_time_sec, thread.interval_sec)
-        report_path = os.path.join(job_dir, _PROFILING_REPORT_FILE)
+        report_path = os.path.join(output_dir, _PROFILING_REPORT_FILE)
         with open(report_path, "w", encoding="utf-8") as f:
             json.dump(report, f, indent=2)
     except Exception:
         pass
 
 
+async def copy_profiling_to_job(profiling_temp_dir: str, job_id: str) -> None:
+    """
+    Copy profiling output from a temp directory into the job's profiling folder.
+
+    Uses the storage abstraction so the destination may be local or remote (e.g. S3).
+    Safe to call if profiling_temp_dir is missing or empty; no-op on failure.
+    """
+    if not profiling_temp_dir or not os.path.isdir(profiling_temp_dir):
+        return
+    try:
+        from lab.dirs import get_job_profiling_dir
+        from lab import storage
+
+        dest_dir = await get_job_profiling_dir(job_id)
+        await storage.copy_dir(profiling_temp_dir, dest_dir)
+    except Exception:
+        pass
+
+
 # ---------------------------------------------------------------------------
 # Optional PyTorch profiler injection via sitecustomize.py
 # ---------------------------------------------------------------------------
 
 _SITECUSTOMIZE_TEMPLATE = """\
 # Auto-injected by tfl-profile-trap (lab-sdk profiling).
-# Activates torch.profiler.profile() and exports a Chrome trace to the job dir.
+# Activates torch.profiler.profile() and exports a Chrome trace to the profiling output dir.
 import os as _os
 import atexit as _atexit
 
@@ -356,18 +380,20 @@ def _export_trace():
 """
 
 
-def inject_torch_profiler(job_dir: str, env: dict) -> str:
+def inject_torch_profiler(profiling_output_dir: str, env: dict) -> str:
     """
     If _TFL_PROFILING_TORCH=1, write a sitecustomize.py to a temp dir and
     prepend it to PYTHONPATH in env so torch.profiler auto-activates in the job.
+    Trace is written under profiling_output_dir/torch_profile so it is copied
+    with the rest of profiling data.
 
-    Returns the temp dir path (caller should clean up after the job exits).
+    Returns the sitecustomize temp dir path (caller should clean up after the job exits).
     """
     if os.environ.get("_TFL_PROFILING_TORCH") != "1":
         return ""
 
     try:
-        torch_profile_dir = os.path.join(job_dir, _TORCH_PROFILE_DIR)
+        torch_profile_dir = os.path.join(profiling_output_dir, _TORCH_PROFILE_DIR)
         os.makedirs(torch_profile_dir, exist_ok=True)
 
         tmp_dir = tempfile.mkdtemp(prefix="tfl_sitecustomize_")
diff --git a/lab-sdk/src/lab/remote_trap.py b/lab-sdk/src/lab/remote_trap.py
index 596994824..0de26cc46 100644
--- a/lab-sdk/src/lab/remote_trap.py
+++ b/lab-sdk/src/lab/remote_trap.py
@@ -4,12 +4,13 @@
 import os
 import subprocess
 import sys
+import tempfile
 import time
 from typing import List
 
 from lab import Job, storage
 from lab.job_status import JobStatus
-from lab.profiling import finalize_profiling, inject_torch_profiler, maybe_start_profiling
+from lab.profiling import copy_profiling_to_job, finalize_profiling, inject_torch_profiler, maybe_start_profiling
 
 
 async def _set_live_status_async(job_id: str, status: str) -> None:
@@ -173,23 +174,20 @@ def main(argv: List[str] | None = None) -> int:
     _set_live_status("started")
     _set_status(JobStatus.RUNNING)
 
-    # Resolve job directory for profiling output (same path used by _write_provider_logs).
     job_id = os.environ.get("_TFL_JOB_ID")
-    job_dir: str = ""
-    if job_id:
+    # Profiling writes to a temp dir; we copy it into job's "profiling" folder on exit
+    # (and lab.finish/error copy from _TFL_PROFILING_TEMP_DIR when the user calls them).
+    profiling_temp_dir: str = ""
+    if job_id and os.environ.get("_TFL_PROFILING") == "1":
         try:
-            from lab.dirs import get_job_dir
+            profiling_temp_dir = tempfile.mkdtemp(prefix="tfl_profiling_")
+        except OSError:
+            profiling_temp_dir = ""
 
-            async def _get_job_dir() -> str:
-                return await get_job_dir(job_id)
-
-            job_dir = asyncio.run(_get_job_dir())
-        except Exception:
-            job_dir = ""
-
-    # Optionally inject torch.profiler via sitecustomize.py before spawning the process.
     proc_env = os.environ.copy()
-    torch_tmp_dir = inject_torch_profiler(job_dir, proc_env) if job_dir else ""
+    if profiling_temp_dir:
+        proc_env["_TFL_PROFILING_TEMP_DIR"] = profiling_temp_dir
+    torch_tmp_dir = inject_torch_profiler(profiling_temp_dir, proc_env) if profiling_temp_dir else ""
 
     # Run the original command in the shell so it behaves exactly as submitted.
     # Stream output line-by-line to avoid buffering large logs in memory (training
@@ -208,7 +206,7 @@ async def _get_job_dir() -> str:
     )
 
     # Start profiling sidecar thread (no-op if _TFL_PROFILING is not set).
-    profiling_thread = maybe_start_profiling(proc.pid, job_dir) if job_dir else None
+    profiling_thread = maybe_start_profiling(proc.pid, profiling_temp_dir) if profiling_temp_dir else None
 
     assert proc.stdout is not None
     for line in proc.stdout:
@@ -225,8 +223,21 @@ async def _get_job_dir() -> str:
     combined_logs = "".join(log_lines)
     _write_provider_logs(combined_logs)
 
-    # Finalise profiling: stop sampler thread and write profiling_report.json.
-    finalize_profiling(profiling_thread, job_dir, wall_time)
+    # Finalise profiling: stop sampler thread and write report to profiling temp dir.
+    finalize_profiling(profiling_thread, profiling_temp_dir, wall_time)
+
+    # Copy profiling output from temp dir into job's profiling folder (same as lab.finish/error).
+    if profiling_temp_dir and job_id:
+        try:
+            asyncio.run(copy_profiling_to_job(profiling_temp_dir, job_id))
+        except Exception:
+            pass
+        try:
+            import shutil
+
+            shutil.rmtree(profiling_temp_dir, ignore_errors=True)
+        except Exception:
+            pass
 
     # Clean up torch sitecustomize temp dir (best-effort).
     if torch_tmp_dir:

From fe0ae8abcdf04054aee6971878b854c0e9930190 Mon Sep 17 00:00:00 2001
From: deep1401 <gandhi0869@gmail.com>
Date: Fri, 13 Mar 2026 12:12:07 -0600
Subject: [PATCH 4/8] move profiler modal and add job data flag

---
 lab-sdk/src/lab/profiling.py                  | 14 ++++--
 .../Tasks/EmbeddableStreamingOutput.tsx       | 46 +++++--------------
 .../components/Experiment/Tasks/JobsList.tsx  | 10 +++-
 .../components/Experiment/Tasks/Tasks.tsx     |  8 ++++
 .../Tasks/ViewOutputModalStreaming.tsx        |  7 ++-
 .../Experiment/Tasks/ViewProfilingModal.tsx   | 38 +++++++++++++++
 6 files changed, 81 insertions(+), 42 deletions(-)
 create mode 100644 src/renderer/components/Experiment/Tasks/ViewProfilingModal.tsx

diff --git a/lab-sdk/src/lab/profiling.py b/lab-sdk/src/lab/profiling.py
index fa6618eef..517055e49 100644
--- a/lab-sdk/src/lab/profiling.py
+++ b/lab-sdk/src/lab/profiling.py
@@ -20,6 +20,7 @@
     Set _TFL_PROFILING_INTERVAL=<seconds> to change sampling interval (default 5).
     Set _TFL_PROFILING_TORCH=1 to also inject torch.profiler tracing.
 """
+
 from __future__ import annotations
 
 import json
@@ -327,6 +328,7 @@ async def copy_profiling_to_job(profiling_temp_dir: str, job_id: str) -> None:
     Copy profiling output from a temp directory into the job's profiling folder.
 
     Uses the storage abstraction so the destination may be local or remote (e.g. S3).
+    Sets has_profiling=True in job_data so the UI can show a "View Profiling" option.
     Safe to call if profiling_temp_dir is missing or empty; no-op on failure.
     """
     if not profiling_temp_dir or not os.path.isdir(profiling_temp_dir):
@@ -337,6 +339,14 @@ async def copy_profiling_to_job(profiling_temp_dir: str, job_id: str) -> None:
 
         dest_dir = await get_job_profiling_dir(job_id)
         await storage.copy_dir(profiling_temp_dir, dest_dir)
+        try:
+            from lab.job import Job
+
+            job = await Job.get(job_id)
+            if job is not None:
+                await job.update_job_data_field("has_profiling", True)
+        except Exception:
+            pass
     except Exception:
         pass
 
@@ -403,9 +413,7 @@ def inject_torch_profiler(profiling_output_dir: str, env: dict) -> str:
 
         env["_TFL_TORCH_PROFILE_DIR"] = torch_profile_dir
         existing_pythonpath = env.get("PYTHONPATH", "")
-        env["PYTHONPATH"] = (
-            f"{tmp_dir}{os.pathsep}{existing_pythonpath}" if existing_pythonpath else tmp_dir
-        )
+        env["PYTHONPATH"] = f"{tmp_dir}{os.pathsep}{existing_pythonpath}" if existing_pythonpath else tmp_dir
         return tmp_dir
     except Exception:
         return ""
diff --git a/src/renderer/components/Experiment/Tasks/EmbeddableStreamingOutput.tsx b/src/renderer/components/Experiment/Tasks/EmbeddableStreamingOutput.tsx
index 68039755c..02f357d3d 100644
--- a/src/renderer/components/Experiment/Tasks/EmbeddableStreamingOutput.tsx
+++ b/src/renderer/components/Experiment/Tasks/EmbeddableStreamingOutput.tsx
@@ -26,7 +26,6 @@ import * as chatAPI from 'renderer/lib/transformerlab-api-sdk';
 import { useExperimentInfo } from 'renderer/lib/ExperimentInfoContext';
 import { jobChipColor } from 'renderer/lib/utils';
 import PollingOutputTerminal from './PollingOutputTerminal';
-import ProfilingReport from './ProfilingReport';
 
 interface ProviderLogsTerminalProps {
   logsText: string;
@@ -199,12 +198,11 @@ function RefreshIndicator({
   );
 }
 
-type TabValue = 'output' | 'provider' | 'profiling';
+type TabValue = 'output' | 'provider';
 
 const TAB_OPTIONS: { value: TabValue; label: string }[] = [
   { value: 'output', label: 'Lab SDK Output' },
   { value: 'provider', label: 'Machine Logs' },
-  { value: 'profiling', label: 'Profiling' },
 ];
 
 export interface EmbeddableStreamingOutputProps {
@@ -217,7 +215,7 @@ export interface EmbeddableStreamingOutputProps {
 
 export default function EmbeddableStreamingOutput({
   jobId,
-  tabs: tabsProp = ['output', 'provider', 'profiling'],
+  tabs: tabsProp = ['output', 'provider'],
   jobStatus = '',
 }: EmbeddableStreamingOutputProps) {
   const { experimentInfo } = useExperimentInfo();
@@ -225,8 +223,7 @@ export default function EmbeddableStreamingOutput({
   const [viewLiveProviderLogs, setViewLiveProviderLogs] =
     useState<boolean>(false);
 
-  const tabs =
-    tabsProp.length > 0 ? tabsProp : ['output', 'provider', 'profiling'];
+  const tabs = tabsProp.length > 0 ? tabsProp : ['output', 'provider'];
   const showTabList = tabs.length > 1;
   const tabsKey = tabs.join(',');
 
@@ -335,9 +332,7 @@ export default function EmbeddableStreamingOutput({
           onChange={(_event, value) => {
             if (
               typeof value === 'string' &&
-              (value === 'output' ||
-                value === 'provider' ||
-                value === 'profiling')
+              (value === 'output' || value === 'provider')
             ) {
               setActiveTab(value as TabValue);
             }
@@ -396,17 +391,13 @@ export default function EmbeddableStreamingOutput({
             </>
           )}
         </Box>
-        {activeTab !== 'profiling' && (
-          <RefreshIndicator
-            seconds={
-              activeTab === 'output' ? outputCountdown : providerCountdown
-            }
-            isRefreshing={
-              activeTab === 'output' ? outputIsValidating : providerIsValidating
-            }
-            onRefresh={handleManualRefresh}
-          />
-        )}
+        <RefreshIndicator
+          seconds={activeTab === 'output' ? outputCountdown : providerCountdown}
+          isRefreshing={
+            activeTab === 'output' ? outputIsValidating : providerIsValidating
+          }
+          onRefresh={handleManualRefresh}
+        />
       </Box>
       <Box
         sx={{
@@ -442,19 +433,6 @@ export default function EmbeddableStreamingOutput({
               onMutateReady={handleOutputMutateReady}
             />
           </Box>
-        ) : activeTab === 'profiling' ? (
-          <Box
-            sx={{
-              flex: 1,
-              minHeight: 0,
-              width: '100%',
-              overflowY: 'auto',
-              borderRadius: '8px',
-              border: '1px solid var(--joy-palette-divider)',
-            }}
-          >
-            <ProfilingReport jobId={jobId} />
-          </Box>
         ) : (
           <Box
             sx={{
@@ -534,6 +512,6 @@ export default function EmbeddableStreamingOutput({
 }
 
 EmbeddableStreamingOutput.defaultProps = {
-  tabs: ['output', 'provider', 'profiling'],
+  tabs: ['output', 'provider'],
   jobStatus: '',
 };
diff --git a/src/renderer/components/Experiment/Tasks/JobsList.tsx b/src/renderer/components/Experiment/Tasks/JobsList.tsx
index 2c9fc9f26..7bfe16e3e 100644
--- a/src/renderer/components/Experiment/Tasks/JobsList.tsx
+++ b/src/renderer/components/Experiment/Tasks/JobsList.tsx
@@ -45,6 +45,7 @@ interface JobsListProps {
   onViewInteractive?: (jobId: string) => void;
   onViewJobDatasets?: (jobId: string) => void;
   onViewJobModels?: (jobId: string) => void;
+  onViewProfiling?: (jobId: string) => void;
   onViewFileBrowser?: (jobId: string) => void;
   loading: boolean;
   onViewTrackio?: (jobId: string) => void;
@@ -69,6 +70,7 @@ const JobsList: React.FC<JobsListProps> = ({
   onViewInteractive,
   onViewJobDatasets,
   onViewJobModels,
+  onViewProfiling,
   onViewFileBrowser,
   loading,
   onViewTrackio,
@@ -348,7 +350,8 @@ const JobsList: React.FC<JobsListProps> = ({
                   {(job?.job_data?.artifacts ||
                     job?.job_data?.artifacts_dir ||
                     job?.job_data?.generated_datasets ||
-                    job?.job_data?.models) && (
+                    job?.job_data?.models ||
+                    job?.job_data?.has_profiling) && (
                     <Dropdown>
                       <MenuButton
                         size="sm"
@@ -374,6 +377,11 @@ const JobsList: React.FC<JobsListProps> = ({
                             View Artifacts
                           </MenuItem>
                         )}
+                        {job?.job_data?.has_profiling && (
+                          <MenuItem onClick={() => onViewProfiling?.(job?.id)}>
+                            View Profiling
+                          </MenuItem>
+                        )}
                         {job?.job_data?.generated_datasets && (
                           <MenuItem
                             onClick={() => onViewJobDatasets?.(job?.id)}
diff --git a/src/renderer/components/Experiment/Tasks/Tasks.tsx b/src/renderer/components/Experiment/Tasks/Tasks.tsx
index 0dc956ea5..533c09054 100644
--- a/src/renderer/components/Experiment/Tasks/Tasks.tsx
+++ b/src/renderer/components/Experiment/Tasks/Tasks.tsx
@@ -21,6 +21,7 @@ import DeleteTaskConfirmModal from './DeleteTaskConfirmModal';
 import QueueTaskModal from './QueueTaskModal';
 import ViewOutputModalStreaming from './ViewOutputModalStreaming';
 import ViewArtifactsModal from './ViewArtifactsModal';
+import ViewProfilingModal from './ViewProfilingModal';
 import ViewCheckpointsModal from './ViewCheckpointsModal';
 import ViewEvalResultsModal from './ViewEvalResultsModal';
 import CompareEvalResultsModal from './CompareEvalResultsModal';
@@ -52,6 +53,7 @@ export default function Tasks({ subtype }: { subtype?: string }) {
     useState(-1);
   const [viewCheckpointsFromJob, setViewCheckpointsFromJob] = useState(-1);
   const [viewArtifactsFromJob, setViewArtifactsFromJob] = useState(-1);
+  const [viewProfilingFromJob, setViewProfilingFromJob] = useState(-1);
   const [viewEvalImagesFromJob, setViewEvalImagesFromJob] = useState(-1);
   const [viewOutputFromSweepJob, setViewOutputFromSweepJob] = useState(false);
   const [viewSweepResultsFromJob, setViewSweepResultsFromJob] = useState(-1);
@@ -1270,6 +1272,7 @@ export default function Tasks({ subtype }: { subtype?: string }) {
             setViewCheckpointsFromJob(parseInt(jobId))
           }
           onViewArtifacts={(jobId) => setViewArtifactsFromJob(parseInt(jobId))}
+          onViewProfiling={(jobId) => setViewProfilingFromJob(parseInt(jobId))}
           onViewEvalImages={(jobId) =>
             setViewEvalImagesFromJob(parseInt(jobId))
           }
@@ -1333,6 +1336,11 @@ export default function Tasks({ subtype }: { subtype?: string }) {
         onClose={() => setViewArtifactsFromJob(-1)}
         jobId={viewArtifactsFromJob}
       />
+      <ViewProfilingModal
+        open={viewProfilingFromJob !== -1}
+        onClose={() => setViewProfilingFromJob(-1)}
+        jobId={viewProfilingFromJob}
+      />
       <ViewCheckpointsModal
         open={viewCheckpointsFromJob !== -1}
         onClose={() => setViewCheckpointsFromJob(-1)}
diff --git a/src/renderer/components/Experiment/Tasks/ViewOutputModalStreaming.tsx b/src/renderer/components/Experiment/Tasks/ViewOutputModalStreaming.tsx
index 06ba1ae84..12a30c545 100644
--- a/src/renderer/components/Experiment/Tasks/ViewOutputModalStreaming.tsx
+++ b/src/renderer/components/Experiment/Tasks/ViewOutputModalStreaming.tsx
@@ -5,14 +5,13 @@ import EmbeddableStreamingOutput from './EmbeddableStreamingOutput';
 const TAB_LABELS: Record<string, string> = {
   output: 'Lab SDK Output',
   provider: 'Machine Logs',
-  profiling: 'Profiling',
 };
 
 interface ViewOutputModalStreamingProps {
   jobId: number;
   setJobId: (jobId: number) => void;
   /** Which tabs to show, in order. e.g. ['output', 'provider'] or ['provider'] for interactive tasks. */
-  tabs?: ('output' | 'provider' | 'profiling')[];
+  tabs?: ('output' | 'provider')[];
   /** Current job status string (e.g. 'RUNNING', 'COMPLETE'). */
   jobStatus?: string;
 }
@@ -20,7 +19,7 @@ interface ViewOutputModalStreamingProps {
 function ViewOutputModalStreaming({
   jobId,
   setJobId,
-  tabs = ['output', 'provider', 'profiling'],
+  tabs = ['output', 'provider'],
   jobStatus = '',
 }: ViewOutputModalStreamingProps) {
   if (jobId === -1) {
@@ -62,7 +61,7 @@ function ViewOutputModalStreaming({
 }
 
 ViewOutputModalStreaming.defaultProps = {
-  tabs: ['output', 'provider', 'profiling'],
+  tabs: ['output', 'provider'],
   jobStatus: '',
 };
 
diff --git a/src/renderer/components/Experiment/Tasks/ViewProfilingModal.tsx b/src/renderer/components/Experiment/Tasks/ViewProfilingModal.tsx
new file mode 100644
index 000000000..eaec187df
--- /dev/null
+++ b/src/renderer/components/Experiment/Tasks/ViewProfilingModal.tsx
@@ -0,0 +1,38 @@
+import React from 'react';
+import { Box, Modal, ModalClose, ModalDialog, Typography } from '@mui/joy';
+import ProfilingReport from './ProfilingReport';
+
+interface ViewProfilingModalProps {
+  open: boolean; // forwarded unchanged to <Modal open>
+  onClose: () => void; // forwarded unchanged to <Modal onClose>
+  jobId: number; // shown in the title; -1 suppresses rendering of <ProfilingReport>
+}
+
+export default function ViewProfilingModal({ // modal wrapper that shows <ProfilingReport> for one job
+  open,
+  onClose,
+  jobId,
+}: ViewProfilingModalProps) {
+  return (
+    <Modal open={open} onClose={onClose}>
+      <ModalDialog
+        sx={{
+          width: '90vw',
+          maxWidth: 640,
+          maxHeight: '85vh',
+          overflow: 'hidden', // the dialog itself does not scroll; the inner Box below does
+          display: 'flex',
+          flexDirection: 'column', // title stacked above the report area
+        }}
+      >
+        <ModalClose />
+        <Typography level="title-lg" sx={{ mb: 1 }}>
+          Profiling – Job {jobId}
+        </Typography>
+        <Box sx={{ flex: 1, minHeight: 0, overflow: 'auto' }}>
+          {jobId !== -1 && <ProfilingReport jobId={jobId} /> /* skip the report when jobId is the -1 sentinel */}
+        </Box>
+      </ModalDialog>
+    </Modal>
+  );
+}

From 458e577efab7764596dddba232a2e85ba7243373 Mon Sep 17 00:00:00 2001
From: deep1401 <gandhi0869@gmail.com>
Date: Fri, 13 Mar 2026 12:26:28 -0600
Subject: [PATCH 5/8] sdk version

---
 api/pyproject.toml     | 2 +-
 lab-sdk/pyproject.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/api/pyproject.toml b/api/pyproject.toml
index fada1a5c5..16017528d 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -33,7 +33,7 @@ dependencies = [
   "soundfile==0.13.1",
   "tensorboardX==2.6.2.2",
   "timm==1.0.15",
-  "transformerlab==0.0.98",
+  "transformerlab==0.0.99",
   "transformerlab-inference==0.2.52",
   "transformers==4.57.1",
   "wandb==0.23.1",
diff --git a/lab-sdk/pyproject.toml b/lab-sdk/pyproject.toml
index 5c419da3d..d17f0fc8a 100644
--- a/lab-sdk/pyproject.toml
+++ b/lab-sdk/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "transformerlab"
-version = "0.0.98"
+version = "0.0.99"
 description = "Python SDK for Transformer Lab"
 readme = "README.md"
 requires-python = ">=3.10"

From bebdc729782ee73eb44a72f43759b2dc60033ce8 Mon Sep 17 00:00:00 2001
From: deep1401 <gandhi0869@gmail.com>
Date: Fri, 13 Mar 2026 12:28:58 -0600
Subject: [PATCH 6/8] Restore the setup commands

---
 api/transformerlab/routers/compute_provider.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/api/transformerlab/routers/compute_provider.py b/api/transformerlab/routers/compute_provider.py
index 869c81a94..846a565cb 100644
--- a/api/transformerlab/routers/compute_provider.py
+++ b/api/transformerlab/routers/compute_provider.py
@@ -1694,10 +1694,10 @@ async def launch_template_on_provider(
     # Ensure transformerlab SDK is available on remote machines for live_status tracking and other helpers.
     # This runs after AWS credentials are configured so we have access to any remote storage if needed.
     if provider.type != ProviderType.LOCAL.value:
-        # setup_commands.append("pip install -q transformerlab")
-        setup_commands.append(
-            "git clone https://github.com/transformerlab/transformerlab-app; cd transformerlab-app; git checkout add/profiler; pip install -e lab-sdk/; cd ~;"
-        )
+        setup_commands.append("pip install -q transformerlab")
+        # Install torch as well if torch profiler is enabled
+        if request.enable_profiling_torch:
+            setup_commands.append("pip install -q torch")
 
     # Add GitHub clone setup if enabled
     if request.github_repo_url:

From 0657f333643ac580f288ab40ccb38e7598b6e9f6 Mon Sep 17 00:00:00 2001
From: deep1401 <gandhi0869@gmail.com>
Date: Fri, 13 Mar 2026 12:36:46 -0600
Subject: [PATCH 7/8] retrigger

---
 api/transformerlab/routers/compute_provider.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/api/transformerlab/routers/compute_provider.py b/api/transformerlab/routers/compute_provider.py
index 846a565cb..3e52e90d4 100644
--- a/api/transformerlab/routers/compute_provider.py
+++ b/api/transformerlab/routers/compute_provider.py
@@ -1695,6 +1695,7 @@ async def launch_template_on_provider(
     # This runs after AWS credentials are configured so we have access to any remote storage if needed.
     if provider.type != ProviderType.LOCAL.value:
         setup_commands.append("pip install -q transformerlab")
+
         # Install torch as well if torch profiler is enabled
         if request.enable_profiling_torch:
             setup_commands.append("pip install -q torch")

From 0611d499f032779b2bda218ee26c6cc4f062482e Mon Sep 17 00:00:00 2001
From: deep1401 <gandhi0869@gmail.com>
Date: Mon, 16 Mar 2026 09:14:26 -0600
Subject: [PATCH 8/8] sdk

---
 api/pyproject.toml     | 2 +-
 lab-sdk/pyproject.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/api/pyproject.toml b/api/pyproject.toml
index 16017528d..4da723887 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -33,7 +33,7 @@ dependencies = [
   "soundfile==0.13.1",
   "tensorboardX==2.6.2.2",
   "timm==1.0.15",
-  "transformerlab==0.0.99",
+  "transformerlab==0.1.0",
   "transformerlab-inference==0.2.52",
   "transformers==4.57.1",
   "wandb==0.23.1",
diff --git a/lab-sdk/pyproject.toml b/lab-sdk/pyproject.toml
index d17f0fc8a..53dc601ab 100644
--- a/lab-sdk/pyproject.toml
+++ b/lab-sdk/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "transformerlab"
-version = "0.0.99"
+version = "0.1.0"
 description = "Python SDK for Transformer Lab"
 readme = "README.md"
 requires-python = ">=3.10"