Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
89 commits
Select commit Hold shift + click to select a range
b7f3e42
Add version
mina-parham Mar 6, 2026
0085f7a
Add asset version routers
mina-parham Mar 6, 2026
ff23303
Add asset version routers
mina-parham Mar 6, 2026
cea7182
Ui version
mina-parham Mar 6, 2026
2932b65
Version group
mina-parham Mar 6, 2026
a4a826d
Alembic
mina-parham Mar 6, 2026
929bddf
Merge branch 'main' into add/model-dataset-group
mina-parham Mar 6, 2026
361ee4a
Merge branch 'main' into add/model-dataset-group
mina-parham Mar 9, 2026
cf65d08
Ruff
mina-parham Mar 9, 2026
4d9b36c
Merge branch 'add/model-dataset-group' of https://github.com/transfor…
mina-parham Mar 9, 2026
7216854
Merge conflict
mina-parham Mar 9, 2026
048fad2
Ruff
mina-parham Mar 9, 2026
bc1d8c4
Prettier
mina-parham Mar 9, 2026
af276e8
Merge branch 'main' into add/model-dataset-group
mina-parham Mar 9, 2026
4cc8e42
Fix alembic issue
mina-parham Mar 9, 2026
47e604e
Merge conflict
mina-parham Mar 9, 2026
df7059c
Fix failed tests
mina-parham Mar 9, 2026
bd82eb0
Merge branch 'main' into add/model-dataset-group
deep1401 Mar 10, 2026
c4ae4ee
Fix alembic table down version and fix bug in artifacts function
deep1401 Mar 10, 2026
d3b04b5
Merge branch 'main' into add/model-dataset-group
mina-parham Mar 12, 2026
ff09e24
Add asset version
mina-parham Mar 12, 2026
64f93fc
Update jobs endpoints
mina-parham Mar 12, 2026
95654d4
Add asset version service
mina-parham Mar 12, 2026
e134fec
Update modes endpoint
mina-parham Mar 12, 2026
c9beb7f
Update dataset ui
mina-parham Mar 12, 2026
cf0b63f
Update save to registry dialog
mina-parham Mar 12, 2026
a768a2e
Add dataset modal
mina-parham Mar 12, 2026
6a81ec5
Add model modal
mina-parham Mar 12, 2026
486ba4d
Update the dataset and model ui
mina-parham Mar 12, 2026
5e5102a
Update model ui
mina-parham Mar 12, 2026
6430b5d
Add version drawer
mina-parham Mar 12, 2026
3609538
Update endpoints
mina-parham Mar 12, 2026
3730526
Update alembic version
mina-parham Mar 12, 2026
d3f1820
Add dataset registry multiuser mode
mina-parham Mar 12, 2026
de09a30
Add model registry multi user mode
mina-parham Mar 12, 2026
81d67e3
Update endpoints
mina-parham Mar 12, 2026
47cc2e6
Make dataset ui better
mina-parham Mar 12, 2026
381e4ea
Make model ui better
mina-parham Mar 12, 2026
5010b4f
Make dataset ui better
mina-parham Mar 12, 2026
0f1bf42
Make the model ui better
mina-parham Mar 12, 2026
07e13ad
Merge branch 'main' into add/model-dataset-group
mina-parham Mar 12, 2026
ba28351
Ruff
mina-parham Mar 12, 2026
359c223
Prettier
mina-parham Mar 12, 2026
6d39e5b
Merge branch 'add/model-dataset-group' of https://github.com/transfor…
mina-parham Mar 12, 2026
f6fddf8
Merge branch 'main' into add/model-dataset-group
mina-parham Mar 13, 2026
4983e88
Merge remote-tracking branch 'origin/main' into add/model-dataset-group
mina-parham Mar 13, 2026
cf2d46c
Merge branch 'main' into add/model-dataset-group
deep1401 Mar 13, 2026
9d47020
Merge branch 'main' into add/model-dataset-group
mina-parham Mar 16, 2026
a532aa5
Merge branch 'main' into add/model-dataset-group
mina-parham Mar 17, 2026
a232fda
Merge branch 'main' into add/model-dataset-group
mina-parham Mar 18, 2026
573f5ca
Remove asset version table
mina-parham Mar 18, 2026
5fe5ccf
Merge branch 'add/model-dataset-group' of https://github.com/transfor…
mina-parham Mar 18, 2026
65d42aa
Remove metadata table
mina-parham Mar 18, 2026
5495c7f
Update asset version routers
mina-parham Mar 18, 2026
a5a629d
Update jobs routers
mina-parham Mar 18, 2026
db6ea3e
Update asset version
mina-parham Mar 18, 2026
b72e103
Add asset group dirs
mina-parham Mar 18, 2026
9e00a8a
Update models
mina-parham Mar 18, 2026
c1131b1
Update data registry ui
mina-parham Mar 18, 2026
008bd0e
Update data dialog
mina-parham Mar 18, 2026
890dc37
Update model dialog
mina-parham Mar 18, 2026
576897a
Update model modal
mina-parham Mar 18, 2026
e45e694
Update model registry ui
mina-parham Mar 18, 2026
97f0809
Update asset version drawer
mina-parham Mar 18, 2026
d264f0c
Update version ui
mina-parham Mar 18, 2026
8c9f65b
Update endpoints
mina-parham Mar 18, 2026
c6cfb5d
Ruff
mina-parham Mar 18, 2026
b8a4668
Prettier
mina-parham Mar 18, 2026
9d5e99c
Prettier
mina-parham Mar 18, 2026
8a86bde
Merge branch 'main' into add/model-dataset-group
mina-parham Mar 18, 2026
d62d451
Fix failed test
mina-parham Mar 18, 2026
454be16
Merge branch 'add/model-dataset-group' of https://github.com/transfor…
mina-parham Mar 18, 2026
1e208a4
Update test
mina-parham Mar 18, 2026
b6a540b
Make save to registry async
mina-parham Mar 18, 2026
2e5109a
Make the ui better
mina-parham Mar 18, 2026
8af076f
Merge branch 'main' into add/model-dataset-group
mina-parham Mar 18, 2026
6e0a768
Ruff
mina-parham Mar 18, 2026
1571740
Prettier
mina-parham Mar 18, 2026
a89875d
Merge branch 'add/model-dataset-group' of https://github.com/transfor…
mina-parham Mar 18, 2026
4bfac62
Revert "Update test"
mina-parham Mar 18, 2026
a93f186
Fix pytest
mina-parham Mar 18, 2026
dc283d0
Use background task
mina-parham Mar 18, 2026
435b474
Merge branch 'main' into add/model-dataset-group
mina-parham Mar 18, 2026
8c0ff17
Ruff
mina-parham Mar 18, 2026
1ffac84
Add model name
mina-parham Mar 18, 2026
d88d566
Add model name in the ui
mina-parham Mar 18, 2026
03df35b
Merge branch 'main' into add/model-dataset-group
mina-parham Mar 18, 2026
6cfcd2d
Ruff
mina-parham Mar 18, 2026
887f040
Fix tests
mina-parham Mar 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def _enable_datadog_if_setup():
api_keys,
quota,
ssh_keys,
asset_versions,
trackio,
)
from transformerlab.routers.auth import get_user_and_team # noqa: E402
Expand Down Expand Up @@ -339,6 +340,7 @@ async def validation_exception_handler(request, exc):
app.include_router(api_keys.router)
app.include_router(quota.router)
app.include_router(ssh_keys.router, dependencies=[Depends(get_user_and_team)])
app.include_router(asset_versions.router, dependencies=[Depends(get_user_and_team)])
app.include_router(trackio.router, dependencies=[Depends(get_user_and_team)])


Expand Down
119 changes: 24 additions & 95 deletions api/test/api/test_job_save_to_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,26 +96,18 @@ def test_list_job_datasets_invalid_job_id(client, tmp_workspace):


def test_save_dataset_to_registry_copies_files(client, tmp_workspace):
"""Saving a dataset copies it from job dir to global datasets registry."""
"""Saving a dataset triggers the background copy to the global datasets registry."""
job_id = "42"
dataset_name = "my-dataset"
_seed_job_dataset(tmp_workspace, job_id, dataset_name, content='{"row":1}')

# Not in the registry yet
registry_path = tmp_workspace["datasets_dir"] / dataset_name
assert not registry_path.exists()

resp = client.post(f"/experiment/alpha/jobs/{job_id}/datasets/{dataset_name}/save_to_registry")
assert resp.status_code == 200
assert resp.json()["status"] == "success"

# Now in the registry
assert registry_path.exists()
assert (registry_path / "data.jsonl").read_text() == '{"row":1}'
assert resp.json()["status"] == "started"


def test_save_dataset_to_registry_duplicate_gets_timestamped_name(client, tmp_workspace):
"""Duplicate dataset name in registry gets a unique timestamped suffix."""
"""Duplicate dataset name: endpoint still returns started (copy runs in background)."""
job_id = "42"
dataset_name = "dup-dataset"

Expand All @@ -130,15 +122,7 @@ def test_save_dataset_to_registry_duplicate_gets_timestamped_name(client, tmp_wo
assert resp.status_code == 200

body = resp.json()
assert body["status"] == "success"
# Extract name from "Dataset saved to registry as '<name>'"
saved_name = body["message"].split("'")[1]
assert saved_name.startswith(dataset_name)
assert saved_name != dataset_name

# Both versions should exist
assert existing.exists()
assert (tmp_workspace["datasets_dir"] / saved_name).exists()
assert body["status"] == "started"


def test_save_nonexistent_dataset_returns_404(client, tmp_workspace):
Expand Down Expand Up @@ -189,24 +173,18 @@ def test_list_job_models_invalid_job_id(client, tmp_workspace):


def test_save_model_to_registry_copies_files(client, tmp_workspace):
"""Saving a model copies it from job dir to global models registry."""
"""Saving a model triggers the background copy to the global models registry."""
job_id = "42"
model_name = "my-model"
_seed_job_model(tmp_workspace, job_id, model_name, content="weights-v1")

registry_path = tmp_workspace["models_dir"] / model_name
assert not registry_path.exists()

resp = client.post(f"/experiment/alpha/jobs/{job_id}/models/{model_name}/save_to_registry")
assert resp.status_code == 200
assert resp.json()["status"] == "success"

assert registry_path.exists()
assert (registry_path / "model.safetensors").read_text() == "weights-v1"
assert resp.json()["status"] == "started"


def test_save_model_to_registry_duplicate_gets_timestamped_name(client, tmp_workspace):
"""Duplicate model name in registry gets a unique timestamped suffix."""
"""Duplicate model name: endpoint still returns started (copy runs in background)."""
job_id = "42"
model_name = "dup-model"

Expand All @@ -220,13 +198,7 @@ def test_save_model_to_registry_duplicate_gets_timestamped_name(client, tmp_work
assert resp.status_code == 200

body = resp.json()
assert body["status"] == "success"
saved_name = body["message"].split("'")[1]
assert saved_name.startswith(model_name)
assert saved_name != model_name

assert existing.exists()
assert (tmp_workspace["models_dir"] / saved_name).exists()
assert body["status"] == "started"


def test_save_nonexistent_model_returns_404(client, tmp_workspace):
Expand All @@ -244,7 +216,7 @@ def test_save_nonexistent_model_returns_404(client, tmp_workspace):


def test_save_dataset_and_model_from_same_job(client, tmp_workspace):
"""A job with both a dataset and model can save each to the registry."""
"""A job with both a dataset and model can trigger saves to the registry."""
job_id = "100"
dataset_name = "generated-ds"
model_name = "finetuned-model"
Expand All @@ -253,10 +225,6 @@ def test_save_dataset_and_model_from_same_job(client, tmp_workspace):
_seed_job_dataset(tmp_workspace, job_id, dataset_name, content='{"prompt":"hi"}')
_seed_job_model(tmp_workspace, job_id, model_name, content="trained-weights")

# Neither should be in the registry yet
assert not (tmp_workspace["datasets_dir"] / dataset_name).exists()
assert not (tmp_workspace["models_dir"] / model_name).exists()

# List and verify they show up in job artifacts
ds_resp = client.get(f"/experiment/alpha/jobs/{job_id}/datasets")
assert ds_resp.status_code == 200
Expand All @@ -266,23 +234,14 @@ def test_save_dataset_and_model_from_same_job(client, tmp_workspace):
assert model_resp.status_code == 200
assert model_name in [m["name"] for m in model_resp.json()["models"]]

# Save both to registry
# Save both to registry — both should start successfully
ds_save = client.post(f"/experiment/alpha/jobs/{job_id}/datasets/{dataset_name}/save_to_registry")
assert ds_save.status_code == 200
assert ds_save.json()["status"] == "success"
assert ds_save.json()["status"] == "started"

model_save = client.post(f"/experiment/alpha/jobs/{job_id}/models/{model_name}/save_to_registry")
assert model_save.status_code == 200
assert model_save.json()["status"] == "success"

# Verify both now exist in the registry with correct content
reg_ds = tmp_workspace["datasets_dir"] / dataset_name
assert reg_ds.exists()
assert (reg_ds / "data.jsonl").read_text() == '{"prompt":"hi"}'

reg_model = tmp_workspace["models_dir"] / model_name
assert reg_model.exists()
assert (reg_model / "model.safetensors").read_text() == "trained-weights"
assert model_save.json()["status"] == "started"


# ---------------------------------------------------------------------------
Expand All @@ -291,29 +250,22 @@ def test_save_dataset_and_model_from_same_job(client, tmp_workspace):


def test_save_dataset_to_registry_with_custom_name(client, tmp_workspace):
"""Saving a dataset with a custom target_name uses that name in the registry."""
"""Saving a dataset with a custom target_name starts the background copy."""
job_id = "42"
dataset_name = "my-dataset"
custom_name = "custom-dataset"
_seed_job_dataset(tmp_workspace, job_id, dataset_name, content='{"row":1}')

registry_path = tmp_workspace["datasets_dir"] / custom_name
assert not registry_path.exists()

resp = client.post(
f"/experiment/alpha/jobs/{job_id}/datasets/{dataset_name}/save_to_registry",
params={"target_name": custom_name, "mode": "new"},
)
assert resp.status_code == 200
assert resp.json()["status"] == "success"
assert custom_name in resp.json()["message"]

assert registry_path.exists()
assert (registry_path / "data.jsonl").read_text() == '{"row":1}'
assert resp.json()["status"] == "started"


def test_save_dataset_to_registry_custom_name_duplicate_gets_timestamp(client, tmp_workspace):
"""Saving with a custom name that already exists adds a timestamp suffix."""
"""Saving with a custom name that already exists: endpoint still returns started."""
job_id = "42"
dataset_name = "my-dataset"
custom_name = "existing-ds"
Expand All @@ -329,15 +281,7 @@ def test_save_dataset_to_registry_custom_name_duplicate_gets_timestamp(client, t
params={"target_name": custom_name, "mode": "new"},
)
assert resp.status_code == 200
body = resp.json()
saved_name = body["message"].split("'")[1]
assert saved_name.startswith(custom_name)
assert saved_name != custom_name

# Original untouched
assert (existing / "data.jsonl").read_text() == "v1"
# New copy exists
assert (tmp_workspace["datasets_dir"] / saved_name).exists()
assert resp.json()["status"] == "started"


# ---------------------------------------------------------------------------
Expand All @@ -346,7 +290,7 @@ def test_save_dataset_to_registry_custom_name_duplicate_gets_timestamp(client, t


def test_save_dataset_to_existing_registry_entry(client, tmp_workspace):
"""mode='existing' merges files into an existing registry dataset."""
"""mode='existing' triggers background merge into an existing registry dataset."""
job_id = "42"
dataset_name = "my-dataset"
existing_name = "registry-dataset"
Expand All @@ -362,8 +306,7 @@ def test_save_dataset_to_existing_registry_entry(client, tmp_workspace):
params={"target_name": existing_name, "mode": "existing"},
)
assert resp.status_code == 200
assert resp.json()["status"] == "success"
assert "merged" in resp.json()["message"].lower() or existing_name in resp.json()["message"]
assert resp.json()["status"] == "started"


def test_save_dataset_to_existing_requires_target_name(client, tmp_workspace):
Expand Down Expand Up @@ -398,29 +341,22 @@ def test_save_dataset_to_nonexistent_existing_returns_404(client, tmp_workspace)


def test_save_model_to_registry_with_custom_name(client, tmp_workspace):
"""Saving a model with a custom target_name uses that name in the registry."""
"""Saving a model with a custom target_name starts the background copy."""
job_id = "42"
model_name = "my-model"
custom_name = "custom-model"
_seed_job_model(tmp_workspace, job_id, model_name, content="weights-v1")

registry_path = tmp_workspace["models_dir"] / custom_name
assert not registry_path.exists()

resp = client.post(
f"/experiment/alpha/jobs/{job_id}/models/{model_name}/save_to_registry",
params={"target_name": custom_name, "mode": "new"},
)
assert resp.status_code == 200
assert resp.json()["status"] == "success"
assert custom_name in resp.json()["message"]

assert registry_path.exists()
assert (registry_path / "model.safetensors").read_text() == "weights-v1"
assert resp.json()["status"] == "started"


def test_save_model_to_registry_custom_name_duplicate_gets_timestamp(client, tmp_workspace):
"""Saving with a custom name that already exists adds a timestamp suffix."""
"""Saving with a custom name that already exists: endpoint still returns started."""
job_id = "42"
model_name = "my-model"
custom_name = "existing-model"
Expand All @@ -435,13 +371,7 @@ def test_save_model_to_registry_custom_name_duplicate_gets_timestamp(client, tmp
params={"target_name": custom_name, "mode": "new"},
)
assert resp.status_code == 200
body = resp.json()
saved_name = body["message"].split("'")[1]
assert saved_name.startswith(custom_name)
assert saved_name != custom_name

assert (existing / "model.safetensors").read_text() == "weights-v1"
assert (tmp_workspace["models_dir"] / saved_name).exists()
assert resp.json()["status"] == "started"


# ---------------------------------------------------------------------------
Expand All @@ -450,7 +380,7 @@ def test_save_model_to_registry_custom_name_duplicate_gets_timestamp(client, tmp


def test_save_model_to_existing_registry_entry(client, tmp_workspace):
"""mode='existing' merges files into an existing registry model."""
"""mode='existing' triggers background merge into an existing registry model."""
job_id = "42"
model_name = "my-model"
existing_name = "registry-model"
Expand All @@ -465,8 +395,7 @@ def test_save_model_to_existing_registry_entry(client, tmp_workspace):
params={"target_name": existing_name, "mode": "existing"},
)
assert resp.status_code == 200
assert resp.json()["status"] == "success"
assert "merged" in resp.json()["message"].lower() or existing_name in resp.json()["message"]
assert resp.json()["status"] == "started"


def test_save_model_to_existing_requires_target_name(client, tmp_workspace):
Expand Down
Loading
Loading