Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions ramalama/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
# https://github.com/CloudNativeAI/model-spec

# ArtifactTypeModelManifest specifies the media type for a model manifest.
ArtifactTypeModelManifest = "application/vnd.cnai.model.manifest.v1+json"
ArtifactTypeModelManifest = "application/vnd.cncf.model.manifest.v1+json"

# ArtifactTypeModelLayer is the media type used for layers referenced by the
# manifest.
ArtifactTypeModelLayer = "application/vnd.cnai.model.layer.v1.tar"
ArtifactTypeModelLayer = "application/vnd.cncf.model.layer.v1.tar"

# ArtifactTypeModelLayerGzip is the media type used for gzipped layers
# referenced by the manifest.
ArtifactTypeModelLayerGzip = "application/vnd.cnai.model.layer.v1.tar+gzip"
ArtifactTypeModelLayerGzip = "application/vnd.cncf.model.layer.v1.tar+gzip"

# AnnotationCreated is the annotation key for the date and time on which the
# model was built (date-time string as defined by RFC 3339).
Expand Down Expand Up @@ -76,7 +76,7 @@
# DEPRECATED: Migrate to AnnotationFilepath
# AnnotationModel is the annotation key for the layer is a model file (boolean),
# such as `true` or `false`.
AnnotationModel = "org.cnai.model.model"
AnnotationModel = "org.cncf.model.model"

# AnnotationFilepath is the annotation key for the file path of the layer.
AnnotationFilepath = "org.cnai.model.filepath"
AnnotationFilepath = "org.cncf.model.filepath"
5 changes: 3 additions & 2 deletions ramalama/chat_providers/api_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ def get_provider_api_key(scheme: str) -> str | None:
"""Return a configured API key for the given provider scheme, if any."""

if resolver := PROVIDER_API_KEY_RESOLVERS.get(scheme):
return resolver()
Copy link
Collaborator

@olliewalsh olliewalsh Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Every unrelated change...

e.g this doesn't look related to artifact pulling, but it's not obvious whether it is an intentional change or a rebase issue

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just better edge case handling, no grand conspiracy here.

if key := resolver():
return key
return get_config().api_key


Expand All @@ -26,7 +27,7 @@ def get_provider_api_key(scheme: str) -> str | None:

def get_chat_provider(scheme: str) -> ChatProvider:
if (resolver := DEFAULT_PROVIDERS.get(scheme, None)) is None:
raise ValueError(f"No support chat providers for {scheme}")
raise ValueError(f"No supported chat provider for {scheme}")
return resolver()


Expand Down
2 changes: 1 addition & 1 deletion ramalama/chat_providers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def list_models(self) -> list[str]:
if exc.code in (401, 403):
message = (
f"Could not authenticate with {self.provider}."
"The provided API key was either missing or invalid.\n"
" The provided API key was either missing or invalid.\n"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's an issue but not related to this feature so I would remove and address in a follow-up

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Come on man.

Copy link
Collaborator

@olliewalsh olliewalsh Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Come on man.

Ditto. If this was a trivial PR then sure. However it is a huge PR. Every unrelated change just reduces the signal to noise ratio, requiring far more effort to identify and review the relevant changes.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also more likely to cause conflicts with other PRs

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR wasn't as large when it was opened in... November? Dan opened his PR with podman in particular after I opened this but we agreed to push his through and rebase this into it which forced the scope to expand.

I just don't think you have any appreciation for how much time I've spent rebasing this thing to keep up with main over the past four months with zero eyes.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also more likely to cause conflicts with other PRs

This isn't a credible concern applied to correcting an errant whitespace.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Every unrelated change...

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR wasn't as large when it was opened in... November? Dan opened his PR with podman in particular after I opened this but we agreed to push his through and rebase this into it which forced the scope to expand.

I just don't think you have any appreciation for how much time I've spent rebasing this thing to keep up with main over the past four months with zero eyes.

Not sure why that frustration is directed my way when the feature was mostly a collaboration between you and @rhatdan.

Resolving merge conflicts when rebasing a branch is never fun. It just can't be avoided sometimes, esp with large PRs or multiple PRs that depend on each other.

Cleaning up a bad rebase really is not fun. It is not how I wanted to spend my Friday afternoon. I've pretty much dropped everything to get this PR merged again ASAP before something else conflicts with it.

Copy link
Collaborator Author

@ieaves ieaves Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure why that frustration is directed my way {...} I've pretty much dropped everything to get this PR merged again ASAP before something else conflicts with it.

Because this thread is almost entirely nits. This thread is about an entirely cosmetic whitespace change that, were it a problem, could have been flagged during review. This is not the appropriate context for long drawn out back and forths about these sorts of stylistic preferences.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 yeah, this change is nothing - just was the first of many so the thread spawned from here. Most of the unrelated changes that made this PR quite difficult to review have been addressed now in any case

f"Set RAMALAMA_API_KEY or ramalama.provider.<provider_name>.api_key."
)
try:
Expand Down
11 changes: 3 additions & 8 deletions ramalama/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,6 @@ def info_cli(args: DefaultArgsType) -> None:
message = f"{name}={source} ({config_source})"
print(message)
return

info: dict[str, Any] = {
"Accelerator": get_accel(),
"Config": load_file_config(),
Expand Down Expand Up @@ -950,6 +949,8 @@ def push_cli(args):

if args.TARGET:
shortnames = get_shortnames()
if source_model.type == "OCI":
raise ValueError(f"converting from an OCI based image {args.SOURCE} is not supported")
target = shortnames.resolve(args.TARGET)

target_model = New(target, args)
Expand Down Expand Up @@ -1642,12 +1643,7 @@ def _rm_model(models, args):

try:
m = New(model, args)
if m.remove(args):
continue
# Failed to remove and might be OCI so attempt to remove OCI
if args.ignore:
_rm_oci_model(model, args)
continue
m.remove(args)
except (KeyError, subprocess.CalledProcessError) as e:
for prefix in MODEL_TYPES:
if model.startswith(prefix + "://"):
Expand Down Expand Up @@ -1723,7 +1719,6 @@ def inspect_cli(args):
if not args.MODEL:
parser = get_parser()
parser.error("inspect requires MODEL")

args.pull = "never"

model = New(args.MODEL, args)
Expand Down
40 changes: 38 additions & 2 deletions ramalama/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
import subprocess
import sys
from collections.abc import Callable, Sequence
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
from typing import IO, TYPE_CHECKING, Any, Literal, Optional, Protocol, TypeAlias, TypedDict, cast, get_args

import yaml
Expand Down Expand Up @@ -294,10 +296,11 @@ def genname():
return "ramalama-" + "".join(random.choices(string.ascii_letters + string.digits, k=10))


def engine_version(engine: SUPPORTED_ENGINES) -> str:
@lru_cache
def engine_version(engine: SUPPORTED_ENGINES | Path | str) -> SemVer:
# Create manifest list for target with imageid
cmd_args = [str(engine), "version", "--format", "{{ .Client.Version }}"]
return run_cmd(cmd_args, encoding="utf-8").stdout.strip()
return SemVer.parse(run_cmd(cmd_args, encoding="utf-8").stdout.strip())


class CDI_DEVICE(TypedDict):
Expand Down Expand Up @@ -712,3 +715,36 @@ def __str__(self):

def __repr__(self):
return repr(self.entrypoint)


# SemVer 2.0.0 grammar (see https://semver.org). Captures major/minor/patch
# plus optional pre-release and build-metadata groups; numeric identifiers
# may not carry leading zeros.
SEMVER_RE = re.compile(
    r"^(?P<major>0|[1-9]\d*)\."
    r"(?P<minor>0|[1-9]\d*)\."
    r"(?P<patch>0|[1-9]\d*)"
    r"(?:-(?P<prerelease>"
    r"(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)"
    r"(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*"
    r"))?"
    r"(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
)


@dataclass(frozen=True, order=True)
class SemVer:
    """An immutable, ordered semantic version triple (major, minor, patch).

    Field declaration order drives the generated comparisons, which matches
    SemVer precedence for release versions. Pre-release and build metadata
    are accepted by the parser but not retained, so e.g. ``1.0.0-alpha``
    compares equal to ``1.0.0``.
    """

    major: int
    minor: int
    patch: int

    @classmethod
    def parse(cls, s: str) -> "SemVer":
        """Parse *s* as a SemVer 2.0.0 string; raises ValueError if invalid."""
        return parse_semver(s)


def parse_semver(s: str) -> SemVer:
    """Return a SemVer parsed from *s*.

    Raises:
        ValueError: if *s* is not a valid SemVer 2.0.0 string.

    NOTE: SemVer is defined above this function so the return annotation
    resolves at definition time even without postponed evaluation of
    annotations (PEP 563) being enabled at the top of the file.
    """
    m = SEMVER_RE.fullmatch(s)
    if not m:
        raise ValueError(f"Not a valid SemVer 2.0.0: {s!r}")
    major = int(m.group("major"))
    minor = int(m.group("minor"))
    patch = int(m.group("patch"))
    return SemVer(major, minor, patch)
8 changes: 4 additions & 4 deletions ramalama/compose.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import os
import shlex
from typing import Optional, Tuple
from typing import Optional

from ramalama.common import RAG_DIR, get_accel_env_vars
from ramalama.file import PlainFile
Expand All @@ -13,9 +13,9 @@ class Compose:
def __init__(
self,
model_name: str,
model_paths: Tuple[str, str],
chat_template_paths: Optional[Tuple[str, str]],
mmproj_paths: Optional[Tuple[str, str]],
model_paths: tuple[str, str],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could drop this file from the PR

chat_template_paths: Optional[tuple[str, str]],
mmproj_paths: Optional[tuple[str, str]],
args,
exec_args,
):
Expand Down
97 changes: 85 additions & 12 deletions ramalama/oci_tools.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import json
import subprocess
from dataclasses import dataclass
from datetime import datetime
from itertools import chain
from typing import TypedDict

import ramalama.annotations as annotations
from ramalama.arg_types import EngineArgType
from ramalama.common import engine_version, run_cmd
from ramalama.common import SemVer, engine_version, run_cmd
from ramalama.logger import logger

ocilabeltype = "org.containers.type"
Expand Down Expand Up @@ -69,21 +71,33 @@ def list_artifacts(args: EngineArgType):
except subprocess.CalledProcessError as e:
logger.debug(e)
return []
if output == "":
return []

artifacts = json.loads(f"[{output[:-1]}]")
models: list[ListModelResponse] = []
try:
artifacts = json.loads(f"[{output[:-1]}]")
except json.JSONDecodeError:
return []

models = []
for artifact in artifacts:
conman_args = [
args.engine,
"artifact",
"inspect",
artifact["ID"],
]
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
try:
output = run_cmd(conman_args, ignore_stderr=True).stdout.decode("utf-8").strip()
except Exception:
continue
Comment on lines +92 to +93
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Catching a broad Exception can hide unexpected errors. It's better to be more specific about the exceptions you expect to handle. In this case, run_cmd can raise subprocess.CalledProcessError and .decode() can raise UnicodeDecodeError. Catching these specific exceptions makes the code more robust and easier to debug.

Suggested change
except Exception:
continue
except (subprocess.CalledProcessError, UnicodeDecodeError):
continue


if output == "":
continue
inspect = json.loads(output)
try:
inspect = json.loads(output)
except json.JSONDecodeError:
continue
if "Manifest" not in inspect:
continue
if "artifactType" not in inspect["Manifest"]:
Expand All @@ -103,8 +117,12 @@ def list_artifacts(args: EngineArgType):
def engine_supports_manifest_attributes(engine) -> bool:
if not engine or engine == "" or engine == "docker":
return False
if engine == "podman" and engine_version(engine) < "5":
return False
if engine == "podman":
try:
if engine_version(engine) < SemVer(5, 0, 0):
return False
except Exception:
return False
return True


Expand Down Expand Up @@ -227,12 +245,67 @@ def list_images(args: EngineArgType) -> list[ListModelResponse]:


def list_models(args: EngineArgType) -> list[ListModelResponse]:
conman = args.engine
if conman is None:
if args.engine is None:
return []

models = list_images(args)
models.extend(list_manifests(args))
models.extend(list_artifacts(args))
model_gen = chain(list_images(args), list_manifests(args), list_artifacts(args))

seen: set[str] = set()
models: list[ListModelResponse] = []
for m in model_gen:
if (name := m["name"]) in seen:
continue
seen.add(name)
models.append(m)
return models


@dataclass(frozen=True)
class OciRef:
registry: str
repository: str
specifier: str # Either the digest or the tag
tag: str | None = None
digest: str | None = None

def __str__(self) -> str:
if self.digest:
return f"{self.registry}/{self.repository}@{self.digest}"
return f"{self.registry}/{self.repository}:{self.tag or self.specifier}"

@staticmethod
def from_ref_string(ref: str) -> "OciRef":
return split_oci_reference(ref)


def split_oci_reference(ref: str, default_registry: str = "docker.io") -> OciRef:
ref = ref.strip()

name, digest = ref.split("@", 1) if "@" in ref else (ref, None)

slash = name.rfind("/")
colon = name.rfind(":")
if colon > slash:
name, tag = name[:colon], name[colon + 1 :]
else:
tag = None

parts = name.split("/", 1)
if len(parts) == 1:
registry = default_registry
repository = parts[0]
else:
first, rest = parts[0], parts[1]
if first == "localhost" or "." in first or ":" in first:
registry = first
repository = rest
else:
registry = default_registry
repository = name # keep full path

specifier = digest or tag
if specifier is None:
tag = "latest"
specifier = tag

return OciRef(registry=registry, repository=repository, tag=tag, digest=digest, specifier=specifier)
3 changes: 1 addition & 2 deletions ramalama/rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from ramalama.engine import BuildEngine, Engine, is_healthy, stop_container, wait_for_healthy
from ramalama.path_utils import get_container_mount_path
from ramalama.transports.base import Transport
from ramalama.transports.oci import OCI
from ramalama.transports.oci.oci import OCI
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: from ramalama.transports.oci import OCI should still work


INPUT_DIR = "/docs"

Expand Down Expand Up @@ -197,7 +197,6 @@ def serve(self, args: RagArgsType, cmd: list[str]):
stop_container(args.model_args, args.model_args.name, remove=True)

def run(self, args: RagArgsType, cmd: list[str]):

args.model_args.name = self.imodel.get_container_name(args.model_args)
process = self.imodel.serve_nonblocking(args.model_args, self.model_cmd)
rag_process = self.serve_nonblocking(args, cmd)
Expand Down
Loading
Loading