-
Notifications
You must be signed in to change notification settings - Fork 307
Open
Labels
bug: Something isn't working
Description
Issue Description
Trying to generate quadlet files or docker-compose files via the serve command with the llama-stack API fails: no config files are generated, and the pod is simply started instead. Generating kube or quadlet/kube works as expected.
Steps to reproduce the issue
Just run
ramalama serve gemma3 --api=llama-stack --generate=compose
or
ramalama serve gemma3 --api=llama-stack --generate=quadlet
Describe the results you received
Both commands just start up the pod but do not generate any files.
Describe the results you expected
The generated files.
ramalama info output
{
"Accelerator": "cuda",
"Config": {
"benchmarks": {},
"provider": {
"openai": {}
},
"settings": {
"config_files": [
"/home/christian/.local/share/uv/tools/ramalama/share/ramalama/ramalama.conf"
]
},
"user": {}
},
"Engine": {
"Info": {
"host": {
"arch": "amd64",
"buildahVersion": "1.42.2",
"cgroupControllers": [
"memory",
"pids"
],
"cgroupManager": "systemd",
"cgroupVersion": "v2",
"conmon": {
"package": "conmon-1:2.2.1-1.1",
"path": "/usr/bin/conmon",
"version": "conmon version 2.2.1, commit: c8cc2c4db27531bd4e084ce7857f73cd21ee639d"
},
"cpuUtilization": {
"idlePercent": 94.7,
"systemPercent": 1.72,
"userPercent": 3.58
},
"cpus": 8,
"databaseBackend": "sqlite",
"distribution": {
"distribution": "cachyos",
"version": "unknown"
},
"eventLogger": "journald",
"freeLocks": 2039,
"hostname": "kafka",
"idMappings": {
"gidmap": [
{
"container_id": 0,
"host_id": 1000,
"size": 1
},
{
"container_id": 1,
"host_id": 100000,
"size": 65536
}
],
"uidmap": [
{
"container_id": 0,
"host_id": 1000,
"size": 1
},
{
"container_id": 1,
"host_id": 100000,
"size": 65536
}
]
},
"kernel": "6.18.13-2-cachyos-lts",
"linkmode": "dynamic",
"logDriver": "journald",
"memFree": 799338496,
"memTotal": 16624513024,
"networkBackend": "netavark",
"networkBackendInfo": {
"backend": "netavark",
"dns": {
"package": "aardvark-dns-1.17.0-1.1",
"path": "/usr/lib/podman/aardvark-dns",
"version": "aardvark-dns 1.17.0"
},
"package": "netavark-1.17.2-1.1",
"path": "/usr/lib/podman/netavark",
"version": "netavark 1.17.2"
},
"ociRuntime": {
"name": "crun",
"package": "crun-1.26-3.1",
"path": "/usr/bin/crun",
"version": "crun version 1.26\ncommit: 3241e671f92c33b0c003cd7de319e4f32add6231\nrundir: /run/user/1000/crun\nspec: 1.0.0\n+SYSTEMD +SELINUX +APPARMOR +CAP +SECCOMP +EBPF +CRIU +LIBKRUN +YAJL"
},
"os": "linux",
"pasta": {
"executable": "/usr/bin/pasta",
"package": "passt-2026_01_20.386b5f5-1.1",
"version": "pasta 2026_01_20.386b5f5\nCopyright Red Hat\nGNU General Public License, version 2 or later\n <https://www.gnu.org/licenses/old-licenses/gpl-2.0.html>\nThis is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.\n"
},
"remoteSocket": {
"exists": true,
"path": "/run/user/1000/podman/podman.sock"
},
"rootlessNetworkCmd": "pasta",
"security": {
"apparmorEnabled": false,
"capabilities": "CAP_CHOWN,CAP_DAC_OVERRIDE,CAP_FOWNER,CAP_FSETID,CAP_KILL,CAP_NET_BIND_SERVICE,CAP_SETFCAP,CAP_SETGID,CAP_SETPCAP,CAP_SETUID,CAP_SYS_CHROOT",
"rootless": true,
"seccompEnabled": true,
"seccompProfilePath": "/etc/containers/seccomp.json",
"selinuxEnabled": false
},
"serviceIsRemote": false,
"slirp4netns": {
"executable": "",
"package": "",
"version": ""
},
"swapFree": 16624058368,
"swapTotal": 16624119808,
"uptime": "2h 32m 50.00s (Approximately 0.08 days)",
"variant": ""
},
"plugins": {
"authorization": null,
"log": [
"k8s-file",
"none",
"passthrough",
"journald"
],
"network": [
"bridge",
"macvlan",
"ipvlan"
],
"volume": [
"local"
]
},
"registries": {},
"store": {
"configFile": "/home/christian/.config/containers/storage.conf",
"containerStore": {
"number": 8,
"paused": 0,
"running": 5,
"stopped": 3
},
"graphDriverName": "overlay",
"graphOptions": {},
"graphRoot": "/home/christian/.local/share/containers/storage",
"graphRootAllocated": 838860800000,
"graphRootUsed": 79664758784,
"graphStatus": {
"Backing Filesystem": "btrfs",
"Native Overlay Diff": "true",
"Supports d_type": "true",
"Supports shifting": "false",
"Supports volatile": "true",
"Using metacopy": "false"
},
"imageCopyTmpDir": "/var/tmp",
"imageStore": {
"number": 33
},
"runRoot": "/run/user/1000/containers",
"transientStore": false,
"volumePath": "/home/christian/.local/share/containers/storage/volumes"
},
"version": {
"APIVersion": "5.7.1",
"Built": 1765448784,
"BuiltTime": "Thu Dec 11 11:26:24 2025",
"GitCommit": "f845d14e941889ba4c071f35233d09b29d363c75",
"GoVersion": "go1.25.5 X:nodwarf5",
"Os": "linux",
"OsArch": "linux/amd64",
"Version": "5.7.1"
}
},
"Name": "podman"
},
"Image": "quay.io/ramalama/cuda:latest",
"Inference": {
"Default": "llama.cpp",
"Engines": {
"llama.cpp": "/home/christian/.local/share/uv/tools/ramalama/share/ramalama/inference/llama.cpp.yaml",
"mlx": "/home/christian/.local/share/uv/tools/ramalama/share/ramalama/inference/mlx.yaml",
"schema.1-0-0": "/home/christian/.local/share/uv/tools/ramalama/share/ramalama/inference/schema.1-0-0.json",
"vllm": "/home/christian/.local/share/uv/tools/ramalama/share/ramalama/inference/vllm.yaml"
},
"Schema": {
"1-0-0": "/home/christian/.local/share/uv/tools/ramalama/share/ramalama/inference/schema.1-0-0.json"
}
},
"RagImage": "quay.io/ramalama/cuda-rag:latest",
"Selinux": false,
"Shortnames": {
"Files": [
"/home/christian/.local/share/uv/tools/ramalama/share/ramalama/shortnames.conf"
],
"Names": {
"cerebrum": "huggingface://froggeric/Cerebrum-1.0-7b-GGUF/Cerebrum-1.0-7b-Q4_KS.gguf",
"deepseek": "ollama://deepseek-r1",
"dragon": "huggingface://llmware/dragon-mistral-7b-v0/dragon-mistral-7b-q4_k_m.gguf",
"gemma3": "hf://ggml-org/gemma-3-4b-it-GGUF",
"gemma3:12b": "hf://ggml-org/gemma-3-12b-it-GGUF",
"gemma3:1b": "hf://ggml-org/gemma-3-1b-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
"gemma3:27b": "hf://ggml-org/gemma-3-27b-it-GGUF",
"gemma3:4b": "hf://ggml-org/gemma-3-4b-it-GGUF",
"gemma3n": "hf://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-Q8_0.gguf",
"gemma3n:e2b": "hf://ggml-org/gemma-3n-E2B-it-GGUF/gemma-3n-E2B-it-Q8_0.gguf",
"gemma3n:e2b-it-f16": "hf://ggml-org/gemma-3n-E2B-it-GGUF/gemma-3n-E2B-it-f16.gguf",
"gemma3n:e2b-it-q8_0": "hf://ggml-org/gemma-3n-E2B-it-GGUF/gemma-3n-E2B-it-Q8_0.gguf",
"gemma3n:e4b": "hf://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-Q8_0.gguf",
"gemma3n:e4b-it-f16": "hf://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-f16.gguf",
"gemma3n:e4b-it-q8_0": "hf://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-Q8_0.gguf",
"gpt-5.1": "openai://gpt-5.1-2025-11-13",
"gpt-oss": "hf://ggml-org/gpt-oss-20b-GGUF",
"gpt-oss:120b": "hf://ggml-org/gpt-oss-120b-GGUF",
"gpt-oss:20b": "hf://ggml-org/gpt-oss-20b-GGUF",
"granite": "ollama://granite3.1-dense",
"granite-be-3.0:1b": "hf://taronaeo/Granite-3.0-1B-A400M-Instruct-BE-GGUF/granite-3.0-1b-a400m-instruct-be.Q2_K.gguf",
"granite-be-3.3:2b": "hf://taronaeo/Granite-3.3-2B-Instruct-BE-GGUF/granite-3.3-2b-instruct-be.Q4_K_M.gguf",
"granite-lab-7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite-lab-8b": "huggingface://ibm-granite/granite-3.3-8b-instruct-GGUF/granite-3.3-8b-instruct-Q4_K_M.gguf",
"granite-lab:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite:2b": "ollama://granite3.1-dense:2b",
"granite:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite:8b": "ollama://granite3.1-dense:8b",
"hermes": "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf",
"ibm/granite": "ollama://granite3.1-dense:8b",
"ibm/granite:2b": "ollama://granite3.1-dense:2b",
"ibm/granite:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"ibm/granite:8b": "ollama://granite3.1-dense:8b",
"merlinite": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite-lab-7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite-lab:7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite:7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"mistral": "hf://lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
"mistral-small3.1": "hf://bartowski/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF/mistralai_Mistral-Small-3.1-24B-Instruct-2503-IQ2_M.gguf",
"mistral-small3.1:24b": "hf://bartowski/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF/mistralai_Mistral-Small-3.1-24B-Instruct-2503-IQ2_M.gguf",
"mistral:7b": "hf://lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
"mistral:7b-v1": "huggingface://TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q5_K_M.gguf",
"mistral:7b-v2": "huggingface://TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"mistral:7b-v3": "hf://lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
"mistral_code_16k": "huggingface://TheBloke/Mistral-7B-Code-16K-qlora-GGUF/mistral-7b-code-16k-qlora.Q4_K_M.gguf",
"mistral_codealpaca": "huggingface://TheBloke/Mistral-7B-codealpaca-lora-GGUF/mistral-7b-codealpaca-lora.Q4_K_M.gguf",
"mixtao": "huggingface://MaziyarPanahi/MixTAO-7Bx2-MoE-Instruct-v7.0-GGUF/MixTAO-7Bx2-MoE-Instruct-v7.0.Q4_K_M.gguf",
"openchat": "huggingface://TheBloke/openchat-3.5-0106-GGUF/openchat-3.5-0106.Q4_K_M.gguf",
"openorca": "huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q4_K_M.gguf",
"phi2": "huggingface://MaziyarPanahi/phi-2-GGUF/phi-2.Q4_K_M.gguf",
"qwen2.5vl": "hf://ggml-org/Qwen2.5-VL-32B-Instruct-GGUF",
"qwen2.5vl:2b": "hf://ggml-org/Qwen2.5-VL-2B-Instruct-GGUF",
"qwen2.5vl:32b": "hf://ggml-org/Qwen2.5-VL-32B-Instruct-GGUF",
"qwen2.5vl:3b": "hf://ggml-org/Qwen2.5-VL-3B-Instruct-GGUF",
"qwen2.5vl:7b": "hf://ggml-org/Qwen2.5-VL-7B-Instruct-GGUF",
"smollm:135m": "hf://HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF",
"smolvlm": "hf://ggml-org/SmolVLM-500M-Instruct-GGUF",
"smolvlm:256m": "hf://ggml-org/SmolVLM-256M-Instruct-GGUF",
"smolvlm:2b": "hf://ggml-org/SmolVLM-Instruct-GGUF",
"smolvlm:500m": "hf://ggml-org/SmolVLM-500M-Instruct-GGUF",
"stories-be:260k": "hf://taronaeo/tinyllamas-BE/stories260K-be.gguf",
"tiny": "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
"tinyllama": "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
}
},
"Store": "/home/christian/.local/share/ramalama",
"UseContainer": true,
"Version": "0.17.1"
}
Upstream Latest Release
Yes
Additional environment details
No response
Additional information
The same happens on my other ROCm-based machine.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
bug: Something isn't working