Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
993d98b
feat: Add OCI artifact output support to LMEvalJob (#529)
ruivieira Jul 25, 2025
3f04672
feat: Add subject to LMEval CRD's OCI outputs (#530)
ruivieira Jul 30, 2025
4b97685
let's log the CM reading logic
tarilabs Sep 17, 2025
30d920c
Fix test compilation errors by adding PermissionConfig parameter
tarilabs Nov 18, 2025
0733297
params.env bump
tarilabs Nov 19, 2025
7f52228
debug: GHA results of params.env(s)
tarilabs Nov 19, 2025
6455873
replace Token with (.)DockerConfigJson
tarilabs Nov 21, 2025
d19726e
bump images to 6455873ad6e4a3427859d9a9ec0d667c5e07df7f
tarilabs Nov 21, 2025
209263a
regenerate images
tarilabs Nov 21, 2025
981180e
need images to 209263a004ab9b4caba2ea8420ffc41092fa4c14
tarilabs Nov 21, 2025
7f64959
regenerate images
tarilabs Nov 21, 2025
e26591f
need the skopeo dependency
tarilabs Nov 21, 2025
1c4d451
need params at e26591fac20d6fa5c078b88f8e26ce1a76fd5553
tarilabs Nov 21, 2025
51fd641
replace `registry` as string
tarilabs Nov 21, 2025
76addf3
need params env at 51fd6419b1da09b4d43c9b014ed657d7051c4ff0
tarilabs Nov 21, 2025
edbfbf4
regenerate images
tarilabs Nov 21, 2025
c2ccdf4
regenerate images
tarilabs Nov 21, 2025
b11dbd5
params.env at c2ccdf4b83ae67d140965ed8bb8081c423cab988
tarilabs Nov 21, 2025
dd6d643
implement TAG if missing is Job name
tarilabs Nov 21, 2025
45620d3
params.env at dd6d64313806a314f8b0835563f4efdec3062f44
tarilabs Nov 21, 2025
e2b9fc7
remove no-longer needed Path
tarilabs Nov 27, 2025
4b1e455
remove CABundle
tarilabs Nov 27, 2025
30c6bbb
params.env at 4b1e4552f9a25287b4f6fb2400d274a537df8298
tarilabs Nov 27, 2025
f05fef7
regenerate images
tarilabs Nov 27, 2025
f606fa8
params.env at f05fef7f4da7b031ff70a4bf74cb1822be0fd91a
tarilabs Nov 27, 2025
fa8b8de
get things back into place for merging
tarilabs Nov 27, 2025
58063af
feedback: use Tag regex from OCI spec
tarilabs Nov 28, 2025
1479775
feedback: internals to avoid OCI_PATH altogether
tarilabs Nov 28, 2025
be802b0
feedback: test rename
tarilabs Nov 28, 2025
893c736
feedback: refactor cmd to "avoid repetitions"
tarilabs Nov 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/build-and-push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,10 @@ jobs:

rm -Rf $(ls . | grep -v config)
rm -Rf .gitignore .dockerignore .github .git .yamllint.yaml

cat ./config/base/params.env
cat ./config/overlays/odh/params.env
cat ./config/overlays/rhoai/params.env
# push to ci-manifest repo
- uses: cpina/github-action-push-to-another-repository@main
if: env.BUILD_CONTEXT == 'ci'
Expand Down
3 changes: 3 additions & 0 deletions Dockerfile.lmes-job
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ FROM registry.access.redhat.com/ubi9/python-311@sha256:fccda5088dd13d2a3f2659e4c
USER root
RUN sed -i.bak 's/include-system-site-packages = false/include-system-site-packages = true/' /opt/app-root/pyvenv.cfg

# required dependency for oci.py in lmes-job; put here for the `needs-lmes-build`, but already incorporated here: https://github.com/opendatahub-io/lm-evaluation-harness/blob/3c4dec006a4a096b546d60c3364c78acbe33cd48/Dockerfile.lmes-job#L16
RUN dnf install -y skopeo && dnf clean all

USER default
WORKDIR /opt/app-root/src
RUN mkdir /opt/app-root/src/hf_home && chmod g+rwx /opt/app-root/src/hf_home
Expand Down
50 changes: 50 additions & 0 deletions api/lmes/v1alpha1/lmevaljob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,9 @@ type Outputs struct {
// Create an operator managed PVC
// +optional
PersistentVolumeClaimManaged *PersistentVolumeClaimManaged `json:"pvcManaged,omitempty"`
// Upload results to OCI registry
// +optional
OCISpec *OCISpec `json:"oci,omitempty"`
}

func (c *LMEvalContainer) GetSecurityContext() *corev1.SecurityContext {
Expand Down Expand Up @@ -463,6 +466,36 @@ type OfflineS3Spec struct {
CABundle *corev1.SecretKeySelector `json:"caBundle,omitempty"`
}

// OCISpec configures the upload of evaluation results to an OCI registry.
type OCISpec struct {
	// Registry references the Secret key holding the registry URL (e.g., quay.io, registry.redhat.com)
	Registry corev1.SecretKeySelector `json:"registry"`
	// Repository path (e.g., myorg/evaluation-results); must contain at least one character
	// +kubebuilder:validation:Pattern=`^[a-zA-Z0-9._/-]+$`
	Repository string `json:"repository"`
	// Optional tag for the artifact (defaults to job name if not specified)
	// +optional
	// +kubebuilder:validation:Pattern=`^[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}$`
	// +kubebuilder:validation:MaxLength=128
	Tag string `json:"tag,omitempty"`
	// Subject for the OCI artifact
	// +optional
	// +kubebuilder:validation:Pattern=`^[a-zA-Z0-9._:/@-]*$`
	// +kubebuilder:validation:MaxLength=255
	Subject string `json:"subject,omitempty"`
	// Reference to the Secret key holding the username for registry authentication
	// +optional
	UsernameRef *corev1.SecretKeySelector `json:"username,omitempty"`
	// Reference to the Secret key holding the password for registry authentication
	// +optional
	PasswordRef *corev1.SecretKeySelector `json:"password,omitempty"`
	// Reference to the Secret key holding a DockerConfigJson for registry
	// authentication (alternative to username/password)
	// +optional
	DockerConfigJsonRef *corev1.SecretKeySelector `json:"dockerConfigJson,omitempty"`
	// Whether to verify SSL certificates
	// +optional
	VerifySSL *bool `json:"verifySSL,omitempty"`
}

// OfflineStorageSpec defines the storage configuration for LMEvalJob's offline mode
type OfflineStorageSpec struct {
PersistentVolumeClaimName *string `json:"pvcName,omitempty"`
Expand Down Expand Up @@ -585,10 +618,22 @@ func (s *LMEvalJobSpec) HasOfflineS3() bool {
return s.Offline != nil && s.Offline.StorageSpec.S3Spec != nil
}

// HasOCIOutput reports whether the spec defines outputs that include an OCI
// upload target.
func (s *LMEvalJobSpec) HasOCIOutput() bool {
	if s.Outputs == nil {
		return false
	}
	return s.Outputs.OCISpec != nil
}

// HasCertificates reports whether a CA bundle reference is set on the S3 spec.
func (s *OfflineS3Spec) HasCertificates() bool {
	bundleConfigured := s.CABundle != nil
	return bundleConfigured
}

// HasUsernamePassword reports whether both the username and the password
// references are set. It is safe to call on a nil receiver, in which case it
// returns false.
func (s *OCISpec) HasUsernamePassword() bool {
	if s == nil {
		return false
	}
	return s.UsernameRef != nil && s.PasswordRef != nil
}

// HasDockerConfigJson reports whether a DockerConfigJson reference is set.
// It is safe to call on a nil receiver, in which case it returns false.
func (s *OCISpec) HasDockerConfigJson() bool {
	return s != nil && s.DockerConfigJsonRef != nil
}

// HasCustomOutput returns whether an LMEvalJobSpec defines custom outputs or not
func (s *LMEvalJobSpec) HasCustomOutput() bool {
return s.Outputs != nil
Expand All @@ -604,6 +649,11 @@ func (o *Outputs) HasExistingPVC() bool {
return o.PersistentVolumeClaimName != nil
}

// HasOCI reports whether these outputs carry an OCI upload specification.
// A nil receiver yields false.
func (o *Outputs) HasOCI() bool {
	if o == nil {
		return false
	}
	return o.OCISpec != nil
}

// LMEvalJobStatus defines the observed state of LMEvalJob
type LMEvalJobStatus struct {
// Important: Run "make" to regenerate code after modifying this file
Expand Down
41 changes: 41 additions & 0 deletions api/lmes/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions cmd/lmes_driver/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ var (
detectDevice = flag.Bool("detect-device", false, "detect available device(s), CUDA or CPU")
commPort = flag.Int("listen-port", driver.DefaultPort, "driver serves APIs on the port")
downloadAssetsS3 = flag.Bool("download-assets-s3", false, "Download assets from S3")
uploadToOCI = flag.Bool("upload-to-oci", false, "Upload results to OCI registry")
customTaskGitURL = flag.String("custom-task-git-url", "", "Git repository URL for custom tasks")
customTaskGitBranch = flag.String("custom-task-git-branch", "", "Git repository branch for custom tasks")
customTaskGitCommit = flag.String("custom-task-git-commit", "", "Git commit for custom tasks")
Expand Down Expand Up @@ -129,6 +130,7 @@ func main() {
Args: args,
CommPort: *commPort,
DownloadAssetsS3: *downloadAssetsS3,
UploadToOCI: *uploadToOCI,
CustomTaskGitURL: *customTaskGitURL,
CustomTaskGitBranch: *customTaskGitBranch,
CustomTaskGitCommit: *customTaskGitCommit,
Expand Down
106 changes: 106 additions & 0 deletions config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,112 @@ spec:
outputs:
description: Outputs specifies storage for evaluation results
properties:
oci:
description: Upload results to OCI registry
properties:
dockerConfigJson:
description: DockerConfigJson for registry authentication
(alternative to username/password)
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: |-
Name of the referent.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
password:
description: Password for registry authentication
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: |-
Name of the referent.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
registry:
description: Registry URL (e.g., quay.io, registry.redhat.com)
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: |-
Name of the referent.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
repository:
description: Repository path (e.g., myorg/evaluation-results)
pattern: ^[a-zA-Z0-9._/-]*$
Comment on lines +343 to +345
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (bug_risk): Repository field is required but regex allows empty string, which may lead to invalid-but-accepted configuration

The CRD marks repository as required, but the pattern ^[a-zA-Z0-9._/-]*$ still permits an empty string, so invalid configs can pass validation and only fail at runtime. Please change the pattern to require at least one character (e.g. ^[a-zA-Z0-9._/-]+$) so empty values are rejected by the schema.

type: string
subject:
description: Subject for the OCI artifact
maxLength: 255
pattern: ^[a-zA-Z0-9._:/@-]*$
type: string
tag:
description: Optional tag for the artifact (defaults to job
name if not specified)
maxLength: 128
pattern: ^[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}$
type: string
username:
description: Username for registry authentication
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: |-
Name of the referent.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
verifySSL:
description: Whether to verify SSL certificates
type: boolean
required:
- registry
- repository
type: object
pvcManaged:
description: Create an operator managed PVC
properties:
Expand Down
55 changes: 55 additions & 0 deletions controllers/lmes/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ type DriverOption struct {
Args []string
CommPort int
DownloadAssetsS3 bool
UploadToOCI bool
CustomTaskGitURL string
CustomTaskGitBranch string
CustomTaskGitCommit string
Expand Down Expand Up @@ -242,6 +243,55 @@ func (d *driverImpl) downloadS3Assets() error {
return nil
}

// uploadToOCI hands the job's output directory to the scripts/oci.py helper so
// the results are pushed to an OCI registry.
//
// It returns nil without doing anything when the receiver is nil or the
// UploadToOCI option is disabled. The registry is read from the OCI_REGISTRY
// environment variable; an error is returned when that variable is empty or
// when the helper script cannot be run or exits with a failure. The
// underlying execution error is wrapped, so callers can inspect it with
// errors.Is / errors.As.
func (d *driverImpl) uploadToOCI() error {
	if d == nil || !d.Option.UploadToOCI {
		return nil
	}

	fmt.Println("Uploading results to OCI registry")

	// Build command arguments: scripts/oci.py <registry> <output_location>
	registryFromEnv := os.Getenv("OCI_REGISTRY")
	if registryFromEnv == "" {
		return fmt.Errorf("OCI_REGISTRY environment variable not set")
	}

	// Typically the end-user doesn't know how to supply the results'
	// subdirectory, so the whole output path is handed over to the script.
	// Typically outputPath contains the lm-eval results subdirectory, the
	// stderr/stdout logs and a lost+found directory, and the script can
	// figure out how best to handle it.
	outputPath := d.Option.OutputPath
	scriptPath := "/opt/app-root/src/scripts/oci.py"

	fmt.Printf("[DEBUG] OCI upload CLI: python %s %s %s\n", scriptPath, registryFromEnv, outputPath)

	// List all files and directories in the output path. This listing is a
	// debugging aid only, so walk errors are printed and never abort the upload.
	fmt.Printf("[DEBUG] Contents of results location (%s):\n", outputPath)
	_ = filepath.Walk(outputPath, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			fmt.Printf("  [error] %v\n", err)
			return nil
		}
		rel, relErr := filepath.Rel(outputPath, path)
		if relErr != nil {
			// Fall back to the full path rather than printing an empty name.
			rel = path
		}
		if rel == "." || info.IsDir() {
			fmt.Printf("  %s/\n", rel)
		} else {
			fmt.Printf("  %s\n", rel)
		}
		return nil
	})

	// Capture stderr together with stdout: on failure the script's diagnostics
	// are written to stderr and would otherwise be dropped from the job log.
	output, err := exec.Command("python", scriptPath, registryFromEnv, outputPath).CombinedOutput()
	fmt.Println(string(output))
	if err != nil {
		return fmt.Errorf("failed to upload results to OCI: %w", err)
	}

	return nil
}

func patchDevice(args []string, hasCuda bool) []string {
device := "cpu"
if hasCuda {
Expand Down Expand Up @@ -443,6 +493,11 @@ func (d *driverImpl) updateCompleteStatus(err error) {
var results string
results, err = d.getResults()
d.status.Results = results

// Upload results to OCI if configured
if err == nil {
err = d.uploadToOCI()
}
}

if err != nil {
Expand Down
Loading
Loading