Skip to content

Commit facbed5

Browse files
authored
Merge pull request #335 from onepanelio/feat/core.288-add.pytorch.tensorflow.workspace.template.migrations
feat: Adding Pytorch and Tensorflow to workflow templates, as runnable workflow executions.
2 parents 07d6c1c + 7178142 commit facbed5

File tree

2 files changed

+301
-0
lines changed

2 files changed

+301
-0
lines changed
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
package migration
2+
3+
import (
4+
"database/sql"
5+
"log"
6+
7+
v1 "github.com/onepanelio/core/pkg"
8+
uid2 "github.com/onepanelio/core/pkg/util/uid"
9+
"github.com/pressly/goose"
10+
)
11+
12+
// pytorchMnistWorkflowTemplate is the Argo workflow manifest installed by this
// migration. It clones the repository given by the "source" parameter into
// /mnt/src and runs the "command" parameter there inside the
// pytorch/pytorch image, with the "data" and "output" volume claims mounted at
// /mnt/data and /mnt/output.
//
// The slack-notify-success template is only reachable once the commented-out
// DAG task is enabled. Its shell command previously ended the SLACK_MESSAGE
// assignment with a stray "}", which appended a literal brace to every
// message; that character has been removed.
const pytorchMnistWorkflowTemplate = `entrypoint: main
arguments:
  parameters:
  - name: source
    value: https://github.com/onepanelio/pytorch-examples.git
  - name: command
    value: "python mnist/main.py --epochs=1"
volumeClaimTemplates:
  - metadata:
      name: data
    spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 2Gi
  - metadata:
      name: output
    spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 2Gi
templates:
  - name: main
    dag:
      tasks:
      - name: train-model
        template: pytorch
# Uncomment section below to send metrics to Slack
#      - name: notify-in-slack
#        dependencies: [train-model]
#        template: slack-notify-success
#        arguments:
#          parameters:
#          - name: status
#            value: "{{tasks.train-model.status}}"
#          artifacts:
#          - name: metrics
#            from: "{{tasks.train-model.outputs.artifacts.sys-metrics}}"
  - name: pytorch
    inputs:
      artifacts:
      - name: src
        path: /mnt/src
        git:
          repo: "{{workflow.parameters.source}}"
    outputs:
      artifacts:
      - name: model
        path: /mnt/output
        optional: true
        archive:
          none: {}
    container:
      image: pytorch/pytorch:latest
      command: [sh,-c]
      args: ["{{workflow.parameters.command}}"]
      workingDir: /mnt/src
      volumeMounts:
      - name: data
        mountPath: /mnt/data
      - name: output
        mountPath: /mnt/output
  - name: slack-notify-success
    container:
      image: technosophos/slack-notify
      command: [sh,-c]
      args: ['SLACK_USERNAME=Worker SLACK_TITLE="{{workflow.name}} {{inputs.parameters.status}}" SLACK_ICON=https://www.gravatar.com/avatar/5c4478592fe00878f62f0027be59c1bd SLACK_MESSAGE=$(cat /tmp/metrics.json) ./slack-notify']
    inputs:
      parameters:
      - name: status
      artifacts:
      - name: metrics
        path: /tmp/metrics.json
        optional: true
`
88+
89+
// pytorchMnistWorkflowTemplateName is the name given to the workflow template
// on creation. The Down migration also feeds it to uid2.GenerateUID to derive
// the UID of the template to archive, so changing it breaks the rollback.
const pytorchMnistWorkflowTemplateName = "PyTorch Training"
90+
91+
func init() {
92+
goose.AddMigration(Up20200605090509, Down20200605090509)
93+
}
94+
95+
// Up20200605090509 will insert a Pytorch workflow template to each user.
96+
// Each user is determined by onepanel enabled namespaces.
97+
// Any errors reported are logged as fatal.
98+
func Up20200605090509(tx *sql.Tx) error {
99+
client, err := getClient()
100+
if err != nil {
101+
return err
102+
}
103+
104+
namespaces, err := client.ListOnepanelEnabledNamespaces()
105+
if err != nil {
106+
return err
107+
}
108+
109+
workflowTemplate := &v1.WorkflowTemplate{
110+
Name: pytorchMnistWorkflowTemplateName,
111+
Manifest: pytorchMnistWorkflowTemplate,
112+
}
113+
114+
for _, namespace := range namespaces {
115+
if _, err := client.CreateWorkflowTemplate(namespace.Name, workflowTemplate); err != nil {
116+
log.Fatalf("error %v", err.Error())
117+
}
118+
}
119+
120+
return nil
121+
}
122+
123+
// Down20200605090509 will attempt to remove Pytorch workflow from each user.
124+
// Each user is determined by onepanel enabled namespaces.
125+
// DB entries are archived, K8S components are deleted.
126+
// Active workflows with that template are terminated.
127+
// Any errors reported are logged as fatal.
128+
func Down20200605090509(tx *sql.Tx) error {
129+
// This code is executed when the migration is rolled back.
130+
client, err := getClient()
131+
if err != nil {
132+
return err
133+
}
134+
135+
namespaces, err := client.ListOnepanelEnabledNamespaces()
136+
if err != nil {
137+
return err
138+
}
139+
140+
uid, err := uid2.GenerateUID(pytorchMnistWorkflowTemplateName, 30)
141+
if err != nil {
142+
return err
143+
}
144+
for _, namespace := range namespaces {
145+
if _, err := client.ArchiveWorkflowTemplate(namespace.Name, uid); err != nil {
146+
log.Fatalf("error %v", err.Error())
147+
}
148+
}
149+
150+
return nil
151+
}
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
package migration
2+
3+
import (
4+
"database/sql"
5+
"log"
6+
7+
v1 "github.com/onepanelio/core/pkg"
8+
uid2 "github.com/onepanelio/core/pkg/util/uid"
9+
"github.com/pressly/goose"
10+
)
11+
12+
// tensorflowWorkflowTemplate is the Argo workflow manifest installed by this
// migration. It clones the repository given by the "source" parameter into
// /mnt/src and runs the "command" parameter there inside the
// tensorflow/tensorflow image, with the "data" and "output" volume claims
// mounted at /mnt/data and /mnt/output.
//
// The training template is named "tensorflow"; it was previously called
// "pytorch", a leftover from the PyTorch manifest this one was copied from.
// The name is only referenced inside this manifest (by the train-model task).
//
// The slack-notify-success template is only reachable once the commented-out
// DAG task is enabled. Its shell command previously ended the SLACK_MESSAGE
// assignment with a stray "}", which appended a literal brace to every
// message; that character has been removed.
const tensorflowWorkflowTemplate = `entrypoint: main
arguments:
  parameters:
  - name: source
    value: https://github.com/onepanelio/tensorflow-examples.git
  - name: command
    value: "python mnist/main.py --epochs=5"
volumeClaimTemplates:
  - metadata:
      name: data
    spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 2Gi
  - metadata:
      name: output
    spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 2Gi
templates:
  - name: main
    dag:
      tasks:
      - name: train-model
        template: tensorflow
# Uncomment section below to send metrics to Slack
#      - name: notify-in-slack
#        dependencies: [train-model]
#        template: slack-notify-success
#        arguments:
#          parameters:
#          - name: status
#            value: "{{tasks.train-model.status}}"
#          artifacts:
#          - name: metrics
#            from: "{{tasks.train-model.outputs.artifacts.sys-metrics}}"
  - name: tensorflow
    inputs:
      artifacts:
      - name: src
        path: /mnt/src
        git:
          repo: "{{workflow.parameters.source}}"
    outputs:
      artifacts:
      - name: model
        path: /mnt/output
        optional: true
        archive:
          none: {}
    container:
      image: tensorflow/tensorflow:latest
      command: [sh,-c]
      args: ["{{workflow.parameters.command}}"]
      workingDir: /mnt/src
      volumeMounts:
      - name: data
        mountPath: /mnt/data
      - name: output
        mountPath: /mnt/output
  - name: slack-notify-success
    container:
      image: technosophos/slack-notify
      command: [sh,-c]
      args: ['SLACK_USERNAME=Worker SLACK_TITLE="{{workflow.name}} {{inputs.parameters.status}}" SLACK_ICON=https://www.gravatar.com/avatar/5c4478592fe00878f62f0027be59c1bd SLACK_MESSAGE=$(cat /tmp/metrics.json) ./slack-notify']
    inputs:
      parameters:
      - name: status
      artifacts:
      - name: metrics
        path: /tmp/metrics.json
        optional: true
`
88+
89+
// tensorflowWorkflowTemplateName is the name given to the workflow template
// on creation. The Down migration also feeds it to uid2.GenerateUID to derive
// the UID of the template to archive, so changing it breaks the rollback.
const tensorflowWorkflowTemplateName = "TensorFlow Training"
90+
91+
func init() {
92+
goose.AddMigration(Up20200605090535, Down20200605090535)
93+
}
94+
95+
// Up20200605090535 will insert a tensorflow workflow template to each user.
96+
// Each user is determined by onepanel enabled namespaces.
97+
// Any errors reported are logged as fatal.
98+
func Up20200605090535(tx *sql.Tx) error {
99+
client, err := getClient()
100+
if err != nil {
101+
return err
102+
}
103+
104+
namespaces, err := client.ListOnepanelEnabledNamespaces()
105+
if err != nil {
106+
return err
107+
}
108+
109+
workflowTemplate := &v1.WorkflowTemplate{
110+
Name: tensorflowWorkflowTemplateName,
111+
Manifest: tensorflowWorkflowTemplate,
112+
}
113+
114+
for _, namespace := range namespaces {
115+
if _, err := client.CreateWorkflowTemplate(namespace.Name, workflowTemplate); err != nil {
116+
log.Fatalf("error %v", err.Error())
117+
}
118+
}
119+
return nil
120+
}
121+
122+
// Down20200605090535 will attempt to remove tensorflow workflow from each user.
123+
// Each user is determined by onepanel enabled namespaces.
124+
// DB entries are archived, K8S components are deleted.
125+
// Active workflows with that template are terminated.
126+
// Any errors reported are logged as fatal.
127+
func Down20200605090535(tx *sql.Tx) error {
128+
// This code is executed when the migration is rolled back.
129+
client, err := getClient()
130+
if err != nil {
131+
return err
132+
}
133+
134+
namespaces, err := client.ListOnepanelEnabledNamespaces()
135+
if err != nil {
136+
return err
137+
}
138+
139+
uid, err := uid2.GenerateUID(tensorflowWorkflowTemplateName, 30)
140+
if err != nil {
141+
return err
142+
}
143+
for _, namespace := range namespaces {
144+
if _, err := client.ArchiveWorkflowTemplate(namespace.Name, uid); err != nil {
145+
log.Fatalf("error %v", err.Error())
146+
}
147+
}
148+
149+
return nil
150+
}

0 commit comments

Comments
 (0)