|
| 1 | +package migration |
| 2 | + |
| 3 | +import ( |
| 4 | + "database/sql" |
| 5 | + "log" |
| 6 | + |
| 7 | + v1 "github.com/onepanelio/core/pkg" |
| 8 | + uid2 "github.com/onepanelio/core/pkg/util/uid" |
| 9 | + "github.com/pressly/goose" |
| 10 | +) |
| 11 | + |
// tensorflowWorkflowTemplate is the workflow manifest (YAML) that
// Up20200605090535 stores as the Manifest of the "TensorFlow Training"
// workflow template.
//
// As written, the manifest declares:
//   - two workflow parameters: "source" (a git repository URL) and "command"
//     (the shell command that is run for training);
//   - two ReadWriteOnce volume claims of 2Gi each, "data" and "output";
//   - a "main" DAG entrypoint with a single task, "train-model";
//   - a container template that clones "source" into /mnt/src and runs
//     "command" through `sh -c` in the tensorflow/tensorflow:latest image,
//     with the volumes mounted at /mnt/data and /mnt/output;
//   - a "slack-notify-success" template that is only reachable if the
//     commented-out "notify-in-slack" task is re-enabled.
//
// The text is a raw string, so every character (including YAML indentation
// and the lines starting with '#') is part of the value that gets stored.
//
// NOTE(review): the training template is named "pytorch" although the image
// is TensorFlow. The task reference and the template name agree with each
// other, so this presumably still resolves; rename both together if cleaned up.
// NOTE(review): the slack-notify args contain `$(cat /tmp/metrics.json)}`;
// the `}` after the command substitution looks like a stray character that
// would end up in SLACK_MESSAGE - confirm before relying on the Slack step.
// NOTE(review): the commented-out task reads an artifact named "sys-metrics"
// that this manifest does not declare as an output; presumably it is supplied
// by the platform - confirm.
| 12 | +const tensorflowWorkflowTemplate = `entrypoint: main |
| 13 | +arguments: |
| 14 | + parameters: |
| 15 | + - name: source |
| 16 | + value: https://github.com/onepanelio/tensorflow-examples.git |
| 17 | + - name: command |
| 18 | + value: "python mnist/main.py --epochs=5" |
| 19 | +volumeClaimTemplates: |
| 20 | + - metadata: |
| 21 | + name: data |
| 22 | + spec: |
| 23 | + accessModes: [ "ReadWriteOnce" ] |
| 24 | + resources: |
| 25 | + requests: |
| 26 | + storage: 2Gi |
| 27 | + - metadata: |
| 28 | + name: output |
| 29 | + spec: |
| 30 | + accessModes: [ "ReadWriteOnce" ] |
| 31 | + resources: |
| 32 | + requests: |
| 33 | + storage: 2Gi |
| 34 | +templates: |
| 35 | + - name: main |
| 36 | + dag: |
| 37 | + tasks: |
| 38 | + - name: train-model |
| 39 | + template: pytorch |
| 40 | +# Uncomment section below to send metrics to Slack |
| 41 | +# - name: notify-in-slack |
| 42 | +# dependencies: [train-model] |
| 43 | +# template: slack-notify-success |
| 44 | +# arguments: |
| 45 | +# parameters: |
| 46 | +# - name: status |
| 47 | +# value: "{{tasks.train-model.status}}" |
| 48 | +# artifacts: |
| 49 | +# - name: metrics |
| 50 | +# from: "{{tasks.train-model.outputs.artifacts.sys-metrics}}" |
| 51 | + - name: pytorch |
| 52 | + inputs: |
| 53 | + artifacts: |
| 54 | + - name: src |
| 55 | + path: /mnt/src |
| 56 | + git: |
| 57 | + repo: "{{workflow.parameters.source}}" |
| 58 | + outputs: |
| 59 | + artifacts: |
| 60 | + - name: model |
| 61 | + path: /mnt/output |
| 62 | + optional: true |
| 63 | + archive: |
| 64 | + none: {} |
| 65 | + container: |
| 66 | + image: tensorflow/tensorflow:latest |
| 67 | + command: [sh,-c] |
| 68 | + args: ["{{workflow.parameters.command}}"] |
| 69 | + workingDir: /mnt/src |
| 70 | + volumeMounts: |
| 71 | + - name: data |
| 72 | + mountPath: /mnt/data |
| 73 | + - name: output |
| 74 | + mountPath: /mnt/output |
| 75 | + - name: slack-notify-success |
| 76 | + container: |
| 77 | + image: technosophos/slack-notify |
| 78 | + command: [sh,-c] |
| 79 | + args: ['SLACK_USERNAME=Worker SLACK_TITLE="{{workflow.name}} {{inputs.parameters.status}}" SLACK_ICON=https://www.gravatar.com/avatar/5c4478592fe00878f62f0027be59c1bd SLACK_MESSAGE=$(cat /tmp/metrics.json)} ./slack-notify'] |
| 80 | + inputs: |
| 81 | + parameters: |
| 82 | + - name: status |
| 83 | + artifacts: |
| 84 | + - name: metrics |
| 85 | + path: /tmp/metrics.json |
| 86 | + optional: true |
| 87 | +` |
| 88 | + |
// tensorflowWorkflowTemplateName is the Name given to the workflow template
// created by Up20200605090535. Down20200605090535 passes the same value to
// uid2.GenerateUID to obtain the UID it archives, so changing this string
// changes which template the rollback targets.
| 89 | +const tensorflowWorkflowTemplateName = "TensorFlow Training" |
| 90 | + |
| 91 | +func init() { |
| 92 | + goose.AddMigration(Up20200605090535, Down20200605090535) |
| 93 | +} |
| 94 | + |
| 95 | +// Up20200605090535 will insert a tensorflow workflow template to each user. |
| 96 | +// Each user is determined by onepanel enabled namespaces. |
| 97 | +// Any errors reported are logged as fatal. |
| 98 | +func Up20200605090535(tx *sql.Tx) error { |
| 99 | + client, err := getClient() |
| 100 | + if err != nil { |
| 101 | + return err |
| 102 | + } |
| 103 | + |
| 104 | + namespaces, err := client.ListOnepanelEnabledNamespaces() |
| 105 | + if err != nil { |
| 106 | + return err |
| 107 | + } |
| 108 | + |
| 109 | + workflowTemplate := &v1.WorkflowTemplate{ |
| 110 | + Name: tensorflowWorkflowTemplateName, |
| 111 | + Manifest: tensorflowWorkflowTemplate, |
| 112 | + } |
| 113 | + |
| 114 | + for _, namespace := range namespaces { |
| 115 | + if _, err := client.CreateWorkflowTemplate(namespace.Name, workflowTemplate); err != nil { |
| 116 | + log.Fatalf("error %v", err.Error()) |
| 117 | + } |
| 118 | + } |
| 119 | + return nil |
| 120 | +} |
| 121 | + |
| 122 | +// Down20200605090535 will attempt to remove tensorflow workflow from each user. |
| 123 | +// Each user is determined by onepanel enabled namespaces. |
| 124 | +// DB entries are archived, K8S components are deleted. |
| 125 | +// Active workflows with that template are terminated. |
| 126 | +// Any errors reported are logged as fatal. |
| 127 | +func Down20200605090535(tx *sql.Tx) error { |
| 128 | + // This code is executed when the migration is rolled back. |
| 129 | + client, err := getClient() |
| 130 | + if err != nil { |
| 131 | + return err |
| 132 | + } |
| 133 | + |
| 134 | + namespaces, err := client.ListOnepanelEnabledNamespaces() |
| 135 | + if err != nil { |
| 136 | + return err |
| 137 | + } |
| 138 | + |
| 139 | + uid, err := uid2.GenerateUID(tensorflowWorkflowTemplateName, 30) |
| 140 | + if err != nil { |
| 141 | + return err |
| 142 | + } |
| 143 | + for _, namespace := range namespaces { |
| 144 | + if _, err := client.ArchiveWorkflowTemplate(namespace.Name, uid); err != nil { |
| 145 | + log.Fatalf("error %v", err.Error()) |
| 146 | + } |
| 147 | + } |
| 148 | + |
| 149 | + return nil |
| 150 | +} |
0 commit comments