Skip to content

Commit d8efb12

Browse files
suaas21tamalsaha
authored andcommitted
Use POD_ORDINAL env var to restore using PVC template (#849)
1 parent 8df5be9 commit d8efb12

5 files changed

Lines changed: 57 additions & 34 deletions

File tree

pkg/controller/restore_session.go

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -252,13 +252,13 @@ func (c *StashController) ensureRestoreJob(restoreSession *api_v1beta1.RestoreSe
252252

253253
if restoreSession.Spec.Task.Name != "" {
254254
// Restore process follows Function-Task model. So, resolve Function and Task to get desired job definition.
255-
jobTemplate, err = c.resolveRestoreTask(restoreSession, repository, jobMeta, ref, serviceAccountName)
255+
jobTemplate, err = c.resolveRestoreTask(restoreSession, repository, ref, serviceAccountName)
256256
if err != nil {
257257
return err
258258
}
259259
} else {
260260
// Restore process does not follow Function-Task model. So, generate simple volume restorer job definition.
261-
jobTemplate, err = util.NewPVCRestorerJob(restoreSession, repository, image, jobMeta)
261+
jobTemplate, err = util.NewPVCRestorerJob(restoreSession, repository, image)
262262
if err != nil {
263263
return err
264264
}
@@ -267,7 +267,7 @@ func (c *StashController) ensureRestoreJob(restoreSession *api_v1beta1.RestoreSe
267267
// If volumeClaimTemplate is not specified then we don't need any further processing. Just, create the job
268268
if restoreSession.Spec.Target == nil ||
269269
(restoreSession.Spec.Target != nil && len(restoreSession.Spec.Target.VolumeClaimTemplates) == 0) {
270-
return c.createRestoreJob(jobTemplate, jobTemplate.ObjectMeta, ref, serviceAccountName)
270+
return c.createRestoreJob(jobTemplate, jobMeta, ref, serviceAccountName)
271271
}
272272

273273
// volumeClaimTemplate has been specified. Now, we have to do the following for each replica:
@@ -295,13 +295,27 @@ func (c *StashController) ensureRestoreJob(restoreSession *api_v1beta1.RestoreSe
295295

296296
// add PVCs as volume to the job
297297
volumes := util.PVCListToVolumes(pvcList, ordinal)
298-
jobTemplate.Spec.Volumes = core_util.UpsertVolume(jobTemplate.Spec.Volumes, volumes...)
299298

299+
// use copy of the original job template. otherwise, each iteration will append volumes in the same template
300+
restoreJobTemplate := jobTemplate.DeepCopy()
301+
restoreJobMeta := jobMeta.DeepCopy()
300302
// add ordinal suffix to the job name so that multiple restore job can run concurrently
301-
jobTemplate.Name = fmt.Sprintf("%s-%d", jobMeta.Name, ordinal)
303+
restoreJobMeta.Name = fmt.Sprintf("%s-%d", jobMeta.Name, ordinal)
304+
305+
restoreJobTemplate.Spec.Volumes = core_util.UpsertVolume(restoreJobTemplate.Spec.Volumes, volumes...)
306+
307+
ordinalEnv := core.EnvVar{
308+
Name: util.KeyPodOrdinal,
309+
Value: fmt.Sprintf("%d", ordinal),
310+
}
311+
312+
// insert POD_ORDINAL env in all containers.
313+
for i, c := range restoreJobTemplate.Spec.Containers {
314+
restoreJobTemplate.Spec.Containers[i].Env = core_util.UpsertEnvVars(c.Env, ordinalEnv)
315+
}
302316

303317
// create restore job
304-
err = c.createRestoreJob(jobTemplate, jobTemplate.ObjectMeta, ref, serviceAccountName)
318+
err = c.createRestoreJob(restoreJobTemplate, *restoreJobMeta, ref, serviceAccountName)
305319
if err != nil {
306320
return err
307321
}
@@ -329,7 +343,7 @@ func (c *StashController) createRestoreJob(jobTemplate *core.PodTemplateSpec, me
329343

330344
// resolveRestoreTask resolves Functions and Tasks then returns a job definition to restore the target.
331345
func (c *StashController) resolveRestoreTask(restoreSession *api_v1beta1.RestoreSession,
332-
repository *api_v1alpha1.Repository, meta metav1.ObjectMeta, ref *core.ObjectReference, serviceAccountName string) (*core.PodTemplateSpec, error) {
346+
repository *api_v1alpha1.Repository, ref *core.ObjectReference, serviceAccountName string) (*core.PodTemplateSpec, error) {
333347

334348
// resolve task template
335349
explicitInputs := make(map[string]string)
@@ -384,8 +398,7 @@ func (c *StashController) resolveRestoreTask(restoreSession *api_v1beta1.Restore
384398
}
385399

386400
podTemplate := &core.PodTemplateSpec{
387-
ObjectMeta: meta,
388-
Spec: podSpec,
401+
Spec: podSpec,
389402
}
390403
return podTemplate, nil
391404
}
@@ -602,7 +615,7 @@ func getPVCFromVolumeClaimTemplates(ordinal int32, claimTemplates []core.Persist
602615
pvcList := make([]core.PersistentVolumeClaim, 0)
603616
for _, claim := range claimTemplates {
604617
inputs := make(map[string]string)
605-
inputs["POD_ORDINAL"] = strconv.Itoa(int(ordinal))
618+
inputs[util.KeyPodOrdinal] = strconv.Itoa(int(ordinal))
606619
err := resolve.ResolvePVCSpec(&claim, inputs)
607620
if err != nil {
608621
return pvcList, err

pkg/controller/stash.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -198,21 +198,27 @@ func (c *StashController) getTotalHosts(target interface{}, namespace string, dr
198198
return types.Int32P(1), nil
199199
}
200200
targetRef = t.Ref
201+
201202
case *api_v1beta1.RestoreTarget:
202203
t := target.(*api_v1beta1.RestoreTarget)
203204
if t == nil {
204205
return types.Int32P(1), nil
205206
}
206207
targetRef = t.Ref
208+
209+
// for VolumeSnapshot, we consider each PVC as a separate host.
210+
// hence, number of host = replica * number of PVC in each replica
207211
if driver == api_v1beta1.VolumeSnapshotter {
208-
def := int32(1)
212+
replica := int32(1)
209213
if t.Replicas != nil {
210-
def = types.Int32(t.Replicas)
214+
replica = types.Int32(t.Replicas)
211215
}
212-
return types.Int32P(def * int32(len(t.VolumeClaimTemplates))), nil
216+
return types.Int32P(replica * int32(len(t.VolumeClaimTemplates))), nil
213217
}
214-
// if volumeClaimTemplates is specified when using Restic driver, we can calculate total host from it
215-
if driver != api_v1beta1.VolumeSnapshotter && t.VolumeClaimTemplates != nil {
218+
219+
// if volumeClaimTemplates is specified when using Restic driver, restore is done through job.
220+
// stash creates restore job for each replica. hence, number of total host is the number of replicas.
221+
if len(t.VolumeClaimTemplates) != 0 || t.Replicas != nil {
216222
if t.Replicas == nil {
217223
return types.Int32P(1), nil
218224
} else {

pkg/util/job.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ func NewRecoveryJob(stashClient cs.Interface, recovery *api_v1alpha1.Recovery, i
199199
}
200200

201201
// NewPVCRestorerJob return a job definition to restore pvc.
202-
func NewPVCRestorerJob(rs *api_v1beta1.RestoreSession, repository *api_v1alpha1.Repository, image docker.Docker, meta metav1.ObjectMeta) (*core.PodTemplateSpec, error) {
202+
func NewPVCRestorerJob(rs *api_v1beta1.RestoreSession, repository *api_v1alpha1.Repository, image docker.Docker) (*core.PodTemplateSpec, error) {
203203
container := core.Container{
204204
Name: StashContainer,
205205
Image: image.ToContainerImage(),
@@ -217,10 +217,6 @@ func NewPVCRestorerJob(rs *api_v1beta1.RestoreSession, repository *api_v1alpha1.
217217
fmt.Sprintf("--enable-analytics=%v", cli.EnableAnalytics),
218218
}, cli.LoggerOptions.ToFlags()...),
219219
Env: []core.EnvVar{
220-
{
221-
Name: KeyPodName,
222-
Value: meta.Name,
223-
},
224220
{
225221
Name: KeyNodeName,
226222
ValueFrom: &core.EnvVarSource{
@@ -276,7 +272,6 @@ func NewPVCRestorerJob(rs *api_v1beta1.RestoreSession, repository *api_v1alpha1.
276272
}
277273

278274
jobTemplate := &core.PodTemplateSpec{
279-
ObjectMeta: meta,
280275
Spec: core.PodSpec{
281276
Containers: []core.Container{container},
282277
RestartPolicy: core.RestartPolicyNever,

pkg/util/kubernetes.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,9 @@ const (
5959
StashSecretVolume = "stash-secret-volume"
6060
StashSecretMountDir = "/etc/stash/repository/secret"
6161

62-
KeyPodName = "POD_NAME"
63-
KeyNodeName = "NODE_NAME"
62+
KeyPodName = "POD_NAME"
63+
KeyNodeName = "NODE_NAME"
64+
KeyPodOrdinal = "POD_ORDINAL"
6465

6566
RetryInterval = 50 * time.Millisecond
6667
ReadinessTimeout = 2 * time.Minute

pkg/util/util.go

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,15 @@ type RepoLabelData struct {
4242
NodeName string
4343
}
4444

45+
// GetHostName returns hostname for a target
4546
func GetHostName(target interface{}) (string, error) {
4647
// target nil for cluster backup
4748
var targetRef api_v1beta1.TargetRef
4849
if target == nil {
4950
return "host-0", nil
5051
}
52+
53+
// read targetRef field from BackupTarget or RestoreTarget
5154
switch target.(type) {
5255
case *api_v1beta1.BackupTarget:
5356
t := target.(*api_v1beta1.BackupTarget)
@@ -60,33 +63,38 @@ func GetHostName(target interface{}) (string, error) {
6063
if t == nil {
6164
return "host-0", nil
6265
}
63-
// replicas is specified when restore StatefulSet volumes using job.
64-
// so we have to handle this case too.
65-
if t.Replicas != nil { // StatefulSet volumes.
66-
podName := os.Getenv(KeyPodName)
67-
if podName == "" {
68-
return "", fmt.Errorf("missing podName for %s", apis.KindStatefulSet)
66+
67+
// if replicas or volumeClaimTemplate is specified then restore is done via job.
68+
// in this case, we need to know the ordinal to use as host suffix.
69+
// stash operator sets desired ordinal as 'POD_ORDINAL' env while creating the job.
70+
if t.Replicas != nil || len(t.VolumeClaimTemplates) != 0 {
71+
if os.Getenv(KeyPodOrdinal) != "" {
72+
return "host-" + os.Getenv(KeyPodOrdinal), nil
6973
}
70-
podInfo := strings.Split(podName, "-")
71-
podOrdinal := podInfo[len(podInfo)-1]
72-
return "host-" + podOrdinal, nil
74+
return "", fmt.Errorf("'target.replicas' or 'target.volumeClaimTemplate' has been specified in RestoreSession" +
75+
" but 'POD_ORDINAL' env not found")
7376
}
7477
targetRef = t.Ref
7578
}
7679

80+
// backup/restore is running through sidecar/init-container. identify hostname for them.
7781
switch targetRef.Kind {
7882
case apis.KindStatefulSet:
83+
// for StatefulSet, host name is 'host-<pod ordinal>'. stash operator set pod's name as 'POD_NAME' env
84+
// in the sidecar/init-container through downward api. we have to parse the pod name to get ordinal.
7985
podName := os.Getenv(KeyPodName)
8086
if podName == "" {
81-
return "", fmt.Errorf("missing podName for %s", apis.KindStatefulSet)
87+
return "", fmt.Errorf("missing 'POD_NAME' env in StatefulSet: %s", apis.KindStatefulSet)
8288
}
8389
podInfo := strings.Split(podName, "-")
8490
podOrdinal := podInfo[len(podInfo)-1]
8591
return "host-" + podOrdinal, nil
8692
case apis.KindDaemonSet:
93+
// for DaemonSet, host name is the node name. stash operator set the respective node name as 'NODE_NAME' env
94+
// in the sidecar/init-container through downward api.
8795
nodeName := os.Getenv(KeyNodeName)
8896
if nodeName == "" {
89-
return "", fmt.Errorf("missing nodeName for %s", apis.KindDaemonSet)
97+
return "", fmt.Errorf("missing 'NODE_NAME' env for DaemonSet: %s", apis.KindDaemonSet)
9098
}
9199
return nodeName, nil
92100
default:

0 commit comments

Comments
 (0)