Merge pull request #1240 from jagadeeshi2i/autoscaler

chauhang · web-flow · commit 60c1b15132ee · 2021-10-04T16:16:04.000-05:00
feat: Add kubernetes HPA for torchserve
diff --git a/kubernetes/EKS/config.properties b/kubernetes/EKS/config.properties
@@ -1,5 +1,6 @@
 inference_address=http://0.0.0.0:8080
 management_address=http://0.0.0.0:8081
+metrics_address=http://0.0.0.0:8082
 NUM_WORKERS=1
 number_of_gpu=1
 number_of_netty_threads=32
diff --git a/kubernetes/README.md b/kubernetes/README.md
@@ -279,10 +279,12 @@ Follow the link for log aggregation with EFK Stack.\
   * Helm is picking up other .yaml files. Make sure you’ve added other files correctly to .helmignore. It should only run with values.yaml.
 * `kubectl describe pod` shows error message "0/1 nodes are available: 1 Insufficient cpu."
   * Ensure that the `n_cpu` value in `values.yaml` is set to a number that can be supported by the nodes in the cluster.
-  
+
+## Autoscaling
+  [Autoscaling with torchserve metrics](autoscale.md)
+
 ## Roadmap
 
-* [] Autoscaling
 * [] Log / Metrics Aggregation using [AWS Container Insights](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ContainerInsights.html)
 * [] EFK Stack Integration
 * [] Readiness / Liveness Probes
diff --git a/kubernetes/adapter.yaml b/kubernetes/adapter.yaml
@@ -0,0 +1,200 @@
+# Default values for k8s-prometheus-adapter.
+affinity: {}
+
+image:
+  repository: k8s.gcr.io/prometheus-adapter/prometheus-adapter
+  tag: v0.9.0
+  pullPolicy: IfNotPresent
+
+logLevel: 4
+
+metricsRelistInterval: 1m
+
+listenPort: 6443
+
+nodeSelector: {}
+
+priorityClassName: ""
+
+# Url to access prometheus
+prometheus:
+  # Value is templated
+  url: http://prometheus-server.default.svc.cluster.local
+  port: 80
+  path: ""
+
+replicas: 1
+
+# k8s 1.21 needs fsGroup to be set for non root deployments
+# ref: https://github.com/kubernetes/kubernetes/issues/70679
+podSecurityContext:
+  fsGroup: 10001
+
+rbac:
+  # Specifies whether RBAC resources should be created
+  create: true
+
+psp:
+  # Specifies whether PSP resources should be created
+  create: false
+
+serviceAccount:
+  # Specifies whether a service account should be created
+  create: true
+  # The name of the service account to use.
+  # If not set and create is true, a name is generated using the fullname template
+  name:
+  # ServiceAccount annotations.
+  # Use case: AWS EKS IAM roles for service accounts
+  # ref: https://docs.aws.amazon.com/eks/latest/userguide/specify-service-account-role.html
+  annotations: {}
+
+# Custom DNS configuration to be added to prometheus-adapter pods
+dnsConfig: {}
+# nameservers:
+#   - 1.2.3.4
+# searches:
+#   - ns1.svc.cluster-domain.example
+#   - my.dns.search.suffix
+# options:
+#   - name: ndots
+#     value: "2"
+#   - name: edns0
+resources: {}
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+rules:
+  default: true
+  custom: []
+# - seriesQuery: '{__name__=~"^some_metric_count$"}'
+#   resources:
+#     template: <<.Resource>>
+#   name:
+#     matches: ""
+#     as: "my_custom_metric"
+#   metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
+  # Mounts a configMap with pre-generated rules for use. Overrides the
+  # default, custom, external and resource entries
+  existing:
+  external:
+  - seriesQuery: '{__name__=~"^ts_queue_latency_microseconds"}'
+    resources:
+      overrides:
+        namespace:
+          resource: namespace
+        service:
+          resource: service
+        pod:
+          resource: pod
+    name:
+      matches: "^(.*)_microseconds"
+      as: "ts_queue_latency_microseconds"
+    metricsQuery: ts_queue_latency_microseconds
+  resource: {}
+#   cpu:
+#     containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>, container!=""}[3m])) by (<<.GroupBy>>)
+#     nodeQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>, id='/'}[3m])) by (<<.GroupBy>>)
+#     resources:
+#       overrides:
+#         node:
+#           resource: node
+#         namespace:
+#           resource: namespace
+#         pod:
+#           resource: pod
+#     containerLabel: container
+#   memory:
+#     containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>, container!=""}) by (<<.GroupBy>>)
+#     nodeQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,id='/'}) by (<<.GroupBy>>)
+#     resources:
+#       overrides:
+#         node:
+#           resource: node
+#         namespace:
+#           resource: namespace
+#         pod:
+#           resource: pod
+#     containerLabel: container
+#   window: 3m
+
+service:
+  annotations: {}
+  port: 443
+  type: ClusterIP
+# clusterIP: 1.2.3.4
+
+tls:
+  enable: false
+  ca: |-
+    # Public CA file that signed the APIService
+  key: |-
+    # Private key of the APIService
+  certificate: |-
+    # Public key of the APIService
+
+# Any extra arguments
+extraArguments: []
+  # - --tls-private-key-file=/etc/tls/tls.key
+  # - --tls-cert-file=/etc/tls/tls.crt
+
+# Any extra volumes
+extraVolumes: []
+  # - name: example-name
+  #   hostPath:
+  #     path: /path/on/host
+  #     type: DirectoryOrCreate
+  # - name: ssl-certs
+  #   hostPath:
+  #     path: /etc/ssl/certs/ca-bundle.crt
+  #     type: File
+
+# Any extra volume mounts
+extraVolumeMounts: []
+  #   - name: example-name
+  #     mountPath: /path/in/container
+  #   - name: ssl-certs
+  #     mountPath: /etc/ssl/certs/ca-certificates.crt
+  #     readOnly: true
+
+tolerations: []
+
+# Labels added to the pod
+podLabels: {}
+
+# Annotations added to the pod
+podAnnotations: {}
+
+hostNetwork:
+  # Specifies if prometheus-adapter should be started in hostNetwork mode.
+  #
+  # You need this enabled if you use an alternate overlay network for pods and the
+  # API server is unable to communicate with metrics-server. As an example, this is
+  # required if you use the Weave network on EKS. See also dnsPolicy
+  enabled: false
+
+# When hostNetwork is enabled, you probably want to set this to ClusterFirstWithHostNet
+# dnsPolicy: ClusterFirstWithHostNet
+
+# Deployment strategy type
+strategy:
+  type: RollingUpdate
+  rollingUpdate:
+    maxUnavailable: 25%
+    maxSurge: 25%
+
+podDisruptionBudget:
+  # Specifies if PodDisruptionBudget should be enabled
+  # When enabled, minAvailable or maxUnavailable should also be defined.
+  enabled: false
+  minAvailable:
+  maxUnavailable: 1
+
+certManager:
+  enabled: false
+  caCertDuration: 43800h
+  certDuration: 8760h
diff --git a/kubernetes/autoscale.md b/kubernetes/autoscale.md
@@ -0,0 +1,170 @@
+# Autoscaler 
+
+Set up Kubernetes HPA (Horizontal Pod Autoscaler) for TorchServe, tuned for TorchServe metrics. This uses Prometheus as the metrics collector and Prometheus Adapter as the metrics server, serving TorchServe metrics to the HPA.
+
+## Steps
+
+### 1. Install Torchserve with metrics enabled for prometheus format
+
+[Install TorchServe using Helm Charts](README.md#deploy-torchserve-using-helm-charts)
+### 2. Install Prometheus
+
+```bash
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+helm repo update
+helm install prometheus prometheus-community/prometheus
+```
+
+The above command outputs the Prometheus server URL:
+
+```bash
+NAME: prometheus
+LAST DEPLOYED: Wed Sep  8 19:10:49 2021
+NAMESPACE: default
+STATUS: deployed
+REVISION: 1
+TEST SUITE: None
+NOTES:
+The Prometheus server can be accessed via port 80 on the following DNS name from within your cluster:
+prometheus-server.default.svc.cluster.local
+...
+...
+```
+
+### 3. Install Prometheus Adapter
+
+- Update the Prometheus URL and port in adapter.yaml. Use the URL given in the Prometheus installation output.
+
+```yaml
+# Url to access prometheus
+prometheus:
+  # Value is templated
+  url: http://prometheus-server.default.svc.cluster.local
+  port: 80
+  path: ""
+```
+
+- Update the external metrics rules in adapter.yaml. Here we enable external metrics in the Prometheus adapter and serve the `ts_queue_latency_microseconds` metric.
+
+```yaml
+external:
+- seriesQuery: '{__name__=~"^ts_queue_latency_microseconds"}'
+  resources:
+    overrides:
+      namespace:
+        resource: namespace
+      service:
+        resource: service
+      pod:
+        resource: pod
+  name:
+    matches: "^(.*)_microseconds"
+    as: "ts_queue_latency_microseconds"
+  metricsQuery: ts_queue_latency_microseconds
+```
+
+Refer: [Prometheus Adapter External Metrics](https://github.com/kubernetes-sigs/prometheus-adapter/blob/master/docs/externalmetrics.md)
+
+- Install Prometheus adapter
+
+```bash
+helm install -f adapter.yaml prometheus-adapter prometheus-community/prometheus-adapter
+```
+
+The output of the above command is:
+
+```
+NAME: adapter
+LAST DEPLOYED: Wed Sep  8 19:49:28 2021
+NAMESPACE: default
+STATUS: deployed
+REVISION: 1
+TEST SUITE: None
+NOTES:
+adapter-prometheus-adapter has been deployed.
+In a few minutes you should be able to list metrics using the following command(s):
+
+  kubectl get --raw /apis/custom.metrics.k8s.io/v1beta1
+
+  kubectl get --raw /apis/external.metrics.k8s.io/v1beta1
+```
+
+#### Check External metrics list
+
+```bash
+kubectl get --raw /apis/external.metrics.k8s.io/v1beta1 | jq
+```
+
+```json
+{
+  "kind": "APIResourceList",
+  "apiVersion": "v1",
+  "groupVersion": "external.metrics.k8s.io/v1beta1",
+  "resources": [
+    {
+      "name": "ts_queue_latency_microseconds",
+      "singularName": "",
+      "namespaced": true,
+      "kind": "ExternalMetricValueList",
+      "verbs": [
+        "get"
+      ]
+    }
+  ]
+}
+```
+
+### 4. Deploy Horizontal Pod Autoscaler for Torchserve
+
+- Change `targetValue` as per requirement.
+
+```yaml
+kind: HorizontalPodAutoscaler
+apiVersion: autoscaling/v2beta1
+metadata:
+  name: torchserve
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: torchserve
+  # autoscale between 1 and 5 replicas
+  minReplicas: 1
+  maxReplicas: 5
+  metrics:
+  - type: External
+    external:
+      metricName: ts_queue_latency_microseconds
+      targetValue: "7000000m"
+```
+
+```bash
+kubectl apply -f hpa.yaml
+```
+
+### 5. Check status of HPA
+
+```bash
+kubectl describe hpa torchserve
+```
+
+```bash
+Name:                                              torchserve
+Namespace:                                         default
+Labels:                                            <none>
+Annotations:                                       <none>
+CreationTimestamp:                                 Wed, 08 Sep 2021 20:09:48 +0530
+Reference:                                         Deployment/torchserve
+Metrics:                                           ( current / target )
+  "ts_queue_latency_microseconds" (target value):  5257630m / 7k
+Min replicas:                                      1
+Max replicas:                                      5
+Deployment pods:                                   3 current / 3 desired
+Conditions:
+  Type            Status  Reason              Message
+  ----            ------  ------              -------
+  AbleToScale     True    ReadyForNewScale    recommended size matches current size
+  ScalingActive   True    ValidMetricFound    the HPA was able to successfully calculate a replica count from external metric ts_queue_latency_microseconds(nil)
+  ScalingLimited  False   DesiredWithinRange  the desired count is within the acceptable range
+Events:           <none>
+```
diff --git a/kubernetes/hpa.yaml b/kubernetes/hpa.yaml