Merge branch 'master' into master

msaroufim · web-flow · commit bba17ada5768 · 2021-10-05T18:14:07.000-07:00
diff --git a/examples/text_to_speech_synthesizer/waveglow_handler.py b/examples/text_to_speech_synthesizer/waveglow_handler.py
@@ -74,7 +74,9 @@ def initialize(self, ctx):
 
     def preprocess(self, data):
         """
-         Scales, crops, and normalizes a PIL image for a MNIST model,
+         converts text to a sequence of IDs using Tacotron2 text_to_sequence
+         with english cleaners to transform text and standardize input
+         (ex: lowercasing, expanding abbreviations and numbers, etc.)
          returns an Numpy array
         """
         text = data[0].get("data")
diff --git a/kubernetes/EKS/config.properties b/kubernetes/EKS/config.properties
@@ -1,5 +1,6 @@
 inference_address=http://0.0.0.0:8080
 management_address=http://0.0.0.0:8081
+metrics_address=http://0.0.0.0:8082
 NUM_WORKERS=1
 number_of_gpu=1
 number_of_netty_threads=32
diff --git a/kubernetes/README.md b/kubernetes/README.md
@@ -279,10 +279,12 @@ Follow the link for log aggregation with EFK Stack.\
   * Helm is picking up other .yaml files. Make sure you’ve added other files correctly to .helmignore. It should only run with values.yaml.
 * `kubectl describe pod` shows error message "0/1 nodes are available: 1 Insufficient cpu."
   * Ensure that the `n_cpu` value in `values.yaml` is set to a number that can be supported by the nodes in the cluster.
-  
+
+## Autoscaling
+  [Autoscaling with TorchServe metrics](autoscale.md)
+
 ## Roadmap
 
-* [] Autoscaling
 * [] Log / Metrics Aggregation using [AWS Container Insights](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ContainerInsights.html)
 * [] EFK Stack Integration
 * [] Readiness / Liveness Probes
diff --git a/kubernetes/adapter.yaml b/kubernetes/adapter.yaml
@@ -0,0 +1,200 @@
+# Default values for k8s-prometheus-adapter.
+affinity: {}
+
+image:
+  repository: k8s.gcr.io/prometheus-adapter/prometheus-adapter
+  tag: v0.9.0
+  pullPolicy: IfNotPresent
+
+logLevel: 4
+
+metricsRelistInterval: 1m
+
+listenPort: 6443
+
+nodeSelector: {}
+
+priorityClassName: ""
+
+# URL to access Prometheus
+prometheus:
+  # Value is templated
+  url: http://prometheus-server.default.svc.cluster.local
+  port: 80
+  path: ""
+
+replicas: 1
+
+# k8s 1.21 needs fsGroup to be set for non-root deployments
+# ref: https://github.com/kubernetes/kubernetes/issues/70679
+podSecurityContext:
+  fsGroup: 10001
+
+rbac:
+  # Specifies whether RBAC resources should be created
+  create: true
+
+psp:
+  # Specifies whether PSP resources should be created
+  create: false
+
+serviceAccount:
+  # Specifies whether a service account should be created
+  create: true
+  # The name of the service account to use.
+  # If not set and create is true, a name is generated using the fullname template
+  name:
+  # ServiceAccount annotations.
+  # Use case: AWS EKS IAM roles for service accounts
+  # ref: https://docs.aws.amazon.com/eks/latest/userguide/specify-service-account-role.html
+  annotations: {}
+
+# Custom DNS configuration to be added to prometheus-adapter pods
+dnsConfig: {}
+# nameservers:
+#   - 1.2.3.4
+# searches:
+#   - ns1.svc.cluster-domain.example
+#   - my.dns.search.suffix
+# options:
+#   - name: ndots
+#     value: "2"
+#   - name: edns0
+resources: {}
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+rules:
+  default: true
+  custom: []
+# - seriesQuery: '{__name__=~"^some_metric_count$"}'
+#   resources:
+#     template: <<.Resource>>
+#   name:
+#     matches: ""
+#     as: "my_custom_metric"
+#   metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
+  # Mounts a configMap with pre-generated rules for use. Overrides the
+  # default, custom, external and resource entries
+  existing:
+  external:
+  - seriesQuery: '{__name__=~"^ts_queue_latency_microseconds"}'
+    resources:
+      overrides:
+        namespace:
+          resource: namespace
+        service:
+          resource: service
+        pod:
+          resource: pod
+    name:
+      matches: "^(.*)_microseconds"
+      as: "ts_queue_latency_microseconds"
+    metricsQuery: ts_queue_latency_microseconds
+  resource: {}
+#   cpu:
+#     containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>, container!=""}[3m])) by (<<.GroupBy>>)
+#     nodeQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>, id='/'}[3m])) by (<<.GroupBy>>)
+#     resources:
+#       overrides:
+#         node:
+#           resource: node
+#         namespace:
+#           resource: namespace
+#         pod:
+#           resource: pod
+#     containerLabel: container
+#   memory:
+#     containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>, container!=""}) by (<<.GroupBy>>)
+#     nodeQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,id='/'}) by (<<.GroupBy>>)
+#     resources:
+#       overrides:
+#         node:
+#           resource: node
+#         namespace:
+#           resource: namespace
+#         pod:
+#           resource: pod
+#     containerLabel: container
+#   window: 3m
+
+service:
+  annotations: {}
+  port: 443
+  type: ClusterIP
+# clusterIP: 1.2.3.4
+
+tls:
+  enable: false
+  ca: |-
+    # Public CA file that signed the APIService
+  key: |-
+    # Private key of the APIService
+  certificate: |-
+    # Public key of the APIService
+
+# Any extra arguments
+extraArguments: []
+  # - --tls-private-key-file=/etc/tls/tls.key
+  # - --tls-cert-file=/etc/tls/tls.crt
+
+# Any extra volumes
+extraVolumes: []
+  # - name: example-name
+  #   hostPath:
+  #     path: /path/on/host
+  #     type: DirectoryOrCreate
+  # - name: ssl-certs
+  #   hostPath:
+  #     path: /etc/ssl/certs/ca-bundle.crt
+  #     type: File
+
+# Any extra volume mounts
+extraVolumeMounts: []
+  #   - name: example-name
+  #     mountPath: /path/in/container
+  #   - name: ssl-certs
+  #     mountPath: /etc/ssl/certs/ca-certificates.crt
+  #     readOnly: true
+
+tolerations: []
+
+# Labels added to the pod
+podLabels: {}
+
+# Annotations added to the pod
+podAnnotations: {}
+
+hostNetwork:
+  # Specifies if prometheus-adapter should be started in hostNetwork mode.
+  #
+  # You need this enabled if you use alternate overlay networking for pods and the
+  # API server is unable to communicate with metrics-server. As an example, this is
+  # required if you use Weave network on EKS. See also dnsPolicy.
+  enabled: false
+
+# When hostNetwork is enabled, you probably want to set this to ClusterFirstWithHostNet
+# dnsPolicy: ClusterFirstWithHostNet
+
+# Deployment strategy type
+strategy:
+  type: RollingUpdate
+  rollingUpdate:
+    maxUnavailable: 25%
+    maxSurge: 25%
+
+podDisruptionBudget:
+  # Specifies if PodDisruptionBudget should be enabled
+  # When enabled, minAvailable or maxUnavailable should also be defined.
+  enabled: false
+  minAvailable:
+  maxUnavailable: 1
+
+certManager:
+  enabled: false
+  caCertDuration: 43800h
+  certDuration: 8760h
diff --git a/kubernetes/autoscale.md b/kubernetes/autoscale.md
diff --git a/kubernetes/hpa.yaml b/kubernetes/hpa.yaml