Commit f20e406: Add Sumo examples (#681)
1 parent 14c7015

6 files changed: +370 additions, -0 deletions
examples/README.md

Lines changed: 45 additions & 0 deletions

# Examples

## Kubernetes configuration

### Helm chart values template

[kubernetes/custom-values.yaml](./kubernetes/custom-values.yaml) contains
an example template for the Sumologic Kubernetes Collection Helm chart, which
installs OpenTelemetry Collector in the Agent and Gateway configuration, as described
in the [documentation](https://help.sumologic.com/Traces/Getting_Started_with_Transaction_Tracing/Set_up_traces_collection_for_Kubernetes_environments).

After filling in the template values, you can install it following the
[Sumologic Kubernetes Collection installation instructions](https://github.com/SumoLogic/sumologic-kubernetes-collection/blob/release-v2.0/deploy/docs/Installation_with_Helm.md),
for example by running the following commands:

```shell
helm repo add sumologic https://sumologic.github.io/sumologic-kubernetes-collection
kubectl create namespace sumologic
helm upgrade --install my-release -n sumologic sumologic/sumologic -f custom-values.yaml
```
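Filling in the template values can also be scripted. A minimal sketch in Python, assuming the `<ENTER_...>` placeholder names used in the values template (the credential values below are illustrative, not real):

```python
# Minimal sketch: substitute the <ENTER_...> placeholders in a values template.
# The placeholder names follow the custom-values.yaml template; the values
# passed in below are illustrative only.

def fill_template(template: str, values: dict) -> str:
    """Replace each <PLACEHOLDER> occurrence with its configured value."""
    for placeholder, value in values.items():
        template = template.replace(f"<{placeholder}>", value)
    return template

template = """sumologic:
  accessId: <ENTER_YOUR_SUMOLOGIC_ACCESS_ID>
  accessKey: <ENTER_YOUR_SUMOLOGIC_ACCESS_KEY>
  clusterName: <ENTER_YOUR_CLUSTER_NAME>
"""

filled = fill_template(template, {
    "ENTER_YOUR_SUMOLOGIC_ACCESS_ID": "suXXXXXXXXXXXX",
    "ENTER_YOUR_SUMOLOGIC_ACCESS_KEY": "example-access-key",
    "ENTER_YOUR_CLUSTER_NAME": "my-cluster",
})
print(filled)
```

The filled file can then be passed to `helm upgrade --install ... -f custom-values.yaml` as shown above.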

### Helm chart values template with cascading filter enabled

Additionally, [kubernetes/custom-values-cascading-filter.yaml](./kubernetes/custom-values-cascading-filter.yaml)
includes an alternative example template that enables the cascading filter,
as described in the [trace filtering documentation](https://help.sumologic.com/Traces/Getting_Started_with_Transaction_Tracing/What_if_I_don't_want_to_send_all_the_tracing_data_to_Sumo_Logic%3F).
Note that the cascading filter is currently supported only for single-instance
OpenTelemetry Collector deployments.

## Non-Kubernetes configuration

### Agent configuration (should be run on each host/node)

[non-kubernetes/agent-configuration-template.yaml](non-kubernetes/agent-configuration-template.yaml) contains
an OpenTelemetry Collector YAML file with the configuration for an
OpenTelemetry Collector running in Agent mode. It should be
deployed on each host/node within the system.

### Gateway configuration (should be run per cluster/data center/etc.)

[non-kubernetes/gateway-configuration-template.yaml](non-kubernetes/gateway-configuration-template.yaml) contains
an OpenTelemetry Collector YAML file with the configuration for an
OpenTelemetry Collector running in Gateway mode.

Additionally, [non-kubernetes/gateway-configuration-template-with-cascading-filter.yaml](non-kubernetes/gateway-configuration-template-with-cascading-filter.yaml)
also includes the cascading filter configuration,
which is described in more detail in the [trace filtering documentation](https://help.sumologic.com/Traces/Getting_Started_with_Transaction_Tracing/What_if_I_don't_want_to_send_all_the_tracing_data_to_Sumo_Logic%3F).

Please refer to the [relevant documentation](https://help.sumologic.com/Traces/Getting_Started_with_Transaction_Tracing/Set_up_traces_collection_for_other_environments)
for more details.
Lines changed: 73 additions & 0 deletions

```yaml
sumologic:
  accessId: <ENTER_YOUR_SUMOLOGIC_ACCESS_ID>
  accessKey: <ENTER_YOUR_SUMOLOGIC_ACCESS_KEY>
  clusterName: <ENTER_YOUR_CLUSTER_NAME>
  traces:
    enabled: true
## Following enables the OpenTelemetry Agent, which runs on each node as a DaemonSet
otelagent:
  enabled: true
## Following configures the OpenTelemetry Collector (gateway)
## Note that if cascading_filter is used, the deployment must include only a single instance
otelcol:
  metrics:
    ## This enables exposing OpenTelemetry Collector metrics. Note that they will consume
    ## your DPM, hence they are disabled by default
    enabled: true
  config:
    processors:
      ## Following enables smart cascading filtering rules with preset limits
      cascading_filter:
        ## (default = 30s): Wait time since the first span of a trace before making
        ## a filtering decision
        decision_wait: 30s
        ## (default = 50000): Number of traces kept in memory
        num_traces: 50000
        ## (default = 0): Expected number of new traces (helps in allocating data structures)
        expected_new_traces_per_sec: 100
        ## (default = 0): Defines the maximum number of spans per second
        spans_per_second: 1600
        ## (default = 0.2): Ratio of spans that are always probabilistically filtered
        ## (hence might be used for metrics calculation)
        probabilistic_filtering_ratio: 0.2
        ## (no default): Policies used to make a sampling decision
        policies:
          - name: sampling-priority
            ## string_attribute: allows specifying conditions that need to be met
            string_attribute: {
              key: sampling.priority, values: [ "1" ]
            }
            ## spans_per_second: max number of spans emitted per second by this policy
            spans_per_second: 500
          - name: everything-else
            ## This selects all traces, up to the global limit
            spans_per_second: -1
          ## Following are some examples of other rules that could be used
          # - name: extended-duration
          #   ## spans_per_second: max number of spans emitted per second by this policy
          #   spans_per_second: 500
          #   properties:
          #     ## Selects the span if the duration is greater than or equal to the given
          #     ## value (use s or ms as the suffix to indicate the unit)
          #     min_duration: 5s
          # - name: "status_code_condition"
          #   ## spans_per_second: max number of spans emitted per second by this policy
          #   spans_per_second: 500
          #   ## numeric_attribute: provides a list of conditions that need to be met
          #   numeric_attribute: {
          #     key: "http.status_code", min_value: 400, max_value: 999
          #   }
          # - name: everything-that-is-not-healthcheck
          #   ## This selects all traces where there is NO span whose operation name
          #   ## matches the pattern. If employed, the "everything-else" rule must be
          #   ## replaced with it
          #   properties:
          #     name_pattern: "^(healthcheck|otherhealthcheck).*"
          #     invert_match: true
          #   spans_per_second: -1
    service:
      pipelines:
        traces:
          ## This is required to enable cascading_filter
          processors: [memory_limiter, k8s_tagger, source, resource, cascading_filter, batch]
```
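The relationship between the global `spans_per_second` limit, the `probabilistic_filtering_ratio`, and the per-policy limits in the template above can be illustrated with back-of-the-envelope arithmetic. This is a simplified model for intuition only, not the processor's actual accounting:

```python
# Simplified model of how the cascading filter's global span budget could be
# split between probabilistic filtering and policy rules. For intuition only;
# the real processor's algorithm is more involved.

def split_budget(spans_per_second: int, probabilistic_ratio: float) -> dict:
    """Split the global per-second span budget between the always-on
    probabilistic share and the share left for policy rules."""
    probabilistic = int(spans_per_second * probabilistic_ratio)
    return {
        "probabilistic": probabilistic,                 # always-sampled share
        "policies": spans_per_second - probabilistic,   # left for policy rules
    }

# Values from the template above: spans_per_second: 1600, ratio: 0.2
budget = split_budget(1600, 0.2)
print(budget)  # {'probabilistic': 320, 'policies': 1280}
```

Under this reading, the `sampling-priority` policy's `spans_per_second: 500` fits comfortably in the remaining budget, and `everything-else` (`-1`) consumes whatever is left.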
Lines changed: 16 additions & 0 deletions

```yaml
sumologic:
  accessId: <ENTER_YOUR_SUMOLOGIC_ACCESS_ID>
  accessKey: <ENTER_YOUR_SUMOLOGIC_ACCESS_KEY>
  clusterName: <ENTER_YOUR_CLUSTER_NAME>
  traces:
    enabled: true
otelcol:
  ## This enables exposing OpenTelemetry Collector metrics. Note that they will consume
  ## your DPM, hence they are disabled by default
  metrics:
    enabled: true
## Following enables the OpenTelemetry Agent, which runs on each node as a DaemonSet
otelagent:
  enabled: true
```
Lines changed: 70 additions & 0 deletions

```yaml
receivers:
  jaeger:
    protocols:
      thrift_compact:
        endpoint: "0.0.0.0:6831"
      thrift_binary:
        endpoint: "0.0.0.0:6832"
      grpc:
        endpoint: "0.0.0.0:14250"
      thrift_http:
        endpoint: "0.0.0.0:14268"
  opencensus:
    endpoint: "0.0.0.0:55678"
  otlp:
    protocols:
      grpc:
        endpoint: "0.0.0.0:4317"
      http:
        endpoint: "0.0.0.0:55681"
  zipkin:
    endpoint: "0.0.0.0:9411"
processors:
  ## The memory_limiter processor is used to prevent out-of-memory situations on the collector
  memory_limiter:
    ## check_interval is the time between measurements of memory usage for the
    ## purpose of avoiding going over the limits. Defaults to zero, so no
    ## checks will be performed. Values below 1 second are not recommended, since
    ## they can result in unnecessary CPU consumption
    check_interval: 5s

    ## Maximum amount of memory, in MiB, targeted to be allocated by the process heap.
    ## Note that typically the total memory usage of the process will be about
    ## 50 MiB higher than this value
    limit_mib: 500

  ## Please enable/disable detectors accordingly, depending on whether you run on
  ## AWS (ec2), GCE, ECS, Elastic Beanstalk, or none of these
  resourcedetection:
    detectors: [ ec2, gce, ecs, elastic_beanstalk ]
    timeout: 5s
    override: false

  ## The batch processor accepts spans and places them into batches grouped by node and resource
  batch:
    ## Number of spans after which a batch will be sent regardless of time
    send_batch_size: 256
    ## Never more than this many spans are sent in a batch
    send_batch_max_size: 512
    ## Time duration after which a batch will be sent regardless of size
    timeout: 5s

extensions:
  health_check: {}
exporters:
  otlp:
    ## Please enter the OpenTelemetry Collector Gateway address here
    endpoint: HOSTNAME
    insecure: true
  ## Following generates verbose logs with span content, useful to verify which
  ## metadata is being tagged. To enable, uncomment and add "logging" to the exporters below.
  ## There are two levels that can be used: `debug` and `info`, with the former
  ## being much more verbose and including (sampled) span content
  # logging:
  #   loglevel: debug
service:
  extensions: [health_check]
  pipelines:
    traces:
      receivers: [jaeger, opencensus, otlp, zipkin]
      processors: [memory_limiter, resourcedetection, batch]
      exporters: [otlp]
```
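The batch processor settings above (`send_batch_size`, `send_batch_max_size`, `timeout`) can be read as a simple flushing rule: send a batch once it reaches the target size or once the timeout elapses, and never exceed the maximum size in a single send. A rough sketch of that rule, not the collector's actual implementation:

```python
# Rough sketch of the batch processor's flushing rule, using the values from
# the template above. Not the collector's actual implementation.

SEND_BATCH_SIZE = 256      # flush once this many spans are buffered
SEND_BATCH_MAX_SIZE = 512  # hard cap on a single outgoing batch
TIMEOUT_S = 5.0            # flush after this long regardless of size

def should_flush(batch_len: int, seconds_since_first_span: float) -> bool:
    """Flush on size or on timeout, whichever comes first."""
    return batch_len >= SEND_BATCH_SIZE or seconds_since_first_span >= TIMEOUT_S

def split_for_send(spans: list) -> list:
    """Cap every outgoing batch at SEND_BATCH_MAX_SIZE spans."""
    return [spans[i:i + SEND_BATCH_MAX_SIZE]
            for i in range(0, len(spans), SEND_BATCH_MAX_SIZE)]

print(should_flush(256, 0.1))   # size trigger -> True
print(should_flush(10, 5.0))    # timeout trigger -> True
print([len(b) for b in split_for_send(list(range(700)))])  # [512, 188]
```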
Lines changed: 104 additions & 0 deletions

```yaml
receivers:
  jaeger:
    protocols:
      thrift_compact:
        endpoint: "0.0.0.0:6831"
      thrift_binary:
        endpoint: "0.0.0.0:6832"
      grpc:
        endpoint: "0.0.0.0:14250"
      thrift_http:
        endpoint: "0.0.0.0:14268"
  opencensus:
    endpoint: "0.0.0.0:55678"
  otlp:
    protocols:
      grpc:
        endpoint: "0.0.0.0:4317"
      http:
        endpoint: "0.0.0.0:55681"
  zipkin:
    endpoint: "0.0.0.0:9411"
processors:
  ## The memory_limiter processor is used to prevent out-of-memory situations on the collector
  memory_limiter:
    ## check_interval is the time between measurements of memory usage for the
    ## purpose of avoiding going over the limits. Defaults to zero, so no
    ## checks will be performed. Values below 1 second are not recommended, since
    ## they can result in unnecessary CPU consumption
    check_interval: 5s

    ## Maximum amount of memory, in MiB, targeted to be allocated by the process heap.
    ## Note that typically the total memory usage of the process will be about
    ## 50 MiB higher than this value
    limit_mib: 1900

  ## Smart cascading filtering rules with preset limits
  cascading_filter:
    ## (default = 30s): Wait time since the first span of a trace arrived before making
    ## a filtering decision
    decision_wait: 30s
    ## (default = 50000): Maximum number of traces kept in memory
    num_traces: 100000
    ## (default = 0): Expected number of new traces (helps in allocating data structures)
    expected_new_traces_per_sec: 1000
    ## (default = 0): Defines the global limit on the maximum number of spans per second
    ## that are going to be emitted
    spans_per_second: 1660
    ## (default = 0.2): Ratio of spans that are always probabilistically filtered
    ## (hence might be used for metrics calculation)
    probabilistic_filtering_ratio: 0.2
    ## (no default): Policies used to make a sampling decision
    policies:
      - name: sampling-priority
        ## string_attribute: allows specifying conditions that need to be met
        string_attribute: {
          key: sampling.priority, values: [ "1" ]
        }
        ## spans_per_second: max number of spans emitted per second by this policy
        spans_per_second: 500
      - name: extended-duration
        ## spans_per_second: max number of spans emitted per second by this policy
        spans_per_second: 500
        properties:
          ## Selects the span if the duration is greater than or equal to the given
          ## value (use s or ms as the suffix to indicate the unit)
          min_duration: 5s
      - name: "status_code_condition"
        ## spans_per_second: max number of spans emitted per second by this policy
        spans_per_second: 500
        ## numeric_attribute: provides a list of conditions that need to be met
        numeric_attribute: {
          key: "http.status_code", min_value: 400, max_value: 999
        }
      - name: everything-else
        ## This selects all traces, up to the global limit
        spans_per_second: -1

  ## The batch processor accepts spans and places them into batches grouped by node and resource
  batch:
    ## Number of spans after which a batch will be sent regardless of time
    send_batch_size: 256
    ## Never more than this many spans are sent in a batch
    send_batch_max_size: 512
    ## Time duration after which a batch will be sent regardless of size
    timeout: 5s

extensions:
  health_check: {}
exporters:
  zipkin:
    endpoint: ENDPOINT_URL
  ## Following generates verbose logs with span content, useful to verify which
  ## metadata is being tagged. To enable, uncomment and add "logging" to the exporters below.
  ## There are two levels that can be used: `debug` and `info`, with the former
  ## being much more verbose and including (sampled) span content
  # logging:
  #   loglevel: debug
service:
  extensions: [health_check]
  pipelines:
    traces:
      receivers: [jaeger, opencensus, otlp, zipkin]
      processors: [memory_limiter, cascading_filter, batch]
      exporters: [zipkin]
```
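The `numeric_attribute` condition in the `status_code_condition` policy above selects traces containing a span whose attribute value falls within the inclusive `[min_value, max_value]` range. A minimal sketch of that check, with spans represented as plain dicts for illustration:

```python
# Minimal sketch of the numeric_attribute policy condition: a trace matches if
# any of its spans carries the attribute with a value in [min_value, max_value]
# (inclusive). The span representation (plain dicts) is illustrative only.

def numeric_attribute_matches(trace_spans: list, key: str,
                              min_value: int, max_value: int) -> bool:
    """Return True if any span has `key` with a value inside the range."""
    for span in trace_spans:
        value = span.get("attributes", {}).get(key)
        if value is not None and min_value <= value <= max_value:
            return True
    return False

trace = [
    {"name": "GET /users", "attributes": {"http.status_code": 200}},
    {"name": "GET /orders", "attributes": {"http.status_code": 503}},
]
# With the template's values: key http.status_code, min 400, max 999
print(numeric_attribute_matches(trace, "http.status_code", 400, 999))  # True
```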
Lines changed: 62 additions & 0 deletions

```yaml
receivers:
  jaeger:
    protocols:
      thrift_compact:
        endpoint: "0.0.0.0:6831"
      thrift_binary:
        endpoint: "0.0.0.0:6832"
      grpc:
        endpoint: "0.0.0.0:14250"
      thrift_http:
        endpoint: "0.0.0.0:14268"
  opencensus:
    endpoint: "0.0.0.0:55678"
  otlp:
    protocols:
      grpc:
        endpoint: "0.0.0.0:4317"
      http:
        endpoint: "0.0.0.0:55681"
  zipkin:
    endpoint: "0.0.0.0:9411"
processors:
  ## The memory_limiter processor is used to prevent out-of-memory situations on the collector
  memory_limiter:
    ## check_interval is the time between measurements of memory usage for the
    ## purpose of avoiding going over the limits. Defaults to zero, so no
    ## checks will be performed. Values below 1 second are not recommended, since
    ## they can result in unnecessary CPU consumption
    check_interval: 5s

    ## Maximum amount of memory, in MiB, targeted to be allocated by the process heap.
    ## Note that typically the total memory usage of the process will be about
    ## 50 MiB higher than this value
    limit_mib: 1900

  ## The batch processor accepts spans and places them into batches grouped by node and resource
  batch:
    ## Number of spans after which a batch will be sent regardless of time
    send_batch_size: 256
    ## Never more than this many spans are sent in a batch
    send_batch_max_size: 512
    ## Time duration after which a batch will be sent regardless of size
    timeout: 5s

extensions:
  health_check: {}
exporters:
  zipkin:
    endpoint: ENDPOINT_URL
  ## Following generates verbose logs with span content, useful to verify which
  ## metadata is being tagged. To enable, uncomment and add "logging" to the exporters below.
  ## There are two levels that can be used: `debug` and `info`, with the former
  ## being much more verbose and including (sampled) span content
  # logging:
  #   loglevel: debug
service:
  extensions: [health_check]
  pipelines:
    traces:
      receivers: [jaeger, opencensus, otlp, zipkin]
      processors: [memory_limiter, batch]
      exporters: [zipkin]
```
