tikv · mxinden · Jun 19, 2020 · May 26, 2020 · May 26, 2020 · May 26, 2020
diff --git a/Cargo.toml b/Cargo.toml
@@ -30,6 +30,7 @@ fnv = "1.0"
 lazy_static = "1.4"
 libc = { version = "0.2", optional = true }
 protobuf = { version = "2.0", optional = true }
+regex = "1.3"
 reqwest = { version = "0.10", features = ["blocking"], optional = true }
 thiserror = "1.0"
 parking_lot = "0.10.2"

diff --git a/benches/text_encoder.rs b/benches/text_encoder.rs
@@ -0,0 +1,79 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#![feature(test)]
+
+extern crate test;
+
+use prometheus::{CounterVec, Encoder, HistogramOpts, HistogramVec, Opts, Registry, TextEncoder};
+use test::Bencher;
+
+#[bench]
+fn bench_text_encoder_without_escaping(b: &mut Bencher) {
+    // Plain label values: no character in them needs escaping.
+    run_text_encoder(registry_with_test_metrics(false), b)
+}
+
+#[bench]
+fn bench_text_encoder_with_escaping(b: &mut Bencher) {
+    // Label values contain `\`, a newline and `"`, which the encoder must escape.
+    run_text_encoder(registry_with_test_metrics(true), b)
+}
+
+/// Builds a registry with 100 counter and 100 histogram vectors of 100 series
+/// each; `with_escaping` selects label values that need escaping when encoded.
+fn registry_with_test_metrics(with_escaping: bool) -> Registry {
+    const LABEL_NAMES: [&str; 3] = ["one", "two", "three"];
+    // The first two label values are fixed; only the third varies per series.
+    let (first, second) = if with_escaping {
+        ("ei\\ns\n", "zw\"e\"i")
+    } else {
+        ("eins", "zwei")
+    };
+
+    let registry = Registry::new();
+
+    for metric_idx in 0..100 {
+        let counter_opts = Opts::new(
+            format!("benchmark_counter_{}", metric_idx),
+            "A counter to benchmark it.",
+        );
+        let counter = CounterVec::new(counter_opts, &LABEL_NAMES).unwrap();
+        registry.register(Box::new(counter.clone())).unwrap();
+
+        let histogram_opts = HistogramOpts::new(
+            format!("benchmark_histogram_{}", metric_idx),
+            "A histogram to benchmark it.",
+        );
+        let histogram = HistogramVec::new(histogram_opts, &LABEL_NAMES).unwrap();
+        registry.register(Box::new(histogram.clone())).unwrap();
+
+        for series_idx in 0..100 {
+            let third = series_idx.to_string();
+            let label_values = [first, second, third.as_str()];
+
+            counter.with_label_values(&label_values).inc();
+            histogram.with_label_values(&label_values).observe(series_idx.into());
+        }
+    }
+
+    registry
+}
+
+fn run_text_encoder(registry: Registry, b: &mut Bencher) {
+    let (encoder, metric_families, mut buffer) = (TextEncoder::new(), registry.gather(), vec![]);
+    b.iter(|| {
+        buffer.clear(); // Reuse the buffer; otherwise output piles up across iterations.
+        encoder.encode(&metric_families, &mut buffer).unwrap()
+    });
+}
diff --git a/src/encoder/text.rs b/src/encoder/text.rs
@@ -1,5 +1,7 @@
 // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0.
 
+use regex::{Match, Regex};
+use std::borrow::Cow;
 use std::io::Write;
 
 use crate::errors::Result;
@@ -32,23 +34,33 @@ impl Encoder for TextEncoder {
             // Fail-fast checks.
             check_metric_family(mf)?;
 
+            // Write `# HELP` header.
             let name = mf.get_name();
             let help = mf.get_help();
             if !help.is_empty() {
-                writeln!(writer, "# HELP {} {}", name, escape_string(help, false))?;
+                writer.write_all(b"# HELP ")?;
+                writer.write_all(name.as_bytes())?;
+                writer.write_all(b" ")?;
+                writer.write_all(escape_string(help, false).as_bytes())?;
+                writer.write_all(b"\n")?;
             }
 
+            // Write `# TYPE` header.
             let metric_type = mf.get_field_type();
             let lowercase_type = format!("{:?}", metric_type).to_lowercase();
-            writeln!(writer, "# TYPE {} {}", name, lowercase_type)?;
+            writer.write_all(b"# TYPE ")?;
+            writer.write_all(name.as_bytes())?;
+            writer.write_all(b" ")?;
+            writer.write_all(lowercase_type.as_bytes())?;
+            writer.write_all(b"\n")?;
 
             for m in mf.get_metric() {
                 match metric_type {
                     MetricType::COUNTER => {
-                        write_sample(name, m, "", "", m.get_counter().get_value(), writer)?;
+                        write_sample(writer, name, None, m, None, m.get_counter().get_value())?;
                     }
                     MetricType::GAUGE => {
-                        write_sample(name, m, "", "", m.get_gauge().get_value(), writer)?;
+                        write_sample(writer, name, None, m, None, m.get_gauge().get_value())?;
                     }
                     MetricType::HISTOGRAM => {
                         let h = m.get_histogram();
@@ -57,76 +69,62 @@ impl Encoder for TextEncoder {
                         for b in h.get_bucket() {
                             let upper_bound = b.get_upper_bound();
                             write_sample(
-                                &format!("{}_bucket", name),
+                                writer,
+                                name,
+                                Some("_bucket"),
                                 m,
-                                BUCKET_LABEL,
-                                &format!("{}", upper_bound),
+                                Some((BUCKET_LABEL, &upper_bound.to_string())),
                                 b.get_cumulative_count() as f64,
-                                writer,
                             )?;
                             if upper_bound.is_sign_positive() && upper_bound.is_infinite() {
                                 inf_seen = true;
                             }
                         }
                         if !inf_seen {
                             write_sample(
-                                &format!("{}_bucket", name),
+                                writer,
+                                name,
+                                Some("_bucket"),
                                 m,
-                                BUCKET_LABEL,
-                                POSITIVE_INF,
+                                Some((BUCKET_LABEL, POSITIVE_INF)),
                                 h.get_sample_count() as f64,
-                                writer,
                             )?;
                         }
 
-                        write_sample(
-                            &format!("{}_sum", name),
-                            m,
-                            "",
-                            "",
-                            h.get_sample_sum(),
-                            writer,
-                        )?;
+                        write_sample(writer, name, Some("_sum"), m, None, h.get_sample_sum())?;
 
                         write_sample(
-                            &format!("{}_count", name),
+                            writer,
+                            name,
+                            Some("_count"),
                             m,
-                            "",
-                            "",
+                            None,
                             h.get_sample_count() as f64,
-                            writer,
                         )?;
                     }
                     MetricType::SUMMARY => {
                         let s = m.get_summary();
 
                         for q in s.get_quantile() {
                             write_sample(
+                                writer,
                                 name,
+                                None,
                                 m,
-                                QUANTILE,
-                                &format!("{}", q.get_quantile()),
+                                Some((QUANTILE, &q.get_quantile().to_string())),
                                 q.get_value(),
-                                writer,
                             )?;
                         }
 
-                        write_sample(
-                            &format!("{}_sum", name),
-                            m,
-                            "",
-                            "",
-                            s.get_sample_sum(),
-                            writer,
-                        )?;
+                        write_sample(writer, name, Some("_sum"), m, None, s.get_sample_sum())?;
 
                         write_sample(
-                            &format!("{}_count", name),
+                            writer,
+                            name,
+                            Some("_count"),
                             m,
-                            "",
-                            "",
+                            None,
                             s.get_sample_count() as f64,
-                            writer,
                         )?;
                     }
                     MetricType::UNTYPED => {
@@ -145,31 +143,32 @@ impl Encoder for TextEncoder {
 }
 
 /// `write_sample` writes a single sample in text format to `writer`, given the
-/// metric name, the metric proto message itself, optionally an additional label
-/// name and value (use empty strings if not required), and the value.
-/// The function returns the number of bytes written and any error encountered.
+/// metric name, an optional metric name postfix, the metric proto message
+/// itself, optionally an additional label name and value (pass `None` if not
+/// required), and the value. The function returns any error encountered while
+/// writing.
 fn write_sample(
+    writer: &mut dyn Write,
     name: &str,
+    name_postfix: Option<&str>,
     mc: &proto::Metric,
-    additional_label_name: &str,
-    additional_label_value: &str,
+    additional_label: Option<(&str, &str)>,
     value: f64,
-    writer: &mut dyn Write,
 ) -> Result<()> {
     writer.write_all(name.as_bytes())?;
+    if let Some(postfix) = name_postfix {
+        writer.write_all(postfix.as_bytes())?;
+    }
 
-    label_pairs_to_text(
-        mc.get_label(),
-        additional_label_name,
-        additional_label_value,
-        writer,
-    )?;
+    label_pairs_to_text(mc.get_label(), additional_label, writer)?;
 
-    write!(writer, " {}", value)?;
+    writer.write_all(b" ")?;
+    writer.write_all(value.to_string().as_bytes())?;
 
     let timestamp = mc.get_timestamp_ms();
     if timestamp != 0 {
-        write!(writer, " {}", timestamp)?;
+        writer.write_all(b" ")?;
+        writer.write_all(timestamp.to_string().as_bytes())?;
     }
 
     writer.write_all(b"\n")?;
@@ -186,35 +185,30 @@ fn write_sample(
 /// bytes written and any error encountered.
 fn label_pairs_to_text(
     pairs: &[proto::LabelPair],
-    additional_label_name: &str,
-    additional_label_value: &str,
+    additional_label: Option<(&str, &str)>,
     writer: &mut dyn Write,
 ) -> Result<()> {
-    if pairs.is_empty() && additional_label_name.is_empty() {
+    if pairs.is_empty() && additional_label.is_none() {
         return Ok(());
     }
 
-    let mut separator = "{";
+    let mut separator = b"{";
     for lp in pairs {
-        write!(
-            writer,
-            "{}{}=\"{}\"",
-            separator,
-            lp.get_name(),
-            escape_string(lp.get_value(), true)
-        )?;
-
-        separator = ",";
+        writer.write_all(separator)?;
+        writer.write_all(lp.get_name().as_bytes())?;
+        writer.write_all(b"=\"")?;
+        writer.write_all(escape_string(lp.get_value(), true).as_bytes())?;
+        writer.write_all(b"\"")?;
+
+        separator = b",";
     }
 
-    if !additional_label_name.is_empty() {
-        write!(
-            writer,
-            "{}{}=\"{}\"",
-            separator,
-            additional_label_name,
-            escape_string(additional_label_value, true)
-        )?;
+    if let Some((name, value)) = additional_label {
+        writer.write_all(separator)?;
+        writer.write_all(name.as_bytes())?;
+        writer.write_all(b"=\"")?;
+        writer.write_all(escape_string(value, true).as_bytes())?;
+        writer.write_all(b"\"")?;
     }
 
     writer.write_all(b"}")?;
@@ -224,26 +218,49 @@ fn label_pairs_to_text(
 
 /// `escape_string` replaces `\` by `\\`, new line character by `\n`, and `"` by `\"` if
 /// `include_double_quote` is true.
-fn escape_string(v: &str, include_double_quote: bool) -> String {
-    let mut escaped = String::with_capacity(v.len() * 2);
+///
+/// Implementation adapted from
+/// https://lise-henry.github.io/articles/optimising_strings.html
+fn escape_string(v: &str, include_double_quote: bool) -> Cow<'_, str> {
+    // Regex compilation is expensive. Use `lazy_static` to compile the regexes
+    // once per process lifetime and not once per function invocation.
+    lazy_static! {
+        static ref ESCAPER: Regex = Regex::new("(\\\\|\n)").expect("Regex to be valid.");
+        static ref QUOTED_ESCAPER: Regex = Regex::new("(\\\\|\n|\")").expect("Regex to be valid.");
+    }
 
-    for c in v.chars() {
-        match c {
-            '\\' | '\n' => {
-                escaped.extend(c.escape_default());
-            }
-            '"' if include_double_quote => {
-                escaped.extend(c.escape_default());
-            }
-            _ => {
-                escaped.push(c);
+    let first_occurrence = if include_double_quote {
+        QUOTED_ESCAPER.find(v)
+    } else {
+        ESCAPER.find(v)
+    }
+    .as_ref()
+    .map(Match::start);
+
+    if let Some(first) = first_occurrence {
+        let mut escaped = String::with_capacity(v.len() * 2);
+        escaped.push_str(&v[0..first]);
+        let remainder = v[first..].chars();
+
+        for c in remainder {
+            match c {
+                '\\' | '\n' => {
+                    escaped.extend(c.escape_default());
+                }
+                '"' if include_double_quote => {
+                    escaped.extend(c.escape_default());
+                }
+                _ => {
+                    escaped.push(c);
+                }
             }
         }
+        escaped.into()
+    } else {
+        // The input string does not contain any characters that would need to
+        // be escaped. Return it as it is.
+        v.into()
     }
-
-    escaped.shrink_to_fit();
-
-    escaped
 }
 
 #[cfg(test)]