diff --git a/Cargo.toml b/Cargo.toml
index f8743b31..5d5d0802 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,6 +30,7 @@ fnv = "1.0"
 lazy_static = "1.4"
 libc = { version = "0.2", optional = true }
 protobuf = { version = "2.0", optional = true }
+regex = "1.3"
 reqwest = { version = "0.10", features = ["blocking"], optional = true }
 thiserror = "1.0"
 parking_lot = "0.10.2"
diff --git a/benches/text_encoder.rs b/benches/text_encoder.rs
new file mode 100644
index 00000000..f4ffc070
--- /dev/null
+++ b/benches/text_encoder.rs
@@ -0,0 +1,88 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#![feature(test)]
+
+extern crate test;
+
+use prometheus::{CounterVec, Encoder, HistogramOpts, HistogramVec, Opts, Registry, TextEncoder};
+use test::Bencher;
+
+#[bench]
+fn bench_text_encoder_without_escaping(b: &mut Bencher) {
+    let registry = registry_with_test_metrics(false);
+    run_text_encoder(registry, b)
+}
+
+#[bench]
+fn bench_text_encoder_with_escaping(b: &mut Bencher) {
+    let registry = registry_with_test_metrics(true);
+    run_text_encoder(registry, b)
+}
+
+/// Builds a registry holding 100 counter and 100 histogram families with 100
+/// label sets each; `with_escaping` selects label values that need escaping.
+fn registry_with_test_metrics(with_escaping: bool) -> Registry {
+    let registry = Registry::new();
+
+    for i in 0..100 {
+        let counter = CounterVec::new(
+            Opts::new(
+                format!("benchmark_counter_{}", i),
+                "A counter to benchmark it.",
+            ),
+            &["one", "two", "three"],
+        )
+        .unwrap();
+        registry.register(Box::new(counter.clone())).unwrap();
+
+        let histogram = HistogramVec::new(
+            HistogramOpts::new(
+                format!("benchmark_histogram_{}", i),
+                "A histogram to benchmark it.",
+            ),
+            &["one", "two", "three"],
+        )
+        .unwrap();
+        registry.register(Box::new(histogram.clone())).unwrap();
+
+        for j in 0..100 {
+            let j_string = j.to_string();
+            let label_values = if with_escaping {
+                ["ei\\ns\n", "zw\"e\"i", &j_string]
+            } else {
+                ["eins", "zwei", &j_string]
+            };
+
+            counter.with_label_values(&label_values).inc();
+            histogram.with_label_values(&label_values).observe(j.into());
+        }
+    }
+
+    registry
+}
+
+/// Gathers the registry's metric families once and benchmarks encoding them.
+fn run_text_encoder(registry: Registry, b: &mut Bencher) {
+    let mut buffer = vec![];
+    let encoder = TextEncoder::new();
+    let metric_families = registry.gather();
+
+    b.iter(|| {
+        // Start every iteration from an empty buffer; otherwise the output of
+        // all previous iterations accumulates and the benchmark also measures
+        // unbounded buffer growth.
+        buffer.clear();
+        encoder.encode(&metric_families, &mut buffer).unwrap()
+    });
+}
diff --git a/src/encoder/text.rs b/src/encoder/text.rs
index f638c924..cfdeb64a 100644
--- a/src/encoder/text.rs
+++ b/src/encoder/text.rs
@@ -1,5 +1,7 @@
 // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0.
 
+use regex::{Match, Regex};
+use std::borrow::Cow;
 use std::io::Write;
 
 use crate::errors::Result;
@@ -32,23 +34,33 @@ impl Encoder for TextEncoder {
             // Fail-fast checks.
             check_metric_family(mf)?;
 
+            // Write `# HELP` header.
             let name = mf.get_name();
             let help = mf.get_help();
             if !help.is_empty() {
-                writeln!(writer, "# HELP {} {}", name, escape_string(help, false))?;
+                writer.write_all(b"# HELP ")?;
+                writer.write_all(name.as_bytes())?;
+                writer.write_all(b" ")?;
+                writer.write_all(escape_string(help, false).as_bytes())?;
+                writer.write_all(b"\n")?;
             }
 
+            // Write `# TYPE` header.
             let metric_type = mf.get_field_type();
             let lowercase_type = format!("{:?}", metric_type).to_lowercase();
-            writeln!(writer, "# TYPE {} {}", name, lowercase_type)?;
+            writer.write_all(b"# TYPE ")?;
+            writer.write_all(name.as_bytes())?;
+            writer.write_all(b" ")?;
+            writer.write_all(lowercase_type.as_bytes())?;
+            writer.write_all(b"\n")?;
 
             for m in mf.get_metric() {
                 match metric_type {
                     MetricType::COUNTER => {
-                        write_sample(name, m, "", "", m.get_counter().get_value(), writer)?;
+                        write_sample(writer, name, None, m, None, m.get_counter().get_value())?;
                     }
                     MetricType::GAUGE => {
-                        write_sample(name, m, "", "", m.get_gauge().get_value(), writer)?;
+                        write_sample(writer, name, None, m, None, m.get_gauge().get_value())?;
                     }
                     MetricType::HISTOGRAM => {
                         let h = m.get_histogram();
@@ -57,12 +69,12 @@ impl Encoder for TextEncoder {
                         for b in h.get_bucket() {
                             let upper_bound = b.get_upper_bound();
                             write_sample(
-                                &format!("{}_bucket", name),
+                                writer,
+                                name,
+                                Some("_bucket"),
                                 m,
-                                BUCKET_LABEL,
-                                &format!("{}", upper_bound),
+                                Some((BUCKET_LABEL, &upper_bound.to_string())),
                                 b.get_cumulative_count() as f64,
-                                writer,
                             )?;
                             if upper_bound.is_sign_positive() && upper_bound.is_infinite() {
                                 inf_seen = true;
@@ -70,31 +82,24 @@ impl Encoder for TextEncoder {
                         }
                         if !inf_seen {
                             write_sample(
-                                &format!("{}_bucket", name),
+                                writer,
+                                name,
+                                Some("_bucket"),
                                 m,
-                                BUCKET_LABEL,
-                                POSITIVE_INF,
+                                Some((BUCKET_LABEL, POSITIVE_INF)),
                                 h.get_sample_count() as f64,
-                                writer,
                             )?;
                         }
 
-                        write_sample(
-                            &format!("{}_sum", name),
-                            m,
-                            "",
-                            "",
-                            h.get_sample_sum(),
-                            writer,
-                        )?;
+                        write_sample(writer, name, Some("_sum"), m, None, h.get_sample_sum())?;
 
                         write_sample(
-                            &format!("{}_count", name),
+                            writer,
+                            name,
+                            Some("_count"),
                             m,
-                            "",
-                            "",
+                            None,
                             h.get_sample_count() as f64,
-                            writer,
                         )?;
                     }
                     MetricType::SUMMARY => {
@@ -102,31 +107,24 @@ impl Encoder for TextEncoder {
 
                         for q in s.get_quantile() {
                             write_sample(
+                                writer,
                                 name,
+                                None,
                                 m,
-                                QUANTILE,
-                                &format!("{}", q.get_quantile()),
+                                Some((QUANTILE, &q.get_quantile().to_string())),
                                 q.get_value(),
-                                writer,
                             )?;
                         }
 
-                        write_sample(
-                            &format!("{}_sum", name),
-                            m,
-                            "",
-                            "",
-                            s.get_sample_sum(),
-                            writer,
-                        )?;
+                        write_sample(writer, name, Some("_sum"), m, None, s.get_sample_sum())?;
 
                         write_sample(
-                            &format!("{}_count", name),
+                            writer,
+                            name,
+                            Some("_count"),
                             m,
-                            "",
-                            "",
+                            None,
                             s.get_sample_count() as f64,
-                            writer,
                         )?;
                     }
                     MetricType::UNTYPED => {
@@ -145,31 +143,32 @@ impl Encoder for TextEncoder {
 }
 
 /// `write_sample` writes a single sample in text format to `writer`, given the
-/// metric name, the metric proto message itself, optionally an additional label
-/// name and value (use empty strings if not required), and the value.
-/// The function returns the number of bytes written and any error encountered.
+/// metric name, an optional metric name postfix, the metric proto message
+/// itself, optionally an additional label name and value (use `None` if not
+/// required), and the value. The function returns any error encountered while
+/// writing.
 fn write_sample(
+    writer: &mut dyn Write,
     name: &str,
+    name_postfix: Option<&str>,
     mc: &proto::Metric,
-    additional_label_name: &str,
-    additional_label_value: &str,
+    additional_label: Option<(&str, &str)>,
     value: f64,
-    writer: &mut dyn Write,
 ) -> Result<()> {
     writer.write_all(name.as_bytes())?;
+    if let Some(postfix) = name_postfix {
+        writer.write_all(postfix.as_bytes())?;
+    }
 
-    label_pairs_to_text(
-        mc.get_label(),
-        additional_label_name,
-        additional_label_value,
-        writer,
-    )?;
+    label_pairs_to_text(mc.get_label(), additional_label, writer)?;
 
-    write!(writer, " {}", value)?;
+    writer.write_all(b" ")?;
+    writer.write_all(value.to_string().as_bytes())?;
 
     let timestamp = mc.get_timestamp_ms();
     if timestamp != 0 {
-        write!(writer, " {}", timestamp)?;
+        writer.write_all(b" ")?;
+        writer.write_all(timestamp.to_string().as_bytes())?;
     }
 
     writer.write_all(b"\n")?;
@@ -186,35 +185,30 @@ fn write_sample(
 /// bytes written and any error encountered.
 fn label_pairs_to_text(
     pairs: &[proto::LabelPair],
-    additional_label_name: &str,
-    additional_label_value: &str,
+    additional_label: Option<(&str, &str)>,
     writer: &mut dyn Write,
 ) -> Result<()> {
-    if pairs.is_empty() && additional_label_name.is_empty() {
+    if pairs.is_empty() && additional_label.is_none() {
         return Ok(());
     }
 
-    let mut separator = "{";
+    let mut separator = b"{";
     for lp in pairs {
-        write!(
-            writer,
-            "{}{}=\"{}\"",
-            separator,
-            lp.get_name(),
-            escape_string(lp.get_value(), true)
-        )?;
-
-        separator = ",";
+        writer.write_all(separator)?;
+        writer.write_all(lp.get_name().as_bytes())?;
+        writer.write_all(b"=\"")?;
+        writer.write_all(escape_string(lp.get_value(), true).as_bytes())?;
+        writer.write_all(b"\"")?;
+
+        separator = b",";
     }
 
-    if !additional_label_name.is_empty() {
-        write!(
-            writer,
-            "{}{}=\"{}\"",
-            separator,
-            additional_label_name,
-            escape_string(additional_label_value, true)
-        )?;
+    if let Some((name, value)) = additional_label {
+        writer.write_all(separator)?;
+        writer.write_all(name.as_bytes())?;
+        writer.write_all(b"=\"")?;
+        writer.write_all(escape_string(value, true).as_bytes())?;
+        writer.write_all(b"\"")?;
     }
 
     writer.write_all(b"}")?;
@@ -224,26 +218,51 @@ fn label_pairs_to_text(
 
 /// `escape_string` replaces `\` by `\\`, new line character by `\n`, and `"` by `\"` if
 /// `include_double_quote` is true.
-fn escape_string(v: &str, include_double_quote: bool) -> String {
-    let mut escaped = String::with_capacity(v.len() * 2);
+///
+/// Implementation adapted from
+/// https://lise-henry.github.io/articles/optimising_strings.html
+fn escape_string(v: &str, include_double_quote: bool) -> Cow<'_, str> {
+    // Regex compilation is expensive. Use `lazy_static` to compile the regexes
+    // once per process lifetime and not once per function invocation.
+    lazy_static! {
+        static ref ESCAPER: Regex = Regex::new("(\\\\|\n)").expect("Regex to be valid.");
+        static ref QUOTED_ESCAPER: Regex = Regex::new("(\\\\|\n|\")").expect("Regex to be valid.");
+    }
 
-    for c in v.chars() {
-        match c {
-            '\\' | '\n' => {
-                escaped.extend(c.escape_default());
-            }
-            '"' if include_double_quote => {
-                escaped.extend(c.escape_default());
-            }
-            _ => {
-                escaped.push(c);
+    let first_occurrence = if include_double_quote {
+        QUOTED_ESCAPER.find(v)
+    } else {
+        ESCAPER.find(v)
+    }
+    .as_ref()
+    .map(Match::start);
+
+    if let Some(first) = first_occurrence {
+        let mut escaped = String::with_capacity(v.len() * 2);
+        escaped.push_str(&v[0..first]);
+        let remainder = v[first..].chars();
+
+        for c in remainder {
+            match c {
+                '\\' | '\n' => {
+                    escaped.extend(c.escape_default());
+                }
+                '"' if include_double_quote => {
+                    escaped.extend(c.escape_default());
+                }
+                _ => {
+                    escaped.push(c);
+                }
             }
         }
-    }
 
-    escaped.shrink_to_fit();
-
-    escaped
+        escaped.shrink_to_fit();
+        escaped.into()
+    } else {
+        // The input string does not contain any characters that would need to
+        // be escaped. Return it as it is.
+        v.into()
+    }
 }
 
 #[cfg(test)]