Skip to content

Commit 6bdac98

Browse files
pirgeo and joaopgrassi authored
fix metadata normalization (#43)
Co-authored-by: Joao Grassi <[email protected]>
1 parent baf631c commit 6bdac98

File tree

8 files changed

+288
-10
lines changed

8 files changed

+288
-10
lines changed

lib/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ plugins {
2020
}
2121

2222
group 'com.dynatrace.metric.util'
23-
version = '2.3.0'
23+
version = '2.4.0'
2424

2525
repositories {
2626
mavenCentral()

lib/src/main/java/com/dynatrace/metric/util/CodePoints.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ final class CodePoints {
2020
static final int EQUALS = "=".codePointAt(0);
2121
static final int BLANK = " ".codePointAt(0);
2222
static final int COLON = ":".codePointAt(0);
23+
static final int NEWLINE = "\n".codePointAt(0);
2324

2425
static final int ZERO = "0".codePointAt(0);
2526
static final int NINE = "9".codePointAt(0);
@@ -47,6 +48,8 @@ final class CodePoints {
4748

4849
static final String QUOTATION_MARK = "\"";
4950
static final String EMPTY_STRING = "";
51+
static final String ESCAPED_NEWLINE = "\\n";
52+
static final String ESCAPED_QUOTES = "\\\"";
5053

5154
private CodePoints() {}
5255
}

lib/src/main/java/com/dynatrace/metric/util/MetricLineBuilderImpl.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,7 @@ public String build() {
447447
this.description));
448448
} else {
449449
NormalizationResult normalizationResult =
450-
Normalizer.normalizeDimensionValue(
450+
Normalizer.normalizeMetadataString(
451451
this.description, MetadataLineConstants.Limits.MAX_DESCRIPTION_LENGTH);
452452
if (normalizationResult.messageType() != NormalizationResult.MessageType.NONE) {
453453
logger.warning(
@@ -503,7 +503,7 @@ public String build() {
503503
this.displayName));
504504
} else {
505505
NormalizationResult normalizationResult =
506-
Normalizer.normalizeDimensionValue(
506+
Normalizer.normalizeMetadataString(
507507
this.displayName, MetadataLineConstants.Limits.MAX_DISPLAY_NAME_LENGTH);
508508
if (normalizationResult.messageType() != NormalizationResult.MessageType.NONE) {
509509
logger.warning(

lib/src/main/java/com/dynatrace/metric/util/MetricLineConstants.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ private ValidationMessages() {}
100100
"Dimension key normalized from '%s' to '%s'";
101101
static final String DIMENSION_VALUE_NORMALIZED_MESSAGE =
102102
"Dimension value normalized from '%s' to '%s'";
103+
static final String METADATA_VALUE_NORMALIZED_MESSAGE =
104+
"Metadata value normalized from '%s' to '%s'";
103105
static final String DIMENSION_DROPPED_KEY_EMPTY_MESSAGE =
104106
"Dimension with empty dimension key has been dropped";
105107
static final String DIMENSION_DROPPED_KEY_EMPTY_WITH_METRIC_KEY_MESSAGE =

lib/src/main/java/com/dynatrace/metric/util/Normalizer.java

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
package com.dynatrace.metric.util;
1515

1616
import com.dynatrace.metric.util.MetricLineConstants.ValidationMessages;
17+
import java.util.function.Supplier;
1718

1819
/** Offers normalization methods for metric key, dimension key and dimension value */
1920
final class Normalizer {
@@ -257,6 +258,139 @@ static NormalizationResult normalizeDimensionValue(String value, int maxDimensio
257258
return normalizeUnquotedStringDimValue(value, maxDimensionValueLength);
258259
}
259260

261+
static NormalizationResult normalizeMetadataString(String value, int maxLength) {
262+
if (StringValueValidator.isNullOrEmpty(value)) {
263+
return NormalizationResult.newValid(CodePoints.EMPTY_STRING);
264+
}
265+
266+
// in a quoted string, quotes do not count towards the string character limit
267+
boolean isQuoted =
268+
value.startsWith(CodePoints.QUOTATION_MARK) && value.endsWith(CodePoints.QUOTATION_MARK);
269+
270+
int numValidBytes = 0;
271+
boolean needsNormalization = false;
272+
boolean needsEscaping = false;
273+
274+
// if quoted, start at index 1 (the one after the quote)
275+
int start = isQuoted ? 1 : 0;
276+
// if quoted, stop at length - 1 (the last byte before the quote)
277+
int end = isQuoted ? value.length() - 1 : value.length();
278+
279+
for (int i = start; i < end; ) {
280+
final int codePoint = value.codePointAt(i);
281+
final int codePointLength = Character.charCount(codePoint);
282+
283+
// code point is valid but needs escaping
284+
if (codePointNeedsEscaping(codePoint)) {
285+
needsEscaping = true;
286+
}
287+
288+
// code point is not valid, and needs normalizing (e.g. unicode control char)
289+
if (codePointNeedsNormalizing(codePoint)) {
290+
// if there are control characters, note that normalization is necessary but don't break yet
291+
// this way we know how many characters there will be in the output
292+
needsNormalization = true;
293+
} else {
294+
// if the char is valid, count up
295+
numValidBytes += codePointLength;
296+
// if the maxLength is reached, break.
297+
if (numValidBytes > maxLength) {
298+
needsNormalization = true;
299+
break;
300+
}
301+
}
302+
i += codePointLength;
303+
}
304+
305+
// return early if no normalization is needed.
306+
if (!needsNormalization) {
307+
// if the string is already quoted or doesn't need escaping, return as is
308+
if (isQuoted || !needsEscaping) {
309+
return NormalizationResult.newValid(value);
310+
} else {
311+
// not quoted, needs escaping, but no normalization (e.g., it contains a '=' character).
312+
// turn the string into a quoted string, where the '=' sign does not need escaping.
313+
return NormalizationResult.newValid("\"" + value + "\"");
314+
}
315+
}
316+
317+
// do normalization
318+
StringBuilder builder = new StringBuilder(numValidBytes + 2);
319+
builder.append(CodePoints.QUOTATION_MARK);
320+
Supplier<String> warningMessageSupplier = null;
321+
322+
// don't need to escape anything since we're working in a quoted string.
323+
for (int i = start; i < end; ) {
324+
final int codePoint = value.codePointAt(i);
325+
final int codePointLength = Character.charCount(codePoint);
326+
327+
// special handling: only newlines are considered, since the resulting string will be quoted
328+
// either way.
329+
if (codePoint == CodePoints.NEWLINE) {
330+
// check if the escaped character fits, exit if it does not.
331+
if (i + CodePoints.ESCAPED_NEWLINE.length() > maxLength) {
332+
break;
333+
}
334+
builder.append(CodePoints.ESCAPED_NEWLINE);
335+
} else if (codePoint == CodePoints.QUOTE) {
336+
// check if the escaped character fits, exit if it does not.
337+
if (i + CodePoints.ESCAPED_QUOTES.length() > maxLength) {
338+
break;
339+
}
340+
builder.append(CodePoints.ESCAPED_QUOTES);
341+
} else if (!codePointNeedsNormalizing(codePoint)) {
342+
// code point is valid, check if it fits, then add.
343+
if (i + codePointLength > maxLength) {
344+
break;
345+
}
346+
builder.appendCodePoint(codePoint);
347+
}
348+
349+
i += codePointLength;
350+
}
351+
builder.append(CodePoints.QUOTATION_MARK);
352+
353+
String normalized = builder.toString();
354+
if (normalized.length() == 2) {
355+
return NormalizationResult.newInvalid(
356+
() -> "no valid characters after normalization (input: " + value + ")");
357+
} else
358+
return NormalizationResult.newWarning(
359+
normalized,
360+
() ->
361+
String.format(
362+
ValidationMessages.METADATA_VALUE_NORMALIZED_MESSAGE, value, normalized));
363+
}
364+
365+
private static boolean codePointNeedsEscaping(int codePoint) {
366+
return codePoint == CodePoints.NEWLINE
367+
|| codePoint == CodePoints.BLANK
368+
|| codePoint == CodePoints.COMMA
369+
|| codePoint == CodePoints.EQUALS
370+
|| codePoint == CodePoints.QUOTE
371+
|| codePoint == CodePoints.BACKSLASH;
372+
}
373+
374+
private static boolean codePointNeedsNormalizing(int codePoint) {
375+
int type = Character.getType(codePoint);
376+
377+
// unassigned characters outside the range of Unicode 10.0 "Supplemental Symbols and
378+
// Pictographs" are not allowed.
379+
if (type == Character.UNASSIGNED) {
380+
// Unicode version 10 allowed Supplemental Symbols and pictographs
381+
// https://www.unicode.org/charts/PDF/Unicode-10.0/U100-1F900.pdf
382+
return codePoint <= CodePoints.UC_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_START
383+
|| codePoint >= CodePoints.UC_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_END;
384+
}
385+
386+
return type == Character.CONTROL
387+
|| type == Character.PRIVATE_USE
388+
|| type == Character.SURROGATE
389+
|| type == Character.LINE_SEPARATOR
390+
|| type == Character.PARAGRAPH_SEPARATOR
391+
|| codePoint == CodePoints.QUOTE;
392+
}
393+
260394
/**
261395
* Applies normalization to an unquoted string dimension value.
262396
*

lib/src/test/java/com/dynatrace/metric/util/MetadataBuilderTest.java

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,15 @@ private static Stream<Arguments> provideMetadataInformation() {
105105
"do_not_normalize_description",
106106
"do_not_normalize_description"),
107107
Tuple.of(
108-
"description to normalize/escape", "escape description", "escape\\ description"),
109-
Tuple.of("all invalid characters description", "\u0000", "_"),
108+
"description to normalize/escape", "escape description", "\"escape description\""),
109+
Tuple.of("all invalid characters description", "\u0000", null),
110+
Tuple.of(
111+
"multiline description", "multiline\ndescription", "\"multiline\\ndescription\""),
112+
Tuple.of("description with emoji (no escaping needed)", "\uD83E\uDD20", "\uD83E\uDD20"),
113+
Tuple.of(
114+
"description with emoji (with char to escape)",
115+
"\uD83E\uDD20 \uD83C\uDF1E",
116+
"\"\uD83E\uDD20 \uD83C\uDF1E\""),
110117
Tuple.of("empty description", "", null),
111118
Tuple.of("empty quoted description", "\"\"", null),
112119
Tuple.of("null description", null, null));
@@ -127,9 +134,16 @@ private static Stream<Arguments> provideMetadataInformation() {
127134
Tuple.of(
128135
"displayName to normalize/escape",
129136
"escape display name",
130-
"escape\\ display\\ name"),
131-
Tuple.of("all invalid characters displayName", "\u0000", "_"),
137+
"\"escape display name\""),
138+
Tuple.of("all invalid characters displayName", "\u0000", null),
132139
Tuple.of("empty displayName", "", null),
140+
Tuple.of(
141+
"multiline displayName", "multiline\ndisplayName", "\"multiline\\ndisplayName\""),
142+
Tuple.of("displayName with emoji (no escaping needed)", "\uD83E\uDD20", "\uD83E\uDD20"),
143+
Tuple.of(
144+
"displayName with emoji and escapable char",
145+
"\uD83C\uDF35 \uD83C\uDF1E",
146+
"\"\uD83C\uDF35 \uD83C\uDF1E\""),
133147
Tuple.of("empty quoted displayName", "\"\"", null),
134148
Tuple.of("null displayName", null, null));
135149

lib/src/test/java/com/dynatrace/metric/util/MetricBuilderTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -666,7 +666,7 @@ void testCreateMetadataLineWithDescription() throws MetricException {
666666

667667
assertEquals("prefix.name gauge,3", builder.value(3.).build());
668668
assertEquals(
669-
"#prefix.name gauge dt.meta.description=my\\ description\\ goes\\ here",
669+
"#prefix.name gauge dt.meta.description=\"my description goes here\"",
670670
builder.metadata().description("my description goes here").build());
671671
}
672672

@@ -679,7 +679,7 @@ void testCreateMetadataLineWithDisplayName() throws MetricException {
679679

680680
assertEquals("prefix.name gauge,3", builder.value(3.).build());
681681
assertEquals(
682-
"#prefix.name gauge dt.meta.displayName=my\\ displayName\\ goes\\ here",
682+
"#prefix.name gauge dt.meta.displayName=\"my displayName goes here\"",
683683
builder.metadata().displayName("my displayName goes here").build());
684684
}
685685

@@ -692,7 +692,7 @@ void testCreateMetadataLineWithUnitAndDescriptionAndDisplayName() throws MetricE
692692

693693
assertEquals("prefix.name gauge,3", builder.value(3.).build());
694694
assertEquals(
695-
"#prefix.name gauge dt.meta.description=my\\ description\\ goes\\ here,dt.meta.unit=unit,dt.meta.displayName=displayName",
695+
"#prefix.name gauge dt.meta.description=\"my description goes here\",dt.meta.unit=unit,dt.meta.displayName=displayName",
696696
builder
697697
.metadata()
698698
.description("my description goes here")

0 commit comments

Comments
 (0)