@@ -406,7 +406,7 @@ func (r *Record) applyValueLimits(val log.Value) log.Value {
406406 case log .KindString :
407407 s := val .AsString ()
408408 if len (s ) > r .attributeValueLengthLimit {
409- val = log .StringValue (truncate (s , r .attributeValueLengthLimit ))
409+ val = log .StringValue (truncate (r .attributeValueLengthLimit , s ))
410410 }
411411 case log .KindSlice :
412412 sl := val .AsSlice ()
@@ -427,40 +427,78 @@ func (r *Record) applyValueLimits(val log.Value) log.Value {
427427 return val
428428}
429429
// truncate returns a version of s that contains at most limit characters
// (Unicode code points). Truncation keeps the first limit validly encoded
// characters of s and never splits a multi-byte UTF-8 sequence.
//
// If limit is negative, no limit applies and s is returned unchanged.
//
// If len(s) is no greater than limit, s is returned unchanged without being
// inspected, so any invalid UTF-8 it contains is preserved.
//
// Otherwise s is scanned. Bytes that are not part of a valid UTF-8 encoding
// are dropped and do not count towards the limit. A validly encoded U+FFFD
// (the replacement character) is an ordinary character: it is kept and
// counted.
func truncate(limit int, s string) string {
	// This prioritizes performance in the following order based on the most
	// common expected use-cases.
	//
	//  - Short values less than the default limit (128).
	//  - Strings with valid encodings that exceed the limit.
	//  - No limit.
	//  - Strings with invalid encodings that exceed the limit.
	if limit < 0 || len(s) <= limit {
		return s
	}

	// Optimistically, assume all valid UTF-8. This pass allocates nothing:
	// it either returns a sub-slice of s or records where the first invalid
	// byte was found.
	count := 0    // Valid characters seen so far.
	invalid := -1 // Byte offset of the first invalid byte, -1 if none.
	for i, c := range s {
		if c == utf8.RuneError {
			// Ranging over a string yields RuneError both for an invalid
			// byte (width 1) and for a validly encoded U+FFFD (width 3).
			// Only the former is an encoding error.
			if _, size := utf8.DecodeRuneInString(s[i:]); size == 1 {
				invalid = i
				break
			}
		}

		count++
		if count > limit {
			// The character starting at i is the first one over the limit.
			return s[:i]
		}
	}

	if invalid < 0 {
		// Fast-path, no invalid input and no more than limit characters.
		return s
	}
	if count == limit {
		// The valid prefix already fills the limit, nothing more to copy.
		return s[:invalid]
	}

	// Truncate while validating UTF-8. At least one byte (the invalid one at
	// s[invalid]) is dropped, so len(s)-1 bounds the size of the output.
	var b strings.Builder
	b.Grow(len(s) - 1)
	_, _ = b.WriteString(s[:invalid]) // Builder writes never return an error.
	for i := invalid; i < len(s) && count < limit; {
		c := s[i]
		if c < utf8.RuneSelf {
			// Optimization for single byte runes (common case).
			_ = b.WriteByte(c)
			i++
			count++
			continue
		}

		_, size := utf8.DecodeRuneInString(s[i:])
		if size == 1 {
			// We checked for all 1-byte runes above, this is a RuneError.
			i++
			continue
		}

		_, _ = b.WriteString(s[i : i+size])
		i += size
		count++
	}

	return b.String()
}
0 commit comments