Skip to content
This repository was archived by the owner on Apr 1, 2025. It is now read-only.

Commit 36babc3

Browse files
cjcullen authored and niemeyer committed
Port stale simple_keys fix to v2 (#543)
This should simplify the logic and significantly improve performance in edge cases as found and reported on #537 by CJ Cullen.
1 parent 770b8da commit 36babc3

File tree

3 files changed

+57
-34
lines changed

3 files changed

+57
-34
lines changed

decode_test.go

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -722,6 +722,18 @@ var unmarshalTests = []struct {
722722
"a: 5.5\n",
723723
&struct{ A jsonNumberT }{"5.5"},
724724
},
725+
{
726+
`
727+
a:
728+
b
729+
b:
730+
? a
731+
: a`,
732+
&M{"a": "b",
733+
"b": M{
734+
"a": "a",
735+
}},
736+
},
725737
}
726738

727739
type M map[interface{}]interface{}

limit_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ var limitTests = []struct {
3737
{name: "10kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 10*1024/4-1) + `]`)},
3838
{name: "100kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 100*1024/4-1) + `]`)},
3939
{name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)},
40+
{name: "1000kb slice nested at max-depth", data: []byte(strings.Repeat(`[`, 10000) + `1` + strings.Repeat(`,1`, 1000*1024/2-20000-1) + strings.Repeat(`]`, 10000))},
41+
{name: "1000kb slice nested in maps at max-depth", data: []byte("{a,b:\n" + strings.Repeat(" {a,b:", 10000-2) + ` [1` + strings.Repeat(",1", 1000*1024/2-6*10000-1) + `]` + strings.Repeat(`}`, 10000-1))},
4042
}
4143

4244
func (s *S) TestLimits(c *C) {
@@ -82,6 +84,14 @@ func Benchmark1000KBMaps(b *testing.B) {
8284
benchmark(b, "1000kb of maps")
8385
}
8486

87+
func BenchmarkDeepSlice(b *testing.B) {
88+
benchmark(b, "1000kb slice nested at max-depth")
89+
}
90+
91+
func BenchmarkDeepFlow(b *testing.B) {
92+
benchmark(b, "1000kb slice nested in maps at max-depth")
93+
}
94+
8595
func benchmark(b *testing.B, name string) {
8696
for _, t := range limitTests {
8797
if t.name != name {

scannerc.go

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -634,13 +634,12 @@ func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
634634
need_more_tokens = true
635635
} else {
636636
// Check if any potential simple key may occupy the head position.
637-
if !yaml_parser_stale_simple_keys(parser) {
638-
return false
639-
}
640-
641-
for i := range parser.simple_keys {
637+
for i := len(parser.simple_keys) - 1; i >= 0; i-- {
642638
simple_key := &parser.simple_keys[i]
643-
if simple_key.possible && simple_key.token_number == parser.tokens_parsed {
639+
if simple_key.token_number < parser.tokens_parsed {
640+
break
641+
}
642+
if yaml_simple_key_is_valid(parser, simple_key) && simple_key.token_number == parser.tokens_parsed {
644643
need_more_tokens = true
645644
break
646645
}
@@ -678,11 +677,6 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool {
678677
return false
679678
}
680679

681-
// Remove obsolete potential simple keys.
682-
if !yaml_parser_stale_simple_keys(parser) {
683-
return false
684-
}
685-
686680
// Check the indentation level against the current column.
687681
if !yaml_parser_unroll_indent(parser, parser.mark.column) {
688682
return false
@@ -837,27 +831,28 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool {
837831
"found character that cannot start any token")
838832
}
839833

840-
// Check the list of potential simple keys and remove the positions that
841-
// cannot contain simple keys anymore.
842-
func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool {
843-
// Check for a potential simple key for each flow level.
844-
for i := range parser.simple_keys {
845-
simple_key := &parser.simple_keys[i]
846-
847-
// The specification requires that a simple key
848-
//
849-
// - is limited to a single line,
850-
// - is shorter than 1024 characters.
851-
if simple_key.possible && (simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index) {
852-
853-
// Check if the potential simple key to be removed is required.
854-
if simple_key.required {
855-
return yaml_parser_set_scanner_error(parser,
856-
"while scanning a simple key", simple_key.mark,
857-
"could not find expected ':'")
858-
}
859-
simple_key.possible = false
834+
func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) bool {
835+
if !simple_key.possible {
836+
return false
837+
}
838+
839+
// The 1.2 specification says:
840+
//
841+
// "If the ? indicator is omitted, parsing needs to see past the
842+
// implicit key to recognize it as such. To limit the amount of
843+
// lookahead required, the “:” indicator must appear at most 1024
844+
// Unicode characters beyond the start of the key. In addition, the key
845+
// is restricted to a single line."
846+
//
847+
if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index {
848+
// Check if the potential simple key to be removed is required.
849+
if simple_key.required {
850+
return yaml_parser_set_scanner_error(parser,
851+
"while scanning a simple key", simple_key.mark,
852+
"could not find expected ':'")
860853
}
854+
simple_key.possible = false
855+
return false
861856
}
862857
return true
863858
}
@@ -879,8 +874,8 @@ func yaml_parser_save_simple_key(parser *yaml_parser_t) bool {
879874
possible: true,
880875
required: required,
881876
token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
877+
mark: parser.mark,
882878
}
883-
simple_key.mark = parser.mark
884879

885880
if !yaml_parser_remove_simple_key(parser) {
886881
return false
@@ -912,7 +907,12 @@ const max_flow_level = 10000
912907
// Increase the flow level and resize the simple key list if needed.
913908
func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
914909
// Reset the simple key on the next level.
915-
parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})
910+
parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{
911+
possible: false,
912+
required: false,
913+
token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
914+
mark: parser.mark,
915+
})
916916

917917
// Increase the flow level.
918918
parser.flow_level++
@@ -1286,7 +1286,8 @@ func yaml_parser_fetch_value(parser *yaml_parser_t) bool {
12861286
simple_key := &parser.simple_keys[len(parser.simple_keys)-1]
12871287

12881288
// Have we found a simple key?
1289-
if simple_key.possible {
1289+
if yaml_simple_key_is_valid(parser, simple_key) {
1290+
12901291
// Create the KEY token and insert it into the queue.
12911292
token := yaml_token_t{
12921293
typ: yaml_KEY_TOKEN,

0 commit comments

Comments
 (0)