@@ -70,6 +70,22 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
7070static void S_process_line (cmark_parser * parser , const unsigned char * buffer ,
7171 bufsize_t bytes );
7272
73+ static void subtract_open_block_counts (cmark_parser * parser , cmark_node * node ) {
74+ do {
75+ decr_open_block_count (parser , S_type (node ));
76+ node -> flags &= ~CMARK_NODE__OPEN_BLOCK ;
77+ node = node -> last_child ;
78+ } while (node );
79+ }
80+
81+ static void add_open_block_counts (cmark_parser * parser , cmark_node * node ) {
82+ do {
83+ incr_open_block_count (parser , S_type (node ));
84+ node -> flags |= CMARK_NODE__OPEN_BLOCK ;
85+ node = node -> last_child ;
86+ } while (node );
87+ }
88+
7389static cmark_node * make_block (cmark_mem * mem , cmark_node_type tag ,
7490 int start_line , int start_column ) {
7591 cmark_node * e ;
@@ -129,6 +145,7 @@ static void cmark_parser_reset(cmark_parser *parser) {
129145 parser -> refmap = cmark_reference_map_new (parser -> mem );
130146 parser -> root = document ;
131147 parser -> current = document ;
148+ add_open_block_counts (parser , document );
132149
133150 parser -> syntax_extensions = saved_exts ;
134151 parser -> inline_syntax_extensions = saved_inline_exts ;
@@ -242,15 +259,18 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
242259// Check to see if a node ends with a blank line, descending
243260// if needed into lists and sublists.
244261static bool S_ends_with_blank_line (cmark_node * node ) {
245- if (S_last_line_checked (node )) {
246- return (S_last_line_blank (node ));
247- } else if ((S_type (node ) == CMARK_NODE_LIST ||
248- S_type (node ) == CMARK_NODE_ITEM ) && node -> last_child ) {
249- S_set_last_line_checked (node );
250- return (S_ends_with_blank_line (node -> last_child ));
251- } else {
252- S_set_last_line_checked (node );
253- return (S_last_line_blank (node ));
// Iterative form of the recursive descent removed above: follow last_child
// through LIST/ITEM nodes, marking every visited node as checked. The first
// node that is already checked, is not a LIST/ITEM, or has no children
// decides the result via S_last_line_blank.
// NOTE(review): presumably rewritten as a loop to avoid deep recursion on
// heavily nested lists -- confirm against the commit message.
262+ while (true) {
263+ if (S_last_line_checked (node )) {
264+ return (S_last_line_blank (node ));
265+ } else if ((S_type (node ) == CMARK_NODE_LIST ||
266+ S_type (node ) == CMARK_NODE_ITEM ) && node -> last_child ) {
267+ S_set_last_line_checked (node );
// Descend one level; this replaces the former tail-recursive call.
268+ node = node -> last_child ;
269+ continue ;
270+ } else {
271+ S_set_last_line_checked (node );
272+ return (S_last_line_blank (node ));
273+ }
254274 }
255275}
256276
@@ -310,6 +330,12 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
310330 has_content = resolve_reference_link_definitions (parser , b );
311331 if (!has_content ) {
312332 // remove blank node (former reference def)
333+ if (b -> flags & CMARK_NODE__OPEN_BLOCK ) {
334+ decr_open_block_count (parser , S_type (b ));
335+ if (b -> prev ) {
336+ add_open_block_counts (parser , b -> prev );
337+ }
338+ }
313339 cmark_node_free (b );
314340 }
315341 break ;
@@ -382,6 +408,17 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
382408 return parent ;
383409}
384410
411+ // Recalculates the number of open blocks. Returns true if it matches what's currently stored
412+ // in parser. (Used to check that the counts in parser, which are updated incrementally, are
413+ // correct.)
414+ bool check_open_block_counts (cmark_parser * parser ) {
415+ cmark_parser tmp_parser = {0 }; // Only used for its open_block_counts and total_open_blocks fields.
416+ add_open_block_counts (& tmp_parser , parser -> root );
417+ return
418+ tmp_parser .total_open_blocks == parser -> total_open_blocks &&
419+ memcmp (tmp_parser .open_block_counts , parser -> open_block_counts , sizeof (parser -> open_block_counts )) == 0 ;
420+ }
421+
385422// Add a node as child of another. Return pointer to child.
386423static cmark_node * add_child (cmark_parser * parser , cmark_node * parent ,
387424 cmark_node_type block_type , int start_column ) {
@@ -400,11 +437,14 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
400437 if (parent -> last_child ) {
401438 parent -> last_child -> next = child ;
402439 child -> prev = parent -> last_child ;
440+ subtract_open_block_counts (parser , parent -> last_child );
403441 } else {
404442 parent -> first_child = child ;
405443 child -> prev = NULL ;
406444 }
407445 parent -> last_child = child ;
446+ add_open_block_counts (parser , child );
447+
408448 return child ;
409449}
410450
@@ -1047,8 +1087,14 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
10471087 * all_matched = false;
10481088 cmark_node * container = parser -> root ;
10491089 cmark_node_type cont_type ;
1090+ cmark_parser tmp_parser ; // Only used for its open_block_counts and total_open_blocks fields.
1091+ memcpy (tmp_parser .open_block_counts , parser -> open_block_counts , sizeof (parser -> open_block_counts ));
1092+ tmp_parser .total_open_blocks = parser -> total_open_blocks ;
1093+
1094+ assert (check_open_block_counts (parser ));
10501095
10511096 while (S_last_child_is_open (container )) {
1097+ decr_open_block_count (& tmp_parser , S_type (container ));
10521098 container = container -> last_child ;
10531099 cont_type = S_type (container );
10541100
@@ -1060,6 +1106,53 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
10601106 continue ;
10611107 }
10621108
1109+ // This block of code is a workaround for the quadratic performance
1110+ // issue described here (issue 2):
1111+ //
1112+ // https://github.com/github/cmark-gfm/security/advisories/GHSA-66g8-4hjf-77xh
1113+ //
1114+ // If the current line is empty then we might be able to skip directly
1115+ // to the end of the list of open blocks. To determine whether this is
1116+ // possible, we have been maintaining a count of the number of
1117+ // different types of open blocks. The main criterion is that every
1118+ // remaining block, except the last element of the list, is a LIST or
1119+ // ITEM. The code below checks the conditions, and if they're ok, skips
1120+ // forward to parser->current.
1121+ if (parser -> blank && parser -> indent == 0 ) { // Current line is empty
1122+ // Make sure that parser->current doesn't point to a closed block.
1123+ if (parser -> current -> flags & CMARK_NODE__OPEN_BLOCK ) {
1124+ if (parser -> current -> flags & CMARK_NODE__OPEN ) {
1125+ const size_t n_list = read_open_block_count (& tmp_parser , CMARK_NODE_LIST );
1126+ const size_t n_item = read_open_block_count (& tmp_parser , CMARK_NODE_ITEM );
1127+ // At most one block can be something other than a LIST or ITEM.
1128+ if (n_list + n_item + 1 >= tmp_parser .total_open_blocks ) {
1129+ // Check that parser->current is suitable for jumping to.
1130+ switch (S_type (parser -> current )) {
1131+ case CMARK_NODE_LIST :
1132+ case CMARK_NODE_ITEM :
1133+ if (n_list + n_item != tmp_parser .total_open_blocks ) {
1134+ if (parser -> current -> last_child == NULL ) {
1135+ // There's another node type somewhere in the middle of
1136+ // the list, so don't attempt the optimization.
1137+ break ;
1138+ }
1139+ }
1140+ // fall through
1141+ case CMARK_NODE_CODE_BLOCK :
1142+ case CMARK_NODE_PARAGRAPH :
1143+ case CMARK_NODE_HTML_BLOCK :
1144+ // Jump to parser->current
1145+ container = parser -> current ;
1146+ cont_type = S_type (container );
1147+ break ;
1148+ default :
1149+ break ;
1150+ }
1151+ }
1152+ }
1153+ }
1154+ }
1155+
10631156 switch (cont_type ) {
10641157 case CMARK_NODE_BLOCK_QUOTE :
10651158 if (!parse_block_quote_prefix (parser , input ))
@@ -1193,8 +1286,9 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
11931286 has_content = resolve_reference_link_definitions (parser , * container );
11941287
11951288 if (has_content ) {
1196-
1197- (* container )-> type = (uint16_t )CMARK_NODE_HEADING ;
1289+ cmark_node_set_type (* container , CMARK_NODE_HEADING );
1290+ decr_open_block_count (parser , CMARK_NODE_PARAGRAPH );
1291+ incr_open_block_count (parser , CMARK_NODE_HEADING );
11981292 (* container )-> as .heading .level = lev ;
11991293 (* container )-> as .heading .setext = true;
12001294 S_advance_offset (parser , input , input -> len - 1 - parser -> offset , false);
@@ -1349,7 +1443,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
13491443 S_set_last_line_blank (container , last_line_blank );
13501444
13511445 tmp = container ;
1352- while (tmp -> parent ) {
1446+ while (tmp -> parent && S_last_line_blank ( tmp -> parent ) ) {
13531447 S_set_last_line_blank (tmp -> parent , false);
13541448 tmp = tmp -> parent ;
13551449 }
@@ -1478,6 +1572,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
14781572
14791573 parser -> line_number ++ ;
14801574
1575+ assert (parser -> current -> next == NULL );
14811576 last_matched_container = check_open_blocks (parser , & input , & all_matched );
14821577
14831578 if (!last_matched_container )
0 commit comments