@@ -151,25 +151,45 @@ def Text.check string, pattern, doctype
151151 end
152152 end
153153
154- # context sensitive
155- string . scan ( pattern ) do
156- if $1[ -1 ] != ?;
157- raise "Illegal character #{ $1. inspect } in raw string #{ string . inspect } "
158- elsif $1[ 0 ] == ?&
159- if $5 and $5[ 0 ] == ?#
160- case ( $5[ 1 ] == ?x ? $5[ 2 ..-1 ] . to_i ( 16 ) : $5[ 1 ..-1 ] . to_i )
161- when *VALID_CHAR
154+ pos = 0
155+ while ( index = string . index ( /<|&/ , pos ) )
156+ if string [ index ] == "<"
157+ raise "Illegal character \" #{ string [ index ] } \" in raw string #{ string . inspect } "
158+ end
159+
160+ unless ( end_index = string . index ( /[^\s ];/ , index + 1 ) )
161+ raise "Illegal character \" #{ string [ index ] } \" in raw string #{ string . inspect } "
162+ end
163+
164+ value = string [ ( index + 1 ) ..end_index ]
165+ if /\s / . match? ( value )
166+ raise "Illegal character \" #{ string [ index ] } \" in raw string #{ string . inspect } "
167+ end
168+
169+ if value [ 0 ] == "#"
170+ character_reference = value [ 1 ..-1 ]
171+
172+ unless ( /\A (\d +|x[0-9a-fA-F]+)\z / . match? ( character_reference ) )
173+ if character_reference [ 0 ] == "x" || character_reference [ -1 ] == "x"
174+ raise "Illegal character \" #{ string [ index ] } \" in raw string #{ string . inspect } "
162175 else
163- raise "Illegal character #{ $1 . inspect } in raw string #{ string . inspect } "
176+ raise "Illegal character #{ string . inspect } in raw string #{ string . inspect } "
164177 end
165- # FIXME: below can't work but this needs API change.
166- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
167- # if !doctype or !doctype.entities.has_key?($3)
168- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
169- # end
170178 end
179+
180+ case ( character_reference [ 0 ] == "x" ? character_reference [ 1 ..-1 ] . to_i ( 16 ) : character_reference [ 0 ..-1 ] . to_i )
181+ when *VALID_CHAR
182+ else
183+ raise "Illegal character #{ string . inspect } in raw string #{ string . inspect } "
184+ end
185+ elsif !( /\A #{ Entity ::NAME } \z /um . match? ( value ) )
186+ raise "Illegal character \" #{ string [ index ] } \" in raw string #{ string . inspect } "
171187 end
188+
189+ pos = end_index + 1
172190 end
191+
192+ string
173193 end
174194
175195 def node_type
0 commit comments