@@ -144,6 +144,7 @@ module Private
144144 PEREFERENCE_PATTERN = /#{ PEREFERENCE } /um
145145 TAG_PATTERN = /((?>#{ QNAME_STR } ))\s */um
146146 CLOSE_PATTERN = /(#{ QNAME_STR } )\s *>/um
# Pattern for the "=" that separates an attribute name from its quoted value,
# with optional whitespace on either side. It is consumed by
# parse_attribute_value_with_equal; before this change the same literal was
# written inline in parse_attributes.
147+ EQUAL_PATTERN = /\s *=\s */um
147148 ATTLISTDECL_END = /\s +#{ NAME } (?:#{ ATTDEF } )*\s *>/um
148149 NAME_PATTERN = /#{ NAME } /um
149150 GEDECL_PATTERN = "\\ s+#{ NAME } \\ s+#{ ENTITYDEF } \\ s*>"
@@ -168,6 +169,7 @@ def initialize( source )
168169 @entity_expansion_limit = Security . entity_expansion_limit
169170 @entity_expansion_text_limit = Security . entity_expansion_text_limit
170171 @source . ensure_buffer
172+ @version = nil
171173 end
172174
173175 def add_listener ( listener )
@@ -642,6 +644,10 @@ def need_source_encoding_update?(xml_declaration_encoding)
642644 true
643645 end
644646
# Maps an endian-specific UTF-16 encoding name to the generic name.
#
# xml_declaration_encoding - the encoding name to test (xml_declaration passes
#                            @source.encoding).
#
# Returns "UTF-16" when the name is exactly "UTF-16BE" or "UTF-16LE"
# (case-insensitive, anchored with \A and \z); returns nil for anything else.
647+ def normalize_xml_declaration_encoding ( xml_declaration_encoding )
648+ /\A UTF-16(?:BE|LE)\z /i . match? ( xml_declaration_encoding ) ? "UTF-16" : nil
649+ end
650+
645651 def parse_name ( base_error_message )
646652 md = @source . match ( Private ::NAME_PATTERN , true )
647653 unless md
@@ -735,37 +741,85 @@ def process_comment
735741
# Parses a processing instruction once its opening marker has been consumed.
#
# The target name is read with parse_name. A target of exactly "xml" is handed
# to xml_declaration, whose result is returned. Any other target yields
# [:processing_instruction, name, content], where content is the text before
# "?>" or nil when the target is followed directly by "?>".
#
# Raises ParseException when no closing "?>" is found.
#
# NOTE(review): this listing is a diff. Lines whose number is followed by "-"
# are the removed regexp-based implementation; lines followed by "+" are the
# replacement.
736742 def process_instruction
737743 name = parse_name ( "Malformed XML: Invalid processing instruction node" )
738- if @source . skip_spaces
739- match_data = @source . match ( /(.*?)\? >/um , true )
740- unless match_data
741- raise ParseException . new ( "Malformed XML: Unclosed processing instruction" , @source )
744+ if name == "xml"
745+ xml_declaration
746+ else # PITarget
747+ if @source . skip_spaces # e.g. <?name content?>
# Remember where the content starts so the position can be restored before
# raising if the terminator is never found.
748+ start_position = @source . position
# presumably read_until returns the text up to and including "?>" (the
# terminator is chomped off right after) - confirm against the source class.
749+ content = @source . read_until ( "?>" )
# String#chomp! returns nil when the suffix is absent, i.e. "?>" was not read.
750+ unless content . chomp! ( "?>" )
751+ @source . position = start_position
752+ raise ParseException . new ( "Malformed XML: Unclosed processing instruction: <#{ name } >" , @source )
753+ end
754+ else # e.g. <?name?>
755+ content = nil
756+ unless @source . match? ( "?>" , true )
757+ raise ParseException . new ( "Malformed XML: Unclosed processing instruction: <#{ name } >" , @source )
758+ end
742759 end
743- content = match_data [ 1 ]
744- else
745- content = nil
760+ [ :processing_instruction , name , content ]
761+ end
762+ end
763+
# Parses the rest of an XML declaration after process_instruction has read the
# target name "xml".
#
# Accepted order: whitespace, "version" and its value, then an optional
# "encoding" value, then an optional "standalone" value, then "?>".
#
# Returns [:xmldecl, version, encoding, standalone]. encoding falls back to
# normalize_xml_declaration_encoding(@source.encoding) when none was parsed;
# standalone is nil when absent.
#
# Side effects: stores the parsed version in @version, and assigns
# @source.encoding when need_source_encoding_update?(encoding) is true.
#
# Raises ParseException when @version is already set (duplicate declaration),
# when @document_status is already set (declaration not at the start), when
# the whitespace or the "version" keyword is missing, when standalone is not
# "yes" or "no", or when "?>" is missing. Errors raised by
# parse_attribute_value_with_equal propagate as well.
#
# NOTE(review): this listing is a diff. Lines whose number is followed by "-"
# are the removed implementation that extracted version/encoding/standalone
# from the already-read content with the VERSION, ENCODING and STANDALONE
# regexps.
764+ def xml_declaration
765+ unless @version . nil?
766+ raise ParseException . new ( "Malformed XML: XML declaration is duplicated" , @source )
767+ end
768+ if @document_status
769+ raise ParseException . new ( "Malformed XML: XML declaration is not at the start" , @source )
770+ end
771+ unless @source . skip_spaces
772+ raise ParseException . new ( "Malformed XML: XML declaration misses spaces before version" , @source )
773+ end
774+ unless @source . match? ( "version" , true )
775+ raise ParseException . new ( "Malformed XML: XML declaration misses version" , @source )
776+ end
# "xml" is passed only as the label used in the helper's error messages.
777+ @version = parse_attribute_value_with_equal ( "xml" )
# No whitespace after the version value: the only accepted continuation is
# the closing "?>".
778+ unless @source . skip_spaces
746779 unless @source . match? ( "?>" , true )
747- raise ParseException . new ( "Malformed XML: Unclosed processing instruction " , @source )
780+ raise ParseException . new ( "Malformed XML: Unclosed XML declaration " , @source )
748781 end
# No encoding was declared; report "UTF-16" if the source encoding is
# UTF-16BE/LE, otherwise nil.
782+ encoding = normalize_xml_declaration_encoding ( @source . encoding )
783+ return [ :xmldecl , @version , encoding , nil ] # e.g. <?xml version="1.0"?>
749784 end
750- if name == "xml"
751- if @document_status
752- raise ParseException . new ( "Malformed XML: XML declaration is not at the start" , @source )
753- end
754- version = VERSION . match ( content )
755- version = version [ 1 ] unless version . nil?
756- encoding = ENCODING . match ( content )
757- encoding = encoding [ 1 ] unless encoding . nil?
758- if need_source_encoding_update? ( encoding )
759- @source . encoding = encoding
785+
786+ if @source . match? ( "encoding" , true )
787+ encoding = parse_attribute_value_with_equal ( "xml" )
# No whitespace after the encoding value: the declaration must close here.
788+ unless @source . skip_spaces
789+ unless @source . match? ( "?>" , true )
790+ raise ParseException . new ( "Malformed XML: Unclosed XML declaration" , @source )
791+ end
792+ if need_source_encoding_update? ( encoding )
793+ @source . encoding = encoding
794+ end
# NOTE(review): encoding was just assigned from
# parse_attribute_value_with_equal, which returns the chomped value on
# success, so this fallback looks unreachable on this path - confirm.
795+ encoding ||= normalize_xml_declaration_encoding ( @source . encoding )
796+ return [ :xmldecl , @version , encoding , nil ] # e.g. <?xml version="1.1" encoding="UTF-8"?>
760797 end
761- if encoding . nil? and /\A UTF-16(?:BE|LE)\z /i =~ @source . encoding
762- encoding = "UTF-16"
798+ end
799+
800+ if @source . match? ( "standalone" , true )
801+ standalone = parse_attribute_value_with_equal ( "xml" )
# Only the literal values "yes" and "no" are accepted; the empty `when`
# branch is the success case.
802+ case standalone
803+ when "yes" , "no"
804+ else
805+ raise ParseException . new ( "Malformed XML: XML declaration standalone is not yes or no : <#{ standalone } >" , @source )
763806 end
764- standalone = STANDALONE . match ( content )
765- standalone = standalone [ 1 ] unless standalone . nil?
766- return [ :xmldecl , version , encoding , standalone ]
767807 end
768- [ :processing_instruction , name , content ]
# Whitespace before "?>" is optional at this point, so the result of
# skip_spaces is deliberately ignored.
808+ @source . skip_spaces
809+ unless @source . match? ( "?>" , true )
810+ raise ParseException . new ( "Malformed XML: Unclosed XML declaration" , @source )
811+ end
812+
# encoding and standalone are nil here when their branches above were not
# taken (Ruby initializes locals assigned in untaken branches to nil).
813+ if need_source_encoding_update? ( encoding )
814+ @source . encoding = encoding
815+ end
816+ encoding ||= normalize_xml_declaration_encoding ( @source . encoding )
817+
818+ # e.g. <?xml version="1.0" ?>
819+ # <?xml version="1.1" encoding="UTF-8" ?>
820+ # <?xml version="1.1" standalone="yes"?>
821+ # <?xml version="1.1" encoding="UTF-8" standalone="yes" ?>
822+ [ :xmldecl , @version , encoding , standalone ]
769823 end
770824
771825 if StringScanner ::Version < "3.1.1"
@@ -787,6 +841,25 @@ def scan_quote
787841 end
788842 end
789843
# Parses `= "value"` (or `= 'value'`) at the current source position and
# returns the value without its surrounding quotes.
#
# name - label interpolated into the error messages only; parse_attributes
#        passes the attribute name and xml_declaration passes "xml".
#
# Raises REXML::ParseException when the "=" is missing, when scan_quote finds
# no opening quote, or when no matching closing quote is read.
#
# The body was moved here from parse_attributes so that xml_declaration can
# share it.
844+ def parse_attribute_value_with_equal ( name )
845+ unless @source . match? ( Private ::EQUAL_PATTERN , true )
846+ message = "Missing attribute equal: <#{ name } >"
847+ raise REXML ::ParseException . new ( message , @source )
848+ end
# scan_quote returns the quote character that opens the value (it is reused
# below as the terminator), or a falsy value when there is none.
849+ unless quote = scan_quote
850+ message = "Missing attribute value start quote: <#{ name } >"
851+ raise REXML ::ParseException . new ( message , @source )
852+ end
# Remember where the value starts so the position can be restored before
# raising if the closing quote is never found.
853+ start_position = @source . position
854+ value = @source . read_until ( quote )
# String#chomp! returns nil when the suffix is absent, i.e. no closing quote
# was read.
855+ unless value . chomp! ( quote )
856+ @source . position = start_position
857+ message = "Missing attribute value end quote: <#{ name } >: <#{ quote } >"
858+ raise REXML ::ParseException . new ( message , @source )
859+ end
860+ value
861+ end
862+
790863 def parse_attributes ( prefixes )
791864 attributes = { }
792865 expanded_names = { }
@@ -801,22 +874,7 @@ def parse_attributes(prefixes)
801874 name = match [ 1 ]
802875 prefix = match [ 2 ]
803876 local_part = match [ 3 ]
804-
805- unless @source . match? ( /\s *=\s */um , true )
806- message = "Missing attribute equal: <#{ name } >"
807- raise REXML ::ParseException . new ( message , @source )
808- end
809- unless quote = scan_quote
810- message = "Missing attribute value start quote: <#{ name } >"
811- raise REXML ::ParseException . new ( message , @source )
812- end
813- start_position = @source . position
814- value = @source . read_until ( quote )
815- unless value . chomp! ( quote )
816- @source . position = start_position
817- message = "Missing attribute value end quote: <#{ name } >: <#{ quote } >"
818- raise REXML ::ParseException . new ( message , @source )
819- end
877+ value = parse_attribute_value_with_equal ( name )
820878 @source . skip_spaces
821879 if prefix == "xmlns"
822880 if local_part == "xml"
0 commit comments