Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion lib/rexml/source.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def initialize(arg, encoding=nil)
detect_encoding
end
@line = 0
@term_encord = {}
end

# The current buffer (what we're going to read next)
Expand Down Expand Up @@ -227,7 +228,7 @@ def read(term = nil, min_bytes = 1)

def read_until(term)
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
term = encode(term)
term = @term_encord[term] ||= encode(term)
until str = @scanner.scan_until(pattern)
break if @source.nil?
break if @source.eof?
Expand Down
34 changes: 34 additions & 0 deletions test/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,40 @@ def test_utf_16
assert_equal(expected_xml, actual_xml)
end
end

# Parses a small document whose attribute value contains a literal ">" from
# raw ASCII-8BIT bytes, once as UTF-8 and once for each UTF-16 byte order,
# and checks both the detected encoding and the attribute value.
# NOTE(review): the class name suggests this guards REXML::Source#read_until;
# confirm against the library change it accompanies.
class Read_until_Test < Test::Unit::TestCase
  def test_utf_8
    raw = <<-EOX.force_encoding("ASCII-8BIT")
<?xml version="1.0" encoding="UTF-8"?>
<message testing=">">Hello world!</message>
    EOX
    assert_encoding_and_attribute("UTF-8", REXML::Document.new(raw))
  end

  def test_utf_16le
    assert_encoding_and_attribute("UTF-16", parse_utf_16("UTF-16LE"))
  end

  def test_utf_16be
    assert_encoding_and_attribute("UTF-16", parse_utf_16("UTF-16BE"))
  end

  private

  # Encodes the sample document in the given UTF-16 byte order, prepends the
  # matching byte order mark, and parses the resulting binary string.
  def parse_utf_16(byte_order)
    body = <<-EOX.encode(byte_order).force_encoding("ASCII-8BIT")
<?xml version="1.0" encoding="UTF-16"?>
<message testing=">">Hello world!</message>
    EOX
    bom = "\ufeff".encode(byte_order).force_encoding("ASCII-8BIT")
    REXML::Document.new(bom + body)
  end

  # Asserts the document reports the expected encoding and that the
  # "testing" attribute of the root <message> element is ">".
  def assert_encoding_and_attribute(expected_encoding, document)
    assert_equal(expected_encoding, document.encoding)
    message = REXML::XPath.match(document, "/message").first
    assert_equal(">", message.attribute("testing").value)
  end
end
end
end
end
Loading