From b2ab29e6ba8d3aad4b568584ef71ac3a5a41ae45 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Mon, 16 Sep 2024 07:46:46 -0500 Subject: [PATCH 1/5] Don't assume that `SubString` has `pointer` and copy instead Still assume `Substring{String}` has `pointer` Add tests --- src/parse_stream.jl | 2 +- test/parse_stream.jl | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/parse_stream.jl b/src/parse_stream.jl index 8aad71df..fe10b333 100644 --- a/src/parse_stream.jl +++ b/src/parse_stream.jl @@ -391,7 +391,7 @@ function ParseStream(text::String, index::Integer=1; version=VERSION) ParseStream(unsafe_wrap(Vector{UInt8}, text), text, index, version) end -function ParseStream(text::SubString, index::Integer=1; version=VERSION) +function ParseStream(text::SubString{String}, index::Integer=1; version=VERSION) # See also IOBuffer(SubString("x")) ParseStream(unsafe_wrap(Vector{UInt8}, pointer(text), sizeof(text)), text, index, version) diff --git a/test/parse_stream.jl b/test/parse_stream.jl index f7c0bd60..1ef70cfd 100644 --- a/test/parse_stream.jl +++ b/test/parse_stream.jl @@ -7,7 +7,10 @@ using JuliaSyntax: ParseStream, peek, peek_token, bump, bump_trivia, bump_invisible, emit, emit_diagnostic, TRIVIA_FLAG, INFIX_FLAG, - ParseStreamPosition, first_child_position, last_child_position + ParseStreamPosition, first_child_position, last_child_position, + parsestmt + +import InlineStrings # Here we manually issue parse events in the order the Julia parser would issue # them @@ -147,3 +150,10 @@ end @test first_child_position(st, position(st)) == ParseStreamPosition(4, 1) @test last_child_position(st, position(st)) == ParseStreamPosition(7, 2) end + +@testset "SubString{String3} (issue #505)" begin + x = split(InlineStrings.InlineString("1 2"))[1] + @test x == "1" + @test ParseStream(x) isa ParseStream + @test parsestmt(Expr, x) == parsestmt(Expr, "1") +end From e1d287798e2c03b80c4a7c1634e510b2f7069aba Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Mon, 16 Sep 2024 07:51:36 -0500 Subject: [PATCH 2/5] Add InlineStrings as test dep --- Project.toml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 92f233e6..0afd3ef4 100644 --- a/Project.toml +++ b/Project.toml @@ -7,12 +7,11 @@ version = "1.0.0-DEV" Serialization = "1.0" julia = "1.0" -[deps] - [extras] Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +InlineStrings = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" [targets] -test = ["Test", "Serialization", "Logging"] +test = ["Test", "Serialization", "Logging", "InlineStrings"] From 550545c3beb54d92e43eb7fc75f87a9cdb4d789a Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Mon, 16 Sep 2024 08:18:40 -0500 Subject: [PATCH 3/5] inline InlineStrings into tests for compuat with 1.2 and earlier --- Project.toml | 3 +- test/parse_stream.jl | 81 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 76 insertions(+), 8 deletions(-) diff --git a/Project.toml b/Project.toml index 0afd3ef4..8425359b 100644 --- a/Project.toml +++ b/Project.toml @@ -11,7 +11,6 @@ julia = "1.0" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -InlineStrings = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" [targets] -test = ["Test", "Serialization", "Logging", "InlineStrings"] +test = ["Test", "Serialization", "Logging"] diff --git a/test/parse_stream.jl b/test/parse_stream.jl index 1ef70cfd..21183568 100644 --- a/test/parse_stream.jl +++ b/test/parse_stream.jl @@ -10,8 +10,6 @@ using JuliaSyntax: ParseStream, ParseStreamPosition, first_child_position, last_child_position, parsestmt -import InlineStrings - # Here we manually issue parse events in the order the Julia parser would issue # them @testset "ParseStream" begin @@ -151,9 +149,80 @@ end @test last_child_position(st, position(st)) == ParseStreamPosition(7, 2) end +# This is coppied from InlineStrings.jl instead of depending +# on InlineStrings for compat with Julia 1.2 and earlier: +primitive type String3 <: AbstractString 4*8 end +Base.ncodeunits(x::String3) = Int(Base.trunc_int(UInt8, x)) +function Base.codeunit(x::T, i::Int) where {T <: String3} + @boundscheck checkbounds(Bool, x, i) || throw(BoundsError(x, i)) + return Base.trunc_int(UInt8, Base.lshr_int(x, 8 * (sizeof(T) - i))) +end +function Base.String(x::T) where {T <: String3} + len = ncodeunits(x) + out = Base._string_n(len) + ref = Ref{T}(_bswap(x)) + GC.@preserve ref out begin + ptr = convert(Ptr{UInt8}, Base.unsafe_convert(Ptr{T}, ref)) + unsafe_copyto!(pointer(out), ptr, len) + end + return out +end +function Base.isvalid(x::String3, i::Int) + @boundscheck checkbounds(Bool, x, i) || throw(BoundsError(x, i)) + return @inbounds thisind(x, i) == i +end +function Base.thisind(s::String3, i::Int) + i == 0 && return 0 + n = ncodeunits(s) + i == n + 1 && return i + @boundscheck Base.between(i, 1, n) || throw(BoundsError(s, i)) + @inbounds b = codeunit(s, i) + (b & 0xc0 == 0x80) & (i-1 > 0) || return i + @inbounds b = codeunit(s, i-1) + Base.between(b, 0b11000000, 0b11110111) && return i-1 + (b & 0xc0 == 0x80) & (i-2 > 0) || return i + @inbounds b = codeunit(s, i-2) + Base.between(b, 0b11100000, 0b11110111) && return i-2 + (b & 0xc0 == 0x80) & (i-3 > 0) || return i + @inbounds b = codeunit(s, i-3) + Base.between(b, 0b11110000, 0b11110111) && return i-3 + return i +end +Base.@propagate_inbounds function Base.iterate(s::String3, i::Int=firstindex(s)) + (i % UInt) - 1 < ncodeunits(s) || return nothing + b = @inbounds codeunit(s, i) + u = UInt32(b) << 24 + Base.between(b, 0x80, 0xf7) || return reinterpret(Char, u), i+1 + return iterate_continued(s, i, u) +end +function iterate_continued(s::String3, i::Int, u::UInt32) + u < 0xc0000000 && (i += 1; @goto ret) + n = ncodeunits(s) + # first continuation byte + (i += 1) > n && @goto ret + @inbounds b = codeunit(s, i) + b & 0xc0 == 0x80 || @goto ret + u |= UInt32(b) << 16 + # second continuation byte + ((i += 1) > n) | (u < 0xe0000000) && @goto ret + @inbounds b = codeunit(s, i) + b & 0xc0 == 0x80 || @goto ret + u |= UInt32(b) << 8 + # third continuation byte + ((i += 1) > n) | (u < 0xf0000000) && @goto ret + @inbounds b = codeunit(s, i) + b & 0xc0 == 0x80 || @goto ret + u |= UInt32(b); i += 1 +@label ret + return reinterpret(Char, u), i +end +# End coppied from InlineStrings.jl + @testset "SubString{String3} (issue #505)" begin - x = split(InlineStrings.InlineString("1 2"))[1] - @test x == "1" - @test ParseStream(x) isa ParseStream - @test parsestmt(Expr, x) == parsestmt(Expr, "1") + x = reinterpret(String3, 0x31203203) + @test x == "1 2" + y = split(x)[1] + @test y == "1" + @test ParseStream(y) isa ParseStream + @test parsestmt(Expr, y) == parsestmt(Expr, "1") end From a6521599e5f231fc0cc056a0dc48716ec669d89a Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Mon, 16 Sep 2024 16:29:19 -0500 Subject: [PATCH 4/5] use Test.GenericString for testing --- test/parse_stream.jl | 74 ++------------------------------------------ 1 file changed, 3 insertions(+), 71 deletions(-) diff --git a/test/parse_stream.jl b/test/parse_stream.jl index 21183568..f5148f27 100644 --- a/test/parse_stream.jl +++ b/test/parse_stream.jl @@ -149,80 +149,12 @@ end @test last_child_position(st, position(st)) == ParseStreamPosition(7, 2) end -# This is coppied from InlineStrings.jl instead of depending -# on InlineStrings for compat with Julia 1.2 and earlier: -primitive type String3 <: AbstractString 4*8 end -Base.ncodeunits(x::String3) = Int(Base.trunc_int(UInt8, x)) -function Base.codeunit(x::T, i::Int) where {T <: String3} - @boundscheck checkbounds(Bool, x, i) || throw(BoundsError(x, i)) - return Base.trunc_int(UInt8, Base.lshr_int(x, 8 * (sizeof(T) - i))) -end -function Base.String(x::T) where {T <: String3} - len = ncodeunits(x) - out = Base._string_n(len) - ref = Ref{T}(_bswap(x)) - GC.@preserve ref out begin - ptr = convert(Ptr{UInt8}, Base.unsafe_convert(Ptr{T}, ref)) - unsafe_copyto!(pointer(out), ptr, len) - end - return out -end -function Base.isvalid(x::String3, i::Int) - @boundscheck checkbounds(Bool, x, i) || throw(BoundsError(x, i)) - return @inbounds thisind(x, i) == i -end -function Base.thisind(s::String3, i::Int) - i == 0 && return 0 - n = ncodeunits(s) - i == n + 1 && return i - @boundscheck Base.between(i, 1, n) || throw(BoundsError(s, i)) - @inbounds b = codeunit(s, i) - (b & 0xc0 == 0x80) & (i-1 > 0) || return i - @inbounds b = codeunit(s, i-1) - Base.between(b, 0b11000000, 0b11110111) && return i-1 - (b & 0xc0 == 0x80) & (i-2 > 0) || return i - @inbounds b = codeunit(s, i-2) - Base.between(b, 0b11100000, 0b11110111) && return i-2 - (b & 0xc0 == 0x80) & (i-3 > 0) || return i - @inbounds b = codeunit(s, i-3) - Base.between(b, 0b11110000, 0b11110111) && return i-3 - return i -end -Base.@propagate_inbounds function Base.iterate(s::String3, i::Int=firstindex(s)) - (i % UInt) - 1 < ncodeunits(s) || return nothing - b = @inbounds codeunit(s, i) - u = UInt32(b) << 24 - Base.between(b, 0x80, 0xf7) || return reinterpret(Char, u), i+1 - return iterate_continued(s, i, u) -end -function iterate_continued(s::String3, i::Int, u::UInt32) - u < 0xc0000000 && (i += 1; @goto ret) - n = ncodeunits(s) - # first continuation byte - (i += 1) > n && @goto ret - @inbounds b = codeunit(s, i) - b & 0xc0 == 0x80 || @goto ret - u |= UInt32(b) << 16 - # second continuation byte - ((i += 1) > n) | (u < 0xe0000000) && @goto ret - @inbounds b = codeunit(s, i) - b & 0xc0 == 0x80 || @goto ret - u |= UInt32(b) << 8 - # third continuation byte - ((i += 1) > n) | (u < 0xf0000000) && @goto ret - @inbounds b = codeunit(s, i) - b & 0xc0 == 0x80 || @goto ret - u |= UInt32(b); i += 1 -@label ret - return reinterpret(Char, u), i -end -# End coppied from InlineStrings.jl - -@testset "SubString{String3} (issue #505)" begin - x = reinterpret(String3, 0x31203203) +@testset "SubString{GenericString} (issue #505)" begin + x = Test.GenericString("1 2") @test x == "1 2" y = split(x)[1] @test y == "1" + @test y isa SubString{GenericString} @test ParseStream(y) isa ParseStream @test parsestmt(Expr, y) == parsestmt(Expr, "1") end From 73a395fc0d9c45adbde358f5e43f1181bffa410f Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Mon, 16 Sep 2024 16:30:44 -0500 Subject: [PATCH 5/5] put back empty [deps] section --- Project.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Project.toml b/Project.toml index 8425359b..92f233e6 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,8 @@ version = "1.0.0-DEV" Serialization = "1.0" julia = "1.0" +[deps] + [extras] Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"