1 change: 1 addition & 0 deletions docs/src/reference.md
@@ -80,6 +80,7 @@ class of tokenization errors and lets the parser deal with them.
* We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220)
* Iterations are represented with the `iteration` and `in` heads rather than `=` within the header of a `for`. Thus `for i=is ; body end` parses to `(for (iteration (in i is)) (block body))`. Cartesian iteration as in `for a=as, b=bs body end` are represented with a nested `(iteration (in a as) (in b bs))` rather than a `block` containing `=` because these lists of iterators are neither semantically nor syntactically a sequence of statements, unlike other uses of `block`. Generators also use the `iteration` head - see information on that below.
* Short form functions like `f(x) = x + 1` are represented with the `function` head rather than the `=` head. In this case the `SHORT_FORM_FUNCTION_FLAG` flag is set to allow the surface syntactic form to be easily distinguished from long form functions.
* All updating assignment operators like `+=` are represented with a single `K"op="` head, with the operator itself in infix position. For example, `x += 1` is `(op= x + 1)`, where the plus token has kind `K"Identifier"`. This collapses a rather long list of distinct heads (`$=` `%=` `&=` `*=` `+=` `-=` `//=` `/=` `<<=` `>>=` `>>>=` `\=` `^=` `|=` `÷=` `⊻=`) into one, makes the operator itself appear in the AST as a `K"Identifier"` as it should, and makes it possible to add further Unicode updating operators while keeping the AST stable.
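
A minimal sketch of the resulting trees, assuming the package's public `parsestmt` entry point (exact tree printing may differ):

```julia
using JuliaSyntax

# Raw tree: one `op=` head, with the operator stored as a `K"Identifier"` child
parsestmt(SyntaxNode, "x += 1")   # roughly (op= x + 1)

# Conversion to Expr restores the familiar per-operator heads for compatibility
parsestmt(Expr, "x += 1")         # Expr(:+=, :x, 1)
parsestmt(Expr, "x .+= 1")        # Expr(:.+=, :x, 1)
```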

## More detail on tree differences

10 changes: 10 additions & 0 deletions src/expr.jl
@@ -232,6 +232,16 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,

if k == K"?"
headsym = :if
elseif k == K"op=" && length(args) == 3
lhs = args[1]
op = args[2]
rhs = args[3]
headstr = string(args[2], '=')
if is_dotted(head)
headstr = '.'*headstr
end
headsym = Symbol(headstr)
args = Any[lhs, rhs]
elseif k == K"macrocall"
if length(args) >= 2
a2 = args[2]
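For reference, the head reconstruction above is plain string building; a small sketch of the values involved (nothing here is part of the diff itself):

```julia
# The operator child converts to a Symbol such as :+ ; appending '=' (plus a
# leading '.' for dotted forms) rebuilds the legacy Expr head.
Symbol(string(:+, '='))        # :+=
Symbol('.' * string(:+, '='))  # Symbol(".+=")
```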
17 changes: 1 addition & 16 deletions src/kinds.jl
@@ -293,23 +293,8 @@ register_kinds!(JuliaSyntax, 0, [
"BEGIN_ASSIGNMENTS"
"BEGIN_SYNTACTIC_ASSIGNMENTS"
"="
"+="
"-=" # Also used for "−="
"*="
"/="
"//="
"|="
"^="
"÷="
"%="
"<<="
">>="
">>>="
"\\="
"&="
"op=" # Updating assignment operator ( $= %= &= *= += -= //= /= <<= >>= >>>= \= ^= |= ÷= ⊻= )
":="
"\$="
"⊻="
"END_SYNTACTIC_ASSIGNMENTS"
"~"
"≔"
9 changes: 6 additions & 3 deletions src/parse_stream.jl
@@ -871,8 +871,9 @@
Bump the next token, splitting it into several pieces

The pieces are defined by a number of `token_spec`s, each of shape `(nbyte, kind, flags)`.
The number of input bytes of the last spec is taken from the remaining bytes of
the input token, with the associated `nbyte` ignored.
If all `nbyte` are positive, they must sum to the length of the token being
split. If one `nbyte` is negative, that piece is given `tok_len + nbyte` bytes
and all the `nbyte` must sum to zero.

This is a hack which helps resolve the occasional lexing ambiguity. For
example
@@ -887,12 +888,14 @@ function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N}
tok = stream.lookahead[stream.lookahead_index]
stream.lookahead_index += 1
b = _next_byte(stream)
toklen = tok.next_byte - b
for (i, (nbyte, k, f)) in enumerate(split_spec)
h = SyntaxHead(k, f)
b = (i == length(split_spec)) ? tok.next_byte : b + nbyte
b += nbyte < 0 ? (toklen + nbyte) : nbyte
orig_k = k == K"." ? K"." : kind(tok)
push!(stream.tokens, SyntaxToken(h, orig_k, false, b))
end
@assert tok.next_byte == b
stream.peek_count = 0
return position(stream)
end
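To make the new `nbyte` convention concrete, here is a small worked sketch (a hypothetical helper, not part of JuliaSyntax): splitting the 3-byte token `.+=` with specs of widths `(1, -2, 1)` gives the negative piece `toklen + nbyte = 3 - 2 = 1` byte, so the pieces cover the whole token:

```julia
# Hypothetical mirror of the rule in `bump_split`: a negative nbyte means
# "token length + nbyte" bytes for that piece.
function split_widths(toklen, nbytes)
    @assert sum(nbytes) == (any(<(0), nbytes) ? 0 : toklen)
    return [n < 0 ? toklen + n : n for n in nbytes]
end

split_widths(3, [1, -2, 1])  # [1, 1, 1] -- ".", "+", "=" pieces of ".+="
split_widths(2, [-1, 1])     # [1, 1]    -- "+", "=" pieces of "+="
```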
36 changes: 28 additions & 8 deletions src/parser.jl
@@ -340,7 +340,7 @@ function bump_dotsplit(ps, flags=EMPTY_FLAGS;
bump_trivia(ps)
mark = position(ps)
k = remap_kind != K"None" ? remap_kind : kind(t)
pos = bump_split(ps, (1, K".", TRIVIA_FLAG), (0, k, flags))
pos = bump_split(ps, (1, K".", TRIVIA_FLAG), (-1, k, flags))
if emit_dot_node
pos = emit(ps, mark, K".")
end
@@ -626,7 +626,22 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {
# a += b ==> (+= a b)
# a .= b ==> (.= a b)
is_short_form_func = k == K"=" && !is_dotted(t) && was_eventually_call(ps)
bump(ps, TRIVIA_FLAG)
if k == K"op="
# x += y ==> (op= x + y)
# x .+= y ==> (.op= x + y)
bump_trivia(ps)
if is_dotted(t)
bump_split(ps, (1, K".", TRIVIA_FLAG),
(-2, K"Identifier", EMPTY_FLAGS), # op
(1, K"=", TRIVIA_FLAG))
else
bump_split(ps,
(-1, K"Identifier", EMPTY_FLAGS), # op
(1, K"=", TRIVIA_FLAG))
end
else
bump(ps, TRIVIA_FLAG)
end
bump_trivia(ps)
# Syntax Edition TODO: We'd like to call `down` here when
# is_short_form_func is true, to prevent `f() = 1 = 2` from parsing.
@@ -1843,7 +1858,7 @@ function parse_resword(ps::ParseState)
# let x::1 ; end ==> (let (block (::-i x 1)) (block))
# let x ; end ==> (let (block x) (block))
# let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block)))
# let x+=1 ; end ==> (let (block (+= x 1)) (block))
# let x+=1 ; end ==> (let (block (op= x + 1)) (block))
parse_comma_separated(ps, parse_eq_star)
end
emit(ps, m, K"block")
@@ -2571,7 +2586,7 @@ function parse_import_path(ps::ParseState)
# Modules with operator symbol names
# import .⋆ ==> (import (importpath . ⋆))
bump_trivia(ps)
bump_split(ps, (1,K".",EMPTY_FLAGS), (1,peek(ps),EMPTY_FLAGS))
bump_split(ps, (1,K".",EMPTY_FLAGS), (-1,peek(ps),EMPTY_FLAGS))
else
# import @x ==> (import (importpath @x))
# import $A ==> (import (importpath ($ A)))
@@ -2599,7 +2614,12 @@
warning="space between dots in import path")
end
bump_trivia(ps)
bump_split(ps, (1,K".",TRIVIA_FLAG), (1,k,EMPTY_FLAGS))
m = position(ps)
bump_split(ps, (1,K".",TRIVIA_FLAG), (-1,k,EMPTY_FLAGS))
if is_syntactic_operator(k)
# import A.= ==> (import (importpath A (error =)))
emit(ps, m, K"error", error="syntactic operators not allowed in import")
end
elseif k == K"..."
# Import the .. operator
# import A... ==> (import (importpath A ..))
@@ -3550,13 +3570,13 @@ function parse_atom(ps::ParseState, check_identifiers=true)
bump_dotsplit(ps, emit_dot_node=true, remap_kind=
is_syntactic_operator(leading_kind) ? leading_kind : K"Identifier")
if check_identifiers && !is_valid_identifier(leading_kind)
# += ==> (error +=)
# += ==> (error (op= +))
# ? ==> (error ?)
# .+= ==> (error (. +=))
# .+= ==> (error (. (op= +)))
emit(ps, mark, K"error", error="invalid identifier")
else
# Quoted syntactic operators allowed
# :+= ==> (quote-: +=)
# :+= ==> (quote-: (op= +))
end
elseif is_keyword(leading_kind)
if leading_kind == K"var" && (t = peek_token(ps,2);
41 changes: 21 additions & 20 deletions src/tokenize.jl
@@ -93,6 +93,7 @@ function _nondot_symbolic_operator_kinds()
K"isa"
K"in"
K".'"
K"op="
])
end

@@ -527,14 +528,14 @@ function _next_token(l::Lexer, c)
elseif c == '-'
return lex_minus(l);
elseif c == '−' # \minus '−' treated as hyphen '-'
return emit(l, accept(l, '=') ? K"-=" : K"-")
return emit(l, accept(l, '=') ? K"op=" : K"-")
elseif c == '`'
return lex_backtick(l);
elseif is_identifier_start_char(c)
return lex_identifier(l, c)
elseif isdigit(c)
return lex_digit(l, K"Integer")
elseif (k = get(_unicode_ops, c, K"error")) != K"error"
elseif (k = get(_unicode_ops, c, K"None")) != K"None"
return emit(l, k)
else
emit(l,
@@ -797,12 +798,12 @@ function lex_greater(l::Lexer)
if accept(l, '>')
if accept(l, '>')
if accept(l, '=')
return emit(l, K">>>=")
return emit(l, K"op=")
else # >>>?, ? not a =
return emit(l, K">>>")
end
elseif accept(l, '=')
return emit(l, K">>=")
return emit(l, K"op=")
else
return emit(l, K">>")
end
@@ -819,7 +820,7 @@ end
function lex_less(l::Lexer)
if accept(l, '<')
if accept(l, '=')
return emit(l, K"<<=")
return emit(l, K"op=")
else # '<<?', ? not =, ' '
return emit(l, K"<<")
end
@@ -888,15 +889,15 @@ end

function lex_percent(l::Lexer)
if accept(l, '=')
return emit(l, K"%=")
return emit(l, K"op=")
else
return emit(l, K"%")
end
end

function lex_bar(l::Lexer)
if accept(l, '=')
return emit(l, K"|=")
return emit(l, K"op=")
elseif accept(l, '>')
return emit(l, K"|>")
elseif accept(l, '|')
@@ -910,7 +911,7 @@ function lex_plus(l::Lexer)
if accept(l, '+')
return emit(l, K"++")
elseif accept(l, '=')
return emit(l, K"+=")
return emit(l, K"op=")
end
return emit(l, K"+")
end
@@ -925,7 +926,7 @@
elseif !l.dotop && accept(l, '>')
return emit(l, K"->")
elseif accept(l, '=')
return emit(l, K"-=")
return emit(l, K"op=")
end
return emit(l, K"-")
end
@@ -934,35 +935,35 @@ function lex_star(l::Lexer)
if accept(l, '*')
return emit(l, K"Error**") # "**" is an invalid operator use ^
elseif accept(l, '=')
return emit(l, K"*=")
return emit(l, K"op=")
end
return emit(l, K"*")
end

function lex_circumflex(l::Lexer)
if accept(l, '=')
return emit(l, K"^=")
return emit(l, K"op=")
end
return emit(l, K"^")
end

function lex_division(l::Lexer)
if accept(l, '=')
return emit(l, K"÷=")
return emit(l, K"op=")
end
return emit(l, K"÷")
end

function lex_dollar(l::Lexer)
if accept(l, '=')
return emit(l, K"$=")
return emit(l, K"op=")
end
return emit(l, K"$")
end

function lex_xor(l::Lexer)
if accept(l, '=')
return emit(l, K"=")
return emit(l, K"op=")
end
return emit(l, K"⊻")
end
@@ -1110,7 +1111,7 @@ function lex_amper(l::Lexer)
if accept(l, '&')
return emit(l, K"&&")
elseif accept(l, '=')
return emit(l, K"&=")
return emit(l, K"op=")
else
return emit(l, K"&")
end
@@ -1148,20 +1149,20 @@ end
function lex_forwardslash(l::Lexer)
if accept(l, '/')
if accept(l, '=')
return emit(l, K"//=")
return emit(l, K"op=")
else
return emit(l, K"//")
end
elseif accept(l, '=')
return emit(l, K"/=")
return emit(l, K"op=")
else
return emit(l, K"/")
end
end

function lex_backslash(l::Lexer)
if accept(l, '=')
return emit(l, K"\=")
return emit(l, K"op=")
end
return emit(l, K"\\")
end
@@ -1193,7 +1194,7 @@ function lex_dot(l::Lexer)
elseif pc == '−'
l.dotop = true
readchar(l)
return emit(l, accept(l, '=') ? K"-=" : K"-")
return emit(l, accept(l, '=') ? K"op=" : K"-")
elseif pc =='*'
l.dotop = true
readchar(l)
@@ -1222,7 +1223,7 @@
l.dotop = true
readchar(l)
if accept(l, '=')
return emit(l, K"&=")
return emit(l, K"op=")
else
if accept(l, '&')
return emit(l, K"&&")
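As a quick sanity check of the lexer changes, every spelled-out updating operator now lexes to the single `K"op="` kind. A sketch assuming the package's `tokenize` and `kind` helpers (output shape may differ slightly):

```julia
using JuliaSyntax

# Each updating operator surfaces as one K"op=" token; the parser later splits
# it into the operator identifier plus a trivia `=`.
for src in ["x += 1", "x >>>= 1", "x ⊻= 1"]
    ks = [kind(t) for t in JuliaSyntax.tokenize(src)]
    @assert K"op=" in ks
end
```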
10 changes: 10 additions & 0 deletions test/expr.jl
@@ -501,6 +501,16 @@
@test parsestmt("./x", ignore_errors=true) == Expr(:call, Expr(:error, Expr(:., :/)), :x)
end

@testset "syntactic update-assignment operators" begin
@test parsestmt("x += y") == Expr(:(+=), :x, :y)
@test parsestmt("x .+= y") == Expr(:(.+=), :x, :y)
@test parsestmt(":+=") == QuoteNode(Symbol("+="))
@test parsestmt(":(+=)") == QuoteNode(Symbol("+="))
@test parsestmt(":.+=") == QuoteNode(Symbol(".+="))
@test parsestmt(":(.+=)") == QuoteNode(Symbol(".+="))
@test parsestmt("x \u2212= y") == Expr(:(-=), :x, :y)
end

@testset "let" begin
@test parsestmt("let x=1\n end") ==
Expr(:let, Expr(:(=), :x, 1), Expr(:block, LineNumberNode(2)))