Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions Tokenize/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,9 @@ Each `Token` is represented by where it starts and ends, what string it contains
The API for a `Token` (not exported from the `Tokenize.Tokens` module) is:

```julia
startpos(t)::Tuple{Int, Int} # row and column where the token starts
endpos(t)::Tuple{Int, Int} # row and column where the token ends
startbyte(t)::Int # byte offset where the token starts
endbyte(t)::Int # byte offset where the token ends
untokenize(t)::String # string representation of the token
untokenize(t, str)::String # string representation of the token
kind(t)::Tokens.Kind # kind of the token
exactkind(t)::Tokens.Kind # exact kind of the token
```
Expand Down
6 changes: 2 additions & 4 deletions Tokenize/benchmark/lex_base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ using Tokenize
using BenchmarkTools
using Printf

function speed_test(::Type{T}=Tokenize.Tokens.Token) where T <: Tokenize.Tokens.AbstractToken
function speed_test()
tot_files = 0
tot_tokens = 0
tot_errors = 0
Expand All @@ -14,7 +14,7 @@ function speed_test(::Type{T}=Tokenize.Tokens.Token) where T <: Tokenize.Tokens.
tot_files += 1
file = joinpath(root, file)
str = read(file, String)::String
l = tokenize(str, T)
l = tokenize(str)
while !Tokenize.Lexers.eof(l)
t = Tokenize.Lexers.next_token(l)
tot_tokens += 1
Expand All @@ -31,8 +31,6 @@ end

tot_files, tot_tokens, tot_errors = speed_test()
tot_time_token = @belapsed speed_test()
tot_time_rawtoken = @belapsed speed_test(Tokenize.Tokens.RawToken)
println("Lexed ", tot_files, " files, with a total of ", tot_tokens,
" tokens with ", tot_errors, " errors")
println("Time Token: ", @sprintf("%3.4f", tot_time_token), " seconds")
println("Time RawToken: ", @sprintf("%3.4f", tot_time_rawtoken), " seconds")
108 changes: 43 additions & 65 deletions Tokenize/src/_precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,87 +8,65 @@ function _precompile_()
precompile(Tokenize.Tokens.Token, (Tokenize.Tokens.Kind,Tuple{Int,Int},Tuple{Int,Int},Int,Int,String))
precompile(Tokenize.Tokens.Token, ())
precompile(Tokenize.Tokens.kind, (Tokenize.Tokens.Token,))
precompile(Tokenize.Tokens.startpos, (Tokenize.Tokens.Token,))
precompile(Tokenize.Tokens.endpos, (Tokenize.Tokens.Token,))
precompile(Tokenize.Tokens.untokenize, (Tokenize.Tokens.Token,))
precompile(Tokenize.Tokens.untokenize, (Tokenize.Tokens.RawToken,String))
precompile(Tokenize.Tokens.untokenize, (Array{Tokenize.Tokens.Token, 1},))
precompile(Tokenize.Tokens.untokenize, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))

precompile(Tokenize.Lexers.is_cat_id_start, (Char, Int32,))
precompile(Tokenize.Lexers.is_identifier_char, (Char,))
precompile(Tokenize.Lexers.is_identifier_start_char, (Char,))
precompile(Tokenize.Lexers.peekchar, (GenericIOBuffer{Array{UInt8, 1}},))
precompile(Tokenize.Lexers.dpeekchar, (GenericIOBuffer{Array{UInt8, 1}},))
precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},))
precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))

precompile(Tokenize.Lexers.ishex, (Char,))
precompile(Tokenize.Lexers.isbinary, (Char,))
precompile(Tokenize.Lexers.isoctal, (Char,))
precompile(Tokenize.Lexers.iswhitespace, (Char,))
precompile(Tokenize.Lexers.Lexer, (String,))
precompile(Tokenize.Lexers.Lexer, (String,Type{Tokenize.Tokens.Token}))
precompile(Tokenize.Lexers.Lexer, (String,Type{Tokenize.Tokens.RawToken}))
precompile(Tokenize.Lexers.Lexer, (GenericIOBuffer{Array{UInt8, 1}},Type{Tokenize.Tokens.Token}))
precompile(Tokenize.Lexers.Lexer, (GenericIOBuffer{Array{UInt8, 1}},Type{Tokenize.Tokens.RawToken}))
precompile(Tokenize.Lexers.Lexer, (GenericIOBuffer{Array{UInt8, 1}},))
precompile(Tokenize.Lexers.tokenize, (String,))

precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},))
precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Bool,))
precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Bool,))
precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Bool,))
precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Bool,))
precompile(Tokenize.Lexers.startpos, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.startpos, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},))
precompile(Tokenize.Lexers.startpos!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Int))
precompile(Tokenize.Lexers.startpos!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},Int))
precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken},))
precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Bool,))
precompile(Tokenize.Lexers.iterate, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Bool,))
precompile(Tokenize.Lexers.startpos, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.startpos!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},Int))
precompile(Tokenize.Lexers.start_token!, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))

precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},Tokenize.Tokens.Kind))
precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,))
precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_minus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_xor, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},Bool))
precompile(Tokenize.Lexers.lex_division, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_circumflex, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_backslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_star, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))

precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},Char))

precompile(Tokenize.Lexers.lex_greater, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_prime, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_digit, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Tokenize.Tokens.Kind))
precompile(Tokenize.Lexers.lex_identifier, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Char,))
precompile(Tokenize.Lexers.lex_less, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_forwardslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_minus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_xor, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_equal, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_bar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_quote, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_plus, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_dot, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_exclaim, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_colon, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_percent, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_comment, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},Bool))
precompile(Tokenize.Lexers.lex_division, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_circumflex, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_backslash, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_star, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))
precompile(Tokenize.Lexers.lex_amper, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))

precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},))

precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, Char,))
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, String,))
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof( Base.isdigit),))
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof( Tokenize.Lexers.iswhitespace),))
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof( Tokenize.Lexers.is_identifier_char),))
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token},typeof(Tokenize.Lexers.ishex),))
precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, typeof(Tokenize.Lexers.iswhitespace),))
precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.Token}, typeof(Tokenize.Lexers.isdigit),))

precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, typeof(Tokenize.Lexers.iswhitespace),))
precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, typeof(Tokenize.Lexers.isdigit),))
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}},Tokenize.Tokens.RawToken}, Char,))
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,))
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, String,))
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Base.isdigit),))
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.iswhitespace),))
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof( Tokenize.Lexers.is_identifier_char),))
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},typeof(Tokenize.Lexers.ishex),))
precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, typeof(Tokenize.Lexers.iswhitespace),))
precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, typeof(Tokenize.Lexers.isdigit),))

precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, typeof(Tokenize.Lexers.iswhitespace),))
precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, typeof(Tokenize.Lexers.isdigit),))
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,))

precompile(Tokenize.Lexers.readchar, (GenericIOBuffer{Array{UInt8, 1}},))
end
Loading