@@ -25,8 +25,6 @@ module Kore.Parser.Lexer
2525 , parseId
2626 , parseAnyId , parseSetId , isSymbolId
2727 , isElementVariableId , isSetVariableId
28- , elementVariableIdParser
29- , setVariableIdParser
3028 , parseSortId
3129 , parseSymbolId
3230 , parseModuleName
@@ -48,6 +46,9 @@ import Data.Map.Strict
4846 ( Map
4947 )
5048import qualified Data.Map.Strict as Map
49+ import Data.Text
50+ ( Text
51+ )
5152import qualified Data.Text as Text
5253import Text.Megaparsec
5354 ( SourcePos (.. )
@@ -84,6 +85,7 @@ space = L.space Parser.space1 lineComment blockComment
8485 where
8586 lineComment = L. skipLineComment " //"
8687 blockComment = L. skipBlockComment " /*" " */"
88+ {-# INLINE space #-}
8789
8890{- | Parse the character, but skip its result.
8991 -}
@@ -97,7 +99,7 @@ skipChar = Monad.void . Parser.char
9799See also: 'L.symbol', 'space'
98100
99101 -}
100- symbol :: String -> Parser ()
102+ symbol :: Text -> Parser ()
101103symbol = Monad. void . L. symbol space
102104
103105colon :: Parser ()
@@ -163,7 +165,7 @@ consumes any trailing whitespace.
163165See also: 'space'
164166
165167 -}
166- keyword :: String -> Parser ()
168+ keyword :: Text -> Parser ()
167169keyword s = lexeme $ do
168170 _ <- Parser. chunk s
169171 -- Check that the next character cannot be part of an @id@, i.e. check that
@@ -183,19 +185,16 @@ sourcePosToFileLocation
183185 , column = unPos column'
184186 }
185187
186- {- Takes a parser for the string of the identifier
187- and returns an 'Id' annotated with position.
188- -}
189- stringParserToIdParser :: Parser String -> Parser Id
190- stringParserToIdParser stringRawParser = do
188+ {- | Annotate a 'Text' parser with an 'AstLocation'.
189+ -}
190+ parseIntoId :: Parser Text -> Parser Id
191+ parseIntoId stringRawParser = do
191192 ! pos <- sourcePosToFileLocation <$> getSourcePos
192- name <- lexeme stringRawParser
193- return Id
194- { getId = Text. pack name
195- , idLocation = AstLocationFile pos
196- }
193+ getId <- lexeme stringRawParser
194+ return Id { getId, idLocation = AstLocationFile pos }
195+ {-# INLINE parseIntoId #-}
197196
198- koreKeywordsSet :: HashSet String
197+ koreKeywordsSet :: HashSet Text
199198koreKeywordsSet = HashSet. fromList keywords
200199 where
201200 keywords =
@@ -224,17 +223,18 @@ genericIdRawParser
224223 :: (Char -> Bool ) -- ^ contains the characters allowed for @⟨prefix-char⟩@.
225224 -> (Char -> Bool ) -- ^ contains the characters allowed for @⟨body-char⟩@.
226225 -> IdKeywordParsing
227- -> Parser String
226+ -> Parser Text
228227genericIdRawParser isFirstChar isBodyChar idKeywordParsing = do
229- c <- Parser. satisfy isFirstChar <?> " first identifier character"
230- cs <- Parser. takeWhileP (Just " identifier character" ) isBodyChar
231- let genericId = c : cs
232- keywordsForbidden = idKeywordParsing == KeywordsForbidden
228+ (genericId, _) <- Parser. match $ do
229+ _ <- Parser. satisfy isFirstChar <?> " first identifier character"
230+ _ <- Parser. takeWhileP (Just " identifier character" ) isBodyChar
231+ pure ()
232+ let keywordsForbidden = idKeywordParsing == KeywordsForbidden
233233 isKeyword = HashSet. member genericId koreKeywordsSet
234234 when (keywordsForbidden && isKeyword)
235235 $ fail
236236 ( " Identifiers should not be keywords: '"
237- ++ genericId
237+ ++ Text. unpack genericId
238238 ++ " '."
239239 )
240240 return genericId
@@ -293,11 +293,14 @@ isIdChar c = isIdFirstChar c || isIdOtherChar c
293293An identifier cannot be a keyword.
294294-}
295295parseId :: Parser Id
296- parseId = stringParserToIdParser (parseIdRaw KeywordsForbidden )
296+ parseId = parseIntoId parseIdText
297297
298- parseIdRaw :: IdKeywordParsing -> Parser String
298+ parseIdRaw :: IdKeywordParsing -> Parser Text
299299parseIdRaw = genericIdRawParser isIdFirstChar isIdChar
300300
301+ parseIdText :: Parser Text
302+ parseIdText = parseIdRaw KeywordsForbidden
303+
301304{- | Parse a module name.
302305
303306@
@@ -309,7 +312,7 @@ parseModuleName = lexeme moduleNameRawParser
309312
310313moduleNameRawParser :: Parser ModuleName
311314moduleNameRawParser =
312- ModuleName . Text. pack <$> parseIdRaw KeywordsForbidden
315+ ModuleName <$> parseIdRaw KeywordsForbidden
313316
314317{- | Parses a 'Sort' 'Id'
315318
@@ -321,7 +324,9 @@ parseSortId :: Parser Id
321324parseSortId = parseId <?> " sort identifier"
322325
323326parseAnyId :: Parser Id
324- parseAnyId = (parseSpecialId <|> parseSetId <|> parseId) <?> " identifier"
327+ parseAnyId = parseIntoId
328+ (parseSpecialIdText <|> parseSetIdText <|> parseIdText)
329+ <?> " identifier"
325330
326331isSymbolId :: Id -> Bool
327332isSymbolId Id { getId } =
@@ -336,19 +341,16 @@ isElementVariableId Id { getId } =
336341isSetVariableId :: Id -> Bool
337342isSetVariableId Id { getId } = Text. head getId == ' @'
338343
339- parseSpecialId :: Parser Id
340- parseSpecialId =
341- stringParserToIdParser parseSpecialIdString
342- where
343- parseSpecialIdString =
344- (:) <$> Parser. char ' \\ ' <*> parseIdRaw KeywordsPermitted
344+ parseSpecialIdText :: Parser Text
345+ parseSpecialIdText = fst <$> Parser. match
346+ (Parser. char ' \\ ' >> parseIdRaw KeywordsPermitted )
347+
348+ parseSetIdText :: Parser Text
349+ parseSetIdText = fst <$> Parser. match
350+ (Parser. char ' @' >> parseIdRaw KeywordsPermitted )
345351
346352parseSetId :: Parser Id
347- parseSetId =
348- stringParserToIdParser parseSetIdString
349- where
350- parseSetIdString =
351- (:) <$> Parser. char ' @' <*> parseIdRaw KeywordsPermitted
353+ parseSetId = parseIntoId parseSetIdText
352354
353355{- | Parses a 'Symbol' 'Id'
354356
@@ -357,41 +359,12 @@ parseSetId =
357359@
358360-}
359361parseSymbolId :: Parser Id
360- parseSymbolId =
361- stringParserToIdParser symbolIdRawParser <?> " symbol or alias identifier"
362-
363- symbolIdRawParser :: Parser String
364- symbolIdRawParser = do
365- c <- peekChar'
366- if c == ' \\ '
367- then do
368- skipChar ' \\ '
369- (c : ) <$> parseIdRaw KeywordsPermitted
370- else parseIdRaw KeywordsForbidden
371-
372- {-| Parses a @set-variable-id@, which always starts with @\@@.
373-
374- @
375- <set-variable-id> ::= ['@'] <id>
376- @
377- -}
378- setVariableIdParser :: Parser Id
379- setVariableIdParser = stringParserToIdParser setVariableIdRawParser
362+ parseSymbolId = parseIntoId symbolIdRawParser <?> " symbol or alias identifier"
380363
381- setVariableIdRawParser :: Parser String
382- setVariableIdRawParser = do
383- start <- Parser. char ' @'
384- end <- parseIdRaw KeywordsPermitted
385- return (start: end)
386-
387- {-| Parses an @element-variable-id@
388-
389- @
390- <element-variable-id> ::= <id>
391- @
392- -}
393- elementVariableIdParser :: Parser Id
394- elementVariableIdParser = parseId
364+ symbolIdRawParser :: Parser Text
365+ symbolIdRawParser = fmap fst $ Parser. match $
366+ (Parser. char ' \\ ' >> parseIdRaw KeywordsPermitted )
367+ <|> parseIdRaw KeywordsForbidden
395368
396369{- | Parses a C-style string literal, unescaping it.
397370
0 commit comments