diff --git a/Grammar/python.gram b/Grammar/python.gram index 1c794c516a06a6..7c7366c7a212fa 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -1,6 +1,14 @@ # Simplified grammar for Python @bytecode True +@modulename 'peg_parser' # Non needed for now, but might be needed later +@trailer ''' +mod_ty +parse_start(Parser *p) +{ + return start_rule(p); +} +''' start[mod_ty]: a=[statements] ENDMARKER { Module(a, NULL, p->arena) } statements[asdl_seq*]: a=statement+ { seq_flatten(p, a) } diff --git a/Include/pegen_interface.h b/Include/pegen_interface.h new file mode 100644 index 00000000000000..c48137fff04619 --- /dev/null +++ b/Include/pegen_interface.h @@ -0,0 +1,20 @@ +#ifndef Py_LIMITED_API +#ifndef Py_PEGENINTERFACE +#define Py_PEGENINTERFACE +#ifdef __cplusplus +extern "C" { +#endif + +#include "Python.h" +#include "Python-ast.h" + +PyAPI_FUNC(mod_ty) PyPegen_ASTFromFile(const char *filename, PyArena *arena); +PyAPI_FUNC(mod_ty) PyPegen_ASTFromString(const char *str, PyArena *arena); +PyAPI_FUNC(PyCodeObject *) PyPegen_CodeObjectFromFile(const char *filename, PyArena *arena); +PyAPI_FUNC(PyCodeObject *) PyPegen_CodeObjectFromString(const char *str, PyArena *arena); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PEGENINTERFACE*/ +#endif /* !Py_LIMITED_API */ diff --git a/Lib/test/test_peg_parser.py b/Lib/test/test_peg_parser.py index 37c916bb356add..f2d376119eb847 100644 --- a/Lib/test/test_peg_parser.py +++ b/Lib/test/test_peg_parser.py @@ -615,7 +615,7 @@ class ASTGenerationTest(unittest.TestCase): def test_correct_ast_generation_on_source_files(self) -> None: self.maxDiff = None for source in TEST_SOURCES: - actual_ast = peg_parser.parse_string(source, mode=1) + actual_ast = peg_parser.parse_string(source) expected_ast = ast.parse(source) self.assertEqual( ast.dump(actual_ast, include_attributes=True), @@ -626,12 +626,12 @@ def test_correct_ast_generation_on_source_files(self) -> None: def test_incorrect_ast_generation_on_source_files(self) -> None: for source in FAIL_SOURCES: with self.assertRaises(SyntaxError, msg=f"Parsing {source} did not raise an exception"): - peg_parser.parse_string(source, mode=0) + peg_parser.parse_string(source) @unittest.expectedFailure def test_correct_but_known_to_fail_ast_generation_on_source_files(self) -> None: for source in GOOD_BUT_FAIL_SOURCES: - actual_ast = peg_parser.parse_string(source, mode=1) + actual_ast = peg_parser.parse_string(source) expected_ast = ast.parse(source) self.assertEqual( ast.dump(actual_ast, include_attributes=True), @@ -641,7 +641,7 @@ def test_correct_but_known_to_fail_ast_generation_on_source_files(self) -> None: def test_correct_ast_generation_without_pos_info(self) -> None: for source in GOOD_BUT_FAIL_SOURCES: - actual_ast = peg_parser.parse_string(source, mode=1) + actual_ast = peg_parser.parse_string(source) expected_ast = ast.parse(source) self.assertEqual( ast.dump(actual_ast), diff --git a/Makefile.pre.in b/Makefile.pre.in index dec25ae7bc141f..35caf35959d04a 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -295,6 +295,19 @@ LIBFFI_INCLUDEDIR= @LIBFFI_INCLUDEDIR@ ########################################################################## # Parser + +PEGEN_OBJS= \ + Parser/pegen/pegen.o \ + Parser/pegen/parse.o \ + Parser/pegen/parse_string.o \ + Parser/pegen/peg_api.o + + +PEGEN_HEADERS= \ + $(srcdir)/Include/pegen_interface.h \ + $(srcdir)/Parser/pegen/pegen.h \ + $(srcdir)/Parser/pegen/parse_string.h + POBJS= \ Parser/acceler.o \ Parser/grammar1.o \ @@ -303,9 +316,10 @@ POBJS= \ Parser/parser.o \ Parser/token.o \ -PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o +PARSER_OBJS= $(POBJS) $(PEGEN_OBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o PARSER_HEADERS= \ + $(PEGEN_HEADERS) \ $(srcdir)/Include/grammar.h \ $(srcdir)/Include/parsetok.h \ $(srcdir)/Parser/parser.h \ @@ -808,8 +822,8 @@ regen-grammar: regen-token .PHONY: regen-pegen regen-pegen: PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen -c -q $(srcdir)/Grammar/python.gram \ - -o $(srcdir)/Modules/peg_parser/parse.new.c - $(UPDATE_FILE) $(srcdir)/Modules/peg_parser/parse.c $(srcdir)/Modules/peg_parser/parse.new.c + -o $(srcdir)/Parser/pegen/parse.new.c + $(UPDATE_FILE) $(srcdir)/Parser/pegen/parse.c $(srcdir)/Parser/pegen/parse.new.c .PHONY=regen-ast regen-ast: diff --git a/Modules/Setup b/Modules/Setup index 2b4174eb370935..ecba5a8beb5a3d 100644 --- a/Modules/Setup +++ b/Modules/Setup @@ -135,7 +135,7 @@ faulthandler faulthandler.c _tracemalloc _tracemalloc.c hashtable.c # PEG-based parser module -- slated to be *the* parser -peg_parser -DPy_BUILD_CORE_BUILTIN -I$(srcdir)/Include/internal -I$(srcdir)/Parser -I$(srcdir)/Modules/peg_parser peg_parser/parse.c peg_parser/parse_string.c peg_parser/pegen.c +peg_parser peg_parser.c # The rest of the modules listed in this file are all commented out by # default. Usually they can be detected and built as dynamically diff --git a/Modules/peg_parser.c b/Modules/peg_parser.c new file mode 100644 index 00000000000000..b0122fb6e3595b --- /dev/null +++ b/Modules/peg_parser.c @@ -0,0 +1,71 @@ +#include +#include + +PyObject * +_Py_parse_file(PyObject *self, PyObject *args) +{ + char *filename; + + if (!PyArg_ParseTuple(args, "s", &filename)) { + return NULL; + } + + PyArena *arena = PyArena_New(); + if (arena == NULL) { + return NULL; + } + + mod_ty res = PyPegen_ASTFromFile(filename, arena); + if (res == NULL) { + PyArena_Free(arena); + return NULL; + } + PyObject *result = PyAST_mod2obj(res); + + PyArena_Free(arena); + return result; +} + +PyObject * +_Py_parse_string(PyObject *self, PyObject *args) +{ + char *the_string; + + if (!PyArg_ParseTuple(args, "s", &the_string)) { + return NULL; + } + + PyArena *arena = PyArena_New(); + if (arena == NULL) { + return NULL; + } + + mod_ty res = PyPegen_ASTFromString(the_string, arena); + if (res == NULL) { + PyArena_Free(arena); + return NULL; + } + PyObject *result = PyAST_mod2obj(res); + + PyArena_Free(arena); + return result; +} + +static PyMethodDef ParseMethods[] = { + {"parse_file", (PyCFunction)(void(*)(void))_Py_parse_file, METH_VARARGS, "Parse a file."}, + {"parse_string", (PyCFunction)(void(*)(void))_Py_parse_string, METH_VARARGS, "Parse a string."}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +static struct PyModuleDef parsemodule = { + PyModuleDef_HEAD_INIT, + .m_name = "peg_parser", + .m_doc = "A parser.", + .m_methods = ParseMethods, +}; + +PyMODINIT_FUNC +PyInit_peg_parser(void) +{ + return PyModule_Create(&parsemodule); +} diff --git a/Modules/peg_parser/parse.c b/Parser/pegen/parse.c similarity index 99% rename from Modules/peg_parser/parse.c rename to Parser/pegen/parse.c index fbe302c28d5ead..de7d64c2a1e38c 100644 --- a/Modules/peg_parser/parse.c +++ b/Parser/pegen/parse.c @@ -1,6 +1,7 @@ // @generated by pegen.py from ./Grammar/python.gram #include "pegen.h" -static KeywordToken *reserved_keywords[] = { +const int n_keyword_lists = 9; +KeywordToken *reserved_keywords[] = { NULL, NULL, (KeywordToken[]) { @@ -13558,55 +13559,8 @@ _tmp_124_rule(Parser *p) return res; } -static PyObject * -parse_file(PyObject *self, PyObject *args, PyObject *kwds) +mod_ty +parse_start(Parser *p) { - static char *keywords[] = {"file", "mode", NULL}; - const char *filename; - int mode = 2; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|i", keywords, &filename, &mode)) - return NULL; - if (mode < 0 || mode > 2) - return PyErr_Format(PyExc_ValueError, "Bad mode, must be 0 <= mode <= 2"); - return run_parser_from_file(filename, (void *)start_rule, mode, reserved_keywords, 9); + return start_rule(p); } - -static PyObject * -parse_string(PyObject *self, PyObject *args, PyObject *kwds) -{ - static char *keywords[] = {"string", "mode", NULL}; - const char *the_string; - int mode = 2; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|i", keywords, &the_string, &mode)) - return NULL; - if (mode < 0 || mode > 2) - return PyErr_Format(PyExc_ValueError, "Bad mode, must be 0 <= mode <= 2"); - return run_parser_from_string(the_string, (void *)start_rule, mode, reserved_keywords, 9); -} - -static PyMethodDef ParseMethods[] = { - {"parse_file", (PyCFunction)(void(*)(void))parse_file, METH_VARARGS|METH_KEYWORDS, "Parse a file."}, - {"parse_string", (PyCFunction)(void(*)(void))parse_string, METH_VARARGS|METH_KEYWORDS, "Parse a string."}, - {NULL, NULL, 0, NULL} /* Sentinel */ -}; - -static struct PyModuleDef parsemodule = { - PyModuleDef_HEAD_INIT, - .m_name = "peg_parser", - .m_doc = "A parser.", - .m_methods = ParseMethods, -}; - -PyMODINIT_FUNC -PyInit_peg_parser(void) -{ - PyObject *m = PyModule_Create(&parsemodule); - if (m == NULL) - return NULL; - - return m; -} - -// The end diff --git a/Modules/peg_parser/parse_string.c b/Parser/pegen/parse_string.c similarity index 99% rename from Modules/peg_parser/parse_string.c rename to Parser/pegen/parse_string.c index cb7f5595564a4c..ae0fdb4b85d223 100644 --- a/Modules/peg_parser/parse_string.c +++ b/Parser/pegen/parse_string.c @@ -1,6 +1,6 @@ #include -#include +#include "../tokenizer.h" #include "pegen.h" #include "parse_string.h" diff --git a/Modules/peg_parser/parse_string.h b/Parser/pegen/parse_string.h similarity index 100% rename from Modules/peg_parser/parse_string.h rename to Parser/pegen/parse_string.h diff --git a/Parser/pegen/peg_api.c b/Parser/pegen/peg_api.c new file mode 100644 index 00000000000000..479028c37d4c61 --- /dev/null +++ b/Parser/pegen/peg_api.c @@ -0,0 +1,28 @@ +#include + +#include "../tokenizer.h" +#include "pegen.h" + +mod_ty +PyPegen_ASTFromString(const char *str, PyArena *arena) +{ + return run_parser_from_string(str, parse_start, RAW_AST_OBJECT, arena); +} + +mod_ty +PyPegen_ASTFromFile(const char *filename, PyArena *arena) +{ + return run_parser_from_file(filename, parse_start, RAW_AST_OBJECT, arena); +} + +PyCodeObject * +PyPegen_CodeObjectFromString(const char *str, PyArena *arena) +{ + return run_parser_from_string(str, parse_start, CODE_OBJECT, arena); +} + +PyCodeObject * +PyPegen_CodeObjectFromFile(const char *file, PyArena *arena) +{ + return run_parser_from_file(file, parse_start, CODE_OBJECT, arena); +} diff --git a/Modules/peg_parser/pegen.c b/Parser/pegen/pegen.c similarity index 95% rename from Modules/peg_parser/pegen.c rename to Parser/pegen/pegen.c index 9c4f701c8e04ab..f76518d59fb070 100644 --- a/Modules/peg_parser/pegen.c +++ b/Parser/pegen/pegen.c @@ -1,5 +1,5 @@ #include -#include +#include "../tokenizer.h" #include "pegen.h" #include "parse_string.h" @@ -168,7 +168,7 @@ CONSTRUCTOR(Parser *p, ...) } static int -_get_keyword_or_name_type(Parser *p, char *name, int name_len) +_get_keyword_or_name_type(Parser *p, const char *name, int name_len) { if (name_len >= p->n_keyword_lists || p->keywords[name_len] == NULL) { return NAME; @@ -184,7 +184,7 @@ _get_keyword_or_name_type(Parser *p, char *name, int name_len) int fill_token(Parser *p) { - char *start, *end; + const char *start, *end; int type = PyTokenizer_Get(p->tok, &start, &end); if (type == ERRORTOKEN) { if (!PyErr_Occurred()) { @@ -490,25 +490,34 @@ number_token(Parser *p) p->arena); } -PyObject * -run_parser(struct tok_state *tok, void *(start_rule_func)(Parser *), int mode, int input_mode, - KeywordToken **keywords, int n_keyword_lists) +void +Parser_Free(Parser *p) +{ + for (int i = 0; i < p->size; i++) { + PyMem_Free(p->tokens[i]); + } + PyMem_Free(p->tokens); + PyMem_Free(p); +} + +Parser * +Parser_New(struct tok_state *tok, mod_ty(*parse_func)(Parser *), int input_mode, + PyArena *arena) { - PyObject *result = NULL; Parser *p = PyMem_Malloc(sizeof(Parser)); if (p == NULL) { PyErr_Format(PyExc_MemoryError, "Out of memory for Parser"); - goto exit; + return NULL; } assert(tok != NULL); p->tok = tok; p->input_mode = input_mode; - p->keywords = keywords; + p->keywords = reserved_keywords; p->n_keyword_lists = n_keyword_lists; p->tokens = PyMem_Malloc(sizeof(Token *)); if (!p->tokens) { PyErr_Format(PyExc_MemoryError, "Out of memory for tokens"); - goto exit; + return NULL; } p->tokens[0] = PyMem_Malloc(sizeof(Token)); memset(p->tokens[0], '\0', sizeof(Token)); @@ -516,28 +525,30 @@ run_parser(struct tok_state *tok, void *(start_rule_func)(Parser *), int mode, i p->fill = 0; p->size = 1; - p->arena = PyArena_New(); - if (!p->arena) { - goto exit; - } + p->arena = arena; if (fill_token(p) < 0) { - goto exit; + return NULL; } - PyErr_Clear(); + p->start_rule_func = parse_func; - p->start_rule_func = start_rule_func; + return p; +} +void * +run_parser(Parser *p, int mode) +{ int error = setjmp(p->error_env); if (error) { - goto exit; + return NULL; } - void *res = (*start_rule_func)(p); + mod_ty (*parse_func)(Parser *) = p->start_rule_func; + mod_ty res = (*parse_func)(p); if (res == NULL) { if (PyErr_Occurred()) { - goto exit; + return NULL; } if (p->fill == 0) { raise_syntax_error(p, "error at start before reading any input"); @@ -545,35 +556,23 @@ run_parser(struct tok_state *tok, void *(start_rule_func)(Parser *), int mode, i else { raise_syntax_error(p, "invalid syntax"); } - goto exit; + return NULL; } - if (mode == 2) { - result = (PyObject *)PyAST_CompileObject(res, tok->filename, NULL, -1, p->arena); - } - else if (mode == 1) { + void *result = NULL; + if (mode == CODE_OBJECT) { + result = PyAST_CompileObject(res, p->tok->filename, NULL, -1, p->arena); + } else if (mode == AST_OBJECT) { result = PyAST_mod2obj(res); - } - else { - result = Py_None; - Py_INCREF(result); + } else { + result = res; } -exit: - for (int i = 0; i < p->size; i++) { - PyMem_Free(p->tokens[i]); - } - PyMem_Free(p->tokens); - if (p->arena != NULL) { - PyArena_Free(p->arena); - } - PyMem_Free(p); return result; } -PyObject * -run_parser_from_file(const char *filename, void *(start_rule_func)(Parser *), int mode, - KeywordToken **keywords, int n_keyword_lists) +void * +run_parser_from_file(const char *filename, mod_ty (*parse_func)(Parser *), int mode, PyArena *arena) { FILE *fp = fopen(filename, "rb"); if (fp == NULL) { @@ -587,7 +586,7 @@ run_parser_from_file(const char *filename, void *(start_rule_func)(Parser *), in } // From here on we need to clean up even if there's an error - PyObject *result = NULL; + void *result = NULL; struct tok_state *tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL); if (tok == NULL) { @@ -598,8 +597,15 @@ run_parser_from_file(const char *filename, void *(start_rule_func)(Parser *), in tok->filename = filename_ob; filename_ob = NULL; - result = run_parser(tok, start_rule_func, mode, FILE_INPUT, keywords, n_keyword_lists); + Parser *p = Parser_New(tok, parse_func, FILE_INPUT, arena); + if (p == NULL) { + PyTokenizer_Free(tok); + goto error; + } + + result = run_parser(p, mode); + Parser_Free(p); PyTokenizer_Free(tok); error: @@ -608,11 +614,10 @@ run_parser_from_file(const char *filename, void *(start_rule_func)(Parser *), in return result; } -PyObject * -run_parser_from_string(const char *str, void *(start_rule_func)(Parser *), int mode, - KeywordToken **keywords, int n_keyword_lists) +void * +run_parser_from_string(const char *str, mod_ty(*parse_func)(Parser *), int mode, PyArena *arena) { - PyObject *result = NULL; + void *result = NULL; struct tok_state *tok = PyTokenizer_FromString(str, 1); if (tok == NULL) { return NULL; @@ -624,7 +629,16 @@ run_parser_from_string(const char *str, void *(start_rule_func)(Parser *), int m return NULL; } - result = run_parser(tok, start_rule_func, mode, STRING_INPUT, keywords, n_keyword_lists); + Parser *p = Parser_New(tok, parse_func, STRING_INPUT, arena); + if (p == NULL) { + PyTokenizer_Free(tok); + return NULL; + } + + result = run_parser(p, mode); + + Parser_Free(p); + PyTokenizer_Free(tok); return result; } @@ -1449,4 +1463,4 @@ concatenate_strings(Parser *p, asdl_seq *strings) Py_XDECREF(bytes_str); FstringParser_Dealloc(&state); return NULL; -} +} \ No newline at end of file diff --git a/Modules/peg_parser/pegen.h b/Parser/pegen/pegen.h similarity index 89% rename from Modules/peg_parser/pegen.h rename to Parser/pegen/pegen.h index 37690cb3153b73..7ba70f9731219d 100644 --- a/Modules/peg_parser/pegen.h +++ b/Parser/pegen/pegen.h @@ -14,6 +14,13 @@ enum INPUT_MODE { }; typedef enum INPUT_MODE INPUT_MODE; +enum MODE { + RAW_AST_OBJECT, + AST_OBJECT, + CODE_OBJECT +}; +typedef enum MODE MODE; + typedef struct _memo { int type; void *node; @@ -81,6 +88,9 @@ typedef struct { int is_keyword; } KeywordOrStarred; +extern const int n_keyword_lists; +extern KeywordToken *reserved_keywords[]; + int insert_memo(Parser *p, int mark, int type, void *node); int update_memo(Parser *p, int mark, int type, void *node); int is_memoized(Parser *p, int type, void *pres); @@ -133,16 +143,8 @@ CHECK_CALL_NULL_ALLOWED(Parser *p, void *result) #define CHECK_NULL_ALLOWED(result) CHECK_CALL_NULL_ALLOWED(p, result) PyObject *new_identifier(Parser *, char *); -PyObject *run_parser_from_file(const char *filename, - void *(start_rule_func)(Parser *), - int mode, - KeywordToken **keywords_list, - int n_keyword_lists); -PyObject *run_parser_from_string(const char *str, - void *(start_rule_func)(Parser *), - int mode, - KeywordToken **keywords_list, - int n_keyword_lists); +void *run_parser_from_file(const char *, mod_ty(*)(Parser *), int, PyArena *); +void *run_parser_from_string(const char *, mod_ty(*)(Parser *), int, PyArena *); asdl_seq *singleton_seq(Parser *, void *); asdl_seq *seq_insert_in_front(Parser *, void *, asdl_seq *); asdl_seq *seq_flatten(Parser *, asdl_seq *); @@ -171,4 +173,6 @@ asdl_seq *seq_extract_starred_exprs(Parser *, asdl_seq *); asdl_seq *seq_delete_starred_exprs(Parser *, asdl_seq *); expr_ty concatenate_strings(Parser *p, asdl_seq *); +mod_ty parse_start(Parser *); + #endif diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 76bc48d19b27ea..dce601729036cc 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -28,6 +28,8 @@ #include "osdefs.h" #include +#include + #ifdef HAVE_SIGNAL_H #include #endif @@ -1030,6 +1032,12 @@ PyRun_StringFlags(const char *str, int start, PyObject *globals, if (arena == NULL) return NULL; + // TODO: This crashes because it returns a Python module instead of a mod_ty + // I left this here so when you run 'nm python | grep PyPegen' you can see + // that the symbol is included and this function is called. Obviously this is + // wrong, and the function needs to be changed to return + mod = PyPegen_ASTFromString(str, arena); + mod = PyParser_ASTFromStringObject(str, filename, start, flags, arena); if (mod != NULL) ret = run_mod(mod, filename, globals, locals, flags, arena); diff --git a/Tools/peg_generator/peg_parser/pegen.c b/Tools/peg_generator/peg_parser/pegen.c index 9e1e770a45ad69..31e2c150fd957f 100644 --- a/Tools/peg_generator/peg_parser/pegen.c +++ b/Tools/peg_generator/peg_parser/pegen.c @@ -487,25 +487,34 @@ number_token(Parser *p) p->arena); } -PyObject * -run_parser(struct tok_state *tok, void *(start_rule_func)(Parser *), int mode, int input_mode, - KeywordToken **keywords, int n_keyword_lists) +void +Parser_Free(Parser *p) +{ + for (int i = 0; i < p->size; i++) { + PyMem_Free(p->tokens[i]); + } + PyMem_Free(p->tokens); + PyMem_Free(p); +} + +Parser * +Parser_New(struct tok_state *tok, mod_ty(*parse_func)(Parser *), int input_mode, + PyArena *arena) { - PyObject *result = NULL; Parser *p = PyMem_Malloc(sizeof(Parser)); if (p == NULL) { PyErr_Format(PyExc_MemoryError, "Out of memory for Parser"); - goto exit; + return NULL; } assert(tok != NULL); p->tok = tok; p->input_mode = input_mode; - p->keywords = keywords; + p->keywords = reserved_keywords; p->n_keyword_lists = n_keyword_lists; p->tokens = PyMem_Malloc(sizeof(Token *)); if (!p->tokens) { PyErr_Format(PyExc_MemoryError, "Out of memory for tokens"); - goto exit; + return NULL; } p->tokens[0] = PyMem_Malloc(sizeof(Token)); memset(p->tokens[0], '\0', sizeof(Token)); @@ -513,28 +522,30 @@ run_parser(struct tok_state *tok, void *(start_rule_func)(Parser *), int mode, i p->fill = 0; p->size = 1; - p->arena = PyArena_New(); - if (!p->arena) { - goto exit; - } + p->arena = arena; if (fill_token(p) < 0) { - goto exit; + return NULL; } - PyErr_Clear(); + p->start_rule_func = parse_func; - p->start_rule_func = start_rule_func; + return p; +} +void * +run_parser(Parser *p, int mode) +{ int error = setjmp(p->error_env); if (error) { - goto exit; + return NULL; } - void *res = (*start_rule_func)(p); + mod_ty (*parse_func)(Parser *) = p->start_rule_func; + mod_ty res = (*parse_func)(p); if (res == NULL) { if (PyErr_Occurred()) { - goto exit; + return NULL; } if (p->fill == 0) { raise_syntax_error(p, "error at start before reading any input"); @@ -542,35 +553,23 @@ run_parser(struct tok_state *tok, void *(start_rule_func)(Parser *), int mode, i else { raise_syntax_error(p, "invalid syntax"); } - goto exit; + return NULL; } - if (mode == 2) { - result = (PyObject *)PyAST_CompileObject(res, tok->filename, NULL, -1, p->arena); - } - else if (mode == 1) { + void *result = NULL; + if (mode == CODE_OBJECT) { + result = PyAST_CompileObject(res, p->tok->filename, NULL, -1, p->arena); + } else if (mode == AST_OBJECT) { result = PyAST_mod2obj(res); - } - else { - result = Py_None; - Py_INCREF(result); + } else { + result = res; } -exit: - for (int i = 0; i < p->size; i++) { - PyMem_Free(p->tokens[i]); - } - PyMem_Free(p->tokens); - if (p->arena != NULL) { - PyArena_Free(p->arena); - } - PyMem_Free(p); return result; } -PyObject * -run_parser_from_file(const char *filename, void *(start_rule_func)(Parser *), int mode, - KeywordToken **keywords, int n_keyword_lists) +void * +run_parser_from_file(const char *filename, mod_ty (*parse_func)(Parser *), int mode, PyArena *arena) { FILE *fp = fopen(filename, "rb"); if (fp == NULL) { @@ -584,7 +583,7 @@ run_parser_from_file(const char *filename, void *(start_rule_func)(Parser *), in } // From here on we need to clean up even if there's an error - PyObject *result = NULL; + void *result = NULL; struct tok_state *tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL); if (tok == NULL) { @@ -595,8 +594,15 @@ run_parser_from_file(const char *filename, void *(start_rule_func)(Parser *), in tok->filename = filename_ob; filename_ob = NULL; - result = run_parser(tok, start_rule_func, mode, FILE_INPUT, keywords, n_keyword_lists); + Parser *p = Parser_New(tok, parse_func, FILE_INPUT, arena); + if (p == NULL) { + PyTokenizer_Free(tok); + goto error; + } + + result = run_parser(p, mode); + Parser_Free(p); PyTokenizer_Free(tok); error: @@ -605,11 +611,10 @@ run_parser_from_file(const char *filename, void *(start_rule_func)(Parser *), in return result; } -PyObject * -run_parser_from_string(const char *str, void *(start_rule_func)(Parser *), int mode, - KeywordToken **keywords, int n_keyword_lists) +void * +run_parser_from_string(const char *str, mod_ty(*parse_func)(Parser *), int mode, PyArena *arena) { - PyObject *result = NULL; + void *result = NULL; struct tok_state *tok = PyTokenizer_FromString(str, 1); if (tok == NULL) { return NULL; @@ -621,7 +626,16 @@ run_parser_from_string(const char *str, void *(start_rule_func)(Parser *), int m return NULL; } - result = run_parser(tok, start_rule_func, mode, STRING_INPUT, keywords, n_keyword_lists); + Parser *p = Parser_New(tok, parse_func, STRING_INPUT, arena); + if (p == NULL) { + PyTokenizer_Free(tok); + return NULL; + } + + result = run_parser(p, mode); + + Parser_Free(p); + PyTokenizer_Free(tok); return result; } diff --git a/Tools/peg_generator/peg_parser/pegen.h b/Tools/peg_generator/peg_parser/pegen.h index 020713fac23022..32c770ca978dae 100644 --- a/Tools/peg_generator/peg_parser/pegen.h +++ b/Tools/peg_generator/peg_parser/pegen.h @@ -14,6 +14,13 @@ enum INPUT_MODE { }; typedef enum INPUT_MODE INPUT_MODE; +enum MODE { + RAW_AST_OBJECT, + AST_OBJECT, + CODE_OBJECT, +}; +typedef enum MODE MODE; + typedef struct _memo { int type; void *node; @@ -81,6 +88,9 @@ typedef struct { int is_keyword; } KeywordOrStarred; +extern const int n_keyword_lists; +extern KeywordToken *reserved_keywords[]; + int insert_memo(Parser *p, int mark, int type, void *node); int update_memo(Parser *p, int mark, int type, void *node); int is_memoized(Parser *p, int type, void *pres); @@ -107,10 +117,8 @@ void *CONSTRUCTOR(Parser *p, ...); #define UNUSED(expr) do { (void)(expr); } while (0) #define EXTRA_EXPR(head, tail) head->lineno, head->col_offset, tail->end_lineno, tail->end_col_offset, p->arena #define EXTRA start_lineno, start_col_offset, end_lineno, end_col_offset, p->arena -#define CHECK(result) CHECK_CALL(p, result) -#define CHECK_NULL_ALLOWED(result) CHECK_CALL_NULL_ALLOWED(p, result) -inline void * +Py_LOCAL_INLINE(void *) CHECK_CALL(Parser *p, void *result) { if (result == NULL) { @@ -122,7 +130,7 @@ CHECK_CALL(Parser *p, void *result) /* This is needed for helper functions that are allowed to return NULL without an error. Example: seq_extract_starred_exprs */ -inline void * +Py_LOCAL_INLINE(void *) CHECK_CALL_NULL_ALLOWED(Parser *p, void *result) { if (result == NULL && PyErr_Occurred()) { @@ -131,17 +139,12 @@ CHECK_CALL_NULL_ALLOWED(Parser *p, void *result) return result; } +#define CHECK(result) CHECK_CALL(p, result) +#define CHECK_NULL_ALLOWED(result) CHECK_CALL_NULL_ALLOWED(p, result) + PyObject *new_identifier(Parser *, char *); -PyObject *run_parser_from_file(const char *filename, - void *(start_rule_func)(Parser *), - int mode, - KeywordToken **keywords_list, - int n_keyword_lists); -PyObject *run_parser_from_string(const char *str, - void *(start_rule_func)(Parser *), - int mode, - KeywordToken **keywords_list, - int n_keyword_lists); +void *run_parser_from_file(const char *, mod_ty(*)(Parser *), int, PyArena *); +void *run_parser_from_string(const char *, mod_ty(*)(Parser *), int, PyArena *); asdl_seq *singleton_seq(Parser *, void *); asdl_seq *seq_insert_in_front(Parser *, void *, asdl_seq *); asdl_seq *seq_flatten(Parser *, asdl_seq *); diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index 5782ec25d16d6a..14e1aeb9838feb 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -26,7 +26,9 @@ EXTENSION_PREFIX = """\ #include "pegen.h" + """ + EXTENSION_SUFFIX = """ static PyObject * parse_file(PyObject *self, PyObject *args, PyObject *kwds) @@ -34,26 +36,50 @@ static char *keywords[] = {"file", "mode", NULL}; const char *filename; int mode = %(mode)s; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|i", keywords, &filename, &mode)) return NULL; if (mode < 0 || mode > %(mode)s) return PyErr_Format(PyExc_ValueError, "Bad mode, must be 0 <= mode <= %(mode)s"); - return run_parser_from_file(filename, (void *)start_rule, mode, reserved_keywords, %(n_keyword_lists)s); + + PyArena *arena = PyArena_New(); + if (arena == NULL) { + return NULL; + } + + void *result = run_parser_from_file(filename, start_rule, mode, arena); + if (result != NULL && mode == 0) { + result = Py_None; + Py_INCREF(result); + } + + PyArena_Free(arena); + return result; } static PyObject * parse_string(PyObject *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = {"string", "mode", NULL}; + static char *keywords[] = {"str", "mode", NULL}; const char *the_string; int mode = %(mode)s; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|i", keywords, &the_string, &mode)) return NULL; if (mode < 0 || mode > %(mode)s) return PyErr_Format(PyExc_ValueError, "Bad mode, must be 0 <= mode <= %(mode)s"); - return run_parser_from_string(the_string, (void *)start_rule, mode, reserved_keywords, %(n_keyword_lists)s); + + PyArena *arena = PyArena_New(); + if (arena == NULL) { + return NULL; + } + + void *result = run_parser_from_string(the_string, start_rule, mode, arena); + if (result != NULL && mode == 0) { + result = Py_None; + Py_INCREF(result); + } + + PyArena_Free(arena); + return result; } static PyMethodDef ParseMethods[] = { @@ -75,7 +101,6 @@ PyObject *m = PyModule_Create(&parsemodule); if (m == NULL) return NULL; - return m; } @@ -265,20 +290,11 @@ def generate(self, filename: str) -> None: mode = int(self.rules["start"].type == "mod_ty") if mode == 1 and self.grammar.metas.get("bytecode"): mode += 1 - modulename = self.grammar.metas.get("modulename", "peg_parser") + modulename = self.grammar.metas.get("modulename", "parse") trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX) keyword_cache = self.callmakervisitor.keyword_cache if trailer: - self.print( - trailer.rstrip("\n") - % dict( - mode=mode, - modulename=modulename, - n_keyword_lists=len(max(keyword_cache.keys(), key=len)) + 1 - if len(keyword_cache) > 0 - else 0, - ) - ) + self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename)) def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]: groups: Dict[int, List[Tuple[str, int]]] = {} @@ -291,8 +307,13 @@ def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]: return groups def _setup_keywords(self) -> None: + keyword_cache = self.callmakervisitor.keyword_cache + n_keyword_lists = ( + len(max(keyword_cache.keys(), key=len)) + 1 if len(keyword_cache) > 0 else 0 + ) + self.print(f"const int n_keyword_lists = {n_keyword_lists};") groups = self._group_keywords_by_length() - self.print("static KeywordToken *reserved_keywords[] = {") + self.print("KeywordToken *reserved_keywords[] = {") with self.indent(): num_groups = max(groups) + 1 if groups else 1 for keywords_length in range(num_groups):