Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions Grammar/python.gram
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ simple_stmt[asdl_seq*]:
| a=';'.small_stmt+ [';'] NEWLINE { a }
# NOTE: assignment MUST precede expression, else parsing a simple assignment
# will throw a SyntaxError.
small_stmt[stmt_ty]:
small_stmt[stmt_ty] (memo):
| assignment
| e=expressions { _Py_Expr(e, EXTRA) }
| &'return' return_stmt
Expand Down Expand Up @@ -201,7 +201,7 @@ name_with_optional_default[NameDefaultPair*]:
names_with_default[asdl_seq*]: a=','.name_with_default+ { a }
name_with_default[NameDefaultPair*]:
| n=plain_name '=' e=expression { name_default_pair(p, n, e) }
plain_names[asdl_seq*]: a=','.(plain_name !'=')+ { a }
plain_names[asdl_seq*] (memo): a=','.(plain_name !'=')+ { a }
plain_name[arg_ty]:
| a=NAME b=[':' z=annotation { z }] { _Py_arg(a->v.Name.id, b, NULL, EXTRA) }
kwds[arg_ty]:
Expand All @@ -220,15 +220,15 @@ class_def_raw[stmt_ty]:
(b) ? ((expr_ty) b)->v.Call.keywords : NULL,
c, NULL, EXTRA) }

block[asdl_seq*]: NEWLINE INDENT a=statements DEDENT { a } | simple_stmt
block[asdl_seq*] (memo): NEWLINE INDENT a=statements DEDENT { a } | simple_stmt

expressions_list[asdl_seq*]: a=','.star_expression+ [','] { a }
expressions[expr_ty]:
| a=star_expression b=(',' c=star_expression { c })+ [','] {
_Py_Tuple(CHECK(seq_insert_in_front(p, a, b)), Load, EXTRA) }
| a=star_expression ',' { _Py_Tuple(CHECK(singleton_seq(p, a)), Load, EXTRA) }
| star_expression
star_expression[expr_ty]:
star_expression[expr_ty] (memo):
| '*' a=bitwise_or { _Py_Starred(a, Load, EXTRA) }
| expression

Expand All @@ -240,7 +240,7 @@ named_expression[expr_ty]:
| a=NAME ':=' b=expression { _Py_NamedExpr(CHECK(set_expr_context(p, a, Store)), b, EXTRA) }
| expression
annotated_rhs[expr_ty]: yield_expr | expressions
expression[expr_ty]:
expression[expr_ty] (memo):
| a=disjunction 'if' b=disjunction 'else' c=expression { _Py_IfExp(b, a, c, EXTRA) }
| disjunction
| lambdef
Expand Down Expand Up @@ -274,19 +274,19 @@ lambda_plain_names[asdl_seq*]: a=','.(lambda_plain_name !'=')+ { a }
lambda_plain_name[arg_ty]: a=NAME { _Py_arg(a->v.Name.id, NULL, NULL, EXTRA) }
lambda_kwds[arg_ty]: '**' a=lambda_plain_name { a }

disjunction[expr_ty]:
disjunction[expr_ty] (memo):
| a=conjunction b=('or' c=conjunction { c })+ { _Py_BoolOp(
Or,
CHECK(seq_insert_in_front(p, a, b)),
EXTRA) }
| conjunction
conjunction[expr_ty]:
conjunction[expr_ty] (memo):
| a=inversion b=('and' c=inversion { c })+ { _Py_BoolOp(
And,
CHECK(seq_insert_in_front(p, a, b)),
EXTRA) }
| inversion
inversion[expr_ty]:
inversion[expr_ty] (memo):
| 'not' a=inversion { _Py_UnaryOp(Not, a, EXTRA) }
| comparison
comparison[expr_ty]:
Expand Down Expand Up @@ -340,15 +340,15 @@ term[expr_ty]:
| a=term '%' b=factor { _Py_BinOp(a, Mod, b, EXTRA) }
| a=term '@' b=factor { _Py_BinOp(a, MatMult, b, EXTRA) }
| factor
factor[expr_ty]:
factor[expr_ty] (memo):
| '+' a=factor { _Py_UnaryOp(UAdd, a, EXTRA) }
| '-' a=factor { _Py_UnaryOp(USub, a, EXTRA) }
| '~' a=factor { _Py_UnaryOp(Invert, a, EXTRA) }
| power
power[expr_ty]:
| a=await_primary '**' b=factor { _Py_BinOp(a, Pow, b, EXTRA) }
| await_primary
await_primary[expr_ty]:
await_primary[expr_ty] (memo):
| AWAIT a=primary { _Py_Await(a, EXTRA) }
| primary
primary[expr_ty]:
Expand Down Expand Up @@ -410,7 +410,7 @@ yield_expr[expr_ty]:
| 'yield' 'from' a=expression { _Py_YieldFrom(a, EXTRA) }
| 'yield' a=[expressions] { _Py_Yield(a, EXTRA) }

arguments[expr_ty]:
arguments[expr_ty] (memo):
| a=args [','] { a }
args[expr_ty]:
| a=starred_expression b=[',' c=args { c }] {
Expand Down
45 changes: 45 additions & 0 deletions Parser/pegen/pegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,43 @@ fill_token(Parser *p)
return 0;
}

// Instrumentation to count the effectiveness of memoization.
// The array counts the number of tokens skipped by memoization,
// indexed by type.

#define NSTATISTICS 2000
static long memo_statistics[NSTATISTICS];

// Reset every memoization counter to zero.
//
// The counters live in a static array that is only ever incremented while
// parsing, so call this first to measure a single parse in isolation.
void
clear_memo_statistics(void)
{
    for (int i = 0; i < NSTATISTICS; i++) {
        memo_statistics[i] = 0;
    }
}

// Return a snapshot of the memoization counters as a new Python list.
//
// The list has NSTATISTICS items; item i is the counter stored at index i
// (the memo type), converted to a Python int.
//
// Returns a new reference, or NULL with an exception set if creating the
// list or one of its items fails.
PyObject *
get_memo_statistics(void)
{
    PyObject *ret = PyList_New(NSTATISTICS);
    if (ret == NULL) {
        return NULL;
    }
    for (int i = 0; i < NSTATISTICS; i++) {
        PyObject *value = PyLong_FromLong(memo_statistics[i]);
        if (value == NULL) {
            Py_DECREF(ret);
            return NULL;
        }
        // PyList_SetItem steals the reference to value (even when it
        // fails), so value must not be released here.
        if (PyList_SetItem(ret, i, value) < 0) {
            Py_DECREF(ret);
            return NULL;
        }
    }
    return ret;
}

int // bool
is_memoized(Parser *p, int type, void *pres)
{
Expand All @@ -252,6 +289,14 @@ is_memoized(Parser *p, int type, void *pres)

for (Memo *m = t->memo; m != NULL; m = m->next) {
if (m->type == type) {
if (0 <= type && type < NSTATISTICS) {
long count = m->mark - p->mark;
// A memoized negative result counts for one.
if (count <= 0) {
count = 1;
}
memo_statistics[type] += count;
}
p->mark = m->mark;
*(void **)(pres) = m->node;
// fprintf(stderr, "%d < %d: memoized!\n", p->mark, p->fill);
Expand Down
3 changes: 3 additions & 0 deletions Parser/pegen/pegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ typedef struct {
int is_keyword;
} KeywordOrStarred;

// Memoization instrumentation.
// Reset all memoization counters to zero.
void clear_memo_statistics(void);
// Return the memoization counters as a new Python list (NULL on error).
PyObject *get_memo_statistics(void);

int insert_memo(Parser *p, int mark, int type, void *node);
int update_memo(Parser *p, int mark, int type, void *node);
int is_memoized(Parser *p, int type, void *pres);
Expand Down
2 changes: 1 addition & 1 deletion Tools/peg_generator/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1 @@
peg_parser/parse.c
peg_extension/parse.c
6 changes: 5 additions & 1 deletion Tools/peg_generator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ parse: peg_extension/parse.c
check: peg_extension/parse.c
$(PYTHON) -c "from peg_extension import parse; t = parse.parse_file('$(TESTFILE)', mode=0)"

# Parse $(TIMEFILE) with the extension module, dump the memoization counters
# to the file "@data", then run scripts/joinstats.py on that file.
stats: peg_extension/parse.c
$(PYTHON) -c "from peg_extension import parse; t = parse.parse_file('$(TIMEFILE)', mode=0); parse.dump_memo_stats()" >@data
$(PYTHON) scripts/joinstats.py @data

time: time_compile

time_compile: peg_extension/parse.c
Expand Down Expand Up @@ -92,7 +96,7 @@ mypy: regen-metaparser
$(MYPY) # For list of files, see mypy.ini

format-python:
black pegen test scripts
black pegen scripts

bench: cpython
$(MAKE) -s test_global 2>/dev/null
Expand Down
22 changes: 11 additions & 11 deletions Tools/peg_generator/data/python.gram
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ simple_stmt[asdl_seq*]:
| a=';'.small_stmt+ [';'] NEWLINE { a }
# NOTE: assignment MUST precede expression, else parsing a simple assignment
# will throw a SyntaxError.
small_stmt[stmt_ty]:
small_stmt[stmt_ty] (memo):
| assignment
| e=expressions { _Py_Expr(e, EXTRA) }
| &'return' return_stmt
Expand Down Expand Up @@ -180,7 +180,7 @@ name_with_optional_default[NameDefaultPair*]:
names_with_default[asdl_seq*]: a=','.name_with_default+ { a }
name_with_default[NameDefaultPair*]:
| n=plain_name '=' e=expression { name_default_pair(p, n, e) }
plain_names[asdl_seq*]: a=','.(plain_name !'=')+ { a }
plain_names[asdl_seq*] (memo): a=','.(plain_name !'=')+ { a }
plain_name[arg_ty]:
| a=NAME b=[':' z=annotation { z }] { _Py_arg(a->v.Name.id, b, NULL, EXTRA) }
kwds[arg_ty]:
Expand All @@ -199,15 +199,15 @@ class_def_raw[stmt_ty]:
(b) ? ((expr_ty) b)->v.Call.keywords : NULL,
c, NULL, EXTRA) }

block[asdl_seq*]: NEWLINE INDENT a=statements DEDENT { a } | simple_stmt
block[asdl_seq*] (memo): NEWLINE INDENT a=statements DEDENT { a } | simple_stmt

expressions_list[asdl_seq*]: a=','.star_expression+ [','] { a }
expressions[expr_ty]:
| a=star_expression b=(',' c=star_expression { c })+ [','] {
_Py_Tuple(CHECK(seq_insert_in_front(p, a, b)), Load, EXTRA) }
| a=star_expression ',' { _Py_Tuple(CHECK(singleton_seq(p, a)), Load, EXTRA) }
| star_expression
star_expression[expr_ty]:
star_expression[expr_ty] (memo):
| '*' a=bitwise_or { _Py_Starred(a, Load, EXTRA) }
| expression

Expand All @@ -219,7 +219,7 @@ named_expression[expr_ty]:
| a=NAME ':=' b=expression { _Py_NamedExpr(CHECK(set_expr_context(p, a, Store)), b, EXTRA) }
| expression
annotated_rhs[expr_ty]: yield_expr | expressions
expression[expr_ty]:
expression[expr_ty] (memo):
| a=disjunction 'if' b=disjunction 'else' c=expression { _Py_IfExp(b, a, c, EXTRA) }
| disjunction
| lambdef
Expand Down Expand Up @@ -253,19 +253,19 @@ lambda_plain_names[asdl_seq*]: a=','.(lambda_plain_name !'=')+ { a }
lambda_plain_name[arg_ty]: a=NAME { _Py_arg(a->v.Name.id, NULL, NULL, EXTRA) }
lambda_kwds[arg_ty]: '**' a=lambda_plain_name { a }

disjunction[expr_ty]:
disjunction[expr_ty] (memo):
| a=conjunction b=('or' c=conjunction { c })+ { _Py_BoolOp(
Or,
CHECK(seq_insert_in_front(p, a, b)),
EXTRA) }
| conjunction
conjunction[expr_ty]:
conjunction[expr_ty] (memo):
| a=inversion b=('and' c=inversion { c })+ { _Py_BoolOp(
And,
CHECK(seq_insert_in_front(p, a, b)),
EXTRA) }
| inversion
inversion[expr_ty]:
inversion[expr_ty] (memo):
| 'not' a=inversion { _Py_UnaryOp(Not, a, EXTRA) }
| comparison
comparison[expr_ty]:
Expand Down Expand Up @@ -319,15 +319,15 @@ term[expr_ty]:
| a=term '%' b=factor { _Py_BinOp(a, Mod, b, EXTRA) }
| a=term '@' b=factor { _Py_BinOp(a, MatMult, b, EXTRA) }
| factor
factor[expr_ty]:
factor[expr_ty] (memo):
| '+' a=factor { _Py_UnaryOp(UAdd, a, EXTRA) }
| '-' a=factor { _Py_UnaryOp(USub, a, EXTRA) }
| '~' a=factor { _Py_UnaryOp(Invert, a, EXTRA) }
| power
power[expr_ty]:
| a=await_primary '**' b=factor { _Py_BinOp(a, Pow, b, EXTRA) }
| await_primary
await_primary[expr_ty]:
await_primary[expr_ty] (memo):
| AWAIT a=primary { _Py_Await(a, EXTRA) }
| primary
primary[expr_ty]:
Expand Down Expand Up @@ -389,7 +389,7 @@ yield_expr[expr_ty]:
| 'yield' 'from' a=expression { _Py_YieldFrom(a, EXTRA) }
| 'yield' a=[expressions] { _Py_Yield(a, EXTRA) }

arguments[expr_ty]:
arguments[expr_ty] (memo):
| a=args [','] { a }
args[expr_ty]:
| a=starred_expression b=[',' c=args { c }] {
Expand Down
2 changes: 1 addition & 1 deletion Tools/peg_generator/mypy.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[mypy]
files = pegen, scripts, test
files = pegen, scripts

follow_imports = error
no_implicit_optional = True
Expand Down
39 changes: 39 additions & 0 deletions Tools/peg_generator/peg_extension/peg_extension.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,48 @@ parse_string(PyObject *self, PyObject *args, PyObject *kwds)
return result;
}

// Python-callable wrapper that resets the memoization counters.
//
// The function is registered with METH_NOARGS, so it must have the
// PyCFunction signature; CPython always calls it with two arguments
// (the module and NULL), neither of which is used here.
// Always returns None.
static PyObject *
clear_memo_stats(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
{
    clear_memo_statistics();
    Py_RETURN_NONE;
}

// Python-callable wrapper that returns the memoization counters as a list.
//
// The function is registered with METH_NOARGS, so it must have the
// PyCFunction signature; both arguments are unused.
// Returns a new reference, or NULL with an exception set on failure.
static PyObject *
get_memo_stats(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
{
    return get_memo_statistics();
}

// Python-callable wrapper that prints every non-zero memoization counter
// to C's stdout, one per line, as "<index> <count>".
//
// The function is registered with METH_NOARGS, so it must have the
// PyCFunction signature; both arguments are unused.
// Returns None, or NULL with an exception set if the counters cannot be
// fetched or converted.
// TODO: Write to Python's sys.stdout instead of C's stdout.
static PyObject *
dump_memo_stats(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
{
    PyObject *list = get_memo_statistics();
    if (list == NULL) {
        return NULL;
    }
    Py_ssize_t len = PyList_Size(list);
    for (Py_ssize_t i = 0; i < len; i++) {
        PyObject *value = PyList_GetItem(list, i);  // Borrowed reference.
        if (value == NULL) {
            Py_DECREF(list);
            return NULL;
        }
        long count = PyLong_AsLong(value);
        if (count == -1 && PyErr_Occurred()) {
            // Propagate the conversion error rather than returning None
            // while an exception is still set.
            Py_DECREF(list);
            return NULL;
        }
        if (count > 0) {
            // i is a Py_ssize_t, which is not a long on every platform,
            // so it is printed with %zd instead of %ld.
            printf("%4zd %9ld\n", i, count);
        }
    }
    Py_DECREF(list);
    Py_RETURN_NONE;
}

// Method table for this extension module.  Every entry spells out all four
// PyMethodDef fields (name, function, flags, docstring); the table ends
// with an all-zero sentinel entry.
static PyMethodDef ParseMethods[] = {
    {"parse_file", (PyCFunction)(void(*)(void))parse_file, METH_VARARGS|METH_KEYWORDS, "Parse a file."},
    {"parse_string", (PyCFunction)(void(*)(void))parse_string, METH_VARARGS|METH_KEYWORDS, "Parse a string."},
    {"clear_memo_stats", clear_memo_stats, METH_NOARGS, "Reset the memoization statistics."},
    {"dump_memo_stats", dump_memo_stats, METH_NOARGS, "Print the non-zero memoization statistics to stdout."},
    {"get_memo_stats", get_memo_stats, METH_NOARGS, "Return the memoization statistics as a list."},
    {NULL, NULL, 0, NULL}        /* Sentinel */
};

Expand Down
14 changes: 10 additions & 4 deletions Tools/peg_generator/pegen/c_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,9 @@ def generate(self, filename: str) -> None:
if subheader:
self.print(subheader)
self._setup_keywords()
for i, rulename in enumerate(self.todo, 1000):
self.print(f"#define {rulename}_type {i}")
for i, (rulename, rule) in enumerate(self.todo.items(), 1000):
comment = " // Left-recursive" if rule.left_recursive else ""
self.print(f"#define {rulename}_type {i}{comment}")
self.print()
for rulename, rule in self.todo.items():
if rule.is_loop() or rule.is_gather():
Expand All @@ -217,6 +218,8 @@ def generate(self, filename: str) -> None:
for rulename, rule in list(self.todo.items()):
del self.todo[rulename]
self.print()
if rule.left_recursive:
self.print("// Left-recursive")
self.visit(rule)
if self.skip_actions:
mode = 0
Expand Down Expand Up @@ -311,8 +314,11 @@ def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
self.print(f"static {result_type}")
self.print(f"{node.name}_raw(Parser *p)")

def _should_memoize(self, node: Rule) -> bool:
    """Decide whether the code generated for *node* should be memoized.

    A rule qualifies only when its ``memo`` flag is set and it is not
    left-recursive.
    """
    if not node.memo:
        # Not flagged for memoization: hand back the (falsy) flag itself.
        return node.memo
    return not node.left_recursive

def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
memoize = not node.left_recursive
memoize = self._should_memoize(node)

with self.indent():
self.print(f"{result_type} res = NULL;")
Expand All @@ -339,7 +345,7 @@ def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> N
self.print("return res;")

def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
memoize = not node.left_recursive
memoize = self._should_memoize(node)
is_repeat1 = node.name.startswith("_loop1")

with self.indent():
Expand Down
3 changes: 2 additions & 1 deletion Tools/peg_generator/pegen/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,11 @@ def __iter__(self) -> Iterator[Rule]:


class Rule:
def __init__(self, name: str, type: Optional[str], rhs: Rhs):
def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[object] = None):
self.name = name
self.type = type
self.rhs = rhs
self.memo = bool(memo)
self.visited = False
self.nullable = False
self.left_recursive = False
Expand Down
Loading