Skip to content

Commit 5f4b4bb

Browse files
committed
Py3.12: Always expose greenlet frames on a switch.
Add benchmarks for testing this.
1 parent 13148f9 commit 5f4b4bb

File tree

7 files changed

+178
-109
lines changed

7 files changed

+178
-109
lines changed

benchmarks/chain.py

Lines changed: 142 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,24 @@
55
along.
66
"""
77

8+
import os
89
import pyperf
910
import greenlet
1011

12+
# This is obsolete now; we always expose frames on Python 3.12.
13+
# See https://github.com/python-greenlet/greenlet/pull/393/
14+
# for a complete discussion of performance.
15+
EXPOSE_FRAMES = 'EXPOSE_FRAMES' in os.environ
1116

17+
# Exposing
18+
# 100 frames Mean +- std dev: 5.62 us +- 0.10 us
19+
# 200 frames Mean +- std dev: 14.0 us +- 0.6 us
20+
# 300 frames Mean +- std dev: 22.7 us +- 0.4 us
21+
#
22+
# Non-exposing
23+
# 100 frames Mean +- std dev: 3.64 us +- 0.06 us -> 1.54/1.98us
24+
# 200 frames Mean +- std dev: 9.49 us +- 0.13 us -> 1.47/4.51us
25+
# 300 frames Mean +- std dev: 15.7 us +- 0.3 us -> 1.45/7us
1226

1327
def link(next_greenlet):
1428
value = greenlet.getcurrent().parent.switch()
@@ -23,6 +37,7 @@ def bm_chain(loops):
2337
start_node = greenlet.getcurrent()
2438
for _ in range(CHAIN_GREENLET_COUNT):
2539
g = greenlet.greenlet(link)
40+
g.gr_frames_always_exposed = EXPOSE_FRAMES
2641
g.switch(start_node)
2742
start_node = g
2843
x = start_node.switch(0)
@@ -51,7 +66,8 @@ def bm_getcurrent(loops):
5166
return end - begin
5267

5368
SWITCH_INNER_LOOPS = 10000
54-
def bm_switch(loops):
69+
def bm_switch_shallow(loops):
70+
# pylint:disable=attribute-defined-outside-init
5571
class G(greenlet.greenlet):
5672
other = None
5773
def run(self):
@@ -60,15 +76,63 @@ def run(self):
6076
o.switch()
6177

6278
begin = pyperf.perf_counter()
79+
80+
for _ in range(loops):
81+
gl1 = G()
82+
gl2 = G()
83+
gl1.gr_frames_always_exposed = EXPOSE_FRAMES
84+
gl2.gr_frames_always_exposed = EXPOSE_FRAMES
85+
gl1.other = gl2
86+
gl2.other = gl1
87+
gl1.switch()
88+
89+
gl1.switch()
90+
gl2.switch()
91+
gl1.other = gl2.other = None
92+
assert gl1.dead
93+
assert gl2.dead
94+
95+
end = pyperf.perf_counter()
96+
return end - begin
97+
98+
def bm_switch_deep(loops, _MAX_DEPTH=200):
99+
# pylint:disable=attribute-defined-outside-init
100+
class G(greenlet.greenlet):
101+
other = None
102+
def run(self):
103+
for _ in range(SWITCH_INNER_LOOPS):
104+
self.recur_then_switch()
105+
106+
def recur_then_switch(self, depth=_MAX_DEPTH):
107+
if not depth:
108+
self.other.switch()
109+
else:
110+
self.recur_then_switch(depth - 1)
111+
112+
begin = pyperf.perf_counter()
113+
63114
for _ in range(loops):
64115
gl1 = G()
65116
gl2 = G()
117+
gl1.gr_frames_always_exposed = EXPOSE_FRAMES
118+
gl2.gr_frames_always_exposed = EXPOSE_FRAMES
66119
gl1.other = gl2
67120
gl2.other = gl1
68121
gl1.switch()
122+
123+
gl1.switch()
124+
gl2.switch()
125+
gl1.other = gl2.other = None
126+
assert gl1.dead
127+
assert gl2.dead
128+
69129
end = pyperf.perf_counter()
70130
return end - begin
71131

132+
def bm_switch_deeper(loops):
133+
return bm_switch_deep(loops, 400)
134+
135+
72136
CREATE_INNER_LOOPS = 10
73137
def bm_create(loops):
74138
gl = greenlet.greenlet
@@ -87,20 +151,81 @@ def bm_create(loops):
87151
end = pyperf.perf_counter()
88152
return end - begin
89153

154+
155+
156+
157+
def _bm_recur_frame(loops, RECUR_DEPTH):
158+
159+
def recur(depth):
160+
if not depth:
161+
return greenlet.getcurrent().parent.switch(greenlet.getcurrent())
162+
return recur(depth - 1)
163+
164+
165+
begin = pyperf.perf_counter()
166+
for _ in range(loops):
167+
168+
for _ in range(CHAIN_GREENLET_COUNT):
169+
g = greenlet.greenlet(recur)
170+
g.gr_frames_always_exposed = EXPOSE_FRAMES
171+
g2 = g.switch(RECUR_DEPTH)
172+
assert g2 is g, (g2, g)
173+
f = g2.gr_frame
174+
assert f is not None, "frame is none"
175+
count = 0
176+
while f:
177+
count += 1
178+
f = f.f_back
179+
# This assertion fails with the released versions of greenlet
180+
# on Python 3.12
181+
#assert count == RECUR_DEPTH + 1, (count, RECUR_DEPTH)
182+
# Switch back so it can be collected; otherwise they build
183+
# up forever.
184+
g.switch()
185+
# fall off the end of it and back to us.
186+
del g
187+
del g2
188+
del f
189+
190+
191+
end = pyperf.perf_counter()
192+
return end - begin
193+
194+
def bm_recur_frame_2(loops):
195+
return _bm_recur_frame(loops, 2)
196+
197+
def bm_recur_frame_20(loops):
198+
return _bm_recur_frame(loops, 20)
199+
200+
def bm_recur_frame_200(loops):
201+
return _bm_recur_frame(loops, 200)
202+
90203
if __name__ == '__main__':
91204
runner = pyperf.Runner()
205+
92206
runner.bench_time_func(
93207
'create a greenlet',
94208
bm_create,
95209
inner_loops=CREATE_INNER_LOOPS
96210
)
97211

98212
runner.bench_time_func(
99-
'switch between two greenlets',
100-
bm_switch,
213+
'switch between two greenlets (shallow)',
214+
bm_switch_shallow,
101215
inner_loops=SWITCH_INNER_LOOPS
102216
)
103217

218+
runner.bench_time_func(
219+
'switch between two greenlets (deep)',
220+
bm_switch_deep,
221+
inner_loops=SWITCH_INNER_LOOPS
222+
)
223+
224+
runner.bench_time_func(
225+
'switch between two greenlets (deeper)',
226+
bm_switch_deeper,
227+
inner_loops=SWITCH_INNER_LOOPS
228+
)
104229
runner.bench_time_func(
105230
'getcurrent single thread',
106231
bm_getcurrent,
@@ -110,3 +235,17 @@ def bm_create(loops):
110235
'chain(%s)' % CHAIN_GREENLET_COUNT,
111236
bm_chain,
112237
)
238+
239+
runner.bench_time_func(
240+
'read 2 nested frames',
241+
bm_recur_frame_2,
242+
)
243+
244+
runner.bench_time_func(
245+
'read 20 nested frames',
246+
bm_recur_frame_20,
247+
)
248+
runner.bench_time_func(
249+
'read 200 nested frames',
250+
bm_recur_frame_200,
251+
)

src/greenlet/TGreenlet.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -168,11 +168,7 @@ Greenlet::g_switchstack(void)
168168
current->exception_state << tstate;
169169
this->python_state.will_switch_from(tstate);
170170
switching_thread_state = this;
171-
#if GREENLET_PY312
172-
if (current->python_state.expose_frames_on_every_suspension) {
173-
current->expose_frames();
174-
}
175-
#endif
171+
current->expose_frames();
176172
}
177173
assert(this->args() || PyErr_Occurred());
178174
// If this is the first switch into a greenlet, this will
@@ -614,8 +610,8 @@ bool Greenlet::is_currently_running_in_some_thread() const
614610
#if GREENLET_PY312
615611
void GREENLET_NOINLINE(Greenlet::expose_frames)()
616612
{
617-
if (!this->python_state.frame_exposure_needs_stack_rewrite()) {
618-
return; // nothing to do
613+
if (!this->python_state.top_frame()) {
614+
return;
619615
}
620616

621617
_PyInterpreterFrame* last_complete_iframe = nullptr;
@@ -680,8 +676,8 @@ void GREENLET_NOINLINE(Greenlet::expose_frames)()
680676
// which can't have happened yet because the frame is currently
681677
// executing as far as the interpreter is concerned. So, we can
682678
// reuse it for our own purposes.
683-
assert(iframe->owner == FRAME_OWNED_BY_THREAD ||
684-
iframe->owner == FRAME_OWNED_BY_GENERATOR);
679+
assert(iframe->owner == FRAME_OWNED_BY_THREAD
680+
|| iframe->owner == FRAME_OWNED_BY_GENERATOR);
685681
if (last_complete_iframe) {
686682
assert(last_complete_iframe->frame_obj);
687683
memcpy(&last_complete_iframe->frame_obj->_f_frame_data[0],
@@ -707,6 +703,11 @@ void GREENLET_NOINLINE(Greenlet::expose_frames)()
707703
&last_complete_iframe->previous, sizeof(void *));
708704
last_complete_iframe->previous = nullptr;
709705
}
706+
}
707+
#else
708+
void Greenlet::expose_frames()
709+
{
710+
710711
}
711712
#endif
712713

src/greenlet/TPythonState.cpp

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,6 @@ PythonState::PythonState()
2525
,datastack_top(nullptr)
2626
,datastack_limit(nullptr)
2727
#endif
28-
#if GREENLET_PY312
29-
,frames_were_exposed(false)
30-
,expose_frames_on_every_suspension(false)
31-
#endif
3228
{
3329
#if GREENLET_USE_CFRAME
3430
/*
@@ -162,21 +158,24 @@ void PythonState::operator<<(const PyThreadState *const tstate) noexcept
162158
#if GREENLET_PY312
163159
void GREENLET_NOINLINE(PythonState::unexpose_frames)()
164160
{
165-
if (!this->frames_were_exposed) {
161+
if (!this->top_frame()) {
166162
return;
167163
}
164+
168165
// See Greenlet::expose_frames() and the comment on frames_were_exposed
169166
// for more information about this logic.
170-
for (_PyInterpreterFrame *iframe = this->_top_frame->f_frame;
171-
iframe != nullptr; ) {
167+
_PyInterpreterFrame *iframe = this->_top_frame->f_frame;
168+
while (iframe != nullptr) {
172169
_PyInterpreterFrame *prev_exposed = iframe->previous;
173170
assert(iframe->frame_obj);
174171
memcpy(&iframe->previous, &iframe->frame_obj->_f_frame_data[0],
175172
sizeof(void *));
176173
iframe = prev_exposed;
177174
}
178-
this->frames_were_exposed = false;
179175
}
176+
#else
177+
void PythonState::unexpose_frames()
178+
{}
180179
#endif
181180

182181
void PythonState::operator>>(PyThreadState *const tstate) noexcept
@@ -201,9 +200,7 @@ void PythonState::operator>>(PyThreadState *const tstate) noexcept
201200
#if GREENLET_PY312
202201
tstate->py_recursion_remaining = tstate->py_recursion_limit - this->py_recursion_depth;
203202
tstate->c_recursion_remaining = C_RECURSION_LIMIT - this->c_recursion_depth;
204-
if (this->frames_were_exposed) {
205-
this->unexpose_frames();
206-
}
203+
this->unexpose_frames();
207204
#else // \/ 3.11
208205
tstate->recursion_remaining = tstate->recursion_limit - this->recursion_depth;
209206
#endif // GREENLET_PY312

src/greenlet/TStackState.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -230,25 +230,27 @@ void StackState::copy_from_stack(void* vdest, const void* vsrc, size_t n) const
230230
{
231231
char* dest = static_cast<char*>(vdest);
232232
const char* src = static_cast<const char*>(vsrc);
233-
if (src + n <= _stack_start || src >= _stack_start + _stack_saved ||
234-
_stack_saved == 0) {
233+
if (src + n <= this->_stack_start
234+
|| src >= this->_stack_start + this->_stack_saved
235+
|| this->_stack_saved == 0) {
235236
// Nothing we're copying was spilled from the stack
236237
memcpy(dest, src, n);
237238
return;
238239
}
239-
if (src < _stack_start) {
240+
241+
if (src < this->_stack_start) {
240242
// Copy the part before the saved stack.
241243
// We know src + n > _stack_start due to the test above.
242-
size_t nbefore = _stack_start - src;
244+
const size_t nbefore = this->_stack_start - src;
243245
memcpy(dest, src, nbefore);
244246
dest += nbefore;
245247
src += nbefore;
246248
n -= nbefore;
247249
}
248250
// We know src >= _stack_start after the before-copy, and
249251
// src < _stack_start + _stack_saved due to the first if condition
250-
size_t nspilled = std::min<size_t>(n, _stack_start + _stack_saved - src);
251-
memcpy(dest, stack_copy + (src - _stack_start), nspilled);
252+
size_t nspilled = std::min<size_t>(n, this->_stack_start + this->_stack_saved - src);
253+
memcpy(dest, this->stack_copy + (src - this->_stack_start), nspilled);
252254
dest += nspilled;
253255
src += nspilled;
254256
n -= nspilled;

src/greenlet/greenlet.cpp

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -758,38 +758,10 @@ green_setcontext(BorrowedGreenlet self, PyObject* nctx, void* UNUSED(context))
758758
static PyObject*
759759
green_getframe(BorrowedGreenlet self, void* UNUSED(context))
760760
{
761-
#if GREENLET_PY312
762-
self->expose_frames();
763-
#endif
764761
const PythonState::OwnedFrame& top_frame = self->top_frame();
765762
return top_frame.acquire_or_None();
766763
}
767764

768-
static PyObject*
769-
green_getframeexposed(BorrowedGreenlet self, void* UNUSED(context))
770-
{
771-
#if GREENLET_PY312
772-
if (!self->expose_frames_on_every_suspension()) {
773-
Py_RETURN_FALSE;
774-
}
775-
#endif
776-
Py_RETURN_TRUE;
777-
}
778-
779-
static int
780-
green_setframeexposed(BorrowedGreenlet self, PyObject* val, void* UNUSED(context))
781-
{
782-
if (val != Py_True && val != Py_False) {
783-
PyErr_Format(PyExc_TypeError,
784-
"expected a bool, not '%s'",
785-
Py_TYPE(val)->tp_name);
786-
return -1;
787-
}
788-
#if GREENLET_PY312
789-
self->set_expose_frames_on_every_suspension(val == Py_True);
790-
#endif
791-
return 0;
792-
}
793765

794766
static PyObject*
795767
green_getstate(PyGreenlet* self)
@@ -1008,10 +980,6 @@ static PyGetSetDef green_getsets[] = {
1008980
{"run", (getter)green_getrun, (setter)green_setrun, /*XXX*/ NULL},
1009981
{"parent", (getter)green_getparent, (setter)green_setparent, /*XXX*/ NULL},
1010982
{"gr_frame", (getter)green_getframe, NULL, /*XXX*/ NULL},
1011-
{"gr_frames_always_exposed",
1012-
(getter)green_getframeexposed,
1013-
(setter)green_setframeexposed,
1014-
/*XXX*/ NULL},
1015983
{"gr_context",
1016984
(getter)green_getcontext,
1017985
(setter)green_setcontext,

0 commit comments

Comments
 (0)