Skip to content

Commit d8a3982

Browse files
ErikUggeldahlErikUggeldahl
andcommitted
fix(Android): Render Target ANR (#12470) fc77e0e0ef
The synchronization point introduced when creating render targets may be causing causing ANRs. This fixes that by removing the need to synchronize. We initially decided to create the render target on the command server since we need the sample count, which is only available with the OpenGL context being active. We also wanted to keep surface creation synchronous, so we made render target's runOnce block the main until done. It was assumed that it would be fast and done once early on. But this was a dangerous assumption and lead to ANRs in practice. We want to avoid synchronization with the main thread as much as possible. Rather than make the whole surface creation deferred, we instead retain the sync API but only allocate a holder (or "handle" though not in the command queue sense). This is effectively a pointer-to-a-pointer which only allocates the true render target at the time of first use. This works so long as we assume that allocation to be cheap, which currently it is. Because of this design, we no longer need the command queue to create the render target. The sample count is accessible at the deferred creation time. So the whole creation through the command queue can be shifted out and simply done in the render context bindings. Adds a test to ensure the main thread is no longer blocked. Co-authored-by: Erik <erik@rive.app>
1 parent a8e7057 commit d8a3982

10 files changed

Lines changed: 265 additions & 132 deletions

File tree

.rive_head

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
3b74a521481e40e29c940665e26f074e7c5fca97
1+
fc77e0e0eff91fc2d1b554e8cb664ca59a0b1dbd

kotlin/src/androidTest/kotlin/app/rive/core/CommandQueueLifecycleTest.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ class CommandQueueLifecycleTest : RiveAndroidTest() {
4545

4646
assertTrue(
4747
releaseAttempted.await(2, TimeUnit.SECONDS),
48-
"Command-server release callback did not run"
48+
"Command server release callback did not run"
4949
)
5050
assertTrue(
5151
thrown.get() is IllegalStateException,
52-
"Expected command-server release to throw IllegalStateException"
52+
"Expected command server release to throw IllegalStateException"
5353
)
5454
assertFalse(
5555
commandQueue.isDisposed,

kotlin/src/androidTest/kotlin/app/rive/core/CommandQueueTestHelpers.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import kotlin.test.assertTrue
1616
/**
1717
* Runs [block] while a temporary background thread continuously polls this command queue.
1818
*
19-
* Android instrumentation tests that enqueue command-server work need polling even when the code
19+
* Android instrumentation tests that enqueue command server work need polling even when the code
2020
* under test is not running inside the normal lifecycle-driven polling loop. Polling is stopped in
2121
* `finally` so failures inside [block] do not leak the helper thread.
2222
*/
@@ -115,6 +115,7 @@ private suspend fun RiveWorker.loadRiveFileOrFail(@RawRes rawResourceId: Int): R
115115
is Result.Success -> result.value
116116
is Result.Error ->
117117
throw AssertionError("Failed to load Rive file: ${result.throwable.message}")
118+
118119
is Result.Loading ->
119120
throw AssertionError("RiveFile.fromSource should not return Loading")
120121
}

kotlin/src/androidTest/kotlin/app/rive/core/RiveSurfaceLifecycleTest.kt

Lines changed: 53 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,22 @@ import androidx.test.ext.junit.runners.AndroidJUnit4
1212
import app.rive.Fit
1313
import app.rive.RiveAndroidTest
1414
import app.rive.runtime.kotlin.test.R
15+
import kotlinx.coroutines.Dispatchers
16+
import kotlinx.coroutines.async
17+
import kotlinx.coroutines.runBlocking
18+
import kotlinx.coroutines.withTimeoutOrNull
1519
import org.junit.runner.RunWith
1620
import java.util.concurrent.CountDownLatch
1721
import java.util.concurrent.TimeUnit
1822
import java.util.concurrent.atomic.AtomicBoolean
1923
import kotlin.test.Test
2024
import kotlin.test.assertFailsWith
2125
import kotlin.test.assertFalse
26+
import kotlin.test.assertNotNull
2227
import kotlin.test.assertTrue
28+
import kotlin.time.Duration.Companion.seconds
29+
30+
private val TEST_TIMEOUT = 3.seconds.inWholeMilliseconds
2331

2432
@RunWith(AndroidJUnit4::class)
2533
class RiveSurfaceLifecycleTest : RiveAndroidTest() {
@@ -30,10 +38,7 @@ class RiveSurfaceLifecycleTest : RiveAndroidTest() {
3038

3139
surface.close()
3240

33-
assertTrue(
34-
closeableSurface.awaitClosed(),
35-
"Surface-backed resources were not closed before CommandQueue disposal completed"
36-
)
41+
assertClosed(closeableSurface)
3742
}
3843

3944
@Test
@@ -55,10 +60,42 @@ class RiveSurfaceLifecycleTest : RiveAndroidTest() {
5560
surface.close()
5661

5762
assertDisposed(commandQueue)
58-
assertTrue(
59-
closeableSurface.awaitClosed(),
60-
"Surface-backed resources were not closed before CommandQueue disposal completed"
61-
)
63+
assertClosed(closeableSurface)
64+
}
65+
66+
@Test
67+
fun createRiveSurface_returnsImmediately() = runBlocking {
68+
val blockEntered = CountDownLatch(1)
69+
val blockMayExit = CountDownLatch(1)
70+
71+
// Block the command server before surface creation. Surface creation should only allocate
72+
// the lazy render target holder and must not wait for this queued work to complete.
73+
riveWorker.runOnCommandServer {
74+
blockEntered.countDown()
75+
blockMayExit.await()
76+
}
77+
78+
val closeableSurface = LatchingImageReaderSurface()
79+
try {
80+
assertTrue(
81+
blockEntered.await(TEST_TIMEOUT, TimeUnit.MILLISECONDS),
82+
"Command server did not enter blocking test work"
83+
)
84+
85+
val createSurface = async(Dispatchers.Default) {
86+
riveWorker.createRiveSurface(closeableSurface)
87+
}
88+
assertNotNull(
89+
withTimeoutOrNull(TEST_TIMEOUT) {
90+
createSurface.await()
91+
},
92+
"Surface creation did not complete before command server block was allowed to exit"
93+
).close()
94+
} finally {
95+
blockMayExit.countDown()
96+
}
97+
98+
assertClosed(closeableSurface)
6299
}
63100

64101
@Test
@@ -86,10 +123,7 @@ class RiveSurfaceLifecycleTest : RiveAndroidTest() {
86123
surface.close()
87124
gate.countDown()
88125

89-
assertTrue(
90-
closeableSurface.awaitClosed(),
91-
"Surface-backed resources were not closed before CommandQueue disposal completed"
92-
)
126+
assertClosed(closeableSurface)
93127
assertFalse(
94128
closeableSurface.awaitFrameAvailable(timeoutMillis = 200),
95129
"Expected draw queued before surface close to be canceled before producing a frame"
@@ -198,4 +232,11 @@ class RiveSurfaceLifecycleTest : RiveAndroidTest() {
198232

199233
fun awaitClosed(): Boolean = closed.await(2, TimeUnit.SECONDS)
200234
}
235+
236+
private fun assertClosed(surface: LatchingImageReaderSurface) {
237+
assertTrue(
238+
surface.awaitClosed(),
239+
"Surface-backed resources were not closed before CommandQueue disposal completed"
240+
)
241+
}
201242
}
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#pragma once
2+
3+
#include <GLES3/gl3.h>
4+
#include <cstdint>
5+
#include <memory>
6+
7+
#include "helpers/rive_log.hpp"
8+
#include "rive/renderer/gl/render_target_gl.hpp"
9+
10+
namespace rive_android
11+
{
12+
13+
/**
14+
* Owns a GL framebuffer render target whose concrete GL resources are created
15+
* on first use.
16+
*
17+
* Android surface creation can happen on the UI thread, while the Rive GL
18+
* context is owned by the command server thread. This holder allows surface
19+
* creation to return synchronously without blocking the main thread on command
20+
* server backlog or GL work. It is returned to Kotlin as an opaque native
21+
* handle, but it defers GL work until getOrCreate() runs on the command server
22+
* after the target EGL surface has been made current.
23+
*
24+
* Creating the concrete render target is assumed to be cheap enough to perform
25+
* lazily during the first draw.
26+
*
27+
* This holder is uniquely owned by RiveSurface disposal ordering and is deleted
28+
* on the command server thread after pending draws for the same draw key have
29+
* been canceled.
30+
*/
31+
class LazyFramebufferRenderTargetGL
32+
{
33+
public:
34+
LazyFramebufferRenderTargetGL(uint32_t width, uint32_t height) :
35+
m_width(width), m_height(height)
36+
{}
37+
38+
LazyFramebufferRenderTargetGL(const LazyFramebufferRenderTargetGL&) =
39+
delete;
40+
LazyFramebufferRenderTargetGL& operator=(
41+
const LazyFramebufferRenderTargetGL&) = delete;
42+
43+
/**
44+
* Returns the concrete GL render target, creating it if necessary.
45+
*
46+
* Must be called on the command server thread with the intended EGL surface
47+
* already current. The current framebuffer's sample count is captured on
48+
* first creation and reused for the lifetime of this surface.
49+
*/
50+
rive::gpu::RenderTargetGL* getOrCreate()
51+
{
52+
if (!m_renderTarget)
53+
{
54+
GLint actualSampleCount = 1;
55+
glBindFramebuffer(GL_FRAMEBUFFER, 0);
56+
glGetIntegerv(GL_SAMPLES, &actualSampleCount);
57+
RiveLogD("RiveN/LazyRT",
58+
"Creating render target on command server "
59+
"(sample count: %d)",
60+
actualSampleCount);
61+
62+
m_renderTarget.reset(
63+
new rive::gpu::FramebufferRenderTargetGL(m_width,
64+
m_height,
65+
0, // Framebuffer ID
66+
actualSampleCount));
67+
}
68+
return m_renderTarget.get();
69+
}
70+
71+
private:
72+
struct RenderTargetUnref
73+
{
74+
void operator()(
75+
rive::gpu::FramebufferRenderTargetGL* renderTarget) const
76+
{
77+
if (renderTarget != nullptr)
78+
{
79+
renderTarget->unref();
80+
}
81+
}
82+
};
83+
84+
const uint32_t m_width;
85+
const uint32_t m_height;
86+
std::unique_ptr<rive::gpu::FramebufferRenderTargetGL, RenderTargetUnref>
87+
m_renderTarget;
88+
};
89+
90+
} // namespace rive_android

0 commit comments

Comments
 (0)