Skip to content

Commit ddd39cd

Browse files
authored
Patch qemu in CI to fix madvise semantics. (#3770)
We currently skip some tests when running our qemu-based tests for aarch64 and s390x. Qemu has broken madvise(MADV_DONTNEED) semantics -- specifically, it just ignores madvise() [1]. We could continue to whack-a-mole the tests whenever we create new functionality that relies on madvise() semantics, but ideally we'd just have emulation that properly emulates! The earlier discussions on the qemu mailing list [2] had a proposed patch for this, but (i) this patch doesn't seem to apply cleanly anymore (it's 3.5 years old) and (ii) it's pretty complex due to the need to handle qemu's ability to emulate differing page sizes on host and guest. It turns out that we only really need this for CI when host and guest have the same page size (4KiB), so we *could* just pass the madvise()s through. I wouldn't expect such a patch to ever land upstream in qemu, but it satisfies our needs I think. So this PR modifies our CI setup to patch qemu before building it locally with a little one-off patch. [1] #2518 (comment) [2] https://lists.gnu.org/archive/html/qemu-devel/2018-08/msg05416.html
1 parent 43b3794 commit ddd39cd

4 files changed

Lines changed: 63 additions & 29 deletions

File tree

.github/workflows/main.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ jobs:
253253
- uses: actions/cache@v2
254254
with:
255255
path: ${{ runner.tool_cache }}/qemu
256-
key: qemu-${{ matrix.target }}-${{ env.QEMU_BUILD_VERSION }}
256+
key: qemu-${{ matrix.target }}-${{ env.QEMU_BUILD_VERSION }}-patchmadvise2
257257
if: matrix.target != '' && matrix.os == 'ubuntu-latest'
258258
- name: Install cross-compilation tools
259259
run: |
@@ -286,6 +286,7 @@ jobs:
286286
# quickly.
287287
curl https://download.qemu.org/qemu-$QEMU_BUILD_VERSION.tar.xz | tar xJf -
288288
cd qemu-$QEMU_BUILD_VERSION
289+
patch -p1 < $GITHUB_WORKSPACE/ci/qemu-madvise.patch
289290
./configure --target-list=${{ matrix.qemu_target }} --prefix=${{ runner.tool_cache}}/qemu --disable-tools --disable-slirp --disable-fdt --disable-capstone --disable-docs
290291
ninja -C build install
291292
touch ${{ runner.tool_cache }}/qemu/built

ci/qemu-madvise.patch

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
From 1ec3de1634195a4d4410cc33fdc66c68057e16a3 Mon Sep 17 00:00:00 2001
2+
From: Chris Fallin <[email protected]>
3+
Date: Sat, 5 Feb 2022 22:45:58 -0800
4+
Subject: [PATCH] Emulate Linux madvise() properly when possible.
5+
6+
Curently madvise() is not emulated for Linux targets because it is not
7+
trivial to emulate when the guest and host page sizes differ -- in this
8+
case, mmap()s are not passed straight through, so the semantics of
9+
various MADV_* flags are not trivial to replicate.
10+
11+
However, if the guest and host are both Linux, and the page sizes are
12+
the same on both ends (which is often the case, e.g. 4KiB for x86-64,
13+
aarch64, s390x, and possibly others), then the mmap()s are in fact
14+
passed straight through. Furthermore, the MADV_* flags are defined in
15+
target-independent headers, so we can pass the base, length, and
16+
`advice` arugments to `madvise()` straight through.
17+
18+
This patch alters the Linux-userspace syscall emulation to do just that,
19+
passing through the `madvise()` calls when possible and returning
20+
`EINVAL` otherwise so the guest is properly informed that the desired
21+
semantics (e.g., MADV_DONTNEED to clear memory) are not available.
22+
---
23+
linux-user/syscall.c | 22 ++++++++++++++++------
24+
1 file changed, 16 insertions(+), 6 deletions(-)
25+
26+
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
27+
index 5950222a77..836e39df5f 100644
28+
--- a/linux-user/syscall.c
29+
+++ b/linux-user/syscall.c
30+
@@ -11853,12 +11853,22 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1,
31+
32+
#ifdef TARGET_NR_madvise
33+
case TARGET_NR_madvise:
34+
- /* A straight passthrough may not be safe because qemu sometimes
35+
- turns private file-backed mappings into anonymous mappings.
36+
- This will break MADV_DONTNEED.
37+
- This is a hint, so ignoring and returning success is ok. */
38+
- return 0;
39+
-#endif
40+
+#ifdef __linux__
41+
+ /* If the host is Linux, and the guest and host page sizes are the
42+
+ * same, then mmaps will have been passed through one-to-one, so we can
43+
+ * rely on the madvise semantics of the host. Note that the advice
44+
+ * argument (arg3) is fully architecture-independent. */
45+
+ if (TARGET_PAGE_SIZE == sysconf(_SC_PAGESIZE)) {
46+
+ return get_errno(madvise(g2h_untagged(arg1), (size_t)arg2, (int)arg3));
47+
+ } else {
48+
+ return -TARGET_EINVAL;
49+
+ }
50+
+#else // __linux__
51+
+ /* We will not be able to emulate the Linux-specific semantics, so we
52+
+ * raise an error. */
53+
+ return -TARGET_EINVAL;
54+
+#endif // !__linux__
55+
+#endif // TARGET_NR_madvise
56+
#ifdef TARGET_NR_fcntl64
57+
case TARGET_NR_fcntl64:
58+
{
59+
--
60+
2.34.1
61+

crates/runtime/src/instance/allocator/pooling.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1748,11 +1748,6 @@ mod test {
17481748
#[cfg(all(unix, target_pointer_width = "64", feature = "async"))]
17491749
#[test]
17501750
fn test_stack_zeroed() -> Result<()> {
1751-
// https://github.com/bytecodealliance/wasmtime/pull/2518#issuecomment-747280133
1752-
if std::env::var("WASMTIME_TEST_NO_HOG_MEMORY").is_ok() {
1753-
return Ok(());
1754-
}
1755-
17561751
let allocator = PoolingInstanceAllocator::new(
17571752
PoolingAllocationStrategy::NextAvailable,
17581753
ModuleLimits {

crates/runtime/src/memfd.rs

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -554,10 +554,6 @@ mod test {
554554

555555
#[test]
556556
fn instantiate_no_image() {
557-
if skip_tests_due_to_qemu_madvise_semantics() {
558-
return;
559-
}
560-
561557
// 4 MiB mmap'd area, not accessible
562558
let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
563559
// Create a MemFdSlot on top of it
@@ -590,10 +586,6 @@ mod test {
590586

591587
#[test]
592588
fn instantiate_image() {
593-
if skip_tests_due_to_qemu_madvise_semantics() {
594-
return;
595-
}
596-
597589
// 4 MiB mmap'd area, not accessible
598590
let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
599591
// Create a MemFdSlot on top of it
@@ -637,19 +629,4 @@ mod test {
637629
let slice = mmap.as_slice();
638630
assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
639631
}
640-
641-
/// qemu's madvise implementation does not implement the
642-
/// "flash-reset back to zero or CoW backing" semantics that Linux
643-
/// does. Our CI setup uses qemu (in usermode-binary mode, not
644-
/// whole-system mode) to run tests on aarch64 and s390x. We want
645-
/// to skip these tests when under qemu, but not when someone is
646-
/// developing natively on one of these architectures. So instead,
647-
/// we dynamically detect an environment variable that our CI
648-
/// setup sets.
649-
///
650-
/// See `skip_pooling_allocator_tests()` in `tests/all/main.rs`
651-
/// for more.
652-
fn skip_tests_due_to_qemu_madvise_semantics() -> bool {
653-
std::env::var("WASMTIME_TEST_NO_HOG_MEMORY").is_ok()
654-
}
655632
}

0 commit comments

Comments
 (0)