wenet-e2e · cdliang11 · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026
diff --git a/runtime/android/.gitignore b/runtime/android/.gitignore
@@ -0,0 +1,8 @@
+.gradle/
+build/
+local.properties
+*.iml
+.idea/
+.cxx/
+.externalNativeBuild/
+captures/
diff --git a/runtime/android/README.md b/runtime/android/README.md
@@ -0,0 +1,41 @@
+# WeSpeaker Android Speaker Verification Demo
+
+This app extracts speaker embeddings from two **16 kHz PCM WAV** clips on device, computes cosine similarity (same mapping to 0–1 as desktop `runtime/onnxruntime` `asv_main`), and compares against a threshold to decide same vs different speaker.
+
+## ONNX model
+
+Export ONNX on a PC following the repo docs:
+
+```bash
+python wespeaker/bin/export_onnx.py \
+  --config $exp/config.yaml \
+  --checkpoint $exp/avg_model.pt \
+  --output_model final.onnx
+```
+
+Copy `final.onnx` to `app/src/main/assets/final.onnx` and build (filename must match).
+
+## Build
+
+**JDK 17 or newer** is required (Android Gradle Plugin 8.x). If only Java 8 is installed, install JDK 17 or pick the bundled JDK under Android Studio *Settings → Build → Gradle → Gradle JDK*.
+
+Open `runtime/android` in Android Studio, or use the Gradle wrapper:
+
+```bash
+cd runtime/android
+./gradlew :app:assembleDebug
+```
+
+The app depends on [ONNX Runtime Android](https://github.com/microsoft/onnxruntime) (`onnxruntime-android` AAR). Native integration matches [wekws/runtime/android](https://github.com/wenet-e2e/wekws/tree/main/runtime/android): a resolvable `extractForNativeBuild` configuration unpacks `headers/` and `jni/` from the AAR; CMake uses `include_directories` and links `libonnxruntime.so` (no Prefab / `find_package`). App logic reuses this repo’s `runtime/core` Fbank, `SpeakerEngine`, and ONNX backend.
+
+## Usage
+
+1. After installing the APK, pick the enrollment and test WAV files (16 kHz recommended; other sample rates are not resampled in-app and may hurt quality).
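+2. Tune the threshold, Fbank dimension, and sample rate to match training/export settings (the native code infers the embedding size from the ONNX output shape and extracts one embedding per whole clip).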
+3. Tap **Compare** to see similarity score and same/different verdict.
+
+## Notes
+
+- If CMake reports that the extracted `onnxruntime*.aar` directory is missing, run **Sync** and then a full **Build** so that `extractAARForNativeBuild` runs before `configureCMake` (same idea as wekws).
+- First inference copies the model from assets to app-private storage; ensure `assets/final.onnx` exists and is non-empty.
+- Default threshold is 0.5; tune on your validation set.
diff --git a/runtime/android/app/proguard-rules.pro b/runtime/android/app/proguard-rules.pro
@@ -0,0 +1,4 @@
+# WeSpeaker JNI
+-keepclasseswithmembernames class com.wespeaker.app.WespeakerNative {
+    native <methods>;
+}
diff --git a/runtime/android/app/src/main/AndroidManifest.xml b/runtime/android/app/src/main/AndroidManifest.xml
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android">
+
+    <uses-permission android:name="android.permission.RECORD_AUDIO" />
+
+    <application
+        android:allowBackup="true"
+        android:icon="@mipmap/ic_launcher"
+        android:label="@string/app_name"
+        android:roundIcon="@mipmap/ic_launcher_round"
+        android:supportsRtl="true"
+        android:theme="@style/Theme.Wespeaker">
+        <activity
+            android:name=".MainActivity"
+            android:exported="true"
+            android:windowSoftInputMode="adjustResize">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity>
+    </application>
+</manifest>
diff --git a/runtime/android/app/src/main/assets/README.txt b/runtime/android/app/src/main/assets/README.txt
@@ -0,0 +1,2 @@
+Copy the final.onnx produced by export_onnx.py into this directory (filename must be final.onnx).
+After rebuild and install, the app copies it from assets to internal storage for native inference.
diff --git a/runtime/android/app/src/main/cpp/CMakeLists.txt b/runtime/android/app/src/main/cpp/CMakeLists.txt
@@ -0,0 +1,55 @@
+cmake_minimum_required(VERSION 3.22.1)
+project(wespeaker_jni)  # native build for the Android demo: static libs from runtime/core plus the JNI shared library
+
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)  # the static libraries below end up inside a shared library
+
+# runtime/core: this directory is runtime/android/app/src/main/cpp, so go five levels up to runtime/, then into core/.
+set(CORE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../../../core")
+
+include(deps.cmake)  # fetches gflags and glog via FetchContent and adds their include directories
+
+# Same as wekws/runtime/android: ONNX Runtime headers and jni libs come from the Gradle-extracted AAR (no Prefab find_package).
+# Typical path: app/build/onnxruntime-android-x.y.z.aar/ (a directory, despite the .aar suffix).
+set(build_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../build")
+file(GLOB ORT_ROOT_LIST "${build_DIR}/onnxruntime*.aar")
+list(LENGTH ORT_ROOT_LIST _ort_len)
+if(_ort_len EQUAL 0)
+  message(FATAL_ERROR
+    "No ${build_DIR}/onnxruntime*.aar found. Run Gradle task extractAARForNativeBuild first (pulled in before configureCMake).")
+endif()
+list(GET ORT_ROOT_LIST 0 ORT_ROOT)  # if several extracted AAR directories match, the first glob result is used
+# onnxruntime-android AAR: headers live under headers/ (not headers/include/).
+include_directories("${ORT_ROOT}/headers")
+link_directories("${ORT_ROOT}/jni/${ANDROID_ABI}")  # per-ABI directory searched when linking `onnxruntime`
+
+add_definitions(-DUSE_ONNX)  # directory-wide define; NOTE(review): presumably selects the ONNX backend in runtime/core - confirm
+
+add_library(utils STATIC "${CORE_DIR}/utils/utils.cc")
+target_include_directories(utils PUBLIC "${CORE_DIR}")
+target_link_libraries(utils PUBLIC glog gflags)
+
+add_library(frontend STATIC
+    "${CORE_DIR}/frontend/feature_pipeline.cc"
+    "${CORE_DIR}/frontend/fft.cc"
+)
+target_include_directories(frontend PUBLIC "${CORE_DIR}")
+target_link_libraries(frontend PUBLIC utils)
+
+add_library(speaker STATIC
+    "${CORE_DIR}/speaker/speaker_engine.cc"
+    "${CORE_DIR}/speaker/onnx_speaker_model.cc"
+)
+target_include_directories(speaker PUBLIC "${CORE_DIR}")
+target_link_libraries(speaker PUBLIC frontend onnxruntime)
+
+add_library(wespeaker_jni SHARED wespeaker_jni.cpp)  # the JNI shared library built from wespeaker_jni.cpp
+target_include_directories(wespeaker_jni PRIVATE "${CORE_DIR}")
+target_link_libraries(wespeaker_jni
+    PRIVATE
+    speaker
+    onnxruntime
+    glog
+    gflags
+    log
+)
diff --git a/runtime/android/app/src/main/cpp/deps.cmake b/runtime/android/app/src/main/cpp/deps.cmake
@@ -0,0 +1,34 @@
+# Android NDK: download and build gflags / glog from source with FetchContent, then expose their include directories.
+include(FetchContent)
+set(FETCHCONTENT_QUIET ON)
+
+FetchContent_Declare(gflags
+  URL https://github.com/gflags/gflags/archive/refs/tags/v2.3.0.zip
+  URL_HASH SHA256=ca732b5fd17bf3a27a01a6784b947cbe6323644ecc9e26bbe2117ec43bf7e13b)
+FetchContent_MakeAvailable(gflags)
+
+set(BUILD_TESTING OFF CACHE BOOL "" FORCE)  # set before glog is added below; NOTE(review): presumably read by glog's CMakeLists - confirm
+set(WITH_GFLAGS ON CACHE BOOL "" FORCE)  # NOTE(review): presumably makes glog use the gflags fetched above - confirm
+
+FetchContent_Declare(glog
+  URL https://github.com/google/glog/archive/v0.4.0.zip
+  URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc)
+FetchContent_GetProperties(glog)
+if(NOT glog_POPULATED)  # populated manually (not MakeAvailable) so CMakeLists.txt can be patched before add_subdirectory
+  FetchContent_Populate(glog)
+  file(READ ${glog_SOURCE_DIR}/CMakeLists.txt _glog_cm)
+  # glog 0.4.0 patches: (1) rewrite cmake_minimum_required to 3.10 for CMake 4+; (2) on Android, force HAVE_EXECINFO_H to 0 because the execinfo probe can pass but the link then fails.
+  string(REGEX REPLACE
+    "cmake_minimum_required[ ]*\\([ ]*VERSION[ ]+[^)]+\\)"
+    "cmake_minimum_required(VERSION 3.10)" _glog_cm "${_glog_cm}")
+  if(ANDROID)
+    string(REPLACE
+      "check_include_file (execinfo.h HAVE_EXECINFO_H)"
+      "if(ANDROID)\n  set(HAVE_EXECINFO_H 0)\nelse()\n  check_include_file (execinfo.h HAVE_EXECINFO_H)\nendif()"
+      _glog_cm "${_glog_cm}")
+  endif()
+  file(WRITE ${glog_SOURCE_DIR}/CMakeLists.txt "${_glog_cm}")  # patched file overwrites the downloaded one in place
+  add_subdirectory(${glog_SOURCE_DIR} ${glog_BINARY_DIR})
+endif()
+
+include_directories(${gflags_BINARY_DIR}/include ${glog_SOURCE_DIR}/src ${glog_BINARY_DIR})  # directory-wide include paths for gflags and glog
diff --git a/runtime/android/app/src/main/cpp/wespeaker_jni.cpp b/runtime/android/app/src/main/cpp/wespeaker_jni.cpp
@@ -0,0 +1,110 @@
+// Copyright 2023 Chengdong Liang (WeSpeaker runtime)
+// SPDX-License-Identifier: Apache-2.0
+
+#include <jni.h>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <vector>
+
+#include "frontend/wav.h"
+#include "glog/logging.h"
+#include "speaker/speaker_engine.h"
+
+namespace {
+
+std::once_flag g_glog_init;
+
+// Initializes glog the first time it is called; later calls are no-ops.
+// Logging is sent to stderr and the minimum log level is set to 2.
+void EnsureGlog() {
+  const auto configure_logging = [] {
+    google::InitGoogleLogging("wespeaker");
+    FLAGS_logtostderr = 1;
+    FLAGS_minloglevel = 2;
+  };
+  std::call_once(g_glog_init, configure_logging);
+}
+
+// Builds a new two-element Java float[] holding {a, b}.
+// Returns nullptr when NewFloatArray fails to allocate the array.
+jfloatArray MakeFloatArray(JNIEnv* env, float a, float b) {
+  constexpr jsize kLength = 2;
+  const jfloat values[kLength] = {a, b};
+  jfloatArray result = env->NewFloatArray(kLength);
+  if (result != nullptr) {
+    env->SetFloatArrayRegion(result, 0, kLength, values);
+  }
+  return result;
+}
+
+// Raises java.io.IOException carrying `msg` on the current JNI thread.
+// The exception is only delivered to Java once the native method returns, so
+// callers must return right after calling this.
+// If an exception is already pending, it is left untouched and `msg` is dropped.
+void ThrowIo(JNIEnv* env, const char* msg) {
+  // FindClass/ThrowNew must not be called while an exception is pending
+  // (e.g. OutOfMemoryError from an earlier JNI call); keep that exception.
+  if (env->ExceptionCheck()) return;
+  jclass ex = env->FindClass("java/io/IOException");
+  if (!ex) return;  // FindClass failed and left its own exception pending.
+  env->ThrowNew(ex, msg);
+  // DeleteLocalRef is one of the calls permitted with an exception pending.
+  env->DeleteLocalRef(ex);
+}
+
+// Copies a Java string into a std::string via GetStringUTFChars.
+// A null jstring, or a GetStringUTFChars call that returns null, yields an
+// empty string.
+std::string JStringToUtf8(JNIEnv* env, jstring s) {
+  std::string utf8;
+  if (s == nullptr) return utf8;
+  const char* chars = env->GetStringUTFChars(s, nullptr);
+  if (chars == nullptr) return utf8;
+  utf8.assign(chars);
+  env->ReleaseStringUTFChars(s, chars);
+  return utf8;
+}
+
+}  // namespace
+
+// JNI entry point for com.wespeaker.app.WespeakerNative.compare.
+//
+// Reads two WAV files, extracts one speaker embedding per file with
+// wespeaker::SpeakerEngine, and scores them with CosineSimilarity.
+//   j_enroll, j_test: paths of the enrollment and test WAV files.
+//   j_model:          path of the ONNX model file.
+//   j_threshold:      score at or above which the clips count as same speaker.
+//   j_fbank_dim, j_sample_rate: forwarded to SpeakerEngine; must be positive.
+// Returns a float[2] {score, same} with same being 1.0f or 0.0f. On failure
+// returns nullptr with a Java exception pending (java.io.IOException for the
+// errors raised here).
+extern "C" JNIEXPORT jfloatArray JNICALL
+Java_com_wespeaker_app_WespeakerNative_compare(JNIEnv* env, jclass /* clazz */,
+                                               jstring j_enroll, jstring j_test,
+                                               jstring j_model,
+                                               jdouble j_threshold,
+                                               jint j_fbank_dim,
+                                               jint j_sample_rate) {
+  EnsureGlog();
+
+  // A failed string conversion leaves a Java exception pending. Further JNI
+  // calls are not allowed in that state, so return to Java immediately.
+  const std::string enroll_path = JStringToUtf8(env, j_enroll);
+  if (env->ExceptionCheck()) return nullptr;
+  const std::string test_path = JStringToUtf8(env, j_test);
+  if (env->ExceptionCheck()) return nullptr;
+  const std::string model_path = JStringToUtf8(env, j_model);
+  if (env->ExceptionCheck()) return nullptr;
+  const float threshold = static_cast<float>(j_threshold);
+
+  if (enroll_path.empty() || test_path.empty() || model_path.empty()) {
+    ThrowIo(env, "路径不能为空");
+    return nullptr;
+  }
+  // Reject nonsensical feature settings here so they surface as a Java
+  // exception instead of reaching the engine.
+  if (j_fbank_dim <= 0 || j_sample_rate <= 0) {
+    ThrowIo(env, "fbank 维度和采样率必须为正数");
+    return nullptr;
+  }
+
+  try {
+    wenet::WavReader enroll_reader;
+    if (!enroll_reader.Open(enroll_path)) {
+      ThrowIo(env, "无法打开注册音频（需有效 WAV）");
+      return nullptr;
+    }
+    wenet::WavReader test_reader;
+    if (!test_reader.Open(test_path)) {
+      ThrowIo(env, "无法打开测试音频（需有效 WAV）");
+      return nullptr;
+    }
+    if (enroll_reader.num_sample() <= 0 || test_reader.num_sample() <= 0) {
+      ThrowIo(env, "音频长度无效");
+      return nullptr;
+    }
+
+    // The engine is used only inside this call, so sole ownership suffices.
+    const auto speaker_engine = std::make_unique<wespeaker::SpeakerEngine>(
+        model_path, j_fbank_dim, j_sample_rate,
+        0 /* embedding size: infer from ONNX output shape */,
+        -1 /* one embedding for full audio; same as per_chunk_samples_ <= 0 */);
+    const int embedding_size = speaker_engine->EmbeddingSize();
+
+    // NOTE(review): const_cast kept from the original; ExtractEmbedding's
+    // parameter constness is not visible from this file - confirm and drop.
+    int16_t* enroll_data = const_cast<int16_t*>(enroll_reader.data());
+    const int enroll_samples = enroll_reader.num_sample();
+    int16_t* test_data = const_cast<int16_t*>(test_reader.data());
+    const int test_samples = test_reader.num_sample();
+
+    std::vector<float> enroll_emb(embedding_size, 0.f);
+    std::vector<float> test_emb(embedding_size, 0.f);
+    speaker_engine->ExtractEmbedding(enroll_data, enroll_samples, &enroll_emb);
+    speaker_engine->ExtractEmbedding(test_data, test_samples, &test_emb);
+
+    const float score = speaker_engine->CosineSimilarity(enroll_emb, test_emb);
+    const float same = (score >= threshold) ? 1.f : 0.f;
+    return MakeFloatArray(env, score, same);
+  } catch (const std::exception& e) {
+    // C++ exceptions must not cross the JNI boundary; convert them.
+    ThrowIo(env, e.what());
+    return nullptr;
+  } catch (...) {
+    ThrowIo(env, "native 推理异常");
+    return nullptr;
+  }
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Copy the final.onnx produced by export_onnx.py into this directory (filename must be final.onnx).
		After rebuild and install, the app copies it from assets to internal storage for native inference.