diff --git a/.gitignore b/.gitignore
index 43dcdabfb85..792f690b193 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,7 +34,7 @@ build-sanitize-thread/
/quantize
/server
/lsp
-
+/models
arm_neon.h
sync.sh
libwhisper.a
diff --git a/examples/whisper.android/.gitignore b/examples/whisper.android/.gitignore
index aa724b77071..3e0b4de7bf3 100644
--- a/examples/whisper.android/.gitignore
+++ b/examples/whisper.android/.gitignore
@@ -13,3 +13,6 @@
.externalNativeBuild
.cxx
local.properties
+/app/src/main/assets/
diff --git a/examples/whisper.android/.idea/gradle.xml b/examples/whisper.android/.idea/gradle.xml
index 4a09ccc1215..c34ccc3d545 100644
--- a/examples/whisper.android/.idea/gradle.xml
+++ b/examples/whisper.android/.idea/gradle.xml
@@ -4,17 +4,16 @@
diff --git a/examples/whisper.android/.idea/misc.xml b/examples/whisper.android/.idea/misc.xml
index 0ad17cbd33a..8978d23db56 100644
--- a/examples/whisper.android/.idea/misc.xml
+++ b/examples/whisper.android/.idea/misc.xml
@@ -1,4 +1,3 @@
-
diff --git a/examples/whisper.android/.idea/vcs.xml b/examples/whisper.android/.idea/vcs.xml
index b2bdec2d71b..e0c7f72305f 100644
--- a/examples/whisper.android/.idea/vcs.xml
+++ b/examples/whisper.android/.idea/vcs.xml
@@ -2,5 +2,6 @@
+
\ No newline at end of file
diff --git a/examples/whisper.android/app/.gitignore b/examples/whisper.android/app/.gitignore
index 42afabfd2ab..796b96d1c40 100644
--- a/examples/whisper.android/app/.gitignore
+++ b/examples/whisper.android/app/.gitignore
@@ -1 +1 @@
-/build
\ No newline at end of file
+/build
diff --git a/examples/whisper.android/app/build.gradle b/examples/whisper.android/app/build.gradle
index 9f407998cdb..a883a5a022b 100644
--- a/examples/whisper.android/app/build.gradle
+++ b/examples/whisper.android/app/build.gradle
@@ -1,6 +1,7 @@
plugins {
id 'com.android.application'
id 'org.jetbrains.kotlin.android'
}
android {
@@ -9,7 +10,7 @@ android {
defaultConfig {
applicationId "com.whispercppdemo"
- minSdk 26
+ minSdk 31
targetSdk 34
versionCode 1
versionName "1.0"
@@ -29,31 +30,44 @@ android {
}
}
compileOptions {
- sourceCompatibility JavaVersion.VERSION_17
- targetCompatibility JavaVersion.VERSION_17
+ sourceCompatibility JavaVersion.VERSION_1_8
+ targetCompatibility JavaVersion.VERSION_1_8
}
kotlinOptions {
- jvmTarget = '17'
+ jvmTarget = '1.8'
}
buildFeatures {
compose true
}
composeOptions {
- kotlinCompilerExtensionVersion '1.5.0'
+ kotlinCompilerExtensionVersion '1.5.2'
}
+ ndkVersion = "25.2.9519653"
}
dependencies {
- implementation project(':lib')
- implementation 'androidx.activity:activity-compose:1.7.2'
- implementation 'androidx.compose.material:material-icons-core:1.5.0'
- implementation 'androidx.compose.material3:material3:1.1.1'
- implementation "androidx.compose.ui:ui:1.5.0"
- implementation "androidx.compose.ui:ui-tooling-preview:1.5.0"
- implementation 'androidx.lifecycle:lifecycle-viewmodel-compose:2.6.1'
- implementation "com.google.accompanist:accompanist-permissions:0.28.0"
- implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.2'
+    implementation(project(":lib"))
+    implementation("androidx.activity:activity-compose:1.7.2")
+    implementation("androidx.compose.ui:ui:1.5.2")
+    implementation("androidx.compose.ui:ui-tooling-preview:1.5.2")
+    implementation("androidx.compose.material:material:1.5.2")
+    implementation("androidx.compose.material:material-icons-core:1.5.0")
+    implementation("androidx.compose.material3:material3:1.1.1")
+    implementation("androidx.compose.runtime:runtime-livedata:1.5.2")
+    implementation("androidx.lifecycle:lifecycle-viewmodel-compose:2.6.1")
+    implementation("androidx.lifecycle:lifecycle-viewmodel-ktx:2.7.0")
+    implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.3.1")
+    implementation("com.google.accompanist:accompanist-permissions:0.28.0")
+    implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.2")
+    implementation("org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.2")
testImplementation 'junit:junit:4.13.2'
androidTestImplementation 'androidx.test.ext:junit:1.1.5'
androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1'
diff --git a/examples/whisper.android/app/src/main/java/com/whispercppdemo/recorder/Recorder.kt b/examples/whisper.android/app/src/main/java/com/whispercppdemo/recorder/Recorder.kt
index 68df9652521..58abce04713 100644
--- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/recorder/Recorder.kt
+++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/recorder/Recorder.kt
@@ -11,78 +11,165 @@ import kotlinx.coroutines.withContext
import java.io.File
import java.util.concurrent.Executors
import java.util.concurrent.atomic.AtomicBoolean
+import android.util.Log
-class Recorder {
+import kotlinx.coroutines.runBlocking
+
+private const val TAG = "Recorder"
+
+class Recorder {
private val scope: CoroutineScope = CoroutineScope(
Executors.newSingleThreadExecutor().asCoroutineDispatcher()
)
+
private var recorder: AudioRecordThread? = null
+ private var audioStream: AudioStreamThread? = null
+
- suspend fun startRecording(outputFile: File, onError: (Exception) -> Unit) = withContext(scope.coroutineContext) {
- recorder = AudioRecordThread(outputFile, onError)
- recorder?.start()
+ suspend fun startRecording(outputFile: File, onError: (Exception) -> Unit) =
+ withContext(scope.coroutineContext) {
+ recorder = AudioRecordThread(outputFile, onError)
+ recorder?.start()
+ }
+
+ fun startStreaming(onDataReceived: AudioDataReceivedListener, onError: (Exception) -> Unit) {
+ if (audioStream == null) {
+ audioStream = AudioStreamThread(onDataReceived, onError)
+ audioStream?.start()
+ } else {
+ Log.i(TAG, "AudioStreamThread is already running")
+ }
}
- suspend fun stopRecording() = withContext(scope.coroutineContext) {
+
+ fun stopRecording() {
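+        // Stop both threads and block until they exit so no callbacks arrive
+        // after this call returns.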
recorder?.stopRecording()
- @Suppress("BlockingMethodInNonBlockingContext")
- recorder?.join()
- recorder = null
+ audioStream?.stopRecording()
+ runBlocking {
+ audioStream?.join()
+ audioStream = null
+ recorder?.join()
+ recorder = null
+ }
}
-}
-private class AudioRecordThread(
- private val outputFile: File,
- private val onError: (Exception) -> Unit
-) :
- Thread("AudioRecorder") {
- private var quit = AtomicBoolean(false)
- @SuppressLint("MissingPermission")
- override fun run() {
- try {
+ private class AudioRecordThread(
+ private val outputFile: File,
+ private val onError: (Exception) -> Unit
+ ) :
+ Thread("AudioRecorder") {
+ private var quit = AtomicBoolean(false)
+
+ @SuppressLint("MissingPermission")
+ override fun run() {
+ try {
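+                // Request 4x the minimum buffer size to reduce the chance of overruns on slow devices.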
+ val bufferSize = AudioRecord.getMinBufferSize(
+ 16000,
+ AudioFormat.CHANNEL_IN_MONO,
+ AudioFormat.ENCODING_PCM_16BIT
+ ) * 4
+ val buffer = ShortArray(bufferSize / 2)
+
+ val audioRecord = AudioRecord(
+ MediaRecorder.AudioSource.MIC,
+ 16000,
+ AudioFormat.CHANNEL_IN_MONO,
+ AudioFormat.ENCODING_PCM_16BIT,
+ bufferSize
+ )
+
+ try {
+ audioRecord.startRecording()
+
+                val allData = mutableListOf<Short>()
+
+ while (!quit.get()) {
+ val read = audioRecord.read(buffer, 0, buffer.size)
+ if (read > 0) {
+ for (i in 0 until read) {
+ allData.add(buffer[i])
+ }
+ } else {
+ throw java.lang.RuntimeException("audioRecord.read returned $read")
+ }
+ }
+
+ audioRecord.stop()
+ encodeWaveFile(
+ outputFile,
+ allData.toShortArray()
+ )
+ } finally {
+ audioRecord.release()
+ }
+ } catch (e: Exception) {
+ onError(e)
+ }
+ }
+
+ fun stopRecording() {
+ quit.set(true)
+ }
+
+
+ }
+
+ interface AudioDataReceivedListener {
+ fun onAudioDataReceived(data: FloatArray)
+ }
+ private class AudioStreamThread(
+ private val onDataReceived: AudioDataReceivedListener,
+ private val onError: (Exception) -> Unit
+ ) : Thread("AudioStreamer") {
+ private val quit = AtomicBoolean(false)
+
+ @SuppressLint("MissingPermission")
+ override fun run() {
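+            // whisper.cpp consumes 16 kHz 32-bit float PCM, so capture with
+            // ENCODING_PCM_FLOAT to avoid converting 16-bit samples later.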
val bufferSize = AudioRecord.getMinBufferSize(
16000,
AudioFormat.CHANNEL_IN_MONO,
- AudioFormat.ENCODING_PCM_16BIT
- ) * 4
- val buffer = ShortArray(bufferSize / 2)
-
+ AudioFormat.ENCODING_PCM_FLOAT) * 4
+ val floatBuffer = FloatArray(bufferSize / 2)
val audioRecord = AudioRecord(
MediaRecorder.AudioSource.MIC,
16000,
AudioFormat.CHANNEL_IN_MONO,
- AudioFormat.ENCODING_PCM_16BIT,
- bufferSize
- )
+ AudioFormat.ENCODING_PCM_FLOAT,
+ bufferSize)
+
+            if (audioRecord.state != AudioRecord.STATE_INITIALIZED) {
+                Log.e(TAG, "AudioRecord initialization failed")
+                onError(RuntimeException("AudioRecord initialization failed"))
+                audioRecord.release()
+                return
+            }
try {
audioRecord.startRecording()
+ while (!quit.get()) {
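+                // READ_BLOCKING waits until the requested number of samples has been
+                // read, so each callback below delivers a full buffer of audio.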
- val allData = mutableListOf()
- while (!quit.get()) {
- val read = audioRecord.read(buffer, 0, buffer.size)
- if (read > 0) {
- for (i in 0 until read) {
- allData.add(buffer[i])
- }
- } else {
- throw java.lang.RuntimeException("audioRecord.read returned $read")
+ val readResult = audioRecord.read(floatBuffer, 0, floatBuffer.size, AudioRecord.READ_BLOCKING)
+                    Log.v(TAG, "readResult: $readResult")
+                    if (readResult > 0) {
+                        onDataReceived.onAudioDataReceived(floatBuffer.copyOf(readResult))
+ } else if (readResult < 0) {
+ throw RuntimeException("AudioRecord.read error: $readResult")
}
}
-
- audioRecord.stop()
- encodeWaveFile(outputFile, allData.toShortArray())
+ } catch (e: Exception) {
+ onError(e)
} finally {
+ audioRecord.stop()
audioRecord.release()
}
- } catch (e: Exception) {
- onError(e)
}
- }
- fun stopRecording() {
- quit.set(true)
+ fun stopRecording() {
+ quit.set(true)
+ }
}
-}
\ No newline at end of file
+}
diff --git a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreen.kt b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreen.kt
index 38f11b81c64..2f992c7b6a2 100644
--- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreen.kt
+++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreen.kt
@@ -6,40 +6,63 @@ import androidx.compose.foundation.text.selection.SelectionContainer
import androidx.compose.foundation.verticalScroll
import androidx.compose.material3.*
import androidx.compose.runtime.Composable
+
import androidx.compose.ui.Modifier
import androidx.compose.ui.res.stringResource
import androidx.compose.ui.unit.dp
+import androidx.compose.runtime.livedata.observeAsState
+import androidx.compose.ui.Alignment
+
import com.google.accompanist.permissions.ExperimentalPermissionsApi
import com.google.accompanist.permissions.isGranted
import com.google.accompanist.permissions.rememberPermissionState
import com.whispercppdemo.R
+
@Composable
fun MainScreen(viewModel: MainScreenViewModel) {
+
+ val canTranscribeState = viewModel.canTranscribe.observeAsState(initial = false)
+ val isRecordingState = viewModel.isRecording.observeAsState(initial = false)
+ val isStreamingState = viewModel.isStreaming.observeAsState(initial = false)
+
+ val messageLogState = viewModel.dataLog.observeAsState(initial = "")
+ val processingTimeMessage = viewModel.processingTimeMessage.observeAsState(initial = "")
+
MainScreen(
- canTranscribe = viewModel.canTranscribe,
- isRecording = viewModel.isRecording,
- messageLog = viewModel.dataLog,
+ canTranscribe = canTranscribeState.value,
+ isRecording = isRecordingState.value,
+ isStreaming = isStreamingState.value,
+ messageLog = messageLogState.value,
+ processingTimeMessage = processingTimeMessage.value,
onBenchmarkTapped = viewModel::benchmark,
onTranscribeSampleTapped = viewModel::transcribeSample,
- onRecordTapped = viewModel::toggleRecord
+ onRecordTapped = viewModel::toggleRecord,
+ onStreamTapped = viewModel::toggleStream
)
+
}
@OptIn(ExperimentalMaterial3Api::class)
@Composable
-private fun MainScreen(
+fun MainScreen(
canTranscribe: Boolean,
isRecording: Boolean,
+ isStreaming: Boolean,
messageLog: String,
+ processingTimeMessage: String,
onBenchmarkTapped: () -> Unit,
onTranscribeSampleTapped: () -> Unit,
- onRecordTapped: () -> Unit
+ onRecordTapped: () -> Unit,
+ onStreamTapped: () -> Unit
) {
Scaffold(
topBar = {
TopAppBar(
- title = { Text(stringResource(R.string.app_name)) }
+ title = { Text(stringResource(id = R.string.app_name)) }
)
},
) { innerPadding ->
@@ -53,33 +76,40 @@ private fun MainScreen(
BenchmarkButton(enabled = canTranscribe, onClick = onBenchmarkTapped)
TranscribeSampleButton(enabled = canTranscribe, onClick = onTranscribeSampleTapped)
}
- RecordButton(
+ RecordSection(
enabled = canTranscribe,
isRecording = isRecording,
+ processingTimeMessage = processingTimeMessage,
onClick = onRecordTapped
)
+ StreamButton(
+ enabled = canTranscribe,
+ isStreaming = isStreaming,
+ onClick = onStreamTapped
+ )
}
MessageLog(messageLog)
}
}
}
+
@Composable
-private fun MessageLog(log: String) {
+fun MessageLog(log: String) {
SelectionContainer {
Text(modifier = Modifier.verticalScroll(rememberScrollState()), text = log)
}
}
@Composable
-private fun BenchmarkButton(enabled: Boolean, onClick: () -> Unit) {
+fun BenchmarkButton(enabled: Boolean, onClick: () -> Unit) {
Button(onClick = onClick, enabled = enabled) {
Text("Benchmark")
}
}
@Composable
-private fun TranscribeSampleButton(enabled: Boolean, onClick: () -> Unit) {
+fun TranscribeSampleButton(enabled: Boolean, onClick: () -> Unit) {
Button(onClick = onClick, enabled = enabled) {
Text("Transcribe sample")
}
@@ -87,7 +117,7 @@ private fun TranscribeSampleButton(enabled: Boolean, onClick: () -> Unit) {
@OptIn(ExperimentalPermissionsApi::class)
@Composable
-private fun RecordButton(enabled: Boolean, isRecording: Boolean, onClick: () -> Unit) {
+fun RecordButton(enabled: Boolean, isRecording: Boolean, onClick: () -> Unit) {
val micPermissionState = rememberPermissionState(
permission = android.Manifest.permission.RECORD_AUDIO,
onPermissionResult = { granted ->
@@ -102,7 +132,7 @@ private fun RecordButton(enabled: Boolean, isRecording: Boolean, onClick: () ->
} else {
micPermissionState.launchPermissionRequest()
}
- }, enabled = enabled) {
+ }, enabled = enabled) {
Text(
if (isRecording) {
"Stop recording"
@@ -111,4 +141,46 @@ private fun RecordButton(enabled: Boolean, isRecording: Boolean, onClick: () ->
}
)
}
+}
+@Composable
+fun RecordSection(enabled: Boolean, isRecording: Boolean, processingTimeMessage: String, onClick: () -> Unit) {
+ Row(
+ verticalAlignment = Alignment.CenterVertically,
+ modifier = Modifier.fillMaxWidth()
+ ) {
+ RecordButton(
+ enabled = enabled,
+ isRecording = isRecording,
+ onClick = onClick
+ )
+ Spacer(Modifier.width(8.dp))
+ Text(text = processingTimeMessage)
+ }
+}
+@OptIn(ExperimentalPermissionsApi::class)
+@Composable
+fun StreamButton(enabled: Boolean, isStreaming: Boolean, onClick: () -> Unit) {
+ val micPermissionState = rememberPermissionState(
+ permission = android.Manifest.permission.RECORD_AUDIO,
+ onPermissionResult = { granted ->
+ if (granted) {
+ onClick()
+ }
+ }
+ )
+ Button(onClick = {
+ if (micPermissionState.status.isGranted) {
+ onClick()
+ } else {
+ micPermissionState.launchPermissionRequest()
+ }
+ }, enabled = enabled) {
+ Text(
+ if (isStreaming) {
+ "Stop streaming"
+ } else {
+ "Start streaming"
+ }
+ )
+ }
}
\ No newline at end of file
diff --git a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt
index d614ce3338e..4065409522a 100644
--- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt
+++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt
@@ -1,44 +1,75 @@
package com.whispercppdemo.ui.main
import android.app.Application
import android.content.Context
import android.media.MediaPlayer
import android.util.Log
-import androidx.compose.runtime.getValue
-import androidx.compose.runtime.mutableStateOf
-import androidx.compose.runtime.setValue
import androidx.core.net.toUri
-import androidx.lifecycle.ViewModel
+import androidx.lifecycle.LiveData
+import androidx.lifecycle.MutableLiveData
+import androidx.lifecycle.AndroidViewModel
import androidx.lifecycle.ViewModelProvider
import androidx.lifecycle.viewModelScope
import androidx.lifecycle.viewmodel.initializer
import androidx.lifecycle.viewmodel.viewModelFactory
-import com.whispercppdemo.media.decodeWaveFile
-import com.whispercppdemo.recorder.Recorder
import com.whispercpp.whisper.WhisperContext
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.withContext
import java.io.File
+import com.whispercppdemo.recorder.Recorder
+
+private const val TAG = "MainScreenViewModel"
+
+class MainScreenViewModel(application: Application) : AndroidViewModel(application) {
+
+    private val _canTranscribe = MutableLiveData(false)
+    val canTranscribe: LiveData<Boolean> = _canTranscribe
-private const val LOG_TAG = "MainScreenViewModel"
+    private val _dataLog = MutableLiveData("")
+    val dataLog: LiveData<String> = _dataLog
-class MainScreenViewModel(private val application: Application) : ViewModel() {
- var canTranscribe by mutableStateOf(false)
- private set
- var dataLog by mutableStateOf("")
- private set
- var isRecording by mutableStateOf(false)
- private set
+    private val _isRecording = MutableLiveData(false)
+    val isRecording: LiveData<Boolean> = _isRecording
+
+    private val _isStreaming = MutableLiveData(false)
+    val isStreaming: LiveData<Boolean> = _isStreaming
+
+    private val _processingTimeMessage = MutableLiveData("")
+    val processingTimeMessage: LiveData<String> = _processingTimeMessage
+
private val modelsPath = File(application.filesDir, "models")
private val samplesPath = File(application.filesDir, "samples")
- private var recorder: Recorder = Recorder()
- private var whisperContext: com.whispercpp.whisper.WhisperContext? = null
+ private val recorder: Recorder = Recorder()
+ private var whisperContext: WhisperContext? = null
private var mediaPlayer: MediaPlayer? = null
private var recordedFile: File? = null
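+    // Mutable capture state shared between the audio callback and the chunked transcription loop.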
+    data class AudioState(
+        var isCapturing: Boolean = false,
+        var isTranscribing: Boolean = false,
+        var nSamples: Int = 0,
+        var audioBufferF32: MutableList<Float> = mutableListOf()
+    )
+
init {
viewModelScope.launch {
printSystemInfo()
@@ -47,15 +78,16 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
}
private suspend fun printSystemInfo() {
- printMessage(String.format("System Info: %s\n", com.whispercpp.whisper.WhisperContext.getSystemInfo()))
+ printMessage(String.format("System Info: %s\n", WhisperContext.getSystemInfo()))
}
private suspend fun loadData() {
printMessage("Loading data...\n")
try {
copyAssets()
loadBaseModel()
- canTranscribe = true
+ _canTranscribe.value = true
} catch (e: Exception) {
-            Log.w(LOG_TAG, e)
+            Log.w(TAG, e)
printMessage("${e.localizedMessage}\n")
@@ -63,22 +95,24 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
}
private suspend fun printMessage(msg: String) = withContext(Dispatchers.Main) {
- dataLog += msg
+ _dataLog.value += msg
}
private suspend fun copyAssets() = withContext(Dispatchers.IO) {
modelsPath.mkdirs()
samplesPath.mkdirs()
//application.copyData("models", modelsPath, ::printMessage)
- application.copyData("samples", samplesPath, ::printMessage)
+ val appContext = getApplication()
+ appContext.copyData("samples", samplesPath, ::printMessage)
printMessage("All data copied to working directory.\n")
}
private suspend fun loadBaseModel() = withContext(Dispatchers.IO) {
printMessage("Loading model...\n")
- val models = application.assets.list("models/")
+ val models = getApplication().assets.list("models/")
if (models != null) {
- whisperContext = com.whispercpp.whisper.WhisperContext.createContextFromAsset(application.assets, "models/" + models[0])
+ whisperContext =
+ WhisperContext.createContextFromAsset(getApplication().assets, "models/" + models[0])
printMessage("Loaded model ${models[0]}.\n")
}
@@ -95,18 +129,26 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
}
private suspend fun runBenchmark(nthreads: Int) {
- if (!canTranscribe) {
+ val canTranscribeNow = withContext(Dispatchers.Main) {
+ _canTranscribe.value ?: false
+ }
+
+ if (!canTranscribeNow) {
return
}
- canTranscribe = false
+ withContext(Dispatchers.Main) {
+ _canTranscribe.value = false
+ }
printMessage("Running benchmark. This will take minutes...\n")
- whisperContext?.benchMemory(nthreads)?.let{ printMessage(it) }
+ whisperContext?.benchMemory(nthreads)?.let { printMessage(it) }
printMessage("\n")
- whisperContext?.benchGgmlMulMat(nthreads)?.let{ printMessage(it) }
+ whisperContext?.benchGgmlMulMat(nthreads)?.let { printMessage(it) }
- canTranscribe = true
+ withContext(Dispatchers.Main) {
+ _canTranscribe.value = true
+ }
}
private suspend fun getFirstSample(): File = withContext(Dispatchers.IO) {
@@ -116,9 +158,15 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
private suspend fun readAudioSamples(file: File): FloatArray = withContext(Dispatchers.IO) {
stopPlayback()
startPlayback(file)
- return@withContext decodeWaveFile(file)
+ return@withContext com.whispercppdemo.media.decodeWaveFile(file)
}
private suspend fun stopPlayback() = withContext(Dispatchers.Main) {
mediaPlayer?.stop()
mediaPlayer?.release()
@@ -126,24 +174,43 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
}
private suspend fun startPlayback(file: File) = withContext(Dispatchers.Main) {
- mediaPlayer = MediaPlayer.create(application, file.absolutePath.toUri())
+ mediaPlayer = MediaPlayer.create(getApplication(), file.absolutePath.toUri())
mediaPlayer?.start()
}
- private suspend fun transcribeAudio(file: File) {
+    private val _transcriptionText = MutableLiveData("")
+    val transcriptionText: LiveData<String> = _transcriptionText
+
+    // Processes a transcription request. TODO: consider moving this into LibWhisper.kt.
+    suspend fun transcribeAudio(file: File) {
+ val canTranscribe = withContext(Dispatchers.Main) {
+ _canTranscribe.value ?: false
+ }
if (!canTranscribe) {
return
}
-        canTranscribe = false
-
+        withContext(Dispatchers.Main) { _canTranscribe.value = false }
try {
printMessage("Reading wave samples... ")
val data = readAudioSamples(file)
+ //chunk it here???
printMessage("${data.size / (16000 / 1000)} ms\n")
printMessage("Transcribing data...\n")
val start = System.currentTimeMillis()
val text = whisperContext?.transcribeData(data)
+            // Text to be post-processed and then sent to SQL storage.
+            if (text != null) {
+                withContext(Dispatchers.Main) {
+                    // Update transcriptionText LiveData with the new transcription.
+                    _transcriptionText.value = text
+                    if (isRecording.value != true) {
+                        Log.i(TAG, "Text: $text")
+                    }
+                }
+            }
val elapsed = System.currentTimeMillis() - start
printMessage("Done ($elapsed ms): $text\n")
} catch (e: Exception) {
@@ -151,15 +218,116 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
printMessage("${e.localizedMessage}\n")
}
- canTranscribe = true
+        withContext(Dispatchers.Main) { _canTranscribe.value = true }
}
+    // Streaming transcription state.
+    private var lastProcessedTimestamp: Long = 0 // timestamp of the last processed audio
+    private val audioState = AudioState()
+    private val MAX_AUDIO_SEC = 30
+    private val SAMPLE_RATE = 16000
+    private var streamingStartTime: Long = 0
+    private var totalProcessingTime: Long = 0
+    // ~5 s of audio per chunk (16*1024 samples/s used as an approximation of the 16 kHz rate).
+    private val chunkSize = 16 * 1024 * 5
+ private fun startStreaming() {
+ if (_isStreaming.value != true) {
+            Log.d(TAG, "Starting streaming...")
+ _isStreaming.value = true
+
+ audioState.isCapturing = true
+ audioState.audioBufferF32.clear()
+ audioState.nSamples = 0
+
+ lastProcessedTimestamp = System.currentTimeMillis() // Resetting the timestamp
+ streamingStartTime = System.currentTimeMillis()
+ // onDataReceived to handle buffering and processing audio data
+ val onDataReceived = object : Recorder.AudioDataReceivedListener {
+ override fun onAudioDataReceived(data: FloatArray) {
+ // Add incoming data to the buffer
+ if (!audioState.isCapturing) {
+ Log.d(TAG, "Not capturing, ignoring audio")
+ return
+ }
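+                    // Cap buffered audio at MAX_AUDIO_SEC seconds of samples.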
+ if (audioState.nSamples + data.size > MAX_AUDIO_SEC * SAMPLE_RATE) {
+                        Log.d(TAG, "Buffer exceeded $MAX_AUDIO_SEC s of audio; dropping backlog")
+                        // Empty the buffer rather than letting it grow unbounded.
+                        audioState.audioBufferF32.clear()
+                        audioState.nSamples = 0
+                        return
+ }
+ audioState.audioBufferF32.addAll(data.toList())
+ audioState.nSamples += data.size
+ // Process the buffer in chunks
+ processBufferedAudioChunks()
+ }
+ }
+
+ // Start streaming with the onDataReceived listener
+ recorder.startStreaming(onDataReceived) { e ->
+ Log.e(TAG, "Error during streaming: ${e.localizedMessage}", e)
+ _isStreaming.postValue(false)
+ }
+ } else {
+ Log.i(TAG, "Streaming is already active.")
+ }
+ }
+ private fun processBufferedAudioChunks() {
+ if (audioState.isTranscribing) {
+ return
+ }
+ viewModelScope.launch(Dispatchers.IO) {
+ try {
+ audioState.isTranscribing = true
+ while (audioState.audioBufferF32.size >= chunkSize) {
+ val processingStartTime = System.currentTimeMillis()
+ val chunkToProcess = audioState.audioBufferF32.take(chunkSize).toFloatArray()
+
+ val textChunk = whisperContext?.streamTranscribeData(chunkToProcess) ?: ""
+ Log.i(TAG, "Decoded Audio Chunk Text = $textChunk")
+ val processingEndTime = System.currentTimeMillis()
+ totalProcessingTime += (processingEndTime - processingStartTime)
+
+ withContext(Dispatchers.Main) {
+ val currentText = _transcriptionText.value ?: ""
+ _transcriptionText.value = currentText + textChunk
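+                        // A real-time factor below 1.0 means transcription keeps up with capture.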
+ val recordingTime = (System.currentTimeMillis() - streamingStartTime) / 1000.0
+ val cumulativeProcessingTime = totalProcessingTime / 1000.0
+ val realTimeFactor = cumulativeProcessingTime / recordingTime
+ val timeInfo = "Recording time: ${"%.3f".format(recordingTime)} s, " +
+ "Processing time: ${"%.3f".format(cumulativeProcessingTime)} s, " +
+ "Real-time factor: ${"%.3f".format(realTimeFactor)}"
+                        Log.i(TAG, timeInfo)
+                        printMessage(textChunk)
+ _processingTimeMessage.value = timeInfo
+ Log.i(TAG, "Final Text: ${_transcriptionText.value}")
+ }
+ audioState.audioBufferF32 = audioState.audioBufferF32.drop(chunkSize).toMutableList()
+ }
+            } catch (e: Exception) {
+                Log.e(TAG, "Error during buffer processing: ${e.localizedMessage}", e)
+            } finally {
+                // Always clear the flag so a failed chunk doesn't stall future processing.
+                audioState.isTranscribing = false
+            }
+        }
+ }
fun toggleRecord() = viewModelScope.launch {
try {
- if (isRecording) {
+ if (_isRecording.value == true) {
recorder.stopRecording()
- isRecording = false
- recordedFile?.let { transcribeAudio(it) }
+ _isRecording.value = false
+ recordedFile?.let {
+ transcribeAudio(it)
+ }
} else {
stopPlayback()
val file = getTempFileForRecording()
@@ -167,20 +335,34 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
viewModelScope.launch {
withContext(Dispatchers.Main) {
printMessage("${e.localizedMessage}\n")
- isRecording = false
+ _isRecording.value = false
}
}
}
- isRecording = true
+ _isRecording.value = true
recordedFile = file
}
} catch (e: Exception) {
-            Log.w(LOG_TAG, e)
+            Log.w(TAG, e)
printMessage("${e.localizedMessage}\n")
- isRecording = false
+ _isRecording.value = false
+ }
+ }
+
+ fun toggleStream() = viewModelScope.launch {
+ if (_isStreaming.value == true) {
+ Log.d(TAG, "Stopping streaming...")
+ recorder.stopRecording()
+ _isStreaming.value = false
+ Log.d(TAG, "Streaming stopped")
+ } else {
+ Log.d(TAG, "Starting streaming...")
+ stopPlayback()
+ startStreaming()
}
}
+
private suspend fun getTempFileForRecording() = withContext(Dispatchers.IO) {
File.createTempFile("recording", "wav")
}
@@ -192,7 +374,6 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
stopPlayback()
}
}
-
companion object {
fun factory() = viewModelFactory {
initializer {
@@ -202,8 +383,9 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
}
}
}
-}
+
+}
private suspend fun Context.copyData(
assetDirName: String,
destDir: File,
@@ -211,15 +393,15 @@ private suspend fun Context.copyData(
) = withContext(Dispatchers.IO) {
assets.list(assetDirName)?.forEach { name ->
val assetPath = "$assetDirName/$name"
- Log.v(LOG_TAG, "Processing $assetPath...")
+ Log.v(TAG, "Processing $assetPath...")
val destination = File(destDir, name)
- Log.v(LOG_TAG, "Copying $assetPath to $destination...")
+ Log.v(TAG, "Copying $assetPath to $destination...")
printMessage("Copying $name...\n")
assets.open(assetPath).use { input ->
destination.outputStream().use { output ->
input.copyTo(output)
}
}
- Log.v(LOG_TAG, "Copied $assetPath to $destination")
+ Log.v(TAG, "Copied $assetPath to $destination")
}
-}
\ No newline at end of file
+}
diff --git a/examples/whisper.android/build.gradle b/examples/whisper.android/build.gradle
index ae1f486b658..1d6c1312e8d 100644
--- a/examples/whisper.android/build.gradle
+++ b/examples/whisper.android/build.gradle
@@ -1,6 +1,6 @@
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
- id 'com.android.application' version '8.1.1' apply false
- id 'com.android.library' version '8.1.1' apply false
+ id 'com.android.application' version '8.3.0' apply false
+ id 'com.android.library' version '8.3.0' apply false
id 'org.jetbrains.kotlin.android' version '1.9.0' apply false
}
\ No newline at end of file
diff --git a/examples/whisper.android/gradle/wrapper/gradle-wrapper.properties b/examples/whisper.android/gradle/wrapper/gradle-wrapper.properties
index a7b943c97c8..70a33f5847d 100644
--- a/examples/whisper.android/gradle/wrapper/gradle-wrapper.properties
+++ b/examples/whisper.android/gradle/wrapper/gradle-wrapper.properties
@@ -1,6 +1,6 @@
#Wed Dec 14 10:37:24 EST 2022
distributionBase=GRADLE_USER_HOME
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
diff --git a/examples/whisper.android/lib/build.gradle b/examples/whisper.android/lib/build.gradle
index e4779e26527..9c40570790b 100644
--- a/examples/whisper.android/lib/build.gradle
+++ b/examples/whisper.android/lib/build.gradle
@@ -10,8 +10,6 @@ android {
defaultConfig {
minSdk 26
targetSdk 34
- versionCode 1
- versionName "1.0"
ndk {
abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64'
diff --git a/examples/whisper.android/lib/src/main/java/com/whispercpp/whisper/LibWhisper.kt b/examples/whisper.android/lib/src/main/java/com/whispercpp/whisper/LibWhisper.kt
index 513202fa689..b74c6a0560c 100644
--- a/examples/whisper.android/lib/src/main/java/com/whispercpp/whisper/LibWhisper.kt
+++ b/examples/whisper.android/lib/src/main/java/com/whispercpp/whisper/LibWhisper.kt
@@ -28,6 +28,18 @@ class WhisperContext private constructor(private var ptr: Long) {
}
}
}
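+    // Transcribes a single streamed chunk. Unlike transcribeData, this calls the
+    // streaming JNI entry point, which decodes the chunk as one segment.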
+ suspend fun streamTranscribeData(data: FloatArray): String = withContext(scope.coroutineContext) {
+ require(ptr != 0L)
+ val numThreads = WhisperCpuConfig.preferredThreadCount
+ Log.d(LOG_TAG, "Selecting $numThreads threads")
+ WhisperLib.fullStreamTranscribe(ptr, numThreads, data)
+ val textCount = WhisperLib.getTextSegmentCount(ptr)
+ return@withContext buildString {
+ for (i in 0 until textCount) {
+ append(WhisperLib.getTextSegment(ptr, i))
+ }
+ }
+ }
suspend fun benchMemory(nthreads: Int): String = withContext(scope.coroutineContext) {
return@withContext WhisperLib.benchMemcpy(nthreads)
@@ -134,6 +146,7 @@ private class WhisperLib {
external fun getSystemInfo(): String
external fun benchMemcpy(nthread: Int): String
external fun benchGgmlMulMat(nthread: Int): String
+ external fun fullStreamTranscribe(contextPtr: Long, numThreads: Int, audioData: FloatArray)
}
}
diff --git a/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt b/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt
index faaa7b662cf..0385c3d83f1 100644
--- a/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt
+++ b/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt
@@ -75,3 +75,8 @@ endif ()
build_library("whisper") # Default target
include_directories(${WHISPER_LIB_DIR})
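+# Use ccache, when available, to speed up rebuilds of the native library.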
+find_program(CCACHE_FOUND ccache)
+if(CCACHE_FOUND)
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
+endif()
\ No newline at end of file
diff --git a/examples/whisper.android/lib/src/main/jni/whisper/jni.c b/examples/whisper.android/lib/src/main/jni/whisper/jni.c
index 7f9d724617d..34440794b8d 100644
--- a/examples/whisper.android/lib/src/main/jni/whisper/jni.c
+++ b/examples/whisper.android/lib/src/main/jni/whisper/jni.c
@@ -192,7 +192,45 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe(
}
(*env)->ReleaseFloatArrayElements(env, audio_data, audio_data_arr, JNI_ABORT);
}
+// Streaming entry point: decodes one buffered audio chunk as a single segment.
+JNIEXPORT void JNICALL
+Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullStreamTranscribe(
+ JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data) {
+ UNUSED(thiz);
+ struct whisper_context *context = (struct whisper_context *) context_ptr;
+ jfloat *audio_data_arr = (*env)->GetFloatArrayElements(env, audio_data, NULL);
+ const jsize audio_data_length = (*env)->GetArrayLength(env, audio_data);
+    // The parameters below are adapted from the Objective-C iOS sample.
+ struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
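+    // Greedy sampling keeps per-chunk latency low, which matters for streaming.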
+ params.print_realtime = true;
+ params.print_progress = false;
+ params.print_timestamps = true;
+ params.print_special = false;
+ params.translate = false;
+ params.language = "en";
+    params.n_threads = num_threads; // thread count selected by WhisperCpuConfig on the Kotlin side
+    // Potentially use an initial prompt for custom vocabularies: optional text provided
+    // as a prompt for the first window, which can be used to "prompt-engineer" a context
+    // for transcription, e.g. custom vocabularies or proper nouns, to make those words
+    // more likely to be predicted correctly.
+    //params.initial_prompt = "Transcription of Tactical Combat Casualty Drugs such as Fentanyl, Ibuprofen, Amoxicillin, Epinephrine, TXA, Hextend, Ketamine, Oral Transmucosal Fentanyl Citrate. ";
+ params.offset_ms = 0;
+ params.no_context = true;
+    params.single_segment = true; // hard-coded to true; the Objective-C example toggles this from the UI
+    params.no_timestamps = params.single_segment; // mirrors the streaming Objective-C example
+
+ whisper_reset_timings(context);
+
+ LOGI("About to run whisper_full");
+ if (whisper_full(context, params, audio_data_arr, audio_data_length) != 0) {
+ LOGI("Failed to run the model");
+ } else {
+ whisper_print_timings(context);
+ }
+ (*env)->ReleaseFloatArrayElements(env, audio_data, audio_data_arr, JNI_ABORT);
+}
JNIEXPORT jint JNICALL
Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegmentCount(
JNIEnv *env, jobject thiz, jlong context_ptr) {
diff --git a/ggml-backend.c b/ggml-backend.c
index 402d86ef3ac..219582757c5 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1723,6 +1723,9 @@ ggml_backend_sched_t ggml_backend_sched_new(
struct ggml_backend_sched * sched = calloc(sizeof(struct ggml_backend_sched), 1);
+
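+    // Debug aid: log the size of the scheduler struct at allocation time.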
+ fprintf(stderr, "ggml_backend_sched size: %zu KB\n", sizeof(struct ggml_backend_sched)/1024);
+
// initialize hash table
sched->hash_set = ggml_hash_set_new(graph_size);
sched->tensor_backend_id = calloc(sizeof(sched->tensor_backend_id[0]), sched->hash_set.size);
@@ -1732,6 +1735,7 @@ ggml_backend_sched_t ggml_backend_sched_new(
sched->node_backend_ids = calloc(sizeof(sched->node_backend_ids[0]), nodes_size);
sched->leaf_backend_ids = calloc(sizeof(sched->leaf_backend_ids[0]), nodes_size);
+
sched->n_backends = n_backends;
sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;