diff --git a/.gitignore b/.gitignore index 43dcdabfb85..792f690b193 100644 --- a/.gitignore +++ b/.gitignore @@ -34,7 +34,7 @@ build-sanitize-thread/ /quantize /server /lsp - +/models arm_neon.h sync.sh libwhisper.a diff --git a/examples/whisper.android/.gitignore b/examples/whisper.android/.gitignore index aa724b77071..3e0b4de7bf3 100644 --- a/examples/whisper.android/.gitignore +++ b/examples/whisper.android/.gitignore @@ -13,3 +13,6 @@ .externalNativeBuild .cxx local.properties +/app/src/main/assets/ +/app/src/main/assets/models/ + diff --git a/examples/whisper.android/.idea/gradle.xml b/examples/whisper.android/.idea/gradle.xml index 4a09ccc1215..c34ccc3d545 100644 --- a/examples/whisper.android/.idea/gradle.xml +++ b/examples/whisper.android/.idea/gradle.xml @@ -4,17 +4,16 @@ diff --git a/examples/whisper.android/.idea/misc.xml b/examples/whisper.android/.idea/misc.xml index 0ad17cbd33a..8978d23db56 100644 --- a/examples/whisper.android/.idea/misc.xml +++ b/examples/whisper.android/.idea/misc.xml @@ -1,4 +1,3 @@ - diff --git a/examples/whisper.android/.idea/vcs.xml b/examples/whisper.android/.idea/vcs.xml index b2bdec2d71b..e0c7f72305f 100644 --- a/examples/whisper.android/.idea/vcs.xml +++ b/examples/whisper.android/.idea/vcs.xml @@ -2,5 +2,6 @@ + \ No newline at end of file diff --git a/examples/whisper.android/app/.gitignore b/examples/whisper.android/app/.gitignore index 42afabfd2ab..796b96d1c40 100644 --- a/examples/whisper.android/app/.gitignore +++ b/examples/whisper.android/app/.gitignore @@ -1 +1 @@ -/build \ No newline at end of file +/build diff --git a/examples/whisper.android/app/build.gradle b/examples/whisper.android/app/build.gradle index 9f407998cdb..a883a5a022b 100644 --- a/examples/whisper.android/app/build.gradle +++ b/examples/whisper.android/app/build.gradle @@ -1,6 +1,7 @@ plugins { id 'com.android.application' id 'org.jetbrains.kotlin.android' + } android { @@ -9,7 +10,7 @@ android { defaultConfig { applicationId "com.whispercppdemo" - 
minSdk 26 + minSdk 31 targetSdk 34 versionCode 1 versionName "1.0" @@ -29,31 +30,44 @@ android { } } compileOptions { - sourceCompatibility JavaVersion.VERSION_17 - targetCompatibility JavaVersion.VERSION_17 + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 } kotlinOptions { - jvmTarget = '17' + jvmTarget = '1.8' } buildFeatures { compose true } composeOptions { - kotlinCompilerExtensionVersion '1.5.0' + kotlinCompilerExtensionVersion '1.5.2' } + ndkVersion = "25.2.9519653" } dependencies { - implementation project(':lib') - implementation 'androidx.activity:activity-compose:1.7.2' - implementation 'androidx.compose.material:material-icons-core:1.5.0' - implementation 'androidx.compose.material3:material3:1.1.1' - implementation "androidx.compose.ui:ui:1.5.0" - implementation "androidx.compose.ui:ui-tooling-preview:1.5.0" - implementation 'androidx.lifecycle:lifecycle-viewmodel-compose:2.6.1' - implementation "com.google.accompanist:accompanist-permissions:0.28.0" - implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.2' + implementation(project(":lib")) + implementation("androidx.compose.ui:ui:1.5.2") + implementation("androidx.compose.material:material:1.5.2") + implementation("androidx.activity:activity-compose:1.7.2") + implementation("androidx.compose.material:material-icons-core:1.5.0") + implementation("androidx.compose.material3:material3:1.1.1") + implementation("androidx.compose.ui:ui:1.5.2") + implementation("androidx.compose.ui:ui-tooling-preview:1.5.2") + implementation("androidx.compose.runtime:runtime-livedata:1.5.2") + implementation("androidx.lifecycle:lifecycle-viewmodel-compose:2.6.1") + implementation("com.google.accompanist:accompanist-permissions:0.28.0") + implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.2") + implementation("androidx.lifecycle:lifecycle-viewmodel-ktx:2.7.0") + implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.3.1") + + + + + 
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.2") + implementation("androidx.lifecycle:lifecycle-viewmodel-ktx:2.7.0") + implementation ("androidx.lifecycle:lifecycle-runtime-ktx:2.3.1") testImplementation 'junit:junit:4.13.2' androidTestImplementation 'androidx.test.ext:junit:1.1.5' androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1' diff --git a/examples/whisper.android/app/src/main/java/com/whispercppdemo/recorder/Recorder.kt b/examples/whisper.android/app/src/main/java/com/whispercppdemo/recorder/Recorder.kt index 68df9652521..58abce04713 100644 --- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/recorder/Recorder.kt +++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/recorder/Recorder.kt @@ -11,78 +11,165 @@ import kotlinx.coroutines.withContext import java.io.File import java.util.concurrent.Executors import java.util.concurrent.atomic.AtomicBoolean +import android.util.Log -class Recorder { +import kotlinx.coroutines.runBlocking + +private const val TAG = "Recorder" + + + +class Recorder() { private val scope: CoroutineScope = CoroutineScope( Executors.newSingleThreadExecutor().asCoroutineDispatcher() ) + private var recorder: AudioRecordThread? = null + private var audioStream: AudioStreamThread? 
= null + - suspend fun startRecording(outputFile: File, onError: (Exception) -> Unit) = withContext(scope.coroutineContext) { - recorder = AudioRecordThread(outputFile, onError) - recorder?.start() + suspend fun startRecording(outputFile: File, onError: (Exception) -> Unit) = + withContext(scope.coroutineContext) { + recorder = AudioRecordThread(outputFile, onError) + recorder?.start() + } + + fun startStreaming(onDataReceived: AudioDataReceivedListener, onError: (Exception) -> Unit) { + if (audioStream == null) { + audioStream = AudioStreamThread(onDataReceived, onError) + audioStream?.start() + } else { + Log.i(TAG, "AudioStreamThread is already running") + } } - suspend fun stopRecording() = withContext(scope.coroutineContext) { + + fun stopRecording() { recorder?.stopRecording() - @Suppress("BlockingMethodInNonBlockingContext") - recorder?.join() - recorder = null + audioStream?.stopRecording() + runBlocking { + audioStream?.join() + audioStream = null + recorder?.join() + recorder = null + } } -} -private class AudioRecordThread( - private val outputFile: File, - private val onError: (Exception) -> Unit -) : - Thread("AudioRecorder") { - private var quit = AtomicBoolean(false) - @SuppressLint("MissingPermission") - override fun run() { - try { + private class AudioRecordThread( + private val outputFile: File, + private val onError: (Exception) -> Unit + ) : + Thread("AudioRecorder") { + private var quit = AtomicBoolean(false) + + @SuppressLint("MissingPermission") + override fun run() { + try { + val bufferSize = AudioRecord.getMinBufferSize( + 16000, + AudioFormat.CHANNEL_IN_MONO, + AudioFormat.ENCODING_PCM_16BIT + ) * 4 + val buffer = ShortArray(bufferSize / 2) + + val audioRecord = AudioRecord( + MediaRecorder.AudioSource.MIC, + 16000, + AudioFormat.CHANNEL_IN_MONO, + AudioFormat.ENCODING_PCM_16BIT, + bufferSize + ) + + try { + audioRecord.startRecording() + + val allData = mutableListOf() + + while (!quit.get()) { + val read = audioRecord.read(buffer, 0, 
buffer.size) + if (read > 0) { + for (i in 0 until read) { + allData.add(buffer[i]) + } + } else { + throw java.lang.RuntimeException("audioRecord.read returned $read") + } + } + + audioRecord.stop() + encodeWaveFile( + outputFile, + allData.toShortArray() + ) + } finally { + audioRecord.release() + } + } catch (e: Exception) { + onError(e) + } + } + + fun stopRecording() { + quit.set(true) + } + + + } + + interface AudioDataReceivedListener { + fun onAudioDataReceived(data: FloatArray) + } + private class AudioStreamThread( + private val onDataReceived: AudioDataReceivedListener, + private val onError: (Exception) -> Unit + ) : Thread("AudioStreamer") { + private val quit = AtomicBoolean(false) + + @SuppressLint("MissingPermission") + override fun run() { val bufferSize = AudioRecord.getMinBufferSize( 16000, AudioFormat.CHANNEL_IN_MONO, - AudioFormat.ENCODING_PCM_16BIT - ) * 4 - val buffer = ShortArray(bufferSize / 2) - + AudioFormat.ENCODING_PCM_FLOAT) * 4 + val floatBuffer = FloatArray(bufferSize / 2) val audioRecord = AudioRecord( MediaRecorder.AudioSource.MIC, 16000, AudioFormat.CHANNEL_IN_MONO, - AudioFormat.ENCODING_PCM_16BIT, - bufferSize - ) + AudioFormat.ENCODING_PCM_FLOAT, + bufferSize) + + if (audioRecord.state != AudioRecord.STATE_INITIALIZED) { + Log.e(TAG, "AudioRecord initialization failed") + return + } try { audioRecord.startRecording() + while (!quit.get()) { - val allData = mutableListOf() - while (!quit.get()) { - val read = audioRecord.read(buffer, 0, buffer.size) - if (read > 0) { - for (i in 0 until read) { - allData.add(buffer[i]) - } - } else { - throw java.lang.RuntimeException("audioRecord.read returned $read") + val readResult = audioRecord.read(floatBuffer, 0, floatBuffer.size, AudioRecord.READ_BLOCKING) + Log.i(TAG, "readResult: $readResult") + if (readResult > 0) { + Log.i(TAG, "READING FROM THE floatBuffer") + + onDataReceived.onAudioDataReceived(floatBuffer.copyOf(readResult)) + } else if (readResult < 0) { + throw 
RuntimeException("AudioRecord.read error: $readResult") } } - - audioRecord.stop() - encodeWaveFile(outputFile, allData.toShortArray()) + } catch (e: Exception) { + onError(e) } finally { + audioRecord.stop() audioRecord.release() } - } catch (e: Exception) { - onError(e) } - } - fun stopRecording() { - quit.set(true) + fun stopRecording() { + quit.set(true) + } } -} \ No newline at end of file +} diff --git a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreen.kt b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreen.kt index 38f11b81c64..2f992c7b6a2 100644 --- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreen.kt +++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreen.kt @@ -6,40 +6,63 @@ import androidx.compose.foundation.text.selection.SelectionContainer import androidx.compose.foundation.verticalScroll import androidx.compose.material3.* import androidx.compose.runtime.Composable + import androidx.compose.ui.Modifier import androidx.compose.ui.res.stringResource import androidx.compose.ui.unit.dp +import androidx.compose.runtime.livedata.observeAsState +import androidx.compose.ui.Alignment + import com.google.accompanist.permissions.ExperimentalPermissionsApi import com.google.accompanist.permissions.isGranted import com.google.accompanist.permissions.rememberPermissionState import com.whispercppdemo.R + @Composable fun MainScreen(viewModel: MainScreenViewModel) { + + val canTranscribeState = viewModel.canTranscribe.observeAsState(initial = false) + val isRecordingState = viewModel.isRecording.observeAsState(initial = false) + val isStreamingState = viewModel.isStreaming.observeAsState(initial = false) + + + val messageLogState = viewModel.dataLog.observeAsState(initial = "") + val processingTimeMessage = viewModel.processingTimeMessage.observeAsState(initial = "") + MainScreen( - canTranscribe = viewModel.canTranscribe, - isRecording = 
viewModel.isRecording, - messageLog = viewModel.dataLog, + canTranscribe = canTranscribeState.value, + isRecording = isRecordingState.value, + isStreaming = isStreamingState.value, + messageLog = messageLogState.value, + processingTimeMessage = processingTimeMessage.value, onBenchmarkTapped = viewModel::benchmark, onTranscribeSampleTapped = viewModel::transcribeSample, - onRecordTapped = viewModel::toggleRecord + onRecordTapped = viewModel::toggleRecord, + onStreamTapped = viewModel::toggleStream ) + } @OptIn(ExperimentalMaterial3Api::class) @Composable -private fun MainScreen( +fun MainScreen( canTranscribe: Boolean, isRecording: Boolean, + isStreaming: Boolean, messageLog: String, + processingTimeMessage: String, onBenchmarkTapped: () -> Unit, onTranscribeSampleTapped: () -> Unit, - onRecordTapped: () -> Unit + onRecordTapped: () -> Unit, + onStreamTapped: () -> Unit ) { + val scrollState = rememberScrollState() + Scaffold( topBar = { TopAppBar( - title = { Text(stringResource(R.string.app_name)) } + title = { Text(stringResource(id = R.string.app_name)) } ) }, ) { innerPadding -> @@ -53,33 +76,40 @@ private fun MainScreen( BenchmarkButton(enabled = canTranscribe, onClick = onBenchmarkTapped) TranscribeSampleButton(enabled = canTranscribe, onClick = onTranscribeSampleTapped) } - RecordButton( + RecordSection( enabled = canTranscribe, isRecording = isRecording, + processingTimeMessage = processingTimeMessage, onClick = onRecordTapped ) + StreamButton( + enabled = canTranscribe, + isStreaming = isStreaming, + onClick = onStreamTapped + ) } MessageLog(messageLog) } } } + @Composable -private fun MessageLog(log: String) { +fun MessageLog(log: String) { SelectionContainer { Text(modifier = Modifier.verticalScroll(rememberScrollState()), text = log) } } @Composable -private fun BenchmarkButton(enabled: Boolean, onClick: () -> Unit) { +fun BenchmarkButton(enabled: Boolean, onClick: () -> Unit) { Button(onClick = onClick, enabled = enabled) { Text("Benchmark") } } 
@Composable -private fun TranscribeSampleButton(enabled: Boolean, onClick: () -> Unit) { +fun TranscribeSampleButton(enabled: Boolean, onClick: () -> Unit) { Button(onClick = onClick, enabled = enabled) { Text("Transcribe sample") } @@ -87,7 +117,7 @@ private fun TranscribeSampleButton(enabled: Boolean, onClick: () -> Unit) { @OptIn(ExperimentalPermissionsApi::class) @Composable -private fun RecordButton(enabled: Boolean, isRecording: Boolean, onClick: () -> Unit) { +fun RecordButton(enabled: Boolean, isRecording: Boolean, onClick: () -> Unit) { val micPermissionState = rememberPermissionState( permission = android.Manifest.permission.RECORD_AUDIO, onPermissionResult = { granted -> @@ -102,7 +132,7 @@ private fun RecordButton(enabled: Boolean, isRecording: Boolean, onClick: () -> } else { micPermissionState.launchPermissionRequest() } - }, enabled = enabled) { + }, enabled = enabled) { Text( if (isRecording) { "Stop recording" @@ -111,4 +141,46 @@ private fun RecordButton(enabled: Boolean, isRecording: Boolean, onClick: () -> } ) } +} +@Composable +fun RecordSection(enabled: Boolean, isRecording: Boolean, processingTimeMessage: String, onClick: () -> Unit) { + Row( + verticalAlignment = Alignment.CenterVertically, + modifier = Modifier.fillMaxWidth() + ) { + RecordButton( + enabled = enabled, + isRecording = isRecording, + onClick = onClick + ) + Spacer(Modifier.width(8.dp)) + Text(text = processingTimeMessage) + } +} +@OptIn(ExperimentalPermissionsApi::class) +@Composable +fun StreamButton(enabled: Boolean, isStreaming: Boolean, onClick: () -> Unit) { + val micPermissionState = rememberPermissionState( + permission = android.Manifest.permission.RECORD_AUDIO, + onPermissionResult = { granted -> + if (granted) { + onClick() + } + } + ) + Button(onClick = { + if (micPermissionState.status.isGranted) { + onClick() + } else { + micPermissionState.launchPermissionRequest() + } + }, enabled = enabled) { + Text( + if (isStreaming) { + "Stop streaming" + } else { + "Start 
streaming" + } + ) + } } \ No newline at end of file diff --git a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt index d614ce3338e..4065409522a 100644 --- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt +++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt @@ -1,44 +1,75 @@ package com.whispercppdemo.ui.main +import android.annotation.SuppressLint import android.app.Application import android.content.Context import android.media.MediaPlayer import android.util.Log -import androidx.compose.runtime.getValue -import androidx.compose.runtime.mutableStateOf -import androidx.compose.runtime.setValue +import androidx.core.content.PackageManagerCompat.LOG_TAG import androidx.core.net.toUri -import androidx.lifecycle.ViewModel +import androidx.lifecycle.LiveData +import androidx.lifecycle.MutableLiveData +import androidx.lifecycle.AndroidViewModel import androidx.lifecycle.ViewModelProvider import androidx.lifecycle.viewModelScope import androidx.lifecycle.viewmodel.initializer import androidx.lifecycle.viewmodel.viewModelFactory -import com.whispercppdemo.media.decodeWaveFile -import com.whispercppdemo.recorder.Recorder import com.whispercpp.whisper.WhisperContext import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import kotlinx.coroutines.withContext import java.io.File +import com.whispercppdemo.recorder.Recorder +import kotlinx.coroutines.channels.Channel +import kotlinx.coroutines.delay +import java.io.InputStream + + + +private const val TAG = "MainScreenViewModel" //logging tag + + +class MainScreenViewModel(application: Application, +) : AndroidViewModel(application) { + + private val _canTranscribe = MutableLiveData(false) + val canTranscribe: LiveData = _canTranscribe -private 
const val LOG_TAG = "MainScreenViewModel" + private val _dataLog = MutableLiveData("") + val dataLog: LiveData = _dataLog -class MainScreenViewModel(private val application: Application) : ViewModel() { - var canTranscribe by mutableStateOf(false) - private set - var dataLog by mutableStateOf("") - private set - var isRecording by mutableStateOf(false) - private set + private val _isRecording = MutableLiveData(false) + val isRecording: LiveData = _isRecording + + private val _isStreaming = MutableLiveData(false) + val isStreaming: LiveData = _isStreaming + + private val _processingTimeMessage = MutableLiveData("") + val processingTimeMessage: LiveData = _processingTimeMessage + + + private var isActive = false + +// private var audioBuffer = mutableListOf() private val modelsPath = File(application.filesDir, "models") private val samplesPath = File(application.filesDir, "samples") - private var recorder: Recorder = Recorder() - private var whisperContext: com.whispercpp.whisper.WhisperContext? = null + private val recorder: Recorder = Recorder() + private var whisperContext: WhisperContext? = null private var mediaPlayer: MediaPlayer? = null private var recordedFile: File? 
= null + data class AudioState( + var isCapturing: Boolean = false, + var isTranscribing: Boolean = false, + var nSamples: Int = 0, + var audioBufferF32: MutableList = mutableListOf() + ) + + + + init { viewModelScope.launch { printSystemInfo() @@ -47,15 +78,16 @@ class MainScreenViewModel(private val application: Application) : ViewModel() { } private suspend fun printSystemInfo() { - printMessage(String.format("System Info: %s\n", com.whispercpp.whisper.WhisperContext.getSystemInfo())) + printMessage(String.format("System Info: %s\n", WhisperContext.getSystemInfo())) } + @SuppressLint("RestrictedApi") private suspend fun loadData() { printMessage("Loading data...\n") try { copyAssets() loadBaseModel() - canTranscribe = true + _canTranscribe.value = true } catch (e: Exception) { Log.w(LOG_TAG, e) printMessage("${e.localizedMessage}\n") @@ -63,22 +95,24 @@ class MainScreenViewModel(private val application: Application) : ViewModel() { } private suspend fun printMessage(msg: String) = withContext(Dispatchers.Main) { - dataLog += msg + _dataLog.value += msg } private suspend fun copyAssets() = withContext(Dispatchers.IO) { modelsPath.mkdirs() samplesPath.mkdirs() //application.copyData("models", modelsPath, ::printMessage) - application.copyData("samples", samplesPath, ::printMessage) + val appContext = getApplication() + appContext.copyData("samples", samplesPath, ::printMessage) printMessage("All data copied to working directory.\n") } private suspend fun loadBaseModel() = withContext(Dispatchers.IO) { printMessage("Loading model...\n") - val models = application.assets.list("models/") + val models = getApplication().assets.list("models/") if (models != null) { - whisperContext = com.whispercpp.whisper.WhisperContext.createContextFromAsset(application.assets, "models/" + models[0]) + whisperContext = + WhisperContext.createContextFromAsset(getApplication().assets, "models/" + models[0]) printMessage("Loaded model ${models[0]}.\n") } @@ -95,18 +129,26 @@ class 
MainScreenViewModel(private val application: Application) : ViewModel() { } private suspend fun runBenchmark(nthreads: Int) { - if (!canTranscribe) { + val canTranscribeNow = withContext(Dispatchers.Main) { + _canTranscribe.value ?: false + } + + if (!canTranscribeNow) { return } - canTranscribe = false + withContext(Dispatchers.Main) { + _canTranscribe.value = false + } printMessage("Running benchmark. This will take minutes...\n") - whisperContext?.benchMemory(nthreads)?.let{ printMessage(it) } + whisperContext?.benchMemory(nthreads)?.let { printMessage(it) } printMessage("\n") - whisperContext?.benchGgmlMulMat(nthreads)?.let{ printMessage(it) } + whisperContext?.benchGgmlMulMat(nthreads)?.let { printMessage(it) } - canTranscribe = true + withContext(Dispatchers.Main) { + _canTranscribe.value = true + } } private suspend fun getFirstSample(): File = withContext(Dispatchers.IO) { @@ -116,9 +158,15 @@ class MainScreenViewModel(private val application: Application) : ViewModel() { private suspend fun readAudioSamples(file: File): FloatArray = withContext(Dispatchers.IO) { stopPlayback() startPlayback(file) - return@withContext decodeWaveFile(file) + return@withContext com.whispercppdemo.media.decodeWaveFile(file) } +// private suspend fun streamAudioSamples(shortArray: ShortArray): FloatArray = withContext(Dispatchers.IO) { +// stopPlayback() +// return@withContext com.example.csct_gui_demo.media.processAudioChunk() +// } + + private suspend fun stopPlayback() = withContext(Dispatchers.Main) { mediaPlayer?.stop() mediaPlayer?.release() @@ -126,24 +174,43 @@ class MainScreenViewModel(private val application: Application) : ViewModel() { } private suspend fun startPlayback(file: File) = withContext(Dispatchers.Main) { - mediaPlayer = MediaPlayer.create(application, file.absolutePath.toUri()) + mediaPlayer = MediaPlayer.create(getApplication(), file.absolutePath.toUri()) mediaPlayer?.start() } - private suspend fun transcribeAudio(file: File) { + private val 
_transcriptionText = MutableLiveData("") + val transcriptionText: LiveData = _transcriptionText + + // Function to process transcription - maybe put this into LibWhisper.kt??? + @SuppressLint("RestrictedApi") + + + suspend fun transcribeAudio(file: File) { + val canTranscribe = withContext(Dispatchers.Main) { + _canTranscribe.value ?: false + } if (!canTranscribe) { return } - canTranscribe = false - try { printMessage("Reading wave samples... ") val data = readAudioSamples(file) + //chunk it here??? printMessage("${data.size / (16000 / 1000)} ms\n") printMessage("Transcribing data...\n") val start = System.currentTimeMillis() val text = whisperContext?.transcribeData(data) + //text to be processed and then sent to SQL + if(text != null) { + withContext(Dispatchers.Main) { + // Update transcriptionText LiveData with the new transcription + _transcriptionText.value = text!! + if (isRecording.value != true) { + Log.i(TAG, "Text: $text") + } + } + } val elapsed = System.currentTimeMillis() - start printMessage("Done ($elapsed ms): $text\n") } catch (e: Exception) { @@ -151,15 +218,116 @@ class MainScreenViewModel(private val application: Application) : ViewModel() { printMessage("${e.localizedMessage}\n") } - canTranscribe = true + _canTranscribe.value = true } + //streamTranscribe??? 
+ private var lastProcessedTimestamp: Long = 0 // Keep track of the last processed audio timestamp + private val audioState = AudioState() + private var MAX_AUDIO_SEC = 30 + private var SAMPLE_RATE = 16000 + private var streamingStartTime: Long = 0 + private var totalProcessingTime: Long = 0 + //16*1024 * seconds you want for a chunk + private val chunkSize = 16*1024*5 + private fun startStreaming() { + if (_isStreaming.value != true) { + Log.d(TAG, "Starting streaming 2 electric boogaloo...") + _isStreaming.value = true + +// audioBuffer.clear() + audioState.isCapturing = true + audioState.audioBufferF32.clear() + audioState.nSamples = 0 + + lastProcessedTimestamp = System.currentTimeMillis() // Resetting the timestamp + streamingStartTime = System.currentTimeMillis() + // onDataReceived to handle buffering and processing audio data + val onDataReceived = object : Recorder.AudioDataReceivedListener { + override fun onAudioDataReceived(data: FloatArray) { + // Add incoming data to the buffer +// audioBuffer.addAll(data.toList()) + if (!audioState.isCapturing) { + Log.d(TAG, "Not capturing, ignoring audio") + return + } + if (audioState.nSamples + data.size > MAX_AUDIO_SEC * SAMPLE_RATE) { + Log.d(TAG, "Too much audio data, ignoring") +// toggleStream() + //empty the buffer + audioState.audioBufferF32.clear() + audioState.nSamples = 0 + return + } + audioState.audioBufferF32.addAll(data.toList()) + audioState.nSamples += data.size + // Process the buffer in chunks + processBufferedAudioChunks() + } + } + + // Start streaming with the onDataReceived listener + recorder.startStreaming(onDataReceived) { e -> + Log.e(TAG, "Error during streaming: ${e.localizedMessage}", e) + _isStreaming.postValue(false) + } + } else { + Log.i(TAG, "Streaming is already active.") + } + } + private fun processBufferedAudioChunks() { + if (audioState.isTranscribing) { + return + } + viewModelScope.launch(Dispatchers.IO) { + try { + audioState.isTranscribing = true + while 
(audioState.audioBufferF32.size >= chunkSize) { + val processingStartTime = System.currentTimeMillis() + val chunkToProcess = audioState.audioBufferF32.take(chunkSize).toFloatArray() + + + val textChunk = whisperContext?.streamTranscribeData(chunkToProcess) ?: "" + Log.i(TAG, "Decoded Audio Chunk Text = $textChunk") + val processingEndTime = System.currentTimeMillis() + totalProcessingTime += (processingEndTime - processingStartTime) + + val recordingTime = (System.currentTimeMillis() - streamingStartTime) / 1000.0 + val processingTime = (processingEndTime - processingStartTime) / 1000.0 + withContext(Dispatchers.Main) { + val currentText = _transcriptionText.value ?: "" + _transcriptionText.value = currentText + textChunk + val recordingTime = (System.currentTimeMillis() - streamingStartTime) / 1000.0 + val cumulativeProcessingTime = totalProcessingTime / 1000.0 + val realTimeFactor = cumulativeProcessingTime / recordingTime + val timeInfo = "Recording time: ${"%.3f".format(recordingTime)} s, " + + "Processing time: ${"%.3f".format(cumulativeProcessingTime)} s, " + + "Real-time factor: ${"%.3f".format(realTimeFactor)}" + Log.i(TAG,"$timeInfo") + printMessage(textChunk!!) 
+ _processingTimeMessage.value = timeInfo + Log.i(TAG, "Final Text: ${_transcriptionText.value}") + } + audioState.audioBufferF32 = audioState.audioBufferF32.drop(chunkSize).toMutableList() +// lastProcessedTimestamp = currentTimestamp // Update the last processed timestamp + +// audioBuffer = audioBuffer.drop(chunkSize).toMutableList() + } + audioState.isTranscribing = false + } catch (e: Exception) { + Log.e(TAG, "Error during buffer processing: ${e.localizedMessage}", e) + } + } + } + @SuppressLint("RestrictedApi") fun toggleRecord() = viewModelScope.launch { try { - if (isRecording) { + if (_isRecording.value == true) { recorder.stopRecording() - isRecording = false - recordedFile?.let { transcribeAudio(it) } + _isRecording.value = false + recordedFile?.let { + transcribeAudio(it) + } } else { stopPlayback() val file = getTempFileForRecording() @@ -167,20 +335,34 @@ class MainScreenViewModel(private val application: Application) : ViewModel() { viewModelScope.launch { withContext(Dispatchers.Main) { printMessage("${e.localizedMessage}\n") - isRecording = false + _isRecording.value = false } } } - isRecording = true + _isRecording.value = true recordedFile = file } } catch (e: Exception) { Log.w(LOG_TAG, e) printMessage("${e.localizedMessage}\n") - isRecording = false + _isRecording.value = false + } + } + + fun toggleStream() = viewModelScope.launch { + if (_isStreaming.value == true) { + Log.d(TAG, "Stopping streaming...") + recorder.stopRecording() + _isStreaming.value = false + Log.d(TAG, "Streaming stopped") + } else { + Log.d(TAG, "Starting streaming...") + stopPlayback() + startStreaming() } } + private suspend fun getTempFileForRecording() = withContext(Dispatchers.IO) { File.createTempFile("recording", "wav") } @@ -192,7 +374,6 @@ class MainScreenViewModel(private val application: Application) : ViewModel() { stopPlayback() } } - companion object { fun factory() = viewModelFactory { initializer { @@ -202,8 +383,9 @@ class MainScreenViewModel(private val 
application: Application) : ViewModel() { } } } -} + +} private suspend fun Context.copyData( assetDirName: String, destDir: File, @@ -211,15 +393,15 @@ private suspend fun Context.copyData( ) = withContext(Dispatchers.IO) { assets.list(assetDirName)?.forEach { name -> val assetPath = "$assetDirName/$name" - Log.v(LOG_TAG, "Processing $assetPath...") + Log.v(TAG, "Processing $assetPath...") val destination = File(destDir, name) - Log.v(LOG_TAG, "Copying $assetPath to $destination...") + Log.v(TAG, "Copying $assetPath to $destination...") printMessage("Copying $name...\n") assets.open(assetPath).use { input -> destination.outputStream().use { output -> input.copyTo(output) } } - Log.v(LOG_TAG, "Copied $assetPath to $destination") + Log.v(TAG, "Copied $assetPath to $destination") } -} \ No newline at end of file +} diff --git a/examples/whisper.android/build.gradle b/examples/whisper.android/build.gradle index ae1f486b658..1d6c1312e8d 100644 --- a/examples/whisper.android/build.gradle +++ b/examples/whisper.android/build.gradle @@ -1,6 +1,6 @@ // Top-level build file where you can add configuration options common to all sub-projects/modules. 
plugins { - id 'com.android.application' version '8.1.1' apply false - id 'com.android.library' version '8.1.1' apply false + id 'com.android.application' version '8.3.0' apply false + id 'com.android.library' version '8.3.0' apply false id 'org.jetbrains.kotlin.android' version '1.9.0' apply false } \ No newline at end of file diff --git a/examples/whisper.android/gradle/wrapper/gradle-wrapper.properties b/examples/whisper.android/gradle/wrapper/gradle-wrapper.properties index a7b943c97c8..70a33f5847d 100644 --- a/examples/whisper.android/gradle/wrapper/gradle-wrapper.properties +++ b/examples/whisper.android/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ #Wed Dec 14 10:37:24 EST 2022 distributionBase=GRADLE_USER_HOME -distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip distributionPath=wrapper/dists zipStorePath=wrapper/dists zipStoreBase=GRADLE_USER_HOME diff --git a/examples/whisper.android/lib/build.gradle b/examples/whisper.android/lib/build.gradle index e4779e26527..9c40570790b 100644 --- a/examples/whisper.android/lib/build.gradle +++ b/examples/whisper.android/lib/build.gradle @@ -10,8 +10,6 @@ android { defaultConfig { minSdk 26 targetSdk 34 - versionCode 1 - versionName "1.0" ndk { abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64' diff --git a/examples/whisper.android/lib/src/main/java/com/whispercpp/whisper/LibWhisper.kt b/examples/whisper.android/lib/src/main/java/com/whispercpp/whisper/LibWhisper.kt index 513202fa689..b74c6a0560c 100644 --- a/examples/whisper.android/lib/src/main/java/com/whispercpp/whisper/LibWhisper.kt +++ b/examples/whisper.android/lib/src/main/java/com/whispercpp/whisper/LibWhisper.kt @@ -28,6 +28,18 @@ class WhisperContext private constructor(private var ptr: Long) { } } } + suspend fun streamTranscribeData(data: FloatArray): String = withContext(scope.coroutineContext) { + require(ptr != 0L) + val numThreads = 
WhisperCpuConfig.preferredThreadCount + Log.d(LOG_TAG, "Selecting $numThreads threads") + WhisperLib.fullStreamTranscribe(ptr, numThreads, data) + val textCount = WhisperLib.getTextSegmentCount(ptr) + return@withContext buildString { + for (i in 0 until textCount) { + append(WhisperLib.getTextSegment(ptr, i)) + } + } + } suspend fun benchMemory(nthreads: Int): String = withContext(scope.coroutineContext) { return@withContext WhisperLib.benchMemcpy(nthreads) @@ -134,6 +146,7 @@ private class WhisperLib { external fun getSystemInfo(): String external fun benchMemcpy(nthread: Int): String external fun benchGgmlMulMat(nthread: Int): String + external fun fullStreamTranscribe(contextPtr: Long, numThreads: Int, audioData: FloatArray) } } diff --git a/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt b/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt index faaa7b662cf..0385c3d83f1 100644 --- a/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt +++ b/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt @@ -75,3 +75,8 @@ endif () build_library("whisper") # Default target include_directories(${WHISPER_LIB_DIR}) +find_program(CCACHE_FOUND ccache) +if(CCACHE_FOUND) + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) +endif(CCACHE_FOUND) \ No newline at end of file diff --git a/examples/whisper.android/lib/src/main/jni/whisper/jni.c b/examples/whisper.android/lib/src/main/jni/whisper/jni.c index 7f9d724617d..34440794b8d 100644 --- a/examples/whisper.android/lib/src/main/jni/whisper/jni.c +++ b/examples/whisper.android/lib/src/main/jni/whisper/jni.c @@ -192,7 +192,45 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe( } (*env)->ReleaseFloatArrayElements(env, audio_data, audio_data_arr, JNI_ABORT); } +//streaming attempt: +JNIEXPORT void JNICALL +Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullStreamTranscribe( + JNIEnv *env, 
jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data) { + UNUSED(thiz); + struct whisper_context *context = (struct whisper_context *) context_ptr; + jfloat *audio_data_arr = (*env)->GetFloatArrayElements(env, audio_data, NULL); + const jsize audio_data_length = (*env)->GetArrayLength(env, audio_data); + // The below adapted from the Objective-C iOS sample + struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY); + params.print_realtime = true; + params.print_progress = false; + params.print_timestamps = true; + params.print_special = false; + params.translate = false; + params.language = "en"; + params.n_threads = num_threads; //how many threads can I use on an S23? + //potentially use an initial prompt for custom vocabularies? + // initial_prompt: Optional[str] + // Optional text to provide as a prompt for the first window. This can be used to provide, or + // "prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns + // to make it more likely to predict those words correctly. + //params.initial_prompt = "Transcription of Tactical Combat Casualty Drugs such as Fentanyl, Ibuprofen, Amoxicillin, Epinephrine, TXA, Hextend, Ketamine, Oral Transmucosal Fentanyl Citrate. 
"; + params.offset_ms = 0; + params.no_context = true; + params.single_segment = true; //hard code for true, objc example has it based on a button press + params.no_timestamps = params.single_segment; //from streaming objc example + + whisper_reset_timings(context); + + LOGI("About to run whisper_full"); + if (whisper_full(context, params, audio_data_arr, audio_data_length) != 0) { + LOGI("Failed to run the model"); + } else { + whisper_print_timings(context); + } + (*env)->ReleaseFloatArrayElements(env, audio_data, audio_data_arr, JNI_ABORT); +} JNIEXPORT jint JNICALL Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegmentCount( JNIEnv *env, jobject thiz, jlong context_ptr) { diff --git a/ggml-backend.c b/ggml-backend.c index 402d86ef3ac..219582757c5 100644 --- a/ggml-backend.c +++ b/ggml-backend.c @@ -1723,6 +1723,9 @@ ggml_backend_sched_t ggml_backend_sched_new( struct ggml_backend_sched * sched = calloc(sizeof(struct ggml_backend_sched), 1); + + fprintf(stderr, "ggml_backend_sched size: %zu KB\n", sizeof(struct ggml_backend_sched)/1024); + // initialize hash table sched->hash_set = ggml_hash_set_new(graph_size); sched->tensor_backend_id = calloc(sizeof(sched->tensor_backend_id[0]), sched->hash_set.size); @@ -1732,6 +1735,7 @@ ggml_backend_sched_t ggml_backend_sched_new( sched->node_backend_ids = calloc(sizeof(sched->node_backend_ids[0]), nodes_size); sched->leaf_backend_ids = calloc(sizeof(sched->leaf_backend_ids[0]), nodes_size); + sched->n_backends = n_backends; sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;