royshil · FabioZumbi12 · Nov 27, 2025 · Nov 27, 2025 · Nov 27, 2025 · Nov 27, 2025
diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini
@@ -1,19 +1,39 @@
 CleanStreamAudioFilter="Clean stream audio filter"
 CleanStreamFilterPlugin="Clean stream filter plugin"
+Language="Language"
+None="None"
+Silence="Silence"
+Beep="Beep"
+Random="Random"
+Horn="Horn"
+External="External"
+WavFilesFilter="WAV files (*.wav);;All files (*.*)"
+AdvancedSettings="Advanced Settings"
+BeamSearch="Beam search"
+Greedy="Greedy"
+initial_prompt="Initial prompt"
+speed_up="Speed up"
+suppress_blank="Suppress blank"
+suppress_nst="Suppress non-speech tokens"
+temperature="Temperature"
+max_initial_ts="Max initial timestamp"
+length_penalty="Length penalty"
+CPUOnly="CPU only"
+GPUName="GPU: "
 detect_regex="Detect regex"
 advanced_settings="Advanced settings"
-filler_p_threshold="Filler p threshold"
-do_silence="Do silence"
 vad_enabled="VAD enabled"
 log_level="Log level"
 log_words="Log words"
 whisper_model="Whisper model"
 Whisper_Parameters="Whisper Parameters"
+whisper_sampling_method_tooltip="Greedy: Fastest method. Picks the most likely word at each step. Good for speed.\nBeam Search: More accurate but slower. Explores multiple possibilities to find the best overall sentence."
 whisper_sampling_method="Whisper sampling method"
 n_threads="Number of threads"
 n_max_text_ctx="Number of max text context"
 no_context="No context"
 replace_sound_path="Replace Sound Path"
+replace_sound_random_folder="Random Sounds Folder"
 replace_sound="Replace Sound"
 backend_group="Whisper Backend Configuration"
 backend_device="GPU device"

diff --git a/data/locale/pt-BR.ini b/data/locale/pt-BR.ini
@@ -0,0 +1,41 @@
+CleanStreamAudioFilter="Filtro de áudio CleanStream"
+CleanStreamFilterPlugin="Plugin de filtro CleanStream"
+Language="Idioma"
+None="Nenhum"
+Silence="Silêncio"
+Beep="Beep"
+Random="Aleatório"
+Horn="Buzina"
+External="Externo"
+WavFilesFilter="Arquivos WAV (*.wav);;Todos os arquivos (*.*)"
+AdvancedSettings="Configurações Avançadas"
+BeamSearch="Busca em feixe"
+Greedy="Busca Relaxada"
+initial_prompt="Prompt inicial"
+speed_up="Acelerar"
+suppress_blank="Suprimir espaços em branco"
+suppress_nst="Suprimir tokens não-falados"
+temperature="Temperatura"
+max_initial_ts="Timestamp inicial máx."
+length_penalty="Penalidade de comprimento"
+CPUOnly="Apenas CPU"
+GPUName="GPU: "
+detect_regex="Detectar com Regex"
+advanced_settings="Configurações avançadas"
+vad_enabled="VAD ativado"
+log_level="Nível de log"
+log_words="Registrar palavras"
+whisper_model="Modelo Whisper"
+Whisper_Parameters="Parâmetros do Whisper"
+whisper_sampling_method_tooltip="Busca Relaxada: Método mais rápido. Escolhe a palavra mais provável a cada passo. Bom para velocidade.\nBusca em Feixe: Mais preciso, porém mais lento. Explora múltiplas possibilidades para encontrar a melhor frase."
+whisper_sampling_method="Método de amostragem Whisper"
+n_threads="Número de threads"
+n_max_text_ctx="Número máximo de contexto de texto"
+no_context="Sem contexto"
+replace_sound_path="Caminho do Som de Substituição"
+replace_sound_random_folder="Pasta de Sons Aleatórios"
+replace_sound="Som de Substituição"
+backend_group="Configuração do Backend Whisper"
+backend_device="Dispositivo GPU"
+enable_flash_attn="Ativar Flash Attention"
+enable_flash_attn_tooltip="Melhora a velocidade de transcrição em algumas GPUs (NVIDIA: Ampere ou mais recente, AMD: RDNA ou mais recente). Pode diminuir a velocidade em outros casos."
diff --git a/src/cleanstream-filter-data.h b/src/cleanstream-filter-data.h
@@ -24,6 +24,7 @@ enum ReplaceSounds {
 	REPLACE_SOUNDS_SILENCE = 2,
 	REPLACE_SOUNDS_HORN = 3,
 	REPLACE_SOUNDS_EXTERNAL = 4,
+	REPLACE_SOUNDS_RANDOM = 5,
 };
 
 // Audio packet info
@@ -90,7 +91,10 @@ struct cleanstream_data {
 	bool log_words;
 	bool active;
 	long long replace_sound;
+	std::string current_random_audio;
+	std::vector<std::string> random_audio_files;
 	std::string replace_sound_external;
+	std::string replace_sound_random_folder;
 };
 
 #endif
diff --git a/src/cleanstream-filter.cpp b/src/cleanstream-filter.cpp
@@ -23,6 +23,7 @@
 #include "whisper-utils/whisper-language.h"
 #include "whisper-utils/whisper-processing.h"
 #include "whisper-utils/whisper-utils.h"
+#include "audio-utils/read-audio-file.h"
 #include "cleanstream-filter-data.h"
 
 #include "plugin-support.h"
@@ -104,13 +105,38 @@ struct obs_audio_data *cleanstream_filter_audio(void *data, struct obs_audio_dat
 
 	struct cleanstream_data *gf = static_cast<struct cleanstream_data *>(data);
 
-	if (!gf->active) {
+	bool muted = obs_source_muted(obs_filter_get_parent(gf->context));
+
+	if (!gf->active || muted) {
+		if (gf->whisper_context != nullptr) {
+			obs_log(LOG_INFO, "Source is muted or filter is inactive, shutting down whisper thread");
+			shutdown_whisper_thread(gf);
+
+			// Clear audio buffers to prevent leftover sound on unmute
+			std::lock_guard<std::mutex> lock(gf->whisper_buf_mutex);
+			for (size_t c = 0; c < gf->channels; c++) {
+				deque_free(&gf->input_buffers[c]);
+				deque_init(&gf->input_buffers[c]);
+			}
+			deque_free(&gf->info_buffer);
+			deque_init(&gf->info_buffer);
+			gf->audioFilePointer = 0;
+			gf->current_result = DETECTION_RESULT_UNKNOWN;
+			gf->current_result_start_timestamp = 0;
+			gf->current_result_end_timestamp = 0;
+		}
 		return audio;
 	}
 
 	if (gf->whisper_context == nullptr) {
-		// Whisper not initialized, just pass through
-		return audio;
+		// Whisper not initialized, try to start it
+		obs_log(LOG_INFO, "Whisper context is null, attempting to start whisper thread");
+		obs_data_t *settings = obs_source_get_settings(gf->context);
+		update_whisper_model(gf, settings);
+		obs_data_release(settings);
+		// If it's still null, pass through audio
+		if (gf->whisper_context == nullptr)
+			return audio;
 	}
 
 	size_t input_buffer_size = 0;
@@ -191,16 +217,25 @@ struct obs_audio_data *cleanstream_filter_audio(void *data, struct obs_audio_dat
 					temporary_buffers[i].resize(num_frames, 0.0f);
 				}
 			} else if (gf->replace_sound == REPLACE_SOUNDS_HORN ||
+				   gf->replace_sound == REPLACE_SOUNDS_RANDOM ||
 				   gf->replace_sound == REPLACE_SOUNDS_BEEP ||
 				   gf->replace_sound == REPLACE_SOUNDS_EXTERNAL) {
 
 				std::string replace_audio_name =
 					gf->replace_sound == REPLACE_SOUNDS_HORN   ? "horn.wav"
 					: gf->replace_sound == REPLACE_SOUNDS_BEEP ? "beep.wav"
 					: gf->replace_sound == REPLACE_SOUNDS_EXTERNAL
-						? gf->replace_sound_external
+						  ? gf->replace_sound_external
+					: gf->replace_sound == REPLACE_SOUNDS_RANDOM
+						? gf->current_random_audio
 						: "";
 
+				if (gf->replace_sound == REPLACE_SOUNDS_RANDOM && gf->audioFilePointer == 0 && !gf->random_audio_files.empty()) {
+					size_t random_index = rand() % gf->random_audio_files.size();
+					gf->current_random_audio = gf->random_audio_files[random_index];
+					replace_audio_name = gf->current_random_audio;
+				}
+
 				if (replace_audio_name != "") {
 					// replace the audio with beep or horn sound
 					const AudioDataFloat &replace_audio =
@@ -288,6 +323,70 @@ void cleanstream_update(void *data, obs_data_t *s)
 	gf->log_words = obs_data_get_bool(s, "log_words");
 	gf->delay_ms = BUFFER_SIZE_MSEC + INITIAL_DELAY_MSEC;
 	gf->current_result = DetectionResult::DETECTION_RESULT_UNKNOWN;
+
+#if defined(_WIN32) || defined(__APPLE__)
+	// Load the external replacement sound if one is configured
+	if (gf->replace_sound == REPLACE_SOUNDS_EXTERNAL) {
+		std::string replace_sound_path_ =
+			obs_data_get_string(s, "replace_sound_path");
+		if (!replace_sound_path_.empty()) {
+			auto cached = gf->audioFileCache.find(replace_sound_path_);
+			if (cached == gf->audioFileCache.end()) {
+				AudioDataFloat audioFile = read_audio_file(
+					replace_sound_path_.c_str(), gf->sample_rate);
+				if (!audioFile.empty()) {
+					gf->audioFileCache[replace_sound_path_] = audioFile;
+				}
+				cached = gf->audioFileCache.find(replace_sound_path_);
+			}
+			// Select the file whenever usable audio is cached for it, not only
+			// on the first load; otherwise switching back to a previously used
+			// file would leave the old selection in place.
+			if (cached != gf->audioFileCache.end() && !cached->second.empty()) {
+				gf->replace_sound_external = replace_sound_path_;
+			}
+		}
+	}
+
+	// Index and cache the WAV files of the random sounds folder, if configured
+	if (gf->replace_sound == REPLACE_SOUNDS_RANDOM) {
+		std::string random_folder_path =
+			obs_data_get_string(s, "replace_sound_random_folder");
+		if (!random_folder_path.empty()) {
+			gf->replace_sound_random_folder = random_folder_path;
+			gf->random_audio_files.clear();
+			// Non-throwing overloads: a missing or unreadable folder must not
+			// raise an exception out of cleanstream_update.
+			std::error_code ec;
+			std::filesystem::directory_iterator dir_it(random_folder_path, ec);
+			const std::filesystem::directory_iterator dir_end;
+			for (; !ec && dir_it != dir_end; dir_it.increment(ec)) {
+				if (dir_it->path().extension() != ".wav") {
+					continue;
+				}
+				std::string file_path = dir_it->path().string();
+				if (gf->audioFileCache.find(file_path) ==
+				    gf->audioFileCache.end()) {
+					AudioDataFloat audioFile = read_audio_file(
+						file_path.c_str(), gf->sample_rate);
+					if (audioFile.empty()) {
+						// Skip unreadable files so they can never be picked
+						obs_log(LOG_ERROR, "Failed to load audio file: %s",
+							file_path.c_str());
+						continue;
+					}
+					gf->audioFileCache[file_path] = audioFile;
+				}
+				gf->random_audio_files.push_back(file_path);
+			}
+			if (ec) {
+				obs_log(LOG_ERROR, "Failed to read random sounds folder '%s': %s",
+					random_folder_path.c_str(), ec.message().c_str());
+			}
+		}
+	}
+#endif
+
 	gf->current_result_start_timestamp = 0;
 	gf->current_result_end_timestamp = 0;
 
@@ -381,6 +460,7 @@ void *cleanstream_create(obs_data_t *settings, obs_source_t *filter)
 	gf->detect_regex = nullptr;
 	gf->replace_sound = REPLACE_SOUNDS_SILENCE;
 	gf->replace_sound_external = "";
+	gf->replace_sound_random_folder = "";
 
 	// get absolute path of the audio files
 	char *module_data_sounds_folder_path = obs_module_file("sounds");
@@ -481,15 +561,14 @@ void add_whisper_backend_group_properties(obs_properties_t *ppts, struct cleanst
 	obs_property_t *backend_device =
 		obs_properties_add_list(backend_group, "backend_device", MT_("backend_device"),
 					OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
-
-	obs_property_list_add_int(backend_device, "CPU only", -1);
+	
+	obs_property_list_add_int(backend_device, MT_("CPUOnly"), -1);
 	for (size_t i = 0; i < gf->gpu_devices.size(); i++) {
 		auto name = gf->gpu_devices.at(i).device_name;
 		auto description = gf->gpu_devices.at(i).device_description;
 		obs_property_list_add_int(
-			backend_device,
-			std::string("GPU: ").append(name).append(" - ").append(description).c_str(),
-			i);
+			backend_device, std::string(MT_("GPUName"))
+						.append(name).append(" - ").append(description).c_str(), i);
 	}
 
 	obs_property_t *enable_flash_attn = obs_properties_add_bool(
@@ -509,29 +588,41 @@ obs_properties_t *cleanstream_properties(void *data)
 	obs_property_t *replace_sounds_list =
 		obs_properties_add_list(ppts, "replace_sound", MT_("replace_sound"),
 					OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
-	obs_property_list_add_int(replace_sounds_list, "None", REPLACE_SOUNDS_NONE);
-	obs_property_list_add_int(replace_sounds_list, "Silence", REPLACE_SOUNDS_SILENCE);
+	obs_property_list_add_int(replace_sounds_list, MT_("None"), REPLACE_SOUNDS_NONE);
+	obs_property_list_add_int(replace_sounds_list, MT_("Silence"), REPLACE_SOUNDS_SILENCE);
 	// on windows and mac, add external file path for replace sound
 #if defined(_WIN32) || defined(__APPLE__)
 	if (!gf->audioFileCache["beep.wav"].empty()) {
-		obs_property_list_add_int(replace_sounds_list, "Beep", REPLACE_SOUNDS_BEEP);
+		obs_property_list_add_int(replace_sounds_list, MT_("Beep"), REPLACE_SOUNDS_BEEP);
 	}
+	// The random option plays files from a user-selected folder, so it is offered regardless of the bundled horn.wav
+	obs_property_list_add_int(replace_sounds_list, MT_("Random"), REPLACE_SOUNDS_RANDOM);
 	if (!gf->audioFileCache["horn.wav"].empty()) {
-		obs_property_list_add_int(replace_sounds_list, "Horn", REPLACE_SOUNDS_HORN);
+		obs_property_list_add_int(replace_sounds_list, MT_("Horn"), REPLACE_SOUNDS_HORN);
 	}
-	obs_property_list_add_int(replace_sounds_list, "External", REPLACE_SOUNDS_EXTERNAL);
+
+	obs_property_list_add_int(replace_sounds_list, MT_("External"), REPLACE_SOUNDS_EXTERNAL);
 
 	// add external file path for replace sound
+	obs_property_t *random_sound_path = nullptr;
 	obs_property_t *replace_sound_path = obs_properties_add_path(
 		ppts, "replace_sound_path", MT_("replace_sound_path"), OBS_PATH_FILE,
-		"WAV files (*.wav);;All files (*.*)", nullptr);
+		MT_("WavFilesFilter"), nullptr);
+
+	// add folder path for random sounds
+	random_sound_path = obs_properties_add_path(
+		ppts, "replace_sound_random_folder", MT_("replace_sound_random_folder"),
+		OBS_PATH_DIRECTORY, nullptr, nullptr);
 
 	// show/hide external file path based on the selected replace sound
 	obs_property_set_modified_callback(replace_sounds_list, [](obs_properties_t *props,
 								   obs_property_t *property,
 								   obs_data_t *settings) {
 		UNUSED_PARAMETER(property);
 		const long long replace_sound = obs_data_get_int(settings, "replace_sound");
+		obs_property_set_visible(
+			obs_properties_get(props, "replace_sound_random_folder"),
+			replace_sound == REPLACE_SOUNDS_RANDOM);
 		obs_property_set_visible(obs_properties_get(props, "replace_sound_path"),
 					 replace_sound == REPLACE_SOUNDS_EXTERNAL);
 		return true;
@@ -566,6 +657,54 @@ obs_properties_t *cleanstream_properties(void *data)
 			return true;
 		},
 		gf);
+
+	// Re-index and cache the WAV files whenever the random sounds folder changes
+	obs_property_set_modified_callback2(
+		random_sound_path,
+		[](void *data_, obs_properties_t *props, obs_property_t *property,
+		   obs_data_t *settings) {
+			UNUSED_PARAMETER(property);
+			UNUSED_PARAMETER(props);
+			struct cleanstream_data *gf_ =
+				static_cast<struct cleanstream_data *>(data_);
+			gf_->random_audio_files.clear();
+			std::string random_folder_path =
+				obs_data_get_string(settings, "replace_sound_random_folder");
+			if (random_folder_path.empty()) {
+				return true;
+			}
+			gf_->replace_sound_random_folder = random_folder_path;
+			// Non-throwing overloads: a missing or unreadable folder must not
+			// raise an exception out of this properties callback.
+			std::error_code ec;
+			std::filesystem::directory_iterator dir_it(random_folder_path, ec);
+			const std::filesystem::directory_iterator dir_end;
+			for (; !ec && dir_it != dir_end; dir_it.increment(ec)) {
+				if (dir_it->path().extension() != ".wav") {
+					continue;
+				}
+				std::string file_path = dir_it->path().string();
+				if (gf_->audioFileCache.find(file_path) ==
+				    gf_->audioFileCache.end()) {
+					AudioDataFloat audioFile = read_audio_file(
+						file_path.c_str(), gf_->sample_rate);
+					if (audioFile.empty()) {
+						obs_log(LOG_ERROR, "Failed to load audio file: %s",
+							file_path.c_str());
+						// Skip unreadable files so they can never be picked
+						continue;
+					}
+					gf_->audioFileCache[file_path] = audioFile;
+				}
+				gf_->random_audio_files.push_back(file_path);
+			}
+			if (ec) {
+				obs_log(LOG_ERROR, "Failed to read random sounds folder '%s': %s",
+					random_folder_path.c_str(), ec.message().c_str());
+			}
+			return true;
+		},
+		gf);
 #endif
 
 	// Add a list of available whisper models to download
@@ -583,7 +711,7 @@ obs_properties_t *cleanstream_properties(void *data)
 
 	// Add language selector
 	obs_property_t *whisper_language_select_list =
-		obs_properties_add_list(ppts, "whisper_language_select", "Language",
+		obs_properties_add_list(ppts, "whisper_language_select", MT_("Language"),
 					OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
 	// get a sorted list of available languages
 	std::vector<std::string> whisper_available_lang_keys;
@@ -617,7 +745,7 @@ obs_properties_t *cleanstream_properties(void *data)
 	add_whisper_backend_group_properties(ppts, gf);
 
 	obs_properties_t *advanced_settings_group = obs_properties_create();
-	obs_properties_add_group(ppts, "advanced_settings_group", MT_("Advanced_Settings"),
+	obs_properties_add_group(ppts, "advanced_settings_group", MT_("AdvancedSettings"),
 				 OBS_GROUP_NORMAL, advanced_settings_group);
 
 	obs_properties_add_bool(advanced_settings_group, "vad_enabled", MT_("vad_enabled"));
@@ -636,9 +764,10 @@ obs_properties_t *cleanstream_properties(void *data)
 	obs_property_t *whisper_sampling_method_list = obs_properties_add_list(
 		whisper_params_group, "whisper_sampling_method", MT_("whisper_sampling_method"),
 		OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
-	obs_property_list_add_int(whisper_sampling_method_list, "Beam search",
+	obs_property_list_add_int(whisper_sampling_method_list, MT_("BeamSearch"),
 				  WHISPER_SAMPLING_BEAM_SEARCH);
-	obs_property_list_add_int(whisper_sampling_method_list, "Greedy", WHISPER_SAMPLING_GREEDY);
+	obs_property_set_long_description(whisper_sampling_method_list, MT_("whisper_sampling_method_tooltip"));
+	obs_property_list_add_int(whisper_sampling_method_list, MT_("Greedy"), WHISPER_SAMPLING_GREEDY);
 
 	// int n_threads;
 	obs_properties_add_int_slider(whisper_params_group, "n_threads", MT_("n_threads"), 1, 8, 1);