Skip to content
Open
24 changes: 22 additions & 2 deletions data/locale/en-US.ini
Original file line number Diff line number Diff line change
@@ -1,19 +1,39 @@
CleanStreamAudioFilter="Clean stream audio filter"
CleanStreamFilterPlugin="Clean stream filter plugin"
Language="Language"
None="None"
Silence="Silence"
Beep="Beep"
Random="Random"
Horn="Horn"
External="External"
WavFilesFilter="WAV files (*.wav);;All files (*.*)"
AdvancedSettings="Advanced Settings"
BeamSearch="Beam search"
Greedy="Greedy"
initial_prompt="Initial prompt"
speed_up="Speed up"
suppress_blank="Suppress blank"
suppress_nst="Suppress non-speech tokens"
temperature="Temperature"
max_initial_ts="Max initial timestamp"
length_penalty="Length penalty"
CPUOnly="CPU only"
GPUName="GPU: "
detect_regex="Detect regex"
advanced_settings="Advanced settings"
filler_p_threshold="Filler p threshold"
do_silence="Do silence"
vad_enabled="VAD enabled"
log_level="Log level"
log_words="Log words"
whisper_model="Whisper model"
Whisper_Parameters="Whisper Parameters"
whisper_sampling_method_tooltip="Greedy: Fastest method. Picks the most likely word at each step. Good for speed.\nBeam Search: More accurate but slower. Explores multiple possibilities to find the best overall sentence."
whisper_sampling_method="Whisper sampling method"
n_threads="Number of threads"
n_max_text_ctx="Number of max text context"
no_context="No context"
replace_sound_path="Replace Sound Path"
replace_sound_random_folder="Random Sounds Folder"
replace_sound="Replace Sound"
backend_group="Whisper Backend Configuration"
backend_device="GPU device"
Expand Down
41 changes: 41 additions & 0 deletions data/locale/pt-BR.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
CleanStreamAudioFilter="Filtro de áudio CleanStream"
CleanStreamFilterPlugin="Plugin de filtro CleanStream"
Language="Idioma"
None="Nenhum"
Silence="Silêncio"
Beep="Beep"
Random="Aleatório"
Horn="Buzina"
External="Externo"
WavFilesFilter="Arquivos WAV (*.wav);;Todos os arquivos (*.*)"
AdvancedSettings="Configurações Avançadas"
BeamSearch="Busca em feixe"
Greedy="Busca Relaxada"
initial_prompt="Prompt inicial"
speed_up="Acelerar"
suppress_blank="Suprimir espaços em branco"
suppress_nst="Suprimir tokens não-falados"
temperature="Temperatura"
max_initial_ts="Timestamp inicial máx."
length_penalty="Penalidade de comprimento"
CPUOnly="Apenas CPU"
GPUName="GPU: "
detect_regex="Detectar com Regex"
advanced_settings="Configurações avançadas"
vad_enabled="VAD ativado"
log_level="Nível de log"
log_words="Registrar palavras"
whisper_model="Modelo Whisper"
Whisper_Parameters="Parâmetros do Whisper"
whisper_sampling_method_tooltip="Busca Relaxada: Método mais rápido. Escolhe a palavra mais provável a cada passo. Bom para velocidade.\nBusca em Feixe: Mais preciso, porém mais lento. Explora múltiplas possibilidades para encontrar a melhor frase."
whisper_sampling_method="Método de amostragem Whisper"
n_threads="Número de threads"
n_max_text_ctx="Número máximo de contexto de texto"
no_context="Sem contexto"
replace_sound_path="Caminho do Som de Substituição"
replace_sound_random_folder="Pasta de Sons Aleatórios"
replace_sound="Som de Substituição"
backend_group="Configuração do Backend Whisper"
backend_device="Dispositivo GPU"
enable_flash_attn="Ativar Flash Attention"
enable_flash_attn_tooltip="Melhora a velocidade de transcrição em algumas GPUs (NVidia: Ampere ou mais recente, AMD: RDNA ou mais recente). Pode diminuir a velocidade em outros casos"
4 changes: 4 additions & 0 deletions src/cleanstream-filter-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ enum ReplaceSounds {
REPLACE_SOUNDS_SILENCE = 2,
REPLACE_SOUNDS_HORN = 3,
REPLACE_SOUNDS_EXTERNAL = 4,
REPLACE_SOUNDS_RANDOM = 5,
};

// Audio packet info
Expand Down Expand Up @@ -90,7 +91,10 @@ struct cleanstream_data {
bool log_words;
bool active;
long long replace_sound;
std::string current_random_audio;
std::vector<std::string> random_audio_files;
std::string replace_sound_external;
std::string replace_sound_random_folder;
};

#endif
167 changes: 148 additions & 19 deletions src/cleanstream-filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "whisper-utils/whisper-language.h"
#include "whisper-utils/whisper-processing.h"
#include "whisper-utils/whisper-utils.h"
#include "audio-utils/read-audio-file.h"
#include "cleanstream-filter-data.h"

#include "plugin-support.h"
Expand Down Expand Up @@ -104,13 +105,38 @@ struct obs_audio_data *cleanstream_filter_audio(void *data, struct obs_audio_dat

struct cleanstream_data *gf = static_cast<struct cleanstream_data *>(data);

if (!gf->active) {
// Query the mute state of the source this filter is attached to.
bool muted = obs_source_muted(obs_filter_get_parent(gf->context));

// When the filter is inactive or its parent source is muted, the incoming
// audio is returned untouched. If a whisper context still exists at that
// point it is shut down and all buffered state is reset first.
if (!gf->active || muted) {
if (gf->whisper_context != nullptr) {
obs_log(LOG_INFO, "Source is muted or filter is inactive, shutting down whisper thread");
// NOTE(review): this runs inside the audio filter callback; confirm
// shutdown_whisper_thread() cannot stall audio processing for long.
shutdown_whisper_thread(gf);

// Clear audio buffers to prevent leftover sound on unmute
std::lock_guard<std::mutex> lock(gf->whisper_buf_mutex);
// Free and re-initialize each per-channel input deque.
for (size_t c = 0; c < gf->channels; c++) {
deque_free(&gf->input_buffers[c]);
deque_init(&gf->input_buffers[c]);
}
deque_free(&gf->info_buffer);
deque_init(&gf->info_buffer);
// Reset the replacement-sound read position and the detection state.
gf->audioFilePointer = 0;
gf->current_result = DETECTION_RESULT_UNKNOWN;
gf->current_result_start_timestamp = 0;
gf->current_result_end_timestamp = 0;
}
return audio;
}

if (gf->whisper_context == nullptr) {
// Whisper not initialized, just pass through
return audio;
// Whisper not initialized, try to start it
obs_log(LOG_INFO, "Whisper context is null, attempting to start whisper thread");
obs_data_t *settings = obs_source_get_settings(gf->context);
update_whisper_model(gf, settings);
obs_data_release(settings);
// If it's still null, pass through audio
if (gf->whisper_context == nullptr)
return audio;
}

size_t input_buffer_size = 0;
Expand Down Expand Up @@ -191,16 +217,25 @@ struct obs_audio_data *cleanstream_filter_audio(void *data, struct obs_audio_dat
temporary_buffers[i].resize(num_frames, 0.0f);
}
} else if (gf->replace_sound == REPLACE_SOUNDS_HORN ||
gf->replace_sound == REPLACE_SOUNDS_RANDOM ||
gf->replace_sound == REPLACE_SOUNDS_BEEP ||
gf->replace_sound == REPLACE_SOUNDS_EXTERNAL) {

std::string replace_audio_name =
gf->replace_sound == REPLACE_SOUNDS_HORN ? "horn.wav"
: gf->replace_sound == REPLACE_SOUNDS_BEEP ? "beep.wav"
: gf->replace_sound == REPLACE_SOUNDS_EXTERNAL
? gf->replace_sound_external
? gf->replace_sound_external
: gf->replace_sound == REPLACE_SOUNDS_RANDOM
? gf->current_random_audio
: "";

// In random mode, pick a new file from random_audio_files only while
// audioFilePointer is 0 and the list is non-empty; otherwise the previously
// chosen current_random_audio (assigned to replace_audio_name above) is kept.
// NOTE(review): presumably audioFilePointer == 0 marks the start of a
// replacement sound's playback — confirm against where it is advanced/reset.
if (gf->replace_sound == REPLACE_SOUNDS_RANDOM && gf->audioFilePointer == 0 && !gf->random_audio_files.empty()) {
size_t random_index = rand() % gf->random_audio_files.size();
gf->current_random_audio = gf->random_audio_files[random_index];
replace_audio_name = gf->current_random_audio;
}

if (replace_audio_name != "") {
// replace the audio with beep or horn sound
const AudioDataFloat &replace_audio =
Expand Down Expand Up @@ -288,6 +323,50 @@ void cleanstream_update(void *data, obs_data_t *s)
gf->log_words = obs_data_get_bool(s, "log_words");
gf->delay_ms = BUFFER_SIZE_MSEC + INITIAL_DELAY_MSEC;
gf->current_result = DetectionResult::DETECTION_RESULT_UNKNOWN;

#if defined(_WIN32) || defined(__APPLE__)
// Preload the replacement sounds referenced by the current settings so they
// are available in gf->audioFileCache without the properties dialog having
// been opened.

// Load external sound file if configured
if (gf->replace_sound == REPLACE_SOUNDS_EXTERNAL) {
	const std::string replace_sound_path_ = obs_data_get_string(s, "replace_sound_path");
	if (!replace_sound_path_.empty()) {
		auto cached = gf->audioFileCache.find(replace_sound_path_);
		if (cached == gf->audioFileCache.end()) {
			AudioDataFloat audioFile =
				read_audio_file(replace_sound_path_.c_str(), gf->sample_rate);
			if (!audioFile.empty()) {
				gf->audioFileCache[replace_sound_path_] = audioFile;
				gf->replace_sound_external = replace_sound_path_;
			} else {
				obs_log(LOG_ERROR, "Failed to load audio file: %s",
					replace_sound_path_.c_str());
			}
		} else if (!cached->second.empty()) {
			// Already decoded earlier: still make it the active external
			// sound so the selection follows the settings.
			gf->replace_sound_external = replace_sound_path_;
		}
	}
}

// Load random sound files if folder is configured
if (gf->replace_sound == REPLACE_SOUNDS_RANDOM) {
	const std::string random_folder_path =
		obs_data_get_string(s, "replace_sound_random_folder");
	if (!random_folder_path.empty()) {
		gf->replace_sound_random_folder = random_folder_path;
		gf->random_audio_files.clear();

		// Use the non-throwing overloads: the folder comes from user settings
		// and may be missing or unreadable, and an exception must not escape
		// this callback.
		std::error_code ec;
		std::filesystem::directory_iterator dir_it(random_folder_path, ec);
		const std::filesystem::directory_iterator dir_end;
		for (; !ec && dir_it != dir_end; dir_it.increment(ec)) {
			const std::filesystem::path &entry_path = dir_it->path();
			if (entry_path.extension() != ".wav") {
				continue;
			}
			const std::string file_path = entry_path.string();
			auto cached = gf->audioFileCache.find(file_path);
			if (cached == gf->audioFileCache.end()) {
				AudioDataFloat audioFile =
					read_audio_file(file_path.c_str(), gf->sample_rate);
				if (audioFile.empty()) {
					obs_log(LOG_ERROR, "Failed to load audio file: %s",
						file_path.c_str());
					continue;
				}
				gf->audioFileCache[file_path] = audioFile;
			} else if (cached->second.empty()) {
				// A cached entry without samples cannot be played.
				continue;
			}
			// Only list files that have audio in the cache, so a random
			// pick never selects a file that failed to load.
			gf->random_audio_files.push_back(file_path);
		}
		if (ec) {
			obs_log(LOG_ERROR, "Failed to read random sounds folder '%s': %s",
				random_folder_path.c_str(), ec.message().c_str());
		}
	}
}
#endif

gf->current_result_start_timestamp = 0;
gf->current_result_end_timestamp = 0;

Expand Down Expand Up @@ -381,6 +460,7 @@ void *cleanstream_create(obs_data_t *settings, obs_source_t *filter)
gf->detect_regex = nullptr;
gf->replace_sound = REPLACE_SOUNDS_SILENCE;
gf->replace_sound_external = "";
gf->replace_sound_random_folder = "";

// get absolute path of the audio files
char *module_data_sounds_folder_path = obs_module_file("sounds");
Expand Down Expand Up @@ -481,15 +561,14 @@ void add_whisper_backend_group_properties(obs_properties_t *ppts, struct cleanst
obs_property_t *backend_device =
obs_properties_add_list(backend_group, "backend_device", MT_("backend_device"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);

obs_property_list_add_int(backend_device, "CPU only", -1);
obs_property_list_add_int(backend_device, MT_("CPUOnly"), -1);
for (size_t i = 0; i < gf->gpu_devices.size(); i++) {
auto name = gf->gpu_devices.at(i).device_name;
auto description = gf->gpu_devices.at(i).device_description;
obs_property_list_add_int(
backend_device,
std::string("GPU: ").append(name).append(" - ").append(description).c_str(),
i);
backend_device, std::string(MT_("GPUName"))
.append(name).append(" - ").append(description).c_str(), i);
}

obs_property_t *enable_flash_attn = obs_properties_add_bool(
Expand All @@ -509,29 +588,41 @@ obs_properties_t *cleanstream_properties(void *data)
obs_property_t *replace_sounds_list =
obs_properties_add_list(ppts, "replace_sound", MT_("replace_sound"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
obs_property_list_add_int(replace_sounds_list, "None", REPLACE_SOUNDS_NONE);
obs_property_list_add_int(replace_sounds_list, "Silence", REPLACE_SOUNDS_SILENCE);
obs_property_list_add_int(replace_sounds_list, MT_("None"), REPLACE_SOUNDS_NONE);
obs_property_list_add_int(replace_sounds_list, MT_("Silence"), REPLACE_SOUNDS_SILENCE);
// on windows and mac, add external file path for replace sound
#if defined(_WIN32) || defined(__APPLE__)
if (!gf->audioFileCache["beep.wav"].empty()) {
obs_property_list_add_int(replace_sounds_list, "Beep", REPLACE_SOUNDS_BEEP);
obs_property_list_add_int(replace_sounds_list, MT_("Beep"), REPLACE_SOUNDS_BEEP);
}

if (!gf->audioFileCache["horn.wav"].empty()) {
obs_property_list_add_int(replace_sounds_list, "Horn", REPLACE_SOUNDS_HORN);
obs_property_list_add_int(replace_sounds_list, MT_("Random"), REPLACE_SOUNDS_RANDOM);
obs_property_list_add_int(replace_sounds_list, MT_("Horn"), REPLACE_SOUNDS_HORN);
}
obs_property_list_add_int(replace_sounds_list, "External", REPLACE_SOUNDS_EXTERNAL);

obs_property_list_add_int(replace_sounds_list, MT_("External"), REPLACE_SOUNDS_EXTERNAL);

// add external file path for replace sound
obs_property_t *random_sound_path = nullptr;
obs_property_t *replace_sound_path = obs_properties_add_path(
ppts, "replace_sound_path", MT_("replace_sound_path"), OBS_PATH_FILE,
"WAV files (*.wav);;All files (*.*)", nullptr);
MT_("WavFilesFilter"), nullptr);

// add folder path for random sounds
random_sound_path = obs_properties_add_path(
ppts, "replace_sound_random_folder", MT_("replace_sound_random_folder"),
OBS_PATH_DIRECTORY, nullptr, nullptr);

// show/hide external file path based on the selected replace sound
obs_property_set_modified_callback(replace_sounds_list, [](obs_properties_t *props,
obs_property_t *property,
obs_data_t *settings) {
UNUSED_PARAMETER(property);
const long long replace_sound = obs_data_get_int(settings, "replace_sound");
obs_property_set_visible(
obs_properties_get(props, "replace_sound_random_folder"),
replace_sound == REPLACE_SOUNDS_RANDOM);
obs_property_set_visible(obs_properties_get(props, "replace_sound_path"),
replace_sound == REPLACE_SOUNDS_EXTERNAL);
return true;
Expand Down Expand Up @@ -566,6 +657,43 @@ obs_properties_t *cleanstream_properties(void *data)
return true;
},
gf);

// When the random-sounds folder property changes, rebuild the list of
// candidate files and decode any .wav file not yet in the audio cache.
obs_property_set_modified_callback2(
	random_sound_path,
	[](void *data_, obs_properties_t *props, obs_property_t *property,
	   obs_data_t *settings) {
		UNUSED_PARAMETER(property);
		UNUSED_PARAMETER(props);
		struct cleanstream_data *gf_ = static_cast<struct cleanstream_data *>(data_);
		gf_->random_audio_files.clear();
		const std::string random_folder_path =
			obs_data_get_string(settings, "replace_sound_random_folder");
		if (random_folder_path.empty()) {
			return true;
		}
		gf_->replace_sound_random_folder = random_folder_path;

		// Use the non-throwing overloads: the path is user input and may be
		// missing or unreadable, and an exception must not escape this
		// callback.
		std::error_code ec;
		std::filesystem::directory_iterator dir_it(random_folder_path, ec);
		const std::filesystem::directory_iterator dir_end;
		for (; !ec && dir_it != dir_end; dir_it.increment(ec)) {
			const std::filesystem::path &entry_path = dir_it->path();
			if (entry_path.extension() != ".wav") {
				continue;
			}
			const std::string file_path = entry_path.string();
			auto cached = gf_->audioFileCache.find(file_path);
			if (cached == gf_->audioFileCache.end()) {
				AudioDataFloat audioFile =
					read_audio_file(file_path.c_str(), gf_->sample_rate);
				if (audioFile.empty()) {
					obs_log(LOG_ERROR, "Failed to load audio file: %s",
						file_path.c_str());
					continue;
				}
				gf_->audioFileCache[file_path] = audioFile;
			} else if (cached->second.empty()) {
				// A cached entry without samples cannot be played.
				continue;
			}
			// Only list files that have audio in the cache, so a random
			// pick never selects a file that failed to load.
			gf_->random_audio_files.push_back(file_path);
		}
		if (ec) {
			obs_log(LOG_ERROR, "Failed to read random sounds folder '%s': %s",
				random_folder_path.c_str(), ec.message().c_str());
		}
		return true;
	},
	gf);
#endif

// Add a list of available whisper models to download
Expand All @@ -583,7 +711,7 @@ obs_properties_t *cleanstream_properties(void *data)

// Add language selector
obs_property_t *whisper_language_select_list =
obs_properties_add_list(ppts, "whisper_language_select", "Language",
obs_properties_add_list(ppts, "whisper_language_select", MT_("Language"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
// get a sorted list of available languages
std::vector<std::string> whisper_available_lang_keys;
Expand Down Expand Up @@ -617,7 +745,7 @@ obs_properties_t *cleanstream_properties(void *data)
add_whisper_backend_group_properties(ppts, gf);

obs_properties_t *advanced_settings_group = obs_properties_create();
obs_properties_add_group(ppts, "advanced_settings_group", MT_("Advanced_Settings"),
obs_properties_add_group(ppts, "advanced_settings_group", MT_("AdvancedSettings"),
OBS_GROUP_NORMAL, advanced_settings_group);

obs_properties_add_bool(advanced_settings_group, "vad_enabled", MT_("vad_enabled"));
Expand All @@ -636,9 +764,10 @@ obs_properties_t *cleanstream_properties(void *data)
obs_property_t *whisper_sampling_method_list = obs_properties_add_list(
whisper_params_group, "whisper_sampling_method", MT_("whisper_sampling_method"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
obs_property_list_add_int(whisper_sampling_method_list, "Beam search",
obs_property_list_add_int(whisper_sampling_method_list, MT_("BeamSearch"),
WHISPER_SAMPLING_BEAM_SEARCH);
obs_property_list_add_int(whisper_sampling_method_list, "Greedy", WHISPER_SAMPLING_GREEDY);
obs_property_set_long_description(whisper_sampling_method_list, MT_("whisper_sampling_method_tooltip"));
obs_property_list_add_int(whisper_sampling_method_list, MT_("Greedy"), WHISPER_SAMPLING_GREEDY);

// int n_threads;
obs_properties_add_int_slider(whisper_params_group, "n_threads", MT_("n_threads"), 1, 8, 1);
Expand Down
Loading