diff --git a/README.md b/README.md index 7ad703f87..24cad2a5a 100644 --- a/README.md +++ b/README.md @@ -29,12 +29,24 @@ There is no longer any dependency on SphinxBase, because there is no reason for SphinxBase to exist. You can just link against the PocketSphinx library, which now includes all of its functionality. -So you can do something like this: +To install the Python module in a virtual environment (replace +`~/ve_pocketsphinx` with the virtual environment you wish to create), +from the top level directory: + +``` +python3 -m venv ~/ve_pocketsphinx +. ~/ve_pocketsphinx/bin/activate +pip install . +``` + +To install the C library and bindings (assuming you have access to +/usr/local - if not, use `-DCMAKE_INSTALL_PREFIX` to set a different +prefix when running `cmake`): ``` mkdir build cd build -cmake .. -make +cmake .. # Add CMake options here +make all test make install ``` diff --git a/include/pocketsphinx.h b/include/pocketsphinx.h index 7d3bd2082..a80a0bb63 100644 --- a/include/pocketsphinx.h +++ b/include/pocketsphinx.h @@ -48,7 +48,7 @@ #include #include -/* PocketSphinx headers (not many of them!) */ +/* PocketSphinx headers */ #include #include #include @@ -107,6 +107,12 @@ ps_decoder_t *ps_init(cmd_ln_t *config); * or other configuration without creating an entirely new decoding * object. * + * @note Since the acoustic model will be reloaded, changes made to + * feature extraction parameters may be overridden if a `feat.params` + * file is present. + * @note Any searches created with ps_set_search() or words added to + * the dictionary with ps_add_word() will also be lost. To avoid this + * you can use ps_reinit_feat(). * @note The decoder retains ownership of the pointer * config, so you should free it when no longer used. * @@ -120,26 +126,28 @@ POCKETSPHINX_EXPORT int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config); /** - * Reinitialize only the feature extractor with updated configuration. 
+ * Reinitialize only the feature computation with updated configuration. * - * This function allows you to switch the feature extraction + * This function allows you to switch the feature computation * parameters without otherwise affecting the decoder configuration. - * For example, if you change the sample rate or the frame rate and do - * not need to reconfigure the rest of the decoder. + * For example, if you change the sample rate or the frame rate, the + * cepstral mean, or the VTLN warping factor, and do not need to + * reconfigure the rest of the decoder. * - * @note The decoder retains ownership of the pointer - * config, so you should free it when no longer used. + * Note that if your code has modified any internal parameters in the + * \ref acmod_t, these will be overridden by values from the config. + * + * @note The decoder retains ownership of the pointer `config`, so you + * should free it when no longer used. * * @param ps Decoder. * @param config An optional new configuration to use. If this is * NULL, the previous configuration will be reloaded, - * with any changes to feature extraction applied. - * @return pointer to new feature extractor. The decoder owns this - * pointer, so you should not attempt to free it manually. - * Use fe_retain() if you wish to reuse it elsewhere. + * with any changes to feature computation applied. + * @return 0 for success, <0 for failure (usually an invalid parameter) */ POCKETSPHINX_EXPORT -fe_t * ps_reinit_fe(ps_decoder_t *ps, cmd_ln_t *config); +int ps_reinit_feat(ps_decoder_t *ps, cmd_ln_t *config); /** * Returns the argument definitions used in ps_init(). @@ -235,7 +243,7 @@ ps_mllr_t *ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr); * Reload the pronunciation dictionary from a file. * * This function replaces the current pronunciation dictionary with - * the one stored in dictfile. This also causes the active search + * the one stored in `dictfile`.
This also causes the active search * module(s) to be reinitialized, in the same manner as calling * ps_add_word() with update=TRUE. * @@ -252,7 +260,7 @@ int ps_load_dict(ps_decoder_t *ps, char const *dictfile, /** * Dump the current pronunciation dictionary to a file. * - * This function dumps the current pronunciation dictionary to a tex + * This function dumps the current pronunciation dictionary to a text file. * * @param dictfile Path to file where dictionary will be written. * @param format Format of the dictionary file, or NULL for the @@ -414,7 +422,8 @@ int ps_end_utt(ps_decoder_t *ps); * @param out_best_score Output: path score corresponding to returned string. * @return String containing best hypothesis at this point in * decoding. NULL if no hypothesis is available. This string is owned - * by the decoder, so you should copy it if you need to hold onto it. + * by the decoder and only valid for the current hypothesis, so you + * should copy it if you need to hold onto it. */ POCKETSPHINX_EXPORT char const *ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score); @@ -552,7 +561,8 @@ ps_nbest_t *ps_nbest_next(ps_nbest_t *nbest); * * @param nbest N-best iterator. * @param out_score Output: Path score for this hypothesis. - * @return String containing next best hypothesis. + * @return String containing next best hypothesis. Note that this + * pointer is only valid for the current iteration. */ POCKETSPHINX_EXPORT char const *ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score); @@ -599,17 +609,89 @@ void ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall); /** - * @mainpage PocketSphinx API Documentation + * @mainpage PocketSphinx Documentation * @author David Huggins-Daines - * @author Alpha Cephei Inc. * @version 5.0.0 - * @date July, 2015 + * @date July, 2022 * * @section intro_sec Introduction * - * This is the API documentation for the PocketSphinx speech - * recognition engine. The main API calls are documented in - * . 
+ * This is the documentation for the PocketSphinx speech recognition + * engine. The main API calls are documented in <pocketsphinx.h>. + * + * @section install_sec Installation + * + * To install from source, you will need a C compiler and a recent + * version of CMake. If you wish to use an integrated development + * environment, Visual Studio Code will automate most of this process + * for you once you have installed C++ and CMake support as described + * at https://code.visualstudio.com/docs/languages/cpp + * + * @subsection python_install Python module install + * + * The easiest way to program PocketSphinx is with the Python module. + * This can be installed in a + * [VirtualEnv](https://docs.python.org/3/library/venv.html) or + * [Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/environments.html) + * environment without affecting the rest of your system. For + * example, from the *top-level source directory*: + * + * python3 -m venv ~/ve_pocketsphinx + * . ~/ve_pocketsphinx/bin/activate + * pip install . + * + * There is no need to create a separate build directory as `pip` will + * do this for you. + * + * @subsection unix_install Unix-like systems + * + * From the Unix command line, you will create a separate directory in + * which to build the source code, then run `cmake` with the top-level + * source directory as argument to generate the build files: + * + * mkdir build && cd build + * cmake .. + * + * Now you can compile and run the tests, and install the code: + * + * make all test + * make install + * + * By default CMake will try to install things in `/usr/local`, which + * you might not have access to. If you want to install somewhere + * else you need to set `CMAKE_INSTALL_PREFIX` *when running cmake*: + * + * cmake ..
-DCMAKE_INSTALL_PREFIX=$HOME/.local + * + * In this case you may also need to set the `LD_LIBRARY_PATH` + * environment variable so that the PocketSphinx library can be found: + * + * export LD_LIBRARY_PATH=$HOME/.local/lib + * + * @subsection windows_install Windows + * + * On Windows, the process is similar, but you will need to tell CMake + * what build tool you are using with the `-G` option, and there are + * many of them. The build is known to work with `nmake` but it is + * easiest just to use Visual Studio Code. Once built, you will find + * the DLL and EXE files in `build\Debug` or `build\Release` depending + * on your build type. If the EXE files do not run, you need to + * ensure that `pocketsphinx.dll` is located in the same directory as + * them. + * + * @section faq_sec Frequently Asked Questions + * + * @subsection faq_faq Why are there no frequently asked questions? + * + * I'm glad you asked! There will be some soon. + * + * @section thanks_sec Acknowledgements + * + * PocketSphinx is largely based on the previous Sphinx-II and + * Sphinx-III systems, developed by a large number of contributors at + * Carnegie Mellon University. For some time afterwards, it was + * maintained by Nickolay Shmyrev and others at Alpha Cephei, Inc. + * See the `AUTHORS` file for a list of contributors. */ #ifdef __cplusplus diff --git a/include/pocketsphinx/ps_search.h b/include/pocketsphinx/ps_search.h index d5ef5a031..23109a69e 100644 --- a/include/pocketsphinx/ps_search.h +++ b/include/pocketsphinx/ps_search.h @@ -297,7 +297,10 @@ int ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path); /** * Adds new search based on forced alignment. * - * Convenient method to and create a forced aligner for a piece of text. + * Convenient method to create a forced aligner for a piece of + * text.
Note that this is currently less than useful, as it depends + * on the word sequence exactly matching the input, including + * alternate pronunciations and silences. * * @param ps Decoder * @param name Name for this search (could be anything, such as an utterance diff --git a/include/sphinxbase/cmn.h b/include/sphinxbase/cmn.h index 7c277fb37..9dae45943 100644 --- a/include/sphinxbase/cmn.h +++ b/include/sphinxbase/cmn.h @@ -126,9 +126,9 @@ cmn_type_t cmn_type_from_str(const char *str); */ typedef struct { - mfcc_t *cmn_mean; /**< Temporary variable: current means */ - mfcc_t *cmn_var; /**< Temporary variables: stored the cmn variance */ - mfcc_t *sum; /**< The sum of the cmn frames */ + mfcc_t *cmn_mean; /**< Current means */ + mfcc_t *cmn_var; /**< Stored cmn variance */ + mfcc_t *sum; /**< Accumulated cepstra for computing mean */ int32 nframe; /**< Number of frames */ int32 veclen; /**< Length of cepstral vector */ } cmn_t; @@ -174,12 +174,6 @@ void cmn_live_update(cmn_t *cmn); SPHINXBASE_EXPORT void cmn_live_set(cmn_t *cmn, mfcc_t const *vec); -/** - * Get the live mean. 
- */ -SPHINXBASE_EXPORT -void cmn_live_get(cmn_t *cmn, mfcc_t *vec); - /* RAH, free previously allocated memory */ SPHINXBASE_EXPORT void cmn_free (cmn_t *cmn); diff --git a/src/acmod.c b/src/acmod.c index c37ed481c..836f11b71 100644 --- a/src/acmod.c +++ b/src/acmod.c @@ -63,6 +63,7 @@ #include "ms_mgau.h" static int32 acmod_process_mfcbuf(acmod_t *acmod); +static const char *acmod_update_cmninit(acmod_t *acmod); static int acmod_init_am(acmod_t *acmod) @@ -135,63 +136,109 @@ acmod_init_am(acmod_t *acmod) return 0; } -static int -acmod_init_feat(acmod_t *acmod) +int +acmod_reinit_feat(acmod_t *acmod, fe_t *fe, feat_t *fcb) { - acmod->fcb = - feat_init(cmd_ln_str_r(acmod->config, "-feat"), - cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")), - cmd_ln_boolean_r(acmod->config, "-varnorm"), - agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")), - 1, cmd_ln_int32_r(acmod->config, "-ceplen")); - if (acmod->fcb == NULL) - return -1; - - if (cmd_ln_str_r(acmod->config, "_lda")) { - E_INFO("Reading linear feature transformation from %s\n", - cmd_ln_str_r(acmod->config, "_lda")); - if (feat_read_lda(acmod->fcb, - cmd_ln_str_r(acmod->config, "_lda"), - cmd_ln_int32_r(acmod->config, "-ldadim")) < 0) + if (fe) + fe = fe_retain(fe); + else { + fe = fe_init_auto_r(acmod->config); + if (fe == NULL) return -1; } + if (acmod_fe_mismatch(acmod, fe)) { + fe_free(fe); + return -1; + } + if (acmod->fe) + fe_free(acmod->fe); + acmod->fe = fe; - if (cmd_ln_str_r(acmod->config, "-svspec")) { - int32 **subvecs; - E_INFO("Using subvector specification %s\n", - cmd_ln_str_r(acmod->config, "-svspec")); - if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL) - return -1; - if ((feat_set_subvecs(acmod->fcb, subvecs)) < 0) + if (fcb) + fcb = feat_retain(fcb); + else { + fcb = + feat_init(cmd_ln_str_r(acmod->config, "-feat"), + cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")), + cmd_ln_boolean_r(acmod->config, "-varnorm"), + 
agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")), + 1, cmd_ln_int32_r(acmod->config, "-ceplen")); + if (fcb == NULL) return -1; - } - if (cmd_ln_exists_r(acmod->config, "-agcthresh") - && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) { - agc_set_threshold(acmod->fcb->agc_struct, - cmd_ln_float32_r(acmod->config, "-agcthresh")); - } + if (cmd_ln_str_r(acmod->config, "_lda")) { + E_INFO("Reading linear feature transformation from %s\n", + cmd_ln_str_r(acmod->config, "_lda")); + if (feat_read_lda(fcb, + cmd_ln_str_r(acmod->config, "_lda"), + cmd_ln_int32_r(acmod->config, "-ldadim")) < 0) + return -1; + } - if (acmod->fcb->cmn_struct - && cmd_ln_exists_r(acmod->config, "-cmninit")) { - char *c, *cc, *vallist; - int32 nvals; - - vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit")); - c = vallist; - nvals = 0; - while (nvals < acmod->fcb->cmn_struct->veclen - && (cc = strchr(c, ',')) != NULL) { - *cc = '\0'; - acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c)); - c = cc + 1; - ++nvals; + if (cmd_ln_str_r(acmod->config, "-svspec")) { + int32 **subvecs; + E_INFO("Using subvector specification %s\n", + cmd_ln_str_r(acmod->config, "-svspec")); + if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL) + return -1; + if ((feat_set_subvecs(fcb, subvecs)) < 0) + return -1; + } + + if (cmd_ln_exists_r(acmod->config, "-agcthresh") + && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) { + agc_set_threshold(fcb->agc_struct, + cmd_ln_float32_r(acmod->config, "-agcthresh")); } - if (nvals < acmod->fcb->cmn_struct->veclen && *c != '\0') { - acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c)); + + if (fcb->cmn_struct + && cmd_ln_exists_r(acmod->config, "-cmninit") + && cmd_ln_str_r(acmod->config, "-cmninit")) { + char *c, *cc, *vallist; + int32 nvals; + + vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit")); + c = vallist; + nvals = 0; + while (nvals < fcb->cmn_struct->veclen + && (cc = 
strchr(c, ',')) != NULL) { + *cc = '\0'; + fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c)); + c = cc + 1; + ++nvals; + } + if (nvals < fcb->cmn_struct->veclen && *c != '\0') { + fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c)); + } + ckd_free(vallist); } - ckd_free(vallist); } + if (acmod_feat_mismatch(acmod, fcb)) { + feat_free(fcb); + return -1; + } + if (acmod->fcb) + feat_free(acmod->fcb); + acmod->fcb = fcb; + + /* The MFCC buffer needs to be at least as large as the dynamic + * feature window. */ + acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1; + if (acmod->mfc_buf) + ckd_free_2d(acmod->mfc_buf); + acmod->mfc_buf = (mfcc_t **) + ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize, + sizeof(**acmod->mfc_buf)); + + /* Feature buffer has to be at least as large as MFCC buffer. */ + acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(acmod->config, "-pl_window"); + if (acmod->feat_buf) + feat_array_free(acmod->feat_buf); + acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc); + if (acmod->framepos) + ckd_free(acmod->framepos); + acmod->framepos = ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos)); + return 0; } @@ -234,50 +281,14 @@ acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb) acmod->lmath = logmath_retain(lmath); acmod->state = ACMOD_IDLE; - /* Initialize feature computation. */ - if (fe) { - if (acmod_fe_mismatch(acmod, fe)) - goto error_out; - fe_retain(fe); - acmod->fe = fe; - } - else { - /* Initialize a new front end. */ - acmod->fe = fe_init_auto_r(config); - if (acmod->fe == NULL) - goto error_out; - if (acmod_fe_mismatch(acmod, acmod->fe)) - goto error_out; - } - if (fcb) { - if (acmod_feat_mismatch(acmod, fcb)) - goto error_out; - feat_retain(fcb); - acmod->fcb = fcb; - } - else { - /* Initialize a new fcb. */ - if (acmod_init_feat(acmod) < 0) - goto error_out; - } + /* Initialize or retain fe and fcb. 
*/ + if (acmod_reinit_feat(acmod, fe, fcb) < 0) + goto error_out; /* Load acoustic model parameters. */ if (acmod_init_am(acmod) < 0) goto error_out; - - /* The MFCC buffer needs to be at least as large as the dynamic - * feature window. */ - acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1; - acmod->mfc_buf = (mfcc_t **) - ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize, - sizeof(**acmod->mfc_buf)); - - /* Feature buffer has to be at least as large as MFCC buffer. */ - acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window"); - acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc); - acmod->framepos = ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos)); - /* Senone computation stuff. */ acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef), sizeof(*acmod->senone_scores)); @@ -471,9 +482,45 @@ acmod_end_utt(acmod_t *acmod) acmod->senfh = NULL; } + acmod_update_cmninit(acmod); + return nfr; } +static const char * +acmod_update_cmninit(acmod_t *acmod) +{ + char *cmninit, *ptr; + cmn_t *cmn; + int i, len; + + if (acmod->fcb == NULL) + return NULL; + if ((cmn = acmod->fcb->cmn_struct) == NULL) + return NULL; + len = 0; + for (i = 0; i < cmn->veclen; ++i) { + int nbytes = snprintf(NULL, 0, "%g,", cmn->cmn_mean[i]); + if (nbytes <= 0) { + E_ERROR_SYSTEM("Failed to format %g for cmninit", cmn->cmn_mean[i]); + return NULL; + } + len += nbytes; + } + len++; + ptr = cmninit = ckd_malloc(len); + if (ptr == NULL) { + E_ERROR_SYSTEM("Failed to allocate %d bytes for cmninit", len); + return NULL; + } + for (i = 0; i < cmn->veclen; ++i) + ptr += snprintf(ptr, cmninit + len - ptr, "%g,", cmn->cmn_mean[i]); + *--ptr = '\0'; + cmd_ln_set_str_r(acmod->config, "-cmninit", cmninit); + ckd_free(cmninit); + return cmd_ln_str_r(acmod->config, "-cmninit"); +} + static int acmod_log_mfc(acmod_t *acmod, mfcc_t **cep, int n_frames) diff --git a/src/acmod.h b/src/acmod.h index 244507b19..9b3383203 100644 --- a/src/acmod.h +++ 
b/src/acmod.h @@ -222,11 +222,17 @@ typedef struct acmod_s acmod_t; POCKETSPHINX_EXPORT acmod_t *acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb); +/** + * Reinitialize feature computation modules. + */ +POCKETSPHINX_EXPORT +int acmod_reinit_feat(acmod_t *acmod, fe_t *fe, feat_t *fcb); + /** * Verify that feature extraction parameters are compatible with * acoustic model. * - * @param fe acoustic feature extraction module to verify. + * @param fe acoustic feature extraction module to verify. * @return TRUE if compatible, FALSE otherwise */ POCKETSPHINX_EXPORT diff --git a/src/feat/cmn_live.c b/src/feat/cmn_live.c index d5591aa6c..59bf9759a 100644 --- a/src/feat/cmn_live.c +++ b/src/feat/cmn_live.c @@ -69,16 +69,6 @@ cmn_live_set(cmn_t *cmn, mfcc_t const * vec) E_INFOCONT(">\n"); } -void -cmn_live_get(cmn_t *cmn, mfcc_t * vec) -{ - int32 i; - - for (i = 0; i < cmn->veclen; i++) - vec[i] = cmn->cmn_mean[i]; - -} - static void cmn_live_shiftwin(cmn_t *cmn) { diff --git a/src/pocketsphinx.c b/src/pocketsphinx.c index 6d57583d9..d0eee3450 100644 --- a/src/pocketsphinx.c +++ b/src/pocketsphinx.c @@ -213,24 +213,14 @@ ps_default_search_args(cmd_ln_t *config) #endif } -fe_t * -ps_reinit_fe(ps_decoder_t *ps, cmd_ln_t *config) +int +ps_reinit_feat(ps_decoder_t *ps, cmd_ln_t *config) { - fe_t *new_fe; - if (config && config != ps->config) { cmd_ln_free_r(ps->config); ps->config = cmd_ln_retain(config); } - if ((new_fe = fe_init_auto_r(ps->config)) == NULL) - return NULL; - if (acmod_fe_mismatch(ps->acmod, new_fe)) { - fe_free(new_fe); - return NULL; - } - fe_free(ps->acmod->fe); - ps->acmod->fe = new_fe; - return new_fe; + return acmod_reinit_feat(ps->acmod, NULL, NULL); } int diff --git a/src/util/cmd_ln.c b/src/util/cmd_ln.c index 9b644da08..ba776940c 100644 --- a/src/util/cmd_ln.c +++ b/src/util/cmd_ln.c @@ -780,7 +780,6 @@ cmd_ln_parse_file_r(cmd_ln_t *inout_cmdln, const arg_t * defn, const char *filen void cmd_ln_log_help_r(cmd_ln_t *cmdln, arg_t 
const* defn) { - if (defn == NULL) return; E_INFO("Arguments list definition:\n"); diff --git a/swig/ps_decoder.i b/swig/ps_decoder.i index 68bb2bc1f..c2a189c46 100644 --- a/swig/ps_decoder.i +++ b/swig/ps_decoder.i @@ -61,6 +61,10 @@ *errcode = ps_reinit($self, config); } + void reinit_feat(Config *config, int *errcode) { + *errcode = ps_reinit_feat($self, config); + } + void load_dict( char const *fdict, char const *ffilter, char const *format, int *errcode) { *errcode = ps_load_dict($self, fdict, ffilter, format); diff --git a/swig/python/test/decoder_test.py b/swig/python/test/decoder_test.py index 38323334a..67b07bb5a 100644 --- a/swig/python/test/decoder_test.py +++ b/swig/python/test/decoder_test.py @@ -10,14 +10,38 @@ "../../../test/data") class TestDecoder(unittest.TestCase): + def _run_decode(self, decoder, expect_fail=False): + with open(os.path.join(DATADIR, 'goforward.raw'), "rb") as fh: + buf = fh.read() + decoder.start_utt() + decoder.process_raw(buf, no_search=False, full_utt=True) + decoder.end_utt() + self._check_hyp(decoder.hyp().hypstr, decoder.seg(), expect_fail) + + def _check_hyp(self, hyp, hypseg, expect_fail=False): + if expect_fail: + self.assertNotEqual(hyp, "go forward ten meters") + else: + self.assertEqual(hyp, "go forward ten meters") + words = [] + try: + for seg in hypseg: + if seg.word not in ("", "", "", "(NULL)"): + words.append(seg.word) + except AttributeError: + for word, start, end in hypseg: + if word not in ("", "", "", "(NULL)"): + words.append(word) + if expect_fail: + self.assertNotEqual(words, "go forward ten meters".split()) + else: + self.assertEqual(words, "go forward ten meters".split()) + def test_decoder(self): - # Create a decoder with certain model config = Decoder.default_config() config.set_string('-hmm', os.path.join(MODELDIR, 'en-us/en-us')) config.set_string('-lm', os.path.join(MODELDIR, 'en-us/en-us.lm.bin')) config.set_string('-dict', os.path.join(MODELDIR, 'en-us/cmudict-en-us.dict')) - - # Decode 
streaming data. decoder = Decoder(config) print ("Pronunciation for word 'hello' is ", decoder.lookup_word("hello")) @@ -25,36 +49,41 @@ def test_decoder(self): print ("Pronunciation for word 'abcdf' is ", decoder.lookup_word("abcdf")) self.assertEqual(None, decoder.lookup_word("abcdf")) - decoder.start_utt() - stream = open(os.path.join(DATADIR, 'goforward.raw'), 'rb') - while True: - buf = stream.read(1024) - if buf: - decoder.process_raw(buf, False, False) - else: - break - decoder.end_utt() - - hypothesis = decoder.hyp() - logmath = decoder.get_logmath() - print ('Best hypothesis: ', hypothesis.hypstr, " model score: ", hypothesis.best_score, " confidence: ", logmath.exp(hypothesis.prob)) - - print ('Best hypothesis segments: ', [seg.word for seg in decoder.seg()]) + self._run_decode(decoder); # Access N best decodings. print ('Best 10 hypothesis: ') for best, i in zip(decoder.nbest(), range(10)): print (best.hypstr, best.score) - stream = open(os.path.join(DATADIR, 'goforward.mfc'), 'rb') - stream.read(4) - buf = stream.read(13780) - decoder.start_utt() - decoder.process_cep(buf, False, True) - decoder.end_utt() - hypothesis = decoder.hyp() - print ('Best hypothesis: ', hypothesis.hypstr, " model score: ", hypothesis.best_score, " confidence: ", hypothesis.prob) - self.assertEqual("go forward ten meters", decoder.hyp().hypstr) + with open(os.path.join(DATADIR, 'goforward.mfc'), 'rb') as stream: + stream.read(4) + buf = stream.read(13780) + decoder.start_utt() + decoder.process_cep(buf, False, True) + decoder.end_utt() + hypothesis = decoder.hyp() + print ('Best hypothesis: ', hypothesis.hypstr, " model score: ", hypothesis.best_score, " confidence: ", hypothesis.prob) + self.assertEqual("go forward ten meters", decoder.hyp().hypstr) + + def test_reinit(self): + config = Decoder.default_config() + config.set_string('-hmm', os.path.join(MODELDIR, 'en-us/en-us')) + config.set_string('-lm', os.path.join(MODELDIR, 'en-us/en-us.lm.bin')) + config.set_string('-dict', 
os.path.join(MODELDIR, 'en-us/cmudict-en-us.dict')) + decoder = Decoder(config) + decoder.add_word("_forward", "F AO R W ER D", True) + self._run_decode(decoder); + # should preserve dict words, but make decoding fail + config.set_float("-samprate", 48000) + decoder.reinit_feat(config) + self.assertEqual("F AO R W ER D", decoder.lookup_word("_forward")) + self._run_decode(decoder, expect_fail=True); + config.set_float("-samprate", 16000) + # should erase dict words + decoder.reinit(config) + self.assertEqual(None, decoder.lookup_word("_forward")) + self._run_decode(decoder); if __name__ == "__main__": diff --git a/test/unit/test_acmod.c b/test/unit/test_acmod.c index d514cfb32..d6d23637f 100644 --- a/test/unit/test_acmod.c +++ b/test/unit/test_acmod.c @@ -3,6 +3,7 @@ #include #include +#include #include "acmod.h" #include "test_macros.h" @@ -40,6 +41,7 @@ main(int argc, char *argv[]) (void)argc; (void)argv; + err_set_loglevel(ERR_INFO); lmath = logmath_init(1.0001, 0, 0); config = cmd_ln_init(NULL, ps_args(), TRUE, "-compallsen", "true", @@ -63,6 +65,8 @@ main(int argc, char *argv[]) cmd_ln_set_str_extra_r(config, "_lda", NULL); cmd_ln_set_str_extra_r(config, "_senmgau", NULL); + /* Unset -cmninit to avoid confusion */ + cmd_ln_set_str_r(config, "-cmninit", NULL); TEST_ASSERT(acmod = acmod_init(config, lmath, NULL, NULL)); cmn_live_set(acmod->fcb->cmn_struct, cmninit); @@ -93,6 +97,9 @@ main(int argc, char *argv[]) } } TEST_EQUAL(0, acmod_end_utt(acmod)); + /* Make sure -cmninit was updated. */ + TEST_ASSERT(cmd_ln_str_r(config, "-cmninit") != NULL); + E_INFO("New -cmninit: %s\n", cmd_ln_str_r(config, "-cmninit")); nread = 0; { int16 best_score; @@ -123,6 +130,9 @@ main(int argc, char *argv[]) TEST_EQUAL(0, acmod_start_utt(acmod)); acmod_process_raw(acmod, &bptr, &nsamps, TRUE); TEST_EQUAL(0, acmod_end_utt(acmod)); + /* Make sure -cmninit was updated. 
*/ + TEST_ASSERT(cmd_ln_str_r(config, "-cmninit") != NULL); + E_INFO("New -cmninit: %s\n", cmd_ln_str_r(config, "-cmninit")); { int16 best_score; int frame_idx = -1, best_senid; @@ -175,6 +185,9 @@ main(int argc, char *argv[]) } } TEST_EQUAL(0, acmod_end_utt(acmod)); + /* Make sure -cmninit was updated. */ + TEST_ASSERT(cmd_ln_str_r(config, "-cmninit") != NULL); + E_INFO("New -cmninit: %s\n", cmd_ln_str_r(config, "-cmninit")); nfr = 0; acmod_process_cep(acmod, &cptr, &nfr, FALSE); { @@ -210,6 +223,9 @@ main(int argc, char *argv[]) nfr = frame_counter; acmod_process_cep(acmod, &cptr, &nfr, TRUE); TEST_EQUAL(0, acmod_end_utt(acmod)); + /* Make sure -cmninit was updated. */ + TEST_ASSERT(cmd_ln_str_r(config, "-cmninit") != NULL); + E_INFO("New -cmninit: %s\n", cmd_ln_str_r(config, "-cmninit")); { int16 best_score; int frame_idx = -1, best_senid; diff --git a/test/unit/test_reinit.c b/test/unit/test_reinit.c index 9c339ef60..422055fea 100644 --- a/test/unit/test_reinit.c +++ b/test/unit/test_reinit.c @@ -9,6 +9,7 @@ main(int argc, char *argv[]) { ps_decoder_t *ps; cmd_ln_t *config; + char *pron; (void)argc; (void)argv; @@ -31,8 +32,25 @@ main(int argc, char *argv[]) ps_add_word(ps, "foobie", "F UW B IY", FALSE); ps_add_word(ps, "hellosomething", "HH EH L OW S", TRUE); + /* Reinit features only, words should remain */ + cmd_ln_set_str_r(config, "-cmninit", "41,-4,1"); + TEST_EQUAL(0, ps_reinit_feat(ps, config)); + TEST_EQUAL(0, strcmp(cmd_ln_str_r(ps_get_config(ps), "-cmninit"), + "41,-4,1")); + pron = ps_lookup_word(ps, "foobie"); + TEST_ASSERT(pron != NULL); + TEST_EQUAL(0, strcmp(pron, "F UW B IY")); + ckd_free(pron); + /* Reinit with existing config */ ps_reinit(ps, NULL); + /* Words added above are gone, we expect that. */ + pron = ps_lookup_word(ps, "foobie"); + TEST_ASSERT(pron == NULL); + /* Unfortunately so are feature params if feat.params is in + * AM. No way around this... 
*/ + TEST_ASSERT(0 != strcmp(cmd_ln_str_r(ps_get_config(ps), "-cmninit"), + "41,-4,1")); ps_free(ps); cmd_ln_free_r(config);