Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 33 additions & 6 deletions cython/_pocketsphinx.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,26 @@ cdef class Config:
Many parameters have default values. Also, when constructing a
`Config` directly (as opposed to parsing JSON), `hmm`, `lm`, and
`dict` are set to the default models (some kind of US English
models of unknown origin + CMUDict). You can prevent this by
models of unknown origin + CMUDict). You can prevent this by
passing `None` for any of these parameters, e.g.:

config = Config(lm=None) # Do not load a language model

Decoder initialization **will fail** if more than one of `lm`,
`jsgf`, `fsg`, `keyphrase`, `kws`, `allphone`, or `lmctl` are set
in the configuration. To make life easier, and because there is
no possible case in which you would do this intentionally, if you
initialize a `Decoder` or `Config` with any of these (and not
`lm`), the default `lm` value will be removed. This is not the
case if you decide to set one of them in an existing `Config`, so
in that case you must make sure to set `lm` to `None`:

config["jsgf"] = "spam_eggs_and_spam.gram"
config["lm"] = None

You may also call `default_search_args()` after the fact to set
them. Note that this will set them unconditionally.
`hmm`, `lm`, and `dict` to the system defaults. Note that this
will set them unconditionally.

See :doc:`config_params` for a description of existing parameters.

Expand Down Expand Up @@ -83,13 +96,20 @@ cdef class Config:
self.default_search_args()
# Now override them from kwargs (including None)
if kwargs:
# Remove lm if a different search was specified
for s in ("jsgf", "fsg", "kws", "keyphrase",
"allphone", "lmctl"):
if s in kwargs:
ps_config_set_str(self.config, "lm", NULL)
break
for k, v in kwargs.items():
# Note that all this is quite inefficient as we end up
# calling _normalize_key repeatedly.
ckey = self._normalize_key(k)
# Special dispensation to support the thing which was
# documented but never actually worked, i.e. setting a
# string value to False (should be None) to remove the
# default. Note that all this is quite inefficient as
# we end up calling _normalize_key repeatedly.
ckey = self._normalize_key(k)
# default.
if ps_config_typeof(self.config, ckey) & ARG_STRING:
if v is False:
v = None
Expand Down Expand Up @@ -769,7 +789,14 @@ cdef class Decoder:

ps = Decoder(lm=None) # Do not load a language model

You may also pass a pre-defined `Config` object as the only
Decoder initialization **will fail** if more than one of `lm`,
`jsgf`, `fsg`, `keyphrase`, `kws`, `allphone`, or `lmctl` are set
in the configuration. To make life easier, and because there is
no possible case in which you would do this intentionally, if you
initialize a `Decoder` or `Config` with any of these (and not
`lm`), the default `lm` value will be removed.

You can also pass a pre-defined `Config` object as the only
argument to the constructor, e.g.:

config = Config.parse_json(json)
Expand Down
18 changes: 17 additions & 1 deletion cython/test/config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,21 @@ def test_config_defaults(self):
self.assertEqual(config["lm"], None)
self.assertEqual(config["dict"], get_model_path("en-us/cmudict-en-us.dict"))

def test_stupid_config_hacks(self):
    """Test various backward-compatibility special cases.

    Covers the handling of the default `lm` when another search type
    (`jsgf`, `kws`, ...) is requested, both at construction time and
    when modifying an existing `Config`.
    """
    # A plain Config carries the default language model.
    config = Config()
    self.assertEqual(config["lm"], get_model_path("en-us/en-us.lm.bin"))

    # Constructing with another search type removes the default `lm`.
    config = Config(jsgf="spam_eggs_and_spam.gram")
    self.assertIsNone(config["lm"])
    self.assertEqual(config["jsgf"], "spam_eggs_and_spam.gram")

    # Setting a search type on an existing Config does not touch `lm`,
    # so the decoder must refuse the resulting configuration.  The
    # setup is kept outside the assertRaises block so that only a
    # failure of Decoder() itself can satisfy the assertion.
    config = Config()
    config["jsgf"] = os.path.join(DATADIR, "goforward.gram")
    with self.assertRaises(RuntimeError):
        _ = Decoder(config)

    # Two explicit search types at once are always rejected.
    with self.assertRaises(RuntimeError):
        _ = Decoder(kws=os.path.join(DATADIR, "goforward.kws"),
                    jsgf=os.path.join(DATADIR, "goforward.gram"))

    # A single explicit search type replaces the default `lm` and works.
    _ = Decoder(jsgf=os.path.join(DATADIR, "goforward.gram"))

class TestConfigIter(unittest.TestCase):
def test_config__iter(self):
Expand All @@ -120,8 +135,9 @@ def test_config__iter(self):
for key, value in config.items():
self.assertTrue(key in config)
self.assertEqual(config[key], value)
config = Decoder.default_config()
config = Config()
self.assertEqual(default_len, len(config))
config["lm"] = None
config["hmm"] = os.path.join(MODELDIR, "en-us", "en-us")
config["fsg"] = os.path.join(DATADIR, "goforward.fsg")
config["dict"] = os.path.join(DATADIR, "turtle.dic")
Expand Down
10 changes: 10 additions & 0 deletions include/pocketsphinx.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,16 @@ ps_config_t *ps_config_retain(ps_config_t *config);
POCKETSPHINX_EXPORT
int ps_config_free(ps_config_t *config);

/**
 * Validate configuration.
 *
 * Currently this just checks that you haven't specified multiple
 * types of grammars or language models at the same time, i.e. that
 * at most one of `lm`, `jsgf`, `fsg`, `keyphrase`, `kws`, `allphone`
 * and `lmctl` is set.
 *
 * @param config Configuration to check.
 * @return 0 for success, <0 for failure.
 */
/* NOTE(review): ps_config_free() just above is declared with
 * POCKETSPHINX_EXPORT but this declaration is not -- confirm whether
 * this function is meant to be part of the exported API. */
int ps_config_validate(ps_config_t *config);

/**
* Create or update a configuration by parsing slightly extended JSON.
*
Expand Down
38 changes: 18 additions & 20 deletions src/pocketsphinx.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,16 @@ ps_reinit(ps_decoder_t *ps, ps_config_t *config)
const char *keyphrase;
int32 lw;

/* Enforce only one of keyphrase, kws, fsg, jsgf, allphone, lm, lmctl */
if (config) {
if (ps_config_validate(config) < 0)
return -1;
}
else if (ps->config) {
if (ps_config_validate(ps->config) < 0)
return -1;
}

if (config && config != ps->config) {
ps_config_free(ps->config);
ps->config = ps_config_retain(config);
Expand Down Expand Up @@ -289,8 +299,6 @@ ps_reinit(ps_decoder_t *ps, ps_config_t *config)
if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
return -1;



if (ps_config_int(ps->config, "pl_window") > 0) {
/* Initialize an auxiliary phone loop search, which will run in
* "parallel" with FSG or N-Gram search. */
Expand All @@ -314,21 +322,17 @@ ps_reinit(ps_decoder_t *ps, ps_config_t *config)
/* Determine whether we are starting out in FSG or N-Gram search mode.
* If neither is used skip search initialization. */

/* Load KWS if one was specified in config */
if ((keyphrase = ps_config_str(ps->config, "keyphrase"))) {
if (ps_add_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase))
return -1;
ps_activate_search(ps, PS_DEFAULT_SEARCH);
}

if ((path = ps_config_str(ps->config, "kws"))) {
else if ((path = ps_config_str(ps->config, "kws"))) {
if (ps_add_kws(ps, PS_DEFAULT_SEARCH, path))
return -1;
ps_activate_search(ps, PS_DEFAULT_SEARCH);
}

/* Load an FSG if one was specified in config */
if ((path = ps_config_str(ps->config, "fsg"))) {
else if ((path = ps_config_str(ps->config, "fsg"))) {
fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw);
if (!fsg)
return -1;
Expand All @@ -339,28 +343,22 @@ ps_reinit(ps_decoder_t *ps, ps_config_t *config)
fsg_model_free(fsg);
ps_activate_search(ps, PS_DEFAULT_SEARCH);
}

/* Or load a JSGF grammar */
if ((path = ps_config_str(ps->config, "jsgf"))) {
else if ((path = ps_config_str(ps->config, "jsgf"))) {
if (ps_add_jsgf_file(ps, PS_DEFAULT_SEARCH, path)
|| ps_activate_search(ps, PS_DEFAULT_SEARCH))
return -1;
}

if ((path = ps_config_str(ps->config, "allphone"))) {
else if ((path = ps_config_str(ps->config, "allphone"))) {
if (ps_add_allphone_file(ps, PS_DEFAULT_SEARCH, path)
|| ps_activate_search(ps, PS_DEFAULT_SEARCH))
return -1;
|| ps_activate_search(ps, PS_DEFAULT_SEARCH))
return -1;
}

if ((path = ps_config_str(ps->config, "lm")) &&
!ps_config_str(ps->config, "allphone")) {
else if ((path = ps_config_str(ps->config, "lm"))) {
if (ps_add_lm_file(ps, PS_DEFAULT_SEARCH, path)
|| ps_activate_search(ps, PS_DEFAULT_SEARCH))
return -1;
}

if ((path = ps_config_str(ps->config, "lmctl"))) {
else if ((path = ps_config_str(ps->config, "lmctl"))) {
const char *name;
ngram_model_t *lmset;
ngram_model_set_iter_t *lmset_it;
Expand Down
40 changes: 40 additions & 0 deletions src/ps_config.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,46 @@ ps_config_free(ps_config_t *config)
return 0;
}

/** Names of the string parameters that each select a search type.
 * At most one of them may be set in a valid configuration. */
static const char *searches[] = {
    "lm",
    "jsgf",
    "fsg",
    "keyphrase",
    "kws",
    "allphone",
    "lmctl"
};
static const int nsearches = sizeof(searches)/sizeof(searches[0]);

/**
 * Check that at most one search type is selected in a configuration.
 *
 * Counts how many of the parameters listed in `searches` have a
 * non-NULL string value.  If more than one is set, an error naming
 * all of the mutually exclusive parameters is logged.
 *
 * @param config Configuration to check.
 * @return 0 if zero or one search parameter is set, -1 otherwise.
 */
int
ps_config_validate(ps_config_t *config)
{
    static const char prefix[] = "Only one of ";
    static const char suffix[] = "can be enabled at a time in config\n";
    int i, found = 0;
    size_t len;
    char *msg;

    /* Stop counting as soon as a second search parameter is seen. */
    for (i = 0; i < nsearches; ++i) {
        if (ps_config_str(config, searches[i]) != NULL) {
            if (++found > 1)
                break;
        }
    }
    if (found <= 1)
        return 0;

    /* Build the message from the table so it cannot drift from the
     * list of parameters actually checked.  Each name is followed by
     * a two-character ", " separator. */
    len = strlen(prefix) + strlen(suffix);
    for (i = 0; i < nsearches; ++i)
        len += strlen(searches[i]) + 2;
    msg = ckd_malloc(len + 1);
    strcpy(msg, prefix);
    for (i = 0; i < nsearches; ++i) {
        strcat(msg, searches[i]);
        strcat(msg, ", ");
    }
    strcat(msg, suffix);
    /* Pass the text as an argument, never as the format string. */
    E_ERROR("%s", msg);
    ckd_free(msg);
    return -1;
}

void
json_error(int err)
{
Expand Down
57 changes: 57 additions & 0 deletions test/unit/test_config.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,62 @@ test_config_json(void)
ckd_free(json);
}

/**
 * Exercise ps_config_validate() on configurations parsed from inline
 * JSON: two that set a single search parameter (expected to validate)
 * and two that set several at once (expected to be rejected).
 */
static void
test_validate_config(void)
{
    ps_config_t *config;

    /* Only `lm` is set: validation must succeed. */
    TEST_ASSERT(config =
                ps_config_parse_json(
                    NULL,
                    "hmm: \"" MODELDIR "/en-us/en-us\","
                    "lm: \"" MODELDIR "/en-us/en-us.lm.bin\","
                    "dict: \"" MODELDIR "/en-us/cmudict-en-us.dict\","
                    "fwdtree: true,"
                    "fwdflat: false,"
                    "bestpath: false,"
                    "samprate: 16000"));
    TEST_EQUAL(0, ps_config_validate(config));
    ps_config_free(config);

    /* Both `lm` and `jsgf` are set: validation must fail. */
    TEST_ASSERT(config =
                ps_config_parse_json(
                    NULL,
                    "hmm: \"" MODELDIR "/en-us/en-us\","
                    "lm: \"" MODELDIR "/en-us/en-us.lm.bin\","
                    "jsgf: \"" DATADIR "/goforward.gram\","
                    "dict: \"" MODELDIR "/en-us/cmudict-en-us.dict\","
                    "fwdtree: true,"
                    "fwdflat: false,"
                    "bestpath: false,"
                    "samprate: 16000"));
    TEST_ASSERT(ps_config_validate(config) < 0);
    ps_config_free(config);

    /* `kws`, `jsgf` and `fsg` together (no `lm`): validation must fail. */
    TEST_ASSERT(config =
                ps_config_parse_json(
                    NULL,
                    "hmm: \"" MODELDIR "/en-us/en-us\","
                    "kws: \"" DATADIR "/goforward.kws\","
                    "jsgf: \"" DATADIR "/goforward.gram\","
                    "fsg: \"" DATADIR "/goforward.fsg\","
                    "dict: \"" MODELDIR "/en-us/cmudict-en-us.dict\","
                    "fwdtree: true,"
                    "fwdflat: false,"
                    "bestpath: false,"
                    "samprate: 16000"));
    TEST_ASSERT(ps_config_validate(config) < 0);
    ps_config_free(config);

    /* Only `keyphrase` is set: validation must succeed. */
    TEST_ASSERT(config =
                ps_config_parse_json(
                    NULL,
                    "hmm: \"" MODELDIR "/en-us/en-us\","
                    "keyphrase: \"bonjour alexis\","
                    "fwdtree: true,"
                    "fwdflat: false,"
                    "bestpath: false,"
                    "samprate: 16000"));
    TEST_EQUAL(0, ps_config_validate(config));
    ps_config_free(config);
}

int
main(int argc, char *argv[])
{
Expand All @@ -137,6 +193,7 @@ main(int argc, char *argv[])
test_config_init();
test_config_args();
test_config_json();
test_validate_config();

return 0;
}