Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions include/ps_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
* <li>grammar - recognizes speech according to a JSGF grammar. Unlike keyphrase search, grammar search doesn't ignore words which are not in the grammar but tries to recognize them.</li>
* <li>ngram/lm - recognizes natural speech with a language model.</li>
* <li>allphone - recognizes phonemes with a phonetic language model.</li>
* <li>align - creates time alignments for a fixed word sequence.</li>
* </ul>
*
* Each search has a name and can be referenced by a name, names are
Expand Down Expand Up @@ -293,6 +294,21 @@ int ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm);
POCKETSPHINX_EXPORT
int ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path);

/**
* Adds new search based on forced alignment.
*
* Convenience method to create a forced aligner for a piece of text.
*
* @param ps Decoder
* @param name Name for this search (could be anything, such as an utterance
* label or the name of the input file)
* @param words String containing whitespace-separated words for alignment.
* These words are assumed to exist in the current dictionary.
*
* @return -1 if any word is not found in the dictionary; otherwise the
* status returned when the new search is added to the decoder.
*/
POCKETSPHINX_EXPORT
int ps_set_align(ps_decoder_t *ps, const char *name, const char *words);

#ifdef __cplusplus
}
#endif
Expand Down
34 changes: 34 additions & 0 deletions src/libpocketsphinx/pocketsphinx.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
#include "ngram_search_fwdtree.h"
#include "ngram_search_fwdflat.h"
#include "allphone_search.h"
#include "state_align_search.h"

static const arg_t ps_args_def[] = {
POCKETSPHINX_OPTIONS,
Expand Down Expand Up @@ -202,6 +203,7 @@ ps_default_search_args(cmd_ln_t *config)
&& !cmd_ln_str_r(config, "-lmctl")
&& !cmd_ln_str_r(config, "-kws")
&& !cmd_ln_str_r(config, "-keyphrase")
&& !cmd_ln_str_r(config, "-alignctl")
&& file_exists(MODELDIR "/en-us/en-us.lm.bin")) {
lmfile = MODELDIR "/en-us/en-us.lm.bin";
cmd_ln_set_str_r(config, "-lm", lmfile);
Expand Down Expand Up @@ -627,6 +629,38 @@ ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path)
return result;
}

/**
 * Build a forced-alignment search from a whitespace-separated word string
 * and add it to the decoder under the given name.
 *
 * The word sequence is wrapped in "<s>" and "</s>" before the lower levels
 * of the alignment are populated.
 *
 * @param ps   Decoder; its dictionary is used to look up every word.
 * @param name Name for the new search.
 * @param text Whitespace-separated words to align.
 * @return -1 if ps or text is NULL, if a word is not in the dictionary, if
 *         the alignment cannot be populated or the search cannot be
 *         created; otherwise the result of set_search_internal().
 */
int
ps_set_align(ps_decoder_t *ps, const char *name, const char *text)
{
    ps_search_t *search = NULL;
    ps_alignment_t *alignment = NULL;
    char *textbuf, *trimmed, *ptr, *word, delimfound;
    int n;
    int rv = -1;

    /* ckd_salloc() would otherwise be handed a NULL string. */
    if (ps == NULL || text == NULL)
        return -1;

    /* Work on a private copy: nextword() temporarily terminates each word
     * in place.  Keep the allocated pointer separate from the trimmed one
     * so that the buffer that was allocated is the one that is freed. */
    textbuf = ckd_salloc(text);
    trimmed = string_trim(textbuf, STRING_BOTH);

    alignment = ps_alignment_init(ps->d2p);
    ps_alignment_add_word(alignment, dict_wordid(ps->dict, "<s>"), 0);
    for (ptr = trimmed;
         (n = nextword(ptr, " \t\n\r", &word, &delimfound)) >= 0;
         ptr = word + n, *ptr = delimfound) {
        int wid = dict_wordid(ps->dict, word);

        if (wid == BAD_S3WID) {
            E_ERROR("Unknown word %s\n", word);
            goto done;
        }
        ps_alignment_add_word(alignment, wid, 0);
    }
    ps_alignment_add_word(alignment, dict_wordid(ps->dict, "</s>"), 0);

    /* NOTE(review): assumes ps_alignment_populate() signals failure with a
     * negative return value -- confirm against ps_alignment.c. */
    if (ps_alignment_populate(alignment) < 0) {
        E_ERROR("Failed to populate alignment\n");
        goto done;
    }

    /* state_align_search_init() can return NULL; do not pass that on. */
    search = state_align_search_init(name, ps->config, ps->acmod, alignment);
    if (search == NULL)
        goto done;

    rv = 0;

done:
    /* Drop our reference to the alignment in every case. */
    ps_alignment_free(alignment);
    ckd_free(textbuf);
    if (rv < 0)
        return rv;
    return set_search_internal(ps, search);
}

int
ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile)
{
Expand Down
10 changes: 10 additions & 0 deletions src/libpocketsphinx/ps_alignment.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@ ps_alignment_init(dict2pid_t *d2p)
{
ps_alignment_t *al = ckd_calloc(1, sizeof(*al));
al->d2p = dict2pid_retain(d2p);
al->refcount = 1;
return al;
}

/**
 * Take an additional reference on an alignment.
 *
 * @param al Alignment whose reference count is incremented.
 * @return The same pointer that was passed in.
 */
ps_alignment_t *
ps_alignment_retain(ps_alignment_t *al)
{
    al->refcount += 1;
    return al;
}

Expand All @@ -60,6 +68,8 @@ ps_alignment_free(ps_alignment_t *al)
{
if (al == NULL)
return 0;
if (--al->refcount > 0)
return al->refcount;
dict2pid_free(al->d2p);
ckd_free(al->word.seq);
ckd_free(al->sseq.seq);
Expand Down
18 changes: 18 additions & 0 deletions src/libpocketsphinx/ps_alignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ struct ps_alignment_vector_s {
typedef struct ps_alignment_vector_s ps_alignment_vector_t;

struct ps_alignment_s {
int refcount;
dict2pid_t *d2p;
ps_alignment_vector_t word;
ps_alignment_vector_t sseq;
Expand All @@ -97,6 +98,11 @@ typedef struct ps_alignment_iter_s ps_alignment_iter_t;
*/
ps_alignment_t *ps_alignment_init(dict2pid_t *d2p);

/**
* Retain an alignment
*/
ps_alignment_t *ps_alignment_retain(ps_alignment_t *al);

/**
* Release an alignment
*/
Expand Down Expand Up @@ -155,6 +161,8 @@ ps_alignment_iter_t *ps_alignment_states(ps_alignment_t *al);

/**
* Get the alignment entry pointed to by an iterator.
*
* The iterator retains ownership of this so don't try to free it.
*/
ps_alignment_entry_t *ps_alignment_iter_get(ps_alignment_iter_t *itor);

Expand All @@ -165,20 +173,30 @@ ps_alignment_iter_t *ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos);

/**
* Move an alignment iterator forward.
*
* If the end of the alignment is reached, this will free the iterator
* and return NULL.
*/
ps_alignment_iter_t *ps_alignment_iter_next(ps_alignment_iter_t *itor);

/**
* Move an alignment iterator back.
*
* If the start of the alignment is reached, this will free the iterator
* and return NULL.
*/
ps_alignment_iter_t *ps_alignment_iter_prev(ps_alignment_iter_t *itor);

/**
* Get a new iterator starting at the parent of the current node.
*
* If there is no parent node, NULL is returned.
*/
ps_alignment_iter_t *ps_alignment_iter_up(ps_alignment_iter_t *itor);
/**
* Get a new iterator starting at the first child of the current node.
*
* If there is no child node, NULL is returned.
*/
ps_alignment_iter_t *ps_alignment_iter_down(ps_alignment_iter_t *itor);

Expand Down
121 changes: 117 additions & 4 deletions src/libpocketsphinx/state_align_search.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ state_align_search_finish(ps_search_t *search)
/**
 * Reinitialization hook for the forced-alignment search.
 *
 * Deliberately a no-op that always reports success.
 */
static int
state_align_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
{
    /* Nothing to do here: a new search has to be made for each utterance. */
    (void)search;
    (void)dict;
    (void)d2p;
    return 0;
}

Expand All @@ -265,19 +265,132 @@ state_align_search_free(ps_search_t *search)
ckd_free(sas->hmms);
ckd_free(sas->tokens);
hmm_context_free(sas->hmmctx);
ps_alignment_free(sas->al);
ckd_free(sas);
}

/**
 * Segment iterator over the result of a forced-alignment search.
 *
 * The functions in this file cast between ps_seg_t * and
 * state_align_seg_t *, so `base` must remain the first member.
 */
typedef struct state_align_seg_s state_align_seg_t;
struct state_align_seg_s {
    ps_seg_t base;             /**< Base segment structure. */
    ps_alignment_iter_t *itor; /**< Iterator over the alignment entries. */
};

/**
 * Release a forced-alignment segment iterator.
 *
 * Frees the wrapped alignment iterator first, then the segment itself.
 */
static void
state_align_search_seg_free(ps_seg_t * seg)
{
    state_align_seg_t *aseg = (state_align_seg_t *)seg;

    ps_alignment_iter_free(aseg->itor);
    ckd_free(aseg);
}

/**
 * Copy the alignment entry under the iterator into the generic segment
 * fields (word string, start/end frame, scores).
 */
static void
state_align_search_fill_iter(ps_seg_t *seg)
{
    ps_alignment_entry_t *ent =
        ps_alignment_iter_get(((state_align_seg_t *)seg)->itor);

    /* The end frame is inclusive, hence the -1. */
    seg->sf = ent->start;
    seg->ef = ent->start + ent->duration - 1;
    /* Only an acoustic score is copied; the language score is set to zero. */
    seg->ascr = ent->score;
    seg->lscr = 0;
    seg->word = dict_wordstr(ps_search_dict(seg->search), ent->id.wid);
}

/**
 * Advance the segment iterator to the next alignment entry.
 *
 * @return seg, refilled from the next entry, or NULL once the alignment is
 *         exhausted (in which case seg has been freed).
 */
static ps_seg_t *
state_align_search_seg_next(ps_seg_t * seg)
{
    state_align_seg_t *aseg = (state_align_seg_t *)seg;

    aseg->itor = ps_alignment_iter_next(aseg->itor);
    if (aseg->itor != NULL) {
        state_align_search_fill_iter(seg);
        return seg;
    }

    /* No more entries: release the segment as well. */
    state_align_search_seg_free(seg);
    return NULL;
}

/* Segment-iterator callbacks for forced-alignment results; attached to each
   new segment by state_align_search_seg_iter(). */
static ps_segfuncs_t state_align_segfuncs = {
/* seg_next */ state_align_search_seg_next,
/* seg_free */ state_align_search_seg_free
};


/**
 * Create a segment iterator over the word level of the alignment.
 *
 * Only words are exposed through the decoder's segment API, even though
 * the alignment itself has several levels.
 *
 * @return A new segment positioned on the first word, or NULL if the search
 *         has no alignment or no word iterator could be obtained.
 */
static ps_seg_t *
state_align_search_seg_iter(ps_search_t * search)
{
    state_align_search_t *sas = (state_align_search_t *) search;
    ps_alignment_iter_t *words;
    state_align_seg_t *aseg;

    words = (sas->al != NULL) ? ps_alignment_words(sas->al) : NULL;
    if (words == NULL)
        return NULL;

    aseg = ckd_calloc(1, sizeof(*aseg));
    aseg->itor = words;
    aseg->base.search = search;
    aseg->base.vt = &state_align_segfuncs;
    state_align_search_fill_iter(&aseg->base);

    return &aseg->base;
}

/**
 * Build the hypothesis string for a forced-alignment search.
 *
 * The result is the space-separated sequence of word strings at the word
 * level of the alignment.  It is stored in search->hyp_str, replacing any
 * previous hypothesis.
 *
 * @param search    Forced-alignment search.
 * @param out_score If not NULL, receives the score of the last word entry
 *                  in the alignment (each word overwrites the previous one).
 * @return search->hyp_str, or NULL if there is no alignment, it contains no
 *         words, or a word id cannot be mapped back to a string.
 */
static char const *
state_align_search_hyp(ps_search_t *search, int32 *out_score)
{
    state_align_search_t *sas = (state_align_search_t *)search;
    ps_alignment_iter_t *itor;
    size_t hyp_len = 0;
    char *out;

    if (search->hyp_str)
        ckd_free(search->hyp_str);
    search->hyp_str = NULL;
    if (sas->al == NULL)
        return NULL;

    /* First pass: validate every word and measure the space needed
     * (each word plus one separator). */
    for (itor = ps_alignment_words(sas->al);
         itor; itor = ps_alignment_iter_next(itor)) {
        ps_alignment_entry_t *ent = ps_alignment_iter_get(itor);
        const char *word = dict_wordstr(ps_search_dict(search), ent->id.wid);

        if (word == NULL) {
            E_ERROR("Unknown word id %d in alignment\n", ent->id.wid);
            /* We are leaving mid-iteration, so the iterator is still
             * live and has to be released here. */
            ps_alignment_iter_free(itor);
            return NULL;
        }
        hyp_len += strlen(word) + 1;
    }
    if (hyp_len == 0)
        return NULL;

    /* Second pass: append through a write cursor instead of rescanning
     * the whole buffer with strcat() for every word. */
    search->hyp_str = ckd_calloc(hyp_len + 1, sizeof(*search->hyp_str));
    out = search->hyp_str;
    for (itor = ps_alignment_words(sas->al);
         itor; itor = ps_alignment_iter_next(itor)) {
        ps_alignment_entry_t *ent = ps_alignment_iter_get(itor);
        const char *word = dict_wordstr(ps_search_dict(search), ent->id.wid);
        size_t len = strlen(word);

        memcpy(out, word, len);
        out += len;
        *out++ = ' ';
        if (out_score != NULL)
            *out_score = ent->score;
    }
    /* Replace the trailing separator, if anything was written. */
    if (out > search->hyp_str)
        out[-1] = '\0';
    return search->hyp_str;
}

/**
 * Search callback table for forced alignment.
 *
 * One initializer per labelled slot.  The old NULL placeholders for the
 * hyp and seg_iter slots are removed; left in place next to their
 * replacements they would shift every following callback into the wrong
 * slot.
 */
static ps_searchfuncs_t state_align_search_funcs = {
    /* start: */    state_align_search_start,
    /* step: */     state_align_search_step,
    /* finish: */   state_align_search_finish,
    /* reinit: */   state_align_search_reinit,
    /* free: */     state_align_search_free,
    /* lattice: */  NULL,
    /* hyp: */      state_align_search_hyp,
    /* prob: */     NULL,
    /* seg_iter: */ state_align_search_seg_iter,
};

ps_search_t *
Expand All @@ -300,7 +413,7 @@ state_align_search_init(const char *name,
ckd_free(sas);
return NULL;
}
sas->al = al;
sas->al = ps_alignment_retain(al);

/* Generate HMM vector from phone level of alignment. */
sas->n_phones = ps_alignment_n_phones(al);
Expand Down
2 changes: 1 addition & 1 deletion src/libpocketsphinx/state_align_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ struct state_align_hist_s {
typedef struct state_align_hist_s state_align_hist_t;

/**
* Phone loop search structure.
* Forced alignment search structure.
*/
struct state_align_search_s {
ps_search_t base; /**< Base search structure. */
Expand Down
Loading