/**
 * Fill the single-phone-word context tables for base phone b.
 *
 * For every (left, right) context pair the nearest triphone in
 * WORD_POSN_SINGLE position is looked up and its senone-sequence ID is
 * written to d2p.lrdiph_rc[b][left][right].  When the right context is the
 * silence phone the same ID is also written to d2p.ldiph_lc[b][right][left];
 * when the left context is the silence phone (and rdiph_rc is non-null) it
 * is written to rdiph_rc[b][left][right] as well.
 */
public static void populate_lrdiph(Pointer<dict2pid_t> d2p, Pointer<Pointer<Pointer<ushort>>> rdiph_rc, short b)
{
    Pointer<bin_mdef_t> mdef = d2p.Deref.mdef;

    for (short left = 0; left < bin_mdef.bin_mdef_n_ciphone(mdef); left++)
    {
        for (short right = 0; right < bin_mdef.bin_mdef_n_ciphone(mdef); right++)
        {
            int pid = bin_mdef.bin_mdef_phone_id_nearest(mdef, (short)b, (short)left, (short)right, word_posn_t.WORD_POSN_SINGLE);
            var ssid = bin_mdef.bin_mdef_pid2ssid(mdef, pid);

            /* Every context pair gets an entry in the combined table. */
            d2p.Deref.lrdiph_rc[b][left].Set(right, checked((ushort)ssid));

            /* Silence on the right: also usable as a word-initial entry. */
            if (right == bin_mdef.bin_mdef_silphone(mdef))
            {
                d2p.Deref.ldiph_lc[b][right].Set(left, checked((ushort)ssid));
            }

            /* Silence on the left: also usable as a word-final entry. */
            if (rdiph_rc.IsNonNull && left == bin_mdef.bin_mdef_silphone(mdef))
            {
                rdiph_rc[b][left].Set(right, checked((ushort)ssid));
            }

            SphinxAssert.assert(s3types.IS_S3SSID(ssid) != 0);
            /* A per-entry debug dump used to live here; it was disabled
             * because it produced far too much output. */
        }
    }
}
/**
 * Return the name string of CI phone `id`.
 * Asserts that m is non-null and that id lies in [0, n_ciphone).
 */
public static Pointer<byte> mdef_ciphone_str(Pointer<mdef_t> m, int id)
{
    SphinxAssert.assert(m.IsNonNull);

    bool inRange = (id >= 0) && (id < m.Deref.n_ciphone);
    SphinxAssert.assert(inRange);

    Pointer<byte> name = m.Deref.ciphone[id].name;
    return name;
}
/*
 * Compute the score of a single senone from the top-N Gaussian densities.
 *
 * The senone PDF tables hold SCALED, NEGATED log values, and they are
 * indexed either as pdf[id][feat][codeword] (when n_gauden > 1) or as
 * pdf[feat][codeword][id] (otherwise).  The per-feature scores are summed,
 * negated, divided by s.aw and finally clamped to the signed 16-bit range.
 */
public static int senone_eval(Pointer<senone_t> s, int id, Pointer<Pointer<gauden_dist_t>> dist, int n_top)
{
    SphinxAssert.assert((id >= 0) && (id < s.Deref.n_sen));
    SphinxAssert.assert((n_top > 0) && (n_top <= s.Deref.n_cw));

    /* Added before shifting so the density is rounded up, not truncated. */
    int roundUp = (1 << hmm.SENSCR_SHIFT) - 1;
    int total = 0;

    for (int feat = 0; feat < s.Deref.n_feat; feat++)
    {
        Pointer<gauden_dist_t> featDist = dist[feat];

        /* Best codeword for this feature seeds the running score. */
        int density = ((int)featDist[0].dist + roundUp) >> hmm.SENSCR_SHIFT;
        int best = density;
        int featScore;
        if (s.Deref.n_gauden > 1)
        {
            featScore = density + -s.Deref.pdf[id][feat][featDist[0].id];   /* untransposed */
        }
        else
        {
            featScore = density + -s.Deref.pdf[feat][featDist[0].id][id];   /* transposed */
        }

        err.E_DEBUG(string.Format("fden[{0}][{1}] l+= {2} + {3} = {4}\n", id, feat, -(featScore - density), -(density - best), -(featScore - best)));

        /* Fold the remaining n_top - 1 codewords in with log-add. */
        for (int rank = 1; rank < n_top; rank++)
        {
            density = ((int)featDist[rank].dist + roundUp) >> hmm.SENSCR_SHIFT;

            int cwScore;
            if (s.Deref.n_gauden > 1)
            {
                cwScore = density + -s.Deref.pdf[id][feat][featDist[rank].id];
            }
            else
            {
                cwScore = density + -s.Deref.pdf[feat][featDist[rank].id][id];
            }

            featScore = logmath.logmath_add(s.Deref.lmath, featScore, cwScore);
            err.E_DEBUG(string.Format("fden[{0}][{1}] l+= {2} + {3} = {4}\n", id, feat, -(cwScore - density), -(density - best), -(featScore - best)));
        }

        /* Senone scores are negated log values too, hence the subtraction. */
        total -= featScore;
    }

    /* Downscale. */
    total /= s.Deref.aw;

    /* Keep the result representable as a 16-bit signed value. */
    if (total > 32767)
    {
        total = 32767;
    }
    else if (total < -32768)
    {
        total = -32768;
    }

    return total;
}
/**
 * Register CI phone `ci` under index `p`.
 *
 * A copy of the name is stored in m.ciphone[p].name and entered into the
 * CI-phone hash table with value p.  If the hash table hands back a value
 * other than p, E_FATAL is raised.
 */
public static void ciphone_add(Pointer<mdef_t> m, Pointer<byte> ci, int p)
{
    SphinxAssert.assert(p < m.Deref.n_ciphone);

    /* The copy is released when the model definition is freed. */
    m.Deref.ciphone[p].name = ckd_alloc.ckd_salloc(ci);

    var stored = hash_table.hash_table_enter(m.Deref.ciphone_ht, m.Deref.ciphone[p].name, p);
    if (stored == p)
    {
        return;
    }

    err.E_FATAL(string.Format("hash_table_enter({0}) failed; duplicate CIphone?\n", cstring.FromCString(m.Deref.ciphone[p].name)));
}
/**
 * Look up the word ID of `word` in the dictionary hash table.
 * Returns s3types.BAD_S3WID when the lookup reports failure (< 0).
 */
public static int dict_wordid(Pointer<dict_t> d, Pointer<byte> word)
{
    SphinxAssert.assert(d.IsNonNull);
    SphinxAssert.assert(word.IsNonNull);

    BoxedValueInt found = new BoxedValueInt();
    int status = hash_table.hash_table_lookup_int32(d.Deref.ht, word, found);
    if (status >= 0)
    {
        return found.Val;
    }

    return s3types.BAD_S3WID;
}
/**
 * Collapse identical senone sequences into a compact table.
 *
 * Each phone's senone sequence is entered into a hash table keyed on its
 * raw bytes; the value returned becomes the phone's ssid.  A new table with
 * one row per distinct sequence then replaces m.sseq, and m.n_sseq is
 * updated.  Known limitation carried over from the original: the
 * transition-matrix ID is not part of the key.
 */
public static void sseq_compress(Pointer<mdef_t> m)
{
    uint statesPerSeq = (uint)(m.Deref.n_emit_state);
    BoxedValue<int> slot = new BoxedValue<int>();
    Pointer<hash_table_t> seen = hash_table.hash_table_new(m.Deref.n_phone, hash_table.HASH_CASE_YES);
    int uniqueCount = 0;

    /* Pass 1: assign an ssid to every phone; a returned value equal to the
     * proposed one means the sequence was not in the table before. */
    for (int phone = 0; phone < m.Deref.n_phone; phone++)
    {
        slot.Val = hash_table.hash_table_enter_bkey_int32(seen, m.Deref.sseq[phone].ReinterpretCast<byte>(), statesPerSeq * 2 /* bytes per 16-bit entry */, uniqueCount);
        if (slot.Val == uniqueCount)
        {
            uniqueCount++;
        }

        m.Deref.phone[phone].ssid = slot.Val;
    }

    /* Pass 2: build the compacted table (released when the mdef is freed). */
    Pointer<Pointer<ushort>> compact = ckd_alloc.ckd_calloc_2d<ushort>((uint)uniqueCount, (uint)m.Deref.n_emit_state);

    Pointer<gnode_t> entries = hash_table.hash_table_tolist(seen, slot);
    SphinxAssert.assert(slot.Val == uniqueCount);

    Pointer<gnode_t> node = entries;
    while (node.IsNonNull)
    {
        Pointer<hash_entry_t> entry = (Pointer<hash_entry_t>)glist.gnode_ptr(node);
        int row = (int)hash_table.hash_entry_val(entry);
        hash_table.hash_entry_key(entry).ReinterpretCast<ushort>().MemCopyTo(compact[row], (int)statesPerSeq);
        node = glist.gnode_next(node);
    }

    glist.glist_free(entries);

    /* Swap the temporary per-phone table for the compacted one. */
    ckd_alloc.ckd_free_2d(m.Deref.sseq);
    m.Deref.sseq = compact;
    m.Deref.n_sseq = uniqueCount;

    hash_table.hash_table_free(seen);
}
/**
 * Normalize the top-N density scores of all active codebooks.
 *
 * For each feature stream the best (largest) shifted top-1 score over the
 * active codebooks is taken as the normalizer.  Every top-N score of every
 * active codebook is then shifted, made relative to that normalizer,
 * negated, and capped at MAX_NEG_ASCR.  One normalizer is shared by all
 * codebooks of a stream, which is why the feature loop is the outer one.
 *
 * The parameters z and frame are not used by this implementation.
 * Always returns 0.
 */
public static int ptm_mgau_codebook_norm(ptm_mgau_t s, Pointer<Pointer<float>> z, int frame)
{
    for (int feat = 0; feat < s.g.Deref.n_feat; ++feat)
    {
        /* Find the best top-1 score among the active codebooks. */
        int best = hmm.WORST_SCORE;
        for (int cb = 0; cb < s.g.Deref.n_mgau; ++cb)
        {
            if (bitvec.bitvec_is_clear(s.f.Deref.mgau_active, cb) == 0)
            {
                if (best < s.f.Deref.topn[cb][feat][0].score >> hmm.SENSCR_SHIFT)
                {
                    best = s.f.Deref.topn[cb][feat][0].score >> hmm.SENSCR_SHIFT;
                }
            }
        }

        SphinxAssert.assert(best != hmm.WORST_SCORE);

        /* Rewrite each active codebook's entries relative to it. */
        for (int cb = 0; cb < s.g.Deref.n_mgau; ++cb)
        {
            if (bitvec.bitvec_is_clear(s.f.Deref.mgau_active, cb) != 0)
            {
                continue;
            }

            for (int rank = 0; rank < s.max_topn; ++rank)
            {
                /* Entries are replaced wholesale through Set() rather than
                 * having their score field modified in place. */
                int normalized = s.f.Deref.topn[cb][feat][rank].score;
                normalized >>= hmm.SENSCR_SHIFT;
                normalized -= best;
                normalized = -normalized;
                if (normalized > tied_mgau_common.MAX_NEG_ASCR)
                {
                    normalized = tied_mgau_common.MAX_NEG_ASCR;
                }

                s.f.Deref.topn[cb][feat].Set(rank, new ptm_topn_t()
                {
                    score = normalized,
                    cw = s.f.Deref.topn[cb][feat][rank].cw
                });
            }
        }
    }

    return 0;
}
/**
 * Report whether word `w` is a filler word.
 *
 * The word is first mapped to its base word ID.  The start and finish
 * words are never fillers; otherwise the word is a filler when its base ID
 * lies in [filler_start, filler_end].  Returns 1 for filler, 0 otherwise.
 */
public static int dict_filler_word(Pointer<dict_t> d, int w)
{
    SphinxAssert.assert(d.IsNonNull);
    SphinxAssert.assert((w >= 0) && (w < d.Deref.n_word));

    int baseWid = dict_basewid(d, w);

    if ((baseWid == d.Deref.startwid) || (baseWid == d.Deref.finishwid))
    {
        return 0;
    }

    bool inFillerRange = (baseWid >= d.Deref.filler_start) && (baseWid <= d.Deref.filler_end);
    return inFillerRange ? 1 : 0;
}
gauden_dist(Pointer <gauden_t> g, int mgau, int n_top, Pointer <Pointer <float> > obs, Pointer <Pointer <gauden_dist_t> > out_dist)
{
    /*
     * Compute the top-N densities of codebook `mgau` for one observation.
     *
     * For every feature stream f, compute_dist() is run on obs[f] against
     * the means, variances and determinants of g for (mgau, f), writing the
     * n_top results into out_dist[f].  Always returns 0.
     *
     * NOTE(review): the return type / modifiers of this definition are not
     * visible in this excerpt.
     */
    int f;

    /* n_top must be between 1 and the number of densities per codebook. */
    SphinxAssert.assert((n_top > 0) && (n_top <= g.Deref.n_density));

    for (f = 0; f < g.Deref.n_feat; f++)
    {
        compute_dist(out_dist[f], n_top, obs[f], g.Deref.featlen[f], g.Deref.mean[mgau][f], g.Deref.var[mgau][f], g.Deref.det[mgau][f], g.Deref.n_density);

        /* Log the first entry of this stream's result (id and shifted score). */
        err.E_DEBUG(string.Format("Top CW({0},{1}) = {2} {3}\n", mgau, f, out_dist[f][0].id, (int)out_dist[f][0].dist >> hmm.SENSCR_SHIFT));
    }

    return(0);
}
/**
 * Record phone `p` as the triphone <ci, lc, rc, wpos>.
 *
 * The base/context/position fields of m.phone[p] are always filled in
 * (state and transition-matrix mappings are added elsewhere).  For
 * context-dependent phones (p >= n_ciphone) a <ci,lc,rc,wpos> -> p mapping
 * is also inserted into the per-position, per-base-phone linked lists;
 * a triphone that is already mapped raises E_FATAL.
 */
public static void triphone_add(Pointer<mdef_t> m, short ci, short lc, short rc, int wpos, int p)
{
    SphinxAssert.assert(p < m.Deref.n_phone);

    m.Deref.phone[p].ci = ci;
    m.Deref.phone[p].lc = lc;
    m.Deref.phone[p].rc = rc;
    m.Deref.phone[p].wpos = wpos;

    /* CI phones need no context mapping. */
    if (p < m.Deref.n_ciphone)
    {
        return;
    }

    /* Row of left-context list heads for this word position. */
    Pointer<Pointer<ph_lc_t>> lcHeads = m.Deref.wpos_ci_lclist[wpos];

    Pointer<ph_lc_t> lcNode = find_ph_lc(lcHeads[(int)ci], lc);
    if (lcNode.IsNull)
    {
        /* First triphone with this left context: push a new node on the
         * front of the list for (wpos, ci). */
        lcNode = ckd_alloc.ckd_calloc_struct<ph_lc_t>(1);
        lcNode.Deref.lc = lc;
        lcNode.Deref.next = lcHeads[(int)ci];
        lcHeads[(int)ci] = lcNode;
    }

    Pointer<ph_rc_t> rcNode = find_ph_rc(lcNode.Deref.rclist, rc);
    if (rcNode.IsNonNull)
    {
        Pointer<byte> nameBuf = PointerHelpers.Malloc<byte>(4096);
        mdef_phone_str(m, rcNode.Deref.pid, nameBuf);
        err.E_FATAL(string.Format("Duplicate triphone: {0}\n", cstring.FromCString(nameBuf)));
    }

    /* Push the right-context node carrying the phone ID. */
    rcNode = ckd_alloc.ckd_calloc_struct<ph_rc_t>(1);
    rcNode.Deref.rc = rc;
    rcNode.Deref.pid = p;
    rcNode.Deref.next = lcNode.Deref.rclist;
    lcNode.Deref.rclist = rcNode;
}
/**
 * Write a printable name for phone `pid` into buf.
 *
 * CI phones are written as their name; other phones as
 * "base left right position", where position is the character taken from
 * WPOS_NAME at the phone's word-position index.  Always returns 0.
 */
public static int mdef_phone_str(Pointer<mdef_t> m, int pid, Pointer<byte> buf)
{
    SphinxAssert.assert(m.IsNonNull);
    SphinxAssert.assert((pid >= 0) && (pid < m.Deref.n_phone));

    Pointer<byte> positionChars = WPOS_NAME;
    buf[0] = (byte)'\0';

    string text;
    if (pid >= m.Deref.n_ciphone)
    {
        text = string.Format("{0} {1} {2} {3}",
            cstring.FromCString(mdef_ciphone_str(m, m.Deref.phone[pid].ci)),
            cstring.FromCString(mdef_ciphone_str(m, m.Deref.phone[pid].lc)),
            cstring.FromCString(mdef_ciphone_str(m, m.Deref.phone[pid].rc)),
            (char)positionChars[m.Deref.phone[pid].wpos]);
    }
    else
    {
        text = string.Format("{0}", cstring.FromCString(mdef_ciphone_str(m, pid)));
    }

    stdio.sprintf(buf, text);
    return 0;
}
/**
 * Allocate and fill an HMM context.
 *
 * Stores the supplied transition table, senone score array and senone
 * sequence table, and allocates a per-state senone score buffer of
 * n_emit_state entries.  Returns a null pointer (after logging an error)
 * when n_emit_state exceeds HMM_MAX_NSTATE.
 */
public static Pointer<hmm_context_t> hmm_context_init(int n_emit_state, Pointer<Pointer<Pointer<byte>>> tp, Pointer<short> senscore, Pointer<Pointer<ushort>> sseq)
{
    SphinxAssert.assert(n_emit_state > 0);

    bool tooManyStates = n_emit_state > HMM_MAX_NSTATE;
    if (tooManyStates)
    {
        err.E_ERROR(string.Format("Number of emitting states must be <= {0}\n", HMM_MAX_NSTATE));
        return PointerHelpers.NULL<hmm_context_t>();
    }

    Pointer<hmm_context_t> context = ckd_alloc.ckd_calloc_struct<hmm_context_t>(1);
    context.Deref.n_emit_state = n_emit_state;
    context.Deref.tp = tp;
    context.Deref.senscore = senscore;
    context.Deref.sseq = sseq;
    context.Deref.st_sen_scr = ckd_alloc.ckd_calloc<int>(n_emit_state);

    return context;
}
/**
 * Read pronunciation entries from fp into dictionary d.
 *
 * Each line is split into whitespace-separated tokens: the first token is
 * the word, the rest are its CI phones.  Lines starting with "##" or ";;"
 * (the HASHES / SEMICOLONS prefixes) and empty lines are skipped.  Lines
 * with no pronunciation, with a phone unknown to the acoustic model, or
 * that dict_add_word() rejects are reported with E_ERROR and ignored.
 *
 * Always returns 0.
 */
public static int dict_read(FILE fp, Pointer<dict_t> d)
{
    Pointer<lineiter_t> li;
    Pointer<Pointer<byte>> wptr;
    Pointer<short> p;
    int lineno, nwd;
    int w;
    int i, maxwd;
    uint stralloc, phnalloc;

    maxwd = 512;
    p = ckd_alloc.ckd_calloc<short>(maxwd + 4);
    wptr = ckd_alloc.ckd_calloc<Pointer<byte>>(maxwd);   /* Freed below */

    lineno = 0;
    stralloc = phnalloc = 0;
    for (li = pio.lineiter_start(fp); li.IsNonNull; li = pio.lineiter_next(li))
    {
        lineno++;
        if (0 == cstring.strncmp(li.Deref.buf, HASHES, 2) || 0 == cstring.strncmp(li.Deref.buf, SEMICOLONS, 2))
        {
            continue;
        }

        if ((nwd = strfuncs.str2words(li.Deref.buf, wptr, maxwd)) < 0)
        {
            /* Too many tokens for the current buffers: count them, grow
             * p and wptr accordingly ... */
            nwd = strfuncs.str2words(li.Deref.buf, PointerHelpers.NULL<Pointer<byte>>(), 0);
            SphinxAssert.assert(nwd > maxwd);   /* why else would it fail? */
            maxwd = nwd;
            p = ckd_alloc.ckd_realloc(p, (uint)(maxwd + 4));
            wptr = (Pointer<Pointer<byte>>)ckd_alloc.ckd_realloc(wptr, maxwd /* * sizeof(*wptr)*/);

            /* ... and split the line again.  The failed first attempt did
             * not leave usable token pointers in wptr, so without this the
             * code below would read stale entries for this line. */
            nwd = strfuncs.str2words(li.Deref.buf, wptr, maxwd);
        }

        if (nwd == 0)   /* Empty line */
        {
            continue;
        }

        /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */
        if (nwd == 1)
        {
            err.E_ERROR(string.Format("Line {0}: No pronunciation for word '{1}'; ignored\n", lineno, cstring.FromCString(wptr[0])));
            continue;
        }

        /* Convert pronunciation string to CI-phone-ids */
        for (i = 1; i < nwd; i++)
        {
            p[i - 1] = dict_ciphone_id(d, wptr[i]);
            if (s3types.NOT_S3CIPID(p[i - 1]) != 0)
            {
                err.E_ERROR(string.Format("Line {0}: Phone '{1}' is mising in the acoustic model; word '{2}' ignored\n", lineno, cstring.FromCString(wptr[i]), cstring.FromCString(wptr[0])));
                break;
            }
        }

        if (i == nwd)
        {
            /* All CI-phones successfully converted to IDs */
            w = dict_add_word(d, wptr[0], p, nwd - 1);
            if (s3types.NOT_S3WID(w) != 0)
            {
                err.E_ERROR(string.Format("Line {0}: Failed to add the word '{1}' (duplicate?); ignored\n", lineno, cstring.FromCString(wptr[0])));
            }
            else
            {
                /* Running totals for the summary message only. */
                stralloc += cstring.strlen(d.Deref.word[w].word);
                phnalloc += (uint)d.Deref.word[w].pronlen * 2;
            }
        }
    }

    err.E_INFO(string.Format("Dictionary size {0}, allocated {1} KiB for strings, {2} KiB for phones\n", dict_size(d), (int)stralloc / 1024, (int)phnalloc / 1024));
    ckd_alloc.ckd_free(p);
    ckd_alloc.ckd_free(wptr);

    return(0);
}
/**
 * Build the dictionary-to-phone-ID mapping tables for a model definition
 * and a dictionary.
 *
 * Allocates three n_ciphone^3 tables of senone-sequence IDs (ldiph_lc,
 * lrdiph_rc and a temporary rdiph_rc), initializes them to BAD_S3SSID, and
 * fills them from the dictionary:
 *   - words with two or more phones contribute word-initial entries
 *     (ldiph_lc) and word-final entries (rdiph_rc);
 *   - single-phone words are handled by populate_lrdiph().
 * Bit vectors ensure each diphone / single phone is processed only once.
 * The temporary rdiph_rc table is then handed to
 * compress_right_context_tree() and freed; compress_left_right_context_tree()
 * is run on the result as well.
 *
 * Returns the newly allocated dict2pid_t with a reference count of 1; it
 * holds retained references to both mdef and dictionary.
 */
public static Pointer <dict2pid_t> dict2pid_build(Pointer <bin_mdef_t> mdef, Pointer <dict_t> dictionary)
{
    Pointer <dict2pid_t> returnVal;
    Pointer <Pointer <Pointer <ushort> > > rdiph_rc;
    Pointer <uint> ldiph;
    Pointer <uint> rdiph;
    Pointer <uint> single;
    int pronlen;
    int b, l, r, w, p;

    err.E_INFO("Building PID tables for dictionary\n");
    SphinxAssert.assert(mdef.IsNonNull);
    SphinxAssert.assert(dictionary.IsNonNull);

    returnVal = (Pointer <dict2pid_t>)ckd_alloc.ckd_calloc_struct <dict2pid_t>(1);
    returnVal.Deref.refcount = 1;
    returnVal.Deref.mdef = bin_mdef.bin_mdef_retain(mdef);
    returnVal.Deref.dict = dict.dict_retain(dictionary);

    /* The message reports the size of one n_ciphone^3 table; three tables
     * of that size are allocated below. */
    err.E_INFO(string.Format("Allocating {0}^3 * {1} bytes ({2} KiB) for word-initial triphones\n", mdef.Deref.n_ciphone, sizeof(ushort), mdef.Deref.n_ciphone * mdef.Deref.n_ciphone * mdef.Deref.n_ciphone * sizeof(ushort) / 1024));
    returnVal.Deref.ldiph_lc = ckd_alloc.ckd_calloc_3d <ushort>((uint)mdef.Deref.n_ciphone, (uint)mdef.Deref.n_ciphone, (uint)mdef.Deref.n_ciphone);
    /* Only used internally to generate rssid; freed before returning */
    rdiph_rc = ckd_alloc.ckd_calloc_3d <ushort>((uint)mdef.Deref.n_ciphone, (uint)mdef.Deref.n_ciphone, (uint)mdef.Deref.n_ciphone);
    returnVal.Deref.lrdiph_rc = ckd_alloc.ckd_calloc_3d <ushort>((uint)mdef.Deref.n_ciphone, (uint)mdef.Deref.n_ciphone, (uint)mdef.Deref.n_ciphone);

    /* Mark every entry as "no senone sequence".  (A memset would do if
     * s3types.BAD_S3SSID were guaranteed to be 65535.)  Note the index
     * order: ldiph_lc is [base][right][left], the other two are
     * [base][left][right]. */
    for (b = 0; b < mdef.Deref.n_ciphone; ++b)
    {
        for (r = 0; r < mdef.Deref.n_ciphone; ++r)
        {
            for (l = 0; l < mdef.Deref.n_ciphone; ++l)
            {
                returnVal.Deref.ldiph_lc[b][r].Set(l, s3types.BAD_S3SSID);
                returnVal.Deref.lrdiph_rc[b][l].Set(r, s3types.BAD_S3SSID);
                rdiph_rc[b][l].Set(r, s3types.BAD_S3SSID);
            }
        }
    }

    /* Track which diphones / ciphones have been seen, so each is expanded
     * only once no matter how many words share it. */
    ldiph = bitvec.bitvec_alloc(mdef.Deref.n_ciphone * mdef.Deref.n_ciphone);
    rdiph = bitvec.bitvec_alloc(mdef.Deref.n_ciphone * mdef.Deref.n_ciphone);
    single = bitvec.bitvec_alloc(mdef.Deref.n_ciphone);

    for (w = 0; w < dict.dict_size(returnVal.Deref.dict); w++)
    {
        pronlen = dict.dict_pronlen(dictionary, w);

        if (pronlen >= 2)
        {
            /* Word-initial diphone: first phone b with right neighbour r. */
            b = dict.dict_first_phone(dictionary, w);
            r = dict.dict_second_phone(dictionary, w);
            /* Populate ldiph_lc */
            if (bitvec.bitvec_is_clear(ldiph, b * mdef.Deref.n_ciphone + r) != 0)
            {
                /* Mark this diphone as done */
                bitvec.bitvec_set(ldiph, b * mdef.Deref.n_ciphone + r);

                /* Record all possible ssids for b(?,r) */
                for (l = 0; l < bin_mdef.bin_mdef_n_ciphone(mdef); l++)
                {
                    p = bin_mdef.bin_mdef_phone_id_nearest(mdef, (short)b, (short)l, (short)r, word_posn_t.WORD_POSN_BEGIN);
                    returnVal.Deref.ldiph_lc[b][r].Set(l, checked ((ushort)bin_mdef.bin_mdef_pid2ssid(mdef, p)));
                }
            }

            /* Populate rdiph_rc: word-final diphone, last phone b with
             * left neighbour l */
            l = dict.dict_second_last_phone(dictionary, w);
            b = dict.dict_last_phone(dictionary, w);
            if (bitvec.bitvec_is_clear(rdiph, b * mdef.Deref.n_ciphone + l) != 0)
            {
                /* Mark this diphone as done */
                bitvec.bitvec_set(rdiph, b * mdef.Deref.n_ciphone + l);

                /* Record all possible ssids for b(l,?) */
                for (r = 0; r < bin_mdef.bin_mdef_n_ciphone(mdef); r++)
                {
                    p = bin_mdef.bin_mdef_phone_id_nearest(mdef, (short)b, (short)l, (short)r, word_posn_t.WORD_POSN_END);
                    rdiph_rc[b][l].Set(r, checked ((ushort)bin_mdef.bin_mdef_pid2ssid(mdef, p)));
                }
            }
        }
        else if (pronlen == 1)
        {
            b = dict.dict_pron(dictionary, w, 0);
            err.E_DEBUG(string.Format("Building tables for single phone word {0} phone {1} = {2}\n", cstring.FromCString(dict.dict_wordstr(dictionary, w)), b, cstring.FromCString(bin_mdef.bin_mdef_ciphone_str(mdef, b))));
            /* Populate lrdiph_rc (and also ldiph_lc, rdiph_rc if needed) */
            if (bitvec.bitvec_is_clear(single, b) != 0)
            {
                populate_lrdiph(returnVal, rdiph_rc, checked ((short)b));
                bitvec.bitvec_set(single, b);
            }
        }
    }

    bitvec.bitvec_free(ldiph);
    bitvec.bitvec_free(rdiph);
    bitvec.bitvec_free(single);

    /* Try to compress rdiph_rc into rdiph_rc_compressed */
    compress_right_context_tree(returnVal, rdiph_rc);
    compress_left_right_context_tree(returnVal);

    ckd_alloc.ckd_free_3d(rdiph_rc);

    dict2pid_report(returnVal);
    return(returnVal);
}
/**
 * Load an LDA transform matrix from `ldafile` into `feat`.
 *
 * Only single-stream features are supported.  The file header is read and
 * its "version" argument compared against MATRIX_FILE_VERSION (a mismatch
 * only produces a warning).  Any previously loaded transform is freed, the
 * new 3-D matrix is read, and feat.n_lda / feat.lda / feat.out_dim are
 * updated.
 *
 * dim is the requested output dimensionality; a value <= 0 or larger than
 * the matrix's second dimension is replaced by that dimension.
 *
 * Returns 0 on success and -1 when the stream count is wrong, the file
 * cannot be opened, or the header / matrix cannot be read.  A matrix whose
 * last dimension differs from the feature stream length raises E_FATAL.
 */
public static int feat_read_lda(Pointer<feat_t> feat, Pointer<byte> ldafile, int dim)
{
    FILE fh;
    int byteswap;
    uint i, m, n;
    Pointer<Pointer<byte>> argname;
    Pointer<Pointer<byte>> argval;

    SphinxAssert.assert(feat.IsNonNull);

    if (feat.Deref.n_stream != 1)
    {
        err.E_ERROR(string.Format("LDA incompatible with multi-stream features (n_stream = {0})\n", feat.Deref.n_stream));
        return(-1);
    }

    if ((fh = FILE.fopen(ldafile, "rb")) == null)
    {
        err.E_ERROR_SYSTEM(string.Format("Failed to open transform file '{0}' for reading", cstring.FromCString(ldafile)));
        return(-1);
    }

    BoxedValue<Pointer<Pointer<byte>>> boxed_argname = new BoxedValue<Pointer<Pointer<byte>>>();
    BoxedValue<Pointer<Pointer<byte>>> boxed_argval = new BoxedValue<Pointer<Pointer<byte>>>();
    if (bio.bio_readhdr(fh, boxed_argname, boxed_argval, out byteswap) < 0)
    {
        err.E_ERROR(string.Format("Failed to read header from transform file '{0}'\n", cstring.FromCString(ldafile)));
        fh.fclose();
        return(-1);
    }

    argname = boxed_argname.Val;
    argval = boxed_argval.Val;

    /* The header is a null-terminated list of name/value pairs; only
     * "version" is inspected. */
    for (i = 0; argname[i].IsNonNull; i++)
    {
        if (cstring.strcmp(argname[i], cstring.ToCString("version")) == 0)
        {
            if (cstring.strcmp(argval[i], MATRIX_FILE_VERSION) != 0)
            {
                err.E_WARN(string.Format("{0}: Version mismatch: {1}, expecting {2}\n", cstring.FromCString(ldafile), cstring.FromCString(argval[i]), cstring.FromCString(MATRIX_FILE_VERSION)));
            }
        }
    }

    bio.bio_hdrarg_free(argname, argval);
    argname = argval = PointerHelpers.NULL<Pointer<byte>>();

    /* Drop any transform loaded earlier before reading the new one. */
    if (feat.Deref.lda.IsNonNull)
    {
        ckd_alloc.ckd_free_3d(feat.Deref.lda);
    }

    {
        /* The reader reports its results through boxed out-values. */
        BoxedValue<Pointer<Pointer<Pointer<float>>>> outlda = new BoxedValue<Pointer<Pointer<Pointer<float>>>>();
        BoxedValue<uint> boxed_n_lda = new BoxedValue<uint>();
        BoxedValue<uint> boxed_m = new BoxedValue<uint>();
        BoxedValue<uint> boxed_n = new BoxedValue<uint>();
        BoxedValue<uint> boxed_checksum = new BoxedValue<uint>();
        if (bio.bio_fread_3d(outlda, boxed_n_lda, boxed_m, boxed_n, fh, (uint)byteswap, boxed_checksum) < 0)
        {
            err.E_ERROR_SYSTEM(string.Format("{0}: bio_fread_3d(lda) failed\n", cstring.FromCString(ldafile)));
            fh.fclose();
            return(-1);
        }

        feat.Deref.n_lda = boxed_n_lda.Val;
        m = boxed_m.Val;
        n = boxed_n.Val;
        feat.Deref.lda = outlda.Val;
    }

    fh.fclose();

    /* Note that SphinxTrain stores the eigenvectors as row vectors. */
    if (n != feat.Deref.stream_len[0])
    {
        /* The placeholder used to read "%{1}", a leftover from the printf
         * original that printed a stray '%' in front of the number. */
        err.E_FATAL(string.Format("LDA matrix dimension {0} doesn't match feature stream size {1}\n", n, feat.Deref.stream_len[0]));
    }

    /* Override dim from file if it is 0 or greater than m. */
    if (dim > m || dim <= 0)
    {
        dim = (int)m;
    }

    feat.Deref.out_dim = (uint)dim;

    return(0);
}
/**
 * Build the compressed single-phone-word table d2p.lrssid from
 * d2p.lrdiph_rc.
 *
 * For every (base, left) pair the row of right-context ssids is run
 * through compress_table(), which fills a scratch ssid list and a
 * CI-phone map.  The leading entries of the scratch list up to the first
 * BAD_S3SSID are copied into lrssid[base][left].ssid together with the
 * full map; rows whose first scratch entry is BAD_S3SSID get null pointers
 * and a count of 0.  The byte total logged at the end is an estimate built
 * from fixed per-entry sizes (8 and 20).
 */
public static void compress_left_right_context_tree(Pointer<dict2pid_t> d2p)
{
    Pointer<bin_mdef_t> mdef = d2p.Deref.mdef;
    int n_ci = mdef.Deref.n_ciphone;

    /* Scratch buffers reused for every row. */
    Pointer<ushort> scratchSsid = ckd_alloc.ckd_calloc<ushort>(n_ci);
    Pointer<short> scratchCimap = ckd_alloc.ckd_calloc<short>(n_ci);

    SphinxAssert.assert(d2p.Deref.lrdiph_rc.IsNonNull);

    d2p.Deref.lrssid = (Pointer<Pointer<xwdssid_t>>)ckd_alloc.ckd_calloc<Pointer<xwdssid_t>>(mdef.Deref.n_ciphone);
    uint bytesUsed = (uint)(mdef.Deref.n_ciphone * 8);

    for (int b = 0; b < n_ci; b++)
    {
        d2p.Deref.lrssid[b] = (Pointer<xwdssid_t>)ckd_alloc.ckd_calloc_struct<xwdssid_t>(mdef.Deref.n_ciphone);
        bytesUsed += (uint)(mdef.Deref.n_ciphone * 20);

        for (int l = 0; l < n_ci; l++)
        {
            Pointer<ushort> fullRow = d2p.Deref.lrdiph_rc[b][l];
            compress_table(fullRow, scratchSsid, scratchCimap, mdef.Deref.n_ciphone);

            /* Count the valid ssids at the front of the scratch list. */
            int used = 0;
            while (used < mdef.Deref.n_ciphone && scratchSsid[used] != s3types.BAD_S3SSID)
            {
                used++;
            }

            if (scratchSsid[0] == s3types.BAD_S3SSID)
            {
                /* Nothing recorded for this (base, left) pair. */
                d2p.Deref.lrssid[b][l].ssid = PointerHelpers.NULL<ushort>();
                d2p.Deref.lrssid[b][l].cimap = PointerHelpers.NULL<short>();
                d2p.Deref.lrssid[b][l].n_ssid = 0;
            }
            else
            {
                d2p.Deref.lrssid[b][l].ssid = ckd_alloc.ckd_calloc<ushort>(used);
                scratchSsid.MemCopyTo(d2p.Deref.lrssid[b][l].ssid, used);
                d2p.Deref.lrssid[b][l].cimap = ckd_alloc.ckd_calloc<short>(mdef.Deref.n_ciphone);
                scratchCimap.MemCopyTo(d2p.Deref.lrssid[b][l].cimap, mdef.Deref.n_ciphone);
                d2p.Deref.lrssid[b][l].n_ssid = used;
            }
        }
    }

    ckd_alloc.ckd_free(scratchSsid);
    ckd_alloc.ckd_free(scratchCimap);

    err.E_INFO(string.Format("Allocated {0} bytes ({1} KiB) for single-phone word triphones\n", (int)bytesUsed, (int)bytesUsed / 1024));
}
/**
 * Load Gaussian mean and variance parameters and prepare them for
 * density evaluation.
 *
 * Means are read first and define the codebook dimensions; variances are
 * then read and must agree in number of codebooks, feature streams,
 * densities and per-stream feature lengths.  On success
 * gauden_dist_precompute() is run with the given variance floor and the
 * new structure is returned.  A null pointer is returned when either file
 * cannot be read or the dimensions disagree.
 */
public static Pointer<gauden_t> gauden_init(Pointer<byte> meanfile, Pointer<byte> varfile, float varfloor, Pointer<logmath_t> lmath)
{
    SphinxAssert.assert(meanfile.IsNonNull);
    SphinxAssert.assert(varfile.IsNonNull);
    SphinxAssert.assert(varfloor > 0.0);

    Pointer<gauden_t> g = ckd_alloc.ckd_calloc_struct<gauden_t>(1);
    g.Deref.lmath = lmath;

    /* Out-values shared by both reads. */
    BoxedValueInt nMgauBox = new BoxedValueInt();
    BoxedValueInt nFeatBox = new BoxedValueInt();
    BoxedValueInt nDensityBox = new BoxedValueInt();
    BoxedValue<Pointer<int>> featlenBox = new BoxedValue<Pointer<int>>();

    g.Deref.mean = gauden_param_read(meanfile, nMgauBox, nFeatBox, nDensityBox, featlenBox);
    if (g.Deref.mean.IsNull)
    {
        return PointerHelpers.NULL<gauden_t>();
    }

    g.Deref.n_mgau = nMgauBox.Val;
    g.Deref.n_feat = nFeatBox.Val;
    g.Deref.n_density = nDensityBox.Val;
    g.Deref.featlen = featlenBox.Val;

    g.Deref.var = gauden_param_read(varfile, nMgauBox, nFeatBox, nDensityBox, featlenBox);
    if (g.Deref.var.IsNull)
    {
        return PointerHelpers.NULL<gauden_t>();
    }

    int varMgau = nMgauBox.Val;
    int varFeat = nFeatBox.Val;
    int varDensity = nDensityBox.Val;
    Pointer<int> varFeatlen = featlenBox.Val;

    /* Means and variances must describe the same set of Gaussians. */
    bool sameShape = (varMgau == g.Deref.n_mgau) && (varFeat == g.Deref.n_feat) && (varDensity == g.Deref.n_density);
    if (!sameShape)
    {
        err.E_ERROR("Mixture-gaussians dimensions for means and variances differ\n");
        ckd_alloc.ckd_free(varFeatlen);
        gauden_free(g);
        return PointerHelpers.NULL<gauden_t>();
    }

    for (int stream = 0; stream < g.Deref.n_feat; stream++)
    {
        if (g.Deref.featlen[stream] != varFeatlen[stream])
        {
            err.E_ERROR("Feature lengths for means and variances differ\n");
            ckd_alloc.ckd_free(varFeatlen);
            gauden_free(g);
            return PointerHelpers.NULL<gauden_t>();
        }
    }

    ckd_alloc.ckd_free(varFeatlen);

    gauden_dist_precompute(g, lmath, varfloor);

    return g;
}
compute_dist(Pointer <gauden_dist_t> out_dist, int n_top, Pointer <float> obs, int featlen, Pointer <Pointer <float> > mean, Pointer <Pointer <float> > var, Pointer <float> det, int n_density)
{
    /*
     * Find the n_top best-scoring densities for one observation vector.
     *
     * Each density d is scored as det[d] minus the sum over dimensions of
     * (obs[i] - mean[d][i])^2 * var[d][i]; larger is better.  out_dist is
     * kept sorted from best to worst and receives the id and score of the
     * n_top best densities.  Always returns 0 on this path.
     *
     * NOTE(review): the return type / modifiers of this definition are not
     * visible in this excerpt.
     */
    int i, j, d;
    Pointer <gauden_dist_t> worst;

    /* Special case optimization when n_density <= n_top */
    if (n_top >= n_density)
    {
        return(compute_dist_all (out_dist, obs, featlen, mean, var, det, n_density));
    }

    /* Start with every slot holding the worst possible score. */
    for (i = 0; i < n_top; i++)
    {
        out_dist[i].dist = WORST_DIST;
    }
    /* Last slot of the sorted list, i.e. the score a candidate must beat. */
    worst = out_dist.Point(n_top - 1);

    for (d = 0; d < n_density; d++)
    {
        Pointer <float> m;
        Pointer <float> v;
        float dval;

        m = mean[d];
        v = var[d];
        dval = det[d];

        /* The score only decreases as dimensions are added, so stop as
         * soon as it drops below the current worst kept score. */
        for (i = 0; (i < featlen) && (dval >= worst.Deref.dist); i++)
        {
            float diff;
            diff = obs[i] - m[i];
            /* Kept as a single expression on purpose (per the original
             * author's note about compiler behaviour). */
            dval -= diff * diff * v[i];
        }

        if ((i < featlen) || (dval < worst.Deref.dist))     /* Codeword d worse than worst */
        {
            continue;
        }

        /* Codeword d at least as good as worst so far; insert in the ordered list */
        for (i = 0; (i < n_top) && (dval < out_dist[i].dist); i++)
        {
            ;
        }
        SphinxAssert.assert(i < n_top);
        /* Shift the tail down by one to open slot i.
         * NOTE(review): if gauden_dist_t is a reference type and this
         * indexer stores references, the shift would alias neighbouring
         * slots instead of copying them - confirm against Pointer<T>. */
        for (j = n_top - 1; j > i; --j)
        {
            out_dist[j] = out_dist[j - 1];
        }
        out_dist[i].dist = dval;
        out_dist[i].id = d;
    }

    return(0);
}
/**
 * Return the name string of CI phone `ci` from a binary model definition.
 *
 * Asserts that m is non-null and that ci lies in [0, n_ciphone).  The
 * lower bound is checked as well, matching mdef_ciphone_str(), so that a
 * negative ID is caught by the assertion instead of reaching the name
 * table lookup.
 */
public static Pointer<byte> bin_mdef_ciphone_str(Pointer<bin_mdef_t> m, int ci)
{
    SphinxAssert.assert(m.IsNonNull);
    SphinxAssert.assert((ci >= 0) && (ci < m.Deref.n_ciphone));
    return(m.Deref.ciname[ci]);
}
/**
 * Read a 3-D array of 4-byte floats from fp.
 *
 * The stream holds three 32-bit dimension values followed by a 1-D array
 * (element count, then data) containing d1 * d2 * d3 elements.  On success
 * arr receives a 3-D view over a fresh copy of the data, d1/d2/d3 receive
 * the dimensions, and the total element count is returned.  Returns -1
 * when a dimension cannot be read or when the element count reported by
 * bio_fread_1d() does not match its return value.
 *
 * swap and chksum are passed through to the underlying readers.
 */
public static int bio_fread_3d(BoxedValue<Pointer<Pointer<Pointer<float>>>> arr, BoxedValue<uint> d1, BoxedValue<uint> d2, BoxedValue<uint> d3, FILE fp, uint swap, BoxedValue<uint> chksum)
{
    /* 12 bytes of header, viewed both as raw bytes (for reading) and as
     * three unsigned 32-bit values (for use). */
    MemoryBlock<byte> length_buf = new MemoryBlock<byte>(12);
    Pointer<byte> length = new Pointer<byte>(new BasicMemoryBlockAccess<byte>(length_buf), 0);
    Pointer<uint> l_d1 = new Pointer<uint>(new UpcastingMemoryBlockAccess<uint>(length_buf), 0);
    Pointer<uint> l_d2 = new Pointer<uint>(new UpcastingMemoryBlockAccess<uint>(length_buf), 4);
    Pointer<uint> l_d3 = new Pointer<uint>(new UpcastingMemoryBlockAccess<uint>(length_buf), 8);
    uint n = 0;
    Pointer<byte> raw = PointerHelpers.NULL<byte>();

    /* Read the three dimensions one 4-byte value at a time. */
    for (int offset = 0; offset < 12; offset += 4)
    {
        uint ret = (uint)bio_fread(length.Point(offset), 4, 1, fp, (int)swap, chksum);
        if (ret != 1)
        {
            if (ret == 0)
            {
                err.E_ERROR_SYSTEM("Unable to read complete data");
            }
            else
            {
                err.E_ERROR_SYSTEM("OS error in bio_fread_3d");
            }
            return(-1);
        }
    }

    BoxedValue<Pointer<byte>> boxed_raw = new BoxedValue<Pointer<byte>>(raw);
    BoxedValue<uint> boxed_n = new BoxedValue<uint>(n);

    /* The element count comes back through boxed_n, so it has to be
     * unboxed BEFORE it is compared with the return value.  Comparing with
     * the still-zero local made every non-empty read look like a failure. */
    var n_read = bio_fread_1d(boxed_raw, 4, boxed_n, fp, (int)swap, chksum);
    n = boxed_n.Val;
    raw = boxed_raw.Val;
    if (n_read != n)
    {
        return(-1);
    }

    /* Unary plus on these pointers reads the value they point at. */
    SphinxAssert.assert(n == +l_d1 * +l_d2 * +l_d3);

    /* Reinterpret the raw bytes as floats and copy them into a buffer of
     * their own, which backs the returned 3-D view. */
    Pointer<float> float_upcast_buf = raw.ReinterpretCast<float>();
    Pointer<float> float_copy_buf = PointerHelpers.Malloc<float>(n);
    float_upcast_buf.MemCopyTo(float_copy_buf, (int)n);

    arr.Val = ckd_alloc.ckd_alloc_3d_ptr<float>(+l_d1, +l_d2, +l_d3, float_copy_buf);
    d1.Val = +l_d1;
    d2.Val = +l_d2;
    d3.Val = +l_d3;

    return((int)n);
}
/**
 * Create and fully initialize a front-end object from `config`.
 *
 * Steps, in order: parse the general parameters, derive frame shift and
 * frame size from the sampling rate, validate them against each other and
 * against the FFT size, allocate the overflow and Hamming-window buffers,
 * set up the mel filter bank, optionally set up noise statistics, create
 * the VAD data and its pre-speech buffer, allocate the FFT work buffers
 * and twiddle factors, and finally start an utterance.
 *
 * Returns the new front end, or a null pointer (after freeing the partial
 * object) when parameter parsing or any validation step fails.
 */
public static Pointer<fe_t> fe_init_auto_r(Pointer<cmd_ln_t> config)
{
    Pointer<fe_t> front = ckd_alloc.ckd_calloc_struct<fe_t>(1);
    front.Deref.refcount = 1;

    /* Copy the command-line parameters into the front end. */
    if (fe_parse_general_params(cmd_ln.cmd_ln_retain(config), front) < 0)
    {
        fe_free(front);
        return PointerHelpers.NULL<fe_t>();
    }

    /* Derived parameters.  Adding 0.5 before the cast rounds to the
     * nearest integer instead of truncating (2.3 -> 2, 3.7 -> 4). */
    front.Deref.frame_shift = checked((short)(front.Deref.sampling_rate / front.Deref.frame_rate + 0.5));
    front.Deref.frame_size = checked((short)(front.Deref.window_length * front.Deref.sampling_rate + 0.5));
    front.Deref.pre_emphasis_prior = 0;

    fe_start_stream(front);

    SphinxAssert.assert(front.Deref.frame_shift > 1);

    if (front.Deref.frame_size < front.Deref.frame_shift)
    {
        err.E_ERROR(string.Format("Frame size {0} (-wlen) must be greater than frame shift {1} (-frate)\n", front.Deref.frame_size, front.Deref.frame_shift));
        fe_free(front);
        return PointerHelpers.NULL<fe_t>();
    }

    if (front.Deref.frame_size > (front.Deref.fft_size))
    {
        err.E_ERROR(string.Format("Number of FFT points has to be a power of 2 higher than {0}, it is {1}\n", front.Deref.frame_size, front.Deref.fft_size));
        fe_free(front);
        return PointerHelpers.NULL<fe_t>();
    }

    if (front.Deref.dither != 0)
    {
        fe_init_dither(front.Deref.dither_seed);
    }

    /* Overflow-sample buffer and Hamming window (half a frame of
     * coefficients is allocated for the window). */
    front.Deref.overflow_samps = ckd_alloc.ckd_calloc<short>(front.Deref.frame_size);
    front.Deref.hamming_window = ckd_alloc.ckd_calloc<double>(front.Deref.frame_size / 2);
    fe_sigproc.fe_create_hamming(front.Deref.hamming_window, front.Deref.frame_size);

    /* Mel filter bank: allocate, configure, validate, build. */
    front.Deref.mel_fb = ckd_alloc.ckd_calloc_struct<melfb_t>(1);
    fe_parse_melfb_params(config, front, front.Deref.mel_fb);

    if (front.Deref.mel_fb.Deref.upper_filt_freq > front.Deref.sampling_rate / 2 + 1.0)
    {
        err.E_ERROR(string.Format("Upper frequency {0} is higher than samprate/2 ({1})\n", front.Deref.mel_fb.Deref.upper_filt_freq, front.Deref.sampling_rate / 2));
        fe_free(front);
        return PointerHelpers.NULL<fe_t>();
    }

    fe_sigproc.fe_build_melfilters(front.Deref.mel_fb);
    fe_sigproc.fe_compute_melcosine(front.Deref.mel_fb);

    /* Noise statistics are needed by both noise and silence removal. */
    if (front.Deref.remove_noise != 0 || front.Deref.remove_silence != 0)
    {
        front.Deref.noise_stats = fe_noise.fe_init_noisestats(front.Deref.mel_fb.Deref.num_filters);
    }

    front.Deref.vad_data = ckd_alloc.ckd_calloc_struct<vad_data_t>(1);

    /* Pre-speech frames hold cepstra, unless raw log spectra are being
     * produced, in which case they hold one value per mel filter. */
    int prespch_frame_len;
    if (front.Deref.log_spec != fe.RAW_LOG_SPEC)
    {
        prespch_frame_len = front.Deref.num_cepstra;
    }
    else
    {
        prespch_frame_len = front.Deref.mel_fb.Deref.num_filters;
    }

    front.Deref.vad_data.Deref.prespch_buf = fe_prespch_buf.fe_prespch_init(front.Deref.pre_speech + 1, prespch_frame_len, front.Deref.frame_shift);

    /* Temporary FFT, spectrum and mel-spectrum buffers. */
    front.Deref.spch = ckd_alloc.ckd_calloc<short>(front.Deref.frame_size);
    front.Deref.frame = ckd_alloc.ckd_calloc<double>(front.Deref.fft_size);
    front.Deref.spec = ckd_alloc.ckd_calloc<double>(front.Deref.fft_size);
    front.Deref.mfspec = ckd_alloc.ckd_calloc<double>(front.Deref.mel_fb.Deref.num_filters);

    /* Twiddle factors for the FFT. */
    front.Deref.ccc = ckd_alloc.ckd_calloc<double>(front.Deref.fft_size / 4);
    front.Deref.sss = ckd_alloc.ckd_calloc<double>(front.Deref.fft_size / 4);
    fe_sigproc.fe_create_twiddle(front);

    /* (The "-verbose" parameter dump of the original was removed in this
     * port.) */

    /* Initialize the overflow buffers. */
    fe_start_utt(front);
    return front;
}
/**
 * Batch cepstral mean (and optionally variance) normalization.
 *
 * The mean of each cepstral dimension is estimated over the frames whose
 * first coefficient is non-negative, logged, and subtracted from every
 * frame.  When varnorm is non-zero each dimension is additionally scaled
 * by the inverse standard deviation computed over all n_frame frames.
 * The results are written back into mfc in place; cmn.cmn_mean (and
 * cmn.cmn_var when varnorm is set) are overwritten.
 *
 * Does nothing when n_frame <= 0.
 */
public static void cmn_run(Pointer<cmn_t> cmn, Pointer<Pointer<float>> mfc, int varnorm, int n_frame)
{
    Pointer<float> mfcp;
    float t;
    int i, f;
    int n_pos_frame;

    SphinxAssert.assert(mfc.IsNonNull);

    if (n_frame <= 0)
    {
        return;
    }

    /* The mean accumulator may hold values from an earlier utterance. */
    cmn.Deref.cmn_mean.MemSet(0, cmn.Deref.veclen);

    /* Find mean cep vector for this utterance */
    for (f = 0, n_pos_frame = 0; f < n_frame; f++)
    {
        mfcp = mfc[f];

        /* Skip zero energy frames */
        if (mfcp[0] < 0)
        {
            continue;
        }

        for (i = 0; i < cmn.Deref.veclen; i++)
        {
            cmn.Deref.cmn_mean[i] += mfcp[i];
        }

        n_pos_frame++;
    }

    /* If every frame was skipped the sums are all zero; dividing them by
     * zero would produce NaN means and turn every feature into NaN below.
     * Leave the mean at zero in that case. */
    if (n_pos_frame > 0)
    {
        for (i = 0; i < cmn.Deref.veclen; i++)
        {
            cmn.Deref.cmn_mean[i] /= n_pos_frame;
        }
    }

    err.E_INFO("CMN: ");
    for (i = 0; i < cmn.Deref.veclen; i++)
    {
        err.E_INFOCONT(string.Format("{0} ", (cmn.Deref.cmn_mean[i])));
    }
    err.E_INFOCONT("\n");

    if (varnorm == 0)
    {
        /* Subtract mean from each cep vector */
        for (f = 0; f < n_frame; f++)
        {
            mfcp = mfc[f];
            for (i = 0; i < cmn.Deref.veclen; i++)
            {
                mfcp[i] -= cmn.Deref.cmn_mean[i];
            }
        }
    }
    else
    {
        /* Scale cep vectors to have unit variance along each dimension, and subtract means */
        /* The variance accumulator may hold values from an earlier utterance. */
        cmn.Deref.cmn_var.MemSet(0, cmn.Deref.veclen);

        for (f = 0; f < n_frame; f++)
        {
            mfcp = mfc[f];

            for (i = 0; i < cmn.Deref.veclen; i++)
            {
                t = mfcp[i] - cmn.Deref.cmn_mean[i];
                cmn.Deref.cmn_var[i] += (t * t);
            }
        }

        for (i = 0; i < cmn.Deref.veclen; i++)
        {
            /* Replace the summed squares by the inverse standard deviation. */
            cmn.Deref.cmn_var[i] = (float)(Math.Sqrt((double)n_frame / (cmn.Deref.cmn_var[i])));
        }

        for (f = 0; f < n_frame; f++)
        {
            mfcp = mfc[f];
            for (i = 0; i < cmn.Deref.veclen; i++)
            {
                mfcp[i] = ((mfcp[i] - cmn.Deref.cmn_mean[i]) * cmn.Deref.cmn_var[i]);
            }
        }
    }
}
/**
 * Look up the phone ID of triphone <ci, lc, rc, wpos>.
 *
 * When either context is negative the CI phone `ci` itself is returned.
 * Otherwise filler contexts are mapped to the silence phone (if the model
 * has one) and the context-dependency tree is walked in the order
 * word position, base phone, left context, right context.  Returns the
 * phone ID stored at the leaf that is reached, or -1 when some level has
 * no node for the requested context.
 */
public static int bin_mdef_phone_id(Pointer<bin_mdef_t> m, int ci, int lc, int rc, int wpos)
{
    Pointer<short> ctx = PointerHelpers.Malloc<short>(4);

    SphinxAssert.assert(m.IsNonNull);

    /* No back-off is attempted for missing context: just use the CI phone. */
    if (lc < 0 || rc < 0)
    {
        return ci;
    }

    SphinxAssert.assert((ci >= 0) && (ci < m.Deref.n_ciphone));
    SphinxAssert.assert((lc >= 0) && (lc < m.Deref.n_ciphone));
    SphinxAssert.assert((rc >= 0) && (rc < m.Deref.n_ciphone));
    SphinxAssert.assert((wpos >= 0) && (wpos < mdef.N_WORD_POSN));

    /* Search key, one entry per tree level; fillers become silence. */
    ctx[0] = (short)wpos;
    ctx[1] = (short)ci;
    if (m.Deref.sil >= 0 && m.Deref.phone[lc].info_ci_filler != 0)
    {
        ctx[2] = (short)m.Deref.sil;
    }
    else
    {
        ctx[2] = (short)lc;
    }

    if (m.Deref.sil >= 0 && m.Deref.phone[rc].info_ci_filler != 0)
    {
        ctx[3] = (short)m.Deref.sil;
    }
    else
    {
        ctx[3] = (short)rc;
    }

    /* Descend the tree: `nodes` is the sibling group of the current level
     * and `width` is how many siblings it has. */
    Pointer<cd_tree_t> nodes = m.Deref.cd_tree;
    int width = mdef.N_WORD_POSN;

    for (int level = 0; level < 4; ++level)
    {
        int hit = 0;
        while (hit < width && nodes[hit].ctx != ctx[level])
        {
            ++hit;
        }

        if (hit == width)
        {
            return -1;
        }

        /* A node without children is a leaf and carries the phone ID. */
        if (nodes[hit].n_down == 0)
        {
            return nodes[hit].c_pid;
        }

        /* Children are addressed by offset from the start of the tree. */
        width = nodes[hit].n_down;
        nodes = m.Deref.cd_tree + nodes[hit].c_down;
    }

    /* Not expected to be reached. */
    return -1;
}