/// <summary>
/// Looks up every prefix of <paramref name="suffix"/> (length 1 up to the full string) in the FST and
/// adds one KNOWN node to the lattice for each word id the dictionary returns for a positive lookup result.
/// </summary>
/// <param name="lattice">Lattice that receives the created nodes.</param>
/// <param name="startIndex">Index passed to each node; node positions in the lattice are offset by one from it.</param>
/// <param name="suffix">Text whose prefixes are looked up.</param>
/// <returns><c>true</c> if at least one prefix produced a positive lookup result; otherwise <c>false</c>.</returns>
private bool ProcessIndex(ViterbiLattice lattice, int startIndex, string suffix)
{
    bool matched = false;

    for (int length = 1; length <= suffix.Length; length++)
    {
        string candidate = suffix.Substring(0, length);
        int lookupResult = fst.Lookup(candidate);

        if (lookupResult < 0)
        {
            // A negative result ends the scan for this start position.
            break;
        }

        if (lookupResult == 0)
        {
            // Zero yields no nodes; keep trying longer prefixes.
            continue;
        }

        // A positive result means no unknown word should be produced starting from this index.
        matched = true;

        foreach (int wordId in dictionary.LookupWordIds(lookupResult))
        {
            ViterbiNode knownNode = new ViterbiNode(wordId, candidate, dictionary, startIndex, ViterbiNode.NodeType.KNOWN);
            lattice.AddNode(knownNode, startIndex + 1, startIndex + 1 + length);
        }
    }

    return matched;
}
/// <summary>
/// Compiles the token-info dictionary read from <paramref name="inputDirAbsolutePath"/>, compiles an FST
/// over its surface forms, re-reads and validates the saved FST, registers an FST-id-to-surface-index
/// mapping for every surface, and writes the compiled dictionary to <paramref name="outputDirAbsolutePath"/>.
/// </summary>
/// <param name="inputDirAbsolutePath">Directory handed to the compiler to open the combined input stream.</param>
/// <param name="outputDirAbsolutePath">Directory that receives the FST file and the compiled dictionary.</param>
/// <param name="encoding">Encoding name forwarded to <c>GetTokenInfoDictionaryCompiler</c>.</param>
/// <param name="provider">Encoding provider forwarded to <c>GetTokenInfoDictionaryCompiler</c>.</param>
/// <exception cref="Exception">
/// Any failure is rethrown as an <see cref="Exception"/> whose message is prefixed with
/// "DictionaryCompilerBase.BuildTokenInfoDictionary: "; the original exception is available as
/// <see cref="Exception.InnerException"/>.
/// </exception>
private void BuildTokenInfoDictionary(string inputDirAbsolutePath, string outputDirAbsolutePath, string encoding, EncodingProvider provider)
{
    try
    {
        ProgressLog.Begin("compiling tokeninfo dict");
        var tokenInfoCompiler = GetTokenInfoDictionaryCompiler(encoding, provider);

        ProgressLog.Println("analyzing dictionary features");
        // NOTE(review): the same stream instance is handed to all three calls below; whether the
        // compiler rewinds it between passes is not visible here - confirm.
        using (var stream = tokenInfoCompiler.CombinedSequentialFileInputStream(inputDirAbsolutePath))
        {
            tokenInfoCompiler.AnalyzeTokenInfo(stream);
            ProgressLog.Println("reading tokeninfo");
            tokenInfoCompiler.ReadTokenInfo(stream);
            tokenInfoCompiler.Compile(stream);
        }

        List<string> surfaces = tokenInfoCompiler.GetSurfaces();

        ProgressLog.Begin("compiling fst");
        FSTCompiler fstCompiler = new FSTCompiler(surfaces);
        string fstPath = Path.Combine(outputDirAbsolutePath, FST.FST.FST_FILENAME);

        // FileMode.Create truncates an existing file. OpenOrCreate would keep the old contents and
        // leave stale trailing bytes whenever the previous FST was longer than the new one.
        using (var stream = File.Open(fstPath, FileMode.Create))
        {
            fstCompiler.Compile(stream);
        }

        ProgressLog.Println("validating saved fst");
        FST.FST fst;
        using (var stream = File.OpenRead(fstPath))
        {
            fst = new FST.FST(stream);
        }

        // Validation only reports failed lookups; it does not abort the build.
        foreach (string surface in surfaces)
        {
            if (fst.Lookup(surface) < 0)
            {
                ProgressLog.Println("failed to look up [" + surface + "]");
            }
        }

        ProgressLog.End();

        ProgressLog.Begin("processing target map");
        for (int i = 0; i < surfaces.Count; i++)
        {
            int id = fst.Lookup(surfaces[i]);
            tokenInfoCompiler.AddMapping(id, i);
        }

        tokenInfoCompiler.Write(outputDirAbsolutePath); // TODO: Should be refactored -Christian
        ProgressLog.End();
        // NOTE(review): three ProgressLog.Begin calls above are matched by only two End calls -
        // confirm whether a closing End for "compiling tokeninfo dict" is missing.
    }
    catch (Exception ex)
    {
        // Keep the original exception as InnerException so its type and stack trace are not lost.
        throw new Exception("DictionaryCompilerBase.BuildTokenInfoDictionary: " + ex.Message, ex);
    }
}