/**
 * Looks up user dictionary matches in the input text and adds one USER-type node
 * to the lattice per match. After each insertion the lattice is checked on both
 * sides of the new node and the corresponding repair routine is invoked when a
 * check reports the lattice as broken.
 *
 * @param text     input text searched for user dictionary entries
 * @param lattice  lattice that receives the user dictionary nodes
 */
private void ProcessUserDictionary(string text, ViterbiLattice lattice)
{
    List<UserDictionary.UserDictionaryMatch> userMatches = userDictionary.FindUserDictionaryMatches(text);

    foreach (UserDictionary.UserDictionaryMatch userMatch in userMatches)
    {
        int wordId = userMatch.GetWordId();
        int textOffset = userMatch.GetMatchStartIndex();
        int matchLength = userMatch.GetMatchLength();

        string surface = text.Substring(textOffset, matchLength);
        ViterbiNode userNode = new ViterbiNode(wordId, surface, userDictionary, textOffset, ViterbiNode.NodeType.USER);

        // The node is registered at the text offset plus one.
        int latticeStart = textOffset + 1;
        int latticeEnd = latticeStart + matchLength;
        lattice.AddNode(userNode, latticeStart, latticeEnd);

        if (IsLatticeBrokenBefore(latticeStart, lattice))
        {
            RepairBrokenLatticeBefore(lattice, textOffset);
        }

        // latticeEnd is the same value as latticeStart + matchLength.
        if (IsLatticeBrokenAfter(latticeEnd, lattice))
        {
            RepairBrokenLatticeAfter(lattice, latticeEnd);
        }
    }
}
/**
 * Looks up every prefix of the suffix (length 1 up to the full suffix) in the FST
 * and adds a KNOWN-type node to the lattice for each word id that a positive
 * lookup result maps to. The scan stops early on the first negative lookup result.
 *
 * @param lattice     lattice that receives the known-word nodes
 * @param startIndex  text offset at which the suffix starts
 * @param suffix      remaining input text beginning at startIndex
 * @return true if at least one prefix produced a positive lookup result
 */
private bool ProcessIndex(ViterbiLattice lattice, int startIndex, string suffix)
{
    bool matched = false;

    for (int prefixLength = 1; prefixLength <= suffix.Length; prefixLength++)
    {
        string candidate = suffix.Substring(0, prefixLength);
        int lookupResult = fst.Lookup(candidate);

        if (lookupResult < 0)
        {
            // A negative result ends the scan for this start index.
            break;
        }

        if (lookupResult == 0)
        {
            // Nothing to add for this prefix; try the next longer one.
            continue;
        }

        // Don't produce unknown word starting from this index
        matched = true;

        foreach (int wordId in dictionary.LookupWordIds(lookupResult))
        {
            ViterbiNode knownNode = new ViterbiNode(wordId, candidate, dictionary, startIndex, ViterbiNode.NodeType.KNOWN);
            int latticeStart = startIndex + 1;
            lattice.AddNode(knownNode, latticeStart, latticeStart + prefixLength);
        }
    }

    return matched;
}
/**
 * Adds UNKNOWN-type nodes to the lattice for the given character category when the
 * category's definition has INVOKE set to 1 or when no known word was found.
 * If the definition's GROUP entry is 0 the unknown word is a single character;
 * otherwise it is extended over the following characters for as long as their
 * category list holds the same category at position i.
 *
 * @param category             character category used for the definition and word id lookups
 * @param i                    position within each character's category list that must equal category
 * @param lattice              lattice that receives the unknown-word nodes
 * @param unknownWordEndIndex  current unknown word end index, returned unchanged when nothing is added
 * @param startIndex           text offset at which the suffix starts
 * @param suffix               remaining input text beginning at startIndex
 * @param found                whether a known word was already found at this index
 * @return startIndex plus the unknown word length when nodes were produced, otherwise unknownWordEndIndex
 */
private int ProcessUnknownWord(int category, int i, ViterbiLattice lattice, int unknownWordEndIndex, int startIndex, String suffix, bool found)
{
    int unknownWordLength = 0;
    int[] definition = characterDefinitions.LookupDefinition(category);

    bool shouldInvoke = definition[CharacterDefinitions.INVOKE] == 1 || !found;
    if (shouldInvoke)
    {
        bool grouped = definition[CharacterDefinitions.GROUP] != 0;
        unknownWordLength = 1;

        if (grouped)
        {
            // The length counter doubles as the index of the next character to inspect.
            while (unknownWordLength < suffix.Length)
            {
                int[] categories = characterDefinitions.LookupCategories(suffix[unknownWordLength]);

                if (categories == null || i >= categories.Length || categories[i] != category)
                {
                    break;
                }

                unknownWordLength++;
            }
        }
    }

    if (unknownWordLength <= 0)
    {
        return unknownWordEndIndex;
    }

    string unknownSurface = suffix.Substring(0, unknownWordLength);

    // characters in input text are supposed to be the same
    int[] unknownWordIds = unknownDictionary.LookupWordIds(category);

    foreach (int wordId in unknownWordIds)
    {
        ViterbiNode unknownNode = new ViterbiNode(wordId, unknownSurface, unknownDictionary, startIndex, ViterbiNode.NodeType.UNKNOWN);
        int latticeStart = startIndex + 1;
        lattice.AddNode(unknownNode, latticeStart, latticeStart + unknownWordLength);
    }

    return startIndex + unknownWordLength;
}
/**
 * Tries to repair the lattice to the LEFT of a newly inserted user dictionary entry.
 * Start positions are scanned from index down to 1. At the first position whose
 * start nodes yield a glue candidate, a glue node is built from the leading
 * (index + 1 - position) characters of the candidate's surface and added to the
 * lattice at that position; the scan then stops. Nothing is added when no
 * candidate is found.
 *
 * @param lattice  lattice to repair
 * @param index    value passed by the caller as the scan's starting position
 */
private void RepairBrokenLatticeBefore(ViterbiLattice lattice, int index)
{
    ViterbiNode[][] startBuckets = lattice.StartIndexArr;

    for (int position = index; position > 0; position--)
    {
        ViterbiNode[] bucket = startBuckets[position];
        if (bucket == null)
        {
            continue;
        }

        ViterbiNode glueBase = FindGlueNodeCandidate(index, bucket, position);
        if (glueBase == null)
        {
            continue;
        }

        // Keep only the leading part of the candidate's surface.
        int glueLength = index + 1 - position;
        String glueSurface = glueBase.Surface.Substring(0, glueLength);

        ViterbiNode glueNode = MakeGlueNode(position, glueBase, glueSurface);
        lattice.AddNode(glueNode, position, position + glueNode.Surface.Length);
        return;
    }
}
/**
 * Tries to repair the lattice to the RIGHT of a newly inserted user dictionary entry.
 * End positions are scanned upward from nodeEndIndex + 1 to the end of the lattice's
 * end-index array. At the first position whose end nodes yield a glue candidate, a
 * glue node is built from the trailing (position - nodeEndIndex) characters of the
 * candidate's surface and added to the lattice starting at nodeEndIndex; the scan
 * then stops. Nothing is added when no candidate is found.
 *
 * @param lattice       lattice to repair
 * @param nodeEndIndex  end index of the inserted user dictionary node
 */
private void RepairBrokenLatticeAfter(ViterbiLattice lattice, int nodeEndIndex)
{
    ViterbiNode[][] endBuckets = lattice.EndIndexArr;

    for (int position = nodeEndIndex + 1; position < endBuckets.Length; position++)
    {
        ViterbiNode[] bucket = endBuckets[position];
        if (bucket == null)
        {
            continue;
        }

        ViterbiNode glueBase = FindGlueNodeCandidate(nodeEndIndex, bucket, position);
        if (glueBase == null)
        {
            continue;
        }

        // Keep only the trailing part of the candidate's surface.
        int tailLength = position - nodeEndIndex;
        String baseSurface = glueBase.Surface;
        String glueSurface = baseSurface.Substring(baseSurface.Length - tailLength);

        ViterbiNode glueNode = MakeGlueNode(nodeEndIndex, glueBase, glueSurface);
        lattice.AddNode(glueNode, nodeEndIndex, nodeEndIndex + glueNode.Surface.Length);
        return;
    }
}