/// <summary>
/// Adds a user-dictionary node to the lattice for every match the user dictionary reports in
/// <paramref name="text"/>. After each insertion the lattice is checked on both sides of the
/// new node and a repair is requested wherever it is reported as broken.
/// </summary>
/// <param name="text">Input text that is searched for user dictionary entries.</param>
/// <param name="lattice">Lattice that receives the user nodes and any repair nodes.</param>
void ProcessUserDictionary(string text, ViterbiLattice lattice)
{
    foreach (var match in UserDictionary.FindUserDictionaryMatches(text))
    {
        var wordId = match.WordId;
        var textStart = match.MatchStartIndex;
        var matchLength = match.MatchLength;

        var surface = text.Substring(textStart, matchLength);
        var userNode = new ViterbiNode(wordId, surface, UserDictionary, textStart, ViterbiNode.NodeType.User);

        // Lattice positions used here are the text index shifted up by one.
        var latticeStart = textStart + 1;
        var latticeEnd = latticeStart + matchLength;
        lattice.AddNode(userNode, latticeStart, latticeEnd);

        if (IsLatticeBrokenBefore(latticeStart, lattice))
        {
            // The left-hand repair is given the unshifted text index.
            RepairBrokenLatticeBefore(lattice, textStart);
        }

        if (IsLatticeBrokenAfter(latticeEnd, lattice))
        {
            RepairBrokenLatticeAfter(lattice, latticeEnd);
        }
    }
}
/// <summary>
/// Looks up successively longer prefixes of <paramref name="suffix"/> in the double array trie and
/// adds one known-word node to the lattice for every word id found for a matching prefix.
/// </summary>
/// <param name="lattice">Lattice that receives the known-word nodes.</param>
/// <param name="startIndex">Text index at which <paramref name="suffix"/> begins.</param>
/// <param name="suffix">Remaining input text, starting at <paramref name="startIndex"/>.</param>
/// <returns>
/// <c>true</c> when at least one prefix produced a positive trie lookup result, otherwise <c>false</c>.
/// </returns>
bool ProcessIndex(ViterbiLattice lattice, int startIndex, string suffix)
{
    var anyMatch = false;

    for (var prefixLength = 1; prefixLength <= suffix.Length; prefixLength++)
    {
        var candidate = suffix.Substring(0, prefixLength);
        var lookup = DoubleArrayTrie.Lookup(candidate);

        if (lookup < 0)
        {
            // A negative result ends the scan for this start index.
            break;
        }

        if (lookup == 0)
        {
            // Nothing to add for this prefix; try the next longer one.
            continue;
        }

        // A match was found, so the caller can tell a known word starts at this index.
        anyMatch = true;

        foreach (var wordId in Dictionary.LookupWordIds(lookup))
        {
            var knownNode = new ViterbiNode(wordId, candidate, Dictionary, startIndex, ViterbiNode.NodeType.Known);
            lattice.AddNode(knownNode, startIndex + 1, startIndex + 1 + prefixLength);
        }
    }

    return anyMatch;
}
/// <summary>
/// Generates an unknown word starting at the beginning of <paramref name="suffix"/> and adds one
/// unknown-word node to the lattice for every word id the unknown dictionary returns for
/// <paramref name="category"/>. Nothing is generated when the category's invoke flag is not 1 and
/// <paramref name="found"/> is <c>true</c>.
/// </summary>
/// <param name="category">Character category used for the definition and word id lookups.</param>
/// <param name="i">
/// Position inside the category array of each following character at which
/// <paramref name="category"/> must appear for that character to be appended to the unknown word.
/// </param>
/// <param name="lattice">Lattice that receives the unknown-word nodes.</param>
/// <param name="unknownWordEndIndex">Value returned unchanged when no unknown word is generated.</param>
/// <param name="startIndex">Text index at which <paramref name="suffix"/> begins.</param>
/// <param name="suffix">Remaining input text, starting at <paramref name="startIndex"/>.</param>
/// <param name="found">Whether a known word was already found at this start index.</param>
/// <returns>
/// <paramref name="startIndex"/> plus the length of the generated unknown word, or
/// <paramref name="unknownWordEndIndex"/> when no unknown word was generated.
/// </returns>
int ProcessUnknownWord(int category, int i, ViterbiLattice lattice, int unknownWordEndIndex, int startIndex, string suffix, bool found)
{
    var definition = CharacterDefinitions.LookupDefinition(category);

    // Unknown words are only generated when the category forces it or no known word was found.
    if (definition[CharacterDefinitions.Invoke] != 1 && found)
    {
        return unknownWordEndIndex;
    }

    // The unknown word always covers the first character; with grouping enabled it is extended
    // across the following characters that carry the same category at position i.
    var unknownLength = 1;
    if (definition[CharacterDefinitions.Group] != 0)
    {
        for (var offset = 1; offset < suffix.Length; offset++)
        {
            var following = CharacterDefinitions.LookupCategories(suffix[offset]);
            if (following == null || i >= following.Length || following[i] != category)
            {
                break;
            }

            unknownLength++;
        }
    }

    var unknownSurface = suffix.Substring(0, unknownLength);

    // characters in input text are supposed to be the same
    var wordIds = UnknownDictionary.LookupWordIds(category);

    foreach (var wordId in wordIds)
    {
        var unknownNode = new ViterbiNode(wordId, unknownSurface, UnknownDictionary, startIndex, ViterbiNode.NodeType.Unknown);
        lattice.AddNode(unknownNode, startIndex + 1, startIndex + 1 + unknownLength);
    }

    return startIndex + unknownLength;
}
/// <summary>
/// Tries to repair the lattice to the LEFT of a newly inserted user dictionary entry. Start
/// positions are scanned from <paramref name="index"/> down to 1; at the first position that
/// yields a glue node candidate, a glue node built from the leading part of that candidate's
/// surface is added and the scan stops.
/// </summary>
/// <param name="lattice">Lattice to repair.</param>
/// <param name="index">Text index at which the user dictionary entry starts.</param>
void RepairBrokenLatticeBefore(ViterbiLattice lattice, int index)
{
    var startSlots = lattice.StartIndexArr;

    for (var position = index; position > 0; position--)
    {
        var nodesAtPosition = startSlots[position];
        if (nodesAtPosition == null)
        {
            continue;
        }

        var glueBase = FindGlueNodeCandidate(index, nodesAtPosition, position);
        if (glueBase == null)
        {
            continue;
        }

        // Keep only the leading characters of the candidate, counted from its start position
        // as index + 1 - position.
        var keptLength = index + 1 - position;
        var glueSurface = glueBase.Surface.Substring(0, keptLength);
        var glueNode = MakeGlueNode(position, glueBase, glueSurface);

        lattice.AddNode(glueNode, position, position + glueNode.Surface.Length);
        return;
    }
}
/// <summary>
/// Tries to repair the lattice to the RIGHT of a newly inserted user dictionary entry. End
/// positions after <paramref name="nodeEndIndex"/> are scanned in ascending order; at the first
/// position that yields a glue node candidate, a glue node built from the trailing part of that
/// candidate's surface is added and the scan stops.
/// </summary>
/// <param name="lattice">Lattice to repair.</param>
/// <param name="nodeEndIndex">Lattice end position of the user dictionary entry.</param>
void RepairBrokenLatticeAfter(ViterbiLattice lattice, int nodeEndIndex)
{
    var endSlots = lattice.EndIndexArr;

    for (var position = nodeEndIndex + 1; position < endSlots.Length; position++)
    {
        var nodesAtPosition = endSlots[position];
        if (nodesAtPosition == null)
        {
            continue;
        }

        ViterbiNode glueBase = FindGlueNodeCandidate(nodeEndIndex, nodesAtPosition, position);
        if (glueBase == null)
        {
            continue;
        }

        // Keep only the trailing characters of the candidate: as many as lie between the
        // user entry's end position and the candidate's end position.
        var keptLength = position - nodeEndIndex;
        var baseSurface = glueBase.Surface;
        var glueSurface = baseSurface.Substring(baseSurface.Length - keptLength);
        var glueNode = MakeGlueNode(nodeEndIndex, glueBase, glueSurface);

        lattice.AddNode(glueNode, nodeEndIndex, nodeEndIndex + glueNode.Surface.Length);
        return;
    }
}