示例#1
0
        /// <summary>
        /// Adds a user node to the lattice for every user dictionary match found in the input text and,
        /// whenever the lattice is reported as broken before or after the new node, asks for it to be repaired.
        /// </summary>
        /// <param name="text">Input text that is searched for user dictionary matches</param>
        /// <param name="lattice">Lattice that receives the user nodes (and any repair nodes)</param>
        void ProcessUserDictionary(string text, ViterbiLattice lattice)
        {
            foreach (var hit in UserDictionary.FindUserDictionaryMatches(text))
            {
                var userWordId  = hit.WordId;
                var matchStart  = hit.MatchStartIndex;
                var matchLength = hit.MatchLength;

                var surface  = text.Substring(matchStart, matchLength);
                var userNode = new ViterbiNode(userWordId, surface, UserDictionary, matchStart, ViterbiNode.NodeType.User);

                // Lattice positions are shifted by one relative to positions in the text
                var latticeStart = matchStart + 1;
                var latticeEnd   = latticeStart + matchLength;

                lattice.AddNode(userNode, latticeStart, latticeEnd);

                // The "before" repair is given the text index, the "after" repair the lattice end position
                if (IsLatticeBrokenBefore(latticeStart, lattice))
                {
                    RepairBrokenLatticeBefore(lattice, matchStart);
                }

                if (IsLatticeBrokenAfter(latticeEnd, lattice))
                {
                    RepairBrokenLatticeAfter(lattice, latticeEnd);
                }
            }
        }
示例#2
0
        /// <summary>
        /// Looks up the prefixes of <paramref name="suffix"/> in the trie, shortest first, and adds one known
        /// node to the lattice per word id of every prefix whose lookup result is positive.
        /// </summary>
        /// <param name="lattice">Lattice that receives the known nodes</param>
        /// <param name="startIndex">Index handed to each created node; the nodes are added at lattice position startIndex + 1</param>
        /// <param name="suffix">Text whose prefixes are looked up</param>
        /// <returns>true if at least one prefix had a positive lookup result, otherwise false</returns>
        bool ProcessIndex(ViterbiLattice lattice, int startIndex, string suffix)
        {
            var anyKnownWord = false;
            var latticeStart = startIndex + 1;

            for (var prefixLength = 1; prefixLength <= suffix.Length; prefixLength++)
            {
                var candidate  = suffix.Substring(0, prefixLength);
                var trieResult = DoubleArrayTrie.Lookup(candidate);

                if (trieResult < 0)
                {
                    // A negative result ends the scan; longer prefixes are not tried
                    break;
                }

                if (trieResult == 0)
                {
                    // Zero adds nothing but lets the scan move on to the next longer prefix
                    continue;
                }

                anyKnownWord = true; // Don't produce unknown word starting from this index

                foreach (var wordId in Dictionary.LookupWordIds(trieResult))
                {
                    var knownNode = new ViterbiNode(wordId, candidate, Dictionary, startIndex, ViterbiNode.NodeType.Known);
                    lattice.AddNode(knownNode, latticeStart, latticeStart + prefixLength);
                }
            }

            return anyKnownWord;
        }
示例#3
0
        /// <summary>
        /// Adds unknown-word nodes for the start of <paramref name="suffix"/> when the definition of
        /// <paramref name="category"/> has its invoke entry set to 1 or no known word was found.
        /// </summary>
        /// <param name="category">Character category used to look up the definition and the unknown word ids</param>
        /// <param name="i">Position at which <paramref name="category"/> must appear in the categories of each following character for the word to grow</param>
        /// <param name="lattice">Lattice that receives the unknown nodes</param>
        /// <param name="unknownWordEndIndex">Value returned unchanged when no unknown word is produced</param>
        /// <param name="startIndex">Index handed to each created node; the nodes are added at lattice position startIndex + 1</param>
        /// <param name="suffix">Text from which the unknown word is cut</param>
        /// <param name="found">Whether a known word was already found at this position</param>
        /// <returns>startIndex plus the unknown word length when an unknown word is produced, otherwise <paramref name="unknownWordEndIndex"/></returns>
        int ProcessUnknownWord(int category, int i, ViterbiLattice lattice, int unknownWordEndIndex, int startIndex, string suffix, bool found)
        {
            var definition = CharacterDefinitions.LookupDefinition(category);

            // Nothing to do when unknown words are not invoked and a known word already exists
            if (definition[CharacterDefinitions.Invoke] != 1 && found)
            {
                return unknownWordEndIndex;
            }

            // An unknown word always covers at least the first character
            var wordLength = 1;

            if (definition[CharacterDefinitions.Group] != 0)
            {
                // Grouping: keep absorbing characters while they carry the same category at position i
                while (wordLength < suffix.Length)
                {
                    var nextCategories = CharacterDefinitions.LookupCategories(suffix[wordLength]);

                    if (nextCategories == null)
                    {
                        break;
                    }

                    if (i >= nextCategories.Length || nextCategories[i] != category)
                    {
                        break;
                    }

                    wordLength++;
                }
            }

            var unknownSurface = suffix.Substring(0, wordLength);
            var latticeStart   = startIndex + 1;

            // characters in input text are supposed to be the same
            foreach (var wordId in UnknownDictionary.LookupWordIds(category))
            {
                var unknownNode = new ViterbiNode(wordId, unknownSurface, UnknownDictionary, startIndex, ViterbiNode.NodeType.Unknown);
                lattice.AddNode(unknownNode, latticeStart, latticeStart + wordLength);
            }

            return startIndex + wordLength;
        }
示例#4
0
        /// <summary>
        /// Attempts to repair the lattice to the LEFT of a newly inserted user dictionary entry. Start positions
        /// are scanned downwards from <paramref name="index"/> to 1; at the first position that yields a glue
        /// node candidate, a glue node built from the leading part of that candidate's surface is added and the
        /// scan stops. If no position yields a candidate, the lattice is left unchanged.
        /// </summary>
        /// <param name="lattice">Lattice to repair</param>
        /// <param name="index">Position from which the downward scan starts</param>
        void RepairBrokenLatticeBefore(ViterbiLattice lattice, int index)
        {
            var startNodes = lattice.StartIndexArr;

            for (var candidateStart = index; candidateStart >= 1; candidateStart--)
            {
                var nodesAtStart = startNodes[candidateStart];
                if (nodesAtStart == null)
                {
                    continue;
                }

                var glueBase = FindGlueNodeCandidate(index, nodesAtStart, candidateStart);
                if (glueBase == null)
                {
                    continue;
                }

                // Keep only the first (index + 1 - candidateStart) characters of the candidate's surface
                var glueSurface = glueBase.Surface.Substring(0, index + 1 - candidateStart);
                var glueNode    = MakeGlueNode(candidateStart, glueBase, glueSurface);

                lattice.AddNode(glueNode, candidateStart, candidateStart + glueNode.Surface.Length);
                return;
            }
        }
示例#5
0
        /// <summary>
        /// Attempts to repair the lattice to the RIGHT of a newly inserted user dictionary entry. End positions
        /// after <paramref name="nodeEndIndex"/> are scanned upwards; at the first position that yields a glue
        /// node candidate, a glue node built from the trailing part of that candidate's surface is added at
        /// <paramref name="nodeEndIndex"/> and the scan stops. If no position yields a candidate, the lattice is
        /// left unchanged.
        /// </summary>
        /// <param name="lattice">Lattice to repair</param>
        /// <param name="nodeEndIndex">Lattice end position of the inserted entry; the glue node starts here</param>
        void RepairBrokenLatticeAfter(ViterbiLattice lattice, int nodeEndIndex)
        {
            var endNodes = lattice.EndIndexArr;

            for (var candidateEnd = nodeEndIndex + 1; candidateEnd < endNodes.Length; candidateEnd++)
            {
                var nodesAtEnd = endNodes[candidateEnd];
                if (nodesAtEnd == null)
                {
                    continue;
                }

                var glueBase = FindGlueNodeCandidate(nodeEndIndex, nodesAtEnd, candidateEnd);
                if (glueBase == null)
                {
                    continue;
                }

                // Keep only the last (candidateEnd - nodeEndIndex) characters of the candidate's surface
                var baseSurface = glueBase.Surface;
                var tailLength  = candidateEnd - nodeEndIndex;
                var glueNode    = MakeGlueNode(nodeEndIndex, glueBase, baseSurface.Substring(baseSurface.Length - tailLength));

                lattice.AddNode(glueNode, nodeEndIndex, nodeEndIndex + glueNode.Surface.Length);
                return;
            }
        }