コード例 #1
0
        /**
         * Registers every user dictionary match found in the input text as a USER node in the
         * lattice, then requests a repair on either side of that node wherever the lattice is
         * reported as broken.
         *
         * @param text    input text that is searched for user dictionary entries
         * @param lattice lattice receiving the user nodes (and any glue nodes added by the repairs)
         */
        private void ProcessUserDictionary(string text, ViterbiLattice lattice)
        {
            foreach (UserDictionary.UserDictionaryMatch hit in userDictionary.FindUserDictionaryMatches(text))
            {
                int wordId      = hit.GetWordId();
                int textStart   = hit.GetMatchStartIndex();
                int matchLength = hit.GetMatchLength();

                // Lattice positions are the text offsets shifted by one.
                int latticeStart = textStart + 1;
                int latticeEnd   = latticeStart + matchLength;

                string      surface  = text.Substring(textStart, matchLength);
                ViterbiNode userNode = new ViterbiNode(wordId, surface, userDictionary, textStart, ViterbiNode.NodeType.USER);

                lattice.AddNode(userNode, latticeStart, latticeEnd);

                // The left-hand repair is driven by the text offset, not the lattice position.
                if (IsLatticeBrokenBefore(latticeStart, lattice))
                {
                    RepairBrokenLatticeBefore(lattice, textStart);
                }

                if (IsLatticeBrokenAfter(latticeEnd, lattice))
                {
                    RepairBrokenLatticeAfter(lattice, latticeEnd);
                }
            }
        }
コード例 #2
0
        /**
         * Looks up successively longer prefixes of the suffix and adds one KNOWN node to the
         * lattice for every word id the dictionary reports for a prefix with a positive lookup
         * result.
         *
         * @param lattice    lattice receiving the known-word nodes
         * @param startIndex text offset at which the suffix begins
         * @param suffix     remaining input text starting at startIndex
         * @return true if at least one prefix produced a positive lookup result
         */
        private bool ProcessIndex(ViterbiLattice lattice, int startIndex, string suffix)
        {
            bool anyHit       = false;
            int  latticeStart = startIndex + 1; // lattice positions are text offsets shifted by one

            for (int prefixLength = 1; prefixLength <= suffix.Length; prefixLength++)
            {
                string candidate = suffix.Substring(0, prefixLength);
                int    lookup    = fst.Lookup(candidate);

                if (lookup < 0)
                {
                    // A negative result ends the scan for this start index.
                    break;
                }

                if (lookup == 0)
                {
                    // Nothing to add for this prefix; try the next, longer one.
                    continue;
                }

                // Don't produce unknown word starting from this index
                anyHit = true;

                foreach (int wordId in dictionary.LookupWordIds(lookup))
                {
                    ViterbiNode knownNode = new ViterbiNode(wordId, candidate, dictionary, startIndex, ViterbiNode.NodeType.KNOWN);
                    lattice.AddNode(knownNode, latticeStart, latticeStart + prefixLength);
                }
            }

            return anyHit;
        }
コード例 #3
0
        /**
         * Adds UNKNOWN nodes for the unknown word that starts at the beginning of the suffix.
         *
         * Nodes are produced only when the INVOKE entry of the category definition is 1 or when
         * no known word was found. When the GROUP entry is 0 the unknown word is a single
         * character; otherwise it is extended over the following characters for as long as their
         * category at position i equals the given category.
         *
         * @param category            character category of the first character of the suffix
         * @param i                   position inside each character's category array to compare
         * @param lattice             lattice receiving the unknown-word nodes
         * @param unknownWordEndIndex value to return unchanged when no unknown word is produced
         * @param startIndex          text offset at which the suffix begins
         * @param suffix              remaining input text starting at startIndex
         * @param found               whether a known word was already found at this start index
         * @return startIndex plus the unknown word length, or unknownWordEndIndex if nothing was added
         */
        private int ProcessUnknownWord(int category, int i, ViterbiLattice lattice, int unknownWordEndIndex, int startIndex, String suffix, bool found)
        {
            int[] definition = characterDefinitions.LookupDefinition(category);

            bool produceUnknown = definition[CharacterDefinitions.INVOKE] == 1 || !found;
            if (!produceUnknown)
            {
                return unknownWordEndIndex;
            }

            // The unknown word always covers at least the first character.
            int unknownWordLength = 1;

            if (definition[CharacterDefinitions.GROUP] != 0)
            {
                // Grow the word over the following characters that share the category.
                while (unknownWordLength < suffix.Length)
                {
                    int[] nextCategories = characterDefinitions.LookupCategories(suffix[unknownWordLength]);

                    if (nextCategories == null)
                    {
                        break;
                    }

                    if (i >= nextCategories.Length || nextCategories[i] != category)
                    {
                        break;
                    }

                    unknownWordLength++;
                }
            }

            string surface = suffix.Substring(0, unknownWordLength);
            int    nodeStart = startIndex + 1; // lattice positions are text offsets shifted by one

            // characters in input text are supposed to be the same
            foreach (int wordId in unknownDictionary.LookupWordIds(category))
            {
                ViterbiNode unknownNode = new ViterbiNode(wordId, surface, unknownDictionary, startIndex, ViterbiNode.NodeType.UNKNOWN);
                lattice.AddNode(unknownNode, nodeStart, nodeStart + unknownWordLength);
            }

            return startIndex + unknownWordLength;
        }
コード例 #4
0
        /**
         * Tries to repair the lattice to the LEFT of a newly inserted user dictionary entry.
         *
         * Start positions are scanned downwards from index to 1. At the first position where
         * FindGlueNodeCandidate returns a node, a glue node is built from the leading part of that
         * node's surface and added to the lattice; the scan then stops. Nothing is added when no
         * candidate is found.
         *
         * @param lattice lattice to repair
         * @param index   text offset at which the user dictionary entry starts
         */
        private void RepairBrokenLatticeBefore(ViterbiLattice lattice, int index)
        {
            ViterbiNode[][] nodesByStart = lattice.StartIndexArr;

            for (int position = index; position > 0; position--)
            {
                ViterbiNode[] row = nodesByStart[position];
                if (row == null)
                {
                    continue;
                }

                ViterbiNode glueBase = FindGlueNodeCandidate(index, row, position);
                if (glueBase == null)
                {
                    continue;
                }

                // Keep only the leading part of the candidate, from its start up to the entry.
                int         prefixLength = index + 1 - position;
                string      glueSurface  = glueBase.Surface.Substring(0, prefixLength);
                ViterbiNode glueNode     = MakeGlueNode(position, glueBase, glueSurface);

                lattice.AddNode(glueNode, position, position + glueNode.Surface.Length);
                return;
            }
        }
コード例 #5
0
        /**
         * Tries to repair the lattice to the RIGHT of a newly inserted user dictionary entry.
         *
         * End positions are scanned upwards from nodeEndIndex + 1. At the first position where
         * FindGlueNodeCandidate returns a node, a glue node is built from the trailing part of that
         * node's surface and added to the lattice starting at nodeEndIndex; the scan then stops.
         * Nothing is added when no candidate is found.
         *
         * @param lattice      lattice to repair
         * @param nodeEndIndex lattice position at which the user dictionary entry ends
         */
        private void RepairBrokenLatticeAfter(ViterbiLattice lattice, int nodeEndIndex)
        {
            ViterbiNode[][] nodesByEnd = lattice.EndIndexArr;

            for (int position = nodeEndIndex + 1; position < nodesByEnd.Length; position++)
            {
                ViterbiNode[] row = nodesByEnd[position];
                if (row == null)
                {
                    continue;
                }

                ViterbiNode glueBase = FindGlueNodeCandidate(nodeEndIndex, row, position);
                if (glueBase == null)
                {
                    continue;
                }

                // Keep only the trailing part of the candidate, from the entry's end onwards.
                int         tailLength  = position - nodeEndIndex;
                string      baseSurface = glueBase.Surface;
                string      glueSurface = baseSurface.Substring(baseSurface.Length - tailLength);
                ViterbiNode glueNode    = MakeGlueNode(nodeEndIndex, glueBase, glueSurface);

                lattice.AddNode(glueNode, nodeEndIndex, nodeEndIndex + glueNode.Surface.Length);
                return;
            }
        }