/**
         * <summary>Builds the list of spelling candidates for the given <see cref="Word"/>. The initial candidates are
         * obtained from generateCandidateList called with the word's name. Each candidate is then run through
         * morphological analysis; when the resulting <see cref="FsmParseList"/> is empty, the dictionary is asked for the
         * candidate's correct form via GetCorrectForm. If that form is not null and has at least one analysis it replaces
         * the candidate at the same position, otherwise the candidate is removed from the list.</summary>
         *
         * <param name="word">Word whose name is used to generate the candidates.</param>
         * <returns>The list returned by generateCandidateList, filtered and patched in place.</returns>
         */
        protected List<string> CandidateList(Word word)
        {
            var candidates = generateCandidateList(word.GetName());
            var position   = 0;

            while (position < candidates.Count)
            {
                // Candidates that already have an analysis are kept untouched.
                if (fsm.MorphologicalAnalysis(candidates[position]).Size() != 0)
                {
                    position++;
                    continue;
                }

                var correctedForm = fsm.GetDictionary().GetCorrectForm(candidates[position]);
                if (correctedForm == null || fsm.MorphologicalAnalysis(correctedForm).Size() <= 0)
                {
                    // Removing shifts the next candidate into this slot, so the position is not advanced.
                    candidates.RemoveAt(position);
                    continue;
                }

                candidates[position] = correctedForm;
                position++;
            }

            return candidates;
        }
        // --- Example 2 ---
        /**
         * <summary>Produces a corrected copy of the given <see cref="Sentence"/>. The method walks over the words while
         * tracking three roots: the previous word's root, the current word's root and the next word's root, where the
         * current and next roots are obtained from CheckAnalysisAndSetRoot.
         * <p/>
         * When the current root is not null, the word is added to the result unchanged. When it is null, the candidates
         * returned by CandidateList are evaluated: each candidate is morphologically analysed, its root is either the word
         * of the parse with the longest root (when _rootNgram is set) or a new <see cref="Word"/> built from the candidate,
         * and the n-gram probabilities of (previous root, candidate root) and (candidate root, next root) are computed,
         * using 0.0 when the corresponding neighbour is missing. The candidate whose larger probability strictly exceeds
         * the best value so far (starting from _threshold) is added to the result; if none does, the original word's name
         * is added and the original word is used as the current root.</summary>
         *
         * <param name="sentence"><see cref="Sentence"/> to be spell checked.</param>
         * <returns>A new <see cref="Sentence"/> holding the original or replaced words in order.</returns>
         */
        public new Sentence SpellCheck(Sentence sentence)
        {
            var  corrected     = new Sentence();
            Word previousRoot  = null;
            Word currentRoot   = CheckAnalysisAndSetRoot(sentence, 0);
            Word followingRoot = CheckAnalysisAndSetRoot(sentence, 1);

            for (var index = 0; index < sentence.WordCount(); index++)
            {
                var word = sentence.GetWord(index);
                if (currentRoot != null)
                {
                    // A root is available for this word, so it is kept as is.
                    corrected.AddWord(word);
                }
                else
                {
                    var    candidates         = CandidateList(word);
                    var    chosenName         = word.GetName();
                    var    chosenRoot         = word;
                    double highestProbability = _threshold;

                    foreach (var candidate in candidates)
                    {
                        var  parses        = fsm.MorphologicalAnalysis(candidate);
                        Word candidateRoot = _rootNgram
                            ? parses.GetParseWithLongestRootWord().GetWord()
                            : new Word(candidate);

                        // A missing neighbour contributes a probability of 0.0.
                        double withPrevious = previousRoot != null
                            ? _nGram.GetProbability(previousRoot.GetName(), candidateRoot.GetName())
                            : 0.0;
                        double withFollowing = followingRoot != null
                            ? _nGram.GetProbability(candidateRoot.GetName(), followingRoot.GetName())
                            : 0.0;

                        var score = System.Math.Max(withPrevious, withFollowing);
                        if (score > highestProbability)
                        {
                            chosenName         = candidate;
                            chosenRoot         = candidateRoot;
                            highestProbability = score;
                        }
                    }

                    currentRoot = chosenRoot;
                    corrected.AddWord(new Word(chosenName));
                }

                // Slide the three-root window one word to the right.
                previousRoot  = currentRoot;
                currentRoot   = followingRoot;
                followingRoot = CheckAnalysisAndSetRoot(sentence, index + 2);
            }

            return corrected;
        }