예제 #1
0
        /// <summary>
        /// Resolves the candidate lemmas, part-of-speech tags and tag weights for a lexeme.
        /// A lexeme the base tagger knows is answered by the base tagger directly. Otherwise
        /// the suffix matches reported by the lemmatizer are tried from the last one to the
        /// first one, the tags of each base word are passed through the declension rules for
        /// the stripped suffix, and the first acceptable result is returned. When nothing is
        /// acceptable the configured default tags and weights are returned with the lexeme
        /// itself as the only lemma.
        /// </summary>
        /// <param name="lexeme">The word form to analyse</param>
        /// <param name="lemmas">Receives the lemmas found for the lexeme</param>
        /// <param name="tags">Receives the possible tags</param>
        /// <param name="weights">Receives the weights that accompany <paramref name="tags"/></param>
        private void GetPossibleTagsRecursive(string lexeme, out string[] lemmas, out PersianPartOfSpeech[] tags, out double[] weights)
        {
            if (_baseTagger.IsKnown(lexeme))
            {
                _baseTagger.GetPossibleTags(lexeme, out lemmas, out tags, out weights);
                return;
            }

            var candidates = _lemmatizer.MatchForSuffix(lexeme);

            // Walk the matches back to front; the original author notes that this visits
            // the longest base word first. An empty match list skips the loop entirely.
            int index = candidates.Length;
            while (--index >= 0)
            {
                var candidate = candidates[index];
                bool stemIsKnown = _baseTagger.IsKnown(candidate.BaseWord);

                PersianPartOfSpeech[] stemTags;
                double[] stemWeights;
                if (stemIsKnown)
                {
                    _baseTagger.GetPossibleTags(candidate.BaseWord, out lemmas, out stemTags, out stemWeights);
                }
                else
                {
                    // Unknown base word: it may itself carry further suffixes.
                    GetPossibleTagsRecursive(candidate.BaseWord, out lemmas, out stemTags, out stemWeights);
                }

                ApplyPersianDeclensionRules(stemTags, stemWeights, candidate.Suffix, out tags, out weights);

                // A known base word is always accepted; a recursively resolved one only
                // when its leading tag is not Unknown.
                if (stemIsKnown || tags[0] != PersianPartOfSpeech.Unknown)
                {
                    return;
                }
            }

            tags    = _config.DefaultTags;
            weights = _config.DefaultWeights;
            lemmas  = new [] { lexeme };
        }
예제 #2
0
        ///<summary>
        /// Extract POS tagged dictionary to a file.
        /// Every word of the word list is looked up in the external dictionary. A word found
        /// there is added to the final list with its tag. Otherwise the base words of its
        /// suffix matches are tried in order: a base word found in the external dictionary
        /// is added to the final list with the word's count, a base word present in the
        /// word list is registered through AddToDictionary. A word with no usable base word
        /// is registered under itself. The final list is then dumped to the file.
        ///</summary>
        ///<param name="fileName">File name</param>
        ///<returns>True on success (the result of DumpFinalList)</returns>
        public bool ExtractPOSTaggedDictionary(string fileName)
        {
            PersianPOSTag pos;
            int           totalCount        = m_wordList.Count;
            int           currentStep       = 0;
            double        remainingProgress = (1.0 - ProgressPercent) / 0.95;

            // The product is computed in double: as an int multiplication it overflows
            // once the word list holds more than about 46,000 entries, which would
            // corrupt every progress increment below.
            double denom = (2.0 / ((double)totalCount * (totalCount + 1.0))) * remainingProgress;

            foreach (KeyValuePair <string, int> pair in m_wordList)
            {
                // Progress grows by an amount proportional to the index of the current word.
                ProgressPercent += ((double)currentStep * denom);
                currentStep++;

                string word  = pair.Key;
                int    count = pair.Value; // same value as m_wordList[word], without a second lookup

                if (m_wordContainerExternal.Contain(word, out pos))
                {
                    //if external dictionary contains the word, add it to file
                    AddWordToFinalList(word, count, pos);
                    continue;
                }

                //external dictionary does not contain the word: try its suffix-stripped base words
                bool curWordAdded = false;
                ReversePatternMatcherPatternInfo[] suffixPatternArray = m_suffixRecognizer.MatchForSuffix(word);
                foreach (ReversePatternMatcherPatternInfo suffixPattern in suffixPatternArray)
                {
                    string stem = suffixPattern.BaseWord;

                    if (m_wordContainerExternal.Contain(stem, out pos)) //external dictionary contains the stem
                    {
                        curWordAdded = true;
                        AddWordToFinalList(stem, count, pos);
                        break;
                    }
                    else if (m_wordList.ContainsKey(stem))
                    {
                        curWordAdded = true;
                        AddToDictionary(stem, word);
                        break;
                    }
                }

                // No suffix match at all, or none of the base words was usable.
                if (!curWordAdded)
                {
                    AddToDictionary(word, word);
                }
            }

            return(DumpFinalList(fileName));
        }
예제 #3
0
        /// <summary>
        /// Returns the base word of the first suffix match the lemmatizer reports for
        /// the input, or the input unchanged when there is no match.
        /// </summary>
        /// <param name="input">The word to lemmatize</param>
        /// <returns>The base word of the first match, or <paramref name="input"/> itself</returns>
        public String lemmatize(String input)
        {
            var matches = lemmatizer.MatchForSuffix(input);

            return matches.Length > 0 ? matches[0].BaseWord : input;
        }
예제 #4
0
        /// <summary>
        /// Decides whether a word counts as present in the given dictionary under the
        /// requested prune type.
        /// </summary>
        /// <param name="word">The word to check</param>
        /// <param name="dic">The list of dictionary words</param>
        /// <param name="suffixer">Suffix matcher used to obtain base words of <paramref name="word"/></param>
        /// <param name="prouneType">
        /// NoPrune always yields false; Stem additionally accepts a word when one of its
        /// base words is in the dictionary; any other value checks the word itself only.
        /// </param>
        /// <returns>True when the word, or for Stem one of its base words, is in the dictionary</returns>
        public static bool IsValidInDictionary(string word, List <string> dic, PersianSuffixLemmatizer suffixer, PruneType prouneType)
        {
            if (prouneType == PruneType.NoPrune)
            {
                return false;
            }

            if (dic.Contains(word))
            {
                return true;
            }

            if (prouneType != PruneType.Stem)
            {
                return false;
            }

            // Stem mode: fall back to the base words of the suffix matches.
            ReversePatternMatcherPatternInfo[] matches = suffixer.MatchForSuffix(word);
            for (int i = 0; i < matches.Length; i++)
            {
                if (dic.Contains(matches[i].BaseWord))
                {
                    return true;
                }
            }

            return false;
        }