Beispiel #1
0
 /// <summary>
 /// Creates the lemmatizer and wires up the underlying
 /// <c>PersianSuffixLemmatizer</c> with a fixed set of suffix categories.
 /// </summary>
 public PersianLemmatizer()
 {
     // Suffix categories handed to the suffix lemmatizer: comparative adjectives,
     // indefinite "yaa", both plural signs ("aan" and "haa") and "yaa nesbat".
     var suffixCategories =
         SCICT.NLP.Persian.Constants.PersianSuffixesCategory.ComparativeAdjectives
         | SCICT.NLP.Persian.Constants.PersianSuffixesCategory.IndefiniteYaa
         | SCICT.NLP.Persian.Constants.PersianSuffixesCategory.PluralSignAan
         | SCICT.NLP.Persian.Constants.PersianSuffixesCategory.PluralSignHaa
         | SCICT.NLP.Persian.Constants.PersianSuffixesCategory.YaaNesbat;

     // NOTE(review): the meaning of the two boolean arguments is not visible here;
     // check the PersianSuffixLemmatizer constructor before changing them.
     lemmatizer = new PersianSuffixLemmatizer(false, true, suffixCategories);
 }
Beispiel #2
0
 /// <summary>
 /// Creates the lemmatizer and wires up the underlying
 /// <c>PersianSuffixLemmatizer</c> with a fixed set of suffix categories.
 /// </summary>
 public PersianLemmatizer()
 {
     // Suffix categories handed to the suffix lemmatizer: comparative adjectives,
     // indefinite "yaa", both plural signs ("aan" and "haa") and "yaa nesbat".
     var suffixCategories =
         SCICT.NLP.Persian.Constants.PersianSuffixesCategory.ComparativeAdjectives
         | SCICT.NLP.Persian.Constants.PersianSuffixesCategory.IndefiniteYaa
         | SCICT.NLP.Persian.Constants.PersianSuffixesCategory.PluralSignAan
         | SCICT.NLP.Persian.Constants.PersianSuffixesCategory.PluralSignHaa
         | SCICT.NLP.Persian.Constants.PersianSuffixesCategory.YaaNesbat;

     // NOTE(review): the meaning of the two boolean arguments is not visible here;
     // check the PersianSuffixLemmatizer constructor before changing them.
     lemmatizer = new PersianSuffixLemmatizer(false, true, suffixCategories);
 }
Beispiel #3
0
        /// <summary>
        /// Creates a word mapper over the given dictionary.
        /// </summary>
        /// <param name="dictionary">
        /// Word list to use. When <c>null</c>, an empty list is used instead.
        /// </param>
        /// <param name="pruneType">Pruning mode stored for later use by this instance.</param>
        public WordMapper(List<string> dictionary, PruneType pruneType)
        {
            // The original assigned the empty-list fallback and then unconditionally
            // overwrote it with the (null) argument; coalesce so the fallback sticks.
            _dictionary = dictionary ?? new List<string>();

            _pruneType = pruneType;

            // NOTE(review): the meaning of the two boolean arguments is not visible here;
            // check the PersianSuffixLemmatizer constructor before changing them.
            _suffixer = new PersianSuffixLemmatizer(true, false);
        }
Beispiel #4
0
        /// <summary>
        /// Builds the tagger from <paramref name="config"/>: creates the underlying
        /// <c>MaximumLikelihoodTagger</c>, a suffix lemmatizer, and a lookup table that maps
        /// each conjugated verb string to the distinct <c>Conjugator.VerbInfo</c> entries
        /// that produce it.
        /// </summary>
        /// <param name="config">
        /// Supplies <c>DictionaryFilename</c> for the base tagger and <c>StemFilename</c>
        /// for the verb stem file.
        /// </param>
        public PersianMaximumLikelihoodTagger(Config config)
        {
            _config = config;

            // Base tagger: normalizing, with "Unknown" as the only default tag.
            var taggerSettings = new MaximumLikelihoodTagger.Config
            {
                DictionaryFilename = _config.DictionaryFilename,
                Normalize          = true,
                DefaultTags        = new[] { PersianPartOfSpeech.Unknown },
                DefaultWeights     = new[] { Double.NaN }
            };
            _baseTagger = new MaximumLikelihoodTagger(taggerSettings);

            _lemmatizer = new PersianSuffixLemmatizer(false, true);
            _verbs      = new Dictionary<string, List<Conjugator.VerbInfo>>();

            var stems = new VerbInfoContainer();
            stems.LoadStemFile(_config.StemFilename);
            var conjugator = new Conjugator(stems);

            // For every tense/person, conjugate SADE verbs first and PISHVANDI verbs second,
            // and index each resulting form under its verb string.
            ENUM_VERB_TYPE[] verbTypes = { ENUM_VERB_TYPE.SADE, ENUM_VERB_TYPE.PISHVANDI };

            foreach (ENUM_TENSE_PERSON person in Enum.GetValues(typeof(ENUM_TENSE_PERSON)))
            {
                foreach (ENUM_VERB_TYPE verbType in verbTypes)
                {
                    foreach (var form in conjugator.ConjugateInfo(verbType, person))
                    {
                        List<Conjugator.VerbInfo> bucket;
                        if (!_verbs.TryGetValue(form.Verb, out bucket))
                        {
                            bucket = new List<Conjugator.VerbInfo>();
                            _verbs.Add(form.Verb, bucket);
                        }

                        // Keep each VerbInfo at most once per verb string.
                        if (!bucket.Contains(form))
                        {
                            bucket.Add(form);
                        }
                    }
                }
            }
        }
        /// <summary>
        /// Loads the Pinglish mapping file, learns from it, stores the de-duplicated entries,
        /// then loads the dictionary and creates the word mapper.
        /// </summary>
        /// <param name="mappingFileName">File passed to <c>PinglishConverterUtils.LoadPinglishStrings</c>.</param>
        /// <param name="dicPath">Path passed to <c>Tools.LoadList</c> to fill the dictionary.</param>
        /// <param name="pruneType">Pruning mode forwarded to the <c>WordMapper</c>.</param>
        public PinglishMapping(string mappingFileName, string dicPath, PruneType pruneType)
        {
            try
            {
                // Step 1: load the mappings and learn from them.
                List<PinglishString> mappings = PinglishConverterUtils.LoadPinglishStrings(mappingFileName);
                Learn(mappings, false);

                // Step 2: keep the de-duplicated mappings in the data set.
                var uniqueMappings = mappings.RemoveDuplicates();
                m_pinglishDataSet.AddRange(uniqueMappings);

                // Step 3: suffix lemmatizer, dictionary and word mapper.
                _suffixer = new PersianSuffixLemmatizer(true);
                Tools.LoadList(ref _dic, dicPath);
                _wordMapper = new WordMapper(_dic, pruneType);
            }
            catch (Exception error)
            {
                // NOTE(review): any failure is only written to the debug output, so the
                // instance may be left partially initialized (e.g. _wordMapper unset).
                Debug.WriteLine(error);
            }
        }
Beispiel #6
0
        /// <summary>
        /// Checks whether <paramref name="word"/> is accepted by the dictionary under the
        /// given pruning mode.
        /// </summary>
        /// <param name="word">Word to look up.</param>
        /// <param name="dic">Dictionary word list.</param>
        /// <param name="suffixer">Used to obtain suffix matches for <paramref name="word"/> in <c>Stem</c> mode.</param>
        /// <param name="prouneType">Pruning mode.</param>
        /// <returns>
        /// <c>false</c> for <c>PruneType.NoPrune</c>; otherwise <c>true</c> when the word itself
        /// is in <paramref name="dic"/>, or, for <c>PruneType.Stem</c>, when the base word of
        /// any suffix match is in <paramref name="dic"/>; <c>false</c> in all other cases.
        /// </returns>
        public static bool IsValidInDictionary(string word, List<string> dic, PersianSuffixLemmatizer suffixer, PruneType prouneType)
        {
            // With pruning disabled nothing is reported as valid.
            if (prouneType == PruneType.NoPrune)
            {
                return false;
            }

            // Exact hit on the word itself.
            if (dic.Contains(word))
            {
                return true;
            }

            // Only stem mode goes on to try suffix-stripped base words.
            if (prouneType != PruneType.Stem)
            {
                return false;
            }

            ReversePatternMatcherPatternInfo[] suffixMatches = suffixer.MatchForSuffix(word);
            foreach (ReversePatternMatcherPatternInfo match in suffixMatches)
            {
                if (dic.Contains(match.BaseWord))
                {
                    return true;
                }
            }

            return false;
        }