/// <summary>
/// Initializes the lemmatizer by creating the wrapped <c>PersianSuffixLemmatizer</c>
/// with a fixed combination of suffix categories.
/// </summary>
public PersianLemmatizer()
{
    // Suffix categories passed to the suffix lemmatizer, OR-ed together into a single value.
    SCICT.NLP.Persian.Constants.PersianSuffixesCategory suffixCategories =
        SCICT.NLP.Persian.Constants.PersianSuffixesCategory.ComparativeAdjectives
        | SCICT.NLP.Persian.Constants.PersianSuffixesCategory.IndefiniteYaa
        | SCICT.NLP.Persian.Constants.PersianSuffixesCategory.PluralSignAan
        | SCICT.NLP.Persian.Constants.PersianSuffixesCategory.PluralSignHaa
        | SCICT.NLP.Persian.Constants.PersianSuffixesCategory.YaaNesbat;

    lemmatizer = new PersianSuffixLemmatizer(false, true, suffixCategories);
}
/// <summary>
/// Creates a word mapper over the given dictionary.
/// </summary>
/// <param name="dictionary">
/// Word list to store. When <c>null</c>, an empty list is stored instead.
/// </param>
/// <param name="pruneType">Pruning mode stored for later use by this instance.</param>
public WordMapper(List<string> dictionary, PruneType pruneType)
{
    // The original null guard was immediately overwritten by an unconditional
    // assignment, so a null argument still left _dictionary null. Coalesce instead.
    _dictionary = dictionary ?? new List<string>();
    _pruneType = pruneType;
    _suffixer = new PersianSuffixLemmatizer(true, false);
}
/// <summary>
/// Builds the tagger from <paramref name="config"/>: creates the underlying
/// <c>MaximumLikelihoodTagger</c>, the suffix lemmatizer, and a lookup table that
/// maps each conjugated verb string to the distinct verb infos producing it.
/// </summary>
/// <param name="config">
/// Configuration providing <c>DictionaryFilename</c> and <c>StemFilename</c>.
/// </param>
public PersianMaximumLikelihoodTagger(Config config)
{
    _config = config;

    // Configure and create the base tagger.
    MaximumLikelihoodTagger.Config baseConfig = new MaximumLikelihoodTagger.Config
    {
        DictionaryFilename = _config.DictionaryFilename,
        Normalize = true,
        DefaultTags = new[] { PersianPartOfSpeech.Unknown },
        DefaultWeights = new[] { Double.NaN }
    };
    _baseTagger = new MaximumLikelihoodTagger(baseConfig);

    _lemmatizer = new PersianSuffixLemmatizer(false, true);
    _verbs = new Dictionary<string, List<Conjugator.VerbInfo>>();

    // Load the verb stems and prepare the conjugator that expands them.
    VerbInfoContainer stemContainer = new VerbInfoContainer();
    stemContainer.LoadStemFile(_config.StemFilename);
    Conjugator conjugator = new Conjugator(stemContainer);

    // For every tense/person value, index the SADE conjugations first and the
    // PISHVANDI conjugations second.
    var verbTypes = new[] { ENUM_VERB_TYPE.SADE, ENUM_VERB_TYPE.PISHVANDI };
    foreach (ENUM_TENSE_PERSON person in Enum.GetValues(typeof(ENUM_TENSE_PERSON)))
    {
        foreach (var verbType in verbTypes)
        {
            foreach (var conjugated in conjugator.ConjugateInfo(verbType, person))
            {
                List<Conjugator.VerbInfo> entries;
                if (!_verbs.TryGetValue(conjugated.Verb, out entries))
                {
                    entries = new List<Conjugator.VerbInfo>();
                    _verbs.Add(conjugated.Verb, entries);
                }

                // Keep each verb info at most once per surface form.
                if (!entries.Contains(conjugated))
                {
                    entries.Add(conjugated);
                }
            }
        }
    }
}
/// <summary>
/// Loads the Pinglish strings from <paramref name="mappingFileName"/>, learns from them,
/// loads the dictionary from <paramref name="dicPath"/>, and creates the word mapper.
/// </summary>
/// <param name="mappingFileName">File the Pinglish strings are loaded from.</param>
/// <param name="dicPath">Path passed to <c>Tools.LoadList</c> to fill the dictionary.</param>
/// <param name="pruneType">Pruning mode forwarded to the <c>WordMapper</c>.</param>
/// <remarks>
/// Any exception raised during initialization is written to the debug output and
/// not rethrown; the steps after the failing one are skipped.
/// </remarks>
public PinglishMapping(string mappingFileName, string dicPath, PruneType pruneType)
{
    try
    {
        List<PinglishString> loadedStrings = PinglishConverterUtils.LoadPinglishStrings(mappingFileName);
        Learn(loadedStrings, false);

        // Only the result of RemoveDuplicates() is appended to the data set.
        m_pinglishDataSet.AddRange(loadedStrings.RemoveDuplicates());

        _suffixer = new PersianSuffixLemmatizer(true);

        Tools.LoadList(ref _dic, dicPath);
        _wordMapper = new WordMapper(_dic, pruneType);
    }
    catch (Exception initializationError)
    {
        Debug.WriteLine(initializationError);
    }
}
/// <summary>
/// Checks whether <paramref name="word"/> is accepted by the dictionary under the given prune mode.
/// </summary>
/// <param name="word">Word to look up.</param>
/// <param name="dic">Dictionary word list that is searched.</param>
/// <param name="suffixer">Suffix lemmatizer used to obtain candidate base words in <c>Stem</c> mode.</param>
/// <param name="prouneType">Prune mode that selects how the lookup is performed.</param>
/// <returns>
/// <c>false</c> when the mode is <c>NoPrune</c>; otherwise <c>true</c> if the word itself is in
/// <paramref name="dic"/>, or, in <c>Stem</c> mode, if the <c>BaseWord</c> of any suffix match
/// is in <paramref name="dic"/>; <c>false</c> in every other case.
/// </returns>
public static bool IsValidInDictionary(string word, List<string> dic, PersianSuffixLemmatizer suffixer, PruneType prouneType)
{
    // NoPrune never reports a word as valid, even when the dictionary contains it.
    if (prouneType == PruneType.NoPrune)
    {
        return false;
    }

    if (dic.Contains(word))
    {
        return true;
    }

    // Only Stem mode falls back to the base words of the suffix matches.
    if (prouneType != PruneType.Stem)
    {
        return false;
    }

    foreach (ReversePatternMatcherPatternInfo match in suffixer.MatchForSuffix(word))
    {
        if (dic.Contains(match.BaseWord))
        {
            return true;
        }
    }

    return false;
}