/// <summary>
/// Splits <paramref name="text"/> on a fixed set of punctuation and space separators and
/// passes the resulting words to a lemmatizing <c>MorphAnalyzer</c>.
/// </summary>
/// <param name="text">Raw text to split; must not be null.</param>
/// <returns>The analyzer's parse results, materialized as an array.</returns>
public static DeepMorphy.Model.MorphInfo[] getAllLemms(string text)
{
    // Empty fragments between adjacent separators are dropped by RemoveEmptyEntries.
    var delimiters = new[]
    {
        ",", ".", "!", "?", ";", ":", " ",
        "(", ")", "[", "]", "\"", "\'",
        "-", "–", "—", "»", "«", "•", "{", "}"
    };
    string[] tokens = text.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);

    var analyzer = new MorphAnalyzer(withLemmatization: true, withTrimAndLower: true);
    return analyzer.Parse(tokens).ToArray();
}
/// <summary>
/// Click handler: creates the morphological analyzer on first use, recreates the index at
/// <c>tempPath</c>, and then enables the folder-indexing, auto-test and search buttons.
/// </summary>
/// <remarks>
/// If no folder is selected or the analyzer fails to initialize, the handler returns
/// immediately and the buttons are left untouched.
/// </remarks>
private void btnInitAnalyzer_Click(object sender, EventArgs e)
{
    using (new BusyObject(this))
    {
        if (analyzer == null)
        {
            string defaultFolder = "hspell-data-files" + System.IO.Path.DirectorySeparatorChar;
            string hspellPath = SelectProjectFolder("Select a path to HSpell data files", defaultFolder);
            if (hspellPath == null)
            {
                return;
            }

            // Only publish the analyzer to the field once it reports successful initialization.
            MorphAnalyzer candidate = new MorphAnalyzer(hspellPath);
            if (!candidate.IsInitialized)
            {
                MessageBox.Show("Error while trying to create a morphological analyzer object; please check the existance of the required data files and try again");
                return;
            }

            analyzer = candidate;
        }

        // Recreate the index: open a writer with create=true and close it right away.
        IndexWriter indexWriter = new IndexWriter(
            FSDirectory.Open(tempPath),
            new Lucene.Net.Analysis.SimpleAnalyzer(),
            true,
            new IndexWriter.MaxFieldLength(10));
        indexWriter.Close();
    }

    btnIndexAddFolder.Enabled = true;
    btnRunAutoTests.Enabled = true;
    btnExecuteSearch.Enabled = true;
}
/// <summary>
/// For every entry in <c>morph.TagHelper.TagsDic</c>, re-creates the tag from its grammemes
/// via <c>TagHelper.CreateTag</c> and asserts that the resulting id equals the dictionary key.
/// </summary>
/// <param name="morph">
/// Analyzer under test. When <c>UseEnGrams</c> is false, the grammatical category keys are
/// translated with <c>GramInfo.TranslateKeyToRu</c> before lookup.
/// </param>
private void _testExistingTagCreation(MorphAnalyzer morph)
{
    string postKey = "post";
    string nmbrKey = "nmbr";
    string gndrKey = "gndr";
    string caseKey = "case";
    string persKey = "pers";
    string tensKey = "tens";
    string moodKey = "mood";
    string voicKey = "voic";

    if (!morph.UseEnGrams)
    {
        postKey = GramInfo.TranslateKeyToRu(postKey);
        nmbrKey = GramInfo.TranslateKeyToRu(nmbrKey);
        gndrKey = GramInfo.TranslateKeyToRu(gndrKey);
        caseKey = GramInfo.TranslateKeyToRu(caseKey);
        persKey = GramInfo.TranslateKeyToRu(persKey);
        tensKey = GramInfo.TranslateKeyToRu(tensKey);
        moodKey = GramInfo.TranslateKeyToRu(moodKey);
        voicKey = GramInfo.TranslateKeyToRu(voicKey);
    }

    foreach (var kp in morph.TagHelper.TagsDic)
    {
        var index = kp.Key;
        var gDic = kp.Value;

        // The part-of-speech entry is read unconditionally; all other categories are optional.
        var post = gDic[postKey];
        var nmbr = gDic.ContainsKey(nmbrKey) ? gDic[nmbrKey] : null;
        var gndr = gDic.ContainsKey(gndrKey) ? gDic[gndrKey] : null;
        var @case = gDic.ContainsKey(caseKey) ? gDic[caseKey] : null;
        var pers = gDic.ContainsKey(persKey) ? gDic[persKey] : null;
        var tens = gDic.ContainsKey(tensKey) ? gDic[tensKey] : null;
        var mood = gDic.ContainsKey(moodKey) ? gDic[moodKey] : null;
        var voic = gDic.ContainsKey(voicKey) ? gDic[voicKey] : null;

        // The arguments are passed positionally, exactly as before. The previous
        // "gndr = gndr" form was a self-assignment expression (warning CS1717), not a
        // named argument, so the values were already bound by position.
        var tagWithoutLemma = morph.TagHelper.CreateTag(post, gndr, nmbr, @case, pers, tens, mood, voic);

        // NOTE(review): this call is identical to the one above and passes no lemma;
        // confirm whether CreateTag is meant to receive a lemma argument here.
        var tagWithLemma = morph.TagHelper.CreateTag(post, gndr, nmbr, @case, pers, tens, mood, voic);

        Assert.AreEqual(index, tagWithoutLemma.Id, "Неправильный айди при создании без леммы");
        // Fixed: this assertion previously re-checked tagWithoutLemma, leaving tagWithLemma unverified.
        Assert.AreEqual(index, tagWithLemma.Id, "Неправильный айди при создании c леммой");
    }
}
/// <summary>
/// Constructor. Stores the analyzer and both queues in the corresponding instance fields.
/// </summary>
/// <param name="ma">The morphological analyzer plug-in.</param>
/// <param name="in_Renamed">Input queue.</param>
/// <param name="out_Renamed">Output queue.</param>
public MorphAnalyzerThread(MorphAnalyzer ma, LinkedBlockingQueue<PlainSentence> in_Renamed, LinkedBlockingQueue<SetOfSentences> out_Renamed)
{
    this.ma = ma;
    this.in_Renamed = in_Renamed;
    this.out_Renamed = out_Renamed;
}
/// <summary>
/// Demo: parses a fixed word list with lemmatization enabled and prints the text of every
/// result for which <c>CanBeSameLexeme</c>, called on the first result, returns true.
/// </summary>
static void LemmatizationExample1()
{
    var analyzer = new MorphAnalyzer(withLemmatization: true);
    WriteHeader("Выводим формы слова 'королевский'");

    string[] sample =
    {
        "королевский", "королевские", "корабли", "укрывал", "обновляя", "выходящие",
        "собаковод", "раскладывала", "обучает", "юбка", "пересказывают", "королевского"
    };
    var parsed = analyzer.Parse(sample).ToArray();

    // The first parsed word is the reference that all entries (itself included) are compared to.
    var reference = parsed[0];
    foreach (var candidate in parsed.Where(p => reference.CanBeSameLexeme(p)))
    {
        Console.WriteLine(candidate.Text);
    }
}
/// <summary>
/// Creates a <c>MorphAnalyzer</c> from <c>m_dictionaryPath</c> and stores it in
/// <c>m_morphAnalyzer</c>.
/// </summary>
/// <param name="supressExceptions">
/// When false, a message box naming the caught exception type is shown. When true, the three
/// handled exception types are swallowed silently. Any other exception type propagates.
/// </param>
void CreateMorphAnalizer(bool supressExceptions = false)
{
    try
    {
        m_morphAnalyzer = new MorphAnalyzer(m_dictionaryPath);
    }
    catch (DirectoryNotFoundException)
    {
        // Must stay ahead of the IOException handler: it is the more specific type.
        if (supressExceptions)
        {
            return;
        }

        MessageBox.Show("DirectoryNotFoundException.");
    }
    catch (UnauthorizedAccessException)
    {
        if (supressExceptions)
        {
            return;
        }

        MessageBox.Show("UnauthorizedAccessException.");
    }
    catch (IOException)
    {
        if (supressExceptions)
        {
            return;
        }

        MessageBox.Show("IOException.");
    }
}
/// <summary>
/// Constructor. Stores the analyzer and both queues in the corresponding instance fields.
/// </summary>
/// <param name="ma">The morphological analyzer plug-in.</param>
/// <param name="in_Renamed">Input queue.</param>
/// <param name="out_Renamed">Output queue.</param>
public MorphAnalyzerThread(MorphAnalyzer ma, LinkedBlockingQueue<PlainSentence> in_Renamed, LinkedBlockingQueue<SetOfSentences> out_Renamed)
{
    this.ma = ma;
    this.in_Renamed = in_Renamed;
    this.out_Renamed = out_Renamed;
}
/// <summary>
/// Selects the destination path, temporary path and analyzer for the given language and
/// hands them to <c>MoveIndexFiles</c>.
/// </summary>
/// <param name="lng">
/// Language code, compared case-insensitively after trimming. "en" selects the English
/// analyzer, "he" the morphological analyzer, anything else falls back to Spanish.
/// Must not be null.
/// </param>
/// <param name="indexServer">
/// Target server number. Only the value 1 sets a destination base path
/// (<c>ConfigurationController.IndexRootPath</c>); for any other value the base path is empty.
/// </param>
private static void MoveLuceneIndex(String lng, int indexServer)
{
    T.TraceMessage("Moving lucene index to server {0}", indexServer);

    // Impersonation credentials are passed through empty, as before.
    string impDomain = string.Empty, impUser = string.Empty, impPass = string.Empty;

    string destIndexBasePath = indexServer == 1
        ? ConfigurationController.IndexRootPath
        : string.Empty;

    // Language codes are identifiers, so lower-case them culture-invariantly. Only the
    // analyzer that is actually needed is constructed.
    string languageFolder;
    Analyzer analyzer;
    switch (lng.Trim().ToLowerInvariant())
    {
        case "en":
            languageFolder = "EN";
            analyzer = new EnglishAnalyzer(ConfigurationController.Stop_Words);
            break;
        case "he":
            languageFolder = "HE";
            analyzer = new MorphAnalyzer(ConfigurationController.MorphFilesPath);
            break;
        default:
            languageFolder = "ES";
            analyzer = new SpanishAnalyzer(ConfigurationController.Stop_Words);
            break;
    }

    // The separators are kept exactly as the original built them:
    // backslashes for the destination path, forward slashes for the temporary path.
    string destIndexPath = destIndexBasePath + "\\" + languageFolder + "\\IDX";
    string tempIndexPath = ConfigurationController.TempIndexRootPath + "/" + languageFolder + "/IDX";

    MoveIndexFiles(impDomain, impUser, impPass, destIndexPath, tempIndexPath, analyzer);
}
/// <summary>
/// Builds the lexeme of "федеральный", keeps the forms whose tag has "прил", and asserts
/// the longest common prefix and the list of endings reported for them.
/// </summary>
public void StemTest()
{
    var analyzer = new MorphAnalyzer();
    var sourceTag = analyzer.TagHelper.CreateTag("прил", gndr: "муж", nmbr: "ед", @case: "им");
    var adjectiveForms = analyzer.Lexeme("федеральный", sourceTag).Where(form => form.tag.Has("прил"));

    string[] endings;
    var stem = adjectiveForms.GetLongestCommonPrefixWithEndings(out endings);
    Assert.AreEqual("федеральн", stem);

    string[] expectedEndings =
    {
        "ых", "ыми", "ым", "ые", "ом", "ое", "ому", "ого", "ой", "ую", "ая", "ый"
    };
    Assert.AreEqual(expectedEndings, endings);
}
/// <summary>
/// Demo: parses a fixed word list with lemmatization enabled and prints
/// <c>BestTag.Lemma</c> for each result.
/// </summary>
static void LemmatizationExample2()
{
    var analyzer = new MorphAnalyzer(withLemmatization: true);
    WriteHeader("Выводим все леммы из главных тэгов");

    string[] sample =
    {
        "королевские", "корабли", "укрывал", "обновляя", "выходящие", "собаковод",
        "раскладывала", "обучает", "юбка", "пересказывают", "шоссе"
    };
    var parsed = analyzer.Parse(sample).ToArray();

    for (int i = 0; i < parsed.Length; i++)
    {
        Console.WriteLine(parsed[i].BestTag.Lemma);
    }
}
/// <summary>
/// Asserts that creating a "сущ" tag with only <c>gndr: "муж"</c> specified throws
/// <c>TagNotFoundException</c>.
/// </summary>
public void TestAmbigGramsForTagException()
{
    Assert.Throws<TagNotFoundException>(() =>
    {
        // The analyzer is constructed inside the delegate, as in the original test.
        var tagHelper = new MorphAnalyzer().TagHelper;
        tagHelper.CreateTag("сущ", gndr: "муж");
    });
}
/// <summary>
/// Asserts that creating a "сущ" tag with <c>tens: "буд"</c> throws
/// <c>TagNotFoundException</c>.
/// </summary>
public void TestNotExistingTagException()
{
    Assert.Throws<TagNotFoundException>(() =>
    {
        // The analyzer is constructed inside the delegate, as in the original test.
        var tagHelper = new MorphAnalyzer().TagHelper;
        tagHelper.CreateTag("сущ", tens: "буд");
    });
}
/// <summary>
/// Demo: parses <c>Words</c> with a default-configured analyzer and prints each result's
/// <c>ToString()</c> output on its own line.
/// </summary>
static void SimpleExample()
{
    var analyzer = new MorphAnalyzer();
    var parsed = analyzer.Parse(Words).ToArray();

    for (int i = 0; i < parsed.Length; i++)
    {
        Console.WriteLine(parsed[i].ToString());
    }
}
/// <summary>
/// Materializes the input sequence, allocates the result and processor-key arrays with the
/// same length, runs <c>_fillProcessors()</c>, and copies <c>Processors</c>, <c>Net</c> and
/// <c>CorrectionDict</c> from the analyzer.
/// </summary>
/// <param name="input">Items to process; enumerated once into an array.</param>
/// <param name="morph">Analyzer that supplies the processors, net and correction dictionary.</param>
public SplittedProc(IEnumerable <I> input, MorphAnalyzer morph)
{
    Input = input.ToArray();
    // One result slot per input item.
    Result = new R[Input.Length];
    Morph = morph;
    // One processor key slot per input item.
    _processorKeys = new string[Input.Length];
    // NOTE(review): _fillProcessors() runs before Processors, Net and CorrectionDict are
    // assigned below. Presumably it only reads Input, Morph and _processorKeys; confirm
    // before reordering these statements.
    _fillProcessors();
    Processors = morph.Processors;
    Net = morph.Net;
    CorrectionDict = morph.CorrectionDict;
}
/// <summary>
/// Demo: parses <c>Words</c> and prints each word's text together with its <c>BestTag</c>.
/// </summary>
static void AnalisysFullExample1()
{
    var analyzer = new MorphAnalyzer();
    var parsed = analyzer.Parse(Words).ToArray();

    Console.WriteLine("Лучший тег");
    for (int i = 0; i < parsed.Length; i++)
    {
        var info = parsed[i];
        Console.WriteLine($"{info.Text} - {info.BestTag}");
    }
}
/// <summary>
/// Demo: parses <c>Words</c> and, for each word, prints the key and power of the
/// <c>BestGram</c> in the "чр" category.
/// </summary>
static void AnalisysPartExample2()
{
    var analyzer = new MorphAnalyzer();
    var parsed = analyzer.Parse(Words).ToArray();
    WriteHeader("Только лучшая часть речи с ее вероятностью");

    for (int i = 0; i < parsed.Length; i++)
    {
        var info = parsed[i];
        var topGram = info["чр"].BestGram;
        Console.WriteLine($"{info.Text} - {topGram.Key}:{topGram.Power} ");
    }
}
/// <summary>
/// Usage sample: builds an analyzer from a dictionary folder, parses one word, and for each
/// parse reads its string form, its <c>Tag</c>, and the tag's string form. The values are
/// only assigned to locals; nothing is printed or stored.
/// </summary>
public SimpleUsageExample()
{
    MorphAnalyzer analyzer = new MorphAnalyzer("MyDictionaryFolderPath");
    List<Parse> parses = analyzer.Parse("myWord");

    foreach (var parse in parses)
    {
        string parseText = parse.ToString();
        Tag tag = parse.Tag;
        string grammemesText = tag.ToString();
    }
}
/// <summary>
/// For every "*.xml" file in the current directory (except "lem" and "main"), loads the
/// tests for that grammatical category, parses all test inputs, and prints two accuracy
/// figures: the share of fully correct tests and the share of correct classes.
/// </summary>
/// <remarks>
/// A test is correct when, for each position j below the number of expected classes, the
/// j-th predicted gram key is one of the expected classes. Checking of a test stops at the
/// first miss.
/// </remarks>
private static void TestGramClassification()
{
    var grams = Directory.GetFiles(System.Environment.CurrentDirectory, "*.xml")
                         .Select(Path.GetFileNameWithoutExtension)
                         .Where(x => x != "lem" && x != "main")
                         .ToArray();

    foreach (var gram in grams)
    {
        Console.WriteLine($"Calculating {gram} classification");
        var morph = new MorphAnalyzer(useEnGrams: true, withPreprocessors: true);
        var tests = LoadTests(gram).ToArray();
        var results = morph.Parse(tests.Select(x => x.X)).ToArray();

        float testsCount = tests.Length;
        float totalClassesCount = 0;
        float correctTests = 0;
        float correctClassesCount = 0;

        for (int i = 0; i < tests.Length; i++)
        {
            var test = tests[i];
            var res = results[i];
            // Expected classes are stored as a ';'-separated list.
            var etRez = test.Y.Split(';');
            var rez = res[gram].Grams.ToArray();
            totalClassesCount += etRez.Length;

            bool correct = true;
            for (int j = 0; j < etRez.Length; j++)
            {
                // Guard against fewer predicted grams than expected classes: a missing
                // prediction counts as a miss instead of throwing IndexOutOfRangeException.
                if (j < rez.Length && etRez.Contains(rez[j].Key))
                {
                    correctClassesCount++;
                }
                else
                {
                    correct = false;
                    break;
                }
            }

            if (correct)
            {
                correctTests++;
            }
        }

        float testAcc = correctTests / testsCount;
        float clsAcc = correctClassesCount / totalClassesCount;
        Console.WriteLine($"{gram} classification. Full acc: {testAcc}");
        Console.WriteLine($"{gram} classification. Classes acc: {clsAcc}");
    }
}
/// <summary>
/// Demo: parses <c>Words</c> and prints only the results whose best gram key in the "чр"
/// category equals "прил".
/// </summary>
static void AnalisysPartExample1()
{
    var analyzer = new MorphAnalyzer();
    var parsed = analyzer.Parse(Words).ToArray();
    WriteHeader("Только прилагательные");

    foreach (var info in parsed)
    {
        if (info["чр"].BestGramKey != "прил")
        {
            continue;
        }

        Console.WriteLine(info.ToString());
    }
}
/// <summary>
/// Demo: parses <c>Words</c> and prints the text of every result for which
/// <c>HasCombination("гл", "прош")</c> is true.
/// </summary>
static void AnalisysFullExample5()
{
    var analyzer = new MorphAnalyzer();
    var parsed = analyzer.Parse(Words).ToArray();

    Console.WriteLine("Слова, которые вероятно являются глаголами прошедшего времени");
    foreach (var info in parsed)
    {
        if (!info.HasCombination("гл", "прош"))
        {
            continue;
        }

        Console.WriteLine($"{info.Text}");
    }
}
/// <summary>
/// Prints the value of <c>GetMemory()</c> at four points: before creating the analyzer,
/// after creating it, after 100 parse passes over a fixed word list, and after
/// <c>GC.Collect()</c>.
/// </summary>
private static void ShowMemoryInfo()
{
    Console.WriteLine("Memory consumption info");
    Console.WriteLine($"Before all: {GetMemory()}");

    var analyzer = new MorphAnalyzer(withPreprocessors: true, withLemmatization: true);
    Console.WriteLine($"After init: {GetMemory()}");

    for (int pass = 0; pass < 100; pass++)
    {
        // The input array is deliberately rebuilt on every pass, as in the original loop.
        var parsed = analyzer.Parse(new string[]
        {
            "tafsdfdfasd", "xii", "123", ".345", "43,34", "..!", "1-ый", "бутявка", "в",
            "действуя", "королёвские", "большая", "двадцать", "тысячу", "миллионных",
            "222-ого", "дотошный", "красотка", "центральные", "укрывал", "королевские",
            "корабли", "укрывал", "обновляя", "выходящие", "собаковод", "раскладывала",
            "обучает", "юбка", "пересказывают"
        }).ToArray();
    }

    Console.WriteLine($"After processing: {GetMemory()}");
    GC.Collect();
    Console.WriteLine($"After collect: {GetMemory()}");
}
/// <summary>
/// Demo: inflects "победить" from the "инф_гл" tag to a "гл" tag (singular, future tense,
/// first person, indicative) and prints the word, source tag, first inflection result and
/// target tag.
/// </summary>
static void Inflect3()
{
    var analyzer = new MorphAnalyzer(withLemmatization: true);
    WriteHeader("Гипотетическая форма слова");

    var sourceTag = analyzer.TagHelper.CreateTag("инф_гл");
    var targetTag = analyzer.TagHelper.CreateTag("гл", nmbr: "ед", tens: "буд", pers: "1л", mood: "изъяв");
    var tasks = new[]
    {
        new InflectTask("победить", sourceTag, targetTag)
    };

    Console.WriteLine($"{tasks[0].word} {tasks[0].wordTag} -> {analyzer.Inflect(tasks).First()} {tasks[0].resultTag}");
}
/// <summary>
/// Demo: parses <c>Words</c> and, for each word, prints its text followed by every entry
/// of <c>Tags</c> with that tag's <c>Power</c>.
/// </summary>
static void AnalisysFullExample2()
{
    var analyzer = new MorphAnalyzer();
    var parsed = analyzer.Parse(Words).ToArray();

    Console.WriteLine("Все топ теги");
    for (int i = 0; i < parsed.Length; i++)
    {
        var info = parsed[i];
        Console.WriteLine($"{info.Text}:");

        foreach (var candidateTag in info.Tags)
        {
            Console.WriteLine($" {candidateTag} : {candidateTag.Power}");
        }
    }
}
/// <summary>
/// Demo: parses <c>Words</c> and, for each tag of each word, prints only the values looked
/// up under the "чр" and "число" keys.
/// </summary>
static void AnalisysFullExample4()
{
    var analyzer = new MorphAnalyzer();
    var parsed = analyzer.Parse(Words).ToArray();

    Console.WriteLine("Вывод только части речи и числа");
    for (int i = 0; i < parsed.Length; i++)
    {
        var info = parsed[i];
        Console.WriteLine($"{info.Text}:");

        foreach (var candidateTag in info.Tags)
        {
            Console.WriteLine($" {candidateTag["чр"]} {candidateTag["число"]}");
        }
    }
}
/// <summary>
/// Demo: parses the single word "речка" and prints the key and power of every gram in its
/// "падеж" category.
/// </summary>
static void AnalisysPartExample3()
{
    var analyzer = new MorphAnalyzer();
    WriteHeader("Полная информация по падежу");

    string[] sample = { "речка" };
    var parsed = analyzer.Parse(sample).ToArray();

    for (int i = 0; i < parsed.Length; i++)
    {
        var info = parsed[i];
        Console.WriteLine(info.Text);

        foreach (var caseGram in info["падеж"].Grams)
        {
            Console.WriteLine($"{caseGram.Key}:{caseGram.Power}");
        }
    }
}
/// <summary>
/// Demo: parses the word "я", takes the best tag of the first result, requests the lexeme
/// for that word and tag, and prints each (tag, text) pair followed by a blank line.
/// </summary>
static void Lexeme2()
{
    var analyzer = new MorphAnalyzer(withLemmatization: true);
    const string word = "я";

    // The tag used for the lexeme lookup comes from the analyzer's own parse of the word.
    var parsed = analyzer.Parse(word).ToArray();
    var tag = parsed[0].BestTag;
    var forms = analyzer.Lexeme(word, tag).ToArray();

    WriteHeader($"Лексема для слова {word}[{tag}]");
    for (int i = 0; i < forms.Length; i++)
    {
        Console.WriteLine($"{forms[i].tag} - {forms[i].text}");
    }

    Console.WriteLine();
}
/// <summary>
/// Console entry point: loads the analyzer from a fixed local dictionary folder, prints
/// every parse of the word "стали", and waits for a key press.
/// </summary>
static void Main(string[] args)
{
    var analyzer = new MorphAnalyzer(@"E:\Workspace\VisualStudio\MorphyDotNetSampleDictionary");

    foreach (var candidate in analyzer.Parse("стали"))
    {
        Console.WriteLine(candidate);
    }

    //Suffixes suffixes = new Suffixes(@"E:\Workspace\pymorphy2_tests\gramtab-opencorpora-ext.json");
    //var paradigms = new MorphyDotNet.DictUtils.ParadigmsReader().ReadFromFile(@"C:\Anaconda3\envs\pymorphy2_tests\Lib\site-packages\pymorphy2_dicts_ru\data\paradigms.array");

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Demo: parses <c>Words</c> and prints every tag for which <c>Has("прил", "ед")</c> is
/// true, together with the word text and the tag's <c>Power</c>.
/// </summary>
static void AnalisysFullExample3()
{
    var analyzer = new MorphAnalyzer();
    var parsed = analyzer.Parse(Words).ToArray();

    Console.WriteLine("Теги с прилагательным и единственным числом");
    foreach (var info in parsed)
    {
        foreach (var candidateTag in info.Tags)
        {
            if (!candidateTag.Has("прил", "ед"))
            {
                continue;
            }

            Console.WriteLine($"{info.Text} {candidateTag} : {candidateTag.Power}");
        }
    }
}
/// <summary>
/// Demo: lemmatizes two words using explicitly constructed tags (no parsing step) and
/// prints each word next to the lemma at the same position in the result.
/// </summary>
static void Lemmatization3()
{
    var analyzer = new MorphAnalyzer(withLemmatization: true);
    WriteHeader("Лемматизация без классификации");

    LemTask[] tasks =
    {
        new LemTask("синяя", analyzer.TagHelper.CreateTag("прил", gndr: "жен", nmbr: "ед", @case: "им")),
        new LemTask("гуляя", analyzer.TagHelper.CreateTag("деепр", tens: "наст"))
    };
    var lemmas = analyzer.Lemmatize(tasks).ToArray();

    // Tasks and lemmas are paired by position.
    int position = 0;
    foreach (var task in tasks)
    {
        Console.WriteLine($"{task.word} - {lemmas[position]}");
        position++;
    }
}
/// <summary>
/// Demo: parses three words, builds for each an inflection task whose target is a "сущ" tag
/// with the word's own "род" and "падеж" values and <c>nmbr: "мн"</c>, runs the
/// inflection, and prints source and result side by side.
/// </summary>
static void Inflect2()
{
    var analyzer = new MorphAnalyzer(withLemmatization: true);
    WriteHeader("Переводим слова во множественное число");

    var parsed = analyzer.Parse("стула", "стола", "горшка").ToArray();
    var tasks = parsed
        .Select(info =>
        {
            var text = info.Text;
            var sourceTag = info.BestTag;
            // Gender and case are copied from the parsed tag; only the number is replaced.
            var targetTag = analyzer.TagHelper.CreateTag(
                "сущ",
                gndr: info.BestTag["род"],
                @case: info.BestTag["падеж"],
                nmbr: "мн");
            return new InflectTask(text, sourceTag, targetTag);
        })
        .ToArray();
    var inflected = analyzer.Inflect(tasks).ToArray();

    int position = 0;
    foreach (var task in tasks)
    {
        Console.WriteLine($"{task.word} {task.wordTag} -> {inflected[position]} {task.resultTag}");
        position++;
    }
}
/// <summary>
/// Demo: requests the lexeme of "лемматизировать" for the "инф_гл" tag and prints it twice:
/// first every (tag, text) pair, then only the pairs whose tag has "деепр".
/// </summary>
static void Lexeme1()
{
    var analyzer = new MorphAnalyzer(withLemmatization: true);
    const string word = "лемматизировать";
    var tag = analyzer.TagHelper.CreateTag("инф_гл");
    var forms = analyzer.Lexeme(word, tag).ToArray();

    WriteHeader($"Лексема для слова {word}[{tag}]");
    for (int i = 0; i < forms.Length; i++)
    {
        Console.WriteLine($"{forms[i].tag} - {forms[i].text}");
    }

    Console.WriteLine();
    WriteHeader($"Только деепричастия из лексемы {word}[{tag}]");
    foreach (var form in forms)
    {
        if (!form.tag.Has("деепр"))
        {
            continue;
        }

        Console.WriteLine($"{form.tag} - {form.text}");
    }
}
/// <summary>
/// Copy constructor: reuses the <c>hebMorphLemmatizer</c> reference of
/// <paramref name="other"/> (the reference is assigned, not cloned).
/// </summary>
/// <param name="other">Analyzer whose lemmatizer reference is taken; must not be null.</param>
public MorphAnalyzer(MorphAnalyzer other)
{
    // The parameterless base constructor runs implicitly.
    this.hebMorphLemmatizer = other.hebMorphLemmatizer;
}