コード例 #1
0
ファイル: Parser.cs プロジェクト: Exdominate/SearchSystem
 /// <summary>
 /// Splits <paramref name="text"/> on punctuation and spaces and runs the resulting tokens
 /// through a DeepMorphy analyzer configured with lemmatization and trim/lower-casing.
 /// </summary>
 /// <param name="text">Raw text to tokenize and analyze.</param>
 /// <returns>The analyzer's parse results for the extracted tokens, as an array.</returns>
 public static DeepMorphy.Model.MorphInfo[] getAllLemms(string text)
 {
     // Every entry ends a token; empty tokens between adjacent delimiters are dropped.
     var delimiters = new[]
     {
         ",", ".", "!", "?", ";", ":", " ", "(", ")", "[", "]", "\"", "\'", "-", "–", "—", "»", "«", "•", "{", "}"
     };
     string[] tokens = text.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);

     var analyzer = new MorphAnalyzer(withLemmatization: true, withTrimAndLower: true);
     DeepMorphy.Model.MorphInfo[] parsed = analyzer.Parse(tokens).ToArray();
     return parsed;
 }
コード例 #2
0
ファイル: MainForm.cs プロジェクト: tsimonyan/HebMorph
        /// <summary>
        /// Click handler: creates the morphological analyzer if none exists yet, recreates the index
        /// and finally enables the indexing, auto-test and search buttons.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnInitAnalyzer_Click(object sender, EventArgs e)
        {
            using (new BusyObject(this))
            {
                if (analyzer == null)
                {
                    string dataFolderName = "hspell-data-files" + System.IO.Path.DirectorySeparatorChar;
                    string hspellPath = SelectProjectFolder("Select a path to HSpell data files", dataFolderName);
                    if (hspellPath == null)
                    {
                        // No folder was chosen: leave without touching the index or the buttons.
                        return;
                    }

                    var candidate = new MorphAnalyzer(hspellPath);
                    if (candidate.IsInitialized)
                    {
                        // Only a successfully initialized analyzer is kept in the field.
                        analyzer = candidate;
                    }
                    else
                    {
                        MessageBox.Show("Error while trying to create a morphological analyzer object; please check the existance of the required data files and try again");
                        return;
                    }
                }

                // Recreate the index: open a writer over the temp path and close it straight away.
                var indexDirectory = FSDirectory.Open(tempPath);
                var indexAnalyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
                var fieldLimit = new IndexWriter.MaxFieldLength(10);
                IndexWriter indexWriter = new IndexWriter(indexDirectory, indexAnalyzer, true, fieldLimit);
                indexWriter.Close();
            }

            btnExecuteSearch.Enabled  = true;
            btnRunAutoTests.Enabled   = true;
            btnIndexAddFolder.Enabled = true;
        }
コード例 #3
0
        /// <summary>
        /// For every entry of the analyzer's tag dictionary, re-creates the tag from its grammemes
        /// through <c>TagHelper.CreateTag</c> and asserts that the created tag carries the id
        /// the entry is registered under.
        /// </summary>
        /// <param name="morph">Analyzer whose <c>TagHelper</c> is exercised.</param>
        private void _testExistingTagCreation(MorphAnalyzer morph)
        {
            string postKey = "post";
            string nmbrKey = "nmbr";
            string gndrKey = "gndr";
            string caseKey = "case";
            string persKey = "pers";
            string tensKey = "tens";
            string moodKey = "mood";
            string voicKey = "voic";

            // When the analyzer is not using English gram names, the dictionary keys are the translated ones.
            if (!morph.UseEnGrams)
            {
                postKey = GramInfo.TranslateKeyToRu(postKey);
                nmbrKey = GramInfo.TranslateKeyToRu(nmbrKey);
                gndrKey = GramInfo.TranslateKeyToRu(gndrKey);
                caseKey = GramInfo.TranslateKeyToRu(caseKey);
                persKey = GramInfo.TranslateKeyToRu(persKey);
                tensKey = GramInfo.TranslateKeyToRu(tensKey);
                moodKey = GramInfo.TranslateKeyToRu(moodKey);
                voicKey = GramInfo.TranslateKeyToRu(voicKey);
            }

            foreach (var kp in morph.TagHelper.TagsDic)
            {
                var index = kp.Key;
                var gDic  = kp.Value;

                // The part-of-speech entry is read unconditionally; all other categories are optional.
                var post  = gDic[postKey];
                var nmbr  = gDic.ContainsKey(nmbrKey) ? gDic[nmbrKey] : null;
                var gndr  = gDic.ContainsKey(gndrKey) ? gDic[gndrKey] : null;
                var @case = gDic.ContainsKey(caseKey) ? gDic[caseKey] : null;
                var pers  = gDic.ContainsKey(persKey) ? gDic[persKey] : null;
                var tens  = gDic.ContainsKey(tensKey) ? gDic[tensKey] : null;
                var mood  = gDic.ContainsKey(moodKey) ? gDic[moodKey] : null;
                var voic  = gDic.ContainsKey(voicKey) ? gDic[voicKey] : null;

                // Arguments are passed positionally in the same order as before. The previous
                // "gndr = gndr" form was a self-assignment expression, not a named argument,
                // so it was already positional.
                var tagWithoutLemma = morph.TagHelper.CreateTag(
                    post,
                    gndr,
                    nmbr,
                    @case,
                    pers,
                    tens,
                    mood,
                    voic);

                var tagWithLemma = morph.TagHelper.CreateTag(
                    post,
                    gndr,
                    nmbr,
                    @case,
                    pers,
                    tens,
                    mood,
                    voic);

                Assert.AreEqual(index, tagWithoutLemma.Id, "Неправильный айди при создании без леммы");
                // Fixed: this assertion used to re-check tagWithoutLemma, leaving tagWithLemma unverified.
                Assert.AreEqual(index, tagWithLemma.Id, "Неправильный айди при создании c леммой");
            }
        }
コード例 #4
0
 /// <summary>
 /// Constructor. Stores the analyzer and both queues on the new instance.
 /// </summary>
 /// <param name="ma">The morphological analyzer plug-in.</param>
 /// <param name="in_Renamed">Input queue.</param>
 /// <param name="out_Renamed">Output queue.</param>
 public MorphAnalyzerThread(
     MorphAnalyzer ma,
     LinkedBlockingQueue<PlainSentence> in_Renamed,
     LinkedBlockingQueue<SetOfSentences> out_Renamed)
 {
     // Nothing is started here; the arguments are only remembered.
     this.out_Renamed = out_Renamed;
     this.in_Renamed = in_Renamed;
     this.ma = ma;
 }
コード例 #5
0
ファイル: Program.cs プロジェクト: kactetus/DeepMorphy
        /// <summary>
        /// Parses a fixed word list and prints the text of every result that
        /// <c>CanBeSameLexeme</c> accepts when compared against the first result.
        /// </summary>
        static void LemmatizationExample1()
        {
            var analyzer = new MorphAnalyzer(withLemmatization: true);

            WriteHeader("Выводим формы слова 'королевский'");

            string[] vocabulary =
            {
                "королевский", "королевские", "корабли", "укрывал",
                "обновляя", "выходящие", "собаковод", "раскладывала",
                "обучает", "юбка", "пересказывают", "королевского"
            };

            var parsed = analyzer.Parse(vocabulary).ToArray();

            // The first word of the list is the reference every other result is compared with.
            var reference = parsed[0];

            foreach (var candidate in parsed.Where(info => reference.CanBeSameLexeme(info)))
            {
                Console.WriteLine(candidate.Text);
            }
        }
コード例 #6
0
 /// <summary>
 /// Tries to build the analyzer from <c>m_dictionaryPath</c>. Directory, access and I/O failures
 /// are caught and reported through a message box unless reporting is suppressed.
 /// </summary>
 /// <param name="supressExceptions">When true, caught failures are swallowed without any message.</param>
 void CreateMorphAnalizer(bool supressExceptions = false)
 {
     // Text to show for the failure that was caught; stays null on success.
     string failureText = null;

     try
     {
         m_morphAnalyzer = new MorphAnalyzer(m_dictionaryPath);
     }
     catch (DirectoryNotFoundException)
     {
         // Must precede IOException, which is its base type.
         failureText = "DirectoryNotFoundException.";
     }
     catch (UnauthorizedAccessException)
     {
         failureText = "UnauthorizedAccessException.";
     }
     catch (IOException)
     {
         failureText = "IOException.";
     }

     if (failureText != null && !supressExceptions)
     {
         MessageBox.Show(failureText);
     }
 }
コード例 #7
0
 /// <summary>
 /// Constructor. Keeps the supplied analyzer and queues for later use by this instance.
 /// </summary>
 /// <param name="ma">The morphological analyzer plug-in.</param>
 /// <param name="in_Renamed">Input queue.</param>
 /// <param name="out_Renamed">Output queue.</param>
 public MorphAnalyzerThread(
     MorphAnalyzer ma,
     LinkedBlockingQueue<PlainSentence> in_Renamed,
     LinkedBlockingQueue<SetOfSentences> out_Renamed)
 {
     // Plain member assignments; no work is performed at construction time.
     this.out_Renamed = out_Renamed;
     this.in_Renamed = in_Renamed;
     this.ma = ma;
 }
コード例 #8
0
        /// <summary>
        /// Selects the destination path, temporary path and analyzer for the given language
        /// and hands them to <c>MoveIndexFiles</c>.
        /// </summary>
        /// <param name="lng">
        /// Language code; "en" and "he" (case-insensitive, surrounding whitespace ignored) are
        /// recognized, anything else falls back to Spanish.
        /// </param>
        /// <param name="indexServer">
        /// Target server number; only server 1 gets the configured index root, for any other
        /// value the destination base path stays empty.
        /// </param>
        private static void MoveLuceneIndex(String lng, int indexServer)
        {
            T.TraceMessage("Moving lucene index to server {0}", indexServer);

            // Impersonation credentials are always passed empty from here.
            string impDomain = string.Empty, impUser = string.Empty, impPass = string.Empty;
            string destIndexBasePath = indexServer == 1 ? ConfigurationController.IndexRootPath : string.Empty;

            // Normalize once and compare ordinally, so the result does not depend on the current culture.
            string language = lng.Trim();

            string   languageFolder;
            Analyzer analyzer;

            // Exactly one analyzer is constructed, for the branch that is actually taken.
            if (string.Equals(language, "he", StringComparison.OrdinalIgnoreCase))
            {
                languageFolder = "HE";
                analyzer       = new MorphAnalyzer(ConfigurationController.MorphFilesPath);
            }
            else if (string.Equals(language, "en", StringComparison.OrdinalIgnoreCase))
            {
                languageFolder = "EN";
                analyzer       = new EnglishAnalyzer(ConfigurationController.Stop_Words);
            }
            else
            {
                languageFolder = "ES";
                analyzer       = new SpanishAnalyzer(ConfigurationController.Stop_Words);
            }

            // Separator styles are kept as they were: backslashes for the destination, slashes for the temp path.
            string destIndexPath = destIndexBasePath + "\\" + languageFolder + "\\IDX";
            string tempIndexPath = ConfigurationController.TempIndexRootPath + "/" + languageFolder + "/IDX";

            MoveIndexFiles(impDomain, impUser, impPass, destIndexPath, tempIndexPath, analyzer);
        }
コード例 #9
0
        /// <summary>
        /// Builds the lexeme of "федеральный", keeps the adjective forms and checks both the
        /// longest common prefix and the list of endings returned alongside it.
        /// </summary>
        public void StemTest()
        {
            var analyzer  = new MorphAnalyzer();
            var sourceTag = analyzer.TagHelper.CreateTag("прил", gndr: "муж", nmbr: "ед", @case: "им");

            string[] endings;
            var stem = analyzer.Lexeme("федеральный", sourceTag)
                               .Where(form => form.tag.Has("прил"))
                               .GetLongestCommonPrefixWithEndings(out endings);

            Assert.AreEqual("федеральн", stem);

            // Expected endings, in the order the assertion compares them.
            string[] expectedEndings =
            {
                "ых", "ыми", "ым", "ые",
                "ом", "ое", "ому", "ого",
                "ой", "ую", "ая", "ый"
            };

            Assert.AreEqual(expectedEndings, endings);
        }
コード例 #10
0
ファイル: Program.cs プロジェクト: kactetus/DeepMorphy
        /// <summary>
        /// Parses a fixed word list and prints the lemma of each result's best tag, one per line.
        /// </summary>
        static void LemmatizationExample2()
        {
            var analyzer = new MorphAnalyzer(withLemmatization: true);

            WriteHeader("Выводим все леммы из главных тэгов");

            string[] vocabulary =
            {
                "королевские", "корабли", "укрывал", "обновляя",
                "выходящие", "собаковод", "раскладывала", "обучает",
                "юбка", "пересказывают", "шоссе"
            };

            // Materialize first so every word is parsed before anything is printed.
            var parsed = analyzer.Parse(vocabulary).ToArray();

            for (int i = 0; i < parsed.Length; i++)
            {
                Console.WriteLine(parsed[i].BestTag.Lemma);
            }
        }
コード例 #11
0
 /// <summary>
 /// Expects <c>TagNotFoundException</c> when a noun tag is requested with only the gender given.
 /// </summary>
 public void TestAmbigGramsForTagException()
 {
     // The analyzer is created inside the delegate, so its construction is covered by the assertion too.
     Assert.Throws<TagNotFoundException>(
         () => { new MorphAnalyzer().TagHelper.CreateTag("сущ", gndr: "муж"); });
 }
コード例 #12
0
 /// <summary>
 /// Expects <c>TagNotFoundException</c> when a noun tag is requested together with a tense value.
 /// </summary>
 public void TestNotExistingTagException()
 {
     // The analyzer is created inside the delegate, so its construction is covered by the assertion too.
     Assert.Throws<TagNotFoundException>(
         () => { new MorphAnalyzer().TagHelper.CreateTag("сущ", tens: "буд"); });
 }
コード例 #13
0
ファイル: Program.cs プロジェクト: kactetus/DeepMorphy
        /// <summary>
        /// Parses <c>Words</c> with a default analyzer and prints each result's string form.
        /// </summary>
        static void SimpleExample()
        {
            var analyzer = new MorphAnalyzer();

            // The whole batch is parsed before the first line is written.
            foreach (var info in analyzer.Parse(Words).ToArray())
            {
                string line = info.ToString();
                Console.WriteLine(line);
            }
        }
コード例 #14
0
 /// <summary>
 /// Materializes the input, allocates one result slot and one processor-key slot per input item,
 /// runs <c>_fillProcessors</c>, and copies the processors, network and correction dictionary
 /// from the analyzer.
 /// </summary>
 /// <param name="input">Items to process; enumerated once into an array.</param>
 /// <param name="morph">Analyzer providing <c>Processors</c>, <c>Net</c> and <c>CorrectionDict</c>.</param>
 public SplittedProc(IEnumerable <I> input, MorphAnalyzer morph)
 {
     Input          = input.ToArray();
     // Result and _processorKeys are sized to match Input, one slot per item.
     Result         = new R[Input.Length];
     Morph          = morph;
     _processorKeys = new string[Input.Length];
     // NOTE(review): _fillProcessors runs before Processors/Net/CorrectionDict are assigned below;
     // presumably it reaches them through Morph — confirm that it does not read this.Processors.
     _fillProcessors();
     Processors     = morph.Processors;
     Net            = morph.Net;
     CorrectionDict = morph.CorrectionDict;
 }
コード例 #15
0
ファイル: Program.cs プロジェクト: kactetus/DeepMorphy
        /// <summary>
        /// Parses <c>Words</c> and prints each word's text together with its best tag.
        /// </summary>
        static void AnalisysFullExample1()
        {
            var analyzer = new MorphAnalyzer();
            var parsed   = analyzer.Parse(Words).ToArray();

            Console.WriteLine("Лучший тег");

            for (int i = 0; i < parsed.Length; i++)
            {
                var info = parsed[i];
                Console.WriteLine($"{info.Text} - {info.BestTag}");
            }
        }
コード例 #16
0
ファイル: Program.cs プロジェクト: kactetus/DeepMorphy
        /// <summary>
        /// Parses <c>Words</c> and, for each word, prints the key and power of the best gram
        /// in the "чр" category.
        /// </summary>
        static void AnalisysPartExample2()
        {
            var analyzer = new MorphAnalyzer();
            var parsed   = analyzer.Parse(Words).ToArray();

            WriteHeader("Только лучшая часть речи с ее вероятностью");

            for (int i = 0; i < parsed.Length; i++)
            {
                var info    = parsed[i];
                var topGram = info["чр"].BestGram;
                Console.WriteLine($"{info.Text} - {topGram.Key}:{topGram.Power} ");
            }
        }
コード例 #17
0
        /// <summary>
        /// Usage walkthrough: builds an analyzer from a dictionary folder, parses one word and,
        /// for every parse, obtains its string form, its tag and the tag's string form.
        /// The obtained values are not used any further.
        /// </summary>
        public SimpleUsageExample()
        {
            var analyzer = new MorphAnalyzer("MyDictionaryFolderPath");
            List<Parse> parses = analyzer.Parse("myWord");

            foreach (Parse parse in parses)
            {
                // Text form of the whole parse.
                string parseText = parse.ToString();

                // The tag attached to this parse, and its text form.
                Tag    tag     = parse.Tag;
                string tagText = tag.ToString();
            }
        }
コード例 #18
0
ファイル: Program.cs プロジェクト: kactetus/DeepMorphy
        /// <summary>
        /// For every gram category that has an <c>*.xml</c> file in the current directory
        /// (except "lem" and "main"), loads its tests, parses the test inputs and prints two
        /// accuracies: the share of fully correct tests and the share of correct classes.
        /// </summary>
        private static void TestGramClassification()
        {
            var grams = Directory.GetFiles(System.Environment.CurrentDirectory, "*.xml")
                        .Select(Path.GetFileNameWithoutExtension)
                        .Where(x => x != "lem" && x != "main")
                        .ToArray();

            foreach (var gram in grams)
            {
                Console.WriteLine($"Calculating {gram} classification");
                var   morph               = new MorphAnalyzer(useEnGrams: true, withPreprocessors: true);
                var   tests               = LoadTests(gram).ToArray();
                var   results             = morph.Parse(tests.Select(x => x.X)).ToArray();
                float testsCount          = tests.Length;
                float totalClassesCount   = 0;
                float correctTests        = 0;
                float correctClassesCount = 0;

                for (int i = 0; i < tests.Length; i++)
                {
                    // Expected classes are stored as a ';'-separated list.
                    var expected  = tests[i].Y.Split(';');
                    var predicted = results[i][gram].Grams.ToArray();
                    totalClassesCount += expected.Length;

                    // The first expected.Length predictions must all be expected classes.
                    bool correct = true;
                    for (int j = 0; j < expected.Length; j++)
                    {
                        // Fixed: fewer predictions than expected classes used to throw
                        // IndexOutOfRangeException; it now counts as a failed test.
                        if (j < predicted.Length && expected.Contains(predicted[j].Key))
                        {
                            correctClassesCount++;
                        }
                        else
                        {
                            correct = false;
                            break;
                        }
                    }

                    if (correct)
                    {
                        correctTests++;
                    }
                }

                float testAcc = correctTests / testsCount;
                float clsAcc  = correctClassesCount / totalClassesCount;
                Console.WriteLine($"{gram} classification. Full acc: {testAcc}");
                Console.WriteLine($"{gram} classification. Classes acc: {clsAcc}");
            }
        }
コード例 #19
0
ファイル: Program.cs プロジェクト: kactetus/DeepMorphy
        /// <summary>
        /// Parses <c>Words</c> and prints only the results whose best "чр" gram key is "прил".
        /// </summary>
        static void AnalisysPartExample1()
        {
            var analyzer = new MorphAnalyzer();
            var parsed   = analyzer.Parse(Words).ToArray();

            WriteHeader("Только прилагательные");

            var adjectives = parsed.Where(info => info["чр"].BestGramKey == "прил");
            foreach (var info in adjectives)
            {
                Console.WriteLine(info.ToString());
            }
        }
コード例 #20
0
ファイル: Program.cs プロジェクト: kactetus/DeepMorphy
        /// <summary>
        /// Parses <c>Words</c> and prints the text of every result for which
        /// <c>HasCombination("гл", "прош")</c> holds.
        /// </summary>
        static void AnalisysFullExample5()
        {
            var analyzer = new MorphAnalyzer();
            var parsed   = analyzer.Parse(Words).ToArray();

            Console.WriteLine("Слова, которые вероятно являются глаголами прошедшего времени");

            var pastVerbs = parsed.Where(info => info.HasCombination("гл", "прош"));
            foreach (var info in pastVerbs)
            {
                Console.WriteLine($"{info.Text}");
            }
        }
コード例 #21
0
ファイル: Program.cs プロジェクト: kactetus/DeepMorphy
        /// <summary>
        /// Prints the value of <c>GetMemory()</c> at four points: before the analyzer is created,
        /// after it is created, after 100 parse passes over a fixed word list, and after a forced
        /// garbage collection.
        /// </summary>
        private static void ShowMemoryInfo()
        {
            Console.WriteLine("Memory consumption info");
            Console.WriteLine($"Before all: {GetMemory()}");

            var analyzer = new MorphAnalyzer(withPreprocessors: true, withLemmatization: true);
            Console.WriteLine($"After init: {GetMemory()}");

            for (int pass = 0; pass < 100; pass++)
            {
                // The input array is deliberately rebuilt on every pass, exactly as before,
                // so the allocation pattern being measured does not change.
                var batch = new string[]
                {
                    "tafsdfdfasd", "xii", "123", ".345", "43,34",
                    "..!", "1-ый", "бутявка", "в", "действуя",
                    "королёвские", "большая", "двадцать", "тысячу", "миллионных",
                    "222-ого", "дотошный", "красотка", "центральные", "укрывал",
                    "королевские", "корабли", "укрывал", "обновляя", "выходящие",
                    "собаковод", "раскладывала", "обучает", "юбка", "пересказывают"
                };

                // The parse results are materialized and then dropped.
                var parsed = analyzer.Parse(batch).ToArray();
            }

            Console.WriteLine($"After processing: {GetMemory()}");
            GC.Collect();
            Console.WriteLine($"After collect: {GetMemory()}");
        }
コード例 #22
0
        /// <summary>
        /// Inflects "победить" from the "инф_гл" tag into a singular, future-tense, first-person,
        /// indicative verb tag and prints the source word, both tags and the first inflection result.
        /// </summary>
        static void Inflect3()
        {
            var analyzer = new MorphAnalyzer(withLemmatization: true);

            WriteHeader("Гипотетическая форма слова");

            var fromTag = analyzer.TagHelper.CreateTag("инф_гл");
            var toTag   = analyzer.TagHelper.CreateTag("гл", nmbr: "ед", tens: "буд", pers: "1л", mood: "изъяв");
            var tasks   = new[] { new InflectTask("победить", fromTag, toTag) };

            // Values are read in the same order the original interpolation evaluated them.
            var sourceWord = tasks[0].word;
            var sourceTag  = tasks[0].wordTag;
            var inflected  = analyzer.Inflect(tasks).First();
            var targetTag  = tasks[0].resultTag;

            Console.WriteLine($"{sourceWord} {sourceTag} -> {inflected} {targetTag}");
        }
コード例 #23
0
ファイル: Program.cs プロジェクト: kactetus/DeepMorphy
        /// <summary>
        /// Parses <c>Words</c> and prints, under each word, every tag of the result with its power.
        /// </summary>
        static void AnalisysFullExample2()
        {
            var analyzer = new MorphAnalyzer();
            var parsed   = analyzer.Parse(Words).ToArray();

            Console.WriteLine("Все топ теги");

            for (int i = 0; i < parsed.Length; i++)
            {
                var info = parsed[i];

                // Word header first, then one indented line per tag.
                Console.WriteLine($"{info.Text}:");
                foreach (var candidateTag in info.Tags)
                {
                    Console.WriteLine($"    {candidateTag} : {candidateTag.Power}");
                }
            }
        }
コード例 #24
0
ファイル: Program.cs プロジェクト: kactetus/DeepMorphy
        /// <summary>
        /// Parses <c>Words</c> and prints, under each word, the "чр" and "число" values of every tag.
        /// </summary>
        static void AnalisysFullExample4()
        {
            var analyzer = new MorphAnalyzer();
            var parsed   = analyzer.Parse(Words).ToArray();

            Console.WriteLine("Вывод только части речи и числа");

            for (int i = 0; i < parsed.Length; i++)
            {
                var info = parsed[i];

                // Word header first, then one indented line per tag.
                Console.WriteLine($"{info.Text}:");
                foreach (var candidateTag in info.Tags)
                {
                    Console.WriteLine($"    {candidateTag["чр"]} {candidateTag["число"]}");
                }
            }
        }
コード例 #25
0
ファイル: Program.cs プロジェクト: kactetus/DeepMorphy
        /// <summary>
        /// Parses the single word "речка" and prints every gram of its "падеж" category
        /// as a key:power pair.
        /// </summary>
        static void AnalisysPartExample3()
        {
            var analyzer = new MorphAnalyzer();

            WriteHeader("Полная информация по падежу");

            string[] input = { "речка" };
            var parsed = analyzer.Parse(input).ToArray();

            for (int i = 0; i < parsed.Length; i++)
            {
                var info = parsed[i];
                Console.WriteLine(info.Text);

                foreach (var caseGram in info["падеж"].Grams)
                {
                    Console.WriteLine($"{caseGram.Key}:{caseGram.Power}");
                }
            }
        }
コード例 #26
0
        /// <summary>
        /// Parses the word "я", takes the best tag of the first result and prints every
        /// (tag, text) pair of the lexeme built from that word and tag.
        /// </summary>
        static void Lexeme2()
        {
            var analyzer = new MorphAnalyzer(withLemmatization: true);
            var word     = "я";

            // The same word is both parsed and used as the lexeme source.
            var bestTag = analyzer.Parse(word).ToArray()[0].BestTag;
            var forms   = analyzer.Lexeme(word, bestTag).ToArray();

            WriteHeader($"Лексема для слова {word}[{bestTag}]");

            for (int i = 0; i < forms.Length; i++)
            {
                Console.WriteLine($"{forms[i].tag} - {forms[i].text}");
            }

            Console.WriteLine();
        }
コード例 #27
0
        /// <summary>
        /// Demo entry point: loads the analyzer from a fixed local dictionary folder, prints every
        /// parse of the word "стали" and then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // NOTE: the dictionary location is a machine-specific absolute path.
            const string dictionaryFolder = @"E:\Workspace\VisualStudio\MorphyDotNetSampleDictionary";

            var analyzer = new MorphAnalyzer(dictionaryFolder);

            foreach (var candidate in analyzer.Parse("стали"))
            {
                Console.WriteLine(candidate);
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
コード例 #28
0
ファイル: Program.cs プロジェクト: kactetus/DeepMorphy
        /// <summary>
        /// Parses <c>Words</c> and prints every tag that has both "прил" and "ед",
        /// together with the word text and the tag's power.
        /// </summary>
        static void AnalisysFullExample3()
        {
            var analyzer = new MorphAnalyzer();
            var parsed   = analyzer.Parse(Words).ToArray();

            Console.WriteLine("Теги с прилагательным и единственным числом");

            for (int i = 0; i < parsed.Length; i++)
            {
                var info = parsed[i];

                // Only tags carrying both grammemes are reported.
                foreach (var match in info.Tags.Where(t => t.Has("прил", "ед")))
                {
                    Console.WriteLine($"{info.Text} {match} : {match.Power}");
                }
            }
        }
コード例 #29
0
        /// <summary>
        /// Lemmatizes two words whose tags are supplied explicitly and prints each source word
        /// next to the lemma at the same position.
        /// </summary>
        static void Lemmatization3()
        {
            var analyzer = new MorphAnalyzer(withLemmatization: true);

            WriteHeader("Лемматизация без классификации");

            var adjectiveTag = analyzer.TagHelper.CreateTag("прил", gndr: "жен", nmbr: "ед", @case: "им");
            var gerundTag    = analyzer.TagHelper.CreateTag("деепр", tens: "наст");

            var tasks = new[]
            {
                new LemTask("синяя", adjectiveTag),
                new LemTask("гуляя", gerundTag)
            };

            var lemmas = analyzer.Lemmatize(tasks).ToArray();

            // Output is paired by index: lemmas[n] is printed next to tasks[n].
            int position = 0;
            while (position < tasks.Length)
            {
                Console.WriteLine($"{tasks[position].word} - {lemmas[position]}");
                position++;
            }
        }
コード例 #30
0
        /// <summary>
        /// Parses three words, builds for each an inflection task that keeps the best tag's
        /// "род" and "падеж" values but requests number "мн", and prints source and result side by side.
        /// </summary>
        static void Inflect2()
        {
            var analyzer = new MorphAnalyzer(withLemmatization: true);

            WriteHeader("Переводим слова во множественное число");

            var parsed = analyzer.Parse("стула", "стола", "горшка").ToArray();

            var tasks = parsed
                        .Select(info =>
                        {
                            // Target tag: a noun with the same gender and case as the best tag, number "мн".
                            var best      = info.BestTag;
                            var targetTag = analyzer.TagHelper.CreateTag("сущ", gndr: best["род"], @case: best["падеж"], nmbr: "мн");
                            return new InflectTask(info.Text, best, targetTag);
                        })
                        .ToArray();

            var inflected = analyzer.Inflect(tasks).ToArray();

            // Output is paired by index: inflected[n] is printed next to tasks[n].
            int position = 0;
            while (position < tasks.Length)
            {
                Console.WriteLine($"{tasks[position].word} {tasks[position].wordTag} -> {inflected[position]} {tasks[position].resultTag}");
                position++;
            }
        }
コード例 #31
0
        /// <summary>
        /// Builds the lexeme of "лемматизировать" from the "инф_гл" tag and prints it twice:
        /// first every form, then only the forms whose tag has "деепр".
        /// </summary>
        static void Lexeme1()
        {
            var analyzer   = new MorphAnalyzer(withLemmatization: true);
            var word       = "лемматизировать";
            var infinitive = analyzer.TagHelper.CreateTag("инф_гл");
            var forms      = analyzer.Lexeme(word, infinitive).ToArray();

            WriteHeader($"Лексема для слова {word}[{infinitive}]");
            for (int i = 0; i < forms.Length; i++)
            {
                Console.WriteLine($"{forms[i].tag} - {forms[i].text}");
            }

            Console.WriteLine();

            WriteHeader($"Только деепричастия из лексемы {word}[{infinitive}]");
            foreach (var form in forms)
            {
                // Second pass over the same materialized forms, filtered by tag.
                if (form.tag.Has("деепр"))
                {
                    Console.WriteLine($"{form.tag} - {form.text}");
                }
            }
        }
コード例 #32
0
ファイル: MorphAnalyzer.cs プロジェクト: kirillkh/HebMorph
 /// <summary>
 /// Creates an analyzer that takes its lemmatizer from an existing analyzer.
 /// </summary>
 /// <param name="other">Analyzer whose <c>hebMorphLemmatizer</c> is assigned to the new instance.</param>
 public MorphAnalyzer(MorphAnalyzer other)
 {
     // The parameterless base constructor runs implicitly; only the lemmatizer member is taken over.
     this.hebMorphLemmatizer = other.hebMorphLemmatizer;
 }