/// <summary>
/// Lemmatizes <paramref name="text"/> word by word and returns the results as one
/// space-separated, lower-cased string.
/// </summary>
/// <param name="textAnalyzer">Analyzer used to lemmatize each word. Precondition: not null.</param>
/// <param name="text">Text to process. Precondition: not null or empty.</param>
/// <returns>
/// For every word that the analyzer produces at least one result for, the lower-cased text
/// of form id 0 of the first result, joined with single spaces. Words without a result are
/// skipped, so the returned string may be empty; it never starts or ends with a separator.
/// </returns>
protected string Lemmatize(TextAnalyzer textAnalyzer, string text)
        {
            Contract.Requires<ArgumentNullException>(textAnalyzer != null);
            Contract.Requires<ArgumentException>(!string.IsNullOrEmpty(text));

            string trimText = LexerUtils.TrimText(text);
            string[] words = trimText.Split(' ');
            StringBuilder stringBuilder = new StringBuilder(1024);

            // Tracks whether a lemma has already been written. Deciding the separator from
            // what was actually emitted (instead of from the word index) avoids a trailing
            // space when the last word yields no lemmatization result.
            bool needsSeparator = false;

            foreach (string word in words)
            {
                LemmatizeResult lemmatizeResult = textAnalyzer.Lemmatize(word).FirstOrDefault();

                if (lemmatizeResult == null)
                {
                    // The analyzer returned nothing for this word; leave it out of the output.
                    continue;
                }

                if (needsSeparator)
                {
                    stringBuilder.Append(' ');
                }

                stringBuilder.Append(lemmatizeResult.GetTextByFormId(0).ToLower());
                needsSeparator = true;
            }

            return stringBuilder.ToString();
        }
// Example #2
        /// <summary>
        /// Reads the file at <paramref name="path"/> line by line and, for every space-separated
        /// word, collects the nodes of <paramref name="network"/> whose names match it.
        /// </summary>
        /// <remarks>
        /// Two local lists are filled: one with nodes whose name equals the word
        /// (invariant culture, case-insensitive) and one with nodes whose name contains the word.
        /// When a word has no exact match, the word is lemmatized and the text of form id 0 of
        /// the first result is matched the same way.
        /// NOTE(review): neither list is returned or stored by the code visible here, so the
        /// method currently has no observable result apart from loading the dictionaries and
        /// reading the file - confirm whether the results were meant to be exposed.
        /// </remarks>
        /// <param name="path">Path of the text file to analyze.</param>
        /// <param name="network">Network whose <c>Nodes</c> are searched.</param>
        private static void AnalyzeText(string path, Network network)
        {
            TextAnalyzer textAnalyzer = new TextAnalyzer(AdapterKind.RussianCp1251Adapter);
            textAnalyzer.Load(
                Path.Combine(Environment.CurrentDirectory, "Dictionaries", "Russian", "Dictionary.auto"),
                Path.Combine(Environment.CurrentDirectory, "Dictionaries", "Russian", "Paradigms.bin"),
                Path.Combine(Environment.CurrentDirectory, "Dictionaries", "Russian", "PredictionDictionary.auto"));

            List<NetworkNode> context = new List<NetworkNode>();
            List<NetworkNode> context2 = new List<NetworkNode>();

            // The file is only read, so open it read-only. File.Open(path, FileMode.Open)
            // would request read/write access and fail on read-only files.
            using (FileStream fileStream = File.OpenRead(path))
            using (StreamReader reader = new StreamReader(fileStream))
            {
                string currentLine;

                while ((currentLine = reader.ReadLine()) != null)
                {
                    // Drop empty tokens (blank lines, repeated spaces): Contains("") is true
                    // for every name and would add every node to the substring matches.
                    string[] words = currentLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

                    foreach (string word in words)
                    {
                        // Materialize once so the query is not re-run for the emptiness
                        // check and again for AddRange.
                        var foundNodes = network.Nodes
                            .Where(node => string.Equals(node.Name, word, StringComparison.InvariantCultureIgnoreCase))
                            .ToList();
                        context2.AddRange(network.Nodes.Where(node => node.Name.Contains(word)));

                        if (foundNodes.Count > 0)
                        {
                            context.AddRange(foundNodes);
                            continue;
                        }

                        // No exact match for the surface form: retry with the lemmatized form.
                        LemmatizeResult result = textAnalyzer.Lemmatize(word, true).FirstOrDefault();

                        if (result == null)
                        {
                            continue;
                        }

                        // No part-of-speech filtering is applied to the lemma.
                        string lemma = result.GetTextByFormId(0);

                        context.AddRange(network.Nodes.Where(node => string.Equals(node.Name, lemma, StringComparison.InvariantCultureIgnoreCase)));
                        context2.AddRange(network.Nodes.Where(node => node.Name.Contains(lemma)));
                    }
                }
            }
        }
        /// <summary>
        /// Looks up, in <c>_objectCasesDictionary</c>, the grammem associated with the word of
        /// <paramref name="predicate"/> that this method treats as the verb.
        /// </summary>
        /// <param name="textAnalyzer">Analyzer used to lemmatize the verb. Precondition: not null.</param>
        /// <param name="predicate">
        /// Space-separated predicate. Precondition: not null or empty. The second word is used
        /// when there is more than one word, otherwise the only word.
        /// </param>
        /// <returns>
        /// The dictionary entry for the lower-cased text of form id 0 of the first lemmatization
        /// result, or <c>Grammem.None</c> when the analyzer yields no result or the dictionary
        /// has no entry.
        /// </returns>
        private Grammem FindObjectGrammem(TextAnalyzer textAnalyzer, string predicate)
        {
            Contract.Requires<ArgumentNullException>(textAnalyzer != null);
            Contract.Requires<ArgumentException>(!string.IsNullOrEmpty(predicate));

            string[] words = predicate.Split(' ');
            string verb = words.Length > 1 ? words[1] : words[0];
            LemmatizeResult result = textAnalyzer.Lemmatize(verb).FirstOrDefault();

            if (result == null)
            {
                return Grammem.None;
            }

            string key = result.GetTextByFormId(0).ToLower();

            // Use a dedicated out variable: on a miss TryGetValue overwrites its out argument
            // with default(Grammem), which is only Grammem.None if None is the zero value.
            Grammem found;

            if (_objectCasesDictionary.TryGetValue(key, out found))
            {
                return found;
            }

            return Grammem.None;
        }