/// <summary>
/// Lemmatizes a text word by word and returns the lower-cased results joined by single spaces.
/// </summary>
/// <param name="textAnalyzer">Analyzer used to lemmatize each word; must not be null.</param>
/// <param name="text">Text to process; must not be null or empty.</param>
/// <returns>
/// For every word the analyzer produced a result for, the lower-cased text of form id 0 of the
/// first result, separated by single spaces. Words without a result are skipped, and the returned
/// string never starts or ends with a separator.
/// </returns>
/// <exception cref="ArgumentNullException">Contract failure: <paramref name="textAnalyzer"/> is null.</exception>
/// <exception cref="ArgumentException">Contract failure: <paramref name="text"/> is null or empty.</exception>
protected string Lemmatize(TextAnalyzer textAnalyzer, string text)
{
    Contract.Requires<ArgumentNullException>(textAnalyzer != null);
    Contract.Requires<ArgumentException>(!string.IsNullOrEmpty(text));

    string trimText = LexerUtils.TrimText(text);
    string[] words = trimText.Split(' ');
    StringBuilder stringBuilder = new StringBuilder(1024);
    bool hasLemma = false;

    foreach (string word in words)
    {
        LemmatizeResult lemmatizeResult = textAnalyzer.Lemmatize(word).FirstOrDefault();
        if (lemmatizeResult == null)
        {
            continue;
        }

        // The separator is written before every lemma except the first one. Deciding by the
        // word index instead would leave a trailing space whenever the last word has no result.
        if (hasLemma)
        {
            stringBuilder.Append(' ');
        }

        // NOTE: ToLower() lower-cases using the current culture.
        stringBuilder.Append(lemmatizeResult.GetTextByFormId(0).ToLower());
        hasLemma = true;
    }

    return stringBuilder.ToString();
}
/// <summary>
/// Reads a text file line by line and collects the network nodes whose names match
/// the words of the text or, when a word has no exact match, the lemma of that word.
/// </summary>
/// <remarks>
/// Two local lists are filled: one with case-insensitive exact name matches and one with
/// substring matches. Neither list is returned or stored by this method.
/// </remarks>
/// <param name="path">Path of the text file to read.</param>
/// <param name="network">Network whose <c>Nodes</c> are searched by name.</param>
private static void AnalyzeText(string path, Network network)
{
    // The dictionaries are loaded from "Dictionaries\Russian" under the current directory.
    string dictionaryDirectory = Path.Combine(Environment.CurrentDirectory, "Dictionaries", "Russian");

    TextAnalyzer textAnalyzer = new TextAnalyzer(AdapterKind.RussianCp1251Adapter);
    textAnalyzer.Load(
        Path.Combine(dictionaryDirectory, "Dictionary.auto"),
        Path.Combine(dictionaryDirectory, "Paradigms.bin"),
        Path.Combine(dictionaryDirectory, "PredictionDictionary.auto"));

    List<NetworkNode> context = new List<NetworkNode>();   // exact (case-insensitive) name matches
    List<NetworkNode> context2 = new List<NetworkNode>();  // substring name matches

    // The file is only read, so it is opened read-only instead of with read/write access.
    using (FileStream fileStream = File.OpenRead(path))
    using (StreamReader reader = new StreamReader(fileStream))
    {
        string currentLine;
        while ((currentLine = reader.ReadLine()) != null)
        {
            // Empty tokens (repeated spaces, blank lines) are dropped: an empty string is
            // contained in every name and would add every node to the substring matches.
            string[] words = currentLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            foreach (string word in words)
            {
                // Materialized once so the query is not re-run for the count and for AddRange.
                var exactMatches = network.Nodes
                    .Where(node => string.Equals(node.Name, word, StringComparison.InvariantCultureIgnoreCase))
                    .ToList();

                context2.AddRange(network.Nodes.Where(node => node.Name.Contains(word)));

                if (exactMatches.Count > 0)
                {
                    context.AddRange(exactMatches);
                    continue;
                }

                // No node carries the word itself: retry with the text of form id 0
                // of the first lemmatize result.
                var results = textAnalyzer.Lemmatize(word, true);
                LemmatizeResult result = results.FirstOrDefault();
                if (result == null)
                {
                    continue;
                }

                // Results of every part of speech are used; a noun-only filter that used
                // to sit here was disabled.
                string lemma = result.GetTextByFormId(0);
                context.AddRange(network.Nodes.Where(node => string.Equals(node.Name, lemma, StringComparison.InvariantCultureIgnoreCase)));
                context2.AddRange(network.Nodes.Where(node => node.Name.Contains(lemma)));
            }
        }
    }
}
/// <summary>
/// Looks up the grammem registered in <c>_objectCasesDictionary</c> for the verb of a predicate.
/// </summary>
/// <param name="textAnalyzer">Analyzer used to lemmatize the verb; must not be null.</param>
/// <param name="predicate">
/// Space-separated predicate; must not be null or empty. The second word is taken as the verb
/// when there is more than one word, otherwise the only word is used.
/// </param>
/// <returns>
/// The grammem stored under the lower-cased text of form id 0 of the first lemmatize result,
/// or <c>Grammem.None</c> when the analyzer yields no result or the dictionary has no entry.
/// </returns>
/// <exception cref="ArgumentNullException">Contract failure: <paramref name="textAnalyzer"/> is null.</exception>
/// <exception cref="ArgumentException">Contract failure: <paramref name="predicate"/> is null or empty.</exception>
private Grammem FindObjectGrammem(TextAnalyzer textAnalyzer, string predicate)
{
    Contract.Requires<ArgumentNullException>(textAnalyzer != null);
    Contract.Requires<ArgumentException>(!string.IsNullOrEmpty(predicate));

    string[] words = predicate.Split(' ');
    string verb = words.Length > 1 ? words[1] : words[0];

    LemmatizeResult result = textAnalyzer.Lemmatize(verb).FirstOrDefault();
    if (result == null)
    {
        return Grammem.None;
    }

    // TryGetValue overwrites its out argument with default(Grammem) on a miss, so the
    // fallback is returned explicitly instead of relying on a pre-initialized variable.
    Grammem grammem;
    bool found = _objectCasesDictionary.TryGetValue(result.GetTextByFormId(0).ToLower(), out grammem);
    return found ? grammem : Grammem.None;
}