/// <summary>
/// Finds sentences that contain the words of the given search string and returns their texts.
/// </summary>
/// <remarks>
/// The search string is split on spaces and first looked up as typed. When that lookup yields
/// nothing, the string is parsed by the text analyzer and the lookup is repeated with the
/// normal forms of the parsed words (or the word's own text when it has no normal forms).
/// </remarks>
/// <param name="words">Space-separated words to search for. Null or blank input yields an empty list.</param>
/// <param name="orderWordsInSearch">Word-order mode, passed through to the sentence query.</param>
/// <returns>The <c>Text</c> of every sentence found; empty when nothing matches.</returns>
public List<string> FindSentences(string words, OrderWordsInSearch orderWordsInSearch)
{
    // Nothing to search for: avoid a NullReferenceException on Split and
    // skip the two queries and the parser call for a blank string.
    if (words == null || words.Trim().Length == 0)
    {
        return new List<string>();
    }

    // Trim BEFORE removing empties and duplicates. Splitting happens on ' ' only, so a token
    // such as "\t" survives the split; trimming it afterwards would otherwise send an empty
    // word to the query, and tokens differing only in surrounding whitespace would be duplicated.
    List<string> uniqueWords = words
        .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
        .Select(e => e.Trim())
        .Where(e => e.Length > 0)
        .Distinct()
        .ToList();

    List<Sentence> sentences = _sentenceWordsQuery.FindSentencesByWords(uniqueWords, orderWordsInSearch);
    if (EnumerableValidator.IsEmpty(sentences))
    {
        // No sentence contains the words as typed - retry with their normal forms.
        NLPWrapper.ExternalObjects.Sentence sentence = _textAnalyzer.ParseSentence(words, false);
        var normalWords = new List<string>();
        foreach (Word word in sentence.Words)
        {
            if (EnumerableValidator.IsNotEmpty(word.NormalForms))
            {
                normalWords.AddRange(word.NormalForms);
            }
            else
            {
                normalWords.Add(word.GetAppropriateWordText());
            }
        }
        sentences = _sentenceWordsQuery.FindSentencesByWords(normalWords, orderWordsInSearch);
    }

    return sentences.Select(e => e.Text).ToList();
}
/// <summary>
/// Reads sentences of the current language whose words match the given search words and
/// returns those that contain all of them.
/// </summary>
/// <remarks>
/// Rows are read ordered by sentence id, so the rows of one sentence are adjacent. The words of
/// the sentence being read are accumulated in two dictionaries and evaluated when the sentence
/// id changes (and once more after the last row).
/// </remarks>
/// <param name="words">Words to look for.</param>
/// <param name="orderWordsInSearch">Word-order mode, passed through to the per-sentence check.</param>
/// <param name="maxCount">Reading stops once this many sentences have been collected.</param>
/// <returns>The matching sentences.</returns>
public List<Sentence> FindSentencesByWords(List<string> words, OrderWordsInSearch orderWordsInSearch, int maxCount = 5)
{
    var uniqueWords = new HashSet<string>(words);
    // Number of joined rows to read; how it is derived from maxCount is defined by GetCount.
    int rowLimit = GetCount(maxCount);

    // NOTE(review): the second argument is presumably the value returned when reading fails - confirm.
    return Adapter.ReadByContext(c =>
    {
        var query = from s in c.Sentence
                    join sw in c.SentenceWord on s.Id equals sw.SentenceId
                    join w in c.Word on sw.WordId equals w.Id
                    where s.LanguageId == _languageId
                          && (uniqueWords.Contains(w.Text) || uniqueWords.Contains(sw.OriginalText))
                    orderby s.Id
                    select new { s, sw, w };
        var rows = query.Take(rowLimit).ToList();

        var found = new List<Sentence>();
        // Placeholder with an invalid id so that the first row is seen as a sentence change.
        var current = new Sentence { Id = IdValidator.INVALID_ID };
        // Keyed by the lower-cased original text of each sentence word.
        var byOriginalText = new Dictionary<string, List<SentenceWord>>();
        // Keyed by the lower-cased original text and, when different, the lower-cased word text.
        var byAnyText = new Dictionary<string, List<SentenceWord>>();

        foreach (var row in rows)
        {
            if (row.s.Id != current.Id)
            {
                // The sentence changed - evaluate the words gathered for the previous one.
                AddBestMatchToResult(words, orderWordsInSearch, byOriginalText, byAnyText, current, found);

                current = row.s;
                byAnyText.Clear();
                byOriginalText.Clear();

                if (found.Count == maxCount)
                {
                    // The required number of sentences has been collected.
                    break;
                }
            }

            string originalText = row.sw.OriginalText.ToLowerInvariant();
            string wordText = row.w.Text.ToLowerInvariant();
            if (originalText != wordText)
            {
                AddSentenceWordByText(byAnyText, wordText, row.sw);
            }
            AddSentenceWordByText(byOriginalText, originalText, row.sw);
            AddSentenceWordByText(byAnyText, originalText, row.sw);
        }

        // Evaluate the last sentence read (the dictionaries are empty after an early break).
        if (byOriginalText.Count > 0)
        {
            AddBestMatchToResult(words, orderWordsInSearch, byOriginalText, byAnyText, current, found);
        }

        return found;
    }, new List<Sentence>());
}

/// <summary>
/// Adds the sentence to the result if it matches by original word texts; otherwise tries
/// the inexact match that also takes the word texts into account.
/// </summary>
private static void AddBestMatchToResult(List<string> words,
                                         OrderWordsInSearch orderWordsInSearch,
                                         Dictionary<string, List<SentenceWord>> byOriginalText,
                                         Dictionary<string, List<SentenceWord>> byAnyText,
                                         Sentence sentence,
                                         List<Sentence> result)
{
    bool exactMatchAdded = AddAppropriateSentenceToResult(words, orderWordsInSearch, byOriginalText, sentence, result);
    if (exactMatchAdded)
    {
        return;
    }

    // No exact match - add the inexact match if there is one.
    AddAppropriateSentenceToResult(words, orderWordsInSearch, byAnyText, sentence, result);
}
/// <summary>
/// Checks whether a sentence contains every searched word and, if it does, highlights the words
/// in the sentence text and appends the sentence to <paramref name="result"/>.
/// </summary>
/// <remarks>
/// For <c>OrderWordsInSearch.ExactWordForWord</c> the words must additionally occupy consecutive
/// positions (<c>OrderInSentence</c>) in the order in which they are given.
/// </remarks>
/// <param name="words">Searched words; looked up in the dictionary by their lower-cased form.</param>
/// <param name="orderWordsInSearch">Word-order mode.</param>
/// <param name="sentenceWordsByWords">Words of the sentence, keyed by lower-cased text.</param>
/// <param name="sentence">Sentence being checked; its <c>Text</c> is modified when it matches.</param>
/// <param name="result">List that receives the sentence when it matches.</param>
/// <returns><c>true</c> when the sentence matched and was added; otherwise <c>false</c>.</returns>
private static bool AddAppropriateSentenceToResult(IEnumerable<string> words,
                                                   OrderWordsInSearch orderWordsInSearch,
                                                   Dictionary<string, List<SentenceWord>> sentenceWordsByWords,
                                                   Sentence sentence,
                                                   List<Sentence> result)
{
    bool isAppropriateSentence = true;
    // Positions at which the chain of words matched so far may end.
    HashSet<int> correctOrderInSentences = null;
    foreach (string word in words)
    {
        List<SentenceWord> sentenceWords;
        if (!sentenceWordsByWords.TryGetValue(word.ToLowerInvariant(), out sentenceWords))
        {
            // Not every word is present in the sentence - the sentence does not fit.
            isAppropriateSentence = false;
            break;
        }

        if (orderWordsInSearch != OrderWordsInSearch.ExactWordForWord)
        {
            // Word order does not matter.
            continue;
        }

        IEnumerable<int> currentOrderInSentence = sentenceWords.Select(e => e.OrderInSentence);
        if (correctOrderInSentences == null)
        {
            // The first word has nothing to be compared with - just remember its positions.
            correctOrderInSentences = new HashSet<int>(currentOrderInSentence);
            continue;
        }

        // Order matters and there is a previous word: keep only the positions
        // that directly follow a position of the previous word.
        var newCorrectOrderInSentences = new HashSet<int>();
        foreach (int orderInSentence in currentOrderInSentence)
        {
            int prevOrderInSentence = orderInSentence - 1;
            if (correctOrderInSentences.Contains(prevOrderInSentence))
            {
                // The previous word was found right before this occurrence.
                newCorrectOrderInSentences.Add(orderInSentence);
            }
        }

        if (newCorrectOrderInSentences.Count == 0)
        {
            // The previous word never directly precedes the current one - the sentence does not fit.
            isAppropriateSentence = false;
            break;
        }
        correctOrderInSentences = newCorrectOrderInSentences;
    }

    if (isAppropriateSentence)
    {
        //TODO: remove
        // Highlight every distinct word once. The word list may contain repeats, and replacing
        // the same word a second time would wrap it again: "<b><b>word</b></b>".
        // NOTE(review): Replace is case-sensitive while the matching above is not, so occurrences
        // that differ in case from the searched word stay unhighlighted.
        var highlightedWords = new HashSet<string>();
        foreach (string word in words)
        {
            if (!highlightedWords.Add(word))
            {
                continue;
            }
            sentence.Text = sentence.Text.Replace(word, "<b>" + word + "</b>");
        }
        result.Add(sentence);
    }

    return isAppropriateSentence;
}
/// <summary>
/// Stub sentence search: ignores both arguments and always returns a new, empty list.
/// </summary>
/// <param name="words">Search string; not used.</param>
/// <param name="orderWordsInSearch">Word-order mode; not used.</param>
/// <returns>A new empty list.</returns>
public List<string> FindSentences(string words, OrderWordsInSearch orderWordsInSearch)
{
    var noSentences = new List<string>(0);
    return noSentences;
}