/// <summary>
/// Looks up sentences containing the words of a space-separated query and returns their texts.
/// </summary>
/// <remarks>
/// The query is first searched by its literal tokens. If that search is reported empty by
/// <c>EnumerableValidator.IsEmpty</c>, the query is parsed with the text analyzer and the
/// search is repeated using the normal forms of the parsed words (or the word's own text
/// when a word has no normal forms).
/// </remarks>
/// <param name="words">Query string; tokens are separated by spaces.</param>
/// <param name="orderWordsInSearch">Word-order mode forwarded to the sentence query.</param>
/// <returns>The <c>Text</c> of every sentence found.</returns>
public List <string> FindSentences(string words, OrderWordsInSearch orderWordsInSearch)
        {
            // First attempt: search by the distinct literal tokens of the query.
            string[] tokens         = words.Split(new [] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            var      distinctTokens = new HashSet <string>(tokens);
            List <string> trimmedTokens = distinctTokens.Select(token => token.Trim()).ToList();

            List <Sentence> found = _sentenceWordsQuery.FindSentencesByWords(trimmedTokens, orderWordsInSearch);
            if (!EnumerableValidator.IsEmpty(found))
            {
                return(found.Select(item => item.Text).ToList());
            }

            // Second attempt: search by the normal forms produced by the text analyzer.
            NLPWrapper.ExternalObjects.Sentence parsed = _textAnalyzer.ParseSentence(words, false);
            var searchForms = new List <string>();
            foreach (Word parsedWord in parsed.Words)
            {
                if (!EnumerableValidator.IsNotEmpty(parsedWord.NormalForms))
                {
                    // No normal forms available - fall back to the word's own text.
                    searchForms.Add(parsedWord.GetAppropriateWordText());
                    continue;
                }
                searchForms.AddRange(parsedWord.NormalForms);
            }

            found = _sentenceWordsQuery.FindSentencesByWords(searchForms, orderWordsInSearch);
            return(found.Select(item => item.Text).ToList());
        }
Example #2
0
        /// <summary>
        /// Finds sentences of the current language (<c>_languageId</c>) that contain the given words.
        /// </summary>
        /// <remarks>
        /// Rows are read as (sentence, sentence-word, word) triples whose word text or original
        /// text is one of <paramref name="words"/>, ordered by sentence id so that all rows of
        /// one sentence are adjacent. For every sentence two lookup tables are built from its rows:
        /// one keyed only by the lower-cased original text (exact match) and one keyed by both the
        /// lower-cased original text and the lower-cased word text (inexact match). A sentence is
        /// offered to <c>AddAppropriateSentenceToResult</c> with the exact table first and, if that
        /// does not add it, with the inexact table.
        /// </remarks>
        /// <param name="words">Words to search for; passed as-is (order and duplicates kept) to the per-sentence check.</param>
        /// <param name="orderWordsInSearch">Word-order mode forwarded to <c>AddAppropriateSentenceToResult</c>.</param>
        /// <param name="maxCount">Number of sentences after which collecting stops; defaults to 5.</param>
        /// <returns>The sentences accepted by the per-sentence check.</returns>
        public List <Sentence> FindSentencesByWords(List <string> words,
                                                    OrderWordsInSearch orderWordsInSearch,
                                                    int maxCount = 5)
        {
            // The set is used only for the membership filter inside the query.
            var             uniqueWords = new HashSet <string>(words);
            // Row limit for the query, derived from maxCount by GetCount (helper not shown here).
            int             count       = GetCount(maxCount);
            // NOTE(review): the second argument looks like the value returned when the read fails - confirm against Adapter.ReadByContext.
            List <Sentence> result      = Adapter.ReadByContext(c => {
                var sentences = from s in c.Sentence
                                join sw in c.SentenceWord on s.Id equals sw.SentenceId
                                join w in c.Word on sw.WordId equals w.Id
                                where
                                s.LanguageId == _languageId &&
                                (uniqueWords.Contains(w.Text) || uniqueWords.Contains(sw.OriginalText))
                                orderby s.Id
                                select new { s, sw, w };
                // The limit applies to joined rows, not to sentences.
                var dataBySentences = sentences.Take(count).ToList();
                var innerResult     = new List <Sentence>();
                // Placeholder "previous sentence" so that the first row is detected as a sentence change.
                var prevSentence    = new Sentence {
                    Id = IdValidator.INVALID_ID
                };
                // Rows of the current sentence keyed by lower-cased original text only (exact match).
                var originalSentenceWords = new Dictionary <string, List <SentenceWord> >();
                // Rows of the current sentence keyed by lower-cased original text and word text (inexact match).
                var sentenceWords         = new Dictionary <string, List <SentenceWord> >();
                foreach (var dataBySentence in dataBySentences)
                {
                    if (dataBySentence.s.Id != prevSentence.Id)
                    {
                        //the sentence has changed - check the words collected for the previous one

                        //add the exact match if there is one
                        bool isAdded = AddAppropriateSentenceToResult(words, orderWordsInSearch, originalSentenceWords,
                                                                      prevSentence,
                                                                      innerResult);
                        if (!isAdded)
                        {
                            //no exact match - add the inexact match if there is one
                            AddAppropriateSentenceToResult(words, orderWordsInSearch, sentenceWords, prevSentence,
                                                           innerResult);
                        }

                        // Start collecting rows for the new sentence.
                        prevSentence = dataBySentence.s;
                        sentenceWords.Clear();
                        originalSentenceWords.Clear();

                        if (innerResult.Count == maxCount)
                        {
                            //collected the required number of sentences
                            // Both tables were just cleared, so the flush after the loop is skipped.
                            break;
                        }
                    }

                    string originalText = dataBySentence.sw.OriginalText.ToLowerInvariant();
                    string wordText     = dataBySentence.w.Text.ToLowerInvariant();
                    if (!originalText.Equals(wordText))
                    {
                        // Register the word text only when it differs from the original text,
                        // so the same row is not registered twice under one key.
                        AddSentenceWordByText(sentenceWords, wordText, dataBySentence.sw);
                    }
                    AddSentenceWordByText(originalSentenceWords, originalText, dataBySentence.sw);
                    AddSentenceWordByText(sentenceWords, originalText, dataBySentence.sw);
                }

                // Flush the last sentence: the loop only checks a sentence when the next one starts.
                if (originalSentenceWords.Count > 0)
                {
                    //add the exact match if there is one
                    bool isAdded = AddAppropriateSentenceToResult(words, orderWordsInSearch, originalSentenceWords,
                                                                  prevSentence,
                                                                  innerResult);
                    if (!isAdded)
                    {
                        //no exact match - add the inexact match if there is one
                        AddAppropriateSentenceToResult(words, orderWordsInSearch, sentenceWords, prevSentence,
                                                       innerResult);
                    }
                }

                return(innerResult);
            }, new List <Sentence>());

            return(result);
        }
Example #3
0
        /// <summary>
        /// Adds <paramref name="sentence"/> to <paramref name="result"/> when it contains every searched word
        /// (and, for <c>OrderWordsInSearch.ExactWordForWord</c>, contains them as consecutive words in the given order).
        /// </summary>
        /// <remarks>
        /// When the sentence is accepted, every occurrence of each distinct searched word in
        /// <c>sentence.Text</c> is wrapped in <c>&lt;b&gt;…&lt;/b&gt;</c> (case-sensitive replace).
        /// Each distinct word is highlighted only once, so a repeated search word does not
        /// produce nested <c>&lt;b&gt;</c> tags.
        /// </remarks>
        /// <param name="words">Searched words; looked up lower-cased in <paramref name="sentenceWordsByWords"/>.</param>
        /// <param name="orderWordsInSearch">Whether the words must follow each other exactly.</param>
        /// <param name="sentenceWordsByWords">Sentence words of the candidate sentence keyed by lower-cased text.</param>
        /// <param name="sentence">Candidate sentence; its <c>Text</c> is modified when accepted.</param>
        /// <param name="result">List that receives the sentence when it is accepted.</param>
        /// <returns><c>true</c> if the sentence was added to <paramref name="result"/>; otherwise <c>false</c>.</returns>
        private static bool AddAppropriateSentenceToResult(IEnumerable <string> words,
                                                           OrderWordsInSearch orderWordsInSearch,
                                                           Dictionary <string, List <SentenceWord> > sentenceWordsByWords,
                                                           Sentence sentence,
                                                           List <Sentence> result)
        {
            // Positions (OrderInSentence) at which the phrase matched so far can end.
            HashSet <int> correctOrderInSentences = null;

            foreach (string word in words)
            {
                List <SentenceWord> sentenceWords;
                if (!sentenceWordsByWords.TryGetValue(word.ToLowerInvariant(), out sentenceWords))
                {
                    //not all words were found in the sentence - the sentence does not fit
                    return(false);
                }
                if (orderWordsInSearch != OrderWordsInSearch.ExactWordForWord)
                {
                    //the order does not matter
                    continue;
                }

                IEnumerable <int> currentOrderInSentence = sentenceWords.Select(e => e.OrderInSentence);
                if (correctOrderInSentences == null)
                {
                    //the first word has nothing to be compared with - just remember its positions
                    correctOrderInSentences = new HashSet <int>(currentOrderInSentence);
                    continue;
                }

                //the order matters and there is a previous word:
                //keep only the positions that directly follow a position of the previous word
                var newCorrectOrderInSentences = new HashSet <int>();
                foreach (int orderInSentence in currentOrderInSentence)
                {
                    int prevOrderInSentence = orderInSentence - 1;
                    if (correctOrderInSentences.Contains(prevOrderInSentence))
                    {
                        //the previous word was found right before this one
                        newCorrectOrderInSentences.Add(orderInSentence);
                    }
                }

                if (newCorrectOrderInSentences.Count == 0)
                {
                    //the previous word does not stand right before the current word - the sentence does not fit
                    return(false);
                }
                correctOrderInSentences = newCorrectOrderInSentences;
            }

            //TODO: remove
            //Highlight each distinct word once; repeating the replace for a duplicated
            //word would wrap the already highlighted text again (<b><b>word</b></b>).
            foreach (string word in words.Distinct())
            {
                sentence.Text = sentence.Text.Replace(word, "<b>" + word + "</b>");
            }
            result.Add(sentence);
            return(true);
        }
Example #4
0
 /// <summary>
 /// Stub implementation of the sentence search: ignores both arguments.
 /// </summary>
 /// <param name="words">Query string (unused).</param>
 /// <param name="orderWordsInSearch">Word-order mode (unused).</param>
 /// <returns>A new, empty list created with capacity 0.</returns>
 public List <string> FindSentences(string words, OrderWordsInSearch orderWordsInSearch)
 {
     var noSentences = new List <string>(0);
     return(noSentences);
 }