コード例 #1
0
        /// <summary>
        /// Finds the documents that satisfy <paramref name="query"/> and scores each of them.
        /// </summary>
        /// <remarks>
        /// A document is a match when it contains every positive query word, none of the
        /// negated words and every query phrase. Words and phrases are extracted by
        /// <c>parser</c> and passed through <c>GetAllowedNormalizedWords</c> before matching.
        /// </remarks>
        /// <param name="query">The raw query text handed to the parser.</param>
        /// <returns>
        /// One <see cref="MatchedDocument"/> per matching document, built from the document key
        /// and its TFIDF score against the positive query words; an empty array when the query
        /// yields neither positive words nor phrases.
        /// </returns>
        public MatchedDocument[] Search(string query)
        {
            var queryNotWords = GetAllowedNormalizedWords(parser.GetAllNotWords(query)).ToHashSet<string>();

            // NOTE(review): "not" is presumably the negation keyword of the query syntax;
            // adding it here keeps the keyword itself out of the positive search terms.
            queryNotWords.Add("not");

            var rawWords = parser.GetAllWords(query)
                                 .Where(word => !queryNotWords.Contains(word));

            // queryNotWords holds *normalized* words, so the filter has to be applied again
            // after normalization: otherwise a negated word whose raw form differs from its
            // normalized form would stay a positive term and make the query unsatisfiable
            // (the document would have to both contain and not contain it).
            // Materialized once because the sequence is re-read for every document below.
            var queryWords = GetAllowedNormalizedWords(rawWords)
                             .Where(word => !queryNotWords.Contains(word))
                             .ToList();

            // The outer sequence is materialized so the phrase pipeline is not rebuilt for
            // every document. The inner word sequences keep whatever type
            // GetAllowedNormalizedWords returns, since doc.Value.Contains is resolved on it.
            var queryPhrases = GetAllowedNormalizedWords(parser.GetAllPhrases(query))
                               .Select(parser.RemoveDelimeters)
                               .Select(phrase => GetAllowedNormalizedWords(parser.GetAllWords(phrase)))
                               .ToList();

            // Nothing positive to look for: report no matches instead of matching on
            // negations alone.
            if (queryWords.Count == 0 && queryPhrases.Count == 0)
            {
                return new MatchedDocument[0];
            }

            var matchedDocuments = documents
                                   .Where(doc => queryWords.All(doc.Value.Contains) &&
                                          queryNotWords.All(word => !doc.Value.Contains(word)) &&
                                          queryPhrases.All(phrase => doc.Value.Contains(phrase)));

            return matchedDocuments
                   .Select(doc => new MatchedDocument(doc.Key, TFIDF(queryWords, doc.Value)))
                   .ToArray();
        }