/// <summary>
/// Fetches the best highlighted fragment of one field for a matched document and converts it
/// to HTML: the fragment is HTML-encoded, then every HighlightPreTag / HighlightPostTag
/// occurrence is replaced by HtmlPreTag / HtmlPostTag.
/// </summary>
/// <param name="highlighter">Highlighter used to extract the fragment.</param>
/// <param name="fieldQuery">Field query passed through to the highlighter.</param>
/// <param name="searcher">Searcher whose index reader is handed to the highlighter.</param>
/// <param name="match">The hit whose document is highlighted.</param>
/// <param name="tag">Field name passed to the highlighter.</param>
/// <param name="length">Fragment size passed to the highlighter.</param>
/// <returns>The converted fragment, or the highlighter's null/empty result unchanged.</returns>
private string GetHighlightedText(FastVectorHighlighter highlighter, FieldQuery fieldQuery, IndexSearcher searcher, ScoreDoc match, string tag, int length)
 {
     var fragment = highlighter.GetBestFragment(fieldQuery, searcher.IndexReader, match.Doc, tag, length);

     // Nothing to convert: hand the null/empty value straight back.
     if (string.IsNullOrEmpty(fragment))
     {
         return fragment;
     }

     // Encode first, then swap the marker tags for their HTML counterparts.
     var encoded = HttpUtility.HtmlEncode(fragment);
     var withOpeningTags = encoded.Replace(HighlightPreTag, HtmlPreTag);
     return withOpeningTags.Replace(HighlightPostTag, HtmlPostTag);
 }
Beispiel #2
0
			/// <summary>
			/// Initializes the <c>highlighter</c> and <c>fieldQuery</c> members when the index query
			/// requests at least one highlighted field; otherwise leaves them untouched.
			/// </summary>
			/// <param name="luceneQuery">Query from which the highlighter's field query is derived.</param>
			private void SetupHighlighter(Query luceneQuery)
			{
				// No highlighted fields requested: nothing to set up.
				if (indexQuery.HighlightedFields == null || indexQuery.HighlightedFields.Length <= 0)
					return;

				// Caller-supplied tags win; fall back to the builder's colored defaults
				// when none (or an empty set) were given.
				var customPreTags = indexQuery.HighlighterPreTags;
				var preTags = (customPreTags == null || !customPreTags.Any())
					? BaseFragmentsBuilder.COLORED_PRE_TAGS
					: customPreTags;

				var customPostTags = indexQuery.HighlighterPostTags;
				var postTags = (customPostTags == null || !customPostTags.Any())
					? BaseFragmentsBuilder.COLORED_POST_TAGS
					: customPostTags;

				highlighter = new FastVectorHighlighter(
					FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
					FastVectorHighlighter.DEFAULT_FIELD_MATCH,
					new SimpleFragListBuilder(),
					new SimpleFragmentsBuilder(preTags, postTags));

				fieldQuery = highlighter.GetFieldQuery(luceneQuery);
			}
Beispiel #3
0
        /// <summary>
        /// Searches the "text" field for <paramref name="queryText"/> as a phrase and returns the
        /// highlighted fragments of every hit, cleaned up into plain single-line strings.
        /// </summary>
        /// <param name="queryText">Space-separated words that make up the phrase.</param>
        /// <param name="phraseLimit">Value assigned to the highlighter's <c>PhraseLimit</c>.</param>
        /// <param name="numHits">
        /// Maximum number of documents collected; also passed as the maximum number of fragments
        /// requested per document.
        /// </param>
        /// <returns>The cleaned fragments; empty when the query has no words or nothing matched.</returns>
        public IEnumerable<string> FindText(string queryText, int phraseLimit, int numHits = 100)
        {
            var results = new List<string>();

            var phraseQuery = new PhraseQuery();
            var termCount = 0;

            foreach (var word in queryText.Split(' '))
            {
                // Repeated, leading or trailing spaces yield empty tokens; adding them would
                // put an empty term into the phrase, so they are skipped.
                if (word.Length == 0)
                    continue;

                phraseQuery.Add(new Term("text", word));
                termCount++;
            }

            // A query without any word cannot produce a hit; skip the search entirely.
            if (termCount == 0)
                return results;

            var booleanQuery = new BooleanQuery { { phraseQuery, Occur.MUST } };

            var searcher = new IndexSearcher(_reader);
            var collector = TopScoreDocCollector.Create(numHits: numHits, docsScoredInOrder: true);
            searcher.Search(booleanQuery, collector);

            var scoreDocs = collector.TopDocs(0, numHits).ScoreDocs;

            // Neither object depends on the document being highlighted, so build them once
            // instead of once per hit.
            var fieldQuery = new FieldQuery(booleanQuery, true, false);
            var highlighter = new FastVectorHighlighter(true, false) { PhraseLimit = phraseLimit };

            foreach (var scoreDoc in scoreDocs)
            {
                var fragments = highlighter.GetBestFragments(fieldQuery, _reader, scoreDoc.Doc, "text", 75, numHits);

                if (fragments == null || fragments.Length <= 0)
                    continue;

                foreach (var fragment in fragments)
                {
                    // Drop everything up to the first space and from the last space on
                    // (the fragment edges), when the fragment has room for it.
                    int firstChar = fragment.IndexOf(' ') + 1;
                    int lastChar = fragment.LastIndexOf(' ');

                    string truncated = (firstChar > 0 && lastChar > firstChar)
                        ? fragment.Substring(firstChar, lastChar - firstChar)
                        : fragment;

                    // Strip <b>/</b> tags (presumably the highlighter's default markers)
                    // and flatten line breaks to spaces.
                    truncated = Regex.Replace(truncated, @"</?b>", "");
                    truncated = Regex.Replace(truncated, @"\r|\n", " ");

                    results.Add(truncated.Trim());
                }
            }

            return results;
        }
Beispiel #4
0
			/// <summary>
			/// Lazily executes the index query and yields up to <c>indexQuery.PageSize</c> results.
			/// Results rejected by <c>ShouldIncludeInResults</c> are counted as skipped, and the
			/// query is re-run with a larger window until the page is filled or every hit has been
			/// read. When highlighted fields are requested, fragments are attached to each result.
			/// </summary>
			/// <returns>A lazily evaluated sequence of query results.</returns>
			public IEnumerable<IndexQueryResult> Query()
			{
				parent.MarkQueried();
				using (IndexStorage.EnsureInvariantCulture())
				{
					AssertQueryDoesNotContainFieldsThatAreNotIndexed();
					IndexSearcher indexSearcher;
					// The searcher is held for the whole enumeration and released when the using block exits.
					using (parent.GetSearcher(out indexSearcher))
					{
						var luceneQuery = ApplyIndexTriggers(GetLuceneQuery());


						// Working copies of the paging window; both are adjusted below as results get skipped.
						int start = indexQuery.Start;
						int pageSize = indexQuery.PageSize;
						int returnedResults = 0;
						int skippedResultsInCurrentLoop = 0;
						bool readAll;
						bool adjustStart = true;

						var recorder = new DuplicateDocumentRecorder(indexSearcher,
													  parent,
													  documentsAlreadySeenInPreviousPage,
													  alreadyReturned,
													  fieldsToFetch,
													  parent.IsMapReduce || fieldsToFetch.IsProjection);

						// Outer loop: one pass per (possibly enlarged) window, repeated until the page
						// is full or all hits were read.
						do
						{
							if (skippedResultsInCurrentLoop > 0)
							{
								start = start + pageSize - (start - indexQuery.Start); // need to "undo" the index adjustment
								// trying to guesstimate how many results we will need to read from the index
								// to get enough unique documents to match the page size
								pageSize = Math.Max(2, skippedResultsInCurrentLoop) * pageSize;
								skippedResultsInCurrentLoop = 0;
							}
							TopDocs search;
							int moreRequired;
							// Inner loop: re-run the search with a growing page size for as long as the
							// recorder reports that more results are required.
							do
							{
								search = ExecuteQuery(indexSearcher, luceneQuery, start, pageSize, indexQuery);
								moreRequired = recorder.RecordResultsAlreadySeenForDistinctQuery(search, adjustStart, ref start);
								pageSize += moreRequired * 2;
							} while (moreRequired > 0);
							indexQuery.TotalSize.Value = search.TotalHits;
							// adjustStart is only true for the first recorder pass.
							adjustStart = false;

							FastVectorHighlighter highlighter = null;
							FieldQuery fieldQuery = null;

							// NOTE(review): the highlighter and field query are rebuilt on every outer-loop
							// pass although their inputs (indexQuery, luceneQuery) are not reassigned here.
							if (indexQuery.HighlightedFields != null && indexQuery.HighlightedFields.Length > 0)
							{
								// Custom pre/post tags are used when supplied and non-empty; otherwise the
								// builder's colored default tags apply.
								highlighter = new FastVectorHighlighter(
									FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
									FastVectorHighlighter.DEFAULT_FIELD_MATCH,
									new SimpleFragListBuilder(),
									new SimpleFragmentsBuilder(
										indexQuery.HighlighterPreTags != null && indexQuery.HighlighterPreTags.Any()
											? indexQuery.HighlighterPreTags
											: BaseFragmentsBuilder.COLORED_PRE_TAGS,
										indexQuery.HighlighterPostTags != null && indexQuery.HighlighterPostTags.Any()
											? indexQuery.HighlighterPostTags
											: BaseFragmentsBuilder.COLORED_POST_TAGS));

								fieldQuery = highlighter.GetFieldQuery(luceneQuery);
							}

							// Walk the current window of score docs, starting at index `start`.
							for (var i = start; (i - start) < pageSize && i < search.ScoreDocs.Length; i++)
							{
								var scoreDoc = search.ScoreDocs[i];
								var document = indexSearcher.Doc(scoreDoc.Doc);
								var indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, scoreDoc.Score);
								// Rejected results count towards both the query-wide and the per-pass skip counters.
								if (ShouldIncludeInResults(indexQueryResult) == false)
								{
									indexQuery.SkippedResults.Value++;
									skippedResultsInCurrentLoop++;
									continue;
								}

								if (highlighter != null)
								{
									// Deferred query: fragments are computed when enumerated below, and only
									// fields that produced at least one fragment are kept.
									var highlightings =
										from highlightedField in this.indexQuery.HighlightedFields
										select new
										{
											highlightedField.Field,
											highlightedField.FragmentsField,
											Fragments = highlighter.GetBestFragments(
												fieldQuery,
												indexSearcher.IndexReader,
												scoreDoc.Doc,
												highlightedField.Field,
												highlightedField.FragmentLength,
												highlightedField.FragmentCount)
										}
										into fieldHighlitings
										where fieldHighlitings.Fragments != null &&
											  fieldHighlitings.Fragments.Length > 0
										select fieldHighlitings;

									// Projections and map-reduce results get the fragments written into the
									// projection under FragmentsField (when one is named); other results get
									// a field-name-to-fragments dictionary.
									if (fieldsToFetch.IsProjection || parent.IsMapReduce)
									{
										foreach (var highlighting in highlightings)
											if (!string.IsNullOrEmpty(highlighting.FragmentsField))
												indexQueryResult.Projection[highlighting.FragmentsField]
													= new RavenJArray(highlighting.Fragments);
									} else
										indexQueryResult.Highligtings = highlightings
											.ToDictionary(x => x.Field, x => x.Fragments);
								}

								returnedResults++;
								yield return indexQueryResult;
								// Stop as soon as the originally requested page size has been delivered.
								if (returnedResults == indexQuery.PageSize)
									yield break;
							}
							// True when the last search returned every hit, so no further pass can find more.
							readAll = search.TotalHits == search.ScoreDocs.Length;
						} while (returnedResults < indexQuery.PageSize && readAll == false);
					}
				}
			}
Beispiel #5
0
        /// <summary>
        /// Parses and runs the search query, optionally collects spelling suggestions for each
        /// whitespace-separated term of the raw query text, and returns the requested page of hits
        /// together with a highlighted fragment of each hit's "Content" field.
        /// </summary>
        /// <param name="searchQuery">Query text, paging information and search options.</param>
        /// <param name="suggestOnlyWhenNoResults">
        /// When true, suggestions are only produced if the search returned no hits; when false,
        /// suggestions are always produced.
        /// </param>
        /// <returns>The view model holding total hit count, page results and suggestions.</returns>
        public static SearchResultsViewModel SearchWithSuggestions(SearchQuery searchQuery, bool suggestOnlyWhenNoResults = false)
        {
            var ret = new SearchResultsViewModel
            {
                SearchResults = new List<SearchResultsViewModel.SearchResult>(PageSize),
                Query = searchQuery
            };

            // Parse query, possibly throwing a ParseException
            Query query;
            if (searchQuery.TitlesOnly)
            {
                var qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "Title",
                                         new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
                query = qp.Parse(searchQuery.Query);
            }
            else
            {
                query = MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29, searchQuery.Query,
                                                    SearchFields, SearchFlags,
                                                    new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
            }

            // Perform the actual search, collecting every hit up to the end of the requested page
            var tsdc = TopScoreDocCollector.create(PageSize * searchQuery.CurrentPage, true);
            Searcher.Search(query, tsdc);
            ret.TotalResults = tsdc.GetTotalHits();
            var hits = tsdc.TopDocs().ScoreDocs;

            // Do the suggestion magic: always, unless the caller asked for suggestions
            // only when the search came back empty
            if (!suggestOnlyWhenNoResults || ret.TotalResults == 0)
            {
                ret.Suggestions = new List<string>();
                var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(Searcher.GetIndexReader().Directory());

                // This is kind of a hack to get things working quickly
                // for real-world usage we probably want to get the analyzed terms from the Query object
                var individualTerms = searchQuery.Query.Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

                foreach (var term in individualTerms)
                {
                    // we only specify field name if we actually got results,
                    // to improve suggestions relevancy
                    // NOTE(review): the index-reader argument is null here; per the SpellChecker
                    // documentation the field / more-popular restriction may need a reader to
                    // take effect - confirm this call does what the comment above intends.
                    ret.Suggestions.AddRange(spellChecker.SuggestSimilar(term,
                                                                searchQuery.MaxSuggestions,
                                                                null,
                                                                ret.TotalResults == 0 ? null : "Title",
                                                                true));
                }
            }

            // Init the highlighter instance
            var fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
                                    FastVectorHighlighter.DEFAULT_FIELD_MATCH,
                                    new SimpleFragListBuilder(),
                                    new ScoreOrderFragmentsBuilder(new[] { "[b]" }, new[] { "[/b]" }));

            // The field query and the reader are the same for every hit: obtain them once
            // instead of once per result.
            var fq = fvh.GetFieldQuery(query);
            var reader = Searcher.GetIndexReader();

            // Only the hits belonging to the requested page are turned into results.
            for (var i = (searchQuery.CurrentPage - 1) * PageSize; i < hits.Length; ++i)
            {
                var d = Searcher.Doc(hits[i].doc);

                // NOTE(review): the highlighter may return null when "Content" has no match
                // (e.g. titles-only queries) - verify HtmlStripFragment tolerates null.
                var fragment = fvh.GetBestFragment(fq, reader, hits[i].doc, "Content", 400);

                ret.SearchResults.Add(new SearchResultsViewModel.SearchResult
                {
                    Id = d.Get("Id"),
                    Title = d.Get("Title"),
                    Score = hits[i].score,
                    LuceneDocId = hits[i].doc,
                    Fragment = MvcHtmlString.Create(fragment.HtmlStripFragment()),
                });
            }
            return ret;
        }
Beispiel #6
0
        /// <summary>
        /// Parses and runs the search query and returns the requested page of hits, each with a
        /// highlighted fragment of its "Content" field.
        /// </summary>
        /// <param name="searchQuery">Query text, paging information and search options.</param>
        /// <returns>The view model holding the total hit count and the page results.</returns>
        public static SearchResultsViewModel Search(SearchQuery searchQuery)
        {
            var ret = new SearchResultsViewModel
            {
                SearchResults = new List<SearchResultsViewModel.SearchResult>(PageSize),
                Query = searchQuery
            };

            // Parse query, possibly throwing a ParseException
            Query query;
            if (searchQuery.TitlesOnly) // we only need to query on one field
            {
                var qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "Title",
                                         new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
                query = qp.Parse(searchQuery.Query);
            }
            else // querying on both fields, Content and Title
            {
                query = MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29, searchQuery.Query,
                                                    SearchFields, SearchFlags,
                                                    new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
            }

            // Init the highlighter instance
            var fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
                                                FastVectorHighlighter.DEFAULT_FIELD_MATCH,
                                                new SimpleFragListBuilder(),
                                                new ScoreOrderFragmentsBuilder(new[] { "[b]" }, new[] { "[/b]" }));

            // Perform the actual search, collecting every hit up to the end of the requested page
            var tsdc = TopScoreDocCollector.create(PageSize * searchQuery.CurrentPage, true);
            Searcher.Search(query, tsdc);
            ret.TotalResults = tsdc.GetTotalHits();
            var hits = tsdc.TopDocs().ScoreDocs;

            // The field query and the reader are the same for every hit: obtain them once
            // instead of once per result.
            var fq = fvh.GetFieldQuery(query);
            var reader = Searcher.GetIndexReader();

            // Only the hits belonging to the requested page are turned into results.
            for (var i = (searchQuery.CurrentPage - 1) * PageSize; i < hits.Length; ++i)
            {
                var d = Searcher.Doc(hits[i].doc);

                // NOTE(review): the highlighter may return null when "Content" has no match
                // (e.g. titles-only queries) - verify HtmlStripFragment tolerates null.
                var fragment = fvh.GetBestFragment(fq, reader, hits[i].doc, "Content", 400);

                ret.SearchResults.Add(new SearchResultsViewModel.SearchResult
                {
                    Id = d.Get("Id"),
                    Title = d.Get("Title"),
                    Score = hits[i].score,
                    LuceneDocId = hits[i].doc,
                    Fragment = MvcHtmlString.Create(fragment.HtmlStripFragment()),
                });
            }
            return ret;
        }
		/// <summary>
		/// Runs the search query against the index named in <paramref name="searchQuery"/>, stores
		/// the query in the document store when one is configured, and returns the requested page
		/// of hits with a highlighted fragment of the content field.
		/// </summary>
		/// <param name="searchQuery">Index name, query text, search type and paging information.</param>
		/// <param name="totalHitCount">Receives the total number of hits reported by the collector.</param>
		/// <returns>The results on the requested page.</returns>
		/// <exception cref="ArgumentException">No searcher exists for the requested index name.</exception>
		public IEnumerable<SearchResult> Search(SearchQuery searchQuery, out int totalHitCount)
		{
			var searcher = GetSearcher(searchQuery.IndexName);
			if (searcher == null)
				throw new ArgumentException("Index not found: " + searchQuery.IndexName);

			// Parse query, possibly throwing a ParseException
			var analyzer = GetAnalyzer(searchQuery.SearchType);
			Query query;
			if (searchQuery.SearchType == SearchType.LuceneDefault)
			{
				query = MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29, searchQuery.Query,
				                                    searchFieldsLucenesDefault, searchFlags, analyzer);
			}
			else
			{
				query = HebrewMultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29, searchQuery.Query,
				                                          searchFields, searchFlags, analyzer);
			}

			// Log search, if doc-store exists
			if (MvcApplication.RavenDocStore != null)
			{
				using (var session = MvcApplication.RavenDocStore.OpenSession())
				{
					session.Store(searchQuery);
					session.SaveChanges();
				}
			}

			// Init
			// NOTE(review): the fragments builder is always constructed with "Content", while
			// highlighting below targets "ContentDefault" for the Lucene-default search type -
			// confirm that is intended.
			var fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
												FastVectorHighlighter.DEFAULT_FIELD_MATCH,
												new SimpleFragListBuilder(),
												new CustomFragmentsBuilder("Content", new String[] { "[b]" }, new String[] { "[/b]" }));
			var contentFieldName = searchQuery.SearchType == SearchType.LuceneDefault ? "ContentDefault" : "Content";

			// Perform actual search, collecting every hit up to the end of the requested page
			var tsdc = TopScoreDocCollector.create(PageSize * searchQuery.CurrentPage, true);
			searcher.Search(query, tsdc);
			totalHitCount = tsdc.GetTotalHits();
			var hits = tsdc.TopDocs().scoreDocs;

			var ret = new List<SearchResult>(PageSize);

			// The field query and the reader are the same for every hit: obtain them once
			// instead of once per result.
			var fq = fvh.GetFieldQuery(query);
			var reader = searcher.GetIndexReader();

			// Only the hits belonging to the requested page are turned into results.
			for (var i = (searchQuery.CurrentPage - 1) * PageSize; i < hits.Length; ++i)
			{
				var d = searcher.Doc(hits[i].doc);
				var fragment = fvh.GetBestFragment(fq, reader, hits[i].doc, contentFieldName, 400);

				ret.Add(new SearchResult
				{
					Id = d.Get("Id"),
					Title = d.Get("Title"),
					Score = hits[i].score,
					LuceneDocId = hits[i].doc,
					Fragment = MvcHtmlString.Create(fragment.HtmlStripFragment()),
				});
			}
			return ret;
		}