/// <summary>
/// Searches the index for <paramref name="text"/>, restricted by document type and by a
/// date filter, and returns the list produced by <c>BuildSearchResult</c>.
/// </summary>
/// <param name="text">Search text; also tokenized into <c>this.Stream</c>.</param>
/// <param name="tipodocumentoId">Document-type id handed to <c>BuildQuery</c>.</param>
/// <param name="startDate">Range start handed to <c>BuildDateFilter</c>.</param>
/// <param name="endDate">Range end handed to <c>BuildDateFilter</c>.</param>
/// <returns>The value returned by <c>BuildSearchResult</c> for the matching documents.</returns>
/// <remarks>
/// Side effect: assigns <c>this.Highlighter</c> and <c>this.Stream</c> before the result is
/// built (presumably consumed by <c>BuildSearchResult</c> - confirm).
/// </remarks>
private IList<int> Search(string text, int tipodocumentoId, string startDate, string endDate)
{
    // Nested using blocks release whatever has been opened so far even when a later
    // acquisition throws, and keep disposing the remaining resources if one Dispose fails.
    // Disposal order is unchanged: searcher, then reader, then directory.
    using (var directory = this.GetDirectory())
    using (var indexReader = this.GetIndexReader(directory))
    using (var searcher = new IndexSearcher(indexReader))
    {
        var query = this.BuildQuery(text, tipodocumentoId);
        var filter = this.BuildDateFilter(startDate, endDate);

        // Sort on the "dataCriacao" field with the reverse flag set.
        var sort = new Sort(new SortField("dataCriacao", SortField.LONG, true));
        var docs = searcher.Search(query, filter, this.configuracoesDaAplicacao.ResultadoMaximoConsulta, sort);

        // create highlighter
        var formatter = new SimpleHTMLFormatter("<span class=\"result-highlight\">", "</span>");
        var scorer = new QueryScorer(query);
        this.Highlighter = new Highlighter(formatter, scorer);
        this.Stream = LuceneEngineBase.GetAnalyzer().TokenStream(string.Empty, new StringReader(text));

        return this.BuildSearchResult(docs, searcher);
    }
}
/// <summary>
/// Searches the book index for <paramref name="pattern"/> and returns one page of hits,
/// each carrying up to three highlighted fragments joined by new lines.
/// </summary>
/// <param name="pattern">Query text parsed against the <c>Html</c> field.</param>
/// <param name="page">Page index; results start at offset <c>page * PageSize</c>.</param>
/// <returns>A completed task holding the fully materialized page of search items.</returns>
public Task<IEnumerable<ISearchItem>> Search(string pattern, int page)
{
    using (Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
    using (Lucene.Net.Store.Directory index = new SimpleFSDirectory(Path.ChangeExtension(_bookFile.FullName, Convert.ToInt32(LuceneVersion.LUCENE_48).ToString())))
    using (IndexReader reader = DirectoryReader.Open(index))
    // One analyzer for all hits: it used to be re-created for every document and never disposed.
    using (Analyzer htmlAnalyzer = new HTMLStripCharAnalyzer())
    {
        Lucene.Net.Search.Query query = new QueryParser(LuceneVersion.LUCENE_48, nameof(TabHtmlText.Html), analyzer).Parse(pattern);

        // At most 512 hits are collected; the requested page is sliced out of them.
        Lucene.Net.Search.TopScoreDocCollector collector = Lucene.Net.Search.TopScoreDocCollector.Create(512, true);
        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader);
        searcher.Search(query, collector);
        Lucene.Net.Search.TopDocs docs = collector.GetTopDocs(page * PageSize, PageSize);

        QueryScorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), scorer) // SpanGradientFormatter
        {
            TextFragmenter = new SimpleSpanFragmenter(scorer, 30)
        };

        // Materialize eagerly: the reader and analyzers are disposed when this block exits.
        List<ISearchItem> items = new List<ISearchItem>(docs.ScoreDocs.Length);
        foreach (var scoreDoc in docs.ScoreDocs)
        {
            Document doc = searcher.Doc(scoreDoc.Doc);
            string html = doc.Get(nameof(TabHtmlText.Html));
            string[] fragments = highlighter.GetBestFragments(htmlAnalyzer, nameof(TabHtmlText.Html), html, 3);

            // The stored id is machine-written, so parse it independently of the current culture.
            int numId = int.Parse(doc.Get(nameof(TabHtmlText.NumId)), System.Globalization.CultureInfo.InvariantCulture);
            items.Add(new SearchItem(numId, string.Join("\n", fragments)));
        }

        return Task.FromResult<IEnumerable<ISearchItem>>(items);
    }
}
/// <summary>
/// Highlights the terms of a search query inside a field value, matching each term fuzzily.
/// </summary>
/// <param name="indexField">The field value to highlight; HTML is stripped before highlighting.</param>
/// <param name="searchQuery">The search query; split on spaces into individual terms.</param>
/// <param name="highlightField">The name of the field the terms are matched against.</param>
/// <param name="examineIndexSetName">Name of the examine index set whose reader is used to rewrite the query.</param>
/// <param name="maxNumFragments">Maximum number of fragments to retrieve.</param>
/// <param name="preTag">Markup emitted before each highlighted term.</param>
/// <param name="postTag">Markup emitted after each highlighted term.</param>
/// <returns>The best fragments of the stripped text, joined with "...".</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="indexField"/>, <paramref name="searchQuery"/>, <paramref name="highlightField"/>
/// or <paramref name="examineIndexSetName"/> is null.
/// </exception>
public static string GetHighlightWithWildcards(string indexField, string searchQuery, string highlightField, string examineIndexSetName, int maxNumFragments, string preTag, string postTag)
{
    if (indexField == null) { throw new ArgumentNullException(nameof(indexField)); }
    if (searchQuery == null) { throw new ArgumentNullException(nameof(searchQuery)); }
    if (highlightField == null) { throw new ArgumentNullException(nameof(highlightField)); }
    if (examineIndexSetName == null) { throw new ArgumentNullException(nameof(examineIndexSetName)); }

    // One optional fuzzy clause per term (minimum similarity 0.5, prefix length 0).
    var terms = searchQuery.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    var combined = new BooleanQuery();
    for (int i = 0; i < terms.Length; i++)
    {
        var fuzzy = new FuzzyQuery(new Lucene.Net.Index.Term(highlightField, terms[i]), 0.5f, 0);
        combined.Add(new BooleanClause(fuzzy, BooleanClause.Occur.SHOULD));
    }

    var plainText = indexField.StripHtml();
    var formatter = new SimpleHTMLFormatter(preTag, postTag);

    // The query is rewritten against the index reader before it is handed to the scorer.
    var indexReader = GetIndexSearcher(examineIndexSetName).GetIndexReader();
    var highlighter = new Highlighter(formatter, new QueryScorer(combined.Rewrite(indexReader)));

    var analyzer = new StandardAnalyzer(Version.LUCENE_29);
    var tokens = analyzer.TokenStream(highlightField, new StringReader(plainText));
    return highlighter.GetBestFragments(tokens, plainText, maxNumFragments, "...");
}
/// <summary>
/// Produces an HTML-encoded preview of <paramref name="text"/> in which the terms of
/// <paramref name="query"/> are wrapped in <paramref name="prefix"/>/<paramref name="suffix"/>.
/// </summary>
/// <param name="query">Query whose terms are highlighted; when null nothing is highlighted.</param>
/// <param name="text">Text to preview. Null yields an empty string.</param>
/// <param name="length">Fragment size passed to the fragmenter.</param>
/// <param name="analyzer">Analyzer used to tokenize <paramref name="text"/>.</param>
/// <param name="prefix">Markup placed before every highlighted term.</param>
/// <param name="suffix">Markup placed after every highlighted term.</param>
/// <param name="returnRawContentWhenResultIsEmpty">
/// When true and no fragment was produced, the HTML-encoded full text is returned instead of an empty string.
/// </param>
/// <param name="maxContentHighlightLength">Longest text, in characters, that is highlighted at all.</param>
/// <returns>
/// The highlighted preview (up to three fragments joined by "..."), the fixed message
/// "Content is too long to highlight" for over-long text, or the fallback described above.
/// </returns>
public static string GenerateHtmlPreviewText(Query query, string text, int length, Analyzer analyzer, string prefix = "<label class='highlight'>", string suffix = "</label>", bool returnRawContentWhenResultIsEmpty = false, int maxContentHighlightLength = Constants.DefaultMaxContentHighlightLength)
{
    // A null text used to crash on text.Length; there is nothing to preview, so return empty.
    if (text == null)
    {
        return string.Empty;
    }

    if (text.Length > maxContentHighlightLength) // For performance
    {
        return "Content is too long to highlight";
    }

    string result = null;

    if (query != null)
    {
        var scorer = new QueryScorer(query);

        // Internal marker tags are used while highlighting and swapped for prefix/suffix
        // only after HTML-encoding, so the caller's markup is not encoded.
        var formatter = new SimpleHTMLFormatter(CodeContentProcessing.HighLightPrefix, CodeContentProcessing.HighLightSuffix);
        var highlighter = new Highlighter(formatter, scorer)
        {
            TextFragmenter = new SimpleFragmenter(length),
            MaxDocCharsToAnalyze = maxContentHighlightLength
        };

        var stream = analyzer.GetTokenStream(nameof(CodeSource.Content), new StringReader(text));
        result = highlighter.GetBestFragments(stream, text, 3, "...");
    }

    if (string.IsNullOrEmpty(result))
    {
        return returnRawContentWhenResultIsEmpty ? HttpUtility.HtmlEncode(text) : string.Empty;
    }

    return HttpUtility.HtmlEncode(result)
        .Replace(CodeContentProcessing.HighLightPrefix, prefix)
        .Replace(CodeContentProcessing.HighLightSuffix, suffix);
}
/// <summary>
/// Parses <paramref name="query"/> against the "contents" field and returns the top hits,
/// each with a highlighted excerpt.
/// </summary>
/// <param name="query">Query text in Lucene query-parser syntax.</param>
/// <param name="maxResults">Maximum number of hits to return.</param>
/// <returns>One <c>Hit</c> per matching document, in the order returned by the searcher.</returns>
public IEnumerable<Hit> Search(string query, int maxResults)
{
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    QueryParser qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "contents", analyzer);
    Query q = qp.Parse(query);
    TopDocs top = searcher.Search(q, maxResults);

    // The scorer and highlighter depend only on the query, so build them once
    // instead of once per hit.
    var scorer = new QueryScorer(q, searcher.IndexReader, "contents");
    var highlighter = new Highlighter(scorer);

    List<Hit> result = new List<Hit>(top.ScoreDocs.Length);
    foreach (var scoreDoc in top.ScoreDocs)
    {
        var doc = searcher.Doc(scoreDoc.Doc);
        string contents = doc.Get("contents");

        result.Add(new Hit()
        {
            Relevance = scoreDoc.Score,
            Title = doc.Get("title"),
            Url = doc.Get("path"),
            Excerpt = highlighter.GetBestFragment(analyzer, "contents", contents)
        });
    }

    return result;
}
// Prints the highlighted fragments of a document's "mainText" field for the given search
// term, then a separator line, and waits for a key press.
// NOTE: The field "mainText" must be stored at indexing time. The same goes for any
// other field you want to search.
private static void DisplayMessage(Document d, string searchTerm)
{
    // THIS IS USED IN THE DATABASE INDEX
    //Console.WriteLine("id: " + d.Get("id") + "\n" + "messageBox: " + d.Get("messageBox") + "\n" + "incoming: " + d.Get("incoming") + "\n" + "date: " + d.Get("date") + "\n" + "mainText: " + d.Get("mainText"));
    // THIS IS USED IN MY TEST FILES
    //Console.WriteLine("id: " + d.Get("id") + "\n" + "mainText: " + d.Get("mainText"));

    var mainText = d.Get("mainText");

    var termQuery = new TermQuery(new Term("mainText", searchTerm));
    var highlighter = new Highlighter(new QueryScorer(termQuery));

    var tokens = new SimpleAnalyzer().TokenStream("mainText", new System.IO.StringReader(mainText));

    // 5 is the maximum number of fragments that gets tested
    var fragments = highlighter.GetBestFragments(tokens, mainText, 5);
    for (int i = 0; i < fragments.Length; i++)
    {
        Console.Write(fragments[i]);
    }

    Console.WriteLine("=====================");
    Console.ReadKey();
}
/// <summary>
/// Lazily searches the index at <c>_path</c> and yields, for every hit, its score, the
/// document and up to five fragments of the default field that match the query.
/// </summary>
/// <param name="text">Query text; null is treated as an empty string.</param>
/// <param name="defaultField">Field used for parsing and for fragment extraction.</param>
/// <param name="maxResultCount">Maximum number of hits to retrieve.</param>
/// <returns>
/// A deferred sequence of (score, document, fragments). The index stays open until the
/// sequence has been fully enumerated or its enumerator is disposed.
/// </returns>
public IEnumerable<Tuple<float, Document, string[]>> Search(string text, string defaultField = "title", int maxResultCount = 500)
{
    var parser = new QueryParser(Constants.Version, defaultField, _analyzer);
    Query query = parser.Parse(text ?? string.Empty);

    // Empty tags: fragments are returned as plain text without markup.
    var formatter = new SimpleHTMLFormatter(string.Empty, string.Empty);
    var fragmenter = new SimpleFragmenter(120);
    var scorer = new QueryScorer(query);
    var highlighter = new Highlighter(formatter, scorer) { TextFragmenter = fragmenter };

    using (var directory = FSDirectory.Open(new DirectoryInfo(_path), new NoLockFactory()))
    // The reader is disposed explicitly: a searcher built from an existing reader
    // does not close that reader when the searcher is disposed.
    using (var reader = IndexReader.Open(directory, ReadonlyMode))
    using (var searcher = new IndexSearcher(reader))
    {
        TopDocs hits = searcher.Search(query, maxResultCount);
        foreach (var scoreDoc in hits.ScoreDocs)
        {
            Document doc = searcher.Doc(scoreDoc.Doc);
            var field = doc.Get(defaultField);

            // A hit may lack a stored value for the default field (e.g. it matched through
            // another field); report it without fragments instead of failing mid-enumeration.
            string[] fragments;
            if (field == null)
            {
                fragments = new string[0];
            }
            else
            {
                var tokenStream = _analyzer.TokenStream(defaultField, new StringReader(field));
                fragments = highlighter.GetBestFragments(tokenStream, field, 5);
            }

            yield return new Tuple<float, Document, string[]>(scoreDoc.Score, doc, fragments);
        }
    }
}
/// <summary>
/// Returns the best highlighted fragments of <paramref name="value"/> for a query that is
/// first rewritten against the searcher's index reader.
/// </summary>
/// <param name="value">Text to highlight.</param>
/// <param name="searcher">Searcher whose index reader is used to rewrite the query.</param>
/// <param name="highlightField">Field name given to the highlight analyzer.</param>
/// <param name="luceneQuery">Query whose terms are highlighted.</param>
/// <returns>Up to <c>MaxNumHighlights</c> fragments joined by <c>Separator</c>.</returns>
public string GetHighlight(string value, IndexSearcher searcher, string highlightField, Query luceneQuery)
{
    var rewrittenQuery = luceneQuery.Rewrite(searcher.GetIndexReader());
    var fragmentScorer = new QueryScorer(rewrittenQuery);
    var fragmentHighlighter = new Highlighter(HighlightFormatter, fragmentScorer);

    var valueReader = new StringReader(value);
    var tokens = HighlightAnalyzer.TokenStream(highlightField, valueReader);

    return fragmentHighlighter.GetBestFragments(tokens, value, MaxNumHighlights, Separator);
}
/// <summary>
/// Gets up to three highlighted fragments of <paramref name="text"/>, with matches wrapped
/// in &lt;b&gt; tags.
/// </summary>
/// <param name="query">Query whose terms are highlighted.</param>
/// <param name="text">Text the fragments are cut from.</param>
/// <param name="analys">Analyzer handed to <c>TokenSources.GetAnyTokenStream</c>.</param>
/// <param name="searcher">Searcher whose index reader supplies the token stream.</param>
/// <param name="doc">Hit whose "Contents" field token stream is requested.</param>
/// <returns>The best fragments found by the highlighter.</returns>
public static string[] TextHighlighter(Query query, string text, StandardAnalyzer analys, IndexSearcher searcher, ScoreDoc doc)
{
    var queryScorer = new QueryScorer(query);
    var boldFormatter = new SimpleHTMLFormatter("<b>", "</b>");
    var fragmentHighlighter = new Highlighter(boldFormatter, queryScorer);

    var tokens = TokenSources.GetAnyTokenStream(searcher.IndexReader, doc.Doc, "Contents", analys);

    return fragmentHighlighter.GetBestFragments(tokens, text, 3);
}
/// <summary>
/// Builds a highlighter for <paramref name="query"/> that wraps matches in &lt;b&gt; tags
/// and uses a span fragmenter with a fragment size of 150.
/// </summary>
/// <param name="query">Query whose terms are highlighted.</param>
/// <returns>The configured highlighter.</returns>
private Highlighter CreateHighlighter(Query query)
{
    var boldFormatter = new SimpleHTMLFormatter("<b>", "</b>");
    var queryScorer = new QueryScorer(query);

    var result = new Highlighter(boldFormatter, queryScorer);
    result.TextFragmenter = new SimpleSpanFragmenter(queryScorer, 150);
    return result;
}
/// <summary>
/// Produces an HTML-encoded preview of <paramref name="text"/> in which the terms of the
/// content query derived from <paramref name="searchRequest"/> are wrapped in
/// <paramref name="prefix"/>/<paramref name="suffix"/>.
/// </summary>
/// <param name="searchRequest">
/// Request that identifies the index and supplies the content query. When null, no highlighting
/// is attempted and only the raw-content fallback applies.
/// </param>
/// <param name="text">Text to preview. Null yields an empty string once a request is present.</param>
/// <param name="length">Fragment size passed to the fragmenter.</param>
/// <param name="prefix">Markup placed before every highlighted term.</param>
/// <param name="suffix">Markup placed after every highlighted term.</param>
/// <param name="returnRawContentWhenResultIsEmpty">
/// When true and no fragment was produced, the HTML-encoded full text is returned instead of an empty string.
/// </param>
/// <returns>
/// The highlighted preview (up to three fragments joined by "..."), an empty string when the
/// index maintainer is not found, the fixed message "Content is too long to highlight" for
/// over-long text, or the fallback described above.
/// </returns>
public string GenerateHtmlPreviewText(SearchRequest searchRequest, string text, int length, string prefix = "<span class='highlight'>", string suffix = "</span>", bool returnRawContentWhenResultIsEmpty = false)
{
    if (searchRequest == null)
    {
        return returnRawContentWhenResultIsEmpty ? HttpUtility.HtmlEncode(text) : string.Empty;
    }

    var maintainer = GetIndexMaintainerWrapper(searchRequest.IndexPk);
    if (maintainer == null)
    {
        return string.Empty;
    }

    var queryForContent = GetContentQuery(searchRequest, maintainer);

    // Fall back to the default limit when the index configuration has none.
    var maxContentHighlightLength = maintainer.IndexConfig.MaxContentHighlightLength;
    if (maxContentHighlightLength <= 0)
    {
        maxContentHighlightLength = Constants.DefaultMaxContentHighlightLength;
    }

    // A null text used to crash on text.Length below; there is nothing to preview.
    if (text == null)
    {
        return string.Empty;
    }

    if (text.Length > maxContentHighlightLength) // For performance
    {
        return "Content is too long to highlight";
    }

    string result = null;

    if (queryForContent != null)
    {
        var scorer = new QueryScorer(queryForContent);

        // Internal marker tags are used while highlighting and swapped for prefix/suffix
        // only after HTML-encoding, so the caller's markup is not encoded.
        var formatter = new SimpleHTMLFormatter(HighLightPrefix, HighLightSuffix);
        var highlighter = new Highlighter(formatter, scorer)
        {
            TextFragmenter = new SimpleFragmenter(length),
            MaxDocCharsToAnalyze = maxContentHighlightLength
        };

        using var stream = GetTokenStream(text, searchRequest.CaseSensitive);
        result = highlighter.GetBestFragments(stream, text, 3, "...");
    }

    if (string.IsNullOrEmpty(result))
    {
        return returnRawContentWhenResultIsEmpty ? HttpUtility.HtmlEncode(text) : string.Empty;
    }

    return HttpUtility.HtmlEncode(result).Replace(HighLightPrefix, prefix).Replace(HighLightSuffix, suffix);
}
/// <summary>
/// Highlights the keyword inside a search result.
/// </summary>
/// <param name="keyword">The keyword; parsed as a query against an internal field name.</param>
/// <param name="content">The search result text to highlight.</param>
/// <param name="analyzer">Analyzer used for parsing and tokenizing, e.g. <c>new SimpleAnalyzer()</c>.</param>
/// <returns>The best highlighted fragment, or <paramref name="content"/> unchanged when there is none.</returns>
public static string HighLight(string keyword, string content, Analyzer analyzer)
{
    const string FIELD_NAME = "keyword";

    var parser = new QueryParserEx(Lucene.Net.Util.Version.LUCENE_30, FIELD_NAME, analyzer);
    var queryScorer = new QueryScorer(parser.Parse(keyword));
    var tagFormatter = new SimpleHTMLFormatter(PRE_TAG, END_TAG);
    var spanFragmenter = new SimpleSpanFragmenter(queryScorer);

    var fragmentHighlighter = new Highlighter(tagFormatter, queryScorer)
    {
        TextFragmenter = spanFragmenter
    };

    var best = fragmentHighlighter.GetBestFragment(analyzer, FIELD_NAME, content);
    if (best == null)
    {
        return content;
    }

    return best;
}
/// <summary>
/// Parses a raw Lucene query for <paramref name="highlightField"/>, rewrites it through the
/// searcher and returns the best highlighted fragments of <paramref name="value"/>.
/// </summary>
/// <param name="value">Text to highlight.</param>
/// <param name="highlightField">Field used to obtain the query parser and to tokenize the text.</param>
/// <param name="searcher">Searcher used to rewrite the parsed query.</param>
/// <param name="luceneRawQuery">Query text to parse.</param>
/// <returns>Up to <c>MaxNumHighlights</c> fragments joined by <c>Separator</c>.</returns>
public string GetHighlight(string value, string highlightField, Searcher searcher, string luceneRawQuery)
{
    var parsedQuery = GetQueryParser(highlightField).Parse(luceneRawQuery);
    var rewrittenQuery = searcher.Rewrite(parsedQuery);

    var fragmentScorer = new QueryScorer(rewrittenQuery);
    var fragmentHighlighter = new Highlighter(HighlightFormatter, fragmentScorer);

    var valueReader = new StringReader(value);
    var tokens = HighlightAnalyzer.TokenStream(highlightField, valueReader);

    return fragmentHighlighter.GetBestFragments(tokens, value, MaxNumHighlights, Separator);
}
/// <summary>
/// Translates the text part of a <c>LookQuery</c> into Lucene clauses on the parsing context
/// and, when requested, installs a highlight function on that context.
/// </summary>
/// <param name="parsingContext">Receives the query clauses and the optional highlight function.</param>
/// <param name="lookQuery">The query to parse; nothing happens when it has no text query.</param>
/// <exception cref="ParsingException">The search text cannot be parsed into a Lucene query.</exception>
private static void ParseTextQuery(ParsingContext parsingContext, LookQuery lookQuery)
{
    if (lookQuery.TextQuery == null)
    {
        return;
    }

    // Any text query requires the "has text" flag field to be "1".
    var hasTextClause = new TermQuery(new Term(LookConstants.HasTextField, "1"));
    parsingContext.QueryAdd(hasTextClause, BooleanClause.Occur.MUST);

    if (string.IsNullOrWhiteSpace(lookQuery.TextQuery.SearchText))
    {
        return;
    }

    var queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, LookConstants.TextField, lookQuery.SearchingContext.Analyzer);

    Query parsedSearchText = null;
    try
    {
        parsedSearchText = queryParser.Parse(lookQuery.TextQuery.SearchText);
    }
    catch
    {
        // Any parser failure is reported as a ParsingException carrying the offending text.
        throw new ParsingException($"Unable to parse LookQuery.TextQuery.SearchText: '{ lookQuery.TextQuery.SearchText }' into a Lucene query");
    }

    if (parsedSearchText == null)
    {
        return;
    }

    parsingContext.QueryAdd(parsedSearchText, BooleanClause.Occur.MUST);

    if (!lookQuery.TextQuery.GetHighlight)
    {
        return;
    }

    var indexReader = lookQuery.SearchingContext.IndexSearcher.GetIndexReader();
    var queryScorer = new QueryScorer(parsedSearchText.Rewrite(indexReader));
    var highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), queryScorer);

    // The analyzer is looked up each time the function runs, not when it is created.
    parsingContext.GetHighlight = (x) =>
    {
        var tokenStream = lookQuery.SearchingContext.Analyzer.TokenStream(LookConstants.TextField, new StringReader(x));

        // 1 = max number of fragments
        var highlight = highlighter.GetBestFragments(tokenStream, x, 1, "...");

        return new HtmlString(highlight);
    };
}
/// <summary>
/// Returns up to three fragments of <paramref name="text"/> (fragment size 150) in which the
/// terms of <paramref name="q"/> for the <c>Body</c> field are wrapped in a highlight span.
/// </summary>
/// <param name="q">Query whose terms are highlighted.</param>
/// <param name="text">Text to cut fragments from.</param>
/// <returns>The fragments joined by "...".</returns>
private string HighlightContents(Query q, string text)
{
    const int FragmentLength = 150;
    const string StartTag = "<span class='highlight'>";
    const string EndTag = "</span>";

    var bodyScorer = new QueryScorer(q, Body);
    var spanFormatter = new SimpleHTMLFormatter(StartTag, EndTag);

    var fragmentHighlighter = new Highlighter(spanFormatter, bodyScorer);
    fragmentHighlighter.SetTextFragmenter(new SimpleFragmenter(FragmentLength));

    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    var tokens = analyzer.TokenStream(Body, new StringReader(text));

    return fragmentHighlighter.GetBestFragments(tokens, text, 3, "...");
}
/// <summary>
/// Runs a multi-field search and builds result items with a highlighted preview of each
/// hit's "content" field.
/// </summary>
/// <param name="query">Query text; terms are combined with AND and leading wildcards are allowed.</param>
/// <param name="fields">Fields the query is parsed against.</param>
/// <param name="topResultCount">Maximum number of hits to retrieve.</param>
/// <returns><c>SearchResult.Empty</c> when nothing matches; otherwise the total hit count and the items.</returns>
private SearchResult SearchCore(string query, string[] fields, int topResultCount)
{
    // Field whose stored text is previewed; also the field name the analyzer must be given.
    const string ContentField = "content";

    // Search
    var parser = new MultiFieldQueryParser(Config.LuceneVersion, fields, Analyzer);
    parser.AllowLeadingWildcard = true;
    parser.DefaultOperator = Operator.AND;
    parser.Locale = Config.Locale;
    parser.AnalyzeRangeTerms = true;

    var q = parser.Parse(query);
    var results = Searcher.Search(q, topResultCount);
    var hits = results.ScoreDocs;

    if (results.TotalHits == 0)
    {
        return SearchResult.Empty;
    }

    // Format
    var items = new List<SearchResultItem>(hits.Length);
    var scorer = new QueryScorer(q);
    var formatter = new SimpleHTMLFormatter("<mark>", "</mark>");
    var highlighter = new Highlighter(formatter, scorer)
    {
        TextFragmenter = new SimpleFragmenter(Config.FragmentLength)
    };

    foreach (var hit in hits)
    {
        var doc = Searcher.Doc(hit.Doc);
        var url = doc.Get("url");
        var title = doc.Get("title");
        var content = doc.Get(ContentField);

        // The first argument is the field NAME. The document's URL value used to be passed
        // here, which selects the wrong analysis chain with field-aware analyzers.
        using (var stream = Analyzer.GetTokenStream(ContentField, new StringReader(content)))
        {
            var preview = highlighter.GetBestFragments(stream, content, Config.ResultFragments, Config.FragmentSeparator);
            items.Add(new SearchResultItem(url, ToWbrWrapName(title), preview));
        }
    }

    return new SearchResult(results.TotalHits, items);
}
/// <summary>
/// Builds a plain-text preview of <paramref name="text"/> from the fragments that best match
/// <paramref name="q"/>.
/// </summary>
/// <param name="q">Query whose terms decide which fragments are chosen.</param>
/// <param name="text">Text to preview.</param>
/// <returns>
/// Up to two fragments (fragment size 100) joined by "..."; when no fragment matches, the
/// first 100 characters of the text, or the whole text if it is shorter.
/// </returns>
public string GeneratePreviewText(Lucene.Net.Search.Query q, string text)
{
    QueryScorer scorer = new QueryScorer(q);

    // Empty tags: the preview carries no highlight markup.
    IFormatter formatter = new SimpleHTMLFormatter("", "");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = new SimpleFragmenter(100);

    TokenStream stream = new SimpleAnalyzer().TokenStream(TEXT_FN, new StringReader(text));
    string fragment = highlighter.GetBestFragments(stream, text, 2, "...");

    if (string.IsNullOrEmpty(fragment))
    {
        // Substring(0, 100) throws for text shorter than 100 characters, so clamp the length.
        fragment = text.Length > 100 ? text.Substring(0, 100) : text;
    }

    return fragment;
}
// Takes a search term and a text, and writes to the console the fragments of the text
// that contain the term, as marked up by the highlighter's default formatter.
public static void RealHighlighter(string searchTerm, string text)
{
    var termQuery = new TermQuery(new Term("mainText", searchTerm));
    var highlighter = new Highlighter(new QueryScorer(termQuery));

    var tokens = new SimpleAnalyzer().TokenStream("mainText", new System.IO.StringReader(text));

    // 5 is the maximum number of fragments that gets tested
    var fragments = highlighter.GetBestFragments(tokens, text, 5);
    for (int i = 0; i < fragments.Length; i++)
    {
        Console.Write(fragments[i]);
    }
}
/// <summary>
/// Runs a multi-field query and optionally replaces selected field values of every result
/// with their highlighted form.
/// </summary>
/// <param name="luceneQuery">Query text; a null, empty or whitespace query yields no results.</param>
/// <param name="maxResults">Maximum number of hits to retrieve.</param>
/// <param name="highlightOpenTag">Markup placed before a highlighted term; highlighting is skipped when null or empty.</param>
/// <param name="highlightCloseTag">Markup placed after a highlighted term; highlighting is skipped when null or empty.</param>
/// <param name="fieldsToHighlight">Names of the result fields to highlight; null or empty disables highlighting.</param>
/// <returns>The matching results, with highlighted field values where a highlight was found.</returns>
public IEnumerable<SearchResult> Search(string luceneQuery, int maxResults = 500, string highlightOpenTag = null, string highlightCloseTag = null, params string[] fieldsToHighlight)
{
    var results = new List<SearchResult>();

    if (String.IsNullOrWhiteSpace(luceneQuery))
    {
        return results;
    }

    // One analyzer serves both parsing and highlighting; it used to be re-created
    // for every field of every result.
    var analyzer = new StandardAnalyzer(LuceneVersion);
    var parser = new MultiFieldQueryParser(LuceneVersion, searchFields, analyzer);
    Query query = parser.Parse(luceneQuery);

    TopDocs topDocs = searcher.Search(query, maxResults);
    foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
    {
        Document document = reader.Document(scoreDoc.doc);
        results.Add(new SearchResult(document, scoreDoc.score));
    }

    // A params array is null when the caller passes null explicitly.
    bool highlightRequested = !String.IsNullOrEmpty(highlightOpenTag)
        && !String.IsNullOrEmpty(highlightCloseTag)
        && fieldsToHighlight != null
        && fieldsToHighlight.Length > 0;

    if (!highlightRequested)
    {
        return results;
    }

    var scorer = new QueryScorer(query);
    var formatter = new SimpleHTMLFormatter(highlightOpenTag, highlightCloseTag);
    var highlighter = new Highlighter(formatter, scorer);
    highlighter.SetTextFragmenter(new SimpleFragmenter());

    foreach (SearchResult result in results)
    {
        foreach (string highlightField in fieldsToHighlight)
        {
            if (!result.Fields.ContainsKey(highlightField))
            {
                continue;
            }

            string fieldValue = result[highlightField];

            // Nothing to highlight; a null value would also make StringReader throw.
            if (String.IsNullOrEmpty(fieldValue))
            {
                continue;
            }

            TokenStream stream = analyzer.TokenStream(highlightField, new StringReader(fieldValue));
            string highlightedFieldValue = highlighter.GetBestFragments(stream, fieldValue, 500, "...");

            // Keep the original value when no fragment matched.
            if (!String.IsNullOrWhiteSpace(highlightedFieldValue))
            {
                result.Fields[highlightField] = highlightedFieldValue;
            }
        }
    }

    return results;
}
// Manual test for highlighting: highlights the term "hell" in a fixed sentence and
// writes every returned fragment to the console on its own line.
public static void Highlighter()
{
    const string sampleText = "I am a man that follows hell.";

    var termQuery = new TermQuery(new Term("", "hell"));
    var highlighter = new Highlighter(new QueryScorer(termQuery));

    var tokens = new SimpleAnalyzer().TokenStream("field", new System.IO.StringReader(sampleText));

    // 1 is the maximum number of fragments that gets tested
    var fragments = highlighter.GetBestFragments(tokens, sampleText, 1);
    for (int i = 0; i < fragments.Length; i++)
    {
        Console.WriteLine(fragments[i]);
    }
}
/// <summary>
/// Highlights the search term in one field of the supplied result.
/// </summary>
/// <param name="Result">Search result whose field value is highlighted.</param>
/// <param name="UmbracoProperty">Property describing the field (name and wildcard setting).</param>
/// <param name="Summary">
/// Receives the best highlighted fragment. It is an empty string when the field value is
/// empty, and whatever the highlighter returned otherwise.
/// </param>
/// <returns>True when a non-empty summary was produced; otherwise false.</returns>
protected bool LuceneHighlightField(SearchResult Result, UmbracoProperty UmbracoProperty, out string Summary)
{
    Summary = string.Empty;
    var fieldName = UmbracoProperty.PropertyName;

    if (string.IsNullOrEmpty(Result.Fields[fieldName]))
    {
        return false;
    }

    // Highlighters are built once per field name and then served from the cache.
    Highlighter highlighter;
    if (!HighlighterCache.ContainsKey(fieldName))
    {
        var searchTerms = SearchUtilities.GetSearchTermsSplit(Parameters.SearchTerm);
        var luceneQuery = QueryHighlight(UmbracoProperty, searchTerms);
        var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, fieldName, _analyzer);

        // This is needed to make wildcards highlight correctly
        if (UmbracoProperty.Wildcard)
        {
            parser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
        }

        var rewritten = parser.Parse(luceneQuery).Rewrite(_reader);

        highlighter = new Highlighter(_formatter, new QueryScorer(rewritten));
        highlighter.SetTextFragmenter(new SimpleFragmenter(Parameters.SummaryLength));
        HighlighterCache.Add(fieldName, highlighter);
    }
    else
    {
        highlighter = HighlighterCache[fieldName];
    }

    using (var sr = new StringReader(Result.Fields[fieldName]))
    {
        var tokenstream = _analyzer.TokenStream(fieldName, sr);
        Summary = highlighter.GetBestFragment(tokenstream, Result.Fields[fieldName]);
    }

    return !string.IsNullOrEmpty(Summary);
}
/// <summary>
/// Returns a summary of <paramref name="text"/> with the terms of <paramref name="query"/>
/// wrapped in a "search-highlight" span.
/// </summary>
/// <param name="query">Query whose terms are highlighted.</param>
/// <param name="text">Text to summarize.</param>
/// <param name="fileName">Value passed to the analyzer as the field name.</param>
/// <returns>
/// Up to two fragments (fragment size 250) joined by "...", or the unchanged text when the
/// highlighter produces nothing.
/// </returns>
public string GetSummaryWithHighlight(Query query, string text, string fileName)
{
    var analyzer = _getAnalyzer();

    // create highlighter
    var highlighter = new Highlighter(
        new SimpleHTMLFormatter("<span class=\"search-highlight\">", "</span>"),
        new QueryScorer(query))
    {
        TextFragmenter = new SimpleFragmenter(250)
    };

    var tokens = analyzer.TokenStream(fileName, new StringReader(text));
    var highlighted = highlighter.GetBestFragments(tokens, text, 2, "...");

    return string.IsNullOrEmpty(highlighted) ? text : highlighted;
}
/// <summary>
/// Creates a fragment provider for the given index and query, with a highlighter that uses
/// the configured start/end tags and a span fragmenter with a fragment size of 160.
/// </summary>
/// <param name="index">Index the provider works against.</param>
/// <param name="query">Query whose terms are highlighted.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="index"/> or <paramref name="query"/> is null.
/// </exception>
public SimpleHtmlHighlightedFragmentProvider(ICrmEntityIndex index, Query query)
{
    if (index == null) { throw new ArgumentNullException(nameof(index)); }
    if (query == null) { throw new ArgumentNullException(nameof(query)); }

    _index = index;

    // The same scorer drives both scoring and span-aware fragmenting.
    var scorer = new QueryScorer(query);
    var formatter = new SimpleHTMLFormatter(_highlighterStartTag, _highlighterEndTag);

    var highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = new SimpleSpanFragmenter(scorer, 160);
    _highlighter = highlighter;
}
/// <summary>
/// Produces a highlighted summary of <paramref name="text"/> for <paramref name="query"/>
/// using the configured tags, fragment size, fragment count and separator.
/// </summary>
/// <param name="query">Query whose terms are highlighted.</param>
/// <param name="text">Text to summarize.</param>
/// <param name="htmlEncodeOutput">When true an HTML encoder is used; otherwise the default encoder.</param>
/// <returns>
/// Null when the query is null or the text is null/empty; the highlighted fragments on
/// success; the original text when highlighting throws.
/// </returns>
protected string Summarize(LuceneQuery query, string text, bool htmlEncodeOutput)
{
    if (query == null)
    {
        return null;
    }

    if (string.IsNullOrEmpty(text))
    {
        return null;
    }

    try
    {
        // Build the highlighter.
        var formatter = new SimpleHTMLFormatter(_configuration.StartTag, _configuration.EndTag);
        var scorer = new QueryScorer(query);
        Encoder encoder = htmlEncodeOutput ? (Encoder)new SimpleHTMLEncoder() : new DefaultEncoder();

        var highlighter = new LuceneHighlighter(formatter, encoder, scorer);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, _configuration.FragmentSize));

        // Perform highlighting.
        var reader = new java.io.StringReader(text);
        var tokenStream = _contentAnalyzer.tokenStream(string.Empty, reader);
        return highlighter.getBestFragments(tokenStream, text, _configuration.MaxFragments, _configuration.Separator);
    }
    catch (Exception)
    {
        // on error just return the original string
        // NOTE(review): this fallback is not HTML-encoded even when htmlEncodeOutput is true - confirm callers expect that.
        return text;
    }
}
/// <summary>
/// Creates a single highlighter for a searched value.
/// </summary>
/// <param name="parser">A lucene parser.</param>
/// <param name="value">The value which was searched.</param>
/// <param name="highlightPreTag">Pre match tag.</param>
/// <param name="highlightPostTag">Post match tag.</param>
/// <returns>A highlighter, or null when one could not be created for the value.</returns>
private Highlighter MakeValueHighlighter(QueryParser parser, string value, string highlightPreTag, string highlightPostTag)
{
    // With lucene-net 3.0.3 some queries are not supported, for instance a query such as "*someterm"
    // (the prefix is a wildcard). These queries throw from QueryParser.Parse(string value) because of
    // configuration manager usage, which is not supported in net core.
    // See bug https://dev.azure.com/csedevil/K2-bridge-internal/_workitems/edit/1658
    // Such terms are dropped here: the failure is logged and null is returned.
    try
    {
        var scorer = new QueryScorer(parser.Parse(value));
        var formatter = new SimpleHTMLFormatter(highlightPreTag, highlightPostTag);

        // Unlimited fragment size and analysis length.
        var highlighter = new Highlighter(formatter, scorer);
        highlighter.TextFragmenter = new SimpleSpanFragmenter(scorer, int.MaxValue);
        highlighter.MaxDocCharsToAnalyze = int.MaxValue;
        return highlighter;
    }
    catch (Exception e)
    {
        logger.LogError(e, $"Failure creating highlighters for {value}");
        return null;
    }
}
/// <summary>
/// Returns the fragments of <paramref name="text"/> that contain any of the space-separated
/// words of <paramref name="term"/>, with each match wrapped in a "search_highlight" span.
/// </summary>
/// <param name="text">Text to highlight; null or empty yields an empty string.</param>
/// <param name="term">Space-separated words to highlight; null or blank yields an empty string.</param>
/// <returns>Matching fragments (fragment size 100) joined by "...", or an empty string when nothing matches.</returns>
public static string Highlight(this string text, string term)
{
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // Without any word nothing can match; a null term used to throw on Split.
    if (string.IsNullOrWhiteSpace(term))
    {
        return string.Empty;
    }

    var bq = new BooleanQuery();
    foreach (var word in term.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
    {
        // StandardAnalyzer lower-cases the text tokens, so the raw term must be lower-cased
        // too; otherwise a capitalised search word would never match.
        bq.Add(new TermQuery(new Term("field", word.ToLowerInvariant())), Occur.SHOULD);
    }

    var fragmentLength = 100;
    var highlightStartTag = @"<span class='search_highlight'>";
    var highlightEndTag = @"</span>";

    QueryScorer scorer = new QueryScorer(bq);
    var formatter = new SimpleHTMLFormatter(highlightStartTag, highlightEndTag);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = new SimpleFragmenter(fragmentLength);

    // The analyzer is disposable; release it once the fragments have been produced.
    using (var analyzer = new StandardAnalyzer(Lucene.Net.Util.LuceneVersion.LUCENE_48))
    {
        TokenStream stream = analyzer.GetTokenStream("field", new StringReader(text));
        return highlighter.GetBestFragments(stream, text, 100, "...");
    }
}
/// <summary>
/// Returns the best highlighted fragment of <paramref name="sourceText"/> for a fuzzy query
/// built from <paramref name="keyword"/>.
/// </summary>
/// <param name="keyword">Keyword to highlight.</param>
/// <param name="sourceText">Text to highlight.</param>
/// <returns>
/// An empty string when either argument is null or whitespace; the first fragment when one
/// is found; otherwise the start of the text, at most <c>fragmentSize</c> characters.
/// </returns>
public string HighLight(string keyword, string sourceText)
{
    if (keyword.IsNullOrWhiteSpace() || sourceText.IsNullOrWhiteSpace())
    {
        return string.Empty;
    }

    //queryParser = new QueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, filedName, _analyzer);
    //var query = queryParser.Parse(keyword);
    var fuzzyQuery = _luceneIndexSearcher.GetFuzzyquery(queryParser, keyword);

    var fragmentHighlighter = new Highlighter(formatter, new QueryScorer(fuzzyQuery))
    {
        TextFragmenter = new SimpleFragmenter(fragmentSize)
    };
    //highlighter.MaxDocCharsToAnalyze = 200;

    var tokens = _analyzer.GetTokenStream(filedName, new StringReader(sourceText));
    var fragments = fragmentHighlighter.GetBestFragments(tokens, sourceText, maxNumFragments);

    if (fragments.Length > 0)
    {
        return fragments[0];
    }

    // No match: fall back to the leading part of the text.
    return sourceText.Substring(0, Math.Min(this.fragmentSize, sourceText.Length));
}
/// <summary>
/// Builds a plain-text preview of <paramref name="text"/> from the fragments that best match
/// <c>currentQuery</c>, tokenizing with an English Snowball analyzer.
/// </summary>
/// <param name="text">Text to preview.</param>
/// <returns>
/// Up to two fragments (fragment size 100) joined by "..."; when no fragment matches, the
/// first 100 characters of the text, or the whole text if it is not longer than that.
/// </returns>
public string GeneratePreviewText(string text)
{
    var queryScorer = new QueryScorer(currentQuery);

    // Empty tags: the preview carries no highlight markup.
    var previewHighlighter = new Highlighter(new SimpleHTMLFormatter("", ""), queryScorer)
    {
        TextFragmenter = new SimpleFragmenter(100)
    };

    var tokens = new SnowballAnalyzer(VERSION, "English").TokenStream(URL_FN, new StringReader(text));
    //TokenStream stream = new StandardAnalyzer(VERSION).TokenStream(URL_FN, new StringReader(text));

    var preview = previewHighlighter.GetBestFragments(tokens, text, 2, "...");
    if (!string.IsNullOrEmpty(preview))
    {
        return preview;
    }

    return text.Length > 100 ? text.Substring(0, 100) : text;
}
/// <summary>
/// Highlights the matches of <paramref name="pattern"/> in the full <paramref name="html"/>
/// of the indexed page identified by <paramref name="numId"/>.
/// </summary>
/// <param name="numId">Value of the <c>NumId</c> field of the page to look up.</param>
/// <param name="pattern">Query text; when null or blank the html is returned unchanged.</param>
/// <param name="html">The page html to highlight.</param>
/// <returns>
/// The html with matches wrapped in a yellow-background span, or the unchanged html when the
/// page is not in the index or nothing matches.
/// </returns>
private string Highlight(int numId, string pattern, string html)
{
    if (string.IsNullOrWhiteSpace(pattern))
    {
        return html;
    }

    using (Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
    using (Lucene.Net.Store.Directory index = new SimpleFSDirectory(Path.ChangeExtension(_bookFile.FullName, Convert.ToInt32(LuceneVersion.LUCENE_48).ToString())))
    using (IndexReader reader = DirectoryReader.Open(index))
    {
        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader);

        // Exact-match lookup of the page by its numeric id.
        Lucene.Net.Search.TopDocs docs = searcher.Search(
            Lucene.Net.Search.NumericRangeQuery.NewInt32Range(nameof(TabHtmlText.NumId), numId, numId, true, true), 1);

        // First() used to throw when the page is not indexed; leave the html untouched instead.
        if (docs.ScoreDocs.Length == 0)
        {
            return html;
        }

        int docId = docs.ScoreDocs[0].Doc;

        QueryScorer scorer = new QueryScorer(
            new QueryParser(LuceneVersion.LUCENE_48, nameof(TabHtmlText.Html), analyzer).Parse(pattern));

        // NullFragmenter keeps the whole text as a single fragment.
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<span style=\"background-color: yellow\">", "</span>"), scorer)
        {
            TextFragmenter = new NullFragmenter()
        };

        using (TokenStream stream = TokenSources.GetAnyTokenStream(reader, docId, nameof(TabHtmlText.Html), analyzer))
        {
            // The highlighter returns null when nothing matched; fall back to the original html.
            return highlighter.GetBestFragment(stream, html) ?? html;
        }
    }
}
/// <summary>
/// Verifies that a field-specific scorer highlights only the query terms of the highlighted
/// field, while a field-agnostic scorer highlights the terms of every field in the query.
/// </summary>
public void TestFieldSpecificHighlighting()
{
    var helper = new TestHighlightRunner();

    helper.TestAction = () =>
    {
        var docMainText = "fred is one of the people";
        var parser = new QueryParser(TEST_VERSION, FIELD_NAME, analyzer);
        var query = parser.Parse("fred category:people");

        // highlighting respects fieldnames used in query
        IScorer fieldSpecificScorer = null;
        if (helper.Mode == TestHighlightRunner.QUERY)
        {
            fieldSpecificScorer = new QueryScorer(query, FIELD_NAME);
        }
        else if (helper.Mode == TestHighlightRunner.QUERY_TERM)
        {
            fieldSpecificScorer = new QueryTermScorer(query, "contents");
        }

        var fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldSpecificScorer)
        {
            TextFragmenter = new NullFragmenter()
        };
        String result = fieldSpecificHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);

        // Expected value first, actual second, so a failure message reports them the right way round.
        Assert.AreEqual("<B>fred</B> is one of the people", result, "Should match");

        // highlighting does not respect fieldnames used in query
        IScorer fieldInSpecificScorer = null;
        if (helper.Mode == TestHighlightRunner.QUERY)
        {
            fieldInSpecificScorer = new QueryScorer(query, null);
        }
        else if (helper.Mode == TestHighlightRunner.QUERY_TERM)
        {
            fieldInSpecificScorer = new QueryTermScorer(query);
        }

        var fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldInSpecificScorer)
        {
            TextFragmenter = new NullFragmenter()
        };
        result = fieldInSpecificHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
        Assert.AreEqual("<B>fred</B> is one of the <B>people</B>", result, "Should match");

        reader.Close();
    };

    helper.Start();
}
/// <summary>
/// Highlights every returned hit with a scorer chosen by <c>Mode</c> and writes each result
/// to the console.
/// </summary>
/// <param name="analyzer">Analyzer used to tokenize the stored field text.</param>
/// <param name="searcher">Searcher used to load the hit documents.</param>
/// <param name="hits">Search results to highlight.</param>
/// <param name="query">Query whose terms are highlighted.</param>
/// <param name="formatter">Formatter applied to highlighted terms.</param>
/// <param name="expandMT">Not used by this method.</param>
public void DoStandardHighlights(Analyzer analyzer, IndexSearcher searcher, TopDocs hits, Query query, IFormatter formatter, bool expandMT)
{
    const int maxNumFragmentsRequired = 2;
    const string fragmentSeparator = "...";

    IFragmenter frag = new SimpleFragmenter(20);

    // Iterate the hits that were actually returned: TotalHits counts every match and can be
    // larger than ScoreDocs, which made the previous index loop run past the array.
    foreach (var scoreDoc in hits.ScoreDocs)
    {
        String text = searcher.Doc(scoreDoc.Doc).Get(HighlighterTest.FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));

        IScorer scorer = null;
        if (Mode == QUERY)
        {
            scorer = new QueryScorer(query);
        }
        else if (Mode == QUERY_TERM)
        {
            scorer = new QueryTermScorer(query);
        }

        var highlighter = new Highlighter(formatter, scorer) { TextFragmenter = frag };

        String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
        Console.WriteLine("\t" + result);
    }
}
/// <summary>
/// Runs three phrase queries through a field-bound QueryScorer and checks how many
/// terms get highlighted for each one.
/// </summary>
public void TestSimpleQueryScorerPhraseHighlighting()
{
    const int maxNumFragmentsRequired = 2;

    // Phrase queries and, at the same index, the number of highlights each must produce.
    string[] phraseQueries =
    {
        "\"very long and contains\"",
        "\"This piece of text refers to Kennedy\"",
        "\"lets is a the lets is a the lets is a the lets\""
    };
    int[] expectedHighlights = { 3, 4, 4 };

    for (int q = 0; q < phraseQueries.Length; q++)
    {
        // Start every pass from zero so the count never depends on earlier state.
        numHighlights = 0;
        DoSearching(phraseQueries[q]);

        QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
        Highlighter highlighter = new Highlighter(this, scorer);

        // Iterate the returned hits: TotalHits can exceed the length of ScoreDocs.
        foreach (var scoreDoc in hits.ScoreDocs)
        {
            String text = searcher.Doc(scoreDoc.Doc).Get(FIELD_NAME);
            TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));
            highlighter.TextFragmenter = new SimpleFragmenter(40);
            String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
            Console.WriteLine("\t" + result);
        }

        Assert.IsTrue(numHighlights == expectedHighlights[q], "Failed to find correct number of highlights " + numHighlights + " found");
    }
}
/// <summary>
/// Highlights the hits of a regex query ("ken.*") and checks the number of highlighted terms.
/// </summary>
public void TestRegexQuery()
{
    const int maxNumFragmentsRequired = 2;

    query = new RegexQuery(new Term(FIELD_NAME, "ken.*"));
    searcher = new IndexSearcher(ramDir, true);
    hits = searcher.Search(query, 100);

    var scorer = new QueryScorer(query, FIELD_NAME);
    var highlighter = new Highlighter(this, scorer);

    // The search is capped at 100 results, so TotalHits may exceed the length of
    // ScoreDocs; iterate the returned hits instead of counting up to TotalHits.
    foreach (var scoreDoc in hits.ScoreDocs)
    {
        String text = searcher.Doc(scoreDoc.Doc).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));
        highlighter.TextFragmenter = new SimpleFragmenter(40);
        String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        Console.WriteLine("\t" + result);
    }

    Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Runs the highlighter over the hits of a numeric range query. Nothing is asserted;
/// the test only has to complete without throwing.
/// </summary>
public void TestNumericRangeQuery()
{
    const int maxNumFragmentsRequired = 2;

    // doesn't currently highlight, but make sure it doesn't cause exception either
    query = NumericRangeQuery.NewIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true);
    searcher = new IndexSearcher(ramDir, true);
    hits = searcher.Search(query, 100);

    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
    Highlighter highlighter = new Highlighter(this, scorer);

    // The search is capped at 100 results, so TotalHits may exceed the length of
    // ScoreDocs; iterate the returned hits instead of counting up to TotalHits.
    foreach (var scoreDoc in hits.ScoreDocs)
    {
        String text = searcher.Doc(scoreDoc.Doc).Get(NUMERIC_FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));
        highlighter.TextFragmenter = new SimpleFragmenter(40);

        // The fragments are not inspected; the call is made only to exercise the highlighter.
        highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
    }
}
/// <summary>
/// Highlights the hits of a sloppy phrase query and checks the number of highlighted terms.
/// </summary>
public void TestSimpleQueryScorerPhraseHighlighting2()
{
    const int maxNumFragmentsRequired = 2;

    DoSearching("\"text piece long\"~5");

    var scorer = new QueryScorer(query, FIELD_NAME);
    var highlighter = new Highlighter(this, scorer)
    {
        TextFragmenter = new SimpleFragmenter(40)
    };

    // Iterate the returned hits: TotalHits counts every match and can exceed the
    // length of ScoreDocs.
    foreach (var scoreDoc in hits.ScoreDocs)
    {
        var text = searcher.Doc(scoreDoc.Doc).Get(FIELD_NAME);
        var tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));
        var result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        Console.WriteLine("\t" + result);
    }

    Assert.IsTrue(numHighlights == 6, "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Highlights two phrase queries using SimpleSpanFragmenter with different fragment
/// sizes and prints the fragments. Nothing is asserted.
/// </summary>
public void TestSimpleSpanFragmenter()
{
    const int maxNumFragmentsRequired = 2;

    // Phrase queries and, at the same index, the fragment size used for each one.
    string[] phraseQueries = { "\"piece of text that is very long\"", "\"been shot\"" };
    int[] fragmentSizes = { 5, 20 };

    for (int q = 0; q < phraseQueries.Length; q++)
    {
        DoSearching(phraseQueries[q]);

        QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
        Highlighter highlighter = new Highlighter(this, scorer);

        // Iterate the returned hits: TotalHits can exceed the length of ScoreDocs.
        foreach (var scoreDoc in hits.ScoreDocs)
        {
            String text = searcher.Doc(scoreDoc.Doc).Get(FIELD_NAME);
            TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));
            highlighter.TextFragmenter = new SimpleSpanFragmenter(scorer, fragmentSizes[q]);
            String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
            Console.WriteLine("\t" + result);
        }
    }
}
/// <summary>
/// Highlights the hits of a query mixing a plain term with a phrase and checks the
/// running highlight count after each highlighted document.
/// </summary>
public void TestPosTermStdTerm()
{
    const int maxNumFragmentsRequired = 2;

    DoSearching("y \"x y z\"");

    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
    Highlighter highlighter = new Highlighter(this, scorer);

    // Iterate the returned hits: TotalHits counts every match and can exceed the
    // length of ScoreDocs.
    foreach (var scoreDoc in hits.ScoreDocs)
    {
        String text = searcher.Doc(scoreDoc.Doc).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));
        highlighter.TextFragmenter = new SimpleFragmenter(40);
        String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        Console.WriteLine("\t" + result);

        // NOTE(review): the assertion runs once per document, so it is skipped entirely
        // when the query has no hits - confirm that this is intended.
        Assert.IsTrue(numHighlights == 4, "Failed to find correct number of highlights " + numHighlights + " found");
    }
}
/// <summary>
/// Builds a highlighter whose scorer matches the current highlight mode.
/// </summary>
/// <param name="query">Query the scorer is built from.</param>
/// <param name="fieldName">Field passed to the QueryScorer in QUERY mode.</param>
/// <param name="stream">Not used by this method.</param>
/// <param name="formatter">Formatter handed to the highlighter.</param>
/// <param name="expanMultiTerm">When false, multi-term query expansion is switched off on the QueryScorer (QUERY mode only).</param>
/// <returns>A highlighter using <paramref name="formatter"/> and the mode-specific scorer.</returns>
/// <exception cref="InvalidOperationException">The mode is neither QUERY nor QUERY_TERM.</exception>
public Highlighter GetHighlighter(Query query, String fieldName, TokenStream stream, IFormatter formatter, bool expanMultiTerm)
{
    IScorer scorer;
    if (Mode == QUERY)
    {
        var queryScorer = new QueryScorer(query, fieldName);
        if (!expanMultiTerm)
        {
            queryScorer.IsExpandMultiTermQuery = false;
        }
        scorer = queryScorer;
    }
    else if (Mode == QUERY_TERM)
    {
        scorer = new QueryTermScorer(query);
    }
    else
    {
        // InvalidOperationException derives from SystemException, so callers that
        // catch SystemException keep working.
        throw new InvalidOperationException("Unknown highlight mode");
    }

    return new Highlighter(formatter, scorer);
}
/// <summary>
/// Highlights the hits for "Kennedy" with a default-formatter highlighter and prints
/// the fragments. Nothing is asserted.
/// </summary>
public void TestSimpleSpanHighlighter()
{
    const int maxNumFragmentsRequired = 2;

    DoSearching("Kennedy");

    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
    Highlighter highlighter = new Highlighter(scorer);

    // Iterate the returned hits: TotalHits counts every match and can exceed the
    // length of ScoreDocs.
    foreach (var scoreDoc in hits.ScoreDocs)
    {
        String text = searcher.Doc(scoreDoc.Doc).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(text));
        highlighter.TextFragmenter = new SimpleFragmenter(40);
        String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        Console.WriteLine("\t" + result);
    }

    // Not sure we can assert anything here - just running to check we dont
    // throw any exceptions
}
/// <summary>
/// Highlights <paramref name="text"/> for <paramref name="query"/> as a single fragment.
/// Intended for use with testHighlightingWithDefaultField().
/// </summary>
/// <param name="query">Query whose terms are highlighted.</param>
/// <param name="fieldName">Field used for tokenizing and as the scorer's field.</param>
/// <param name="text">Text to highlight.</param>
/// <returns>The highlighted text, or <paramref name="text"/> unchanged when the highlighter produced nothing.</returns>
private static String HighlightField(Query query, String fieldName, String text)
{
    var tokens = new StandardAnalyzer(TEST_VERSION).TokenStream(fieldName, new StringReader(text));

    // Assuming "<B>", "</B>" used to highlight
    var htmlFormatter = new SimpleHTMLFormatter();
    var fieldScorer = new QueryScorer(query, fieldName, FIELD_NAME);
    var highlighter = new Highlighter(htmlFormatter, fieldScorer)
    {
        // A fragment size of int.MaxValue keeps the text in one piece.
        TextFragmenter = new SimpleFragmenter(int.MaxValue)
    };

    String highlighted = highlighter.GetBestFragments(tokens, text, 1, "(FIELD TEXT TRUNCATED)");
    if (highlighted.Length == 0)
    {
        return text;
    }

    return highlighted;
}
/// <summary>
/// Highlights the hits of a constant-score wildcard query three times - with the scorer
/// bound to the field, to no field, and to an unrelated field with a default field -
/// and checks the highlight count each time.
/// </summary>
public void TestConstantScoreMultiTermQuery()
{
    const int maxNumFragmentsRequired = 2;
    const string fragmentSeparator = "...";

    var wildcardQuery = new WildcardQuery(new Term(FIELD_NAME, "ken*"));
    wildcardQuery.RewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
    query = wildcardQuery;
    searcher = new IndexSearcher(ramDir, true);

    // can't rewrite ConstantScore if you want to highlight it -
    // it rewrites to ConstantScoreQuery which cannot be highlighted
    // query = unReWrittenQuery.Rewrite(reader);
    Console.WriteLine("Searching for: " + query.ToString(FIELD_NAME));

    // One factory per pass: field-bound scorer, null field, then default field.
    Func<QueryScorer>[] scorerFactories =
    {
        () => new QueryScorer(query, FIELD_NAME),
        () => new QueryScorer(query, null),
        () => new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME)
    };

    foreach (var createScorer in scorerFactories)
    {
        hits = searcher.Search(query, null, 1000);
        numHighlights = 0;

        // Iterate the returned hits: TotalHits can exceed the length of ScoreDocs.
        foreach (var scoreDoc in hits.ScoreDocs)
        {
            String text = searcher.Doc(scoreDoc.Doc).Get(HighlighterTest.FIELD_NAME);
            TokenStream tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));

            // A fresh scorer and highlighter are built for every document.
            QueryScorer scorer = createScorer();
            Highlighter highlighter = new Highlighter(this, scorer);
            highlighter.TextFragmenter = new SimpleFragmenter(20);

            String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
            Console.WriteLine("\t" + result);
        }

        Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
    }
}
/// <summary>
/// Highlights a text against a boolean query that repeats the same two phrases across
/// two fields and checks the number of highlighted terms.
/// </summary>
public void TestRepeatingTermsInMultBooleans()
{
    String content = "x y z a b c d e f g b c g";

    String firstPhrase = "\"a b c d\"";
    String secondPhrase = "\"b c g\"";

    String f1 = "f1";
    String f2 = "f2";
    String f1Prefix = f1 + ":";
    String f2Prefix = f2 + ":";

    // Each phrase is searched in both fields; the two groups must both match.
    String queryText = "(" + f1Prefix + firstPhrase + " OR " + f2Prefix + firstPhrase + ") AND ("
                       + f1Prefix + secondPhrase + " OR " + f2Prefix + secondPhrase + ")";

    Analyzer whitespaceAnalyzer = new WhitespaceAnalyzer();
    Query parsedQuery = new QueryParser(TEST_VERSION, f1, whitespaceAnalyzer).Parse(queryText);

    var fieldScorer = new QueryScorer(parsedQuery, f1) { IsExpandMultiTermQuery = false };
    var highlighter = new Highlighter(this, fieldScorer);
    highlighter.GetBestFragment(whitespaceAnalyzer, f1, content);

    Assert.IsTrue(numHighlights == 7, "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Searches the index for the querytext
/// </summary>
/// <param name="querytext">The text to search the index</param>
/// <returns>
/// One dictionary per hit, in result order, with the keys "rank", "passId", "score",
/// "title", "url", "text", "queryId" and "highlighter". Empty when nothing matches.
/// </returns>
public List<Dictionary<string, string>> SearchIndext(string querytext)
{
    var resultListDict = new List<Dictionary<string, string>>();

    Query query = DisplayQueries(querytext);
    Console.WriteLine("query is " + query);

    TopDocs results = searcher.Search(query, 100);
    System.Console.WriteLine("Number of results is " + results.TotalHits);

    if (results.TotalHits == 0)
    {
        return resultListDict;
    }

    // Setup the configuration of Highlighter
    IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold; background-color:yellow;\">", "</span>");
    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = new SimpleFragmenter(2000);

    int rank = 0;
    foreach (ScoreDoc scoreDoc in results.ScoreDocs)
    {
        rank++;
        Lucene.Net.Documents.Document doc = searcher.Doc(scoreDoc.Doc);
        string myFieldValue = doc.Get(TEXT_FN_PASS_TEXT);
        string myURL = doc.Get(TEXT_FN_URL);
        string passId = doc.Get(TEXT_FN_PASS_ID);
        string score = scoreDoc.Score.ToString();
        string queryId = doc.Get(TEXT_FN_QUERY_ID);
        int jsonId = Int32.Parse(doc.Get(TEXT_FN_JSON_ARRAY_ID));

        // passage_text field store as Field.Store.NO, so the text is looked up in the
        // JSON array by passage id.
        foreach (var itemP in jArr[jsonId][PASSAGES])
        {
            if (itemP[TEXT_FN_PASS_ID].ToString() == passId)
            {
                myFieldValue = itemP[TEXT_FN_PASS_TEXT].ToString();
            }
        }

        // Add the Highlighter tag into passage_text of query. Skipped when no passage
        // text was found, because StringReader rejects null.
        string HLmyFieldValue = null;
        if (myFieldValue != null)
        {
            TokenStream HLstream = analyzer.TokenStream("", new StringReader(myFieldValue));
            HLmyFieldValue = highlighter.GetBestFragment(HLstream, myFieldValue);
        }

        // Extract title from URL: the last path segment, or the one before it when the
        // URL ends with '/'. The length check covers URLs with no '/' at all.
        string[] urlSeg = (myURL ?? string.Empty).Split('/');
        string title = urlSeg[urlSeg.Length - 1];
        if (title.Length == 0 && urlSeg.Length > 1)
        {
            title = urlSeg[urlSeg.Length - 2];
        }

        resultListDict.Add(new Dictionary<string, string>
        {
            { "rank", rank.ToString() },
            { "passId", passId },
            { "score", score },
            { "title", title },
            { "url", myURL },
            { "text", myFieldValue },
            { "queryId", queryId },
            { "highlighter", HLmyFieldValue }
        });
    }

    return resultListDict;
}
/// <summary>
/// Searches for the phrase "very long" and prints the best highlighted fragment of
/// each returned document. Nothing is asserted.
/// </summary>
public void TestQueryScorerHits()
{
    Analyzer simpleAnalyzer = new SimpleAnalyzer();
    QueryParser phraseParser = new QueryParser(TEST_VERSION, FIELD_NAME, simpleAnalyzer);
    query = phraseParser.Parse("\"very long\"");
    searcher = new IndexSearcher(ramDir, true);
    TopDocs topDocs = searcher.Search(query, 10);

    var phraseScorer = new QueryScorer(query, FIELD_NAME);
    var highlighter = new Highlighter(phraseScorer);

    foreach (var hit in topDocs.ScoreDocs)
    {
        Document hitDoc = searcher.Doc(hit.Doc);
        String storedText = hitDoc.Get(FIELD_NAME);

        // Token stream taken from whatever source TokenSources can provide for this document.
        TokenStream tokens = TokenSources.GetAnyTokenStream(searcher.IndexReader, hit.Doc, FIELD_NAME, hitDoc, simpleAnalyzer);

        highlighter.TextFragmenter = new SimpleSpanFragmenter(phraseScorer);
        Console.WriteLine(highlighter.GetBestFragment(tokens, storedText));
    }
}