public IEnumerable<SampleHit> Search(string query_str)
{
    List<SampleHit> result_hits = new List<SampleHit>();

    using (Lucene.Net.Store.Directory luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(index_folder))
    {
        Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Ru.RussianAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
        //Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);

        using (IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory))
        {
            QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_CURRENT, IndexModel.LineText, analyzer);
            Query query = parser.Parse(query_str);
            TopDocs hits = searcher.Search(query, max_search_hits);

            // Code highlighting: wrap matched terms in a yellow <span> and limit fragments to 200 characters.
            var formatter = new Lucene.Net.Search.Highlight.SimpleHTMLFormatter("<span style=\"background:yellow;\">", "</span>");
            var fragmenter = new Lucene.Net.Search.Highlight.SimpleFragmenter(200);
            Lucene.Net.Search.Highlight.QueryScorer scorer = new Lucene.Net.Search.Highlight.QueryScorer(query);
            Lucene.Net.Search.Highlight.Highlighter highlighter = new Lucene.Net.Search.Highlight.Highlighter(formatter, scorer);
            highlighter.TextFragmenter = fragmenter;

            foreach (ScoreDoc hit in hits.ScoreDocs)
            {
                Document doc = searcher.Doc(hit.Doc);
                float score = hit.Score;
                Field line_number = doc.GetField(IndexModel.LineNumber);
                Field line_text = doc.GetField(IndexModel.LineText);

                // Re-tokenize the stored line text and extract the best highlighted fragment.
                Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(line_text.StringValue));
                string highlightedText = highlighter.GetBestFragments(stream, doc.Get(IndexModel.LineText), 1, "...");

                result_hits.Add(new SampleHit
                {
                    line_number = line_number.StringValue,
                    sample_text = line_text.StringValue,
                    html_highlighting = highlightedText
                });
            }
        }
    }

    return result_hits;
}
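The highlighting pipeline used above (SimpleHTMLFormatter, QueryScorer, SimpleFragmenter) can be exercised in isolation, without building an index first. The following is a minimal, self-contained sketch assuming the Lucene.Net 3.0.3 core and Highlighter contrib packages; it uses the StandardAnalyzer and a hard-coded string purely for demonstration, whereas the method above runs a RussianAnalyzer over stored index fields.

using System;
using System.IO;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Search.Highlight;

class HighlightDemo
{
    static void Main()
    {
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "text", analyzer);
        Query query = parser.Parse("lucene");

        // Same formatter/scorer/fragmenter combination as in Search() above.
        var formatter = new SimpleHTMLFormatter("<span style=\"background:yellow;\">", "</span>");
        var highlighter = new Highlighter(formatter, new QueryScorer(query))
        {
            TextFragmenter = new SimpleFragmenter(200)
        };

        string text = "Lucene is a full-text search library; Lucene.Net is its C# port.";
        var stream = analyzer.TokenStream("text", new StringReader(text));

        // Prints the text with matching terms wrapped in the yellow <span> markup.
        Console.WriteLine(highlighter.GetBestFragments(stream, text, 1, "..."));
    }
}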
/// <summary>
/// Determines a list of search query matches.
/// </summary>
/// <param name="searchType">Type of search.</param>
/// <param name="query">Query whose matches should be returned.</param>
/// <param name="text">Original text.</param>
/// <param name="filesAnalyzer">Files analyzer to be used.</param>
/// <param name="contentsAnalyzer">Contents analyzer to be used.</param>
/// <param name="tagsAnalyzer">Tags analyzer to be used.</param>
/// <param name="createFragment">Determines whether fragments should be created or not.</param>
/// <returns>Collection of search query match occurrences.</returns>
public static ReadOnlyCollection<IOccurrence> DetermineOccurrences(string searchType, Query query, string text, Analyzer filesAnalyzer, Analyzer contentsAnalyzer, Analyzer tagsAnalyzer, bool createFragment)
{
    // Create and configure a Lucene highlighter in order to obtain a list of search
    // query matches in the document.
    var scorer = new Lucene.Net.Search.Highlight.QueryScorer(query);
    var occurrenceCollector = new OccurrenceCollectorFormatter();
    var highlighter = new Lucene.Net.Search.Highlight.Highlighter(occurrenceCollector, scorer);

    // We must extend the analyzer limit to the whole text.
    highlighter.MaxDocCharsToAnalyze = text.Length;

    // We want to analyze the whole text, thus the NullFragmenter.
    highlighter.TextFragmenter = new Lucene.Net.Search.Highlight.NullFragmenter();

    switch (searchType)
    {
        case SearchType.Files:
            highlighter.GetBestFragments(filesAnalyzer.ReusableTokenStream(LuceneIndexBuilder.FIELD_PATH, new System.IO.StringReader(text)), text, 1);
            break;

        case SearchType.Contents:
            highlighter.GetBestFragments(contentsAnalyzer.ReusableTokenStream(LuceneIndexBuilder.FIELD_CONTENTS, new System.IO.StringReader(text)), text, 1);
            break;

        case SearchType.Tags:
            highlighter.GetBestFragments(tagsAnalyzer.ReusableTokenStream(LuceneIndexBuilder.FIELD_TAGS, new System.IO.StringReader(text)), text, 1);
            break;
    }

    // Obtain line offsets from the original file. This is needed in order to initialize the
    // Line and Column properties of IOccurrence.
    int[] lineOffsets = TextUtilities.GetLineOffsets(text).ToArray();

    // Process the list of matches obtained through the Lucene highlighter
    // and create a list of IOccurrence instances.
    List<IOccurrence> occurrences = new List<IOccurrence>();
    foreach (var occurrence in occurrenceCollector.Occurrences)
    {
        // Obtain the line number from the offset by doing a binary search.
        int line = TextUtilities.OffsetToLineIndex(occurrence.Value.Item3, lineOffsets);

        // Get the start offset of the line; this is needed to obtain the right column number.
        int lineOffset = lineOffsets[line];

        List<KeyValuePair<string, int>> fragment = null;

        // Create an occurrence fragment from the lines around the match.
        if (createFragment)
        {
            int fragmentStartLine = Math.Max(line - 2, 0);
            int fragmentEndLine = Math.Min(fragmentStartLine + 3, lineOffsets.Length - 2);

            fragment = new List<KeyValuePair<string, int>>();
            for (int i = fragmentStartLine; i < fragmentEndLine; ++i)
            {
                fragment.Add(new KeyValuePair<string, int>(text.Substring(lineOffsets[i], lineOffsets[i + 1] - lineOffsets[i]), i));
            }
        }

        // Add the occurrence.
        occurrences.Add(new Occurrence(
            occurrence.Key.Substring(occurrence.Value.Item3 - occurrence.Value.Item1, occurrence.Value.Item4 - occurrence.Value.Item3),
            line,
            occurrence.Value.Item3 - lineOffset,
            fragment != null ? fragment.ToArray() : null));
    }

    return new ReadOnlyCollection<IOccurrence>(occurrences);
}
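The second snippet depends on an OccurrenceCollectorFormatter that is not shown here: rather than emitting markup, it implements the highlighter's IFormatter interface and records where each query match occurred, which is what makes the NullFragmenter approach work. The sketch below is only an illustration of how such an offset-collecting formatter might look; the class name, the collection type, and the tuple layout behind occurrence.Value.Item1..Item4 in the real code are assumptions.

using System;
using System.Collections.Generic;
using Lucene.Net.Search.Highlight;

// Hypothetical stand-in for OccurrenceCollectorFormatter; not the original class.
public class OffsetCollectingFormatter : IFormatter
{
    // Matched token text plus its (start, end) offsets within the analyzed text.
    public List<Tuple<string, int, int>> Matches { get; } = new List<Tuple<string, int, int>>();

    public string HighlightTerm(string originalText, TokenGroup tokenGroup)
    {
        // Only token groups that actually matched the query carry a positive score.
        if (tokenGroup.TotalScore > 0)
        {
            Matches.Add(Tuple.Create(originalText, tokenGroup.StartOffset, tokenGroup.EndOffset));
        }

        // Return the text unchanged: the goal is to collect offsets, not to produce markup.
        return originalText;
    }
}

With the NullFragmenter and MaxDocCharsToAnalyze = text.Length settings used in DetermineOccurrences, the whole document is pushed through HighlightTerm, so the string returned by GetBestFragments can be ignored and only the collected offsets are used to build the IOccurrence list.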