Ejemplo n.º 1
0
        /// <summary>
        /// Runs a query against the Lucene index stored in <c>index_folder</c> and returns the matching
        /// lines, each with an HTML snippet in which the matched terms are highlighted.
        /// </summary>
        /// <param name="query_str">Query text, parsed by <c>QueryParser</c> against the <c>IndexModel.LineText</c> field.</param>
        /// <returns>
        /// Up to <c>max_search_hits</c> hits, in the order of the searcher's <c>ScoreDocs</c>.
        /// Empty when <paramref name="query_str"/> is blank. Documents without a stored
        /// <c>IndexModel.LineText</c> value are skipped.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="query_str"/> is null.</exception>
        public IEnumerable <SampleHit> Search(string query_str)
        {
            if (query_str == null)
            {
                throw new System.ArgumentNullException("query_str");
            }

            List <SampleHit> result_hits = new List <SampleHit>();

            // A blank query has nothing to match: return no hits rather than handing it to the parser.
            if (query_str.Trim().Length == 0)
            {
                return(result_hits);
            }

            // The analyzer is disposable too, so it is released together with the directory and the searcher.
            using (Lucene.Net.Store.Directory luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(index_folder))
            using (Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Ru.RussianAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT))
            using (IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory))
            {
                QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_CURRENT, IndexModel.LineText, analyzer);
                Query       query  = parser.Parse(query_str);

                TopDocs hits = searcher.Search(query, max_search_hits);

                // Highlighting: matched terms are wrapped in a yellow <span>. The text around them is
                // HTML-encoded so that markup characters in the indexed line (for example '<' or '&'
                // in source code) cannot break or inject into the generated HTML.
                var formatter   = new Lucene.Net.Search.Highlight.SimpleHTMLFormatter("<span style=\"background:yellow;\">", "</span>");
                var encoder     = new Lucene.Net.Search.Highlight.SimpleHTMLEncoder();
                var scorer      = new Lucene.Net.Search.Highlight.QueryScorer(query);
                var highlighter = new Lucene.Net.Search.Highlight.Highlighter(formatter, encoder, scorer);
                highlighter.TextFragmenter = new Lucene.Net.Search.Highlight.SimpleFragmenter(200);

                foreach (ScoreDoc hit in hits.ScoreDocs)
                {
                    Document doc = searcher.Doc(hit.Doc);

                    // Document.Get yields null for a field that is not stored, instead of forcing a
                    // dereference of a missing Field object.
                    string line_number = doc.Get(IndexModel.LineNumber);
                    string line_text   = doc.Get(IndexModel.LineText);

                    if (line_text == null)
                    {
                        // Without the stored text there is nothing to show or highlight for this hit.
                        continue;
                    }

                    // Tokenize under the same field name the query was parsed for, so the highlighter
                    // keeps working if the analyzer is ever replaced by a field-sensitive one.
                    Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream(IndexModel.LineText, new System.IO.StringReader(line_text));
                    string highlightedText = highlighter.GetBestFragments(stream, line_text, 1, "...");

                    result_hits.Add(new SampleHit {
                        line_number = line_number, sample_text = line_text, html_highlighting = highlightedText
                    });
                }
            }

            return(result_hits);
        }
        /// <summary>
        /// Locates the places in <paramref name="text"/> that match <paramref name="query"/>.
        /// </summary>
        /// <remarks>
        /// A Lucene highlighter is used purely as a match finder: its formatter is an
        /// <c>OccurrenceCollectorFormatter</c>, and the result is built from what that collector
        /// gathered rather than from the highlighter's return value.
        /// </remarks>
        /// <param name="searchType">
        /// Selects the analyzer and index field used for tokenizing: <c>SearchType.Files</c>,
        /// <c>SearchType.Contents</c> or <c>SearchType.Tags</c>. For any other value the
        /// highlighter is not run.
        /// </param>
        /// <param name="query">Query whose matches should be returned.</param>
        /// <param name="text">Original text to scan; it is analyzed in full.</param>
        /// <param name="filesAnalyzer">Analyzer used when <paramref name="searchType"/> is <c>SearchType.Files</c>.</param>
        /// <param name="contentsAnalyzer">Analyzer used when <paramref name="searchType"/> is <c>SearchType.Contents</c>.</param>
        /// <param name="tagsAnalyzer">Analyzer used when <paramref name="searchType"/> is <c>SearchType.Tags</c>.</param>
        /// <param name="createFragment">
        /// When true, each occurrence carries a fragment of text lines near the match;
        /// when false, the fragment passed to the occurrence is null.
        /// </param>
        /// <returns>Read-only collection holding one occurrence per entry gathered by the collector.</returns>
        public static ReadOnlyCollection <IOccurrence> DetermineOccurrences(string searchType, Query query, string text, Analyzer filesAnalyzer, Analyzer contentsAnalyzer, Analyzer tagsAnalyzer, bool createFragment)
        {
            var queryScorer = new Lucene.Net.Search.Highlight.QueryScorer(query);
            var collector   = new OccurrenceCollectorFormatter();
            var highlighter = new Lucene.Net.Search.Highlight.Highlighter(collector, queryScorer);

            // Lift the analysis limit to the full text and disable fragmenting, so the
            // whole text is examined in one pass.
            highlighter.MaxDocCharsToAnalyze = text.Length;
            highlighter.TextFragmenter       = new Lucene.Net.Search.Highlight.NullFragmenter();

            // Run the highlighter with the analyzer and field that belong to the search type.
            // The returned fragments are deliberately ignored; only the collector matters.
            if (searchType == SearchType.Files)
            {
                highlighter.GetBestFragments(filesAnalyzer.ReusableTokenStream(LuceneIndexBuilder.FIELD_PATH, new System.IO.StringReader(text)), text, 1);
            }
            else if (searchType == SearchType.Contents)
            {
                highlighter.GetBestFragments(contentsAnalyzer.ReusableTokenStream(LuceneIndexBuilder.FIELD_CONTENTS, new System.IO.StringReader(text)), text, 1);
            }
            else if (searchType == SearchType.Tags)
            {
                highlighter.GetBestFragments(tagsAnalyzer.ReusableTokenStream(LuceneIndexBuilder.FIELD_TAGS, new System.IO.StringReader(text)), text, 1);
            }

            // Line start offsets of the text, needed to turn a match offset into line and column.
            int[] lineStarts = TextUtilities.GetLineOffsets(text).ToArray();

            var found = new List <IOccurrence>();

            foreach (var entry in collector.Occurrences)
            {
                var span = entry.Value;

                // Item3 is used as the match's start offset in the text and Item4 - Item3 as its length.
                int matchStart = span.Item3;
                int lineIndex  = TextUtilities.OffsetToLineIndex(matchStart, lineStarts);
                int lineStart  = lineStarts[lineIndex];

                KeyValuePair <string, int>[] fragmentLines = null;
                if (createFragment)
                {
                    // The fragment holds at most three consecutive lines, beginning two lines above
                    // the match line (or at line 0). Each entry pairs the line's text with its index.
                    // NOTE(review): the upper bound of lineStarts.Length - 2 can leave the fragment
                    // short or empty near the end of the text - confirm against GetLineOffsets.
                    int firstLine = Math.Max(lineIndex - 2, 0);
                    int stopLine  = Math.Min(firstLine + 3, lineStarts.Length - 2);

                    var collected = new List <KeyValuePair <string, int> >();
                    int current   = firstLine;
                    while (current < stopLine)
                    {
                        int    from     = lineStarts[current];
                        string lineText = text.Substring(from, lineStarts[current + 1] - from);
                        collected.Add(new KeyValuePair <string, int>(lineText, current));
                        current++;
                    }

                    fragmentLines = collected.ToArray();
                }

                // The matched text is cut out of the entry's key.
                // NOTE(review): Item1 is presumably the offset at which the key text starts - confirm.
                string matchedText = entry.Key.Substring(matchStart - span.Item1, span.Item4 - matchStart);

                found.Add(new Occurrence(matchedText, lineIndex, matchStart - lineStart, fragmentLines));
            }

            return(new ReadOnlyCollection <IOccurrence>(found));
        }