Exemplo n.º 1
0
        /// <summary>
        /// Wraps every match of <paramref name="keyword"/> found in the search
        /// result with the configured highlight tags (PRE_TAG / END_TAG).
        /// </summary>
        /// <param name="keyword">Query text whose matches should be highlighted.</param>
        /// <param name="content">The search-result text to scan.</param>
        /// <param name="analyzer">Analyzer used for parsing and tokenizing, e.g. new SimpleAnalyzer().</param>
        /// <returns>The best highlighted fragment, or the original content when nothing matched.</returns>
        public static string HighLight(string keyword, string content, Analyzer analyzer)
        {
            const string FIELD_NAME = "keyword";

            var parser = new QueryParserEx(Lucene.Net.Util.Version.LUCENE_30, FIELD_NAME, analyzer);
            var scorer = new QueryScorer(parser.Parse(keyword));

            var highlighter = new Highlighter(new SimpleHTMLFormatter(PRE_TAG, END_TAG), scorer)
            {
                TextFragmenter = new SimpleSpanFragmenter(scorer)
            };

            // GetBestFragment yields null when no fragment scores; fall back to the raw content.
            return highlighter.GetBestFragment(analyzer, FIELD_NAME, content) ?? content;
        }
Exemplo n.º 2
0
        public void TestQueryScorerHits()
        {
            // Parse a phrase query and run it against the in-memory test index.
            Analyzer analyzer = new SimpleAnalyzer();
            query = new QueryParser(TEST_VERSION, FIELD_NAME, analyzer).Parse("\"very long\"");
            searcher = new IndexSearcher(ramDir, true);
            TopDocs hits = searcher.Search(query, 10);

            QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
            Highlighter highlighter = new Highlighter(scorer);

            // Highlight the best fragment of every hit and print it for inspection.
            foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
            {
                Document doc = searcher.Doc(scoreDoc.Doc);
                String storedField = doc.Get(FIELD_NAME);

                TokenStream stream = TokenSources.GetAnyTokenStream(
                    searcher.IndexReader, scoreDoc.Doc, FIELD_NAME, doc, analyzer);

                highlighter.TextFragmenter = new SimpleSpanFragmenter(scorer);

                String fragment = highlighter.GetBestFragment(stream, storedField);
                Console.WriteLine(fragment);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Searches the Lucene index for <paramref name="searchExpression"/> and returns
        /// one page of hits, each decorated with term offsets and an HTML-highlighted
        /// best fragment (matches wrapped in &lt;kbd&gt; tags, then escaped).
        /// </summary>
        /// <param name="searchExpression">The raw query string; must be non-empty.</param>
        /// <param name="filter">Optional single-field terms filter applied to the search.</param>
        /// <param name="sortField">Optional string field to sort by; relevance order when null/empty.</param>
        /// <param name="page">1-based page number of the hits to return.</param>
        /// <param name="hitsPerPage">Number of hits per page.</param>
        /// <returns>The requested page of hits plus the total hit count (capped at MaxNumberOfHits).</returns>
        /// <exception cref="SearchException">Empty expression, or the query cannot be parsed.</exception>
        public ReadOnlyHitCollection Search(string searchExpression, Model.Filter filter = null, string sortField = null, int page = 1, int hitsPerPage = 10)
        {
            if (string.IsNullOrEmpty(searchExpression))
            {
                throw new SearchException("Must have searchExpression");
            }
            string defaultFieldName = Fields.Content;
            var    analyzer         = AnalyzerBuilder.CreateAnalyzer();
            Query  query;

            try
            {
                query = new QueryParser(Version.LUCENE_30, defaultFieldName, analyzer).Parse(searchExpression.ToLower());
            }
            catch (ParseException ex)
            {
                throw new SearchException(string.Format("Sorry, '{0}' isn't something we can search for so far.", searchExpression), ex);
            }

            var indexDirectory = new SimpleFSDirectory(new DirectoryInfo(_configuration.IndexPath));

            List <Hit> onePageOfHits;
            int        totalHits;

            using (var reader = IndexReader.Open(indexDirectory, true))
            {
                //  Get one page of hits
                var hits     = new List <Hit>();
                var searcher = new IndexSearcher(reader);

                var termsFilter = filter != null && !string.IsNullOrEmpty(filter.Field)
                    ? new FieldCacheTermsFilter(filter.Field, filter.Terms.ToArray())
                    : null;
                var sort = !string.IsNullOrEmpty(sortField)
                    ? new Sort(new SortField(sortField, SortField.STRING))
                    : Sort.RELEVANCE;

                ScoreDoc[] scoreDocs = searcher.Search(query, termsFilter, MaxNumberOfHits, sort).ScoreDocs;
                totalHits = scoreDocs.Length;

                foreach (var scoreDoc in scoreDocs)
                {
                    int docId = scoreDoc.Doc;
                    // Fetch the stored document once instead of once per field.
                    var    doc      = searcher.Doc(docId);
                    string filePath = doc.Get(Fields.Path);
                    string language = doc.Get(Fields.Language);
                    hits.Add(new Hit(docId, _configuration.ContentRootPath, filePath, scoreDoc.Score, language));
                }

                onePageOfHits = hits.GetPage(page, hitsPerPage).ToList();

                // Rewriting the query and extracting its terms does not depend on the
                // individual hit, so do it once for the whole page rather than per hit.
                var primitiveQuery = query.Rewrite(reader);
                var terms          = new HashSet <Term>();
                primitiveQuery.ExtractTerms(terms);
                string searchField = string.Empty;
                if (terms.Count == 0)
                {
                    // There can be all kinds of queries; prefix queries rewrite to no
                    // primitive terms, so highlight against the prefix's field instead.
                    var prefixQuery = query as PrefixQuery;
                    if (prefixQuery != null)
                    {
                        searchField    = prefixQuery.Prefix.Field;
                        primitiveQuery = prefixQuery;
                    }
                }
                else
                {
                    // TODO: There can be multiple term fields, like code: and method:
                    searchField = terms.First().Field;
                }

                // Get offsets and highlights on the page we are going to return
                foreach (var hit in onePageOfHits)
                {
                    // The `as` cast already yields null for a missing vector, so a
                    // single null check covers both "no vector" and "wrong vector type".
                    var termFreqVector     = reader.GetTermFreqVector(hit.DocId, Fields.Content);
                    var termPositionVector = termFreqVector as TermPositionVector;
                    if (termPositionVector == null)
                    {
                        throw new ArgumentException("Must have term frequencies and positions vectors");
                    }

                    // No offsets for prefix and other non-term based queries
                    const int maxOffsetNumber = 10;
                    foreach (var term in terms)
                    {
                        int termIndex = termFreqVector.IndexOf(term.Text); // Resolve term -> slot in the vector.
                        if (termIndex == -1)
                        {
                            continue;
                        }
                        foreach (var offset in termPositionVector.GetOffsets(termIndex))
                        {
                            if (hit.Offsets.Count < maxOffsetNumber)
                            {
                                hit.Offsets.Add(new Offset
                                {
                                    StartOffset = offset.StartOffset,
                                    EndOffset   = offset.EndOffset
                                });
                            }
                        }
                    }

                    // Highlighter from contrib package
                    var tokenStream = TokenSources.GetTokenStream(termPositionVector);
                    var scorer      = new QueryScorer(primitiveQuery, searchField);
                    var highlighter = new Highlighter(new SimpleHTMLFormatter("<kbd>", "</kbd>"), scorer)
                    {
                        TextFragmenter = new SimpleSpanFragmenter(scorer)
                    };

                    string text         = File.ReadAllText(hit.FilePath);
                    string bestFragment = highlighter.GetBestFragment(tokenStream, text);
                    if (!string.IsNullOrEmpty(bestFragment))
                    {
                        hit.BestFragment = EscapeHtmlMarkup(bestFragment);
                    }
                }
            }

            return(new ReadOnlyHitCollection(onePageOfHits, totalHits));
        }