Exemple #1
0
        /// <summary>
        /// Runs a query against the index, prepares the highlighter state on this instance and
        /// returns the result produced by <c>BuildSearchResult</c>.
        /// </summary>
        /// <param name="text">Search text; passed to <c>BuildQuery</c> and tokenized into <c>Stream</c>.</param>
        /// <param name="tipodocumentoId">Document type id passed to <c>BuildQuery</c>.</param>
        /// <param name="startDate">Start date passed to <c>BuildDateFilter</c>.</param>
        /// <param name="endDate">End date passed to <c>BuildDateFilter</c>.</param>
        /// <returns>The value returned by <c>BuildSearchResult</c> for the matching documents.</returns>
        private IList <int> Search(string text, int tipodocumentoId, string startDate, string endDate)
        {
            // Nested using blocks release each resource even when a later acquisition
            // (reader or searcher construction) throws, and even when an earlier Dispose fails.
            // Disposal order is unchanged: searcher, then reader, then directory.
            using (var directory = this.GetDirectory())
            using (var indexReader = this.GetIndexReader(directory))
            using (var searcher = new IndexSearcher(indexReader))
            {
                var query  = this.BuildQuery(text, tipodocumentoId);
                var filter = this.BuildDateFilter(startDate, endDate);

                // Sort on "dataCriacao" as a long value; the third argument requests reverse order.
                var sort = new Sort(new SortField("dataCriacao", SortField.LONG, true));

                var docs = searcher.Search(query, filter, this.configuracoesDaAplicacao.ResultadoMaximoConsulta, sort);

                // Highlighter and token stream are stored on the instance before the result is built
                // (presumably consumed by BuildSearchResult; confirm against that method).
                var formatter = new SimpleHTMLFormatter("<span class=\"result-highlight\">", "</span>");
                var scorer    = new QueryScorer(query);
                this.Highlighter = new Highlighter(formatter, scorer);
                this.Stream      = LuceneEngineBase.GetAnalyzer().TokenStream(string.Empty, new StringReader(text));

                return(this.BuildSearchResult(docs, searcher));
            }
        }
Exemple #2
0
        /// <summary>
        /// Searches the book's Lucene index for <paramref name="pattern"/> and returns one page of
        /// hits, each carrying up to three highlighted fragments joined by newlines.
        /// </summary>
        /// <param name="pattern">Query text parsed by <c>QueryParser</c> against the <c>Html</c> field.</param>
        /// <param name="page">Page index; the page starts at hit offset <c>page * PageSize</c>.</param>
        /// <returns>A completed task holding the already materialized items of the requested page.</returns>
        public Task <IEnumerable <ISearchItem> > Search(string pattern, int page)
        {
            using (Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
            using (Lucene.Net.Store.Directory index = new SimpleFSDirectory(Path.ChangeExtension(_bookFile.FullName,
                                                                                                 Convert.ToInt32(LuceneVersion.LUCENE_48).ToString())))
            using (IndexReader reader = DirectoryReader.Open(index))
            // One analyzer serves every hit and is disposed afterwards; previously a new
            // instance was allocated per hit and never released.
            using (var fragmentAnalyzer = new HTMLStripCharAnalyzer())
            {
                Lucene.Net.Search.Query query =
                    new QueryParser(LuceneVersion.LUCENE_48, nameof(TabHtmlText.Html), analyzer).Parse(pattern);

                // The collector keeps at most the 512 best-scoring hits.
                Lucene.Net.Search.TopScoreDocCollector collector =
                    Lucene.Net.Search.TopScoreDocCollector.Create(512, true);
                Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader);
                searcher.Search(query, collector);
                Lucene.Net.Search.TopDocs docs = collector.GetTopDocs(page * PageSize, PageSize);

                QueryScorer scorer      = new QueryScorer(query);
                Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), scorer) // SpanGradientFormatter
                {
                    TextFragmenter = new SimpleSpanFragmenter(scorer, 30)
                };

                // Materialize inside the using scope: the reader and analyzers are disposed on exit.
                List <ISearchItem> items = new List <ISearchItem>();
                foreach (var scoreDoc in docs.ScoreDocs)
                {
                    Document doc       = searcher.Doc(scoreDoc.Doc);
                    string html        = doc.Get(nameof(TabHtmlText.Html));
                    string[] fragments = highlighter.GetBestFragments(fragmentAnalyzer,
                                                                      nameof(TabHtmlText.Html), html, 3);
                    items.Add(new SearchItem(int.Parse(doc.Get(nameof(TabHtmlText.NumId))), string.Join("\n", fragments)));
                }

                return(Task.FromResult(items.AsEnumerable()));
            }
        }
        /// <summary>
        /// Highlights the words of <paramref name="searchQuery"/> inside a field value using fuzzy
        /// matching, after HTML has been stripped from the value.
        /// </summary>
        /// <param name="indexField">The field value to highlight (HTML is stripped first).</param>
        /// <param name="searchQuery">Space-separated search words; each becomes an optional fuzzy clause.</param>
        /// <param name="highlightField">Name of the field the fuzzy terms and token stream are bound to.</param>
        /// <param name="examineIndexSetName">Index set whose searcher supplies the reader used to rewrite the query.</param>
        /// <param name="maxNumFragments">Maximum number of fragments to retrieve.</param>
        /// <param name="preTag">Markup inserted before each highlighted term.</param>
        /// <param name="postTag">Markup inserted after each highlighted term.</param>
        /// <returns>The best fragments of the stripped text, joined with "...".</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="indexField"/>, <paramref name="searchQuery"/>, <paramref name="highlightField"/>
        /// or <paramref name="examineIndexSetName"/> is null.
        /// </exception>
        public static string GetHighlightWithWildcards(string indexField, string searchQuery, string highlightField, string examineIndexSetName, int maxNumFragments, string preTag, string postTag)
        {
            if (indexField == null)
            {
                throw new ArgumentNullException(nameof(indexField));
            }
            if (searchQuery == null)
            {
                throw new ArgumentNullException(nameof(searchQuery));
            }
            if (highlightField == null)
            {
                throw new ArgumentNullException(nameof(highlightField));
            }
            if (examineIndexSetName == null)
            {
                throw new ArgumentNullException(nameof(examineIndexSetName));
            }

            // One SHOULD clause per non-empty word of the search text.
            var combined = new BooleanQuery();
            string[] words = searchQuery.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            for (int i = 0; i < words.Length; i++)
            {
                var fuzzy = new FuzzyQuery(new Lucene.Net.Index.Term(highlightField, words[i]), 0.5f, 0);
                combined.Add(new BooleanClause(fuzzy, BooleanClause.Occur.SHOULD));
            }

            string plainText = indexField.StripHtml();

            // The query is rewritten against the index reader before it is handed to the scorer.
            var indexReader = GetIndexSearcher(examineIndexSetName).GetIndexReader();
            var scorer      = new QueryScorer(combined.Rewrite(indexReader));
            var highlighter = new Highlighter(new SimpleHTMLFormatter(preTag, postTag), scorer);

            var         analyzer = new StandardAnalyzer(Version.LUCENE_29);
            TokenStream tokens   = analyzer.TokenStream(highlightField, new StringReader(plainText));

            return(highlighter.GetBestFragments(tokens, plainText, maxNumFragments, "..."));
        }
Exemple #4
0
        /// <summary>
        /// Builds an HTML-encoded preview of <paramref name="text"/> with the terms of
        /// <paramref name="query"/> wrapped in <paramref name="prefix"/>/<paramref name="suffix"/>.
        /// </summary>
        /// <param name="query">Query whose terms are highlighted; when null no highlighting is attempted.</param>
        /// <param name="text">Raw content to preview.</param>
        /// <param name="length">Fragment size passed to <c>SimpleFragmenter</c>.</param>
        /// <param name="analyzer">Analyzer used to tokenize <paramref name="text"/>.</param>
        /// <param name="prefix">Markup that replaces the internal highlight start marker after encoding.</param>
        /// <param name="suffix">Markup that replaces the internal highlight end marker after encoding.</param>
        /// <param name="returnRawContentWhenResultIsEmpty">
        /// When true and nothing was highlighted, the HTML-encoded full text is returned instead of an empty string.
        /// </param>
        /// <param name="maxContentHighlightLength">Texts longer than this are not highlighted at all.</param>
        /// <returns>
        /// The highlighted preview (up to three fragments joined with "..."), a fallback as described
        /// above, or the fixed message "Content is too long to highlight".
        /// </returns>
        public static string GenerateHtmlPreviewText(Query query, string text, int length, Analyzer analyzer, string prefix = "<label class='highlight'>", string suffix = "</label>", bool returnRawContentWhenResultIsEmpty = false, int maxContentHighlightLength = Constants.DefaultMaxContentHighlightLength)
        {
            string result = null;

            if (text.Length <= maxContentHighlightLength) // For performance
            {
                if (query != null)
                {
                    var scorer    = new QueryScorer(query);
                    var formatter = new SimpleHTMLFormatter(CodeContentProcessing.HighLightPrefix, CodeContentProcessing.HighLightSuffix);

                    var highlighter = new Highlighter(formatter, scorer);
                    highlighter.TextFragmenter       = new SimpleFragmenter(length);
                    highlighter.MaxDocCharsToAnalyze = maxContentHighlightLength;

                    // Dispose the token stream once highlighting is done, matching the
                    // instance overload of this method, which already does so.
                    using (var stream = analyzer.GetTokenStream(nameof(CodeSource.Content), new StringReader(text)))
                    {
                        result = highlighter.GetBestFragments(stream, text, 3, "...");
                    }
                }

                if (string.IsNullOrEmpty(result))
                {
                    result = returnRawContentWhenResultIsEmpty ? HttpUtility.HtmlEncode(text) : string.Empty;
                }
                else
                {
                    // Encode first, then swap the internal markers for the caller's markup
                    // so that the markup itself is not encoded.
                    result = HttpUtility.HtmlEncode(result).Replace(CodeContentProcessing.HighLightPrefix, prefix).Replace(CodeContentProcessing.HighLightSuffix, suffix);
                }
            }
            else
            {
                result = "Content is too long to highlight";
            }

            return(result);
        }
Exemple #5
0
        /// <summary>
        /// Parses <paramref name="query"/> against the "contents" field and returns the top hits
        /// with title, path, score and a highlighted excerpt.
        /// </summary>
        /// <param name="query">Query text in Lucene query-parser syntax.</param>
        /// <param name="maxResults">Maximum number of hits requested from the searcher.</param>
        /// <returns>One <c>Hit</c> per scored document, in the order returned by the searcher.</returns>
        public IEnumerable <Hit> Search(string query, int maxResults)
        {
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

            QueryParser qp = new QueryParser(
                Lucene.Net.Util.Version.LUCENE_29,
                "contents",
                analyzer
                );
            Query q = qp.Parse(query);

            TopDocs    top    = searcher.Search(q, maxResults);
            List <Hit> result = new List <Hit>();

            // Scorer and highlighter depend only on the query, so they are built once
            // instead of once per hit.
            var scorer      = new QueryScorer(q, searcher.IndexReader, "contents");
            var highlighter = new Highlighter(scorer);

            foreach (var scoreDoc in top.ScoreDocs)
            {
                var doc = searcher.Doc(scoreDoc.Doc);

                // Get returns null when "contents" is not stored; treat that as empty text
                // rather than failing inside the highlighter.
                string contents = doc.Get("contents") ?? string.Empty;

                result.Add(new Hit()
                {
                    Relevance = scoreDoc.Score,
                    Title     = doc.Get("title"),
                    Url       = doc.Get("path"),
                    Excerpt   = highlighter.GetBestFragment(analyzer, "contents", contents)
                });
            }

            return(result);
        }
    // Writes the highlighted fragments of a document's "mainText" field for the given search
    // term to the console, followed by a separator line, then waits for a key press.
    // NOTE: "mainText" must be a stored field at indexing time, as must any other field
    // that is read back from the document here.
    private static void DisplayMessage(Document d, string searchTerm)
    {
        // Alternative dump used with the database index:
        //Console.WriteLine("id: " + d.Get("id") + "\n" + "messageBox: " + d.Get("messageBox") + "\n" + "incoming: " + d.Get("incoming") + "\n" + "date: " + d.Get("date") + "\n" + "mainText: " + d.Get("mainText"));

        // Alternative dump used with the test files:
        //Console.WriteLine("id: " + d.Get("id") + "\n" + "mainText: " + d.Get("mainText"));
        string storedText = d.Get("mainText");
        var    termQuery  = new TermQuery(new Term("mainText", searchTerm));

        Lucene.Net.Search.Highlight.IScorer termScorer = new QueryScorer(termQuery);
        var fragmentHighlighter = new Highlighter(termScorer);

        var         textReader = new System.IO.StringReader(storedText);
        TokenStream tokens     = new SimpleAnalyzer().TokenStream("mainText", textReader);

        // At most 5 fragments are requested from the highlighter.
        string[] fragments = fragmentHighlighter.GetBestFragments(tokens, storedText, 5);
        for (int i = 0; i < fragments.Length; i++)
        {
            Console.Write(fragments[i]);
        }

        Console.WriteLine("=====================");
        Console.ReadKey();
    }
        /// <summary>
        /// Lazily searches the index at <c>_path</c> and yields, per hit, its score, the document
        /// and up to five fragments of the default field that match the query.
        /// </summary>
        /// <param name="text">Query text; null is treated as an empty string.</param>
        /// <param name="defaultField">Field used for parsing and as the source of fragments.</param>
        /// <param name="maxResultCount">Maximum number of hits requested from the searcher.</param>
        /// <returns>
        /// A deferred sequence of (score, document, fragments). The index stays open while the
        /// sequence is being enumerated and is closed when enumeration ends or is disposed.
        /// </returns>
        public IEnumerable <Tuple <float, Document, string[]> > Search(string text, string defaultField = "title", int maxResultCount = 500)
        {
            var   parser = new QueryParser(Constants.Version, defaultField, _analyzer);
            Query query  = parser.Parse(text ?? string.Empty);

            // Empty pre/post tags: fragments are selected but no markup is inserted.
            var formatter   = new SimpleHTMLFormatter(string.Empty, string.Empty);
            var fragmenter  = new SimpleFragmenter(120);
            var scorer      = new QueryScorer(query);
            var highlighter = new Highlighter(formatter, scorer)
            {
                TextFragmenter = fragmenter
            };

            using (var directory = FSDirectory.Open(new DirectoryInfo(_path), new NoLockFactory()))
            // The reader is owned here: a searcher built from an existing reader does not
            // close it, so it needs its own using block.
            using (var reader = IndexReader.Open(directory, ReadonlyMode))
            using (var searcher = new IndexSearcher(reader))
            {
                TopDocs hits = searcher.Search(query, maxResultCount);

                foreach (var scoreDoc in hits.ScoreDocs)
                {
                    Document doc = searcher.Doc(scoreDoc.Doc);

                    // Get returns null when the field is not stored; highlight empty text
                    // (yielding no fragments) instead of failing mid-enumeration.
                    var field       = doc.Get(defaultField) ?? string.Empty;
                    var tokenStream = _analyzer.TokenStream(defaultField, new StringReader(field));
                    var fragments   = highlighter.GetBestFragments(tokenStream, field, 5);

                    yield return(new Tuple <float, Document, string[]>(scoreDoc.Score, doc, fragments));
                }
            }
        }
Exemple #8
0
        /// <summary>
        /// Highlights the terms of <paramref name="luceneQuery"/> inside <paramref name="value"/>.
        /// </summary>
        /// <param name="value">Text to highlight.</param>
        /// <param name="searcher">Searcher whose index reader is used to rewrite the query.</param>
        /// <param name="highlightField">Field name under which <paramref name="value"/> is tokenized.</param>
        /// <param name="luceneQuery">Query to rewrite and score against.</param>
        /// <returns>Up to <c>MaxNumHighlights</c> best fragments joined with <c>Separator</c>.</returns>
        public string GetHighlight(string value, IndexSearcher searcher, string highlightField, Query luceneQuery)
        {
            // The query is rewritten against the reader before scoring.
            var rewrittenQuery = luceneQuery.Rewrite(searcher.GetIndexReader());
            var queryScorer    = new QueryScorer(rewrittenQuery);
            var fragmentMaker  = new Highlighter(HighlightFormatter, queryScorer);

            var valueReader = new StringReader(value);
            var tokens      = HighlightAnalyzer.TokenStream(highlightField, valueReader);

            return(fragmentMaker.GetBestFragments(tokens, value, MaxNumHighlights, Separator));
        }
Exemple #9
0
        /// <summary>
        /// Returns up to three fragments of <paramref name="text"/> with query matches wrapped in
        /// <c>&lt;b&gt;</c> tags.
        /// </summary>
        /// <param name="query">Query whose terms are scored and highlighted.</param>
        /// <param name="text">Text the fragments are cut from.</param>
        /// <param name="analys">Analyzer handed to <c>TokenSources.GetAnyTokenStream</c>.</param>
        /// <param name="searcher">Searcher whose index reader supplies the token stream.</param>
        /// <param name="doc">Hit whose "Contents" field is tokenized.</param>
        /// <returns>The best fragments found by the highlighter.</returns>
        public static string[] TextHighlighter(Query query, string text, StandardAnalyzer analys, IndexSearcher searcher, ScoreDoc doc)
        {
            var boldFormatter = new SimpleHTMLFormatter("<b>", "</b>");
            var highlighter   = new Highlighter(boldFormatter, new QueryScorer(query));

            // Tokens come from the document's "Contents" field in the index.
            var tokens = TokenSources.GetAnyTokenStream(searcher.IndexReader, doc.Doc, "Contents", analys);

            return(highlighter.GetBestFragments(tokens, text, 3));
        }
Exemple #10
0
        /// <summary>
        /// Builds a highlighter for <paramref name="query"/> that wraps matches in <c>&lt;b&gt;</c>
        /// tags and cuts the text with a span fragmenter of size 150.
        /// </summary>
        /// <param name="query">Query whose terms are scored and highlighted.</param>
        /// <returns>The configured highlighter.</returns>
        private Highlighter CreateHighlighter(Query query)
        {
            var queryScorer = new QueryScorer(query);
            var highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), queryScorer);

            // The fragmenter uses the same scorer instance as the highlighter.
            highlighter.TextFragmenter = new SimpleSpanFragmenter(queryScorer, 150);

            return(highlighter);
        }
Exemple #11
0
        /// <summary>
        /// Builds an HTML-encoded preview of <paramref name="text"/> with the content terms of
        /// <paramref name="searchRequest"/> wrapped in <paramref name="prefix"/>/<paramref name="suffix"/>.
        /// </summary>
        /// <param name="searchRequest">Request supplying the index key, the content query and case sensitivity.</param>
        /// <param name="text">Raw content to preview.</param>
        /// <param name="length">Fragment size passed to <c>SimpleFragmenter</c>.</param>
        /// <param name="prefix">Markup that replaces the internal highlight start marker after encoding.</param>
        /// <param name="suffix">Markup that replaces the internal highlight end marker after encoding.</param>
        /// <param name="returnRawContentWhenResultIsEmpty">
        /// When true and nothing was highlighted (or the request is null), the HTML-encoded full text is returned.
        /// </param>
        /// <returns>
        /// The highlighted preview, a fallback as described above, an empty string when no index
        /// maintainer exists, or the fixed message "Content is too long to highlight".
        /// </returns>
        public string GenerateHtmlPreviewText(SearchRequest searchRequest, string text, int length, string prefix = "<span class='highlight'>", string suffix = "</span>", bool returnRawContentWhenResultIsEmpty = false)
        {
            if (searchRequest == null)
            {
                return(returnRawContentWhenResultIsEmpty ? HttpUtility.HtmlEncode(text) : string.Empty);
            }

            var maintainer = GetIndexMaintainerWrapper(searchRequest.IndexPk);

            if (maintainer == null)
            {
                return(string.Empty);
            }

            var contentQuery = GetContentQuery(searchRequest, maintainer);

            // A non-positive configured limit falls back to the application default.
            var lengthLimit = maintainer.IndexConfig.MaxContentHighlightLength;

            if (lengthLimit <= 0)
            {
                lengthLimit = Constants.DefaultMaxContentHighlightLength;
            }

            // Oversized content is not highlighted at all, for performance.
            if (text.Length > lengthLimit)
            {
                return("Content is too long to highlight");
            }

            string highlighted = null;

            if (contentQuery != null)
            {
                var highlighter = new Highlighter(new SimpleHTMLFormatter(HighLightPrefix, HighLightSuffix), new QueryScorer(contentQuery))
                {
                    TextFragmenter       = new SimpleFragmenter(length),
                    MaxDocCharsToAnalyze = lengthLimit
                };

                using (var stream = GetTokenStream(text, searchRequest.CaseSensitive))
                {
                    highlighted = highlighter.GetBestFragments(stream, text, 3, "...");
                }
            }

            if (string.IsNullOrEmpty(highlighted))
            {
                return(returnRawContentWhenResultIsEmpty ? HttpUtility.HtmlEncode(text) : string.Empty);
            }

            // Encode first, then swap the internal markers for the caller's markup.
            return(HttpUtility.HtmlEncode(highlighted).Replace(HighLightPrefix, prefix).Replace(HighLightSuffix, suffix));
        }
Exemple #12
0
        /// <summary>
        /// Highlights the keyword inside a search result.
        /// </summary>
        /// <param name="keyword">Keyword; parsed as a query against a fixed field name.</param>
        /// <param name="content">Search result text to highlight.</param>
        /// <param name="analyzer">Analyzer for parsing and tokenizing, e.g. <c>new SimpleAnalyzer()</c>.</param>
        /// <returns>The best highlighted fragment, or <paramref name="content"/> unchanged when there is none.</returns>
        public static string HighLight(string keyword, string content, Analyzer analyzer)
        {
            const string FieldName = "keyword";

            var   parser       = new QueryParserEx(Lucene.Net.Util.Version.LUCENE_30, FieldName, analyzer);
            Query keywordQuery = parser.Parse(keyword);

            var queryScorer = new QueryScorer(keywordQuery);
            var highlighter = new Highlighter(new SimpleHTMLFormatter(PRE_TAG, END_TAG), queryScorer)
            {
                TextFragmenter = new SimpleSpanFragmenter(queryScorer)
            };

            string best = highlighter.GetBestFragment(analyzer, FieldName, content);

            if (best == null)
            {
                // Nothing matched: hand back the original text.
                return(content);
            }
            return(best);
        }
        /// <summary>
        /// Parses <paramref name="luceneRawQuery"/> for <paramref name="highlightField"/> and
        /// highlights its terms inside <paramref name="value"/>.
        /// </summary>
        /// <param name="value">Text to highlight.</param>
        /// <param name="highlightField">Field name used for the parser and for tokenizing.</param>
        /// <param name="searcher">Searcher used to rewrite the parsed query.</param>
        /// <param name="luceneRawQuery">Raw query text.</param>
        /// <returns>Up to <c>MaxNumHighlights</c> best fragments joined with <c>Separator</c>.</returns>
        public string GetHighlight(string value, string highlightField, Searcher searcher, string luceneRawQuery)
        {
            var parsedQuery    = GetQueryParser(highlightField).Parse(luceneRawQuery);
            var rewrittenQuery = searcher.Rewrite(parsedQuery);
            var queryScorer    = new QueryScorer(rewrittenQuery);

            var fragmentMaker = new Highlighter(HighlightFormatter, queryScorer);

            var valueReader = new StringReader(value);
            var tokens      = HighlightAnalyzer.TokenStream(highlightField, valueReader);

            return(fragmentMaker.GetBestFragments(tokens, value, MaxNumHighlights, Separator));
        }
        /// <summary>
        /// Translates the text part of a <c>LookQuery</c> into Lucene clauses on the parsing context
        /// and, when requested, installs a highlight callback.
        /// </summary>
        /// <param name="parsingContext">Context that receives the query clauses and the highlight delegate.</param>
        /// <param name="lookQuery">The query to parse; nothing happens when its <c>TextQuery</c> is null.</param>
        /// <exception cref="ParsingException">The search text cannot be parsed into a Lucene query.</exception>
        private static void ParseTextQuery(ParsingContext parsingContext, LookQuery lookQuery)
        {
            if (lookQuery.TextQuery == null)
            {
                return;
            }

            // Any text query requires the has-text marker field to be "1".
            var hasTextClause = new TermQuery(new Term(LookConstants.HasTextField, "1"));
            parsingContext.QueryAdd(hasTextClause, BooleanClause.Occur.MUST);

            if (string.IsNullOrWhiteSpace(lookQuery.TextQuery.SearchText))
            {
                return;
            }

            var queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, LookConstants.TextField, lookQuery.SearchingContext.Analyzer);

            Query searchTextQuery;

            try
            {
                searchTextQuery = queryParser.Parse(lookQuery.TextQuery.SearchText);
            }
            catch
            {
                throw new ParsingException($"Unable to parse LookQuery.TextQuery.SearchText: '{ lookQuery.TextQuery.SearchText }' into a Lucene query");
            }

            if (searchTextQuery == null)
            {
                return;
            }

            parsingContext.QueryAdd(searchTextQuery, BooleanClause.Occur.MUST);

            if (!lookQuery.TextQuery.GetHighlight)
            {
                return;
            }

            // The scorer works on the query rewritten against the current index reader.
            var rewritten   = searchTextQuery.Rewrite(lookQuery.SearchingContext.IndexSearcher.GetIndexReader());
            var queryScorer = new QueryScorer(rewritten);
            var highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), queryScorer);

            // The analyzer is looked up each time the callback runs, not captured up front.
            parsingContext.GetHighlight = (fieldText) =>
            {
                var tokens = lookQuery.SearchingContext.Analyzer.TokenStream(LookConstants.TextField, new StringReader(fieldText));

                // A single fragment at most.
                var fragment = highlighter.GetBestFragments(tokens, fieldText, 1, "...");

                return(new HtmlString(fragment));
            };
        }
Exemple #15
0
        /// <summary>
        /// Highlights the terms of <paramref name="q"/> that target the <c>Body</c> field inside
        /// <paramref name="text"/>.
        /// </summary>
        /// <param name="q">Query to score against.</param>
        /// <param name="text">Text to highlight.</param>
        /// <returns>Up to three fragments of size 150, joined with "...".</returns>
        private string HighlightContents(Query q, string text)
        {
            const int FragmentSize = 150;

            QueryScorer bodyScorer    = new QueryScorer(q, Body);
            Formatter   spanFormatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>");
            Highlighter highlighter   = new Highlighter(spanFormatter, bodyScorer);

            highlighter.SetTextFragmenter(new SimpleFragmenter(FragmentSize));

            var         analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            TokenStream tokens   = analyzer.TokenStream(Body, new StringReader(text));

            return(highlighter.GetBestFragments(tokens, text, 3, "..."));
        }
Exemple #16
0
        /// <summary>
        /// Parses <paramref name="query"/> across <paramref name="fields"/>, runs the search and
        /// builds result items with a highlighted preview of each document's "content" field.
        /// </summary>
        /// <param name="query">Query text in query-parser syntax; leading wildcards are allowed and terms are AND-ed by default.</param>
        /// <param name="fields">Fields the multi-field parser searches.</param>
        /// <param name="topResultCount">Maximum number of hits requested from the searcher.</param>
        /// <returns><c>SearchResult.Empty</c> when nothing matched; otherwise the total hit count and the formatted items.</returns>
        private SearchResult SearchCore(string query, string[] fields, int topResultCount)
        {
            // Search
            var parser = new MultiFieldQueryParser(Config.LuceneVersion, fields, Analyzer);

            parser.AllowLeadingWildcard = true;
            parser.DefaultOperator      = Operator.AND;
            parser.Locale            = Config.Locale;
            parser.AnalyzeRangeTerms = true;

            var q = parser.Parse(query);

            var results = Searcher.Search(q, topResultCount);
            var hits    = results.ScoreDocs;

            if (results.TotalHits == 0)
            {
                return(SearchResult.Empty);
            }

            // Format
            var items = new List <SearchResultItem>();

            var scorer      = new QueryScorer(q);
            var formatter   = new SimpleHTMLFormatter("<mark>", "</mark>");
            var highlighter = new Highlighter(formatter, scorer)
            {
                TextFragmenter = new SimpleFragmenter(Config.FragmentLength)
            };

            // ReSharper disable once ForCanBeConvertedToForeach
            for (var i = 0; i < hits.Length; i++)
            {
                var doc   = Searcher.Doc(hits[i].Doc);
                var url   = doc.Get("url");
                var title = doc.Get("title");

                // Get returns null when the field is not stored; preview empty text instead of failing.
                var content = doc.Get("content") ?? string.Empty;

                // The first argument is the FIELD NAME the text is analyzed as. The document's
                // URL value was passed here before, which is not a field name.
                using (var stream = Analyzer.GetTokenStream("content", new StringReader(content)))
                {
                    var preview = highlighter.GetBestFragments(stream, content, Config.ResultFragments, Config.FragmentSeparator);

                    var item = new SearchResultItem(url, ToWbrWrapName(title), preview);
                    items.Add(item);
                }
            }

            return(new SearchResult(results.TotalHits, items));
        }
Exemple #17
0
        /// <summary>
        /// Produces a plain-text preview of <paramref name="text"/> built from the fragments that
        /// best match <paramref name="q"/>; no highlight markup is inserted.
        /// </summary>
        /// <param name="q">Query whose terms select the fragments.</param>
        /// <param name="text">Text to preview.</param>
        /// <returns>
        /// Up to two fragments of size 100 joined with "...", or, when nothing matched, the first
        /// 100 characters of <paramref name="text"/> (the whole text if it is shorter).
        /// </returns>
        public string GeneratePreviewText(Lucene.Net.Search.Query q, string text)
        {
            const int PreviewLength = 100;

            QueryScorer scorer      = new QueryScorer(q);
            IFormatter  formatter   = new SimpleHTMLFormatter("", "");
            Highlighter highlighter = new Highlighter(formatter, scorer);

            highlighter.TextFragmenter = new SimpleFragmenter(PreviewLength);
            TokenStream stream   = new SimpleAnalyzer().TokenStream(TEXT_FN, new StringReader(text));
            string      fragment = highlighter.GetBestFragments(stream, text, 2, "...");

            if (string.IsNullOrEmpty(fragment))
            {
                // Substring(0, 100) throws when the text is shorter than 100 characters,
                // so only truncate when there is something to cut.
                fragment = text.Length > PreviewLength ? text.Substring(0, PreviewLength) : text;
            }
            return(fragment);
        }
    // Writes to the console the fragments of the given text that contain the search term,
    // with the term marked up by the highlighter's default formatter.
    public static void RealHighlighter(string searchTerm, string text)
    {
        var termQuery = new TermQuery(new Term("mainText", searchTerm));

        Lucene.Net.Search.Highlight.IScorer termScorer = new QueryScorer(termQuery);
        var fragmentHighlighter = new Highlighter(termScorer);

        var         textReader = new System.IO.StringReader(text);
        TokenStream tokens     = new SimpleAnalyzer().TokenStream("mainText", textReader);

        // At most 5 fragments are requested from the highlighter.
        string[] fragments = fragmentHighlighter.GetBestFragments(tokens, text, 5);
        for (int i = 0; i < fragments.Length; i++)
        {
            Console.Write(fragments[i]);
        }
    }
        /// <summary>
        /// Runs a multi-field query and optionally replaces selected field values of each result
        /// with their highlighted form.
        /// </summary>
        /// <param name="luceneQuery">Query text; a null, empty or whitespace query yields no results.</param>
        /// <param name="maxResults">Maximum number of hits requested from the searcher.</param>
        /// <param name="highlightOpenTag">Markup inserted before each match; highlighting is skipped when null or empty.</param>
        /// <param name="highlightCloseTag">Markup inserted after each match; highlighting is skipped when null or empty.</param>
        /// <param name="fieldsToHighlight">Names of the result fields to highlight; highlighting is skipped when null or empty.</param>
        /// <returns>The search results, in the order returned by the searcher.</returns>
        public IEnumerable <SearchResult> Search(string luceneQuery, int maxResults = 500, string highlightOpenTag = null, string highlightCloseTag = null, params string[] fieldsToHighlight)
        {
            var results = new List <SearchResult>();

            if (String.IsNullOrWhiteSpace(luceneQuery))
            {
                return(results);
            }

            // One analyzer serves both query parsing and highlighting; previously a new
            // instance was created for every highlighted field of every result.
            var     analyzer = new StandardAnalyzer(LuceneVersion);
            var     parser   = new MultiFieldQueryParser(LuceneVersion, searchFields, analyzer);
            Query   query    = parser.Parse(luceneQuery);
            TopDocs topDocs  = searcher.Search(query, maxResults);

            foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
            {
                Document document = reader.Document(scoreDoc.doc);
                var      result   = new SearchResult(document, scoreDoc.score);
                results.Add(result);
            }

            // A params array is null when the caller passes null explicitly.
            bool highlightRequested = !String.IsNullOrEmpty(highlightOpenTag)
                                      && !String.IsNullOrEmpty(highlightCloseTag)
                                      && fieldsToHighlight != null
                                      && fieldsToHighlight.Length > 0;
            if (!highlightRequested)
            {
                return(results);
            }

            var scorer      = new QueryScorer(query);
            var formatter   = new SimpleHTMLFormatter(highlightOpenTag, highlightCloseTag);
            var highlighter = new Highlighter(formatter, scorer);
            highlighter.SetTextFragmenter(new SimpleFragmenter());

            foreach (SearchResult result in results)
            {
                foreach (string highlightField in fieldsToHighlight)
                {
                    if (!result.Fields.ContainsKey(highlightField))
                    {
                        continue;
                    }

                    string fieldValue = result[highlightField];
                    if (String.IsNullOrEmpty(fieldValue))
                    {
                        // Nothing to highlight; a null value would also fail in StringReader.
                        continue;
                    }

                    TokenStream stream                = analyzer.TokenStream(highlightField, new StringReader(fieldValue));
                    string      highlightedFieldValue = highlighter.GetBestFragments(stream, fieldValue, 500, "...");

                    // Keep the original value when the highlighter produced nothing.
                    if (!String.IsNullOrWhiteSpace(highlightedFieldValue))
                    {
                        result.Fields[highlightField] = highlightedFieldValue;
                    }
                }
            }
            return(results);
        }
    // Test method for highlighting: highlights the term "hell" in a fixed sample sentence
    // and writes the resulting fragment(s) to the console, one per line.
    public static void Highlighter()
    {
        string sampleText  = "I am a man that follows hell.";
        var    sampleQuery = new TermQuery(new Term("", "hell"));

        Lucene.Net.Search.Highlight.IScorer termScorer = new QueryScorer(sampleQuery);
        Highlighter fragmentHighlighter = new Highlighter(termScorer);

        var         textReader = new System.IO.StringReader(sampleText);
        TokenStream tokens     = new SimpleAnalyzer().TokenStream("field", textReader);

        // At most 1 fragment is requested from the highlighter.
        string[] fragments = fragmentHighlighter.GetBestFragments(tokens, sampleText, 1);
        for (int i = 0; i < fragments.Length; i++)
        {
            Console.WriteLine(fragments[i]);
        }
    }
        /// <summary>
        /// Highlights the search term in one field of the supplied result.
        /// </summary>
        /// <param name="Result">Search result whose field value is highlighted.</param>
        /// <param name="UmbracoProperty">Property naming the field and stating whether wildcards are in use.</param>
        /// <param name="Summary">Receives the best fragment; empty when the field value is null or empty.</param>
        /// <returns>True when a non-empty fragment was produced; otherwise false.</returns>
        protected bool LuceneHighlightField(SearchResult Result, UmbracoProperty UmbracoProperty, out string Summary)
        {
            Summary = string.Empty;
            var fieldName  = UmbracoProperty.PropertyName;
            var fieldValue = Result.Fields[fieldName];

            if (string.IsNullOrEmpty(fieldValue))
            {
                return(false);
            }

            // Highlighters are built once per field name and cached.
            Highlighter highlighter;
            if (!HighlighterCache.ContainsKey(fieldName))
            {
                var searchTerms = SearchUtilities.GetSearchTermsSplit(Parameters.SearchTerm);
                var luceneQuery = QueryHighlight(UmbracoProperty, searchTerms);
                var parser      = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, fieldName, _analyzer);
                // This is needed to make wildcards highlight correctly
                if (UmbracoProperty.Wildcard)
                {
                    parser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
                }
                var rewritten = parser.Parse(luceneQuery).Rewrite(_reader);
                highlighter = new Highlighter(_formatter, new QueryScorer(rewritten));
                highlighter.SetTextFragmenter(new SimpleFragmenter(Parameters.SummaryLength));
                HighlighterCache.Add(fieldName, highlighter);
            }
            else
            {
                highlighter = HighlighterCache[fieldName];
            }

            using (var sr = new StringReader(fieldValue))
            {
                var tokens = _analyzer.TokenStream(fieldName, sr);
                Summary = highlighter.GetBestFragment(tokens, fieldValue);
            }

            return(!string.IsNullOrEmpty(Summary));
        }
Exemple #22
0
        /// <summary>
        /// Builds a highlighted summary of <paramref name="text"/> for <paramref name="query"/>.
        /// </summary>
        /// <param name="query">Query whose terms are highlighted.</param>
        /// <param name="text">Text to summarize.</param>
        /// <param name="fileName">Value passed to the analyzer as the field name when tokenizing.</param>
        /// <returns>
        /// Up to two fragments of size 250 joined with "...", or the full <paramref name="text"/>
        /// when the highlighter produces nothing.
        /// </returns>
        public string GetSummaryWithHighlight(Query query, string text, string fileName)
        {
            var analyzer = _getAnalyzer();

            var spanFormatter = new SimpleHTMLFormatter("<span class=\"search-highlight\">", "</span>");
            var highlighter   = new Highlighter(spanFormatter, new QueryScorer(query))
            {
                TextFragmenter = new SimpleFragmenter(250)
            };

            var tokens      = analyzer.TokenStream(fileName, new StringReader(text));
            var highlighted = highlighter.GetBestFragments(tokens, text, 2, "...");

            // Fall back to the untouched text when there is nothing to show.
            return(string.IsNullOrEmpty(highlighted) ? text : highlighted);
        }
        /// <summary>
        /// Creates a fragment provider for <paramref name="index"/> whose highlighter scores against
        /// <paramref name="query"/> and cuts text with a span fragmenter of size 160.
        /// </summary>
        /// <param name="index">Index stored on the provider.</param>
        /// <param name="query">Query whose terms are highlighted.</param>
        /// <exception cref="ArgumentNullException"><paramref name="index"/> or <paramref name="query"/> is null.</exception>
        public SimpleHtmlHighlightedFragmentProvider(ICrmEntityIndex index, Query query)
        {
            if (index == null)
            {
                throw new ArgumentNullException("index");
            }

            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            _index = index;

            // The scorer instance is shared by the highlighter and its fragmenter.
            var scorer      = new QueryScorer(query);
            var formatter   = new SimpleHTMLFormatter(_highlighterStartTag, _highlighterEndTag);
            var highlighter = new Highlighter(formatter, scorer);

            highlighter.TextFragmenter = new SimpleSpanFragmenter(scorer, 160);

            _highlighter = highlighter;
        }
Exemple #24
0
        /// <summary>
        /// Produces a highlighted summary of <paramref name="text"/> for <paramref name="query"/>.
        /// </summary>
        /// <param name="query">Query whose terms are highlighted.</param>
        /// <param name="text">Text to summarize.</param>
        /// <param name="htmlEncodeOutput">True to use <c>SimpleHTMLEncoder</c>; false to use <c>DefaultEncoder</c>.</param>
        /// <returns>
        /// Null when <paramref name="query"/> is null or <paramref name="text"/> is null or empty;
        /// the configured fragments joined by the configured separator on success; the original
        /// <paramref name="text"/> when highlighting throws.
        /// </returns>
        protected string Summarize(LuceneQuery query, string text, bool htmlEncodeOutput)
        {
            if (query == null || string.IsNullOrEmpty(text))
            {
                return(null);
            }

            try
            {
                // Assemble the highlighter from the configured tags, the chosen encoder and the query scorer.
                var tagFormatter = new SimpleHTMLFormatter(_configuration.StartTag, _configuration.EndTag);
                var queryScorer  = new QueryScorer(query);

                Encoder outputEncoder = htmlEncodeOutput
                                        ? (Encoder) new SimpleHTMLEncoder()
                                        : new DefaultEncoder();

                var highlighter = new LuceneHighlighter(tagFormatter, outputEncoder, queryScorer);
                highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer, _configuration.FragmentSize));

                // Tokenize the text and extract the best fragments.
                var tokens = _contentAnalyzer.tokenStream(string.Empty, new java.io.StringReader(text));
                return(highlighter.getBestFragments(tokens, text, _configuration.MaxFragments, _configuration.Separator));
            }
            catch (Exception)
            {
                // Best effort: any failure returns the text unchanged.
                return(text);
            }
        }
Exemple #25
0
 /// <summary>
 /// Creates a single highlighter.
 /// </summary>
 /// <param name="parser">A lucene parser.</param>
 /// <param name="value">The value which was searched.</param>
 /// <param name="highlightPreTag">Pre match tag.</param>
 /// <param name="highlightPostTag">Post match tag.</param>
 /// <returns>A highlighter, or <c>null</c> when building it fails (the failure is logged).</returns>
 private Highlighter MakeValueHighlighter(QueryParser parser, string value, string highlightPreTag, string highlightPostTag)
 {
     // With lucene-net 3.0.3 some queries are not supported, for instance query such as "*someterm" (prefix is wildcard).
     // These queries throw exception when calling QueryParser.Parse(string value) regarding use of configuration manager
     // which is not supported in net core. see bug https://dev.azure.com/csedevil/K2-bridge-internal/_workitems/edit/1658
     // these terms are discarded during the following creation of highlighter.
     try
     {
         var luceneQuery = parser.Parse(value);
         var scorer      = new QueryScorer(luceneQuery);
         var formatter   = new SimpleHTMLFormatter(highlightPreTag, highlightPostTag);

         // int.MaxValue for both limits: a single fragment, with no cap on analyzed characters.
         return new Highlighter(formatter, scorer)
         {
             TextFragmenter = new SimpleSpanFragmenter(scorer, int.MaxValue),
             MaxDocCharsToAnalyze = int.MaxValue,
         };
     }
     catch (Exception e)
     {
         // Pass the value as a template argument instead of interpolating it into the
         // template: search values may contain '{' / '}' (e.g. range syntax), which would
         // otherwise be parsed as format placeholders by the logger.
         logger.LogError(e, "Failure creating highlighters for {Value}", value);
         return null;
     }
 }
Exemple #26
0
        /// <summary>
        /// Wraps occurrences of the space-separated words of <paramref name="term"/> found in
        /// <paramref name="text"/> with a <c>search_highlight</c> span.
        /// </summary>
        /// <param name="text">Text to highlight.</param>
        /// <param name="term">Space-separated words; a null value is treated as no words.</param>
        /// <returns>
        /// An empty string when <paramref name="text"/> is null or empty; otherwise whatever the
        /// highlighter returns for up to 100 fragments joined with "...".
        /// </returns>
        public static string Highlight(this string text, string term)
        {
            if (string.IsNullOrEmpty(text))
            {
                return(string.Empty);
            }

            const int    fragmentLength    = 100;
            const string highlightStartTag = @"<span class='search_highlight'>";
            const string highlightEndTag   = @"</span>";

            // One optional TermQuery per word. Words are used verbatim.
            // NOTE(review): presumably the analyzer normalizes tokens (e.g. case), so words
            // that are not already normalized may not match - confirm.
            var bq    = new BooleanQuery();
            var words = (term ?? string.Empty).Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            foreach (var word in words)
            {
                bq.Add(new TermQuery(new Term("field", word)), Occur.SHOULD);
            }

            var scorer      = new QueryScorer(bq);
            var formatter   = new SimpleHTMLFormatter(highlightStartTag, highlightEndTag);
            var highlighter = new Highlighter(formatter, scorer)
            {
                TextFragmenter = new SimpleFragmenter(fragmentLength)
            };

            // Dispose the analyzer and its token stream once highlighting is done.
            using (var analyzer = new StandardAnalyzer(Lucene.Net.Util.LuceneVersion.LUCENE_48))
            using (TokenStream stream = analyzer.GetTokenStream("field", new StringReader(text)))
            {
                return highlighter.GetBestFragments(stream, text, 100, "...");
            }
        }
        /// <summary>
        /// Returns the best highlighted fragment of <paramref name="sourceText"/> for
        /// <paramref name="keyword"/>.
        /// </summary>
        /// <param name="keyword">Search keyword, turned into a query via <c>GetFuzzyquery</c>.</param>
        /// <param name="sourceText">Text to highlight.</param>
        /// <returns>
        /// An empty string when either argument is null or whitespace; the first fragment when
        /// the highlighter produced any; otherwise the leading <c>fragmentSize</c> characters
        /// of <paramref name="sourceText"/>.
        /// </returns>
        public string HighLight(string keyword, string sourceText)
        {
            if (keyword.IsNullOrWhiteSpace())
            {
                return string.Empty;
            }

            if (sourceText.IsNullOrWhiteSpace())
            {
                return string.Empty;
            }

            var fuzzyQuery = _luceneIndexSearcher.GetFuzzyquery(queryParser, keyword);
            var highlighter = new Highlighter(formatter, new QueryScorer(fuzzyQuery))
            {
                TextFragmenter = new SimpleFragmenter(fragmentSize)
            };

            TokenStream tokens = _analyzer.GetTokenStream(filedName, new StringReader(sourceText));
            var fragments = highlighter.GetBestFragments(tokens, sourceText, maxNumFragments);

            if (fragments.Length > 0)
            {
                return fragments[0];
            }

            // Nothing was highlighted: fall back to the head of the source text.
            int headLength = Math.Min(this.fragmentSize, sourceText.Length);
            return sourceText.Substring(0, headLength);
        }
        /// <summary>
        /// Builds a short preview of <paramref name="text"/> around matches of <c>currentQuery</c>.
        /// </summary>
        /// <param name="text">Text to preview.</param>
        /// <returns>
        /// Up to two best fragments joined with "..."; when that is null or empty, the first
        /// 100 characters of <paramref name="text"/> (or all of it when shorter).
        /// </returns>
        public string GeneratePreviewText(string text)
        {
            // Empty tags: fragments are selected but carry no markup.
            QueryScorer queryScorer = new QueryScorer(currentQuery);
            IFormatter  plainFormat = new SimpleHTMLFormatter("", "");
            Highlighter previewer   = new Highlighter(plainFormat, queryScorer);

            previewer.TextFragmenter = new SimpleFragmenter(100);

            TokenStream tokens  = new SnowballAnalyzer(VERSION, "English").TokenStream(URL_FN, new StringReader(text));
            string      preview = previewer.GetBestFragments(tokens, text, 2, "...");

            if (!string.IsNullOrEmpty(preview))
            {
                return preview;
            }

            // No fragment was produced: fall back to the head of the text.
            return text.Length > 100 ? text.Substring(0, 100) : text;
        }
Exemple #29
0
        /// <summary>
        /// Highlights matches of <paramref name="pattern"/> inside <paramref name="html"/> using the
        /// token stream of the indexed document whose <c>NumId</c> equals <paramref name="numId"/>.
        /// </summary>
        /// <param name="numId">Value of the <c>NumId</c> field identifying the indexed document.</param>
        /// <param name="pattern">Query text parsed against the <c>Html</c> field.</param>
        /// <param name="html">Text to highlight.</param>
        /// <returns>
        /// The highlighted text. <paramref name="html"/> is returned unchanged when the pattern is
        /// null or whitespace, when no indexed document matches <paramref name="numId"/>, or when
        /// the highlighter returns null.
        /// </returns>
        private string Highlight(int numId, string pattern, string html)
        {
            if (string.IsNullOrWhiteSpace(pattern))
            {
                return html;
            }

            using (Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
            using (Lucene.Net.Store.Directory index = new SimpleFSDirectory(Path.ChangeExtension(_bookFile.FullName,
                                                                                                 Convert.ToInt32(LuceneVersion.LUCENE_48).ToString())))
            using (IndexReader reader = DirectoryReader.Open(index))
            {
                // Look up the single document carrying this NumId.
                Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader);
                Lucene.Net.Search.TopDocs       docs     = searcher.Search(
                    Lucene.Net.Search.NumericRangeQuery.NewInt32Range(nameof(TabHtmlText.NumId), numId, numId, true, true),
                    1);

                // Nothing indexed under this id: there is no token stream to highlight with.
                var hit = docs.ScoreDocs.FirstOrDefault();
                if (hit == null)
                {
                    return html;
                }

                QueryScorer scorer =
                    new QueryScorer(new QueryParser(LuceneVersion.LUCENE_48, nameof(TabHtmlText.Html), analyzer)
                                    .Parse(pattern));

                // The null fragmenter keeps the whole text as a single fragment.
                Highlighter highlighter =
                    new Highlighter(new SimpleHTMLFormatter("<span style=\"background-color: yellow\">", "</span>"),
                                    scorer)
                {
                    TextFragmenter = new NullFragmenter()
                };

                using (TokenStream stream =
                           TokenSources.GetAnyTokenStream(reader, hit.Doc, nameof(TabHtmlText.Html), analyzer))
                {
                    // Fall back to the input when the highlighter yields null.
                    return highlighter.GetBestFragment(stream, html) ?? html;
                }
            }
        }
        /// <summary>
        /// Checks that a field-specific scorer highlights only the terms of the queried field,
        /// while a scorer without a field highlights the terms of every field in the query.
        /// </summary>
        public void TestFieldSpecificHighlighting()
        {
            var helper = new TestHighlightRunner();
            helper.TestAction = () =>
            {
                var docMainText = "fred is one of the people";
                var parser = new QueryParser(TEST_VERSION, FIELD_NAME, analyzer);
                var query = parser.Parse("fred category:people");

                // highlighting respects fieldnames used in query
                IScorer fieldSpecificScorer = null;
                if (helper.Mode == TestHighlightRunner.QUERY)
                {
                    fieldSpecificScorer = new QueryScorer(query, FIELD_NAME);
                }
                else if (helper.Mode == TestHighlightRunner.QUERY_TERM)
                {
                    fieldSpecificScorer = new QueryTermScorer(query, "contents");
                }

                var fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldSpecificScorer)
                {
                    TextFragmenter = new NullFragmenter()
                };
                String result = fieldSpecificHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);

                // Expected value first, actual second, so failure messages read correctly.
                Assert.AreEqual("<B>fred</B> is one of the people", result, "Should match");

                // highlighting does not respect fieldnames used in query
                IScorer fieldInSpecificScorer = null;
                if (helper.Mode == TestHighlightRunner.QUERY)
                {
                    fieldInSpecificScorer = new QueryScorer(query, null);
                }
                else if (helper.Mode == TestHighlightRunner.QUERY_TERM)
                {
                    fieldInSpecificScorer = new QueryTermScorer(query);
                }

                var fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldInSpecificScorer)
                {
                    TextFragmenter = new NullFragmenter()
                };
                result = fieldInSpecificHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
                Assert.AreEqual("<B>fred</B> is one of the <B>people</B>", result, "Should match");

                reader.Close();
            };

            helper.Start();
        }
        /// <summary>
        /// Highlights the <c>HighlighterTest.FIELD_NAME</c> value of every returned hit and writes
        /// each result to the console.
        /// </summary>
        /// <param name="analyzer">Analyzer used to tokenize the stored field text.</param>
        /// <param name="searcher">Searcher used to load each hit's document.</param>
        /// <param name="hits">Search results to highlight.</param>
        /// <param name="query">Query the scorer is built from.</param>
        /// <param name="formatter">Formatter applied to highlighted terms.</param>
        /// <param name="expandMT">Not used by this method.</param>
        public void DoStandardHighlights(Analyzer analyzer, IndexSearcher searcher,
                                         TopDocs hits, Query query, IFormatter formatter, bool expandMT)
        {
            const int maxNumFragmentsRequired = 2;
            const string fragmentSeparator = "...";

            // The scorer depends only on the mode and query, so build it once.
            // It stays null for any other mode, as before.
            IScorer scorer = null;
            if (Mode == QUERY)
            {
                scorer = new QueryScorer(query);
            }
            else if (Mode == QUERY_TERM)
            {
                scorer = new QueryTermScorer(query);
            }

            var highlighter = new Highlighter(formatter, scorer) {TextFragmenter = new SimpleFragmenter(20)};

            // Walk the returned hits rather than counting up to TotalHits: TotalHits is the
            // number of matches, which can exceed the number of ScoreDocs actually returned.
            foreach (var hit in hits.ScoreDocs)
            {
                String text = searcher.Doc(hit.Doc).Get(HighlighterTest.FIELD_NAME);
                TokenStream tokenStream = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));

                String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired,
                                                             fragmentSeparator);
                Console.WriteLine("\t" + result);
            }
        }
        /// <summary>
        /// Runs three phrase searches and, after each, checks the <c>numHighlights</c> counter
        /// against the expected value (3, 4 and 4 respectively).
        /// </summary>
        public void TestSimpleQueryScorerPhraseHighlighting()
        {
            const int fragmentLimit = 2;

            // --- phrase 1 ---
            DoSearching("\"very long and contains\"");

            var phraseScorer = new QueryScorer(query, FIELD_NAME);
            var phraseHighlighter = new Highlighter(this, phraseScorer);

            for (int hit = 0; hit < hits.TotalHits; ++hit)
            {
                string stored = searcher.Doc(hits.ScoreDocs[hit].Doc).Get(FIELD_NAME);
                TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new StringReader(stored));

                phraseHighlighter.TextFragmenter = new SimpleFragmenter(40);

                string fragments = phraseHighlighter.GetBestFragments(tokens, stored, fragmentLimit, "...");
                Console.WriteLine("\t" + fragments);
            }

            Assert.IsTrue(numHighlights == 3, "Failed to find correct number of highlights " + numHighlights + " found");

            // --- phrase 2 ---
            numHighlights = 0;
            DoSearching("\"This piece of text refers to Kennedy\"");

            phraseScorer = new QueryScorer(query, FIELD_NAME);
            phraseHighlighter = new Highlighter(this, phraseScorer);

            for (int hit = 0; hit < hits.TotalHits; ++hit)
            {
                string stored = searcher.Doc(hits.ScoreDocs[hit].Doc).Get(FIELD_NAME);
                TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new StringReader(stored));

                phraseHighlighter.TextFragmenter = new SimpleFragmenter(40);

                string fragments = phraseHighlighter.GetBestFragments(tokens, stored, fragmentLimit, "...");
                Console.WriteLine("\t" + fragments);
            }

            Assert.IsTrue(numHighlights == 4, "Failed to find correct number of highlights " + numHighlights + " found");

            // --- phrase 3 ---
            numHighlights = 0;
            DoSearching("\"lets is a the lets is a the lets is a the lets\"");

            phraseScorer = new QueryScorer(query, FIELD_NAME);
            phraseHighlighter = new Highlighter(this, phraseScorer);

            for (int hit = 0; hit < hits.TotalHits; ++hit)
            {
                string stored = searcher.Doc(hits.ScoreDocs[hit].Doc).Get(FIELD_NAME);
                TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new StringReader(stored));

                phraseHighlighter.TextFragmenter = new SimpleFragmenter(40);

                string fragments = phraseHighlighter.GetBestFragments(tokens, stored, fragmentLimit, "...");
                Console.WriteLine("\t" + fragments);
            }

            Assert.IsTrue(numHighlights == 4, "Failed to find correct number of highlights " + numHighlights + " found");
        }
        /// <summary>
        /// Searches with a <c>RegexQuery</c> for "ken.*", highlights every hit and expects the
        /// <c>numHighlights</c> counter to equal 5.
        /// </summary>
        public void TestRegexQuery()
        {
            const int maxNumFragmentsRequired = 2;

            query = new RegexQuery(new Term(FIELD_NAME, "ken.*"));
            searcher = new IndexSearcher(ramDir, true);
            hits = searcher.Search(query, 100);

            QueryScorer regexScorer = new QueryScorer(query, FIELD_NAME);
            Highlighter regexHighlighter = new Highlighter(this, regexScorer);

            int docIndex = 0;
            while (docIndex < hits.TotalHits)
            {
                string stored = searcher.Doc(hits.ScoreDocs[docIndex].Doc).Get(FIELD_NAME);
                TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new StringReader(stored));

                regexHighlighter.TextFragmenter = new SimpleFragmenter(40);

                string fragments = regexHighlighter.GetBestFragments(tokens, stored, maxNumFragmentsRequired, "...");
                Console.WriteLine("\t" + fragments);

                docIndex++;
            }

            Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
        }
        /// <summary>
        /// Runs the highlighter over the hits of a numeric range query. Nothing is asserted:
        /// the query does not currently highlight, and the test only makes sure no exception
        /// is thrown.
        /// </summary>
        public void TestNumericRangeQuery()
        {
            const int fragmentLimit = 2;

            query = NumericRangeQuery.NewIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true);
            searcher = new IndexSearcher(ramDir, true);
            hits = searcher.Search(query, 100);

            var rangeScorer = new QueryScorer(query, FIELD_NAME);
            var rangeHighlighter = new Highlighter(this, rangeScorer);

            for (int hit = 0; hit < hits.TotalHits; ++hit)
            {
                // The text comes from the numeric field, but is tokenized as FIELD_NAME.
                string stored = searcher.Doc(hits.ScoreDocs[hit].Doc).Get(NUMERIC_FIELD_NAME);
                TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new StringReader(stored));

                rangeHighlighter.TextFragmenter = new SimpleFragmenter(40);

                // The returned fragments are intentionally ignored.
                rangeHighlighter.GetBestFragments(tokens, stored, fragmentLimit, "...");
            }
        }
        /// <summary>
        /// Searches for a sloppy phrase ("text piece long"~5), highlights every hit and expects
        /// the <c>numHighlights</c> counter to equal 6.
        /// </summary>
        public void TestSimpleQueryScorerPhraseHighlighting2()
        {
            const int fragmentLimit = 2;

            DoSearching("\"text piece long\"~5");

            QueryScorer sloppyScorer = new QueryScorer(query, FIELD_NAME);
            Highlighter sloppyHighlighter = new Highlighter(this, sloppyScorer)
            {
                TextFragmenter = new SimpleFragmenter(40)
            };

            int docIndex = 0;
            while (docIndex < hits.TotalHits)
            {
                string stored = searcher.Doc(hits.ScoreDocs[docIndex].Doc).Get(FIELD_NAME);
                TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new StringReader(stored));

                string fragments = sloppyHighlighter.GetBestFragments(tokens, stored, fragmentLimit, "...");
                Console.WriteLine("\t" + fragments);

                docIndex++;
            }

            Assert.IsTrue(numHighlights == 6, "Failed to find correct number of highlights " + numHighlights + " found");
        }
        /// <summary>
        /// Exercises <c>SimpleSpanFragmenter</c> with two phrase searches (fragment sizes 5 and 20)
        /// and prints the resulting fragments. Nothing is asserted.
        /// </summary>
        public void TestSimpleSpanFragmenter()
        {
            const int fragmentLimit = 2;

            // --- long phrase, fragment size 5 ---
            DoSearching("\"piece of text that is very long\"");

            var spanScorer = new QueryScorer(query, FIELD_NAME);
            var spanHighlighter = new Highlighter(this, spanScorer);

            for (int hit = 0; hit < hits.TotalHits; ++hit)
            {
                string stored = searcher.Doc(hits.ScoreDocs[hit].Doc).Get(FIELD_NAME);
                TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new StringReader(stored));

                spanHighlighter.TextFragmenter = new SimpleSpanFragmenter(spanScorer, 5);

                string fragments = spanHighlighter.GetBestFragments(tokens, stored, fragmentLimit, "...");
                Console.WriteLine("\t" + fragments);
            }

            // --- short phrase, fragment size 20 ---
            DoSearching("\"been shot\"");

            spanScorer = new QueryScorer(query, FIELD_NAME);
            spanHighlighter = new Highlighter(this, spanScorer);

            for (int hit = 0; hit < hits.TotalHits; ++hit)
            {
                string stored = searcher.Doc(hits.ScoreDocs[hit].Doc).Get(FIELD_NAME);
                TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new StringReader(stored));

                spanHighlighter.TextFragmenter = new SimpleSpanFragmenter(spanScorer, 20);

                string fragments = spanHighlighter.GetBestFragments(tokens, stored, fragmentLimit, "...");
                Console.WriteLine("\t" + fragments);
            }
        }
        /// <summary>
        /// Searches for a plain term combined with a phrase (y "x y z") and, after highlighting
        /// each hit, expects the <c>numHighlights</c> counter to equal 4.
        /// </summary>
        public void TestPosTermStdTerm()
        {
            const int fragmentLimit = 2;

            DoSearching("y \"x y z\"");

            var mixedScorer = new QueryScorer(query, FIELD_NAME);
            var mixedHighlighter = new Highlighter(this, mixedScorer);

            int docIndex = 0;
            while (docIndex < hits.TotalHits)
            {
                string stored = searcher.Doc(hits.ScoreDocs[docIndex].Doc).Get(FIELD_NAME);
                TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new StringReader(stored));

                mixedHighlighter.TextFragmenter = new SimpleFragmenter(40);

                string fragments = mixedHighlighter.GetBestFragments(tokens, stored, fragmentLimit, "...");
                Console.WriteLine("\t" + fragments);

                // The count is checked after every hit, not once at the end.
                Assert.IsTrue(numHighlights == 4,
                              "Failed to find correct number of highlights " + numHighlights + " found");

                docIndex++;
            }
        }
        /// <summary>
        /// Builds a highlighter whose scorer type depends on the current <c>Mode</c>.
        /// </summary>
        /// <param name="query">Query the scorer is built from.</param>
        /// <param name="fieldName">Field passed to the <c>QueryScorer</c> (QUERY mode only).</param>
        /// <param name="stream">Not used by this method.</param>
        /// <param name="formatter">Formatter handed to the highlighter.</param>
        /// <param name="expanMultiTerm">
        /// When false, multi-term query expansion is switched off on the <c>QueryScorer</c>
        /// (QUERY mode only).
        /// </param>
        /// <returns>A new highlighter using the mode-specific scorer.</returns>
        /// <exception cref="SystemException">The mode is neither QUERY nor QUERY_TERM.</exception>
        public Highlighter GetHighlighter(Query query, String fieldName, TokenStream stream, IFormatter formatter,
                                          bool expanMultiTerm)
        {
            if (Mode == QUERY)
            {
                var queryScorer = new QueryScorer(query, fieldName);
                if (!expanMultiTerm)
                {
                    queryScorer.IsExpandMultiTermQuery = false;
                }

                IScorer spanAwareScorer = queryScorer;
                return new Highlighter(formatter, spanAwareScorer);
            }

            if (Mode == QUERY_TERM)
            {
                IScorer termScorer = new QueryTermScorer(query);
                return new Highlighter(formatter, termScorer);
            }

            throw new SystemException("Unknown highlight mode");
        }
        /// <summary>
        /// Highlights every hit for "Kennedy" with a highlighter built from the scorer alone.
        /// Nothing is asserted; the test only checks that no exception is thrown.
        /// </summary>
        public void TestSimpleSpanHighlighter()
        {
            const int fragmentLimit = 2;

            DoSearching("Kennedy");

            var spanScorer = new QueryScorer(query, FIELD_NAME);
            var spanHighlighter = new Highlighter(spanScorer);

            int docIndex = 0;
            while (docIndex < hits.TotalHits)
            {
                string stored = searcher.Doc(hits.ScoreDocs[docIndex].Doc).Get(FIELD_NAME);
                TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new StringReader(stored));
                spanHighlighter.TextFragmenter = new SimpleFragmenter(40);

                string fragments = spanHighlighter.GetBestFragments(tokens, stored, fragmentLimit, "...");
                Console.WriteLine("\t" + fragments);

                docIndex++;
            }

            // Not sure we can assert anything here - just running to check we don't
            // throw any exceptions.
        }
        /// <summary>
        /// Highlights <paramref name="text"/> as a single fragment using a scorer built with
        /// <paramref name="fieldName"/> and <c>FIELD_NAME</c> as its default field.
        /// Intended for use with <c>testHighlightingWithDefaultField()</c>.
        /// </summary>
        /// <param name="query">Query to highlight.</param>
        /// <param name="fieldName">Field used for tokenizing and for the scorer.</param>
        /// <param name="text">Text to highlight.</param>
        /// <returns>The highlighted text, or <paramref name="text"/> when the result is empty.</returns>
        private static String HighlightField(Query query, String fieldName, String text)
        {
            TokenStream tokens = new StandardAnalyzer(TEST_VERSION).TokenStream(fieldName, new StringReader(text));

            // The default formatter is assumed to wrap matches in "<B>" / "</B>".
            var boldFormatter = new SimpleHTMLFormatter();
            var fieldScorer = new QueryScorer(query, fieldName, FIELD_NAME);
            var fieldHighlighter = new Highlighter(boldFormatter, fieldScorer)
            {
                // int.MaxValue as the fragment size given to SimpleFragmenter.
                TextFragmenter = new SimpleFragmenter(int.MaxValue)
            };

            String highlighted = fieldHighlighter.GetBestFragments(tokens, text, 1, "(FIELD TEXT TRUNCATED)");
            if (highlighted.Length == 0)
            {
                return text;
            }

            return highlighted;
        }
        /// <summary>
        /// Highlights the hits of a constant-score wildcard query ("ken*") three times - with a
        /// field-specific scorer, a scorer with a null field, and a scorer with a default field -
        /// and expects the <c>numHighlights</c> counter to equal 5 after each pass.
        /// </summary>
        public void TestConstantScoreMultiTermQuery()
        {
            const int fragmentLimit = 2;
            const string separator = "...";

            numHighlights = 0;

            var wildcard = new WildcardQuery(new Term(FIELD_NAME, "ken*"))
            {
                RewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE
            };
            query = wildcard;
            searcher = new IndexSearcher(ramDir, true);

            // The query is deliberately not rewritten: a ConstantScore query rewrites to
            // ConstantScoreQuery, which cannot be highlighted.
            Console.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
            hits = searcher.Search(query, null, 1000);

            // --- pass 1: scorer bound to FIELD_NAME ---
            for (int hit = 0; hit < hits.TotalHits; ++hit)
            {
                string stored = searcher.Doc(hits.ScoreDocs[hit].Doc).Get(HighlighterTest.FIELD_NAME);
                TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new StringReader(stored));

                var fieldScorer = new QueryScorer(query, FIELD_NAME);
                var fieldHighlighter = new Highlighter(this, fieldScorer);
                fieldHighlighter.TextFragmenter = new SimpleFragmenter(20);

                string fragments = fieldHighlighter.GetBestFragments(tokens, stored, fragmentLimit, separator);
                Console.WriteLine("\t" + fragments);
            }

            Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");

            // --- pass 2: try null field ---
            hits = searcher.Search(query, null, 1000);
            numHighlights = 0;

            for (int hit = 0; hit < hits.TotalHits; ++hit)
            {
                string stored = searcher.Doc(hits.ScoreDocs[hit].Doc).Get(HighlighterTest.FIELD_NAME);
                TokenStream tokens = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(stored));

                var anyFieldScorer = new QueryScorer(query, null);
                var anyFieldHighlighter = new Highlighter(this, anyFieldScorer);
                anyFieldHighlighter.TextFragmenter = new SimpleFragmenter(20);

                string fragments = anyFieldHighlighter.GetBestFragments(tokens, stored, fragmentLimit, separator);
                Console.WriteLine("\t" + fragments);
            }

            Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");

            // --- pass 3: try default field ---
            hits = searcher.Search(query, null, 1000);
            numHighlights = 0;

            for (int hit = 0; hit < hits.TotalHits; ++hit)
            {
                string stored = searcher.Doc(hits.ScoreDocs[hit].Doc).Get(HighlighterTest.FIELD_NAME);
                TokenStream tokens = analyzer.TokenStream(HighlighterTest.FIELD_NAME, new StringReader(stored));

                var defaultFieldScorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
                var defaultFieldHighlighter = new Highlighter(this, defaultFieldScorer);
                defaultFieldHighlighter.TextFragmenter = new SimpleFragmenter(20);

                string fragments = defaultFieldHighlighter.GetBestFragments(tokens, stored, fragmentLimit, separator);
                Console.WriteLine("\t" + fragments);
            }

            Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
        }
        /// <summary>
        /// Highlights a text against a boolean query that repeats the same two phrases across
        /// two fields and expects the <c>numHighlights</c> counter to equal 7.
        /// </summary>
        public void TestRepeatingTermsInMultBooleans()
        {
            const string content = "x y z a b c d e f g b c g";
            const string firstPhrase = "\"a b c d\"";
            const string secondPhrase = "\"b c g\"";
            const string f1 = "f1";
            const string f2 = "f2";

            // Field prefixes in query syntax, e.g. "f1:".
            string f1Prefix = f1 + ":";
            string f2Prefix = f2 + ":";

            // (f1:"a b c d" OR f2:"a b c d") AND (f1:"b c g" OR f2:"b c g")
            string firstClause = "(" + f1Prefix + firstPhrase + " OR " + f2Prefix + firstPhrase + ")";
            string secondClause = "(" + f1Prefix + secondPhrase + " OR " + f2Prefix + secondPhrase + ")";
            string queryText = firstClause + " AND " + secondClause;

            Analyzer whitespace = new WhitespaceAnalyzer();
            var parser = new QueryParser(TEST_VERSION, f1, whitespace);
            Query parsed = parser.Parse(queryText);

            var scorer = new QueryScorer(parsed, f1);
            scorer.IsExpandMultiTermQuery = false;

            var highlighter = new Highlighter(this, scorer);
            highlighter.GetBestFragment(whitespace, f1, content);

            Assert.IsTrue(numHighlights == 7, "Failed to find correct number of highlights " + numHighlights + " found");
        }
Exemple #43
0
        /// <summary>
        /// Searches the index for the query text and returns one dictionary per hit.
        /// </summary>
        /// <param name="querytext">The text to search the index</param>
        /// <returns>
        /// A list with one entry per returned hit, in rank order. Each entry holds the keys
        /// "rank", "passId", "score", "title", "url", "text", "queryId" and "highlighter".
        /// The list is empty when nothing matches.
        /// </returns>
        public List <Dictionary <string, string> > SearchIndext(string querytext)
        {
            var resultListDict = new List <Dictionary <string, string> >();

            Query query = DisplayQueries(querytext);

            Console.WriteLine("query is " + query);
            TopDocs results = searcher.Search(query, 100);

            System.Console.WriteLine("Number of results is " + results.TotalHits);

            // Setup the configuration of Highlighter
            IFormatter  formatter   = new SimpleHTMLFormatter("<span style=\"font-weight:bold; background-color:yellow;\">", "</span>");
            QueryScorer scorer      = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer)
            {
                TextFragmenter = new SimpleFragmenter(2000)
            };

            int rank = 0;

            // Each ScoreDoc carries the document id and the score of that document for the query.
            // An empty ScoreDocs array simply skips the loop.
            foreach (ScoreDoc scoreDoc in results.ScoreDocs)
            {
                rank++;
                Lucene.Net.Documents.Document doc = searcher.Doc(scoreDoc.Doc);
                string myFieldValue = doc.Get(TEXT_FN_PASS_TEXT);
                string myURL        = doc.Get(TEXT_FN_URL);
                string passId       = doc.Get(TEXT_FN_PASS_ID);
                string score        = scoreDoc.Score.ToString();
                string queryId      = doc.Get(TEXT_FN_QUERY_ID);

                int jsonId = Int32.Parse(doc.Get(TEXT_FN_JSON_ARRAY_ID));

                // passage_text field store as Field.Store.NO, so the passage text is
                // looked up in the source JSON by passage id.
                foreach (var itemP in jArr[jsonId][PASSAGES])
                {
                    if (itemP[TEXT_FN_PASS_ID].ToString() == passId)
                    {
                        myFieldValue = itemP[TEXT_FN_PASS_TEXT].ToString();
                    }
                }

                // Add the highlighter tags to the passage text. When no passage text was
                // found there is nothing to tokenize, so the highlight stays null.
                string HLmyFieldValue = null;
                if (myFieldValue != null)
                {
                    TokenStream HLstream = analyzer.TokenStream("", new StringReader(myFieldValue));
                    HLmyFieldValue = highlighter.GetBestFragment(HLstream, myFieldValue);
                }

                // Extract the title from the URL: the last path segment, or the one before
                // it when the URL ends with '/'. A missing URL or one without '/' yields
                // the whole (possibly empty) string instead of failing.
                string[] urlSeg = (myURL ?? string.Empty).Split('/');
                string   title  = urlSeg[urlSeg.Length - 1];
                if (title.Length == 0 && urlSeg.Length > 1)
                {
                    title = urlSeg[urlSeg.Length - 2];
                }

                resultListDict.Add(new Dictionary <string, string> {
                    { "rank", rank.ToString() }, { "passId", passId },
                    { "score", score }, { "title", title }, { "url", myURL }, { "text", myFieldValue }, { "queryId", queryId }, { "highlighter", HLmyFieldValue }
                });
            }

            return(resultListDict);
        }
        /// <summary>
        /// Searches for the phrase "very long" and prints the best fragment of each returned
        /// hit, using a token stream obtained through <c>TokenSources.GetAnyTokenStream</c>.
        /// Nothing is asserted.
        /// </summary>
        public void TestQueryScorerHits()
        {
            Analyzer simpleAnalyzer = new SimpleAnalyzer();
            var parser = new QueryParser(TEST_VERSION, FIELD_NAME, simpleAnalyzer);
            query = parser.Parse("\"very long\"");
            searcher = new IndexSearcher(ramDir, true);
            TopDocs topDocs = searcher.Search(query, 10);

            var scorer = new QueryScorer(query, FIELD_NAME);
            var highlighter = new Highlighter(scorer);

            foreach (var scoreDoc in topDocs.ScoreDocs)
            {
                Document doc = searcher.Doc(scoreDoc.Doc);
                string storedField = doc.Get(FIELD_NAME);

                TokenStream stream = TokenSources.GetAnyTokenStream(searcher.IndexReader, scoreDoc.Doc,
                                                                    FIELD_NAME, doc, simpleAnalyzer);

                // A fresh span fragmenter is installed for every document.
                highlighter.TextFragmenter = new SimpleSpanFragmenter(scorer);

                Console.WriteLine(highlighter.GetBestFragment(stream, storedField));
            }
        }