Ejemplo n.º 1
0
        /// <summary>
        /// Builds an HTML excerpt of <paramref name="text"/> made of up to three highlighted
        /// fragments, each wrapped in a paragraph element and padded with ellipses on the
        /// sides where the fragment does not touch the start or end of the text.
        /// </summary>
        /// <param name="text">The plain text to search for terms of the current query.</param>
        /// <returns>
        /// The concatenated highlighted fragments; when no fragment is produced, the first
        /// 120 characters of the text followed by "..." (or the whole text when shorter).
        /// An empty string is returned for null or empty input.
        /// </returns>
        public string HighlightContent(string text)
        {
            const string openTag = "<span style='color:maroon; font-weight:bold;'>";
            const string closeTag = "</span>";
            const int fragmentSize = 120;

            // Nothing to tokenize; also avoids handing a null string to StringReader.
            if (string.IsNullOrEmpty(text)) return string.Empty;

            QueryScorer scorer = new QueryScorer(GetQuery());
            Formatter formatter = new SimpleHTMLFormatter(openTag, closeTag);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.SetTextFragmenter(new SimpleFragmenter(fragmentSize));
            TokenStream stream = new StandardAnalyzer().TokenStream("Content", new StringReader(text));

            var fragments = highlighter.GetBestFragments(stream, text, 3);

            if (fragments == null || fragments.Length == 0)
            {
                return text.Length > fragmentSize ? text.Substring(0, fragmentSize) + "..." : text;
            }

            var highlighted = new System.Text.StringBuilder();

            foreach (var fragment in fragments)
            {
                // The fragment carries the formatter's tags around matched terms, so it can
                // never be compared against the raw text directly; compare the tag-free form.
                string plain = fragment.Replace(openTag, string.Empty).Replace(closeTag, string.Empty);

                if (text.StartsWith(plain, System.StringComparison.Ordinal))
                {
                    highlighted.Append("<p>").Append(fragment).Append(" ... </p>");
                }
                else if (text.EndsWith(plain, System.StringComparison.Ordinal))
                {
                    highlighted.Append("<p> ... ").Append(fragment).Append("</p>");
                }
                else
                {
                    highlighted.Append("<p> ... ").Append(fragment).Append(" ... </p>");
                }
            }

            return highlighted.ToString();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses <paramref name="query"/> against the "contents" field, runs it on the
        /// class-level searcher and returns one <c>Hit</c> per returned document, including
        /// a highlighted excerpt of the document's stored "contents" value.
        /// </summary>
        /// <param name="query">Query text in query-parser syntax.</param>
        /// <param name="maxResults">Maximum number of documents requested from the searcher.</param>
        /// <returns>The hits in the order the searcher returned them.</returns>
        public IEnumerable<Hit> Search(string query, int maxResults)
        {
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

            QueryParser qp = new QueryParser(
                Lucene.Net.Util.Version.LUCENE_29,
                "contents",
                analyzer
            );
            Query q = qp.Parse(query);

            TopDocs top = searcher.Search(q, maxResults);

            // totalHits counts every matching document, while scoreDocs only holds the
            // documents actually returned (at most maxResults). Iterating up to totalHits
            // would index past the end of scoreDocs whenever more documents match.
            var hits = top.scoreDocs;
            List<Hit> result = new List<Hit>(hits.Length);

            // The scorer and highlighter depend only on the query, so build them once.
            var scorer = new QueryScorer(q, searcher.GetIndexReader(), "contents");
            var highlighter = new Highlighter(scorer);

            foreach (var hit in hits)
            {
                var doc = searcher.Doc(hit.doc);
                string contents = doc.Get("contents");

                result.Add(new Hit()
                {
                    Relevance = hit.score,
                    Title = doc.Get("title"),
                    Url = doc.Get("path"),
                    Excerpt = highlighter.GetBestFragment(analyzer, "contents", contents)
                });
            }

            return result;
        }
		/// <summary>
		/// Highlights the parts of <paramref name="value"/> that match
		/// <paramref name="luceneQuery"/>, after rewriting the query against the
		/// searcher's index reader.
		/// </summary>
		/// <param name="value">The field text to highlight.</param>
		/// <param name="searcher">Searcher whose index reader is used to rewrite the query.</param>
		/// <param name="highlightField">Field name passed to the highlight analyzer.</param>
		/// <param name="luceneQuery">The query whose terms are highlighted.</param>
		/// <returns>The best fragments joined by the configured separator.</returns>
		public string GetHighlight(string value, IndexSearcher searcher, string highlightField, Query luceneQuery)
		{
			Query rewrittenQuery = luceneQuery.Rewrite(searcher.GetIndexReader());
			var rewrittenScorer = new QueryScorer(rewrittenQuery);
			var fragmentHighlighter = new Highlighter(HighlightFormatter, rewrittenScorer);

			return fragmentHighlighter.GetBestFragments(
				HighlightAnalyzer.TokenStream(highlightField, new StringReader(value)),
				value,
				MaxNumHighlights,
				Separator);
		}
		/// <summary>
		/// Parses <paramref name="luceneRawQuery"/> for <paramref name="highlightField"/>,
		/// rewrites it through <paramref name="searcher"/> and highlights the matching
		/// parts of <paramref name="value"/>.
		/// </summary>
		/// <param name="value">The field text to highlight.</param>
		/// <param name="highlightField">Field used for both query parsing and analysis.</param>
		/// <param name="searcher">Searcher used to rewrite the parsed query.</param>
		/// <param name="luceneRawQuery">Unparsed query text.</param>
		/// <returns>The best fragments joined by the configured separator.</returns>
		public string GetHighlight(string value, string highlightField, Searcher searcher, string luceneRawQuery)
		{
			var parsedQuery = GetQueryParser(highlightField).Parse(luceneRawQuery);
			var rewrittenScorer = new QueryScorer(searcher.Rewrite(parsedQuery));
			var fragmentHighlighter = new Highlighter(HighlightFormatter, rewrittenScorer);

			return fragmentHighlighter.GetBestFragments(
				HighlightAnalyzer.TokenStream(highlightField, new StringReader(value)),
				value,
				MaxNumHighlights,
				Separator);
		}
Ejemplo n.º 5
0
        /// <summary>
        /// Highlights the terms of the current query inside a title. A
        /// <c>NullFragmenter</c> is installed as the text fragmenter.
        /// </summary>
        /// <param name="text">The title text.</param>
        /// <returns>
        /// The title with matched terms wrapped in the highlight span, or the original
        /// text when the highlighter returns null.
        /// </returns>
        public string HighlightTitle(string text)
        {
            QueryScorer termScorer = new QueryScorer(GetQuery());
            Formatter markup = new SimpleHTMLFormatter("<span style='color:maroon; font-weight:bold;'>", "</span>");

            Highlighter titleHighlighter = new Highlighter(markup, termScorer);
            titleHighlighter.SetTextFragmenter(new NullFragmenter());

            TokenStream titleTokens = new StandardAnalyzer().TokenStream("Title", new StringReader(text));
            string marked = titleHighlighter.GetBestFragment(titleTokens, text);

            if (marked == null)
            {
                return text;
            }

            return marked;
        }
        /// <summary>
        /// Parses <paramref name="query"/> against the "text" field, runs it on the index
        /// at <c>indexPath</c> and returns up to 100 results with a highlighted preview.
        /// </summary>
        /// <param name="query">Query text in query-parser syntax.</param>
        /// <returns>
        /// A fully materialized list of results, so it stays usable after the searcher
        /// and directory have been closed.
        /// </returns>
        public IEnumerable<SearchResult> Search(string query)
        {
            Analyzer analyzer = new SnowballAnalyzer("English");
            QueryParser parser = new QueryParser("text", analyzer);
            Query luceneQuery = parser.Parse(query);

            Directory directory = FSDirectory.GetDirectory(indexPath);
            try
            {
                IndexSearcher searcher = new IndexSearcher(directory);
                try
                {
                    QueryScorer queryScorer = new QueryScorer(luceneQuery);
                    Highlighter highlighter = new Highlighter(queryScorer);

                    TopDocs topDocs = searcher.Search(luceneQuery, 100);

                    var searchResults = new List<SearchResult>();
                    foreach (ScoreDoc scoreDoc in topDocs.scoreDocs)
                    {
                        Document doc = searcher.Doc(scoreDoc.doc);
                        searchResults.Add(new SearchResult
                        {
                            Path = doc.Get("path"),
                            Score = scoreDoc.score,
                            Title = doc.Get("title"),
                            Preview = highlighter.GetBestFragment(analyzer, "text", doc.Get("text"))
                        });
                    }

                    return searchResults;
                }
                finally
                {
                    // The searcher was opened for this call only; release it even on failure.
                    searcher.Close();
                }
            }
            finally
            {
                directory.Close();
            }
        }
				/// <summary>
				/// Runs <paramref name="luceneQuery"/> across the configured search fields and
				/// optionally replaces selected field values with highlighted fragments.
				/// </summary>
				/// <param name="luceneQuery">Query text; null, empty or whitespace yields no results.</param>
				/// <param name="maxResults">Maximum number of documents requested from the searcher.</param>
				/// <param name="highlightOpenTag">Markup inserted before each matched term.</param>
				/// <param name="highlightCloseTag">Markup inserted after each matched term.</param>
				/// <param name="fieldsToHighlight">Names of the result fields to highlight.</param>
				/// <returns>
				/// The results in searcher order. Highlighting is applied only when both tags
				/// are non-empty and at least one field is named.
				/// </returns>
				public IEnumerable<SearchResult> Search(string luceneQuery, int maxResults = 500, string highlightOpenTag = null, string highlightCloseTag = null, params string[] fieldsToHighlight)
				{
						var results = new List<SearchResult>();
						if (String.IsNullOrWhiteSpace(luceneQuery)) return results;

						// A single analyzer instance serves query parsing and every highlight pass.
						var analyzer = new StandardAnalyzer(LuceneVersion);
						var parser = new MultiFieldQueryParser(LuceneVersion, searchFields, analyzer);
						Query query = parser.Parse(luceneQuery);
						TopDocs topDocs = searcher.Search(query, maxResults);

						foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
						{
								Document document = reader.Document(scoreDoc.doc);
								results.Add(new SearchResult(document, scoreDoc.score));
						}

						// A caller may pass an explicit null for the params array, so check it
						// before reading Length.
						bool highlightingRequested = !String.IsNullOrEmpty(highlightOpenTag)
								&& !String.IsNullOrEmpty(highlightCloseTag)
								&& fieldsToHighlight != null
								&& fieldsToHighlight.Length > 0;
						if (!highlightingRequested) return results;

						var scorer = new QueryScorer(query);
						var formatter = new SimpleHTMLFormatter(highlightOpenTag, highlightCloseTag);
						var highlighter = new Highlighter(formatter, scorer);
						highlighter.SetTextFragmenter(new SimpleFragmenter());

						foreach (SearchResult result in results)
						{
								foreach (string highlightField in fieldsToHighlight)
								{
										if (!result.Fields.ContainsKey(highlightField)) continue;

										string fieldValue = result[highlightField];
										// Nothing to highlight, and StringReader rejects a null string.
										if (String.IsNullOrEmpty(fieldValue)) continue;

										TokenStream stream = analyzer.TokenStream(highlightField, new StringReader(fieldValue));
										string highlightedFieldValue = highlighter.GetBestFragments(stream, fieldValue, 500, "...");

										// Keep the original value when no fragment matched.
										if (!String.IsNullOrWhiteSpace(highlightedFieldValue))
										{
												result.Fields[highlightField] = highlightedFieldValue;
										}
								}
						}

						return results;
				}
        /// <summary>
        /// Produces a preview of <paramref name="text"/> with the terms of
        /// <paramref name="q"/> wrapped in emphasis tags.
        /// </summary>
        /// <param name="q">The query whose terms are highlighted.</param>
        /// <param name="text">The text to build the preview from.</param>
        /// <returns>Up to three fragments joined by "...".</returns>
        private string GeneratePreviewText(Query q, string text)
        {
            var termScorer = new QueryScorer(q);
            var emphasis = new SimpleHTMLFormatter("<em>", "</em>");

            var previewHighlighter = new Highlighter(emphasis, termScorer);
            previewHighlighter.SetTextFragmenter(new SimpleFragmenter(250));

            var bodyAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
            var bodyTokens = bodyAnalyzer.TokenStream("bodyText", new StringReader(text));

            string preview = previewHighlighter.GetBestFragments(bodyTokens, text, 3, "...");
            return preview;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Runs <paramref name="query"/> against the index at <c>_indexPath</c> and collects
        /// one page of results, optionally with a highlighted content snippet per document.
        /// </summary>
        /// <param name="query">The query to execute.</param>
        /// <param name="cachePath">Path prefix prepended to each document's relative cache file name.</param>
        /// <param name="offset">Index of the first hit to include in the page.</param>
        /// <param name="resultAmount">Maximum number of hits to include in the page.</param>
        /// <param name="includeContentSnippets">Whether to read each cached file and extract a snippet.</param>
        /// <param name="limit">Maximum number of hits requested from the searcher.</param>
        /// <returns>
        /// The collected results; <c>NumResults</c> is the number of hits the searcher
        /// returned (at most <paramref name="limit"/>).
        /// </returns>
        private SearchResults Query(Query query, string cachePath,
            int offset, int resultAmount, bool includeContentSnippets, int limit)
        {
            SearchResults results = new SearchResults();
            Lucene.Net.Store.FSDirectory directory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(_indexPath));
            try
            {
                IndexReader reader = IndexReader.Open(directory, true);
                try
                {
                    IndexSearcher searcher = new IndexSearcher(reader);
                    try
                    {
                        // Request all results up to the page we actually need (this is quick)
                        TopDocs topDocs = searcher.Search(query, limit);
                        ScoreDoc[] hits = topDocs.scoreDocs;
                        // Save num results
                        results.NumResults = hits.Length;

                        // Only loop through the hits that should be on the page
                        for (int i = offset; i < hits.Length && i < offset + resultAmount; i++)
                        {
                            Document doc = searcher.Doc(hits[i].doc);

                            if (!includeContentSnippets)
                            {
                                results.AddLuceneDocument(doc);
                                continue;
                            }

                            // Read the whole file from the cache to find the content snippet.
                            string filepath = CacheManager.GetRelativeCacheFileName(doc.Get("uri"), "GET");
                            string documentContent = Utils.ReadFileAsString(cachePath + filepath);

                            // Remove unusable stuff.
                            documentContent = HtmlUtils.RemoveHead(documentContent);
                            documentContent = HtmlUtils.ExtractText(documentContent);

                            results.AddLuceneDocument(doc, BuildContentSnippet(query, documentContent));
                        }
                    }
                    finally
                    {
                        // Release the searcher even when searching or snippet extraction fails.
                        searcher.Close();
                    }
                }
                finally
                {
                    // The reader was opened by this method, so it is closed here explicitly.
                    reader.Close();
                }
            }
            finally
            {
                directory.Close();
            }

            return results;
        }

        /// <summary>
        /// Extracts a single highlighted fragment from <paramref name="documentContent"/>,
        /// strips its tags and appends "..." when it does not end with '.', '!' or '?'.
        /// Returns an empty string when no fragment is found or highlighting fails.
        /// </summary>
        private string BuildContentSnippet(Query query, string documentContent)
        {
            // Find (and highlight) content snippets
            QueryScorer scorer = new QueryScorer(query);
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.SetTextFragmenter(new SentenceFragmenter());
            TokenStream stream = _analyzer.TokenStream("content", new StringReader(documentContent));

            try
            {
                // Get 1 fragment
                string[] fragments = highlighter.GetBestFragments(stream, documentContent, 1);
                if (fragments == null || fragments.Length == 0)
                {
                    return "";
                }

                string snippet = HtmlUtils.StripTagsCharArray(fragments[0], false);
                if (string.IsNullOrEmpty(snippet))
                {
                    // Nothing left after stripping tags; there is no last character to inspect.
                    return "";
                }

                // If the content snippet does end in mid of a sentence, let's append "..."
                char last = snippet[snippet.Length - 1];
                if (last != '.' && last != '!' && last != '?')
                {
                    snippet += "...";
                }

                return snippet;
            }
            catch (Exception)
            {
                // Snippets are best-effort: a highlighting failure must not drop the
                // document from the result page, so fall back to an empty snippet.
                return "";
            }
        }