/// <summary>
/// Private constructor. Initializes <c>Separator</c>, <c>MaxNumHighlights</c>,
/// <c>HighlightAnalyzer</c> and <c>HighlightFormatter</c> with their default values.
/// </summary>
private LuceneHighlightHelper()
{
    // Plain-value defaults first.
    Separator = "...";
    MaxNumHighlights = 5;

    // Default analyzer, built for the Lucene version held in _luceneVersion.
    var defaultAnalyzer = new StandardAnalyzer(_luceneVersion);
    HighlightAnalyzer = defaultAnalyzer;

    // Default formatter: empty opening tag, single space as closing tag.
    var defaultFormatter = new SimpleHTMLFormatter("", " ");
    HighlightFormatter = defaultFormatter;
}
/// <summary>
/// Builds an HTML preview of <paramref name="text"/> in which the terms of the query returned by
/// <c>GetQuery()</c> are wrapped in a maroon, bold span.
/// </summary>
/// <param name="text">The plain content to highlight. May be null or empty.</param>
/// <returns>
/// Up to three highlighted fragments, each wrapped in a paragraph element and surrounded by
/// " ... " on every side where the fragment does not touch the start or end of the text.
/// When no fragment is produced, the first 120 characters of the text followed by "..."
/// (or the whole text when it is not longer than 120 characters). An empty string for null or empty input.
/// </returns>
public string HighlightContent(string text)
{
    // Nothing to tokenize; StringReader would throw on null.
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    const string openTag = "<span style='color:maroon; font-weight:bold;'>";
    const string closeTag = "</span>";
    const int fragmentSize = 120;

    QueryScorer scorer = new QueryScorer(GetQuery());
    Formatter formatter = new SimpleHTMLFormatter(openTag, closeTag);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.SetTextFragmenter(new SimpleFragmenter(fragmentSize));

    TokenStream stream = new StandardAnalyzer().TokenStream("Content", new StringReader(text));
    var fragments = highlighter.GetBestFragments(stream, text, 3);

    if (fragments == null || fragments.Length == 0)
    {
        return text.Length > fragmentSize ? text.Substring(0, fragmentSize) + "..." : text;
    }

    var highlighted = new System.Text.StringBuilder();
    foreach (var fragment in fragments)
    {
        // The fragment already contains the highlight markup, so it can never be compared
        // against the raw text directly. Strip the markup first to recover the original slice.
        string plainFragment = fragment.Replace(openTag, string.Empty).Replace(closeTag, string.Empty);

        bool atStart = text.StartsWith(plainFragment, System.StringComparison.Ordinal);
        bool atEnd = text.EndsWith(plainFragment, System.StringComparison.Ordinal);

        if (atStart)
        {
            highlighted.Append("<p>").Append(fragment).Append(" ... </p>");
        }
        else if (atEnd)
        {
            highlighted.Append("<p> ... ").Append(fragment).Append("</p>");
        }
        else
        {
            highlighted.Append("<p> ... ").Append(fragment).Append(" ... </p>");
        }
    }

    return highlighted.ToString();
}
/// <summary>
/// Highlights the terms of the query returned by <c>GetQuery()</c> inside a title.
/// The title is treated as a single fragment (a <c>NullFragmenter</c> is used).
/// </summary>
/// <param name="text">The title text to highlight.</param>
/// <returns>
/// The title with matching terms wrapped in a maroon, bold span, or the unchanged
/// <paramref name="text"/> when the highlighter returns no fragment.
/// </returns>
public string HighlightTitle(string text)
{
    var titleScorer = new QueryScorer(GetQuery());
    var spanFormatter = new SimpleHTMLFormatter("<span style='color:maroon; font-weight:bold;'>", "</span>");

    var titleHighlighter = new Highlighter(spanFormatter, titleScorer);
    titleHighlighter.SetTextFragmenter(new NullFragmenter());

    var titleAnalyzer = new StandardAnalyzer();
    TokenStream tokens = titleAnalyzer.TokenStream("Title", new StringReader(text));

    string best = titleHighlighter.GetBestFragment(tokens, text);
    if (best == null)
    {
        return text;
    }

    return best;
}
/// <summary>
/// Parses <paramref name="luceneQuery"/> against <c>searchFields</c>, runs it and returns the hits.
/// When both highlight tags and at least one field name are supplied, the listed fields of every
/// result are replaced by their highlighted fragments.
/// </summary>
/// <param name="luceneQuery">Query text in Lucene query syntax. Null, empty or whitespace yields an empty result.</param>
/// <param name="maxResults">Maximum number of hits requested from the searcher.</param>
/// <param name="highlightOpenTag">Markup placed before each highlighted term. Highlighting is skipped when null or empty.</param>
/// <param name="highlightCloseTag">Markup placed after each highlighted term. Highlighting is skipped when null or empty.</param>
/// <param name="fieldsToHighlight">Names of the result fields to highlight. Highlighting is skipped when null or empty.</param>
/// <returns>The search results, in the order returned by the searcher.</returns>
public IEnumerable<SearchResult> Search(string luceneQuery, int maxResults = 500, string highlightOpenTag = null, string highlightCloseTag = null, params string[] fieldsToHighlight)
{
    var results = new List<SearchResult>();
    if (String.IsNullOrWhiteSpace(luceneQuery))
    {
        return results;
    }

    // One analyzer serves both query parsing and highlight tokenizing.
    var analyzer = new StandardAnalyzer(LuceneVersion);
    var parser = new MultiFieldQueryParser(LuceneVersion, searchFields, analyzer);
    Query query = parser.Parse(luceneQuery);

    TopDocs topDocs = searcher.Search(query, maxResults);
    foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
    {
        Document document = reader.Document(scoreDoc.doc);
        results.Add(new SearchResult(document, scoreDoc.score));
    }

    // A params array can still be passed explicitly as null, so it is checked before use.
    bool highlightingRequested =
        !String.IsNullOrEmpty(highlightOpenTag)
        && !String.IsNullOrEmpty(highlightCloseTag)
        && fieldsToHighlight != null
        && fieldsToHighlight.Length > 0;

    if (!highlightingRequested)
    {
        return results;
    }

    var scorer = new QueryScorer(query);
    var formatter = new SimpleHTMLFormatter(highlightOpenTag, highlightCloseTag);
    var highlighter = new Highlighter(formatter, scorer);
    highlighter.SetTextFragmenter(new SimpleFragmenter());

    foreach (SearchResult result in results)
    {
        foreach (string highlightField in fieldsToHighlight)
        {
            if (!result.Fields.ContainsKey(highlightField))
            {
                continue;
            }

            string fieldValue = result[highlightField];
            if (fieldValue == null)
            {
                // StringReader rejects null; there is nothing to highlight anyway.
                continue;
            }

            TokenStream stream = analyzer.TokenStream(highlightField, new StringReader(fieldValue));
            string highlightedFieldValue = highlighter.GetBestFragments(stream, fieldValue, 500, "...");

            // Keep the original value when the field contains no match.
            if (!String.IsNullOrWhiteSpace(highlightedFieldValue))
            {
                result.Fields[highlightField] = highlightedFieldValue;
            }
        }
    }

    return results;
}
/// <summary>
/// Produces a preview of <paramref name="text"/> made of up to three fragments
/// (fragment size 250) joined by "...", with terms matching <paramref name="q"/> wrapped in em tags.
/// </summary>
/// <param name="q">The query whose terms are highlighted.</param>
/// <param name="text">The body text to take the preview from.</param>
/// <returns>The joined, highlighted fragments as returned by the highlighter.</returns>
private string GeneratePreviewText(Query q, string text)
{
    QueryScorer queryScorer = new QueryScorer(q);
    SimpleHTMLFormatter emphasis = new SimpleHTMLFormatter("<em>", "</em>");

    Highlighter previewHighlighter = new Highlighter(emphasis, queryScorer);
    previewHighlighter.SetTextFragmenter(new SimpleFragmenter(250));

    StandardAnalyzer bodyAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
    TokenStream bodyTokens = bodyAnalyzer.TokenStream("bodyText", new StringReader(text));

    string preview = previewHighlighter.GetBestFragments(bodyTokens, text, 3, "...");
    return preview;
}
/// <summary>
/// Runs <paramref name="query"/> against the index stored at <c>_indexPath</c> and returns one page
/// of hits, optionally with a highlighted content snippet for each hit.
/// </summary>
/// <param name="query">The Lucene query to execute.</param>
/// <param name="cachePath">Path prefix prepended to the relative cache file name when reading a hit's cached content.</param>
/// <param name="offset">Index of the first hit to include in the page.</param>
/// <param name="resultAmount">Maximum number of hits to include, starting at <paramref name="offset"/>.</param>
/// <param name="includeContentSnippets">When true, the cached file of each hit is read and a snippet is extracted from it.</param>
/// <param name="limit">Maximum number of hits requested from the searcher.</param>
/// <returns>The page of results; <c>NumResults</c> holds the number of hits the searcher returned.</returns>
private SearchResults Query(Query query, string cachePath, int offset, int resultAmount, bool includeContentSnippets, int limit)
{
    SearchResults results = new SearchResults();

    Lucene.Net.Store.FSDirectory directory = null;
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try
    {
        directory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(_indexPath));
        reader = IndexReader.Open(directory, true);
        searcher = new IndexSearcher(reader);

        // Request all results up to the page we actually need (this is quick).
        TopDocs topDocs = searcher.Search(query, limit);
        ScoreDoc[] hits = topDocs.scoreDocs;

        // Save the number of results.
        results.NumResults = hits.Length;

        // Only loop through the hits that should be on the page.
        for (int i = offset; i < hits.Length && i < offset + resultAmount; i++)
        {
            int docId = hits[i].doc;
            Document doc = searcher.Doc(docId);

            if (!includeContentSnippets)
            {
                results.AddLuceneDocument(doc);
                continue;
            }

            // Read the whole file from the cache to find the content snippet.
            string filepath = CacheManager.GetRelativeCacheFileName(doc.Get("uri"), "GET");
            string documentContent = Utils.ReadFileAsString(cachePath + filepath);

            // Remove unusable stuff.
            documentContent = HtmlUtils.RemoveHead(documentContent);
            documentContent = HtmlUtils.ExtractText(documentContent);

            // Find (and highlight) content snippets.
            QueryScorer scorer = new QueryScorer(query);
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.SetTextFragmenter(new SentenceFragmenter());
            TokenStream stream = _analyzer.TokenStream("content", new StringReader(documentContent));

            // Get 1 fragment.
            string contentSnippet = "";
            try
            {
                string[] fragments = highlighter.GetBestFragments(stream, documentContent, 1);
                if (fragments != null && fragments.Length > 0)
                {
                    contentSnippet = HtmlUtils.StripTagsCharArray(fragments[0], false);

                    // If the snippet ends in the middle of a sentence, append "...".
                    // An empty snippet is left alone instead of being indexed at Length - 1.
                    if (!string.IsNullOrEmpty(contentSnippet)
                        && ".!?".IndexOf(contentSnippet[contentSnippet.Length - 1]) < 0)
                    {
                        contentSnippet += "...";
                    }
                }
            }
            catch (Exception)
            {
                // Snippet extraction is best-effort: a highlighting failure must not fail the
                // whole search, so the hit is still added with whatever snippet we have.
            }

            results.AddLuceneDocument(doc, contentSnippet);
        }
    }
    finally
    {
        // Closing the searcher does not close a reader that was handed to it, so the reader
        // and the directory are released explicitly, also when an exception escapes.
        if (searcher != null)
        {
            searcher.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        if (directory != null)
        {
            directory.Close();
        }
    }

    return results;
}
/// <summary>
/// Returns up to three highlighted fragments of <paramref name="IndexField"/>, joined by "..."
/// and followed by a trailing "...". Matching terms are wrapped in a span with the CSS class
/// <c>umbSearchHighlight</c>.
/// </summary>
/// <param name="IndexField">The text to highlight. Null or empty yields an empty string.</param>
/// <param name="LuceneIndex">Passed through to <c>FragmentScorer</c>; presumably identifies the index to use (confirm against that helper).</param>
/// <param name="searchQuery">The search query text, passed through to <c>FragmentScorer</c>.</param>
/// <param name="highlightField">Field name used when tokenizing the text; also passed through to <c>FragmentScorer</c>.</param>
/// <returns>The highlighted fragments followed by "...", or an empty string when no fragment was produced.</returns>
public static string GetHighlight(string IndexField, string LuceneIndex, string searchQuery, string highlightField)
{
    // StringReader rejects null, and an empty text cannot produce a fragment.
    if (string.IsNullOrEmpty(IndexField))
    {
        return string.Empty;
    }

    var formatter = new SimpleHTMLFormatter("<span class=\"umbSearchHighlight\">", "</span>");
    var highlighter = new Highlighter(formatter, FragmentScorer(searchQuery, highlightField, LuceneIndex));
    var tokenStream = new SnowballAnalyzer("English").TokenStream(highlightField, new StringReader(IndexField));

    string fragments = highlighter.GetBestFragments(tokenStream, IndexField, 3, "...");

    return string.IsNullOrEmpty(fragments) ? string.Empty : fragments + "...";
}
/// <summary>
/// Searches the two local indexes ("index\1.5\" and "index\1.4\" below the mapped site path) in
/// parallel for <c>this.Query</c>, fills <c>this.Results</c> with one row per hit on the current
/// page (columns "link", "title", "sample", "path") and records the paging and timing information
/// (<c>total</c>, <c>startAt</c>, <c>fromItem</c>, <c>toItem</c>, <c>duration</c>).
/// </summary>
private void search()
{
    DateTime start = DateTime.Now;

    // Locations of the two index directories.
    string indexDirectory1 = Server.MapPath("./") + "index\\1.5\\";
    string indexDirectory2 = Server.MapPath("./") + "index\\1.4\\";

    // NOTE: an optional third index on a network share (\\192.168.1.130\index\1.5) used to be
    // added to the searcher when the directory existed; that code path is disabled.
    IndexSearcher searcher1 = null;
    IndexSearcher searcher2 = null;
    Lucene.Net.Search.ParallelMultiSearcher searcher = null;

    try
    {
        searcher1 = new IndexSearcher(indexDirectory1);
        searcher2 = new IndexSearcher(indexDirectory2);
        searcher = new Lucene.Net.Search.ParallelMultiSearcher(new Lucene.Net.Search.Searchable[] { searcher1, searcher2 });

        // Multi-field query: a term may match any of these fields (OR is the default operator).
        string[] strs = new string[] { "text", "path", "fullpath", "keywords", "description", "title" };
        Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(strs, new StandardAnalyzer());
        parser.SetDefaultOperator(Lucene.Net.QueryParsers.QueryParser.OR_OPERATOR);

        // Columns of the result table.
        this.Results.Columns.Add("link", typeof(string));
        this.Results.Columns.Add("title", typeof(string));
        this.Results.Columns.Add("sample", typeof(string));
        this.Results.Columns.Add("path", typeof(string));

        // Parse and run the query.
        Query query = parser.Parse(this.Query);
        Hits hits = searcher.Search(query);
        this.total = hits.Length();

        // Highlight matching terms in bold red; fragments are 100 characters long.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<B style='color:Red;'>", "</B>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
        highlighter.SetTextFragmenter(new SimpleFragmenter(100));

        // Initialize startAt.
        this.startAt = initStartAt();

        // Index just past the last hit shown on this page.
        int resultsCount = smallerOf(total, this.maxResults + this.startAt);

        for (int i = startAt; i < resultsCount; i++)
        {
            Document doc = hits.Doc(i);

            // The title gets a trailing space appended before it is tokenized and highlighted.
            string title = doc.Get("title") + " ";

            // Document text, cut via Search.CutString with a limit argument of 480.
            System.String text = Search.CutString(doc.Get("text"), 480);

            // Displayed path and full path of the document.
            string path = doc.Get("path");
            string orpath = doc.Get("fullpath");

            Lucene.Net.Analysis.TokenStream titleStream = analyzer.TokenStream("title", new System.IO.StringReader(title));
            Lucene.Net.Analysis.TokenStream tokenStream = analyzer.TokenStream("text", new System.IO.StringReader(text));
            Lucene.Net.Analysis.TokenStream pathStream = analyzer.TokenStream("path", new System.IO.StringReader(path));

            System.String result = highlighter.GetBestFragments(tokenStream, text, 2, "...");
            string tresult = highlighter.GetBestFragments(titleStream, title, 2, "..");
            string pathwords = highlighter.GetBestFragments(pathStream, path, 2, "..");

            // Build the row shown for this hit; fall back to the plain value whenever the
            // highlighter produced nothing for a field.
            DataRow row = this.Results.NewRow();

            if (tresult == "")
            {
                row["title"] = title;
            }
            else
            {
                row["title"] = tresult;
            }

            if (getpath(row, System.IO.Path.GetFileName(path.Replace("\\", "/"))))
            {
                row["link"] = getFullpath(System.IO.Path.GetFileName(doc.Get("path")));
            }
            else
            {
                row["link"] = orpath;
                if (pathwords == "")
                {
                    row["path"] = orpath;
                }
                else
                {
                    row["path"] = pathwords.Replace("\\", "/");
                }
            }

            if (result == "")
            {
                row["sample"] = text;
            }
            else
            {
                row["sample"] = result;
            }

            this.Results.Rows.Add(row);
        }
    }
    finally
    {
        // Release the index files even when parsing, searching or highlighting throws.
        // Closing the multi-searcher closes the searchers it wraps; if it was never created,
        // close whichever sub-searchers were opened.
        if (searcher != null)
        {
            searcher.Close();
        }
        else
        {
            if (searcher1 != null)
            {
                searcher1.Close();
            }
            if (searcher2 != null)
            {
                searcher2.Close();
            }
        }
    }

    // Timing and paging information.
    this.duration = DateTime.Now - start;
    this.fromItem = startAt + 1;
    this.toItem = smallerOf(startAt + maxResults, total);
}