/// <summary>
		/// Creates the helper with its default highlighting settings: a "..."
		/// separator, at most five highlights, a <c>StandardAnalyzer</c> bound to
		/// <c>_luceneVersion</c>, and a formatter whose opening tag is empty and
		/// whose closing tag is a single space.
		/// </summary>
		private LuceneHighlightHelper()
		{
			// Scalar defaults first, in the same order as before.
			Separator = "...";
			MaxNumHighlights = 5;

			// Analyzer used to tokenize text that is about to be highlighted.
			var defaultAnalyzer = new StandardAnalyzer(_luceneVersion);
			HighlightAnalyzer = defaultAnalyzer;

			// Matched terms get no prefix and a trailing space as suffix.
			var defaultFormatter = new SimpleHTMLFormatter("", " ");
			HighlightFormatter = defaultFormatter;
		}
Example #2
0
        /// <summary>
        /// Builds an HTML preview of <paramref name="text"/> in which the terms of
        /// the current query (obtained from <c>GetQuery()</c>) are wrapped in a
        /// maroon, bold <c>span</c>.
        /// </summary>
        /// <param name="text">The plain content to search for query terms.</param>
        /// <returns>
        /// Up to three highlighted fragments, each wrapped in a <c>p</c> element with
        /// " ... " marking the sides where the fragment was cut out of a longer text.
        /// When nothing matches, the first 120 characters of the text followed by
        /// "..." (or the whole text if it is shorter). An empty string when
        /// <paramref name="text"/> is null or empty.
        /// </returns>
        public string HighlightContent(string text)
        {
            const string preTag = "<span style='color:maroon; font-weight:bold;'>";
            const string postTag = "</span>";
            const int fragmentSize = 120;

            // StringReader rejects null, and there is nothing to highlight in "".
            if (string.IsNullOrEmpty(text)) return string.Empty;

            QueryScorer scorer = new QueryScorer(GetQuery());
            Formatter formatter = new SimpleHTMLFormatter(preTag, postTag);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.SetTextFragmenter(new SimpleFragmenter(fragmentSize));
            TokenStream stream = new StandardAnalyzer().TokenStream("Content", new StringReader(text));

            var fragments = highlighter.GetBestFragments(stream, text, 3);

            if (fragments == null || fragments.Length == 0)
                return text.Length > fragmentSize ? text.Substring(0, fragmentSize) + "..." : text;

            var highlighted = new System.Text.StringBuilder();

            foreach (var fragment in fragments)
            {
                // The fragment contains the highlight markup, so it can never be
                // compared with the raw text directly. Strip the tags first so the
                // start/end detection actually works.
                // NOTE(review): assumes the source text does not itself contain the
                // exact highlight tags - confirm if the content may hold HTML.
                string plain = fragment.Replace(preTag, string.Empty).Replace(postTag, string.Empty);

                if (text.StartsWith(plain, System.StringComparison.Ordinal))
                    highlighted.Append("<p>").Append(fragment).Append(" ... </p>");
                else if (text.EndsWith(plain, System.StringComparison.Ordinal))
                    highlighted.Append("<p> ... ").Append(fragment).Append("</p>");
                else
                    highlighted.Append("<p> ... ").Append(fragment).Append(" ... </p>");
            }

            return highlighted.ToString();
        }
Example #3
0
        /// <summary>
        /// Returns <paramref name="text"/> with the terms of the current query
        /// (obtained from <c>GetQuery()</c>) wrapped in a maroon, bold <c>span</c>.
        /// The title is never split: a <c>NullFragmenter</c> is used, so the whole
        /// text is treated as a single fragment.
        /// </summary>
        /// <param name="text">The title to highlight.</param>
        /// <returns>
        /// The highlighted title, the unchanged title when the highlighter yields no
        /// fragment, or an empty string when <paramref name="text"/> is null or empty.
        /// </returns>
        public string HighlightTitle(string text)
        {
            // StringReader rejects null; an empty title has nothing to highlight.
            if (string.IsNullOrEmpty(text)) return string.Empty;

            QueryScorer scorer = new QueryScorer(GetQuery());

            Formatter formatter = new SimpleHTMLFormatter("<span style='color:maroon; font-weight:bold;'>", "</span>");
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.SetTextFragmenter(new NullFragmenter());
            TokenStream stream = new StandardAnalyzer().TokenStream("Title", new StringReader(text));
            var title = highlighter.GetBestFragment(stream, text);

            // GetBestFragment yields null when no fragment qualifies; fall back to the raw title.
            return title ?? text;
        }
				/// <summary>
				/// Parses <paramref name="luceneQuery"/> against <c>searchFields</c>, runs it on
				/// <c>searcher</c> and wraps every hit in a <see cref="SearchResult"/>. When both
				/// highlight tags and at least one field name are supplied, the listed fields of
				/// each result are replaced by their highlighted fragments.
				/// </summary>
				/// <param name="luceneQuery">Query text in Lucene query syntax; blank input yields an empty list.</param>
				/// <param name="maxResults">Maximum number of hits requested from the searcher.</param>
				/// <param name="highlightOpenTag">Markup inserted before a matched term; highlighting is skipped when null or empty.</param>
				/// <param name="highlightCloseTag">Markup inserted after a matched term; highlighting is skipped when null or empty.</param>
				/// <param name="fieldsToHighlight">Names of the result fields to highlight.</param>
				/// <returns>The results in the order returned by the searcher.</returns>
				public IEnumerable<SearchResult> Search(string luceneQuery, int maxResults = 500, string highlightOpenTag = null, string highlightCloseTag = null, params string[] fieldsToHighlight)
				{
						var results = new List<SearchResult>();
						if (String.IsNullOrWhiteSpace(luceneQuery)) return results;

						// One analyzer serves both query parsing and highlighting; it was
						// previously re-created for every field of every result.
						var analyzer = new StandardAnalyzer(LuceneVersion);
						var parser = new MultiFieldQueryParser(LuceneVersion, searchFields, analyzer);
						Query query = parser.Parse(luceneQuery);
						TopDocs topDocs = searcher.Search(query, maxResults);

						foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
						{
								Document document = reader.Document(scoreDoc.doc);
								results.Add(new SearchResult(document, scoreDoc.score));
						}

						// A caller may pass an explicit null for the params array, so check it too.
						bool highlightRequested = !String.IsNullOrEmpty(highlightOpenTag)
								&& !String.IsNullOrEmpty(highlightCloseTag)
								&& fieldsToHighlight != null
								&& fieldsToHighlight.Length > 0;
						if (!highlightRequested) return results;

						var scorer = new QueryScorer(query);
						var formatter = new SimpleHTMLFormatter(highlightOpenTag, highlightCloseTag);
						var highlighter = new Highlighter(formatter, scorer);
						highlighter.SetTextFragmenter(new SimpleFragmenter());

						foreach (SearchResult result in results)
						{
								foreach (string highlightField in fieldsToHighlight)
								{
										if (!result.Fields.ContainsKey(highlightField)) continue;

										string fieldValue = result[highlightField];
										// StringReader rejects null, and an empty value cannot produce a fragment.
										if (String.IsNullOrEmpty(fieldValue)) continue;

										TokenStream stream = analyzer.TokenStream(highlightField, new StringReader(fieldValue));
										string highlightedFieldValue = highlighter.GetBestFragments(stream, fieldValue, 500, "...");

										// Keep the original value when the field contains no query term.
										if (!String.IsNullOrWhiteSpace(highlightedFieldValue))
										{
												result.Fields[highlightField] = highlightedFieldValue;
										}
								}
						}
						return results;
				}
        /// <summary>
        /// Produces a preview of <paramref name="text"/> in which the terms matching
        /// <paramref name="q"/> are wrapped in <c>em</c> elements.
        /// </summary>
        /// <param name="q">The query whose terms are highlighted.</param>
        /// <param name="text">The body text to extract fragments from.</param>
        /// <returns>Up to three fragments produced by a 250-sized fragmenter, joined by "...".</returns>
        private string GeneratePreviewText(Query q, string text)
        {
            const int fragmentLength = 250;
            const int maxFragments = 3;

            QueryScorer termScorer = new QueryScorer(q);
            Highlighter previewHighlighter = new Highlighter(new SimpleHTMLFormatter("<em>", "</em>"), termScorer);
            previewHighlighter.SetTextFragmenter(new SimpleFragmenter(fragmentLength));

            // Tokenize the body with the same analyzer version as before.
            StandardAnalyzer bodyAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
            TokenStream bodyTokens = bodyAnalyzer.TokenStream("bodyText", new StringReader(text));

            string preview = previewHighlighter.GetBestFragments(bodyTokens, text, maxFragments, "...");
            return preview;
        }
Example #6
0
        /// <summary>
        /// Runs <paramref name="query"/> against the index stored at <c>_indexPath</c>
        /// and returns one page of hits, optionally with a highlighted content snippet
        /// read from the cached copy of each document.
        /// </summary>
        /// <param name="query">The parsed Lucene query.</param>
        /// <param name="cachePath">Directory prefix prepended to the relative cache file name of each hit.</param>
        /// <param name="offset">Index of the first hit to include in the page.</param>
        /// <param name="resultAmount">Maximum number of hits to include in the page.</param>
        /// <param name="includeContentSnippets">Whether to read each cached file and extract a snippet.</param>
        /// <param name="limit">Maximum number of hits requested from the searcher.</param>
        /// <returns>
        /// The page of results; <c>NumResults</c> holds the number of hits the searcher
        /// returned (at most <paramref name="limit"/>).
        /// </returns>
        private SearchResults Query(Query query, string cachePath,
            int offset, int resultAmount, bool includeContentSnippets, int limit)
        {
            SearchResults results = new SearchResults();
            Lucene.Net.Store.FSDirectory directory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(_indexPath));
            IndexReader reader = null;
            IndexSearcher searcher = null;

            try
            {
                reader = IndexReader.Open(directory, true);
                searcher = new IndexSearcher(reader);

                // Request all results up to the page we actually need (this is quick)
                TopDocs topDocs = searcher.Search(query, limit);
                ScoreDoc[] hits = topDocs.scoreDocs;
                // Save num results
                results.NumResults = hits.Length;

                // Only loop through the hits that should be on the page
                for (int i = offset; i < hits.Length && i < offset + resultAmount; i++)
                {
                    int docId = hits[i].doc;
                    Document doc = searcher.Doc(docId);

                    if (!includeContentSnippets)
                    {
                        results.AddLuceneDocument(doc);
                        continue;
                    }

                    // Read the whole file from the cache to find the content snippet.
                    string filepath = CacheManager.GetRelativeCacheFileName(doc.Get("uri"), "GET");
                    string documentContent = Utils.ReadFileAsString(cachePath + filepath);

                    // Remove unusable stuff.
                    documentContent = HtmlUtils.RemoveHead(documentContent);
                    documentContent = HtmlUtils.ExtractText(documentContent);

                    // Find (and highlight) content snippets
                    QueryScorer scorer = new QueryScorer(query);
                    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
                    Highlighter highlighter = new Highlighter(formatter, scorer);
                    highlighter.SetTextFragmenter(new SentenceFragmenter());
                    TokenStream stream = _analyzer.TokenStream("content", new StringReader(documentContent));

                    // Get 1 fragment
                    string contentSnippet = "";
                    try
                    {
                        string[] fragments = highlighter.GetBestFragments(stream, documentContent, 1);
                        if (fragments != null && fragments.Length > 0)
                        {
                            contentSnippet = HtmlUtils.StripTagsCharArray(fragments[0], false);

                            // Stripping tags can leave nothing behind; only inspect the last
                            // character when there is one instead of relying on the catch below.
                            if (contentSnippet.Length > 0)
                            {
                                char last = contentSnippet[contentSnippet.Length - 1];
                                // If the content snippet ends in the middle of a sentence, append "..."
                                if (last != '.' && last != '!' && last != '?')
                                {
                                    contentSnippet += "...";
                                }
                            }
                        }
                    }
                    catch (Exception)
                    {
                        // Deliberate best effort: a failure while building the snippet must not
                        // drop the hit, so the document is still added with whatever snippet
                        // text was produced so far (possibly empty).
                    }
                    results.AddLuceneDocument(doc, contentSnippet);
                }
            }
            finally
            {
                // A searcher built on top of an existing reader does not close that reader,
                // so searcher, reader and directory are each released explicitly, and also
                // when the search itself throws.
                if (searcher != null) searcher.Close();
                if (reader != null) reader.Close();
                directory.Close();
            }

            return results;
        }
        /// <summary>
        /// Highlights the terms of <paramref name="searchQuery"/> inside the text of an
        /// indexed field, wrapping each match in a <c>span</c> with the CSS class
        /// <c>umbSearchHighlight</c>.
        /// </summary>
        /// <param name="IndexField">The stored text of the field to highlight.</param>
        /// <param name="LuceneIndex">Index identifier handed to <c>FragmentScorer</c>.</param>
        /// <param name="searchQuery">The search text handed to <c>FragmentScorer</c>.</param>
        /// <param name="highlightField">Name of the field being highlighted; also used as the token stream's field name.</param>
        /// <returns>
        /// Up to three fragments joined by "..." with a trailing "...", or an empty
        /// string when the highlighter produces no text.
        /// </returns>
        public static string GetHighlight(string IndexField, string LuceneIndex, string searchQuery, string highlightField)
        {
            const int maxFragments = 3;
            const string ellipsis = "...";

            var highlighter = new Highlighter(
                new SimpleHTMLFormatter("<span class=\"umbSearchHighlight\">", "</span>"),
                FragmentScorer(searchQuery, highlightField, LuceneIndex));

            // English Snowball stemming is applied to the field text before matching.
            var englishAnalyzer = new SnowballAnalyzer("English");
            var fieldTokens = englishAnalyzer.TokenStream(highlightField, new StringReader(IndexField));

            string snippet = highlighter.GetBestFragments(fieldTokens, IndexField, maxFragments, ellipsis);

            return snippet.Length > 0 ? snippet + ellipsis : string.Empty;
        }
Example #8
0
    /// <summary>
    /// Runs the current query against the two on-disk indexes ("index\1.5\" and
    /// "index\1.4\" under the mapped application path), and fills <c>Results</c>
    /// with one row per hit on the requested page. Each row carries the link, the
    /// (highlighted) title, a (highlighted) text sample and the path. Also updates
    /// <c>total</c>, <c>startAt</c>, <c>duration</c>, <c>fromItem</c> and <c>toItem</c>.
    /// </summary>
    private void search()
    {
        // Stopwatch measures elapsed time independently of wall-clock adjustments.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

        IndexSearcher searcher1 = null;
        IndexSearcher searcher2 = null;
        Lucene.Net.Search.ParallelMultiSearcher searcher = null;

        try
        {
            // Open both index directories located below the application path.
            string indexDirectory1 = Server.MapPath("./") + "index\\1.5\\";
            searcher1 = new IndexSearcher(indexDirectory1);

            string indexDirectory2 = Server.MapPath("./") + "index\\1.4\\";
            searcher2 = new IndexSearcher(indexDirectory2);

            // NOTE: an optional third index on a network share (\\192.168.1.130\index\1.5)
            // used to be added here when the share existed; that code path is disabled.
            searcher = new Lucene.Net.Search.ParallelMultiSearcher(new Lucene.Net.Search.Searchable[] { searcher1, searcher2 });

            // Search across several fields at once; terms are combined with OR by default.
            string[] strs = new string[] { "text", "path", "fullpath", "keywords", "description", "title" };
            Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(strs, new StandardAnalyzer());
            parser.SetDefaultOperator(Lucene.Net.QueryParsers.QueryParser.OR_OPERATOR);

            // Columns of the result table.
            this.Results.Columns.Add("link", typeof(string));
            this.Results.Columns.Add("title", typeof(string));
            this.Results.Columns.Add("sample", typeof(string));
            this.Results.Columns.Add("path", typeof(string));

            // Parse the query text and run the search. Parse may throw on malformed
            // input; the finally block below makes sure the searchers are released then.
            Query query = parser.Parse(this.Query);
            Hits hits = searcher.Search(query);

            this.total = hits.Length();

            // Matched terms are wrapped in a red bold tag.
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<B style='color:Red;'>", "</B>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
            // Fragment size passed to the fragmenter for each highlighted excerpt.
            highlighter.SetTextFragmenter(new SimpleFragmenter(100));

            // initialize startAt
            this.startAt = initStartAt();

            // Index one past the last hit shown on this page.
            int resultsCount = smallerOf(total, this.maxResults + this.startAt);

            for (int i = startAt; i < resultsCount; i++)
            {
                Document doc = hits.Doc(i);

                // Two trailing spaces are appended to the stored title before highlighting.
                string title = doc.Get("title") + "  ";
                // Document text, cut by Search.CutString with 480 as its limit argument.
                System.String text = Search.CutString(doc.Get("text"), 480);
                string path = doc.Get("path");
                string orpath = doc.Get("fullpath");

                Lucene.Net.Analysis.TokenStream titkeStream = analyzer.TokenStream("title", new System.IO.StringReader(title));
                Lucene.Net.Analysis.TokenStream tokenStream = analyzer.TokenStream("text", new System.IO.StringReader(text));
                Lucene.Net.Analysis.TokenStream pathStream = analyzer.TokenStream("path", new System.IO.StringReader(path));

                System.String result = highlighter.GetBestFragments(tokenStream, text, 2, "...");
                string tresult = highlighter.GetBestFragments(titkeStream, title, 2, "..");
                string pathwords = highlighter.GetBestFragments(pathStream, path, 2, "..");

                DataRow row = this.Results.NewRow();

                // Fall back to the plain value whenever highlighting produced nothing.
                row["title"] = tresult == "" ? title : tresult;

                // NOTE(review): getpath receives the row and presumably fills row["path"]
                // itself when it returns true - confirm against its definition.
                if (getpath(row, System.IO.Path.GetFileName(path.Replace("\\", "/"))))
                {
                    row["link"] = getFullpath(System.IO.Path.GetFileName(doc.Get("path")));
                }
                else
                {
                    row["link"] = orpath;
                    row["path"] = pathwords == "" ? orpath : pathwords.Replace("\\", "/");
                }

                row["sample"] = result == "" ? text : result;

                this.Results.Rows.Add(row);
            }
        }
        finally
        {
            // Closing the multi-searcher closes the searchers it wraps; if it was never
            // created, close whichever individual searchers were opened.
            if (searcher != null)
            {
                searcher.Close();
            }
            else
            {
                if (searcher1 != null) searcher1.Close();
                if (searcher2 != null) searcher2.Close();
            }
        }

        // Paging and timing information for the result page.
        this.duration = timer.Elapsed;
        this.fromItem = startAt + 1;
        this.toItem = smallerOf(startAt + maxResults, total);
    }