/// <summary>
/// Searches the article index for <paramref name="strQuery"/> and renders the hits as an HTML fragment.
/// </summary>
/// <param name="strQuery">
/// Query text in Lucene query-parser syntax; unqualified terms are searched in the "ArticleDetail" field.
/// </param>
/// <returns>
/// A header with the hit count followed by one block per hit: the title linked to
/// <c>/?ArticleId=...</c> and a highlighted excerpt of the article detail.
/// </returns>
/// <remarks>
/// Exceptions raised while opening the index, parsing the query or searching propagate to the caller;
/// the index reader is closed in every case.
/// </remarks>
public string Search(string strQuery)
{
    System.Text.StringBuilder html = new System.Text.StringBuilder();

    Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(
        Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"]));
    try
    {
        // One analyzer instance serves both query parsing and per-hit tokenizing.
        Lucene.Net.Analysis.Standard.StandardAnalyzer standardAnalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
        Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", standardAnalyzer);
        Lucene.Net.Search.Query query = parser.Parse(strQuery);

        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader);
        Lucene.Net.Search.Hits hits = searcher.Search(query);

        Lucene.Net.Highlight.QueryScorer score = new Lucene.Net.Highlight.QueryScorer(query);
        Lucene.Net.Highlight.SimpleHTMLFormatter formater = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>");
        Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formater, score);

        int hitCount = hits.Length();

        html.Append("<div align='right' style='background-color:#F0F7F9; padding-right:15px' height='30px'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #005482; FONT-FAMILY: arial'>Kết quả tìm thấy : ");
        html.Append(hitCount);
        html.Append(" </font></div>");
        html.Append("<div style='padding: 10px 10px 10px 10px;'>");

        for (int i = 0; i < hitCount; i++)
        {
            // Fetch the stored document once per hit instead of once per field.
            Lucene.Net.Documents.Document doc = hits.Doc(i);
            string id = doc.Get("ArticleId");
            string title = doc.Get("ArticleTitle");

            // A document without a stored detail must not crash the whole page:
            // StringReader rejects null, so fall back to an empty text.
            string detail = doc.Get("ArticleDetail") ?? string.Empty;

            Lucene.Net.Analysis.TokenStream ts = standardAnalyzer.TokenStream("ArticleDetail", new System.IO.StringReader(detail));

            // NOTE(review): id, title and the highlighted detail are written into the markup
            // without HTML encoding. Confirm the indexed values are trusted / already encoded.
            html.AppendFormat("<div align='left'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #5b5b5b; FONT-FAMILY: arial'><a href='/?ArticleId={0}'>{1}</a></font>", id, title);
            html.AppendFormat("<div align='left'><font style='FONT-SIZE: 9pt' face='Arial' color='#005482'>...{0}...</font></div></div></br>", highlighter.GetBestFragment(ts, detail));
        }

        html.Append("</div>");
    }
    finally
    {
        // Hits load documents lazily through the reader, so it is closed only after rendering,
        // and also when anything above throws.
        reader.Close();
    }

    return html.ToString();
}
/// <summary>
/// Searches the article index for <paramref name="strQuery"/>, restricted to the published articles
/// of the pages that belong to the current sub-domain, and returns one page of results as JSON.
/// </summary>
/// <param name="strQuery">Query text in Lucene query-parser syntax, applied to the "ArticleDetail" field.</param>
/// <param name="index">1-based page number as a string; each page holds 20 hits.</param>
/// <returns>
/// A JSON object with "Count" (total number of hits) and "Data" (the <c>SearchArticle</c> items of the
/// requested page). When the sub-domain has no published articles the result is "Count" 0 with empty "Data".
/// On any failure (invalid page number, query syntax error, index error) an empty string is returned
/// and the exception is written to the trace output.
/// </returns>
public string SearchAndPaging(string strQuery, string index)
{
    const int PageSize = 20;
    string result = string.Empty;

    try
    {
        int pageIndex = Int32.Parse(index);
        if (pageIndex < 1)
        {
            // A page below 1 would address a negative hit number; fail explicitly instead.
            throw new ArgumentOutOfRangeException("index");
        }

        PSCPortal.CMS.ArticleCollection ArticleList = ArticleCollection.GetArticleCollectionPublish();

        string nameSub = Libs.Ultility.GetSubDomain();
        if (nameSub == string.Empty)
        {
            nameSub = "HomePage";
        }

        SubDomain subDomain = PSCPortal.Engine.SubDomain.GetSubByName(nameSub);
        PageCollection pagesBelongTo = subDomain.GetPagesBelongTo();

        // Collect the ids first and join them once. Trimming a trailing " OR " after every page
        // would cut into already collected ids whenever a page has no articles.
        List<string> articleIds = new List<string>();
        foreach (var page in pagesBelongTo)
        {
            foreach (var ar in ArticleList.Where(ar => ar.PageId == page.Id))
            {
                articleIds.Add(Convert.ToString(ar.Id));
            }
        }

        List<SearchArticle> searchArticleList = new List<SearchArticle>();
        int totalHits = 0;

        // With no candidate articles the id clause would be empty and unparsable; there is
        // simply nothing to find, so skip the index altogether.
        if (articleIds.Count > 0)
        {
            string strId = string.Join(" OR ", articleIds.ToArray());
            string strSearch = " ArticleDetail:(" + strQuery + ") AND ArticleId:" + "( " + strId + " )";

            Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(
                Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"]));
            try
            {
                Lucene.Net.Analysis.Standard.StandardAnalyzer standardAnalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
                Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", standardAnalyzer);
                Lucene.Net.Search.Query query = parser.Parse(strSearch);

                Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader);
                Lucene.Net.Search.Hits hits = searcher.Search(query);

                Lucene.Net.Highlight.QueryScorer score = new Lucene.Net.Highlight.QueryScorer(query);
                Lucene.Net.Highlight.SimpleHTMLFormatter formater = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>");
                Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formater, score);

                totalHits = hits.Length();

                int first = (pageIndex - 1) * PageSize;
                int last = pageIndex * PageSize;
                for (int i = first; i < last && i < totalHits; i++)
                {
                    Lucene.Net.Documents.Document doc = hits.Doc(i);

                    // StringReader rejects null, so a hit without stored detail gets an empty text.
                    string detail = doc.Get("ArticleDetail") ?? string.Empty;
                    Lucene.Net.Analysis.TokenStream ts = standardAnalyzer.TokenStream("ArticleDetail", new System.IO.StringReader(detail));

                    SearchArticle searchArticle = new SearchArticle();
                    searchArticle.Id = doc.Get("ArticleId");
                    searchArticle.Title = doc.Get("ArticleTitle");
                    searchArticle.Highligth = highlighter.GetBestFragment(ts, detail);
                    searchArticleList.Add(searchArticle);
                }
            }
            finally
            {
                // Close the reader even when parsing, searching or highlighting fails.
                reader.Close();
            }
        }

        Dictionary<string, object> resultDic = new Dictionary<string, object>();
        resultDic["Count"] = totalHits;
        resultDic["Data"] = searchArticleList;

        JavaScriptSerializer serializer = new JavaScriptSerializer();
        result = serializer.Serialize(resultDic);
    }
    catch (Exception e)
    {
        // Callers still receive an empty string on failure, but the cause is no longer lost.
        System.Diagnostics.Trace.TraceError("SearchAndPaging failed: {0}", e);
    }

    return result;
}
/// <summary>
/// Checks that a <c>QueryScorer</c> built with a field name only highlights query terms targeting
/// that field, while a scorer built without a field name highlights the terms of every field.
/// </summary>
public virtual void TestFieldSpecificHighlighting()
{
    try
    {
        System.String docMainText = "fred is one of the people";
        QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
        Query query = parser.Parse("fred category:people");

        // Highlighting respects the field names used in the query: "people" is a term of the
        // "category" field, so it stays unmarked when scoring for "contents".
        QueryScorer fieldSpecificScorer = new QueryScorer(query, "contents");
        Highlighter fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldSpecificScorer);
        fieldSpecificHighlighter.SetTextFragmenter(new NullFragmenter());
        System.String result = fieldSpecificHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
        // Expected value goes first so a failure reports expected/actual the right way round.
        Assert.AreEqual("<B>fred</B> is one of the people", result, "Should match");

        // Highlighting ignores the field names used in the query: both terms are marked.
        QueryScorer fieldInSpecificScorer = new QueryScorer(query);
        Highlighter fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldInSpecificScorer);
        fieldInSpecificHighlighter.SetTextFragmenter(new NullFragmenter());
        result = fieldInSpecificHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
        Assert.AreEqual("<B>fred</B> is one of the <B>people</B>", result, "Should match");
    }
    finally
    {
        // Close the fixture's reader even when an assertion above fails.
        reader.Close();
    }
}
/// <summary>
/// Searches the "content" field of the index for the words of <paramref name="keyword"/> as a phrase
/// and returns one page of matching <c>Content</c> items with a highlighted excerpt.
/// </summary>
/// <param name="keyword">Space-separated search words; they are lower-cased and matched as a phrase.</param>
/// <param name="page">1-based page number. Values below 1 yield no items.</param>
/// <param name="pageSize">Maximum number of items per page.</param>
/// <param name="totals">Receives the total number of hits; 0 when <paramref name="keyword"/> holds no words.</param>
/// <returns>The items of the requested page, each with <c>HighlightText</c> set; never null.</returns>
public IEnumerable<Content> Search(string keyword, int page, int pageSize, out int totals)
{
    lock (locker)
    {
        List<Content> result = new List<Content>();
        totals = 0;

        if (keyword == null)
        {
            return result;
        }

        // Drop empty entries so repeated spaces do not add empty terms to the phrase.
        string[] words = keyword.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        if (words.Length == 0)
        {
            return result;
        }

        PhraseQuery phrase = new PhraseQuery();
        foreach (string word in words)
        {
            phrase.Add(new Term("content", word.ToLower()));
        }

        BooleanQuery myquery = new BooleanQuery();
        myquery.Add(phrase, BooleanClause.Occur.MUST);

        IndexReader reader = IndexReader.Open(DBNLConfigurationManager.LuceneElement.IndexingFolder);
        IndexSearcher searcher = null;
        try
        {
            searcher = new IndexSearcher(reader);
            Hits hits = searcher.Search(myquery);
            totals = hits.Length();

            Lucene.Net.Highlight.Formatter formatter = new Lucene.Net.Highlight.SimpleHTMLFormatter(
                "<span class=\"Highlight\">",
                "</span>");
            Lucene.Net.Highlight.SimpleFragmenter fragmenter = new Lucene.Net.Highlight.SimpleFragmenter(400);
            Lucene.Net.Highlight.QueryScorer scorer = new Lucene.Net.Highlight.QueryScorer(myquery);
            Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formatter, scorer);
            highlighter.SetTextFragmenter(fragmenter);

            // Clamp the window so a page below 1 gives an empty page instead of a negative hit index.
            int first = Math.Max(0, (page - 1) * pageSize);
            int last = Math.Min(page * pageSize, totals);

            for (int i = first; i < last; i++)
            {
                Document doc = hits.Doc(i);

                // StringReader rejects null; treat a missing stored text as empty.
                string raw_text = doc.Get("content") ?? string.Empty;
                Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(raw_text));

                // NOTE(review): apostrophes are doubled as in SQL escaping although the text is
                // only stored on the item here. Kept for compatibility; confirm consumers need it.
                string highlighted_text = highlighter.GetBestFragments(stream, raw_text, 1, "...").Replace("'", "''");
                if (highlighted_text == "")
                {
                    // Sometimes the highlighter emits no text; fall back to the raw text.
                    highlighted_text = raw_text.Replace("'", "''");
                }

                if (highlighted_text.Length > 500)
                {
                    highlighted_text = highlighted_text.Substring(0, 500);
                }

                Content content = new ContentService().GetItem(int.Parse(doc.Get("id")));
                if (content == null)
                {
                    // The index can reference an item that no longer exists; skip it.
                    continue;
                }

                content.HighlightText = highlighted_text;
                result.Add(content);
            }
        }
        finally
        {
            // Release searcher and reader even when searching or highlighting throws.
            if (searcher != null)
            {
                searcher.Close();
            }

            reader.Close();
        }

        return result;
    }
}
/// <summary>
/// Searches the "text" field of the index for <paramref name="keyword"/> and concatenates a
/// highlighted excerpt for each of the first 100 distinct document ids.
/// </summary>
/// <param name="keyword">Query text in Lucene query-parser syntax.</param>
/// <returns>
/// The excerpts, each followed by the literal suffix <c>')</c> and a newline. An empty string is
/// returned when <paramref name="keyword"/> is null or empty, cannot be parsed, or the search fails;
/// parse and search failures are written to the trace output.
/// </returns>
public string Query(string keyword)
{
    if (string.IsNullOrEmpty(keyword))
    {
        return string.Empty;
    }

    Lucene.Net.Search.Query query;
    try
    {
        Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("text", analyzer);
        query = parser.Parse(keyword);
    }
    catch (Exception e)
    {
        // Without a query there is nothing to search or highlight.
        System.Diagnostics.Trace.TraceError("Query: cannot parse keyword: {0}", e);
        return string.Empty;
    }

    lock (locker)
    {
        Lucene.Net.Search.Hits hits;
        try
        {
            if (searcher == null)
            {
                searcher = new Lucene.Net.Search.IndexSearcher(DBNLConfigurationManager.LuceneElement.IndexingFolder);
            }

            hits = searcher.Search(query);
        }
        catch (Exception e)
        {
            System.Diagnostics.Trace.TraceError("Query: search failed: {0}", e);
            return string.Empty;
        }

        Lucene.Net.Highlight.Formatter formatter = new Lucene.Net.Highlight.SimpleHTMLFormatter(
            "<span style=\"background:yellow;color:red;\">",
            "</span>");
        Lucene.Net.Highlight.SimpleFragmenter fragmenter = new Lucene.Net.Highlight.SimpleFragmenter(400);
        Lucene.Net.Highlight.QueryScorer scorer = new Lucene.Net.Highlight.QueryScorer(query);
        Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formatter, scorer);
        highlighter.SetTextFragmenter(fragmenter);

        StringBuilder sb = new StringBuilder();
        HashSet<string> seenIds = new HashSet<string>();
        int hitCount = hits.Length();

        // Stop as soon as 100 distinct ids have been emitted.
        for (int i = 0; i < hitCount && seenIds.Count < 100; i++)
        {
            Lucene.Net.Documents.Document doc = hits.Doc(i);
            string id = doc.Get("id");
            if (!seenIds.Add(id))
            {
                // Only the first hit per id is emitted.
                continue;
            }

            // StringReader rejects null; treat a missing stored text as empty.
            string raw_text = doc.Get("raw_text") ?? string.Empty;
            Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(raw_text));

            string highlighted_text = highlighter.GetBestFragments(stream, raw_text, 1, "...").Replace("'", "''");
            if (highlighted_text == "")
            {
                // Sometimes the highlighter emits no text; fall back to the raw text.
                highlighted_text = raw_text.Replace("'", "''");
            }

            if (highlighted_text.Length > 3000)
            {
                highlighted_text = highlighted_text.Substring(0, 3000);
            }

            // NOTE(review): the doubled apostrophes and the "')" suffix look like the tail of an
            // SQL statement. Kept byte-for-byte because callers may rely on it; confirm.
            sb.Append(highlighted_text);
            sb.Append("'");
            sb.Append(")\n");
        }

        return sb.ToString();
    }
}
/// <summary>
/// Stems <paramref name="searchText"/>, runs it against the search index and returns the first
/// 100 distinct entries with highlighted excerpts.
/// </summary>
/// <param name="searchText">The text typed by the user.</param>
/// <returns>
/// A <c>SearchModel</c>. When the input is empty, the search fails, or nothing is found, only
/// <c>Message</c> carries a user-facing explanation; otherwise <c>List</c> holds the entries and
/// <c>SearchPhrase</c> echoes <paramref name="searchText"/>.
/// </returns>
public SearchModel Search(string searchText)
{
    const string EmptyQueryMessage = "Įveskite paieškos užklausą.";

    var result = new SearchModel();

    if (string.IsNullOrEmpty(searchText))
    {
        result.Message = EmptyQueryMessage;
        return result;
    }

    var stemmedSearchText = new LithuanianStemmer().Stem(searchText.Trim());
    if (string.IsNullOrEmpty(stemmedSearchText))
    {
        result.Message = EmptyQueryMessage;
        return result;
    }

    Lucene.Net.Search.Hits hits;
    try
    {
        // Turn a query ending in a letter into a prefix search.
        if (char.IsLetter(stemmedSearchText[stemmedSearchText.Length - 1]))
        {
            stemmedSearchText += "*";
        }

        query = parser.Parse(stemmedSearchText);

        if (searcher == null)
        {
            searcher = new Lucene.Net.Search.IndexSearcher(CustomAppSettings.SearchIndexFolder);
        }

        hits = searcher.Search(query);
    }
    catch (Exception e)
    {
        result.Message = "Paieška nepavyko. Pataisykite užklausą. Klaidos pranešimas: " + e.Message;
        return result;
    }

    Lucene.Net.Highlight.Formatter formatter = new Lucene.Net.Highlight.SimpleHTMLFormatter(
        "<span class=\"highlightResult\">",
        "</span>");
    var fragmenter = new Lucene.Net.Highlight.SimpleFragmenter(100);
    // The query is rewritten before scoring so the highlighter sees the expanded form of the
    // wildcard query.
    var scorer = new Lucene.Net.Highlight.QueryScorer(searcher.Rewrite(query));
    var highlighter = new Lucene.Net.Highlight.Highlighter(formatter, scorer);
    highlighter.SetTextFragmenter(fragmenter);

    var seenIds = new HashSet<string>();
    var list = new List<SearchIndexModel>();
    int hitCount = hits.Length();

    // Stop as soon as 100 distinct ids have been collected.
    for (int i = 0; i < hitCount && seenIds.Count < 100; i++)
    {
        Lucene.Net.Documents.Document doc = hits.Doc(i);
        string id = doc.Get("id");
        if (!seenIds.Add(id))
        {
            // Only the first (best scoring) hit per id is kept.
            continue;
        }

        var model = new SearchIndexModel();
        model.Id = id;
        model.Score = hits.Score(i);
        model.Subject = doc.Get("subject");
        model.Type = (EntryTypes)Enum.Parse(typeof(EntryTypes), doc.Get("type"));

        // Encode before highlighting so only the highlighter's own <span> tags are markup.
        // HtmlEncode returns null for a missing field and StringReader rejects null.
        string raw_text = HttpUtility.HtmlEncode(doc.Get("raw_text")) ?? string.Empty;
        Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("text", new System.IO.StringReader(raw_text));

        // NOTE(review): apostrophes are doubled as in SQL escaping although the text is only
        // stored on the model here. Kept for compatibility; confirm consumers need it.
        string highlighted_text = highlighter.GetBestFragments(stream, raw_text, 3, "...").Replace("'", "''");
        if (highlighted_text == "")
        {
            // Sometimes the highlighter emits no text; fall back to the encoded raw text.
            highlighted_text = raw_text.Replace("'", "''");
        }

        if (highlighted_text.Length > 3000)
        {
            highlighted_text = highlighted_text.Substring(0, 3000);
        }

        model.HighlightedText = highlighted_text;
        list.Add(model);
    }

    result.List = list;
    result.SearchPhrase = searchText;

    if (list.Count == 0)
    {
        result.Message = string.Format("Įrašų pagal užklausą '{0}' nerasta. Patikslinkite paieškos duomenis.", searchText);
    }

    return result;
}