/// <summary>
/// Searches the article index for <paramref name="strQuery"/> and returns an HTML
/// fragment listing the hits, with matching terms wrapped in a highlight span.
/// </summary>
/// <param name="strQuery">Raw Lucene query text entered by the user.</param>
/// <returns>HTML markup: a hit-count header followed by one linked entry per article.</returns>
public string Search(string strQuery)
{
    // StringBuilder avoids O(n^2) string concatenation while looping over hits.
    System.Text.StringBuilder result = new System.Text.StringBuilder();
    Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"]));
    try
    {
        Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
        Lucene.Net.Search.Query query = parser.Parse(strQuery);
        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader);
        Lucene.Net.Search.Hits hits = searcher.Search(query);
        Lucene.Net.Highlight.QueryScorer score = new Lucene.Net.Highlight.QueryScorer(query);
        Lucene.Net.Highlight.SimpleHTMLFormatter formater = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>");
        Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formater, score);
        result.Append("<div align='right' style='background-color:#F0F7F9; padding-right:15px' height='30px'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #005482; FONT-FAMILY: arial'>Kết quả tìm thấy : " + hits.Length() + " </font></div>");
        result.Append("<div style='padding: 10px 10px 10px 10px;'>");
        for (int i = 0; i < hits.Length(); i++)
        {
            // Fetch the stored document once instead of re-reading it for every field.
            Lucene.Net.Documents.Document doc = hits.Doc(i);
            string id = doc.Get("ArticleId");
            string title = doc.Get("ArticleTitle");
            string detail = doc.Get("ArticleDetail");
            Lucene.Net.Analysis.TokenStream ts = (new Lucene.Net.Analysis.Standard.StandardAnalyzer()).TokenStream("ArticleDetail", new System.IO.StringReader(detail));
            result.Append(string.Format("<div align='left'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #5b5b5b; FONT-FAMILY: arial'><a href='/?ArticleId={0}'>{1}</a></font>", id, title));
            // "<br />" replaces the original invalid "</br>" closing tag.
            result.Append(string.Format("<div align='left'><font style='FONT-SIZE: 9pt' face='Arial' color='#005482'>...{0}...</font></div></div><br />", highlighter.GetBestFragment(ts, detail)));
        }
        result.Append("</div>");
    }
    finally
    {
        // Always release the index reader, even when parsing or searching throws.
        reader.Close();
    }
    return result.ToString();
}
/// <summary>
/// Searches published articles of the current sub-domain for <paramref name="strQuery"/>
/// and returns one 20-hit page of results serialized as JSON:
/// {"Count": totalHits, "Data": [SearchArticle, ...]}.
/// </summary>
/// <param name="strQuery">User query, applied to the ArticleDetail field.</param>
/// <param name="index">1-based page index as a string.</param>
/// <returns>The serialized result page, or an empty string on any failure.</returns>
public string SearchAndPaging(string strQuery, string index)
{
    string result = string.Empty;
    try
    {
        List<SearchArticle> searchArticleList = new List<SearchArticle>();
        PSCPortal.CMS.ArticleCollection ArticleList = ArticleCollection.GetArticleCollectionPublish();
        string nameSub = Libs.Ultility.GetSubDomain() == string.Empty ? "HomePage" : Libs.Ultility.GetSubDomain();
        SubDomain subDomain = PSCPortal.Engine.SubDomain.GetSubByName(nameSub);
        PageCollection pagesBelongTo = subDomain.GetPagesBelongTo();
        // Collect the ids of all published articles belonging to this sub-domain and
        // join them once into an "id1 OR id2 OR ..." clause. Joining after the loops
        // fixes the original bug where the trailing " OR " was trimmed after every
        // page iteration, corrupting the clause when later pages contributed ids.
        List<string> ids = new List<string>();
        foreach (var page in pagesBelongTo)
        {
            foreach (var ar in ArticleList.Where(ar => ar.PageId == page.Id))
            {
                ids.Add(ar.Id.ToString());
            }
        }
        string strId = string.Join(" OR ", ids.ToArray());
        int pageIndex = Int32.Parse(index);
        string strSearch = " ArticleDetail:(" + strQuery + ") AND ArticleId:" + "( " + strId + " )";
        Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"]));
        int totalHits;
        try
        {
            Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
            Lucene.Net.Search.Query query = parser.Parse(strSearch);
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader);
            Lucene.Net.Search.Hits hits = searcher.Search(query);
            totalHits = hits.Length();
            Lucene.Net.Highlight.QueryScorer score = new Lucene.Net.Highlight.QueryScorer(query);
            Lucene.Net.Highlight.SimpleHTMLFormatter formater = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>");
            Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formater, score);
            // Page window: hits [ (pageIndex-1)*20 , pageIndex*20 ), clamped to the hit count.
            for (int i = pageIndex * 20 - 20; i < pageIndex * 20 && i < hits.Length(); i++)
            {
                Lucene.Net.Documents.Document doc = hits.Doc(i);
                string detail = doc.Get("ArticleDetail");
                Lucene.Net.Analysis.TokenStream ts = (new Lucene.Net.Analysis.Standard.StandardAnalyzer()).TokenStream("ArticleDetail", new System.IO.StringReader(detail));
                SearchArticle searchArticle = new SearchArticle();
                searchArticle.Id = doc.Get("ArticleId");
                searchArticle.Title = doc.Get("ArticleTitle");
                searchArticle.Highligth = highlighter.GetBestFragment(ts, detail);
                searchArticleList.Add(searchArticle);
            }
        }
        finally
        {
            // Release the index reader even when parsing/searching throws.
            reader.Close();
        }
        // NOTE: the original also built an HTML preview string here, but it was dead
        // code — unconditionally overwritten by the serialized JSON below — so it
        // has been removed.
        JavaScriptSerializer serializer = new JavaScriptSerializer();
        Dictionary<string, object> resultDic = new Dictionary<string, object>();
        resultDic["Count"] = totalHits;
        resultDic["Data"] = searchArticleList;
        result = serializer.Serialize(resultDic);
    }
    catch (Exception)
    {
        // Swallowed by design (as in the original): any failure yields an empty result.
    }
    return result;
}
public virtual void TestEncoding()
{
    // Raw content deliberately contains characters that must be XML-escaped.
    String sourceText = "\"Smith & sons' prices < 3 and >4\" claims article";
    // Run the highlighter over the raw content; the anonymous scorer scores no
    // individual tokens but still selects a single fragment.
    Highlighter highlighter = new Highlighter(this, new SimpleHTMLEncoder(), new AnonymousClassScorer());
    highlighter.SetTextFragmenter(new SimpleFragmenter(2000));
    TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(sourceText));
    String snippet = highlighter.GetBestFragments(tokens, sourceText, 1, "");
    // Embed the encoded snippet into a small XHTML document...
    String xhtml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        + "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n"
        + "<head>\n"
        + "<title>My Test HTML Document</title>\n"
        + "</head>\n"
        + "<body>\n"
        + "<h2>" + snippet + "</h2>\n"
        + "</body>\n"
        + "</html>";
    // ...then parse it back and confirm the decoded text round-trips exactly.
    System.Xml.XmlDocument document = new System.Xml.XmlDocument();
    document.LoadXml(xhtml);
    System.Xml.XmlNode bodyNode = document.GetElementsByTagName("body")[0];
    System.Xml.XmlNode heading = bodyNode.ChildNodes[0];
    String roundTripped = heading.FirstChild.InnerText;
    Assert.AreEqual(sourceText, roundTripped, "XHTML Encoding should have worked:");
}
public virtual void TestNoFragments()
{
    // A query matching nothing must produce no highlight fragment for any text.
    DoSearching("AnInvalidQueryWhichShouldYieldNoResults");
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    foreach (System.String text in texts)
    {
        TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String fragment = highlighter.GetBestFragment(tokens, text);
        Assert.IsNull(fragment, "The highlight result should be null for text with no query terms");
    }
}
public virtual void TestUnRewrittenQuery()
{
    // Shows that a primitive (never rewritten) wildcard query yields no highlights.
    searcher = new IndexSearcher(ramDir);
    Analyzer analyzer = new StandardAnalyzer();
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.Parse("JF? or Kenned*");
    System.Console.Out.WriteLine("Searching with primitive query");
    // Deliberately NOT calling query.Rewrite(reader) before searching.
    Hits hits = searcher.Search(query);
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    highlighter.SetTextFragmenter(new SimpleFragmenter(40));
    int maxNumFragmentsRequired = 3;
    for (int hit = 0; hit < hits.Length(); hit++)
    {
        System.String text = hits.Doc(hit).Get(FIELD_NAME);
        TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String highlighted = highlighter.GetBestFragments(tokens, text, maxNumFragmentsRequired, "...");
        System.Console.Out.WriteLine(highlighted);
    }
    // Multi-term queries that are not rewritten should produce zero highlights.
    Assert.IsTrue(numHighlights == 0, "Failed to find correct number of highlights " + numHighlights + " found");
}
public virtual void TestMaxSizeHighlightTruncates()
{
    System.String goodWord = "goodtoken";
    System.String[] stopWords = new System.String[] { "stoppedtoken" };
    TermQuery query = new TermQuery(new Term("data", goodWord));
    Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
    // NullFragmenter keeps the analyzed text as a single fragment.
    hg.SetTextFragmenter(new NullFragmenter());
    // One matching token followed by a huge run of stop words and whitespace.
    System.Text.StringBuilder sb = new System.Text.StringBuilder(goodWord);
    for (int i = 0; i < 10000; i++)
    {
        sb.Append(" ").Append(stopWords[0]);
    }
    hg.SetMaxDocBytesToAnalyze(100);
    System.String match = hg.GetBestFragment(new StandardAnalyzer(stopWords), "data", sb.ToString());
    Assert.IsTrue(match.Length < hg.GetMaxDocBytesToAnalyze(), "Matched text should be no more than 100 chars in length ");
    // Append another matching token far past the analysis limit; the highlighter
    // must ignore it and still truncate the output.
    sb.Append(" ").Append(goodWord);
    match = hg.GetBestFragment(new StandardAnalyzer(stopWords), "data", sb.ToString());
    Assert.IsTrue(match.Length < hg.GetMaxDocBytesToAnalyze(), "Matched text should be no more than 100 chars in length ");
}
public virtual void TestMaxSizeHighlight()
{
    DoSearching("meat");
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    // Cap analysis before the match position so nothing can be highlighted.
    highlighter.SetMaxDocBytesToAnalyze(30);
    TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(texts[0]));
    highlighter.GetBestFragment(tokens, texts[0]);
    Assert.IsTrue(numHighlights == 0, "Setting MaxDocBytesToAnalyze should have prevented " + "us from finding matches for this record: " + numHighlights + " found");
}
/// <summary>
/// Checks highlighting of overlapping tokens ("Hi-Speed10" split into several
/// overlapping tokens) for a range of queries, using both token orderings:
/// smaller token first (GetTS2) and bigger overlapping token first (GetTS2a).
/// The original twelve copy/paste stanzas are folded into one helper.
/// </summary>
public virtual void TestOverlapAnalyzer2()
{
    System.String s = "Hi-Speed10 foo";
    CheckOverlapHighlight(GetTS2(), s, "foo", "Hi-Speed10 <B>foo</B>");
    CheckOverlapHighlight(GetTS2(), s, "10", "Hi-Speed<B>10</B> foo");
    CheckOverlapHighlight(GetTS2(), s, "hi", "<B>Hi</B>-Speed10 foo");
    CheckOverlapHighlight(GetTS2(), s, "speed", "Hi-<B>Speed</B>10 foo");
    CheckOverlapHighlight(GetTS2(), s, "hispeed", "<B>Hi-Speed</B>10 foo");
    CheckOverlapHighlight(GetTS2(), s, "hi speed", "<B>Hi-Speed</B>10 foo");
    // Same queries, but the bigger overlapping token comes first in the stream.
    CheckOverlapHighlight(GetTS2a(), s, "foo", "Hi-Speed10 <B>foo</B>");
    CheckOverlapHighlight(GetTS2a(), s, "10", "Hi-Speed<B>10</B> foo");
    CheckOverlapHighlight(GetTS2a(), s, "hi", "<B>Hi</B>-Speed10 foo");
    CheckOverlapHighlight(GetTS2a(), s, "speed", "Hi-<B>Speed</B>10 foo");
    CheckOverlapHighlight(GetTS2a(), s, "hispeed", "<B>Hi-Speed</B>10 foo");
    CheckOverlapHighlight(GetTS2a(), s, "hi speed", "<B>Hi-Speed</B>10 foo");
}

/// <summary>
/// Parses <paramref name="queryText"/> against the "text" field with a whitespace
/// analyzer, highlights <paramref name="tokenStream"/> over <paramref name="text"/>
/// and asserts the expected markup.
/// </summary>
private void CheckOverlapHighlight(TokenStream tokenStream, System.String text, System.String queryText, System.String expected)
{
    Query query = new QueryParser("text", new WhitespaceAnalyzer()).Parse(queryText);
    Highlighter highlighter = new Highlighter(new QueryScorer(query));
    System.String result = highlighter.GetBestFragments(tokenStream, text, 3, "...");
    Assert.AreEqual(result, expected);
}
public virtual void TestOverlapAnalyzer()
{
    //UPGRADE_TODO: Class 'java.util.HashMap' was converted to 'System.Collections.Hashtable' which has a different behavior. 'ms-help://MS.VSCC.2003/commoner/redir/redirect.htm?keyword="jlca1073_javautilHashMap_3"'
    System.Collections.Hashtable synonyms = new System.Collections.Hashtable();
    synonyms["football"] = "soccer,footie";
    Analyzer analyzer = new SynonymAnalyzer(synonyms);
    // Searching for "football" should also light up the injected synonyms.
    System.String searchTerm = "football";
    System.String source = "football-soccer in the euro 2004 footie competition";
    QueryParser parser = new QueryParser("bookid", analyzer);
    Query query = parser.Parse(searchTerm);
    Highlighter highlighter = new Highlighter(new QueryScorer(query));
    TokenStream tokens = analyzer.TokenStream(null, new System.IO.StringReader(source));
    // Take up to 3 best fragments, separated by "...".
    System.String result = highlighter.GetBestFragments(tokens, source, 3, "...");
    System.String expected = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
    Assert.IsTrue(expected.Equals(result), "overlapping analyzer should handle highlights OK");
}
public virtual void TestGetBestSingleFragmentWithWeights()
{
    // Two weighted terms; the heavier one should win the single tiny fragment.
    WeightedTerm[] weightedTerms = new WeightedTerm[] { new WeightedTerm(10f, "hello"), new WeightedTerm(1f, "kennedy") };
    Highlighter highlighter = new Highlighter(new QueryScorer(weightedTerms));
    TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(texts[0]));
    highlighter.SetTextFragmenter(new SimpleFragmenter(2));
    System.String result = highlighter.GetBestFragment(tokens, texts[0]).Trim();
    Assert.IsTrue("<B>Hello</B>".Equals(result), "Failed to find best section using weighted terms. Found: [" + result + "]");
    // Flip the balance: boosting "kennedy" must change the chosen fragment.
    weightedTerms[1].SetWeight(50f);
    tokens = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(texts[0]));
    highlighter = new Highlighter(new QueryScorer(weightedTerms));
    highlighter.SetTextFragmenter(new SimpleFragmenter(2));
    result = highlighter.GetBestFragment(tokens, texts[0]).Trim();
    Assert.IsTrue("<B>kennedy</B>".Equals(result), "Failed to find best section using weighted terms. Found: " + result);
}
public virtual void TestOffByOne()
{
    // The bracketed numbers after the match exercise fragment offset arithmetic.
    TermQuery query = new TermQuery(new Term("data", "help"));
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
    highlighter.SetTextFragmenter(new NullFragmenter());
    System.String match = highlighter.GetBestFragment(new StandardAnalyzer(), "data", "help me [54-65]");
    Assert.AreEqual("<B>help</B> me [54-65]", match);
}
public virtual void TestSimpleHighlighter()
{
    DoSearching("Kennedy");
    Highlighter highlighter = new Highlighter(new QueryScorer(query));
    highlighter.SetTextFragmenter(new SimpleFragmenter(40));
    int maxNumFragmentsRequired = 2;
    for (int hit = 0; hit < hits.Length(); hit++)
    {
        System.String text = hits.Doc(hit).Get(FIELD_NAME);
        TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String fragments = highlighter.GetBestFragments(tokens, text, maxNumFragmentsRequired, "...");
        System.Console.Out.WriteLine("\t" + fragments);
    }
    // Smoke test only: nothing meaningful to assert, we just must not throw.
}
/// <summary>
/// Searches the content index for documents whose "content" field contains the
/// words of <paramref name="keyword"/> as a phrase, returning one page of
/// Content items with a highlighted snippet attached.
/// </summary>
/// <param name="keyword">Space-separated search words.</param>
/// <param name="page">1-based page number.</param>
/// <param name="pageSize">Number of hits per page.</param>
/// <param name="totals">Receives the total number of hits for the query.</param>
/// <returns>The requested page of matching Content items.</returns>
public IEnumerable<Content> Search(string keyword, int page, int pageSize, out int totals)
{
    lock (locker)
    {
        List<Content> result = new List<Content>();
        IndexReader reader = IndexReader.Open(DBNLConfigurationManager.LuceneElement.IndexingFolder);
        IndexSearcher searcher = new IndexSearcher(reader);
        try
        {
            // Build a phrase query from the individual (lower-cased) search words
            // and require the whole phrase to match. The original also created a
            // TopDocCollector, a TermQuery and a parsed Query that were never used;
            // that dead code has been removed.
            PhraseQuery phrase = new PhraseQuery();
            foreach (string word in keyword.Split(' '))
            {
                phrase.Add(new Term("content", word.ToLower()));
            }
            BooleanQuery myquery = new BooleanQuery();
            myquery.Add(phrase, BooleanClause.Occur.MUST);
            Hits hits = searcher.Search(myquery);
            totals = hits.Length();
            Lucene.Net.Highlight.Formatter formatter = new Lucene.Net.Highlight.SimpleHTMLFormatter(
                "<span class=\"Highlight\">", "</span>");
            Lucene.Net.Highlight.SimpleFragmenter fragmenter = new Lucene.Net.Highlight.SimpleFragmenter(400);
            Lucene.Net.Highlight.QueryScorer scorer = new Lucene.Net.Highlight.QueryScorer(myquery);
            Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formatter, scorer);
            highlighter.SetTextFragmenter(fragmenter);
            for (int i = (page - 1) * pageSize; i < Math.Min(page * pageSize, hits.Length()); i++)
            {
                Document doc = hits.Doc(i);
                string raw_text = doc.Get("content");
                Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(raw_text));
                // Escape single quotes for downstream SQL-style consumers.
                string highlighted_text = highlighter.GetBestFragments(stream, raw_text, 1, "...").Replace("'", "''");
                if (highlighted_text == "") // sometimes the highlighter fails to emit text...
                {
                    highlighted_text = raw_text.Replace("'", "''");
                }
                if (highlighted_text.Length > 500)
                {
                    highlighted_text = highlighted_text.Substring(0, 500);
                }
                Content content = new ContentService().GetItem(int.Parse(doc.Get("id")));
                content.HighlightText = highlighted_text;
                result.Add(content);
            }
        }
        finally
        {
            // Release index handles even when searching or item loading throws
            // (the original leaked both on any exception).
            reader.Close();
            searcher.Close();
        }
        return result.AsEnumerable();
    }
}
/// <summary>
/// Runs <paramref name="keyword"/> against the "text" field of the shared index and
/// returns the concatenated highlighted snippets of up to 100 distinct documents.
/// </summary>
/// <param name="keyword">User-entered Lucene query text.</param>
/// <returns>Concatenated highlighted fragments, or an empty string when the input
/// is empty/unparseable or the search fails.</returns>
public string Query(string keyword)
{
    Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("text", analyzer);
    Lucene.Net.Search.Query query = null;
    try
    {
        if (string.IsNullOrEmpty(keyword))
        {
            throw new Exception("keywork is empty");
        }
        query = parser.Parse(keyword);
    }
    catch (Exception)
    {
        // Unparseable/empty input: nothing to search for. Returning early fixes the
        // original, which swallowed this and later crashed on a null `hits`.
        return string.Empty;
    }
    lock (locker)
    {
        Lucene.Net.Search.Hits hits = null;
        try
        {
            if (searcher == null)
            {
                searcher = new Lucene.Net.Search.IndexSearcher(DBNLConfigurationManager.LuceneElement.IndexingFolder);
            }
            hits = searcher.Search(query);
        }
        catch (Exception)
        {
            // Index missing or unreadable: treat as "no results" instead of crashing
            // below with a NullReferenceException as the original did.
            return string.Empty;
        }
        Lucene.Net.Highlight.Formatter formatter = new Lucene.Net.Highlight.SimpleHTMLFormatter(
            "<span style=\"background:yellow;color:red;\">", "</span>");
        Lucene.Net.Highlight.SimpleFragmenter fragmenter = new Lucene.Net.Highlight.SimpleFragmenter(400);
        Lucene.Net.Highlight.QueryScorer scorer = new Lucene.Net.Highlight.QueryScorer(query);
        Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formatter, scorer);
        highlighter.SetTextFragmenter(fragmenter);
        StringBuilder sb = new StringBuilder();
        Dictionary<string, int> dict_already_seen_ids = new Dictionary<string, int>();
        // Concatenate highlighted snippets for up to 100 distinct document ids.
        // (The original's empty pre-loop over hits and its unused `guid` local
        // have been removed.)
        for (int i = 0; i < hits.Length() && dict_already_seen_ids.Count < 100; i++)
        {
            Lucene.Net.Documents.Document doc = hits.Doc(i);
            string id = doc.Get("id");
            if (!dict_already_seen_ids.ContainsKey(id))
            {
                dict_already_seen_ids[id] = 1;
                string raw_text = doc.Get("raw_text");
                Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(raw_text));
                // Escape single quotes for downstream SQL-style consumers.
                string highlighted_text = highlighter.GetBestFragments(stream, raw_text, 1, "...").Replace("'", "''");
                if (highlighted_text == "") // sometimes the highlighter fails to emit text...
                {
                    highlighted_text = raw_text.Replace("'", "''");
                }
                if (highlighted_text.Length > 3000)
                {
                    highlighted_text = highlighted_text.Substring(0, 3000);
                }
                sb.Append(highlighted_text);
                sb.Append("'");
                sb.Append(")\n");
            }
        }
        return sb.ToString();
    }
}
public SearchModel Search(string searchText)
{
    var result = new SearchModel();
    // Reject blank input up front.
    if (string.IsNullOrEmpty(searchText))
    {
        result.Message = "Įveskite paieškos užklausą.";
        return result;
    }
    var stemmedSearchText = new LithuanianStemmer().Stem(searchText.Trim());
    if (string.IsNullOrEmpty(stemmedSearchText))
    {
        result.Message = "Įveskite paieškos užklausą.";
        return result;
    }
    Lucene.Net.Search.Hits hits = null;
    try
    {
        // A trailing letter becomes a prefix query so partial words still match.
        if (char.IsLetter(stemmedSearchText[stemmedSearchText.Length - 1]))
        {
            stemmedSearchText += "*";
        }
        query = parser.Parse(stemmedSearchText);
        if (searcher == null)
        {
            searcher = new Lucene.Net.Search.IndexSearcher(CustomAppSettings.SearchIndexFolder);
        }
        hits = searcher.Search(query);
    }
    catch (Exception e)
    {
        result.Message = "Paieška nepavyko. Pataisykite užklausą. Klaidos pranešimas: " + e.Message;
        return result;
    }
    Lucene.Net.Highlight.Formatter formatter = new Lucene.Net.Highlight.SimpleHTMLFormatter(
        "<span class=\"highlightResult\">", "</span>");
    var fragmenter = new Lucene.Net.Highlight.SimpleFragmenter(100);
    // Rewriting expands wildcards so the scorer sees concrete terms.
    var scorer = new Lucene.Net.Highlight.QueryScorer(searcher.Rewrite(query));
    var highlighter = new Lucene.Net.Highlight.Highlighter(formatter, scorer);
    highlighter.SetTextFragmenter(fragmenter);
    var seenIds = new Dictionary<string, int>();
    var entries = new List<SearchIndexModel>();
    // Collect up to 100 distinct documents, attaching a highlighted snippet to each.
    for (int i = 0; i < hits.Length(); i++)
    {
        if (seenIds.Count >= 100)
        {
            break;
        }
        Lucene.Net.Documents.Document doc = hits.Doc(i);
        string id = doc.Get("id");
        if (seenIds.ContainsKey(id))
        {
            continue;
        }
        seenIds[id] = 1;
        var model = new SearchIndexModel();
        model.Id = id;
        model.Score = hits.Score(i);
        model.Subject = doc.Get("subject");
        model.Type = (EntryTypes)Enum.Parse(typeof(EntryTypes), doc.Get("type"));
        string rawText = HttpUtility.HtmlEncode(doc.Get("raw_text"));
        Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("text", new System.IO.StringReader(rawText));
        string snippet = highlighter.GetBestFragments(stream, rawText, 3, "...").Replace("'", "''");
        if (snippet == "") // sometimes the highlighter fails to emit text...
        {
            snippet = rawText.Replace("'", "''");
        }
        if (snippet.Length > 3000)
        {
            snippet = snippet.Substring(0, 3000);
        }
        model.HighlightedText = snippet;
        entries.Add(model);
    }
    result.List = entries;
    result.SearchPhrase = searchText;
    if (entries.Count == 0)
    {
        result.Message = string.Format("Įrašų pagal užklausą '{0}' nerasta. Patikslinkite paieškos duomenis.", searchText);
    }
    return result;
}
public virtual void TestMultiSearcher()
{
    // Build two single-document indexes so the MultiSearcher must combine queries.
    RAMDirectory ramDir1 = new RAMDirectory();
    IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(), true);
    Document doc = new Document();
    doc.Add(new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.TOKENIZED));
    writer1.AddDocument(doc);
    writer1.Optimize();
    writer1.Close();
    IndexReader reader1 = IndexReader.Open(ramDir1);

    RAMDirectory ramDir2 = new RAMDirectory();
    IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(), true);
    doc = new Document();
    doc.Add(new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.TOKENIZED));
    writer2.AddDocument(doc);
    writer2.Optimize();
    writer2.Close();
    IndexReader reader2 = IndexReader.Open(ramDir2);

    IndexSearcher[] searchers = new IndexSearcher[] { new IndexSearcher(ramDir1), new IndexSearcher(ramDir2) };
    MultiSearcher multiSearcher = new MultiSearcher(searchers);
    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    parser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    query = parser.Parse("multi*");
    System.Console.Out.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
    // The MultiSearcher combines the per-index queries internally at this point.
    hits = multiSearcher.Search(query);

    // Rewrite against each reader and combine, so the highlighter sees the
    // expanded terms instead of the raw wildcard.
    Query[] expandedQueries = new Query[] { query.Rewrite(reader1), query.Rewrite(reader2) };
    query = query.Combine(expandedQueries);

    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    for (int hit = 0; hit < hits.Length(); hit++)
    {
        System.String text = hits.Doc(hit).Get(FIELD_NAME);
        TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String highlighted = highlighter.GetBestFragment(tokens, text);
        System.Console.Out.WriteLine(highlighted);
    }
    Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
}
public virtual void TestFieldSpecificHighlighting()
{
    System.String docMainText = "fred is one of the people";
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.Parse("fred category:people");

    // A scorer bound to a field name only honours query terms for that field...
    QueryScorer fieldAwareScorer = new QueryScorer(query, "contents");
    Highlighter fieldAwareHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldAwareScorer);
    fieldAwareHighlighter.SetTextFragmenter(new NullFragmenter());
    System.String result = fieldAwareHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
    Assert.AreEqual(result, "<B>fred</B> is one of the people", "Should match");

    // ...whereas an unbound scorer highlights terms regardless of their query field.
    QueryScorer fieldBlindScorer = new QueryScorer(query);
    Highlighter fieldBlindHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldBlindScorer);
    fieldBlindHighlighter.SetTextFragmenter(new NullFragmenter());
    result = fieldBlindHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
    Assert.AreEqual(result, "<B>fred</B> is one of the <B>people</B>", "Should match");
    reader.Close();
}
public virtual void TestGetSimpleHighlight()
{
    DoSearching("Kennedy");
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    for (int hit = 0; hit < hits.Length(); hit++)
    {
        System.String text = hits.Doc(hit).Get(FIELD_NAME);
        TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String fragment = highlighter.GetBestFragment(tokens, text);
        System.Console.Out.WriteLine("\t" + fragment);
    }
    Assert.IsTrue(numHighlights == 4, "Failed to find correct number of highlights " + numHighlights + " found");
}
internal virtual void DoStandardHighlights()
{
    // Shared helper: highlight every hit with 20-char fragments, up to 2 per doc.
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    highlighter.SetTextFragmenter(new SimpleFragmenter(20));
    int maxNumFragmentsRequired = 2;
    System.String fragmentSeparator = "...";
    for (int hit = 0; hit < hits.Length(); hit++)
    {
        System.String text = hits.Doc(hit).Get(FIELD_NAME);
        TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String result = highlighter.GetBestFragments(tokens, text, maxNumFragmentsRequired, fragmentSeparator);
        System.Console.Out.WriteLine("\t" + result);
    }
}
public virtual void TestGetTextFragments()
{
    DoSearching("Kennedy");
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    highlighter.SetTextFragmenter(new SimpleFragmenter(20));
    for (int hit = 0; hit < hits.Length(); hit++)
    {
        System.String text = hits.Doc(hit).Get(FIELD_NAME);
        // The string API and the TextFragment API must agree fragment-for-fragment,
        // so run both over fresh token streams of the same text.
        TokenStream tokens = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String[] stringResults = highlighter.GetBestFragments(tokens, text, 10);
        tokens = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        TextFragment[] fragmentResults = highlighter.GetBestTextFragments(tokens, text, true, 10);
        Assert.IsTrue(fragmentResults.Length == stringResults.Length, "Failed to find correct number of text Fragments: " + fragmentResults.Length + " vs " + stringResults.Length);
        for (int j = 0; j < stringResults.Length; j++)
        {
            //UPGRADE_TODO: Method 'java.io.PrintStream.println' was converted to 'System.Console.Out.WriteLine' which has a different behavior. 'ms-help://MS.VSCC.2003/commoner/redir/redirect.htm?keyword="jlca1073_javaioPrintStreamprintln_javalangObject_3"'
            System.Console.Out.WriteLine(fragmentResults[j]);
            Assert.IsTrue(fragmentResults[j].ToString().Equals(stringResults[j]), "Failed to find same text Fragments: " + fragmentResults[j] + " found");
        }
    }
}