/// <summary>
/// Runs <paramref name="query"/> through <c>_doSearch</c> and returns one
/// <see cref="SearchResult"/> per hit, each populated with the highlighted
/// fragments of the hit's "contents" field that scored above zero.
/// </summary>
/// <param name="query">Query text; it is parsed against the "contents" field with <c>_analyzer</c>.</param>
/// <returns>One result per hit, in hit order; an empty array when there are no hits.</returns>
public SearchResult[] Search(string query)
{
    Hits hits = _doSearch(query);
    List<SearchResult> results = new List<SearchResult>();

    int hitCount = hits.Length();
    if (hitCount == 0)
    {
        // Nothing to highlight: skip parsing the query and building the highlighter,
        // exactly as the per-hit loop would have done with zero iterations.
        return results.ToArray();
    }

    // None of these depend on the individual hit, so build them once instead of
    // re-parsing the query and re-creating the highlighter for every document.
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(_fragmentSize);
    Highlighter hiliter = new Highlighter(formatter, new QueryScorer(QueryParser.Parse(query, "contents", _analyzer)));
    hiliter.SetTextFragmenter(fragmenter);

    for (int i = 0; i < hitCount; i++)
    {
        Document doc = hits.Doc(i);
        SearchResult sr = new SearchResult(doc, _analyzer, query, _fragmentSize);

        string contents = doc.Get("contents");
        if (contents != null)
        {
            // +1 keeps the requested fragment count from ever being zero.
            int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;
            TokenStream tokenstream = _analyzer.TokenStream("contents", new StringReader(contents));
            TextFragment[] frags = hiliter.GetBestTextFragments(tokenstream, contents, false, numfragments);

            foreach (TextFragment frag in frags)
            {
                if (frag.GetScore() > 0)
                {
                    sr.AddFragment(frag.ToString());
                }
            }
        }
        // A hit without a stored "contents" field is still returned, just without fragments,
        // rather than aborting the whole search with a NullReferenceException.

        results.Add(sr);
    }

    return results.ToArray();
}
/// <summary>
/// Renders the document behind <paramref name="sr"/> as a complete HTML page in which
/// the terms of the result's query are wrapped in a "highlight" span.
/// </summary>
/// <param name="sr">Search result supplying the document text, query, analyzer and fragment size.</param>
/// <returns>The HTML page, with every newline replaced by an HTML line break.</returns>
public static string GetOriginalHighlightedContents(SearchResult sr)
{
    string text = sr.GetDocContents();

    // Assemble the highlighter from its parts.
    SimpleHTMLFormatter spanFormatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    SimpleFragmenter splitter = new SimpleFragmenter(sr.FragmentSize);
    QueryScorer scorer = new QueryScorer(sr.QueryParser.Parse(sr.Query));
    Highlighter highlighter = new Highlighter(spanFormatter, scorer);
    highlighter.SetTextFragmenter(splitter);

    // Request one fragment more than the text can fill so the count is never zero.
    int fragmentCount = 1 + text.Length / splitter.GetFragmentSize();
    TokenStream tokens = sr.Analyzer.TokenStream("contents", new StringReader(text));
    string highlighted = highlighter.GetBestFragments(tokens, text, fragmentCount, "...");

    StringBuilder page = new StringBuilder();
    page.Append("<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style>.highlight{background:yellow;}</style><body><font face=Arial size=5>");
    page.Append(highlighted);
    page.Append("</font></body></html>");

    return page.Replace("\n", "<br/>").ToString();
}
/// <summary>
/// Produces the highlighted text of the document behind <paramref name="sr"/> as an HTML
/// snippet wrapped only in a font element, without any html/head/body envelope.
/// </summary>
/// <param name="sr">Search result supplying the document text, query, analyzer and fragment size.</param>
/// <returns>The HTML snippet, with every newline replaced by an HTML line break.</returns>
public static string GetHilitedContentsWithoutHeaders(SearchResult sr)
{
    string docText = sr.GetDocContents();

    SimpleHTMLFormatter markup = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    SimpleFragmenter chunker = new SimpleFragmenter(sr.FragmentSize);

    Highlighter marker = new Highlighter(markup, new QueryScorer(sr.QueryParser.Parse(sr.Query)));
    marker.SetTextFragmenter(chunker);

    // Integer division plus one: always ask for at least one fragment.
    int wanted = docText.Length / chunker.GetFragmentSize();
    wanted += 1;

    TokenStream stream = sr.Analyzer.TokenStream("contents", new StringReader(docText));

    StringBuilder html = new StringBuilder("<font face=Arial size=5>");
    html.Append(marker.GetBestFragments(stream, docText, wanted, "..."))
        .Append("</font>")
        .Replace("\n", "<br/>");

    return html.ToString();
}
/// <summary>
/// Writes two HTML files into the application's startup directory for document
/// <paramref name="d"/>: "results.html", listing every fragment of the "contents" field
/// that scored above zero for <paramref name="query"/>, and "contents.html", holding the
/// highlighted text joined with "...". The results page links to the contents page.
/// </summary>
/// <param name="d">Document whose "contents" and "filename" fields are read.</param>
/// <param name="query">Query text, parsed against the "contents" field.</param>
/// <param name="analyzer">Analyzer used both to parse the query and to tokenize the contents.</param>
public static void Highlight(Document d, string query, Analyzer analyzer)
{
    string contents = d.Get("contents");

    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\"><b>", "</b></span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(250);

    // +1 ensures the count is never zero; asking for more fragments than exist is harmless.
    int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;

    string startupPath = System.Windows.Forms.Application.StartupPath;
    string contentspath = System.IO.Path.Combine(startupPath, "contents.html");
    string resultspath = System.IO.Path.Combine(startupPath, "results.html");

    // ---- results.html: one entry per matching fragment ----
    Highlighter hiliter = new Highlighter(formatter, new QueryScorer(QueryParser.Parse(query, "contents", analyzer)));
    hiliter.SetTextFragmenter(fragmenter);

    StringBuilder result = new StringBuilder("<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style>.highlight{background:yellow;}</style><head><title>Search Results - ");
    result.Append(d.Get("filename"));
    result.Append("</title></head><body><font face=Arial size=5>");

    TokenStream tokenstream = analyzer.TokenStream("contents", new System.IO.StringReader(contents));
    TextFragment[] frags = hiliter.GetBestTextFragments(tokenstream, contents, false, numfragments);
    foreach (TextFragment frag in frags)
    {
        if (frag.GetScore() > 0)
        {
            result.Append(frag.ToString());
            result.Append("<br/><hr/><br/>");
        }
    }

    result.Append("</font><a target=_self href=\"file:///");
    result.Append(contentspath);
    result.Append("\">View Original Document...</a>");
    result.Append("</body></html>");
    result.Replace("\n", "<br/>");
    System.IO.File.WriteAllText(resultspath, result.ToString());

    // ---- contents.html: the highlighted document text ----
    Highlighter hiliter2 = new Highlighter(formatter, new QueryScorer(QueryParser.Parse(query, "contents", analyzer)));
    hiliter2.SetTextFragmenter(fragmenter);

    // Tokenize with the same field name the query was parsed for, so a field-sensitive
    // analyzer produces tokens that line up with the query terms (the field-less
    // overload was used here before, unlike the first token stream above).
    TokenStream tokstr = analyzer.TokenStream("contents", new System.IO.StringReader(contents));

    StringBuilder htmlcontents = new StringBuilder("<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style>.highlight{background:yellow;}</style><body><font face=Arial size=5>");
    htmlcontents.Append(hiliter2.GetBestFragments(tokstr, contents, numfragments, "..."));
    htmlcontents.Append("</font></body></html>");
    htmlcontents.Replace("\n", "<br/>");
    System.IO.File.WriteAllText(contentspath, htmlcontents.ToString());
}
/// <summary>
/// Highlights the "contents" field of the document held by <paramref name="sr"/> and adds
/// every fragment that scored above zero to that same result.
/// </summary>
/// <param name="sr">Result to populate; it supplies the document, query, analyzer and fragment size.</param>
/// <returns>The instance passed in through <paramref name="sr"/>.</returns>
public static SearchResult GetFastSearchResultFragments(ref SearchResult sr)
{
    string text = sr.Document.Get("contents");

    SimpleHTMLFormatter spanFormatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    SimpleFragmenter splitter = new SimpleFragmenter(sr.FragmentSize);
    QueryScorer scorer = new QueryScorer(sr.QueryParser.Parse(sr.Query));

    Highlighter highlighter = new Highlighter(spanFormatter, scorer);
    highlighter.SetTextFragmenter(splitter);

    // One more than the text can fill, so at least one fragment is always requested.
    int fragmentCount = 1 + text.Length / splitter.GetFragmentSize();
    TokenStream tokens = sr.Analyzer.TokenStream("contents", new StringReader(text));
    TextFragment[] candidates = highlighter.GetBestTextFragments(tokens, text, false, fragmentCount);

    for (int index = 0; index < candidates.Length; index++)
    {
        TextFragment candidate = candidates[index];
        if (candidate.GetScore() > 0)
        {
            sr.AddFragment(candidate.ToString());
        }
    }

    return sr;
}
/// <summary>
/// Searches the index at "Canon\index" for the text in <c>QueryInputBox</c>, builds a
/// <c>ResultSet</c> with the best highlighted fragment for each of the first hits
/// (at most ten), and shows them in a <c>ShowSearchResults</c> dialog.
/// Any exception is reported to the user in a message box.
/// </summary>
private void Search()
{
    // Upper bound on how many hits are analyzed and shown.
    const int maxHitsShown = 10;

    try
    {
        String line = QueryInputBox.Text;
        if (line.Length == 0)
        {
            // Nothing to search for; bail out before touching the UI or the index.
            return;
        }

        // One progress step for the search itself plus one per analyzed hit.
        SearchProgressBar.Maximum = maxHitsShown + 1;
        ProgressLabel.Text = "Progress: Initialize Search ...";

        Analyzer analyzer = new StandardAnalyzer();
        ArrayList resultList = new ArrayList();

        Searcher searcher = new IndexSearcher(@"Canon\index");
        try
        {
            ProgressLabel.Text = "Progress: Parsing Query ...";
            Query query = QueryParser.Parse(line, "contents", analyzer);
            Hits hits = searcher.Search(query);
            SearchProgressBar.Increment(1);
            ProgressLabel.Text = "Progress: Searched. Analyzing results ...";

            Highlighter highlighter = new Highlighter(new QueryScorer(query));
            highlighter.SetTextFragmenter(new SimpleFragmenter(80));
            int maxNumFragmentsRequired = 1;

            // Never read past the hits that actually exist.
            int hitsToShow = System.Math.Min(maxHitsShown, hits.Length());
            for (int i = 0; i < hitsToShow; i++)
            {
                SearchProgressBar.Increment(1);
                ProgressLabel.Text = "Progress: Analyzing hit " + (i + 1).ToString();

                Document doc = hits.Doc(i);
                string indexedPath = doc.Get("path");

                ResultSet hitResult = new ResultSet();
                hitResult.BookName = indexedPath.Replace(@"c:\cscd\temp\", "");
                hitResult.Score = hits.Score(i);
                hitResult.numberOfHits = hits.Length();

                // The text is not stored in the index, so unpack it from the archive
                // into a temporary file and read it from there.
                string name = GetInternalName(indexedPath);
                PaliReaderUtils.AalekhDecoder.UnzipFromZipLibrary(name);
                string path = System.IO.Directory.GetCurrentDirectory() + @"\Work\" + name + ".htm";

                string plainText = "";
                try
                {
                    using (StreamReader reader = new StreamReader(path, System.Text.Encoding.Default))
                    {
                        plainText = parseHtml(reader.ReadToEnd());
                    }
                }
                finally
                {
                    // Remove the temporary file even when reading or parsing fails.
                    if (File.Exists(path))
                    {
                        File.Delete(path);
                    }
                }

                TokenStream tokenStream = analyzer.TokenStream(new StringReader(plainText));
                hitResult.textFragment = highlighter.GetBestFragments(tokenStream, plainText, maxNumFragmentsRequired, "...");

                resultList.Add(hitResult);
            }
        }
        finally
        {
            // Release the index whether or not the search succeeded.
            searcher.Close();
        }

        SearchProgressBar.Value = 0;

        ssr = new ShowSearchResults(resultList);
        ssr.OpenBookEvent += new ShowSearchResults.OpenBook(this.TriggerOpenBook);
        ssr.Closing += new System.ComponentModel.CancelEventHandler(this.Closing_ResultWindow);
        this.Hide();
        ssr.ShowDialog();
    }
    catch (System.Exception e)
    {
        MessageBox.Show(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}