Example #1
0
        public SearchResult[] Search(string query)
        {
            Hits hits = _doSearch(query);

            List<SearchResult> results = new List<SearchResult>();
            for (int i = 0; i < hits.Length(); i++)
            {
                Document doc = hits.Doc(i);
                string contents = doc.Get("contents");
                SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
                SimpleFragmenter fragmenter = new SimpleFragmenter(_fragmentSize);
                Highlighter hiliter = new Highlighter(formatter, new QueryScorer(QueryParser.Parse(query, "contents", _analyzer)));
                hiliter.SetTextFragmenter(fragmenter);
                int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;
                TokenStream tokenstream = _analyzer.TokenStream("contents", new StringReader(contents));
                TextFragment[] frags = hiliter.GetBestTextFragments(tokenstream, contents, false, numfragments);
                SearchResult sr = new SearchResult(doc, _analyzer, query, _fragmentSize);
                foreach (TextFragment frag in frags)
                {

                    if (frag.GetScore() > 0)
                        sr.AddFragment(frag.ToString());

                }
                results.Add(sr);

            }

            return results.ToArray();
        }
Example #2
0
 public static string GetOriginalHighlightedContents(SearchResult sr)
 {
     StringBuilder result = new StringBuilder("<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style>.highlight{background:yellow;}</style><body><font face=Arial size=5>");
     string contents = sr.GetDocContents();
     SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
     SimpleFragmenter fragmenter = new SimpleFragmenter(sr.FragmentSize);
     Highlighter hiliter = new Highlighter(formatter, new QueryScorer(sr.QueryParser.Parse(sr.Query)));
     hiliter.SetTextFragmenter(fragmenter);
     int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;
     TokenStream tokenstream = sr.Analyzer.TokenStream("contents", new StringReader(contents));
     result.Append(hiliter.GetBestFragments(tokenstream, contents, numfragments, "..."));
     result.Append("</font></body></html>");
     result.Replace("\n", "<br/>");
     return result.ToString();
 }
Example #3
0
 public static string GetHilitedContentsWithoutHeaders(SearchResult sr)
 {
     StringBuilder result = new StringBuilder("<font face=Arial size=5>");
     string contents = sr.GetDocContents();
     SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
     SimpleFragmenter fragmenter = new SimpleFragmenter(sr.FragmentSize);
     Highlighter hiliter = new Highlighter(formatter, new QueryScorer(sr.QueryParser.Parse(sr.Query)));
     hiliter.SetTextFragmenter(fragmenter);
     int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;
     TokenStream tokenstream = sr.Analyzer.TokenStream("contents", new StringReader(contents));
     result.Append(hiliter.GetBestFragments(tokenstream, contents, numfragments, "..."));
     result.Append("</font>");
     result.Replace("\n", "<br/>");
     return result.ToString();
 }
Example #4
0
        public static void Highlight(Document d, string query, Analyzer analyzer)
        {
            string contents = d.Get("contents");
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\"><b>", "</b></span>");
            //SpanGradientFormatter formatter = new SpanGradientFormatter(10.0f, null, null, "#F1FD9F", "#EFF413");
            //SimpleHTMLEncoder encoder = new SimpleHTMLEncoder();
            SimpleFragmenter fragmenter = new SimpleFragmenter(250);
            Highlighter hiliter = new Highlighter(formatter, new QueryScorer(QueryParser.Parse(query, "contents", analyzer)));

            hiliter.SetTextFragmenter(fragmenter);
            int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;// +1 ensures its never zero. More than the required number of fragments dont harm.
            StringBuilder result = new StringBuilder("<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style>.highlight{background:yellow;}</style><head><title>Search Results - ");
            result.Append(d.Get("filename"));
            result.Append("</title></head><body><font face=Arial size=5>");
            TokenStream tokenstream = analyzer.TokenStream("contents", new System.IO.StringReader(contents));
            TextFragment[] frags = hiliter.GetBestTextFragments(tokenstream, contents, false, numfragments);
            foreach (TextFragment frag in frags)
            {
                if (frag.GetScore() > 0)
                {
                    result.Append(frag.ToString() + "<br/><hr/><br/>");

                }

            }

            string contentspath = System.IO.Path.Combine(System.Windows.Forms.Application.StartupPath, "contents.html");
            result.Append("</font><a target=_self href=\"file:///");
            result.Append(contentspath);
            result.Append("\">View Original Document...</a>");
            result.Append("</body></html>");
            result.Replace("\n", "<br/>");

            string resultspath = System.IO.Path.Combine(System.Windows.Forms.Application.StartupPath, "results.html");
            System.IO.File.WriteAllText(resultspath, result.ToString());
            //webBrowser1.Url = new Uri("file:///" + resultspath);

            Highlighter hiliter2 = new Highlighter(formatter, new QueryScorer(QueryParser.Parse(query, "contents", analyzer)));
            hiliter2.SetTextFragmenter(fragmenter);
            TokenStream tokstr = analyzer.TokenStream(new System.IO.StringReader(contents));
            StringBuilder htmlcontents = new StringBuilder("<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style>.highlight{background:yellow;}</style><body><font face=Arial size=5>");
            htmlcontents.Append(hiliter2.GetBestFragments(tokstr, contents, numfragments, "..."));
            htmlcontents.Append("</font></body></html>");
            htmlcontents.Replace("\n", "<br/>");
            System.IO.File.WriteAllText(contentspath, htmlcontents.ToString());
        }
Example #5
0
        public static SearchResult GetFastSearchResultFragments(ref SearchResult sr)
        {
            Document doc = sr.Document;
            string contents = doc.Get("contents");
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
            SimpleFragmenter fragmenter = new SimpleFragmenter(sr.FragmentSize);
            Highlighter hiliter = new Highlighter(formatter, new QueryScorer(sr.QueryParser.Parse(sr.Query)));
            hiliter.SetTextFragmenter(fragmenter);
            int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;
            TokenStream tokenstream = sr.Analyzer.TokenStream("contents", new StringReader(contents));
            TextFragment[] frags = hiliter.GetBestTextFragments(tokenstream, contents, false, numfragments);
            //SearchResult sr = new SearchResult(doc, _analyzer, query, _fragmentSize);
            foreach (TextFragment frag in frags)
            {

                if (frag.GetScore() > 0)
                    sr.AddFragment(frag.ToString());

            }
            return sr;
        }
        private void Search()
        {
            try
            {
                SearchProgressBar.Maximum = 11;
                ProgressLabel.Text = "Progress: Initialize Search ...";
                Searcher searcher = new IndexSearcher(@"Canon\index");
                Analyzer analyzer = new StandardAnalyzer();
                ArrayList resultList = new ArrayList();

                System.IO.StreamReader in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).CurrentEncoding);

                String line = QueryInputBox.Text;
                if (line.Length == - 1)
                    return;
                ProgressLabel.Text = "Progress: Parsing Query ...";
                Query query = QueryParser.Parse(line, "contents", analyzer);
                //int[] ix = qtm.GetTermFrequencies();

                Hits hits = searcher.Search(query);
                SearchProgressBar.Increment(1);
                ProgressLabel.Text = "Progress: Searched. Analyzing results ...";

                //QueryHighlightExtractor highlighter = new QueryHighlightExtractor(query, new WhitespaceAnalyzer(), "<B>", "</B>");
                Highlighter highlighter = new Highlighter(new QueryScorer(query));
                highlighter.SetTextFragmenter(new SimpleFragmenter(80));
                int maxNumFragmentsRequired = 1;

                    //int HITS_PER_PAGE = 10;
                    for (int i = 0; i < 10; i++)
                    {
                            SearchProgressBar.Increment(1);
                            ProgressLabel.Text = "Progress: Analyzing hit " + (i+1).ToString();
                            // get the document from index
                            Document doc = hits.Doc(i);
                            //SegmentReader ir = new SegmentReader();
                            //Lucene.Net.Index.TermFreqVector tfv =
                            //tfv.GetTermFrequencies
                            string score = hits.Score(i).ToString();
                            //Box += "Hit no. " + i + " scored: " + score + " occ: " + /*highlighter.tokenFrequency */ " best fragment: \n";
                            ResultSet a = new ResultSet();
                            a.BookName = doc.Get("path").Replace(@"c:\cscd\temp\","");
                            a.Score = hits.Score(i);
                            a.numberOfHits = hits.Length();

                            // get the document filename
                            // we can't get the text from the index
                            //because we didn't store it there
                            //so get it from archive
                            string path = doc.Get("path");
                            string name = GetInternalName(path);
                            PaliReaderUtils.AalekhDecoder.UnzipFromZipLibrary(name);
                            path = System.IO.Directory.GetCurrentDirectory() + @"\Work\" + name + ".htm";
                            string plainText = "";
                            //load text from zip archive temporarily
                            using (StreamReader sr = new StreamReader(path, System.Text.Encoding.Default))
                            {
                                plainText = parseHtml(sr.ReadToEnd());
                            }
            //-------------------------------Highlighter Code 1.4
                            TokenStream tokenStream = analyzer.TokenStream(new StringReader(plainText));
                            a.textFragment = highlighter.GetBestFragments(tokenStream, plainText, maxNumFragmentsRequired, "...");
                            if(File.Exists(path))
                                File.Delete(path);
            //-------------------------------
                            resultList.Add(a);
                        }
                SearchProgressBar.Value = 0;
                searcher.Close();
                ssr = new ShowSearchResults(/*Box*/resultList);
                //this.Hide();
                ssr.OpenBookEvent += new ShowSearchResults.OpenBook(this.TriggerOpenBook);
                ssr.Closing += new System.ComponentModel.CancelEventHandler(this.Closing_ResultWindow);
                this.Hide();
                ssr.ShowDialog();

            }
            catch (System.Exception e)
            {
                MessageBox.Show(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }