/// <summary>
/// Runs <paramref name="query"/> through <c>_doSearch</c> and returns one
/// <see cref="SearchResult"/> per hit. Each result receives the highlighted
/// fragments of the hit's "contents" field whose score is greater than zero.
/// </summary>
/// <param name="query">Query text; it is also parsed against the "contents" field to drive highlighting.</param>
/// <returns>One entry per hit, in hit order; empty when nothing matched.</returns>
public SearchResult[] Search(string query)
{
    Hits hits = _doSearch(query);
    List<SearchResult> results = new List<SearchResult>();

    // Nothing to highlight: skip building the highlighter (and re-parsing the query) entirely.
    if (hits.Length() == 0)
    {
        return results.ToArray();
    }

    // The highlighter set-up does not depend on the individual hit, so build it once
    // instead of once per document.
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(_fragmentSize);
    Highlighter hiliter = new Highlighter(formatter, new QueryScorer(QueryParser.Parse(query, "contents", _analyzer)));
    hiliter.SetTextFragmenter(fragmenter);

    for (int i = 0; i < hits.Length(); i++)
    {
        Document doc = hits.Doc(i);
        SearchResult sr = new SearchResult(doc, _analyzer, query, _fragmentSize);

        // A hit without a "contents" value still yields a result, just without fragments.
        string contents = doc.Get("contents");
        if (contents != null)
        {
            // +1 keeps the requested fragment count above zero for short documents.
            int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;
            TokenStream tokenstream = _analyzer.TokenStream("contents", new StringReader(contents));
            TextFragment[] frags = hiliter.GetBestTextFragments(tokenstream, contents, false, numfragments);

            foreach (TextFragment frag in frags)
            {
                if (frag.GetScore() > 0)
                {
                    sr.AddFragment(frag.ToString());
                }
            }
        }

        results.Add(sr);
    }

    return results.ToArray();
}
/// <summary>
/// Builds a complete HTML page containing the document contents of <paramref name="sr"/>
/// with the query terms wrapped in highlight spans.
/// </summary>
/// <param name="sr">Search result supplying the document text, query, query parser, analyzer and fragment size.</param>
/// <returns>The HTML page, with every "\n" replaced by "&lt;br/&gt;".</returns>
public static string GetOriginalHighlightedContents(SearchResult sr)
{
    const string pageHeader = "<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style>.highlight{background:yellow;}</style><body><font face=Arial size=5>";
    const string pageFooter = "</font></body></html>";

    StringBuilder html = new StringBuilder(pageHeader);
    string text = sr.GetDocContents();

    SimpleHTMLFormatter spanFormatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    SimpleFragmenter splitter = new SimpleFragmenter(sr.FragmentSize);
    QueryScorer queryScorer = new QueryScorer(sr.QueryParser.Parse(sr.Query));
    Highlighter marker = new Highlighter(spanFormatter, queryScorer);
    marker.SetTextFragmenter(splitter);

    // Request one more fragment than the text can be split into, so the count is never zero.
    int maxFragments = text.Length / splitter.GetFragmentSize() + 1;
    TokenStream tokens = sr.Analyzer.TokenStream("contents", new StringReader(text));
    string highlighted = marker.GetBestFragments(tokens, text, maxFragments, "...");

    html.Append(highlighted);
    html.Append(pageFooter);
    html.Replace("\n", "<br/>");
    return html.ToString();
}
/// <summary>
/// Produces the highlighted document contents of <paramref name="sr"/> as an HTML
/// snippet wrapped only in a <c>font</c> element (no html/body envelope).
/// </summary>
/// <param name="sr">Search result supplying the document text, query, query parser, analyzer and fragment size.</param>
/// <returns>The snippet, with every "\n" replaced by "&lt;br/&gt;".</returns>
public static string GetHilitedContentsWithoutHeaders(SearchResult sr)
{
    StringBuilder snippet = new StringBuilder("<font face=Arial size=5>");
    string body = sr.GetDocContents();

    SimpleFragmenter chunker = new SimpleFragmenter(sr.FragmentSize);
    Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"),
        new QueryScorer(sr.QueryParser.Parse(sr.Query)));
    highlighter.SetTextFragmenter(chunker);

    // The trailing +1 guarantees at least one fragment is requested.
    int fragmentBudget = body.Length / chunker.GetFragmentSize() + 1;
    TokenStream source = sr.Analyzer.TokenStream("contents", new StringReader(body));

    snippet.Append(highlighter.GetBestFragments(source, body, fragmentBudget, "..."))
           .Append("</font>");
    snippet.Replace("\n", "<br/>");

    return snippet.ToString();
}
/// <summary>
/// Writes two HTML files next to the application executable:
/// "results.html", listing every fragment of the document's "contents" field that
/// scores above zero for <paramref name="query"/>, and "contents.html", holding the
/// highlighted document text. The results page links to the contents page.
/// </summary>
/// <param name="d">Document whose "contents" and "filename" fields are read.</param>
/// <param name="query">Query text, parsed against the "contents" field.</param>
/// <param name="analyzer">Analyzer used both for parsing the query and for tokenizing the text.</param>
public static void Highlight(Document d, string query, Analyzer analyzer)
{
    // A document without a "contents" value is treated as empty text rather than crashing.
    string contents = d.Get("contents") ?? string.Empty;

    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\"><b>", "</b></span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(250);
    Highlighter hiliter = new Highlighter(formatter, new QueryScorer(QueryParser.Parse(query, "contents", analyzer)));
    hiliter.SetTextFragmenter(fragmenter);

    // +1 ensures the count is never zero; asking for more fragments than exist is harmless.
    int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;

    string startupPath = System.Windows.Forms.Application.StartupPath;
    string contentspath = System.IO.Path.Combine(startupPath, "contents.html");
    string resultspath = System.IO.Path.Combine(startupPath, "results.html");

    // ---- results.html: one entry per scoring fragment ----
    StringBuilder result = new StringBuilder("<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style>.highlight{background:yellow;}</style><head><title>Search Results - ");
    // NOTE(review): the file name is emitted without HTML encoding — confirm it can never contain markup.
    result.Append(d.Get("filename"));
    result.Append("</title></head><body><font face=Arial size=5>");

    TokenStream tokenstream = analyzer.TokenStream("contents", new System.IO.StringReader(contents));
    TextFragment[] frags = hiliter.GetBestTextFragments(tokenstream, contents, false, numfragments);
    foreach (TextFragment frag in frags)
    {
        if (frag.GetScore() > 0)
        {
            result.Append(frag.ToString());
            result.Append("<br/><hr/><br/>");
        }
    }

    result.Append("</font><a target=_self href=\"file:///");
    result.Append(contentspath);
    result.Append("\">View Original Document...</a>");
    result.Append("</body></html>");
    result.Replace("\n", "<br/>");
    System.IO.File.WriteAllText(resultspath, result.ToString());

    // ---- contents.html: the document text with highlights ----
    Highlighter hiliter2 = new Highlighter(formatter, new QueryScorer(QueryParser.Parse(query, "contents", analyzer)));
    hiliter2.SetTextFragmenter(fragmenter);

    // Tokenize with the same field name as the first pass so a field-sensitive analyzer
    // produces the same tokens for both pages.
    TokenStream tokstr = analyzer.TokenStream("contents", new System.IO.StringReader(contents));

    StringBuilder htmlcontents = new StringBuilder("<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style>.highlight{background:yellow;}</style><body><font face=Arial size=5>");
    htmlcontents.Append(hiliter2.GetBestFragments(tokstr, contents, numfragments, "..."));
    htmlcontents.Append("</font></body></html>");
    htmlcontents.Replace("\n", "<br/>");
    System.IO.File.WriteAllText(contentspath, htmlcontents.ToString());
}
/// <summary>
/// Highlights the "contents" field of the document held by <paramref name="sr"/> and
/// adds every fragment scoring above zero to that same result.
/// </summary>
/// <param name="sr">Result to enrich; it supplies the document, query, query parser, analyzer and fragment size.</param>
/// <returns>The same <paramref name="sr"/> instance, after its fragments were added.</returns>
public static SearchResult GetFastSearchResultFragments(ref SearchResult sr)
{
    Document source = sr.Document;
    string text = source.Get("contents");

    SimpleHTMLFormatter spanFormatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    SimpleFragmenter splitter = new SimpleFragmenter(sr.FragmentSize);
    QueryScorer queryScorer = new QueryScorer(sr.QueryParser.Parse(sr.Query));
    Highlighter marker = new Highlighter(spanFormatter, queryScorer);
    marker.SetTextFragmenter(splitter);

    // Always request at least one fragment.
    int maxFragments = text.Length / splitter.GetFragmentSize() + 1;
    TokenStream tokens = sr.Analyzer.TokenStream("contents", new StringReader(text));
    TextFragment[] candidates = marker.GetBestTextFragments(tokens, text, false, maxFragments);

    for (int index = 0; index < candidates.Length; index++)
    {
        TextFragment candidate = candidates[index];
        if (candidate.GetScore() > 0)
        {
            sr.AddFragment(candidate.ToString());
        }
    }

    return sr;
}
/// <summary>
/// Executes <paramref name="query"/> restricted to documents whose "publishStart" is
/// before the current UTC tick count and whose "publishStop" is at or after it, then
/// materializes the requested window of hits with a highlighted excerpt each.
/// </summary>
/// <param name="metaData">Names of additional stored fields to copy into each hit's MetaData; null is treated as none.</param>
/// <param name="resultOffset">Index of the first hit to return.</param>
/// <param name="resultLength">Maximum number of hits to return.</param>
/// <param name="query">The user query; also used to score highlight fragments.</param>
/// <returns>A result carrying the number of hits found, the requested window of hits and the elapsed seconds.</returns>
private SearchResult ExecuteQuery(string[] metaData, int resultOffset, int resultLength, Query query)
{
    // A stopwatch is not affected by clock adjustments or daylight-saving changes.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var ticks = DateTime.UtcNow.Ticks;
    Query publishStartQuery = NumericRangeQuery.NewLongRange("publishStart", null, ticks, true, false);
    Query publishStopQuery = NumericRangeQuery.NewLongRange("publishStop", ticks, null, false, true);

    var booleanQuery = new BooleanQuery
    {
        {query, Occur.MUST},
        {publishStartQuery, Occur.MUST},
        {publishStopQuery, Occur.MUST}
    };

    var scoreDocs = _searcher.Search(booleanQuery, null, MaxHits, Sort.RELEVANCE).ScoreDocs;
    var result = new SearchResult {NumberOfHits = scoreDocs.Length};

    // Create highlighter.
    // NOTE(review): the class attribute value contains a trailing ';' ("search-highlight;") —
    // confirm against the stylesheet before changing it.
    IFormatter formatter = new SimpleHTMLFormatter("<span class=\"search-highlight;\">", "</span>");
    var fragmenter = new SimpleFragmenter(120);
    var scorer = new QueryScorer(query);
    var highlighter = new Highlighter(formatter, scorer) {TextFragmenter = fragmenter};

    var metaKeys = metaData ?? new string[0];

    if (resultOffset < scoreDocs.Length)
    {
        var resultUpperOffset = resultOffset + resultLength;
        if (resultUpperOffset > scoreDocs.Length)
        {
            resultUpperOffset = scoreDocs.Length;
        }

        for (var i = resultOffset; i < resultUpperOffset; i++)
        {
            var document = _searcher.Doc(scoreDocs[i].Doc);

            // Read the stored field once and reuse it for tokenizing and highlighting.
            var content = document.Get("content");
            var excerpt = "";
            if (content != null)
            {
                var stream = _analyzer.TokenStream("", new StringReader(content));
                excerpt = highlighter.GetBestFragments(stream, content, 2, "...");
            }

            Guid pageId;
            (document.Get("pageId") ?? string.Empty).TryParseGuid(out pageId);

            var hit = new SearchHit
            {
                PageId = pageId,
                Path = document.Get("path"),
                Title = document.Get("title"),
                Excerpt = excerpt
            };

            foreach (var key in metaKeys)
            {
                hit.MetaData.Add(key, document.Get(key));
            }

            result.Hits.Add(hit);
        }
    }

    stopwatch.Stop();
    result.SecondsTaken = stopwatch.Elapsed.TotalSeconds;
    return result;
}
/// <summary>
/// Searches the index of <paramref name="siteId"/> for <paramref name="queryText"/> across the
/// Title, ModuleTitle, contents, PageName, PageMetaDesc and Keyword fields, applies role,
/// publish-date, modified-date and feature filters, and returns the requested page of results.
/// </summary>
/// <param name="siteId">Site whose index directory and analyzer provider are used.</param>
/// <param name="isAdminContentAdminOrSiteEditor">When true the role filters are skipped.</param>
/// <param name="userRoles">Roles passed to the role filters.</param>
/// <param name="featureGuids">Optional feature ids; when non-empty a hit must match at least one.</param>
/// <param name="modifiedBeginDate">Lower bound for "LastModUtc" (date part only).</param>
/// <param name="modifiedEndDate">Upper bound for "LastModUtc" (date part only).</param>
/// <param name="queryText">User query; null or empty returns an empty collection.</param>
/// <param name="highlightResults">When true, each item's Intro is set to the best highlighted fragment of "contents".</param>
/// <param name="highlightedFragmentSize">Fragment size handed to the highlighter's fragmenter.</param>
/// <param name="pageNumber">1-based page number.</param>
/// <param name="pageSize">Number of items per page.</param>
/// <param name="maxClauseCount">Applied to <c>BooleanQuery.MaxClauseCount</c> when different from 1024.</param>
/// <param name="totalHits">Receives the total number of matching documents.</param>
/// <param name="invalidQuery">Set to true when a parse, too-many-clauses or I/O exception was handled.</param>
/// <returns>The page of results; empty when there is no query, no index, or a handled error occurred.</returns>
public static IndexItemCollection Search(
    int siteId,
    bool isAdminContentAdminOrSiteEditor,
    List<string> userRoles,
    Guid[] featureGuids,
    DateTime modifiedBeginDate,
    DateTime modifiedEndDate,
    string queryText,
    bool highlightResults,
    int highlightedFragmentSize,
    int pageNumber,
    int pageSize,
    int maxClauseCount,
    out int totalHits,
    out bool invalidQuery)
{
    invalidQuery = false;
    totalHits = 0;

    IndexItemCollection results = new IndexItemCollection();

    if (string.IsNullOrEmpty(queryText))
    {
        return results;
    }

    using (Lucene.Net.Store.Directory searchDirectory = GetDirectory(siteId))
    {
        if (!IndexReader.IndexExists(searchDirectory))
        {
            return results;
        }

        // UTC ticks keep the measured duration independent of daylight-saving shifts.
        long startTicks = DateTime.UtcNow.Ticks;

        try
        {
            if (maxClauseCount != 1024)
            {
                // This is a static setting on BooleanQuery, not one scoped to this search.
                BooleanQuery.MaxClauseCount = maxClauseCount;
            }

            // there are different analyzers for different languages
            // see LuceneSettings.config in the root of the web
            LuceneSettingsProvider provider = LuceneSettingsManager.Providers[GetSiteProviderName(siteId)];
            Analyzer analyzer = provider.GetAnalyzer();

            Query searchQuery = MultiFieldQueryParser.Parse(
                Lucene.Net.Util.Version.LUCENE_30,
                new string[] { queryText, queryText, queryText, queryText, queryText, queryText.Replace("*", string.Empty) },
                new string[] { "Title", "ModuleTitle", "contents", "PageName", "PageMetaDesc", "Keyword" },
                analyzer);

            BooleanQuery filterQuery = new BooleanQuery(); // won't be used to score the results

            if (!isAdminContentAdminOrSiteEditor) // skip role filters for these users
            {
                AddRoleFilters(userRoles, filterQuery);
                AddModuleRoleFilters(userRoles, filterQuery);
            }

            TermRangeQuery beginDateFilter = new TermRangeQuery(
                "PublishBeginDate",
                DateTime.MinValue.ToString("s"),
                DateTime.UtcNow.ToString("s"),
                true,
                true);
            filterQuery.Add(beginDateFilter, Occur.MUST);

            TermRangeQuery endDateFilter = new TermRangeQuery(
                "PublishEndDate",
                DateTime.UtcNow.ToString("s"),
                DateTime.MaxValue.ToString("s"),
                true,
                true);
            filterQuery.Add(endDateFilter, Occur.MUST);

            if ((modifiedBeginDate.Date > DateTime.MinValue.Date) || (modifiedEndDate.Date < DateTime.MaxValue.Date))
            {
                TermRangeQuery lastModifiedDateFilter = new TermRangeQuery(
                    "LastModUtc",
                    modifiedBeginDate.Date.ToString("s"),
                    modifiedEndDate.Date.ToString("s"),
                    true,
                    true);
                filterQuery.Add(lastModifiedDateFilter, Occur.MUST);
            }

            if ((featureGuids != null) && (featureGuids.Length > 0))
            {
                // A hit has to belong to at least one of the requested features.
                BooleanQuery featureFilter = new BooleanQuery();
                foreach (Guid featureGuid in featureGuids)
                {
                    featureFilter.Add(new TermQuery(new Term("FeatureId", featureGuid.ToString())), Occur.SHOULD);
                }

                filterQuery.Add(featureFilter, Occur.MUST);
            }

            Filter filter = new QueryWrapperFilter(filterQuery); // filterQuery won't affect result scores

            using (IndexSearcher searcher = new IndexSearcher(searchDirectory))
            {
                // Collector migration background:
                // http://stackoverflow.com/questions/9872933/migrating-lucene-hitcollector-2-x-to-collector-3-x
                int maxResults = int.MaxValue;
                TopDocs hits = searcher.Search(searchQuery, filter, maxResults);

                int startHit = 0;
                if (pageNumber > 1)
                {
                    startHit = ((pageNumber - 1) * pageSize);
                }

                totalHits = hits.TotalHits;

                int end = startHit + pageSize;
                if (totalHits <= end)
                {
                    end = totalHits;
                }

                int itemsAdded = 0;

                QueryScorer scorer = new QueryScorer(searchQuery);
                SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='searchterm'>", "</span>");
                Highlighter highlighter = new Highlighter(formatter, scorer);
                highlighter.TextFragmenter = new SimpleFragmenter(highlightedFragmentSize);

                for (int i = startHit; i < end; i++)
                {
                    ScoreDoc scoreDoc = hits.ScoreDocs[i];
                    Document doc = searcher.Doc(scoreDoc.Doc);
                    IndexItem indexItem = new IndexItem(doc, scoreDoc.Score);

                    if (highlightResults)
                    {
                        // Check for a missing field explicitly: new StringReader(null) throws
                        // ArgumentNullException, which none of the catch clauses here handle,
                        // so one such document used to fail the whole search.
                        string contents = doc.Get("contents");
                        if (contents != null)
                        {
                            try
                            {
                                TokenStream stream = analyzer.TokenStream("contents", new StringReader(contents));
                                string highlightedResult = highlighter.GetBestFragment(stream, contents);
                                if (highlightedResult != null)
                                {
                                    indexItem.Intro = highlightedResult;
                                }
                            }
                            catch (NullReferenceException)
                            {
                                // Highlighting is best effort; the item is still returned without an intro.
                            }
                        }
                    }

                    results.Add(indexItem);
                    itemsAdded += 1;
                }

                results.ItemCount = itemsAdded;
                results.PageIndex = pageNumber;
                results.ExecutionTime = DateTime.UtcNow.Ticks - startTicks;
            }
        }
        catch (ParseException ex)
        {
            invalidQuery = true;
            log.Error("handled error for search terms " + queryText, ex);

            // these parser exceptions are generally caused by
            // spambots posting too much junk into the search form
            // here's an option to automatically ban the ip address
            HandleSpam(queryText, ex);
            return results;
        }
        catch (BooleanQuery.TooManyClauses ex)
        {
            invalidQuery = true;
            log.Error("handled error for search terms " + queryText, ex);
            return results;
        }
        catch (System.IO.IOException ex)
        {
            invalidQuery = true;
            log.Error("handled error for search terms " + queryText, ex);
            return results;
        }

        return results;
    }
}
/// <summary>
/// Searches the questions index for <paramref name="pretraga"/> in the "Naslov", "Sadrzaj"
/// and "Tagovi" fields and returns up to five hits as rows of a <see cref="DataTable"/>.
/// The "Sadrzaj" column holds highlighted fragments of the stored content, not the full text.
/// </summary>
/// <param name="pretraga">Query text in Lucene query syntax.</param>
/// <returns>A table with the full column schema and one row per hit.</returns>
public static DataTable searchPitanja(string pretraga)
{
    DataTable ResultsPitanja = new DataTable();

    // Create the result DataTable schema.
    ResultsPitanja.Columns.Add("id", typeof(Int32));
    ResultsPitanja.Columns.Add("Naslov", typeof(string));
    ResultsPitanja.Columns.Add("Sadrzaj", typeof(string));
    ResultsPitanja.Columns.Add("Tagovi", typeof(string));
    ResultsPitanja.Columns.Add("DatumKreiranja", typeof(DateTime));
    ResultsPitanja.Columns.Add("DatumZadnjeIzmjene", typeof(DateTime));
    ResultsPitanja.Columns.Add("DatumZadnjeAktivnosti", typeof(DateTime));
    ResultsPitanja.Columns.Add("DatumZatvaranjaPosta", typeof(DateTime));
    ResultsPitanja.Columns.Add("PrihvaceniOdgovori", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojOdgovora", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojKomentara", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojOmiljenih", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojPregleda", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojPoena", typeof(Int32));
    ResultsPitanja.Columns.Add("VlasnikID", typeof(Int32));
    ResultsPitanja.Columns.Add("VlasnikNadimak", typeof(string));
    ResultsPitanja.Columns.Add("PromijenioID", typeof(Int32));
    ResultsPitanja.Columns.Add("RoditeljskiPostID", typeof(Int32));
    ResultsPitanja.Columns.Add("PostVrsta", typeof(Int32));
    ResultsPitanja.Columns.Add("temp", typeof(string));
    ResultsPitanja.Columns.Add("Likes", typeof(Int32));
    ResultsPitanja.Columns.Add("Unlikes", typeof(Int32));
    ResultsPitanja.Columns.Add("Sazetak", typeof(string));
    ResultsPitanja.Columns.Add("BrojRangiranja", typeof(Int32));
    ResultsPitanja.Columns.Add("PrihvacenaIzmjena", typeof(Int32));
    ResultsPitanja.Columns.Add("Podnaslov", typeof(string));
    ResultsPitanja.Columns.Add("Broj.Razgovora", typeof(Int32));
    ResultsPitanja.Columns.Add("sample", typeof(string));

    // Stored fields copied verbatim into the column of the same name. The remaining
    // columns (DatumZatvaranjaPosta, VlasnikID, PromijenioID, RoditeljskiPostID, temp,
    // Broj.Razgovora, sample) are left unset.
    string[] copiedFields =
    {
        "id", "Naslov", "Tagovi", "DatumKreiranja", "DatumZadnjeIzmjene", "DatumZadnjeAktivnosti",
        "PrihvaceniOdgovori", "BrojOdgovora", "BrojKomentara", "BrojOmiljenih", "BrojPregleda",
        "BrojPoena", "VlasnikNadimak", "PostVrsta", "Likes", "Unlikes", "Sazetak",
        "BrojRangiranja", "PrihvacenaIzmjena", "Podnaslov"
    };

    // NOTE(review): the index location is hard-coded; consider moving it to configuration.
    string indexDirectory = "J:/Triglav_Web_App/Triglav/Web/Lucene/Pitanja";
    var analyzer = new StandardAnalyzer(Version.LUCENE_30);

    // The using block releases the searcher even when parsing, searching or highlighting throws.
    using (IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory)))
    {
        var parser = new MultiFieldQueryParser(Version.LUCENE_30, new[] { "Naslov", "Sadrzaj", "Tagovi" }, analyzer);
        Query query = parser.Parse(pretraga);

        // Search: only the five best hits are requested.
        TopDocs hits = searcher.Search(query, 5);

        // Create highlighter.
        IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold; background-color: #e5ecf9; \">", "</span>");
        SimpleFragmenter fragmenter = new SimpleFragmenter(80);
        QueryScorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.TextFragmenter = fragmenter;

        for (int i = 0; i < hits.ScoreDocs.Length; i++)
        {
            // Get the document from the index.
            Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);

            // A document without stored content gets an empty excerpt instead of
            // failing inside the StringReader constructor.
            string content = doc.Get("Sadrzaj");
            string sample = string.Empty;
            if (content != null)
            {
                TokenStream stream = analyzer.TokenStream("", new StringReader(content));
                sample = highlighter.GetBestFragments(stream, content, 3, "...");
            }

            // Create a new row with the result data.
            DataRow rowPitanja = ResultsPitanja.NewRow();
            foreach (string field in copiedFields)
            {
                rowPitanja[field] = doc.Get(field);
            }

            rowPitanja["Sadrzaj"] = sample;
            ResultsPitanja.Rows.Add(rowPitanja);
        }
    }

    return ResultsPitanja;
}
/// <summary>
/// Runs the page's query against the index under "~/App_Data/index", fills
/// <c>Results</c> with the current window of hits (title, highlighted sample, path,
/// url, type), binds it to <c>Repeater1</c> and updates the paging/timing fields.
/// </summary>
private void search()
{
    DateTime start = DateTime.Now;

    // Create the result DataTable.
    this.Results.Columns.Add("title", typeof(string));
    this.Results.Columns.Add("sample", typeof(string));
    this.Results.Columns.Add("path", typeof(string));
    this.Results.Columns.Add("url", typeof(string));
    this.Results.Columns.Add("Type", typeof(string));

    // Index is placed in the "index" subdirectory of App_Data.
    string indexDirectory = Server.MapPath("~/App_Data/index");
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

    // The using block releases the searcher even when searching or binding throws.
    using (IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory)))
    {
        BooleanQuery bquery = new BooleanQuery();
        List<string> SearchTerm = new List<string> { "text", "path", "title", "Extension", "EXTPRP" };

        List<string> Projects = new List<string>();
        if (Session["ProjectList"] != null)
        {
            Projects = (List<string>)Session["ProjectList"];
        }

        // When a known type is requested through ?Page=, exclude every other type.
        List<string> allType = new List<string> { "A", "B", "C" };
        string requestedType = this.Request.QueryString["Page"];
        if (requestedType != null && allType.Contains(requestedType))
        {
            allType.Remove(requestedType);
            foreach (string type in allType)
            {
                bquery.Add(new TermQuery(new Term("EXTPRP", type)), Occur.MUST_NOT);
                bquery.Add(new FuzzyQuery(new Term("EXTPRP", type), 0.5f, 0), Occur.MUST_NOT);
            }
        }

        // Exact match on the title is boosted; every other field is matched fuzzily with a low boost.
        foreach (string term in SearchTerm)
        {
            if (term == "title")
            {
                TermQuery termq = new TermQuery(new Term(term, this.Query));
                termq.Boost = 5f;
                bquery.Add(termq, Occur.SHOULD);
            }
            else
            {
                FuzzyQuery termq = new FuzzyQuery(new Term(term, this.Query), 0.5f, 0);
                termq.Boost = 0.1f;
                bquery.Add(termq, Occur.SHOULD);
            }
        }

        TopDocs hits = searcher.Search(bquery, null, 10000);

        // Keep only hits whose containing directory is one of the session's projects.
        if (Projects.Count != 0)
        {
            hits.ScoreDocs = hits.ScoreDocs
                .Where(obj => Projects.Contains(Path.GetDirectoryName(searcher.Doc(obj.Doc).Get("path"))))
                .Distinct()
                .ToArray();
        }

        this.total = hits.ScoreDocs.Length;
        this.startAt = InitStartAt();
        int resultsCount = Math.Min(total, this.maxResults + this.startAt);

        // Create highlighter.
        IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");
        SimpleFragmenter fragmenter = new SimpleFragmenter(200);
        QueryScorer scorer = new QueryScorer(bquery);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.TextFragmenter = fragmenter;

        for (int i = startAt; i < resultsCount; i++)
        {
            Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
            string path = doc.Get("path");
            string extension = doc.Get("Extension");
            string text = doc.Get("text");

            // Ordinal, null-safe comparison: a missing extension is simply "not an image".
            bool isImage =
                string.Equals(extension, ".png", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(extension, ".jpg", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(extension, ".gif", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(extension, ".bmp", StringComparison.OrdinalIgnoreCase);

            string sample = "";
            // Images and documents without stored text get an empty sample.
            if (!isImage && text != null)
            {
                try
                {
                    TokenStream stream = analyzer.TokenStream("", new StringReader(text));
                    sample = highlighter.GetBestFragment(stream, text);
                }
                catch (Exception ex)
                {
                    // Highlighting is best effort: keep the row, but leave a trace of the failure.
                    System.Diagnostics.Trace.TraceWarning("Highlighting failed for '" + path + "': " + ex.Message);
                }
            }

            // Create a new row with the result data.
            string link = "http://sagitec-1629/KNBASE/" + path.Replace(@"\", "/").Replace("//SAGITEC-1629/Soogle/", "");
            DataRow row = this.Results.NewRow();
            row["title"] = doc.Get("title");
            row["path"] = link;
            row["url"] = link;
            row["sample"] = sample;
            if (path.IndexOf('.') >= 0)
            {
                row["Type"] = GetMIMEType(path);
            }

            this.Results.Rows.Add(row);
        }

        Repeater1.DataSource = Results;
        Repeater1.DataBind();
    }

    // Result information.
    this.duration = DateTime.Now - start;
    this.fromItem = startAt + 1;
    this.toItem = Math.Min(startAt + maxResults, total);
}
//********************************************************************************************************************************
/// <summary>
/// Runs the page's query (when the query text box is not empty) against the index at
/// <c>IndexDirPath</c>, fills <c>Results</c> with every hit (title, sample, path, url, type)
/// and binds a paged view of it to <c>Repeater1</c>, together with the pager controls.
/// </summary>
private void search()
{
    if (TextBoxQuery.Text == "")
    {
        return;
    }

    DateTime start = DateTime.Now;

    // Create the result DataTable.
    this.Results.Columns.Add("title", typeof(string));
    this.Results.Columns.Add("sample", typeof(string));
    this.Results.Columns.Add("path", typeof(string));
    this.Results.Columns.Add("url", typeof(string));
    this.Results.Columns.Add("Type", typeof(string));

    string indexDirectory = Server.MapPath(IndexDirPath);
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

    // The using block releases the searcher even when searching or binding throws.
    using (IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory)))
    {
        BooleanQuery bquery = new BooleanQuery();
        List<string> SearchTerm = new List<string> { "text", "path", "title", "Extension", "EXTPRP" };

        List<string> Projects = new List<string>();
        if (Session["ProjectList"] != null)
        {
            Projects = (List<string>)Session["ProjectList"];
        }

        List<string> allType;
        if (hnkClickLink.Value == "")
        {
            allType = new List<string>();
        }
        else
        {
            allType = new List<string> { "Doc", "Code", "Images", "Other" };
        }

        // When a known type link was clicked, exclude every other type.
        if (this.Request.QueryString["Page"] != null)
        {
            string clickedType = Convert.ToString(hnkClickLink.Value);
            if (allType.Contains(clickedType))
            {
                allType.Remove(clickedType);
                foreach (string type in allType)
                {
                    bquery.Add(new TermQuery(new Term("EXTPRP", type)), Occur.MUST_NOT);
                    bquery.Add(new FuzzyQuery(new Term("EXTPRP", type), 0.5f, 0), Occur.MUST_NOT);
                }
            }
        }

        string loweredQuery = this.Query.ToLower();

        // Exact term match on every searched field; the title counts most.
        foreach (string term in SearchTerm)
        {
            TermQuery termq = new TermQuery(new Term(term, loweredQuery));
            termq.Boost = (term == "title") ? 5f : 0.1f;
            bquery.Add(termq, Occur.SHOULD);
        }

        // One extra clause: a query containing '.' is also tried as an extension (dots removed),
        // any other query is also matched fuzzily against the title.
        if (this.Query.Contains("."))
        {
            string SearchKeyword = this.Query.Replace(".", "");
            TermQuery extensionQuery = new TermQuery(new Term("Extension", SearchKeyword.ToLower()));
            extensionQuery.Boost = 5f;
            bquery.Add(extensionQuery, Occur.SHOULD);
        }
        else
        {
            FuzzyQuery fuzzyTitle = new FuzzyQuery(new Term("title", loweredQuery));
            fuzzyTitle.Boost = 5f;
            bquery.Add(fuzzyTitle, Occur.SHOULD);
        }

        TopDocs hits = searcher.Search(bquery, null, 10000);

        // Keep only hits whose (split) containing directory is one of the session's projects.
        if (Projects.Count != 0)
        {
            hits.ScoreDocs = hits.ScoreDocs
                .Where(obj => Projects.Contains(SplitPath(Path.GetDirectoryName(searcher.Doc(obj.Doc).Get("path")))))
                .Distinct()
                .ToArray();
        }

        this.total = hits.ScoreDocs.Length;
        this.startAt = InitStartAt();

        // Create highlighter.
        IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");
        SimpleFragmenter fragmenter = new SimpleFragmenter(200);
        QueryScorer scorer = new QueryScorer(bquery);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.TextFragmenter = fragmenter;

        // Every hit becomes a row; paging is applied afterwards by the PagedDataSource.
        for (int i = 0; i < this.total; i++)
        {
            Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
            string path = doc.Get("path");
            string extension = doc.Get("Extension");
            string text = doc.Get("text");

            // Ordinal, null-safe comparison: a missing extension is simply "not an image".
            bool isImage =
                string.Equals(extension, ".png", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(extension, ".jpg", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(extension, ".gif", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(extension, ".bmp", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(extension, ".jpeg", StringComparison.OrdinalIgnoreCase);

            string sample = "";
            // Images and documents without stored text get an empty sample.
            if (!isImage && text != null)
            {
                try
                {
                    TokenStream stream = analyzer.TokenStream("", new StringReader(text));
                    string outp = highlighter.GetBestFragment(stream, text);

                    // Fall back to the start of the raw text when nothing could be highlighted.
                    sample = Limit((outp != null ? outp : text).Trim(), 200);
                }
                catch (Exception ex)
                {
                    // Highlighting is best effort: keep the row, but leave a trace of the failure.
                    System.Diagnostics.Trace.TraceWarning("Highlighting failed for '" + path + "': " + ex.Message);
                }
            }

            // Create a new row with the result data.
            string link = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
            DataRow row = Results.NewRow();
            row["title"] = doc.Get("title");
            row["path"] = link;
            row["url"] = link;
            row["sample"] = sample;
            if (path.IndexOf('.') >= 0)
            {
                row["Type"] = GetMIMEType(path);
            }

            Results.Rows.Add(row);
        }

        //****************************** Paging logic for the Repeater control ******************************
        PagedDataSource pgitems = new PagedDataSource();
        pgitems.DataSource = new DataView(Results);
        pgitems.AllowPaging = true;
        pgitems.PageSize = 10; // number of items shown per page
        pgitems.CurrentPageIndex = PageNumber;

        btnPrev.Visible = !pgitems.IsFirstPage;
        btnNext.Visible = !pgitems.IsLastPage;

        if (pgitems.PageCount > 1)
        {
            rptPages.Visible = true;

            // NOTE(review): five page links are always emitted starting at the current page,
            // even when fewer pages remain — confirm whether this should be capped at PageCount.
            ArrayList pages = new ArrayList();
            for (int i = PageNumber; i < 5 + PageNumber; i++)
            {
                pages.Add((i + 1).ToString());
            }

            rptPages.DataSource = pages;
            rptPages.DataBind();
        }
        else
        {
            rptPages.Visible = false;
        }

        Repeater1.DataSource = pgitems;
        Repeater1.DataBind();
    }

    // Result information.
    this.duration = DateTime.Now - start;
    this.fromItem = startAt + 1;
    this.toItem = Math.Min(startAt + maxResults, total);
}
/// <summary>
/// Searches the index for <paramref name="text"/> and document type
/// <paramref name="tipodocumentoId"/>, limited by the date filter, sorted by
/// "dataCriacao" in reverse order, and returns what <c>BuildSearchResult</c> produces.
/// As a side effect, <c>this.Highlighter</c> and <c>this.Stream</c> are set before the
/// results are built.
/// </summary>
/// <param name="text">Search text; it is also tokenized into <c>this.Stream</c>.</param>
/// <param name="tipodocumentoId">Document type id passed to <c>BuildQuery</c>.</param>
/// <param name="startDate">Start of the date range passed to <c>BuildDateFilter</c>.</param>
/// <param name="endDate">End of the date range passed to <c>BuildDateFilter</c>.</param>
/// <returns>The list built by <c>BuildSearchResult</c> from the matching documents.</returns>
private IList<int> Search(string text, int tipodocumentoId, string startDate, string endDate)
{
    // Nested using blocks dispose in the order searcher, reader, directory, and also
    // release what was already opened when a later acquisition throws.
    using (var directory = this.GetDirectory())
    using (var indexReader = this.GetIndexReader(directory))
    using (var searcher = new IndexSearcher(indexReader))
    {
        var query = this.BuildQuery(text, tipodocumentoId);
        var filter = this.BuildDateFilter(startDate, endDate);
        var sort = new Sort(new SortField("dataCriacao", SortField.LONG, true));
        var docs = searcher.Search(query, filter, this.configuracoesDaAplicacao.ResultadoMaximoConsulta, sort);

        // Create highlighter.
        var formatter = new SimpleHTMLFormatter("<span class=\"result-highlight\">", "</span>");
        var scorer = new QueryScorer(query);
        this.Highlighter = new Highlighter(formatter, scorer);

        // NOTE(review): the stream is built from the search text, not from a document — confirm
        // this is what BuildSearchResult expects.
        this.Stream = LuceneEngineBase.GetAnalyzer().TokenStream(string.Empty, new StringReader(text));

        return this.BuildSearchResult(docs, searcher);
    }
}
/// <summary>
/// Switches highlighting to a <see cref="SimpleHTMLFormatter"/> built from the given tag pair.
/// </summary>
/// <param name="preTag">First constructor argument of the formatter.</param>
/// <param name="postTag">Second constructor argument of the formatter.</param>
public void FormatWithTag(string preTag, string postTag)
{
    var tagFormatter = new SimpleHTMLFormatter(preTag, postTag);
    this.htmlFormatter = tagFormatter;
}
/// <summary>
/// Switches highlighting to a <see cref="CssHtmlFormatter"/> created for <paramref name="cssClass"/>.
/// </summary>
/// <param name="cssClass">Value handed to the <see cref="CssHtmlFormatter"/> constructor.</param>
public void FormatWithCssClass(string cssClass)
{
    var cssFormatter = new CssHtmlFormatter(cssClass);
    this.htmlFormatter = cssFormatter;
}
/// <summary>
/// Creates a highlighter whose query is a single term on the "highlight" field and
/// whose formatter is a default-constructed <see cref="SimpleHTMLFormatter"/>.
/// </summary>
/// <param name="queryText">Text used as the term value of the "highlight" field.</param>
public HtmlHighlighter(string queryText)
{
    var highlightTerm = new Term("highlight", queryText);
    this.htmlContentQuery = new TermQuery(highlightTerm);
    this.htmlFormatter = new SimpleHTMLFormatter();
}