public SearchResult[] Search(string query)
{
    Hits hits = _doSearch(query);
    List<SearchResult> results = new List<SearchResult>();
    for (int i = 0; i < hits.Length(); i++)
    {
        Document doc = hits.Doc(i);
        string contents = doc.Get("contents");
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
        SimpleFragmenter fragmenter = new SimpleFragmenter(_fragmentSize);
        Highlighter hiliter = new Highlighter(formatter, new QueryScorer(QueryParser.Parse(query, "contents", _analyzer)));
        hiliter.SetTextFragmenter(fragmenter);
        int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;
        TokenStream tokenstream = _analyzer.TokenStream("contents", new StringReader(contents));
        TextFragment[] frags = hiliter.GetBestTextFragments(tokenstream, contents, false, numfragments);
        SearchResult sr = new SearchResult(doc, _analyzer, query, _fragmentSize);
        foreach (TextFragment frag in frags)
        {
            if (frag.GetScore() > 0)
                sr.AddFragment(frag.ToString());
        }
        results.Add(sr);
    }
    return results.ToArray();
}
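// The example above re-parses the query and rebuilds the formatter, scorer and Highlighter for
// every hit even though none of them depend on the individual document. A sketch of the same
// method with that work hoisted out of the loop (an assumption-level rework, reusing the
// _doSearch, _analyzer and _fragmentSize members and the older Hits-based API shown above):
public SearchResult[] Search(string query)
{
    Hits hits = _doSearch(query);
    var results = new List<SearchResult>();
    var formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    var fragmenter = new SimpleFragmenter(_fragmentSize);
    Query parsed = QueryParser.Parse(query, "contents", _analyzer);
    var hiliter = new Highlighter(formatter, new QueryScorer(parsed));
    hiliter.SetTextFragmenter(fragmenter);
    for (int i = 0; i < hits.Length(); i++)
    {
        Document doc = hits.Doc(i);
        string contents = doc.Get("contents");
        // +1 keeps the fragment count above zero for short documents
        int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;
        TokenStream tokenstream = _analyzer.TokenStream("contents", new StringReader(contents));
        TextFragment[] frags = hiliter.GetBestTextFragments(tokenstream, contents, false, numfragments);
        SearchResult sr = new SearchResult(doc, _analyzer, query, _fragmentSize);
        foreach (TextFragment frag in frags)
        {
            if (frag.GetScore() > 0)
                sr.AddFragment(frag.ToString());
        }
        results.Add(sr);
    }
    return results.ToArray();
}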
public IEnumerable<Hit> Search(string query, int maxResults)
{
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    QueryParser qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "contents", analyzer);
    Query q = qp.Parse(query);
    TopDocs top = searcher.Search(q, maxResults);
    List<Hit> result = new List<Hit>();
    foreach (var scoreDoc in top.ScoreDocs)
    {
        var doc = searcher.Doc(scoreDoc.Doc);
        string contents = doc.Get("contents");
        var scorer = new QueryScorer(q, searcher.IndexReader, "contents");
        var highlighter = new Highlighter(scorer);
        result.Add(new Hit()
        {
            Relevance = scoreDoc.Score,
            Title = doc.Get("title"),
            Url = doc.Get("path"),
            Excerpt = highlighter.GetBestFragment(analyzer, "contents", contents)
        });
    }
    return result;
}
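// The Hit type used above is not part of the snippet. A minimal shape consistent with the
// properties it assigns (an assumption, not the original class) would be:
public class Hit
{
    public float Relevance { get; set; }
    public string Title { get; set; }
    public string Url { get; set; }
    public string Excerpt { get; set; }
}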
private string GeneratePreviewText(Query q, string text)
{
    var scorer = new QueryScorer(q);
    Highlighter highlighter = new Highlighter(htmlFormatter, scorer);
    highlighter.TextFragmenter = new SimpleFragmenter(250);
    TokenStream stream = SearchEnvironment.DefaultAnalyzer.TokenStream("html_content", new StringReader(text));
    return highlighter.GetBestFragments(stream, text, 4, "<br/>");
}
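// Usage sketch for the helper above. The parser setup is an assumption (the snippet only shows
// that a ready-made Query is passed in); GetBestFragments joins up to four of the best
// ~250-character fragments with "<br/>" and returns an empty string when nothing matches.
string pageHtml = "Plain text extracted from the html_content field.";
var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "html_content", SearchEnvironment.DefaultAnalyzer);
string preview = GeneratePreviewText(parser.Parse("lucene highlighter"), pageHtml);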
public static string GetHilitedContentsWithoutHeaders(SearchResult sr)
{
    StringBuilder result = new StringBuilder("<font face=Arial size=5>");
    string contents = sr.GetDocContents();
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(sr.FragmentSize);
    Highlighter hiliter = new Highlighter(formatter, new QueryScorer(sr.QueryParser.Parse(sr.Query)));
    hiliter.SetTextFragmenter(fragmenter);
    int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;
    TokenStream tokenstream = sr.Analyzer.TokenStream("contents", new StringReader(contents));
    result.Append(hiliter.GetBestFragments(tokenstream, contents, numfragments, "..."));
    result.Append("</font>");
    result.Replace("\n", "<br/>");
    return result.ToString();
}
public static string GetOriginalHighlightedContents(SearchResult sr)
{
    StringBuilder result = new StringBuilder("<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style>.highlight{background:yellow;}</style><body><font face=Arial size=5>");
    string contents = sr.GetDocContents();
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(sr.FragmentSize);
    Highlighter hiliter = new Highlighter(formatter, new QueryScorer(sr.QueryParser.Parse(sr.Query)));
    hiliter.SetTextFragmenter(fragmenter);
    int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;
    TokenStream tokenstream = sr.Analyzer.TokenStream("contents", new StringReader(contents));
    result.Append(hiliter.GetBestFragments(tokenstream, contents, numfragments, "..."));
    result.Append("</font></body></html>");
    result.Replace("\n", "<br/>");
    return result.ToString();
}
public static void Highlight(Document d, string query, Analyzer analyzer)
{
    string contents = d.Get("contents");
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\"><b>", "</b></span>");
    //SpanGradientFormatter formatter = new SpanGradientFormatter(10.0f, null, null, "#F1FD9F", "#EFF413");
    //SimpleHTMLEncoder encoder = new SimpleHTMLEncoder();
    SimpleFragmenter fragmenter = new SimpleFragmenter(250);
    Highlighter hiliter = new Highlighter(formatter, new QueryScorer(QueryParser.Parse(query, "contents", analyzer)));
    hiliter.SetTextFragmenter(fragmenter);
    int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1; // +1 ensures it's never zero; extra fragments do no harm
    StringBuilder result = new StringBuilder("<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style>.highlight{background:yellow;}</style><head><title>Search Results - ");
    result.Append(d.Get("filename"));
    result.Append("</title></head><body><font face=Arial size=5>");
    TokenStream tokenstream = analyzer.TokenStream("contents", new System.IO.StringReader(contents));
    TextFragment[] frags = hiliter.GetBestTextFragments(tokenstream, contents, false, numfragments);
    foreach (TextFragment frag in frags)
    {
        if (frag.GetScore() > 0)
        {
            result.Append(frag.ToString() + "<br/><hr/><br/>");
        }
    }
    string contentspath = System.IO.Path.Combine(System.Windows.Forms.Application.StartupPath, "contents.html");
    result.Append("</font><a target=_self href=\"file:///");
    result.Append(contentspath);
    result.Append("\">View Original Document...</a>");
    result.Append("</body></html>");
    result.Replace("\n", "<br/>");
    string resultspath = System.IO.Path.Combine(System.Windows.Forms.Application.StartupPath, "results.html");
    System.IO.File.WriteAllText(resultspath, result.ToString());
    //webBrowser1.Url = new Uri("file:///" + resultspath);

    // Build the fully highlighted document that the "View Original Document" link points at.
    Highlighter hiliter2 = new Highlighter(formatter, new QueryScorer(QueryParser.Parse(query, "contents", analyzer)));
    hiliter2.SetTextFragmenter(fragmenter);
    TokenStream tokstr = analyzer.TokenStream(new System.IO.StringReader(contents));
    StringBuilder htmlcontents = new StringBuilder("<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style>.highlight{background:yellow;}</style><body><font face=Arial size=5>");
    htmlcontents.Append(hiliter2.GetBestFragments(tokstr, contents, numfragments, "..."));
    htmlcontents.Append("</font></body></html>");
    htmlcontents.Replace("\n", "<br/>");
    System.IO.File.WriteAllText(contentspath, htmlcontents.ToString());
}
public List<IndexResult> Search(string terms)
{
    List<IndexResult> retObj = new List<IndexResult>();
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    using (var searcher = new IndexSearcher(FSDirectory.Open(IndexDirectory)))
    {
        // parse the query across the listed fields
        var parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new[] { "text", "title", "urlkey", "searchterms" }, analyzer);
        Query query = parser.Parse(terms);
        TopDocs hits = searcher.Search(query, 200);
        SimpleFragmenter fragmenter = new SimpleFragmenter(80);
        QueryScorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(scorer);
        highlighter.TextFragmenter = fragmenter;
        // iterate over the returned ScoreDocs (at most 200); TotalHits can be larger than that
        for (int i = 0; i < hits.ScoreDocs.Length; i++)
        {
            // get the document from the index
            Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
            TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
            String sample = highlighter.GetBestFragments(stream, doc.Get("text"), 2, "...");
            String title = doc.Get("title");
            String urlkey = doc.Get("urlkey");
            String type = doc.Get("type");
            retObj.Add(new IndexResult() { Sample = sample, Title = title, Type = type, UrlKey = urlkey });
        }
        return retObj;
    }
}
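// A note on analyzer.TokenStream("", ...) above: the empty field name is fine for a plain
// StandardAnalyzer because it tokenizes every field the same way. If the index were built with
// per-field analysis, the real field name has to be passed so the matching analyzer is used.
// A sketch under that assumption (Lucene.Net 3.0 API):
var perField = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
perField.AddAnalyzer("urlkey", new KeywordAnalyzer());          // keep url keys as single tokens
TokenStream ts = perField.TokenStream("text", new StringReader("some stored text"));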
public static SearchResult GetFastSearchResultFragments(ref SearchResult sr)
{
    Document doc = sr.Document;
    string contents = doc.Get("contents");
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(sr.FragmentSize);
    Highlighter hiliter = new Highlighter(formatter, new QueryScorer(sr.QueryParser.Parse(sr.Query)));
    hiliter.SetTextFragmenter(fragmenter);
    int numfragments = contents.Length / fragmenter.GetFragmentSize() + 1;
    TokenStream tokenstream = sr.Analyzer.TokenStream("contents", new StringReader(contents));
    TextFragment[] frags = hiliter.GetBestTextFragments(tokenstream, contents, false, numfragments);
    //SearchResult sr = new SearchResult(doc, _analyzer, query, _fragmentSize);
    foreach (TextFragment frag in frags)
    {
        if (frag.GetScore() > 0)
            sr.AddFragment(frag.ToString());
    }
    return sr;
}
public static IndexItemCollection Search(
    int siteId,
    bool isAdminContentAdminOrSiteEditor,
    List<string> userRoles,
    Guid[] featureGuids,
    DateTime modifiedBeginDate,
    DateTime modifiedEndDate,
    string queryText,
    bool highlightResults,
    int highlightedFragmentSize,
    int pageNumber,
    int pageSize,
    int maxClauseCount,
    out int totalHits,
    out bool invalidQuery)
{
    invalidQuery = false;
    totalHits = 0;
    IndexItemCollection results = new IndexItemCollection();
    if (string.IsNullOrEmpty(queryText)) { return results; }
    using (Lucene.Net.Store.Directory searchDirectory = GetDirectory(siteId))
    {
        if (!IndexReader.IndexExists(searchDirectory)) { return results; }
        long startTicks = DateTime.Now.Ticks;
        try
        {
            if (maxClauseCount != 1024)
            {
                BooleanQuery.MaxClauseCount = maxClauseCount;
            }
            // there are different analyzers for different languages
            // see LuceneSettings.config in the root of the web
            LuceneSettingsProvider provider = LuceneSettingsManager.Providers[GetSiteProviderName(siteId)];
            Analyzer analyzer = provider.GetAnalyzer();
            Query searchQuery = MultiFieldQueryParser.Parse(
                Lucene.Net.Util.Version.LUCENE_30,
                new string[] { queryText, queryText, queryText, queryText, queryText, queryText.Replace("*", string.Empty) },
                new string[] { "Title", "ModuleTitle", "contents", "PageName", "PageMetaDesc", "Keyword" },
                analyzer);
            BooleanQuery filterQuery = new BooleanQuery(); // won't be used to score the results
            if (!isAdminContentAdminOrSiteEditor) // skip role filters for these users
            {
                AddRoleFilters(userRoles, filterQuery);
                AddModuleRoleFilters(userRoles, filterQuery);
            }
            TermRangeQuery beginDateFilter = new TermRangeQuery(
                "PublishBeginDate",
                DateTime.MinValue.ToString("s"),
                DateTime.UtcNow.ToString("s"),
                true,
                true);
            filterQuery.Add(beginDateFilter, Occur.MUST);
            TermRangeQuery endDateFilter = new TermRangeQuery(
                "PublishEndDate",
                DateTime.UtcNow.ToString("s"),
                DateTime.MaxValue.ToString("s"),
                true,
                true);
            filterQuery.Add(endDateFilter, Occur.MUST);
            if ((modifiedBeginDate.Date > DateTime.MinValue.Date) || (modifiedEndDate.Date < DateTime.MaxValue.Date))
            {
                TermRangeQuery lastModifiedDateFilter = new TermRangeQuery(
                    "LastModUtc",
                    modifiedBeginDate.Date.ToString("s"),
                    modifiedEndDate.Date.ToString("s"),
                    true,
                    true);
                filterQuery.Add(lastModifiedDateFilter, Occur.MUST);
            }
            //if ((!DisableSearchFeatureFilters) && (featureGuid != Guid.Empty))
            //{
            //    BooleanQuery featureFilter = new BooleanQuery();
            //    featureFilter.Add(new TermQuery(new Term("FeatureId", featureGuid.ToString())), Occur.MUST);
            //    filterQuery.Add(featureFilter, Occur.MUST);
            //}
            if ((featureGuids != null) && (featureGuids.Length > 0))
            {
                BooleanQuery featureFilter = new BooleanQuery();
                foreach (Guid featureGuid in featureGuids)
                {
                    featureFilter.Add(new TermQuery(new Term("FeatureId", featureGuid.ToString())), Occur.SHOULD);
                }
                filterQuery.Add(featureFilter, Occur.MUST);
            }
            Filter filter = new QueryWrapperFilter(filterQuery); // filterQuery won't affect result scores
            using (IndexSearcher searcher = new IndexSearcher(searchDirectory))
            {
                //http://stackoverflow.com/questions/9872933/migrating-lucene-hitcollector-2-x-to-collector-3-x
                //TopScoreDocCollector collector = TopScoreDocCollector.Create(maxResults, true);
                int maxResults = int.MaxValue;
                TopDocs hits = searcher.Search(searchQuery, filter, maxResults);
                int startHit = 0;
                if (pageNumber > 1)
                {
                    startHit = ((pageNumber - 1) * pageSize);
                }
                totalHits = hits.TotalHits;
                int end = startHit + pageSize;
                if (totalHits <= end)
                {
                    end = totalHits;
                }
                int itemsAdded = 0;
                int itemsToAdd = end;
                QueryScorer scorer = new QueryScorer(searchQuery);
                SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='searchterm'>", "</span>");
                Highlighter highlighter = new Highlighter(formatter, scorer);
                highlighter.TextFragmenter = new SimpleFragmenter(highlightedFragmentSize);
                for (int i = startHit; i < itemsToAdd; i++)
                {
                    Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
                    IndexItem indexItem = new IndexItem(doc, hits.ScoreDocs[i].Score);
                    if (highlightResults)
                    {
                        try
                        {
                            TokenStream stream = analyzer.TokenStream("contents", new StringReader(doc.Get("contents")));
                            string highlightedResult = highlighter.GetBestFragment(stream, doc.Get("contents"));
                            if (highlightedResult != null)
                            {
                                indexItem.Intro = highlightedResult;
                            }
                        }
                        catch (NullReferenceException) { }
                    }
                    results.Add(indexItem);
                    itemsAdded += 1;
                }
                results.ItemCount = itemsAdded;
                results.PageIndex = pageNumber;
                results.ExecutionTime = DateTime.Now.Ticks - startTicks;
            }
        }
        catch (ParseException ex)
        {
            invalidQuery = true;
            log.Error("handled error for search terms " + queryText, ex);
            // these parser exceptions are generally caused by
            // spambots posting too much junk into the search form
            // here's an option to automatically ban the ip address
            HandleSpam(queryText, ex);
            return results;
        }
        catch (BooleanQuery.TooManyClauses ex)
        {
            invalidQuery = true;
            log.Error("handled error for search terms " + queryText, ex);
            return results;
        }
        catch (System.IO.IOException ex)
        {
            invalidQuery = true;
            log.Error("handled error for search terms " + queryText, ex);
            return results;
        }
        return results;
    }
}
public static DataTable searchPitanja(string pretraga)
{
    DataTable ResultsPitanja = new DataTable();

    // create the searcher; the index is stored in the "Pitanja" directory
    string indexDirectory = "J:/Triglav_Web_App/Triglav/Web/Lucene/Pitanja";
    var analyzer = new StandardAnalyzer(Version.LUCENE_30);
    IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory));

    // parse the query against the title, body and tag fields
    var parser = new MultiFieldQueryParser(Version.LUCENE_30, new[] { "Naslov", "Sadrzaj", "Tagovi" }, analyzer);
    //var parser = new QueryParser(Version.LUCENE_30, "Sadrzaj", analyzer);
    Query query = parser.Parse(pretraga);

    // create the result DataTable
    ResultsPitanja.Columns.Add("id", typeof(Int32));
    ResultsPitanja.Columns.Add("Naslov", typeof(string));
    ResultsPitanja.Columns.Add("Sadrzaj", typeof(string));
    ResultsPitanja.Columns.Add("Tagovi", typeof(string));
    ResultsPitanja.Columns.Add("DatumKreiranja", typeof(DateTime));
    ResultsPitanja.Columns.Add("DatumZadnjeIzmjene", typeof(DateTime));
    ResultsPitanja.Columns.Add("DatumZadnjeAktivnosti", typeof(DateTime));
    ResultsPitanja.Columns.Add("DatumZatvaranjaPosta", typeof(DateTime));
    ResultsPitanja.Columns.Add("PrihvaceniOdgovori", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojOdgovora", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojKomentara", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojOmiljenih", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojPregleda", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojPoena", typeof(Int32));
    ResultsPitanja.Columns.Add("VlasnikID", typeof(Int32));
    ResultsPitanja.Columns.Add("VlasnikNadimak", typeof(string));
    ResultsPitanja.Columns.Add("PromijenioID", typeof(Int32));
    ResultsPitanja.Columns.Add("RoditeljskiPostID", typeof(Int32));
    //Results.Columns.Add("PodKategorija", typeof(Int32));
    ResultsPitanja.Columns.Add("PostVrsta", typeof(Int32));
    //ResultsPitanja.Columns.Add("SlikaURL", typeof(string));
    ResultsPitanja.Columns.Add("temp", typeof(string));
    ResultsPitanja.Columns.Add("Likes", typeof(Int32));
    ResultsPitanja.Columns.Add("Unlikes", typeof(Int32));
    ResultsPitanja.Columns.Add("Sazetak", typeof(string));
    ResultsPitanja.Columns.Add("BrojRangiranja", typeof(Int32));
    ResultsPitanja.Columns.Add("PrihvacenaIzmjena", typeof(Int32));
    ResultsPitanja.Columns.Add("Podnaslov", typeof(string));
    ResultsPitanja.Columns.Add("Broj.Razgovora", typeof(Int32));
    ResultsPitanja.Columns.Add("sample", typeof(string));

    // search
    TopDocs hits = searcher.Search(query, 5);
    //E this.total = hits.TotalHits;

    // create highlighter
    IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold; background-color: #e5ecf9; \">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(80);
    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = fragmenter;

    for (int i = 0; i < hits.ScoreDocs.Count(); i++)
    {
        // get the document from the index
        Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
        TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("Sadrzaj")));
        String sample = highlighter.GetBestFragments(stream, doc.Get("Sadrzaj"), 3, "...");
        //String path = doc.Get("path");

        // create a new row with the result data
        DataRow rowPitanja = ResultsPitanja.NewRow();
        rowPitanja["id"] = doc.Get("id");
        rowPitanja["Naslov"] = doc.Get("Naslov");
        rowPitanja["Sadrzaj"] = sample; //doc.Get("Sadrzaj");
        rowPitanja["Tagovi"] = doc.Get("Tagovi");
        rowPitanja["DatumKreiranja"] = doc.Get("DatumKreiranja");
        rowPitanja["DatumZadnjeIzmjene"] = doc.Get("DatumZadnjeIzmjene");
        rowPitanja["DatumZadnjeAktivnosti"] = doc.Get("DatumZadnjeAktivnosti");
        //row["DatumZatvaranjaPosta"] = doc.Get("DatumZatvaranjaPosta");
        rowPitanja["PrihvaceniOdgovori"] = doc.Get("PrihvaceniOdgovori");
        rowPitanja["BrojOdgovora"] = doc.Get("BrojOdgovora");
        rowPitanja["BrojKomentara"] = doc.Get("BrojKomentara");
        rowPitanja["BrojOmiljenih"] = doc.Get("BrojOmiljenih");
        rowPitanja["BrojPregleda"] = doc.Get("BrojPregleda");
        rowPitanja["BrojPoena"] = doc.Get("BrojPoena");
        //row["VlasnikID"] = doc.Get("VlasnikID");
        rowPitanja["VlasnikNadimak"] = doc.Get("VlasnikNadimak");
        //row["PromijenioID"] = doc.Get("PromijenioID");
        //row["RoditeljskiPostID"] = doc.Get("RoditeljskiPostID");
        //row["PodKategorija"] = doc.Get("PodKategorija");
        rowPitanja["PostVrsta"] = doc.Get("PostVrsta");
        //rowPitanja["SlikaURL"] = doc.Get("SlikaURL");
        //row["temp"] = doc.Get("temp");
        rowPitanja["Likes"] = doc.Get("Likes");
        rowPitanja["Unlikes"] = doc.Get("Unlikes");
        rowPitanja["Sazetak"] = doc.Get("Sazetak");
        rowPitanja["BrojRangiranja"] = doc.Get("BrojRangiranja");
        rowPitanja["PrihvacenaIzmjena"] = doc.Get("PrihvacenaIzmjena");
        rowPitanja["Podnaslov"] = doc.Get("Podnaslov");
        //row["Broj.Razgovora"] = doc.Get("Broj.Razgovora");
        //rowPitanja["sample"] = sample;
        ResultsPitanja.Rows.Add(rowPitanja);
    }
    searcher.Dispose();
    return ResultsPitanja;
}
private void search()
{
    DateTime start = DateTime.Now;
    // create the result DataTable
    this.Results.Columns.Add("title", typeof(string));
    this.Results.Columns.Add("sample", typeof(string));
    this.Results.Columns.Add("path", typeof(string));
    this.Results.Columns.Add("url", typeof(string));
    this.Results.Columns.Add("Type", typeof(string));
    // create the searcher
    // index is placed in "index" subdirectory
    string indexDirectory = Server.MapPath("~/App_Data/index");
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    //List<string> STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS_SET.ToList<string>();
    IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory));
    BooleanQuery bquery = new BooleanQuery();
    //var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "text", analyzer);
    List<string> SearchTerm = new List<string> { "text", "path", "title", "Extension", "EXTPRP" };
    List<string> Projects = new List<string>();
    if (Session["ProjectList"] != null)
    {
        Projects = (List<string>)Session["ProjectList"];
    }
    List<string> allType = new List<string> { "A", "B", "C" };
    if (this.Request.QueryString["Page"] != null)
    {
        if (allType.Contains(this.Request.QueryString["Page"].ToString()))
        {
            allType.Remove(this.Request.QueryString["Page"]);
            foreach (string type in allType)
            {
                TermQuery termq1 = new TermQuery(new Term("EXTPRP", type));
                bquery.Add(termq1, Occur.MUST_NOT);
                FuzzyQuery termq = new FuzzyQuery(new Term("EXTPRP", type), 0.5f, 0);
                bquery.Add(termq, Occur.MUST_NOT);
            }
        }
    }
    //Query query = parser.Parse(this.Query);
    //foreach (string term in SearchTerm)
    //{
    //    if (term == "title")
    //    {
    //        TermQuery termq = new TermQuery(new Term(term, this.Query));
    //        termq.Boost = 50f;
    //        bquery.Add(termq, Occur.SHOULD);
    //    }
    //    else
    //    {
    //        TermQuery termq = new TermQuery(new Term(term, this.Query));
    //        termq.Boost = 5f;
    //        bquery.Add(termq, Occur.SHOULD);
    //    }
    //}
    foreach (string term in SearchTerm)
    {
        if (term == "title")
        {
            TermQuery termq = new TermQuery(new Term(term, this.Query));
            termq.Boost = 5f;
            bquery.Add(termq, Occur.SHOULD);
        }
        else
        {
            FuzzyQuery termq = new FuzzyQuery(new Term(term, this.Query), 0.5f, 0);
            termq.Boost = 0.1f;
            bquery.Add(termq, Occur.SHOULD);
        }
    }
    //foreach (string project in Projects)
    //{
    //    TermQuery termq1 = new TermQuery(new Term("Project", project));
    //    bquery.Add(termq1, Occur.MUST_NOT);
    //}
    //foreach (string project in Projects.Distinct())
    //{
    //    TermQuery termq1 = new TermQuery(new Term("path", project));
    //    bquery.Add(termq1, Occur.MUST);
    //    FuzzyQuery termq = new FuzzyQuery(new Term("path", project), 0.5f, 0);
    //    bquery.Add(termq, Occur.MUST);
    //}
    //bquery.Add(new TermQuery(new Term("Project", "DEV")), Occur.SHOULD);
    //List<ScoreDoc> TempArrList = new List<ScoreDoc>();
    TopDocs hits = searcher.Search(bquery, null, 10000);
    //TopDocs hits = new TopDocs(TempArrList.Count(), TempArrList.ToArray(), hitsWithText.MaxScore);
    //hits.ScoreDocs.CopyTo(hits.ScoreDocs, 0);
    //hits.ScoreDocs = hits.ScoreDocs.OrderBy(obj => searcher.Doc(obj.Doc).Get("path")).ToArray();
    if (Projects.Count() != 0)
    {
        hits.ScoreDocs = hits.ScoreDocs.Where(obj => Projects.Contains(Path.GetDirectoryName(searcher.Doc(obj.Doc).Get("path")))).Distinct().ToArray();
    }
    //foreach (string project in Projects.Distinct())
    //{
    //    //hits.ScoreDocs = hits.ScoreDocs.Where(obj => Regex.IsMatch(searcher.Doc(obj.Doc).Get("path").Replace(@"\", @"\\"), @".*" + project.Replace(@"\", @"\\") + ".*")).ToArray();
    //    string s = Path.GetDirectoryName("\\SAGITEC-1629\\Soogle\\CARS\\bhagyashree.txt");
    //    hits.ScoreDocs = hits.ScoreDocs.Where(obj => Path.GetDirectoryName(searcher.Doc(obj.Doc).Get("path")).Contains(project)).ToArray();
    //}
    this.total = hits.ScoreDocs.Count();
    this.startAt = InitStartAt();
    int resultsCount = Math.Min(total, this.maxResults + this.startAt);
    // create highlighter
    IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(200);
    QueryScorer scorer = new QueryScorer(bquery);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = fragmenter;
    int j = 0;
    for (int i = startAt; i < resultsCount; i++)
    {
        Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
        String path = doc.Get("path");
        string getExtension = doc.Get("Extension");
        TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
        String sample = "";
        try
        {
            string document = doc.Get("text");
            if (getExtension.ToLower() == ".png" || getExtension.ToLower() == ".jpg" || getExtension.ToLower() == ".gif" || getExtension.ToLower() == ".bmp")
            {
                sample = "";
            }
            else
            {
                sample = highlighter.GetBestFragment(stream, document); //, 2, "...");
            }
        }
        catch (Exception ex)
        {
            // highlighting failures are ignored; the sample stays empty
        }
        // create a new row with the result data
        DataRow row = this.Results.NewRow();
        row["title"] = doc.Get("title");
        row["path"] = "http://sagitec-1629/KNBASE/" + path.Replace(@"\", "/").Replace("//SAGITEC-1629/Soogle/", "");
        row["url"] = "http://sagitec-1629/KNBASE/" + path.Replace(@"\", "/").Replace("//SAGITEC-1629/Soogle/", "");
        row["sample"] = sample;
        if (path.Contains('.'))
        {
            row["Type"] = GetMIMEType(path);
        }
        //if (!Projects.Contains(doc.Get("Project")) || !allType.Contains(doc.Get("EXTPRP")))
        //{
        this.Results.Rows.Add(row);
        //}
        j++;
    }
    Repeater1.DataSource = Results;
    Repeater1.DataBind();
    searcher.Dispose();
    // result information
    this.duration = DateTime.Now - start;
    this.fromItem = startAt + 1;
    this.toItem = Math.Min(startAt + maxResults, total);
}
private EntitySearchResults.Hit ScoreDocToSearchHit(IndexSearcher searcher, Highlighter highlighter, ScoreDoc scoreDoc)
{
    var hit = new EntitySearchResults.Hit();
    var doc = searcher.Doc(scoreDoc.Doc);
    hit.Id = Guid.Parse(doc.Get(FIELD_ID));
    hit.Text = Highlight(highlighter, doc.Get(FIELD_NAME));
    hit.Group = Highlight(highlighter, doc.Get(FIELD_GROUP_NAME));
    hit.Description = Highlight(highlighter, string.Join("\n", doc.GetFields(MetadataDefinition.DescriptionDefinition).Select(x => x.StringValue)));
    hit.Tags = doc.GetFields()
        .Where(x => MetadataDefinition.KeywordDefinitions.Except(new[] { MetadataDefinition.DescriptionDefinition }).Contains(x.Name))
        .Select(x => Highlight(highlighter, x.StringValue))
        .ToArray();
    return hit;
}
private SearchResult ExecuteQuery(string[] metaData, int resultOffset, int resultLength, Query query)
{
    var startTime = DateTime.Now;
    var ticks = DateTime.Now.ToUniversalTime().Ticks;
    Query publishStartQuery = NumericRangeQuery.NewLongRange("publishStart", null, ticks, true, false);
    Query publishStopQuery = NumericRangeQuery.NewLongRange("publishStop", ticks, null, false, true);
    var booleanQuery = new BooleanQuery
    {
        {query, Occur.MUST},
        {publishStartQuery, Occur.MUST},
        {publishStopQuery, Occur.MUST}
    };
    var scoreDocs = _searcher.Search(booleanQuery, null, MaxHits, Sort.RELEVANCE).ScoreDocs;
    var result = new SearchResult { NumberOfHits = scoreDocs.Length };

    // Create highlighter
    IFormatter formatter = new SimpleHTMLFormatter("<span class=\"search-highlight\">", "</span>");
    var fragmenter = new SimpleFragmenter(120);
    var scorer = new QueryScorer(query);
    var highlighter = new Highlighter(formatter, scorer) { TextFragmenter = fragmenter };

    if (resultOffset < scoreDocs.Length)
    {
        var resultUpperOffset = resultOffset + resultLength;
        if (resultUpperOffset > scoreDocs.Length)
        {
            resultUpperOffset = scoreDocs.Length;
        }
        for (var i = resultOffset; i < resultUpperOffset; i++)
        {
            var doc = scoreDocs[i];
            var document = _searcher.Doc(doc.Doc);
            var content = document.Get("content");
            var excerpt = "";
            if (content != null)
            {
                var stream = _analyzer.TokenStream("", new StringReader(document.Get("content")));
                excerpt = highlighter.GetBestFragments(stream, document.Get("content"), 2, "...");
            }
            Guid pageId;
            (document.Get("pageId") ?? string.Empty).TryParseGuid(out pageId);
            var hit = new SearchHit
            {
                PageId = pageId,
                Path = document.Get("path"),
                Title = document.Get("title"),
                Excerpt = excerpt
            };
            foreach (var key in metaData)
            {
                hit.MetaData.Add(key, document.Get(key));
            }
            result.Hits.Add(hit);
        }
    }
    var timeTaken = DateTime.Now - startTime;
    result.SecondsTaken = timeTaken.TotalSeconds;
    return result;
}
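// For NumericRangeQuery.NewLongRange above to match anything, publishStart and publishStop must
// have been indexed as numeric fields. A minimal indexing sketch (field names taken from the
// query above, otherwise an assumption; Lucene.Net 3.0 API):
var pageDoc = new Document();
pageDoc.Add(new NumericField("publishStart", Field.Store.YES, true).SetLongValue(DateTime.UtcNow.Ticks));
pageDoc.Add(new NumericField("publishStop", Field.Store.YES, true).SetLongValue(DateTime.MaxValue.Ticks));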
private MetadataDefinitionSearchResults.Hit ScoreDocToSearchHit(IndexSearcher searcher, Highlighter highlighter, ScoreDoc scoreDoc)
{
    var doc = searcher.Doc(scoreDoc.Doc);
    return new MetadataDefinitionSearchResults.Hit
    {
        Id = Guid.Parse(doc.Get(FIELD_ID)),
        Name = Highlight(highlighter, doc.Get(FIELD_NAME)),
        Description = Highlight(highlighter, doc.Get(FIELD_DESCRIPTION)),
        DataType = Highlight(highlighter, doc.Get(FIELD_DATA_TYPE))
    };
}
private string Highlight(Highlighter highlighter, string description)
{
    description = SimpleHTMLEncoder.HtmlEncode(description);
    if (highlighter != null)
    {
        var stream = _analyzer.TokenStream("", new StringReader(description));
        var sample = highlighter.GetBestFragments(stream, description, 2, "...");
        if (!string.IsNullOrEmpty(sample))
        {
            return sample;
        }
    }
    return description;
}
public MetadataDefinitionSearchResults Search(string q, int pageSize, int page)
{
    using (var searcher = new IndexSearcher(IndexDir, true))
    {
        Query query;
        Highlighter highlighter = null;
        if (q == null)
        {
            query = new MatchAllDocsQuery();
        }
        else
        {
            query = _queryParser.Parse(q);
            if (q.Any() && !StringExtension.EndsWithWhitespace(q) && !new[] { '"', '*', '~', '?' }.Contains(q.Last()))
            {
                query = new BooleanQuery
                {
                    {_queryParser.Parse(q + "*"), Occur.SHOULD},
                    {query, Occur.SHOULD}
                };
            }
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"is-highlighted\">", "</span>"), new QueryScorer(query));
        }
        var take = page * pageSize;
        var sort = new Sort(new SortField(FIELD_NAME, CultureInfo.CurrentCulture));
        var docs = searcher.Search(query, null, take, sort);
        var results = new MetadataDefinitionSearchResults
        {
            TotalHits = docs.TotalHits,
            Hits = docs.ScoreDocs.Skip(take - pageSize).Select(x => ScoreDocToSearchHit(searcher, highlighter, x)).ToArray()
        };
        return results;
    }
}
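// Paging sketch for the method above: page is 1-based, take = page * pageSize documents are
// collected, and Skip(take - pageSize) drops the earlier pages, so page 2 with pageSize 10
// returns hits 11-20. The owning object here is hypothetical; Search lives on whatever class
// holds _queryParser, _analyzer and IndexDir.
MetadataDefinitionSearchResults secondPage = searchService.Search("sensor", pageSize: 10, page: 2);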
//********************************************************************************************************************************
private void search()
{
    if (TextBoxQuery.Text != "")
    {
        DateTime start = DateTime.Now;
        // create the result DataTable
        this.Results.Columns.Add("title", typeof(string));
        this.Results.Columns.Add("sample", typeof(string));
        this.Results.Columns.Add("path", typeof(string));
        this.Results.Columns.Add("url", typeof(string));
        this.Results.Columns.Add("Type", typeof(string));
        // create the searcher
        // index is placed in "index" subdirectory
        string indexDirectory = Server.MapPath(IndexDirPath);
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        //List<string> STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS_SET.ToList<string>();
        IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory));
        BooleanQuery bquery = new BooleanQuery();
        //var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "text", analyzer);
        List<string> SearchTerm = new List<string> { "text", "path", "title", "Extension", "EXTPRP" };
        List<string> Projects = new List<string>();
        if (Session["ProjectList"] != null)
        {
            Projects = (List<string>)Session["ProjectList"];
        }
        List<string> allType = null;
        if (hnkClickLink.Value == "")
        {
            allType = new List<string>();
        }
        else
        {
            allType = new List<string> { "Doc", "Code", "Images", "Other" };
        }
        if (this.Request.QueryString["Page"] != null)
        {
            if (allType.Contains(Convert.ToString(hnkClickLink.Value)))
            {
                allType.Remove(Convert.ToString(hnkClickLink.Value));
                foreach (string type in allType)
                {
                    TermQuery termq1 = new TermQuery(new Term("EXTPRP", type));
                    bquery.Add(termq1, Occur.MUST_NOT);
                    FuzzyQuery termq = new FuzzyQuery(new Term("EXTPRP", type), 0.5f, 0);
                    bquery.Add(termq, Occur.MUST_NOT);
                }
            }
        }
        //Query query = parser.Parse(this.Query);
        foreach (string term in SearchTerm)
        {
            if (term == "title")
            {
                TermQuery termq = new TermQuery(new Term(term, this.Query.ToLower()));
                termq.Boost = 5f;
                bquery.Add(termq, Occur.SHOULD);
            }
            else
            {
                TermQuery termq = new TermQuery(new Term(term, this.Query.ToLower()));
                termq.Boost = 0.1f;
                bquery.Add(termq, Occur.SHOULD);
            }
        }
        foreach (string term in SearchTerm)
        {
            if (this.Query.Contains("."))
            {
                string SearchKeyword = this.Query.Replace(".", "");
                if (term == "Extension")
                {
                    TermQuery termq = new TermQuery(new Term(term, SearchKeyword.ToLower()));
                    termq.Boost = 5f;
                    bquery.Add(termq, Occur.SHOULD);
                }
            }
            else
            {
                if (term == "title")
                {
                    FuzzyQuery termq = new FuzzyQuery(new Term(term, this.Query.ToLower()));
                    termq.Boost = 5f;
                    bquery.Add(termq, Occur.SHOULD);
                }
                else
                {
                    //FuzzyQuery termq = new FuzzyQuery(new Term(term, this.Query), 0.5f, 0);
                    //termq.Boost = 0.1f;
                    //bquery.Add(termq, Occur.SHOULD);
                }
            }
        }
        TopDocs hits = searcher.Search(bquery, null, 10000);
        //TopDocs hits = new TopDocs(TempArrList.Count(), TempArrList.ToArray(), hitsWithText.MaxScore);
        //hits.ScoreDocs.CopyTo(hits.ScoreDocs, 0);
        //hits.ScoreDocs = hits.ScoreDocs.OrderBy(obj => searcher.Doc(obj.Doc).Get("path")).ToArray();
        if (Projects.Count() != 0)
        {
            hits.ScoreDocs = hits.ScoreDocs.Where(obj => Projects.Contains(SplitPath(Path.GetDirectoryName(searcher.Doc(obj.Doc).Get("path"))))).Distinct().ToArray();
        }
        this.total = hits.ScoreDocs.Count();
        this.startAt = InitStartAt();
        int resultsCount = Math.Min(total, this.maxResults + this.startAt);
        // create highlighter
        IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");
        SimpleFragmenter fragmenter = new SimpleFragmenter(200);
        QueryScorer scorer = new QueryScorer(bquery);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.TextFragmenter = fragmenter;
        //highlighter.MaxDocCharsToAnalyze = 200;
        //for (int i = startAt; i < resultsCount; i++)
        //{
        //    Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
        //    String path = doc.Get("path");
        //    string getExtension = doc.Get("Extension");
        //    TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
        //    String sample = "";
        //    try
        //    {
        //        string document = doc.Get("text");
        //        if (getExtension.ToLower() == ".png" || getExtension.ToLower() == ".jpg" || getExtension.ToLower() == ".gif" || getExtension.ToLower() == ".bmp" || getExtension.ToLower() == ".jpeg")
        //        {
        //            sample = "";
        //        }
        //        else
        //        {
        //            string outp = highlighter.GetBestFragment(stream, document);
        //            if (outp != null)
        //                sample = ReplaceSpecialChar(outp.Trim()); //, 2, "...");
        //            else
        //                sample = Limit(doc.Get("text").Trim(), 200);
        //        }
        //    }
        //    catch (Exception ex)
        //    {
        //    }
        //    // create a new row with the result data
        //    DataRow row = this.Results.NewRow();
        //    row["title"] = doc.Get("title");
        //    row["path"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
        //    row["url"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
        //    row["sample"] = sample;
        //    if (path.Contains('.'))
        //    {
        //        row["Type"] = GetMIMEType(path);
        //    }
        //    this.Results.Rows.Add(row);
        //}
        for (int i = 0; i < this.total; i++)
        {
            Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
            String path = doc.Get("path");
            string getExtension = doc.Get("Extension");
            TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
            String sample = "";
            try
            {
                string document = doc.Get("text");
                if (getExtension.ToLower() == ".png" || getExtension.ToLower() == ".jpg" || getExtension.ToLower() == ".gif" || getExtension.ToLower() == ".bmp" || getExtension.ToLower() == ".jpeg")
                {
                    sample = "";
                }
                else
                {
                    string outp = highlighter.GetBestFragment(stream, document);
                    if (outp != null)
                        sample = Limit(outp.Trim(), 200); //, 2, "...");
                    else
                        sample = Limit(doc.Get("text").Trim(), 200);
                }
            }
            catch (Exception ex)
            {
                // highlighting failures are ignored; the sample stays empty
            }
            // create a new row with the result data
            DataRow row = Results.NewRow();
            row["title"] = doc.Get("title");
            row["path"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
            row["url"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
            row["sample"] = sample;
            if (path.Contains('.'))
            {
                row["Type"] = GetMIMEType(path);
            }
            Results.Rows.Add(row);
        }
        //****************************** Logic for Paging for Repeater Control****************************************
        PagedDataSource pgitems = new PagedDataSource();
        DataView dv = new DataView(Results);
        pgitems.DataSource = dv;
        pgitems.AllowPaging = true;
        pgitems.PageSize = 10; // You can set the number of items here using some logic.
        pgitems.CurrentPageIndex = PageNumber;
        btnPrev.Visible = !pgitems.IsFirstPage;
        btnNext.Visible = !pgitems.IsLastPage;
        if (pgitems.PageCount > 1)
        {
            rptPages.Visible = true;
            ArrayList pages = new ArrayList();
            for (int i = PageNumber; i < 5 + PageNumber; i++)
                pages.Add((i + 1).ToString());
            rptPages.DataSource = pages;
            rptPages.DataBind();
        }
        else
            rptPages.Visible = false;
        Repeater1.DataSource = pgitems;
        Repeater1.DataBind();
        //*************************************************************************************************************
        //Repeater1.DataSource = Results;
        //Repeater1.DataBind();
        searcher.Dispose();
        // result information
        this.duration = DateTime.Now - start;
        this.fromItem = startAt + 1;
        this.toItem = Math.Min(startAt + maxResults, total);
    }
}
private void Search()
{
    try
    {
        SearchProgressBar.Maximum = 11;
        ProgressLabel.Text = "Progress: Initialize Search ...";
        Searcher searcher = new IndexSearcher(@"Canon\index");
        Analyzer analyzer = new StandardAnalyzer();
        ArrayList resultList = new ArrayList();
        // unused; appears to be left over from a console-based sample this form was adapted from
        System.IO.StreamReader in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).CurrentEncoding);
        String line = QueryInputBox.Text;
        if (string.IsNullOrEmpty(line))
            return; // nothing to search
        ProgressLabel.Text = "Progress: Parsing Query ...";
        Query query = QueryParser.Parse(line, "contents", analyzer);
        //int[] ix = qtm.GetTermFrequencies();
        Hits hits = searcher.Search(query);
        SearchProgressBar.Increment(1);
        ProgressLabel.Text = "Progress: Searched. Analyzing results ...";
        //QueryHighlightExtractor highlighter = new QueryHighlightExtractor(query, new WhitespaceAnalyzer(), "<B>", "</B>");
        Highlighter highlighter = new Highlighter(new QueryScorer(query));
        highlighter.SetTextFragmenter(new SimpleFragmenter(80));
        int maxNumFragmentsRequired = 1;
        //int HITS_PER_PAGE = 10;
        int hitsToShow = Math.Min(10, hits.Length()); // show at most the first 10 hits
        for (int i = 0; i < hitsToShow; i++)
        {
            SearchProgressBar.Increment(1);
            ProgressLabel.Text = "Progress: Analyzing hit " + (i + 1).ToString();
            // get the document from the index
            Document doc = hits.Doc(i);
            //SegmentReader ir = new SegmentReader();
            //Lucene.Net.Index.TermFreqVector tfv =
            //tfv.GetTermFrequencies
            string score = hits.Score(i).ToString();
            //Box += "Hit no. " + i + " scored: " + score + " occ: " + /*highlighter.tokenFrequency */ " best fragment: \n";
            ResultSet a = new ResultSet();
            a.BookName = doc.Get("path").Replace(@"c:\cscd\temp\", "");
            a.Score = hits.Score(i);
            a.numberOfHits = hits.Length();
            // get the document filename
            // we can't get the text from the index because we didn't store it there,
            // so get it from the archive
            string path = doc.Get("path");
            string name = GetInternalName(path);
            PaliReaderUtils.AalekhDecoder.UnzipFromZipLibrary(name);
            path = System.IO.Directory.GetCurrentDirectory() + @"\Work\" + name + ".htm";
            string plainText = "";
            // load text from zip archive temporarily
            using (StreamReader sr = new StreamReader(path, System.Text.Encoding.Default))
            {
                plainText = parseHtml(sr.ReadToEnd());
            }
            //------------------------------- Highlighter code (Lucene 1.4)
            TokenStream tokenStream = analyzer.TokenStream(new StringReader(plainText));
            a.textFragment = highlighter.GetBestFragments(tokenStream, plainText, maxNumFragmentsRequired, "...");
            if (File.Exists(path))
                File.Delete(path);
            //-------------------------------
            resultList.Add(a);
        }
        SearchProgressBar.Value = 0;
        searcher.Close();
        ssr = new ShowSearchResults(/*Box*/ resultList);
        //this.Hide();
        ssr.OpenBookEvent += new ShowSearchResults.OpenBook(this.TriggerOpenBook);
        ssr.Closing += new System.ComponentModel.CancelEventHandler(this.Closing_ResultWindow);
        this.Hide();
        ssr.ShowDialog();
    }
    catch (System.Exception e)
    {
        MessageBox.Show(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
/// <summary>
/// Builds the HTML for the combined search results page.
/// </summary>
/// <returns>The populated page template.</returns>
private string GetALL_BACKDAT()
{
    ClassSeachALL nClass = new ClassSeachALL();

    // merge the partial result sets returned for each node into a single list
    RSK ALL = new RSK();
    ArrayList cNew = new ArrayList();
    foreach (string aabb in nRSD)
    {
        RSK cc = nClass.RTSTR2RSK(aabb);
        // accumulate the total hit count
        ALL.ALLNum = ALL.ALLNum + cc.ALLNum;
        foreach (OneRs ncb in cc.rs)
        {
            cNew.Add(ncb);
        }
    }
    ALL.ANum = 0;
    ALL.BNum = 0;
    ALL.rs = (ArrayList)cNew.Clone();

    // paging window: A_WS is the start offset, A_WL the page length
    int all_a_num = ALL.ALLNum;
    int all_a_ws = A_WS;
    int ALL_a_wl = A_WL;

    // build the HTML block for the current page of results and substitute it into the template
    int CL = A_WS + A_WL;
    if (CL > ALL.rs.Count)
    {
        CL = ALL.rs.Count;
    }

    // "/s?wd=TYPE|title|body|type1|type2&ws=0&wl=10";
    string[] FEFE = A_WD.Split('|');
    QueryParser parser = new QueryParser("E", ClassST.OneAnalyzer);
    Query query = parser.Parse(FEFE[1] + " " + FEFE[2]);
    //Highlighter highlighter = new Highlighter(new QueryScorer(query));
    //Highlighter highlighter = new Highlighter();
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color=\"red\">", "</font>"), new QueryScorer(query));
    highlighter.TextFragmenter = new SimpleFragmenter(20);

    StringBuilder GHX = new StringBuilder();
    for (int p = A_WS; p < CL; p++)
    {
        OneRs ncb = (OneRs)ALL.rs[p];
        StringBuilder Tmp = new StringBuilder();
        string N_D = newz_code.CODE2CN(ncb.D);
        string N_Url = newz_code.CODE2CN(ncb.url);
        string N_A = newz_code.CODE2CN(ncb.A);
        TokenStream tokenStream = ClassST.OneAnalyzer.TokenStream("D", new System.IO.StringReader(N_D));
        System.String result = highlighter.GetBestFragments(tokenStream, N_D, 100, "...");
        //string result = N_D;
        Tmp.Append(" <tr>\r\n");
        Tmp.Append(" <td height=\"66\" scope=\"col\"><table width=\"95%\" height=\"78\" border=\"0\" cellpadding=\"0\" cellspacing=\"1\">\r\n");
        Tmp.Append(" <tr>\r\n");
        Tmp.Append(" <td scope=\"col\"><div align=\"left\"><a href=\"" + N_Url + "\" target=\"_blank\"><span class=\"STYLE17\">" + N_A + "</span></a></div></td>\r\n");
        Tmp.Append(" </tr>\r\n");
        Tmp.Append(" <tr>\r\n");
        Tmp.Append(" <td><span class=\"STYLE18\">");
        Tmp.Append(result);
        Tmp.Append("</span></td>\r\n");
        Tmp.Append(" </tr>\r\n");
        Tmp.Append(" <tr>\r\n");
        Tmp.Append(" <td><a href=\"" + N_Url + "\" target=\"_blank\"><span class=\"STYLE19\">" + N_Url + "</span></a></td>\r\n");
        Tmp.Append(" </tr>\r\n");
        Tmp.Append(" </table><p> </p></td>\r\n");
        //Tmp.Append(" <p> </p>\r\n");
        Tmp.Append(" </tr>\r\n");
        GHX.Append(Tmp.ToString());
    }

    string NewTable = "<table width=\"95%\" height=\"152\" border=\"0\" cellpadding=\"0\" cellspacing=\"0\">\r\n";
    NewTable = NewTable + GHX.ToString();
    NewTable = NewTable + "</table>\r\n";

    // substitute the results table into the page template
    string WebHtml = ClassST.ModelHTM.Replace("<%HTTP://ZD4004.BLOG.163.COM%>", NewTable);

    // "/s?wd=TYPE|title|body|type1|type2&ws=0&wl=10";
    //string[] FEFE = A_WD.Split('|');
    // main type / category list
    WebHtml = WebHtml.Replace("<%http://blog.163.com/zd4004/%>", ClassST.GetMainTypeListHtm(FEFE[0], FEFE[2]));
    // filter box lists
    WebHtml = WebHtml.Replace("<%HTTP://BLOG.163.COM/ZD4004_1%>", ClassST.GetBoxListDat1(FEFE[0], FEFE[3], FEFE[4]));
    WebHtml = WebHtml.Replace("<%HTTP://BLOG.163.COM/ZD4004_2%>", ClassST.GetBoxListDat2(FEFE[0], FEFE[3], FEFE[4]));
    // pager links
    WebHtml = WebHtml.Replace("<%HTTP://BLOG.163.COM/ZD4004_URL%>", ClassST.GetPageNumHTML("/s?wd=" + A_WD + "&", all_a_num, all_a_ws, ALL_a_wl));

    // result-count message ("found N results, showing X - Y")
    string KPP = "Found about ";
    if (all_a_ws + ALL_a_wl < all_a_num)
    {
        int ssrr = all_a_ws + ALL_a_wl;
        int all_a_wsn = all_a_ws + 1;
        KPP = KPP + all_a_num.ToString() + " results, showing " + all_a_wsn.ToString() + " - " + ssrr.ToString();
    }
    else
    {
        int ssrr = all_a_num;
        int all_a_wsn = all_a_ws + 1;
        KPP = KPP + all_a_num.ToString() + " results, showing " + all_a_wsn.ToString() + " - " + ssrr.ToString();
    }
    WebHtml = WebHtml.Replace("<%HTTP://BLOG.163.COM/ZD4004_NUM%>", KPP);
    return WebHtml;
}