/// <summary>
/// Searches the "contents" field of the index and returns up to
/// <paramref name="maxResults"/> hits with a highlighted excerpt.
/// </summary>
/// <param name="query">User query, parsed with the standard query syntax.</param>
/// <param name="maxResults">Maximum number of hits to return.</param>
/// <returns>One <c>Hit</c> per matching document, in score order.</returns>
public IEnumerable<Hit> Search(string query, int maxResults)
{
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    QueryParser qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "contents", analyzer);
    Query q = qp.Parse(query);
    TopDocs top = searcher.Search(q, maxResults);

    // PERF FIX: the scorer and highlighter depend only on the query, not on the
    // individual document, so build them once instead of once per hit.
    var scorer = new QueryScorer(q, searcher.IndexReader, "contents");
    var highlighter = new Highlighter(scorer);

    List<Hit> result = new List<Hit>();
    foreach (var scoreDoc in top.ScoreDocs)
    {
        var doc = searcher.Doc(scoreDoc.Doc);
        string contents = doc.Get("contents");
        result.Add(new Hit()
        {
            Relevance = scoreDoc.Score,
            Title = doc.Get("title"),
            Url = doc.Get("path"),
            Excerpt = highlighter.GetBestFragment(analyzer, "contents", contents)
        });
    }
    return result;
}
/// <summary>
/// Builds a short HTML preview of <paramref name="text"/> with the terms of
/// <paramref name="q"/> highlighted; up to 4 fragments of 250 chars, joined by &lt;br/&gt;.
/// </summary>
private string GeneratePreviewText(Query q, string text)
{
    var highlighter = new Highlighter(htmlFormatter, new QueryScorer(q))
    {
        TextFragmenter = new SimpleFragmenter(250)
    };
    TokenStream tokens = SearchEnvironment.DefaultAnalyzer.TokenStream("html_content", new StringReader(text));
    return highlighter.GetBestFragments(tokens, text, 4, "<br/>");
}
// Prints the given text to the console with every occurrence of the search term
// highlighted (using the highlighter's default formatting).
public static void RealHighlighter(string searchTerm, string text)
{
    var termQuery = new TermQuery(new Term("mainText", searchTerm));
    Lucene.Net.Search.Highlight.IScorer queryScorer = new QueryScorer(termQuery);
    var highlighter = new Highlighter(queryScorer);

    var reader = new System.IO.StringReader(text);
    TokenStream tokens = new SimpleAnalyzer().TokenStream("mainText", reader);

    // 5 is the maximum number of fragments that gets tested.
    String[] fragments = highlighter.GetBestFragments(tokens, text, 5);
    foreach (string fragment in fragments)
    {
        Console.Write(fragment);
    }
}
// TEST METHOD FOR HIGHLIGHTING.
public static void Highlighter()
{
    string textTest = "I am a man that follows hell.";
    // BUG FIX: the query's field name must match the field used when tokenizing
    // below ("field"); the original used an empty field name ("").
    TermQuery queryTest = new TermQuery(new Term("field", "hell"));
    Lucene.Net.Search.Highlight.IScorer scorer = new QueryScorer(queryTest);
    Highlighter highlighter = new Highlighter(scorer);
    System.IO.StringReader reader = new System.IO.StringReader(textTest);
    TokenStream tokenStream = new SimpleAnalyzer().TokenStream("field", reader);
    // 1 is the maximum number of fragments that gets tested.
    String[] toBePrinted = highlighter.GetBestFragments(tokenStream, textTest, 1);
    foreach (var word in toBePrinted)
    {
        Console.WriteLine(word);
    }
}
/// <summary>
/// Highlights the matches of the current Query inside a field's value.
/// </summary>
/// <param name="fieldName">Name of the field.</param>
/// <param name="fieldValue">The field value.</param>
/// <param name="startTag">The start tag.</param>
/// <param name="endTag">The end tag.</param>
/// <param name="fragmentLength">Length of the fragment.</param>
/// <param name="numberOfFragments">The number of fragments.</param>
/// <returns>The best highlighted fragments, at most <paramref name="numberOfFragments"/>.</returns>
protected string[] HighlightField(string fieldName, string fieldValue, string startTag = "<strong>", string endTag = "</strong>", int fragmentLength = 150, int numberOfFragments = 1)
{
    IFormatter htmlFormatter = new SimpleHTMLFormatter(startTag, endTag);
    var highlighter = new Highlighter(htmlFormatter, new Lucene.Net.Search.Highlight.QueryScorer(Query))
    {
        TextFragmenter = new SimpleFragmenter(fragmentLength)
    };
    // Tokenize with the analyzer registered for this specific field.
    var specificAnalyzer = GetAnalyzer(fieldName);
    TokenStream tokenStream = specificAnalyzer.TokenStream(fieldName, new StringReader(fieldValue));
    return highlighter.GetBestFragments(tokenStream, fieldValue, numberOfFragments);
}
/// <summary>
/// Searches text, title, urlkey and searchterms fields and returns up to 200 hits,
/// each with a highlighted two-fragment sample of the "text" field.
/// </summary>
public List<IndexResult> Search(string terms)
{
    List<IndexResult> retObj = new List<IndexResult>();
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    using (var searcher = new IndexSearcher(FSDirectory.Open(IndexDirectory)))
    {
        // Parse the query; "text" is the default field to search.
        var parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30,
            new[] { "text", "title", "urlkey", "searchterms" }, analyzer);
        Query query = parser.Parse(terms);
        TopDocs hits = searcher.Search(query, 200);

        Highlighter highlighter = new Highlighter(new QueryScorer(query));
        highlighter.TextFragmenter = new SimpleFragmenter(80);

        // BUG FIX: iterate the docs actually returned (ScoreDocs), not TotalHits.
        // TotalHits can exceed the 200 requested above, which would index past
        // the end of the ScoreDocs array.
        foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
        {
            Document doc = searcher.Doc(scoreDoc.Doc);
            string text = doc.Get("text");
            TokenStream stream = analyzer.TokenStream("", new StringReader(text));
            String sample = highlighter.GetBestFragments(stream, text, 2, "...");
            retObj.Add(new IndexResult()
            {
                Sample = sample,
                Title = doc.Get("title"),
                Type = doc.Get("type"),
                UrlKey = doc.Get("urlkey")
            });
        }
        return retObj;
    }
}
// Prints the message details for the given index document, highlighting the
// search term inside the stored "mainText" field.
// NOTE: "mainText" (and any other field you want displayed) must be STORED at
// indexing time, otherwise doc.Get(...) returns null.
private static void DisplayMessage(Document d, string searchTerm)
{
    string text = d.Get("mainText");
    // CONSISTENCY: reuse the shared RealHighlighter helper instead of duplicating
    // its scorer/highlighter/token-stream logic here.
    RealHighlighter(searchTerm, text);
    Console.WriteLine("=====================");
    Console.ReadKey();
}
/// <summary>
/// Searches the "Pitanja" (questions) Lucene index for <paramref name="pretraga"/>
/// and returns the top 5 hits as a DataTable; the "Sadrzaj" column carries a
/// highlighted snippet instead of the full body.
/// </summary>
public static DataTable searchPitanja(string pretraga)
{
    DataTable ResultsPitanja = new DataTable();

    // TODO(review): hard-coded absolute index path; should come from configuration.
    string indexDirectory = "J:/Triglav_Web_App/Triglav/Web/Lucene/Pitanja";
    var analyzer = new StandardAnalyzer(Version.LUCENE_30);

    // Search title, body and tags.
    var parser = new MultiFieldQueryParser(Version.LUCENE_30, new[] { "Naslov", "Sadrzaj", "Tagovi" }, analyzer);
    Query query = parser.Parse(pretraga);

    // Result schema.
    ResultsPitanja.Columns.Add("id", typeof(Int32));
    ResultsPitanja.Columns.Add("Naslov", typeof(string));
    ResultsPitanja.Columns.Add("Sadrzaj", typeof(string));
    ResultsPitanja.Columns.Add("Tagovi", typeof(string));
    ResultsPitanja.Columns.Add("DatumKreiranja", typeof(DateTime));
    ResultsPitanja.Columns.Add("DatumZadnjeIzmjene", typeof(DateTime));
    ResultsPitanja.Columns.Add("DatumZadnjeAktivnosti", typeof(DateTime));
    ResultsPitanja.Columns.Add("DatumZatvaranjaPosta", typeof(DateTime));
    ResultsPitanja.Columns.Add("PrihvaceniOdgovori", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojOdgovora", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojKomentara", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojOmiljenih", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojPregleda", typeof(Int32));
    ResultsPitanja.Columns.Add("BrojPoena", typeof(Int32));
    ResultsPitanja.Columns.Add("VlasnikID", typeof(Int32));
    ResultsPitanja.Columns.Add("VlasnikNadimak", typeof(string));
    ResultsPitanja.Columns.Add("PromijenioID", typeof(Int32));
    ResultsPitanja.Columns.Add("RoditeljskiPostID", typeof(Int32));
    ResultsPitanja.Columns.Add("PostVrsta", typeof(Int32));
    ResultsPitanja.Columns.Add("temp", typeof(string));
    ResultsPitanja.Columns.Add("Likes", typeof(Int32));
    ResultsPitanja.Columns.Add("Unlikes", typeof(Int32));
    ResultsPitanja.Columns.Add("Sazetak", typeof(string));
    ResultsPitanja.Columns.Add("BrojRangiranja", typeof(Int32));
    ResultsPitanja.Columns.Add("PrihvacenaIzmjena", typeof(Int32));
    ResultsPitanja.Columns.Add("Podnaslov", typeof(string));
    ResultsPitanja.Columns.Add("Broj.Razgovora", typeof(Int32));
    ResultsPitanja.Columns.Add("sample", typeof(string));

    // BUG FIX: dispose the searcher even if searching or highlighting throws
    // (the original only disposed it on the success path).
    using (IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory)))
    {
        TopDocs hits = searcher.Search(query, 5);

        // Highlighter wraps matched terms in a styled <span>.
        IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold; background-color: #e5ecf9; \">", "</span>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        highlighter.TextFragmenter = new SimpleFragmenter(80);

        for (int i = 0; i < hits.ScoreDocs.Length; i++)
        {
            Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
            TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("Sadrzaj")));
            String sample = highlighter.GetBestFragments(stream, doc.Get("Sadrzaj"), 3, "...");

            // Copy the stored fields into a result row.
            DataRow rowPitanja = ResultsPitanja.NewRow();
            rowPitanja["id"] = doc.Get("id");
            rowPitanja["Naslov"] = doc.Get("Naslov");
            rowPitanja["Sadrzaj"] = sample; // highlighted snippet, not the full body
            rowPitanja["Tagovi"] = doc.Get("Tagovi");
            rowPitanja["DatumKreiranja"] = doc.Get("DatumKreiranja");
            rowPitanja["DatumZadnjeIzmjene"] = doc.Get("DatumZadnjeIzmjene");
            rowPitanja["DatumZadnjeAktivnosti"] = doc.Get("DatumZadnjeAktivnosti");
            rowPitanja["PrihvaceniOdgovori"] = doc.Get("PrihvaceniOdgovori");
            rowPitanja["BrojOdgovora"] = doc.Get("BrojOdgovora");
            rowPitanja["BrojKomentara"] = doc.Get("BrojKomentara");
            rowPitanja["BrojOmiljenih"] = doc.Get("BrojOmiljenih");
            rowPitanja["BrojPregleda"] = doc.Get("BrojPregleda");
            rowPitanja["BrojPoena"] = doc.Get("BrojPoena");
            rowPitanja["VlasnikNadimak"] = doc.Get("VlasnikNadimak");
            rowPitanja["PostVrsta"] = doc.Get("PostVrsta");
            rowPitanja["Likes"] = doc.Get("Likes");
            rowPitanja["Unlikes"] = doc.Get("Unlikes");
            rowPitanja["Sazetak"] = doc.Get("Sazetak");
            rowPitanja["BrojRangiranja"] = doc.Get("BrojRangiranja");
            rowPitanja["PrihvacenaIzmjena"] = doc.Get("PrihvacenaIzmjena");
            rowPitanja["Podnaslov"] = doc.Get("Podnaslov");
            ResultsPitanja.Rows.Add(rowPitanja);
        }
    }
    return ResultsPitanja;
}
/// <summary>
/// Creates a <see cref="SimpleSpanFragmenter"/> with an explicit fragment size.
/// </summary>
/// <param name="queryScorer"><see cref="QueryScorer"/> that was used to score hits</param>
/// <param name="fragmentSize">size in bytes of each fragment</param>
public SimpleSpanFragmenter(QueryScorer queryScorer, int fragmentSize)
{
    this.fragmentSize = fragmentSize;
    this.queryScorer = queryScorer;
}
/// <summary>
/// Creates a <see cref="SimpleSpanFragmenter"/> using <c>DEFAULT_FRAGMENT_SIZE</c>.
/// </summary>
/// <param name="queryScorer"><see cref="QueryScorer"/> that was used to score hits</param>
public SimpleSpanFragmenter(QueryScorer queryScorer) : this(queryScorer, DEFAULT_FRAGMENT_SIZE)
{
}
/// <summary>
/// Runs the search for the current page query, fills the Results table with
/// highlighted hits and binds them to Repeater1.
/// </summary>
private void search()
{
    DateTime start = DateTime.Now;

    // Result schema consumed by Repeater1.
    this.Results.Columns.Add("title", typeof(string));
    this.Results.Columns.Add("sample", typeof(string));
    this.Results.Columns.Add("path", typeof(string));
    this.Results.Columns.Add("url", typeof(string));
    this.Results.Columns.Add("Type", typeof(string));

    // Index lives in the "index" subdirectory of App_Data.
    string indexDirectory = Server.MapPath("~/App_Data/index");
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

    // BUG FIX: using guarantees the searcher is released even if search/highlighting throws
    // (the original only disposed it on the success path).
    using (IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory)))
    {
        BooleanQuery bquery = new BooleanQuery();
        List<string> SearchTerm = new List<string> { "text", "path", "title", "Extension", "EXTPRP" };

        List<string> Projects = new List<string>();
        if (Session["ProjectList"] != null)
        {
            Projects = (List<string>)Session["ProjectList"];
        }

        // When a specific page/type is requested, exclude documents of every other type.
        List<string> allType = new List<string> { "A", "B", "C" };
        if (this.Request.QueryString["Page"] != null && allType.Contains(this.Request.QueryString["Page"].ToString()))
        {
            allType.Remove(this.Request.QueryString["Page"]);
            foreach (string type in allType)
            {
                bquery.Add(new TermQuery(new Term("EXTPRP", type)), Occur.MUST_NOT);
                bquery.Add(new FuzzyQuery(new Term("EXTPRP", type), 0.5f, 0), Occur.MUST_NOT);
            }
        }

        // Title gets a strong exact-match boost; all other fields a weak fuzzy match.
        foreach (string term in SearchTerm)
        {
            if (term == "title")
            {
                TermQuery titleQuery = new TermQuery(new Term(term, this.Query));
                titleQuery.Boost = 5f;
                bquery.Add(titleQuery, Occur.SHOULD);
            }
            else
            {
                FuzzyQuery fuzzy = new FuzzyQuery(new Term(term, this.Query), 0.5f, 0);
                fuzzy.Boost = 0.1f;
                bquery.Add(fuzzy, Occur.SHOULD);
            }
        }

        TopDocs hits = searcher.Search(bquery, null, 10000);

        // Optional per-project filtering based on the directory part of the stored path.
        if (Projects.Count != 0)
        {
            hits.ScoreDocs = hits.ScoreDocs
                .Where(obj => Projects.Contains(Path.GetDirectoryName(searcher.Doc(obj.Doc).Get("path"))))
                .Distinct()
                .ToArray();
        }

        this.total = hits.ScoreDocs.Length;
        this.startAt = InitStartAt();
        int resultsCount = Math.Min(total, this.maxResults + this.startAt);

        // Highlighter wraps matched terms in a yellow <span>.
        IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(bquery));
        highlighter.TextFragmenter = new SimpleFragmenter(200);

        for (int i = startAt; i < resultsCount; i++)
        {
            Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
            String path = doc.Get("path");
            string getExtension = doc.Get("Extension");
            TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
            String sample = "";
            try
            {
                string document = doc.Get("text");
                string ext = getExtension.ToLower();
                // Images have no usable text: leave the sample empty.
                bool isImage = ext == ".png" || ext == ".jpg" || ext == ".gif" || ext == ".bmp";
                if (!isImage)
                {
                    sample = highlighter.GetBestFragment(stream, document);
                }
            }
            catch (Exception)
            {
                // Best effort: documents that cannot be highlighted keep an empty sample.
            }

            DataRow row = this.Results.NewRow();
            row["title"] = doc.Get("title");
            row["path"] = "http://sagitec-1629/KNBASE/" + path.Replace(@"\", "/").Replace("//SAGITEC-1629/Soogle/", "");
            row["url"] = "http://sagitec-1629/KNBASE/" + path.Replace(@"\", "/").Replace("//SAGITEC-1629/Soogle/", "");
            row["sample"] = sample;
            if (path.Contains('.'))
            {
                row["Type"] = GetMIMEType(path);
            }
            this.Results.Rows.Add(row);
        }

        Repeater1.DataSource = Results;
        Repeater1.DataBind();
    }

    // Result metadata shown in the UI.
    this.duration = DateTime.Now - start;
    this.fromItem = startAt + 1;
    this.toItem = Math.Min(startAt + maxResults, total);
}
//********************************************************************************************************************************
/// <summary>
/// Runs the search for the query typed into TextBoxQuery, fills the Results table
/// with highlighted hits and binds them to Repeater1 with 10-row paging.
/// Does nothing when the query box is empty.
/// </summary>
private void search()
{
    if (TextBoxQuery.Text != "")
    {
        DateTime start = DateTime.Now;

        // Result schema consumed by the repeater.
        this.Results.Columns.Add("title", typeof(string));
        this.Results.Columns.Add("sample", typeof(string));
        this.Results.Columns.Add("path", typeof(string));
        this.Results.Columns.Add("url", typeof(string));
        this.Results.Columns.Add("Type", typeof(string));

        string indexDirectory = Server.MapPath(IndexDirPath);
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

        // BUG FIX: using guarantees the searcher is disposed even when an exception escapes
        // (the original only disposed it on the success path).
        using (IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory)))
        {
            BooleanQuery bquery = new BooleanQuery();
            List<string> SearchTerm = new List<string> { "text", "path", "title", "Extension", "EXTPRP" };

            List<string> Projects = new List<string>();
            if (Session["ProjectList"] != null)
            {
                Projects = (List<string>)Session["ProjectList"];
            }

            // When a category link was clicked, exclude every other category from the results.
            List<string> allType = hnkClickLink.Value == ""
                ? new List<string>()
                : new List<string> { "Doc", "Code", "Images", "Other" };
            if (this.Request.QueryString["Page"] != null && allType.Contains(Convert.ToString(hnkClickLink.Value)))
            {
                allType.Remove(Convert.ToString(hnkClickLink.Value));
                foreach (string type in allType)
                {
                    bquery.Add(new TermQuery(new Term("EXTPRP", type)), Occur.MUST_NOT);
                    bquery.Add(new FuzzyQuery(new Term("EXTPRP", type), 0.5f, 0), Occur.MUST_NOT);
                }
            }

            // Exact matches: title is boosted, all other fields carry a low boost.
            foreach (string term in SearchTerm)
            {
                TermQuery termq = new TermQuery(new Term(term, this.Query.ToLower()));
                termq.Boost = term == "title" ? 5f : 0.1f;
                bquery.Add(termq, Occur.SHOULD);
            }

            // Extra clauses: extension match for dotted queries, fuzzy title match otherwise.
            foreach (string term in SearchTerm)
            {
                if (this.Query.Contains("."))
                {
                    string SearchKeyword = this.Query.Replace(".", "");
                    if (term == "Extension")
                    {
                        TermQuery termq = new TermQuery(new Term(term, SearchKeyword.ToLower()));
                        termq.Boost = 5f;
                        bquery.Add(termq, Occur.SHOULD);
                    }
                }
                else if (term == "title")
                {
                    FuzzyQuery termq = new FuzzyQuery(new Term(term, this.Query.ToLower()));
                    termq.Boost = 5f;
                    bquery.Add(termq, Occur.SHOULD);
                }
            }

            TopDocs hits = searcher.Search(bquery, null, 10000);

            // Optional per-project filtering based on the directory part of the stored path.
            if (Projects.Count != 0)
            {
                hits.ScoreDocs = hits.ScoreDocs
                    .Where(obj => Projects.Contains(SplitPath(Path.GetDirectoryName(searcher.Doc(obj.Doc).Get("path")))))
                    .Distinct()
                    .ToArray();
            }

            this.total = hits.ScoreDocs.Length;
            this.startAt = InitStartAt();

            // Highlighter wraps matched terms in a yellow <span>.
            IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(bquery));
            highlighter.TextFragmenter = new SimpleFragmenter(200);

            // NOTE: paging is handled by PagedDataSource below, so every hit is materialized here.
            for (int i = 0; i < this.total; i++)
            {
                Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
                String path = doc.Get("path");
                string getExtension = doc.Get("Extension");
                TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
                String sample = "";
                try
                {
                    string document = doc.Get("text");
                    string ext = getExtension.ToLower();
                    // Images have no usable text: leave the sample empty.
                    bool isImage = ext == ".png" || ext == ".jpg" || ext == ".gif" || ext == ".bmp" || ext == ".jpeg";
                    if (!isImage)
                    {
                        string outp = highlighter.GetBestFragment(stream, document);
                        sample = outp != null
                            ? Limit(outp.Trim(), 200)
                            : Limit(doc.Get("text").Trim(), 200);
                    }
                }
                catch (Exception)
                {
                    // Best effort: rows whose text cannot be highlighted keep an empty sample.
                }

                DataRow row = Results.NewRow();
                row["title"] = doc.Get("title");
                row["path"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
                row["url"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
                row["sample"] = sample;
                if (path.Contains('.'))
                {
                    row["Type"] = GetMIMEType(path);
                }
                Results.Rows.Add(row);
            }

            //****************************** Logic for Paging for Repeater Control ****************************************
            PagedDataSource pgitems = new PagedDataSource();
            pgitems.DataSource = new DataView(Results);
            pgitems.AllowPaging = true;
            pgitems.PageSize = 10; // number of rows per repeater page
            pgitems.CurrentPageIndex = PageNumber;
            btnPrev.Visible = !pgitems.IsFirstPage;
            btnNext.Visible = !pgitems.IsLastPage;
            if (pgitems.PageCount > 1)
            {
                rptPages.Visible = true;
                ArrayList pages = new ArrayList();
                for (int i = PageNumber; i < 5 + PageNumber; i++)
                {
                    pages.Add((i + 1).ToString());
                }
                rptPages.DataSource = pages;
                rptPages.DataBind();
            }
            else
            {
                rptPages.Visible = false;
            }
            Repeater1.DataSource = pgitems;
            Repeater1.DataBind();
        }

        // Result metadata shown in the UI.
        this.duration = DateTime.Now - start;
        this.fromItem = startAt + 1;
        this.toItem = Math.Min(startAt + maxResults, total);
    }
}
/// <summary>
/// Returns up to 8 distinct autocomplete suggestions that match <paramref name="searchtext"/>
/// within the n-gram field selected by <paramref name="queryFilter"/> and also satisfy
/// <paramref name="origQuery"/>.
/// </summary>
/// <param name="origQuery">base query every hit must also satisfy</param>
/// <param name="queryFilter">field to search; an "ng_" prefix is added if missing</param>
/// <param name="searchtext">user-typed text (compared lower-case)</param>
/// <returns>distinct <c>TextValue</c> suggestions</returns>
public static IEnumerable<TextValue> doTextSearch(Query origQuery, String queryFilter, String searchtext)
{
    String filter = queryFilter;
    BooleanQuery query = new BooleanQuery();
    query.Add(origQuery, Occur.MUST);

    // Normalize the filter to the n-gram field naming convention.
    if (!filter.ToLower().StartsWith("ng_"))
    {
        filter = "ng_" + filter;
    }
    if (filter.ToLower().Equals("ng_all"))
    {
        filter = "ng_all";
        queryFilter = "ng_all";
    }

    HashSet<string> uniqueText = new HashSet<string>();
    searchtext = searchtext.ToLower();

    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, filter, new KeywordAnalyzer());
    parser.DefaultOperator = QueryParser.Operator.AND;
    query.Add(parser.Parse(searchtext), Occur.MUST);

    TopDocs tds = searcher.Search(query, 50);
    // CLEANUP: removed an unused QueryScorer and an unused NGramAnalyzer that were
    // created here but never referenced.

    List<TextValue> autoCompleteTextList = new List<TextValue>();
    foreach (ScoreDoc sd in tds.ScoreDocs)
    {
        Document doc = searcher.Doc(sd.Doc);
        String docId = doc.GetField("doc_id").StringValue;

        // Look up the stored autocomplete values for this document and field.
        TermQuery q1 = new TermQuery(new Term("id", docId.ToLower()));
        TermQuery q0 = new TermQuery(new Term("field", queryFilter.ToLower()));
        QueryParser parser1 = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "value", new KeywordAnalyzer());
        parser1.DefaultOperator = QueryParser.Operator.AND;
        Query q2 = parser1.Parse(searchtext);

        BooleanQuery q3 = new BooleanQuery();
        q3.Add(q1, Occur.MUST);
        q3.Add(q2, Occur.MUST);
        q3.Add(q0, Occur.MUST);

        TopDocs tdAutoComp = autoCompleteSearcher.Search(q3, 100);
        foreach (ScoreDoc sdAutoComp in tdAutoComp.ScoreDocs)
        {
            Document docAutoComp = autoCompleteSearcher.Doc(sdAutoComp.Doc);
            String toAdd = docAutoComp.GetField("value").StringValue;
            // IDIOM: HashSet.Add returns false for duplicates — one lookup instead of
            // Contains + Add.
            if (uniqueText.Add(toAdd))
            {
                TextValue tv = new TextValue();
                tv.Name = toAdd;
                tv.Value = toAdd;
                autoCompleteTextList.Add(tv);
            }
        }
        if (autoCompleteTextList.Count > 7)
        {
            break;
        }
    }
    return autoCompleteTextList;
}
/// <summary>
/// Creates a <see cref="SimpleSpanFragmenter"/> with an explicit fragment size.
/// </summary>
/// <param name="queryScorer">QueryScorer that was used to score hits</param>
/// <param name="fragmentSize">size in bytes of each fragment</param>
public SimpleSpanFragmenter(QueryScorer queryScorer, int fragmentSize)
{
    this.fragmentSize = fragmentSize;
    this.queryScorer = queryScorer;
}
/// <summary>
/// Creates a <see cref="SimpleSpanFragmenter"/> using <c>DEFAULT_FRAGMENT_SIZE</c>.
/// </summary>
/// <param name="queryScorer">QueryScorer that was used to score hits</param>
public SimpleSpanFragmenter(QueryScorer queryScorer) : this(queryScorer, DEFAULT_FRAGMENT_SIZE)
{
}
/// <summary>
/// Searches the site's Lucene index for <paramref name="queryText"/>, applying role,
/// publish-date, modified-date and feature filters, and returns one page of results.
/// </summary>
/// <remarks>
/// Filters are applied through a <c>QueryWrapperFilter</c> so they never influence
/// result scores. Parse/clause/IO failures are logged and surfaced through
/// <paramref name="invalidQuery"/> instead of being thrown.
/// </remarks>
public static IndexItemCollection Search(
    int siteId,
    bool isAdminContentAdminOrSiteEditor,
    List<string> userRoles,
    Guid[] featureGuids,
    DateTime modifiedBeginDate,
    DateTime modifiedEndDate,
    string queryText,
    bool highlightResults,
    int highlightedFragmentSize,
    int pageNumber,
    int pageSize,
    int maxClauseCount,
    out int totalHits,
    out bool invalidQuery)
{
    invalidQuery = false;
    totalHits = 0;
    IndexItemCollection results = new IndexItemCollection();
    if (string.IsNullOrEmpty(queryText))
    {
        return results;
    }

    using (Lucene.Net.Store.Directory searchDirectory = GetDirectory(siteId))
    {
        if (!IndexReader.IndexExists(searchDirectory))
        {
            return results;
        }
        long startTicks = DateTime.Now.Ticks;
        try
        {
            if (maxClauseCount != 1024)
            {
                // NOTE(review): MaxClauseCount is a process-wide static; this changes it
                // for every concurrent search and is never restored — confirm intended.
                BooleanQuery.MaxClauseCount = maxClauseCount;
            }

            // There are different analyzers for different languages;
            // see LuceneSettings.config in the root of the web.
            LuceneSettingsProvider provider = LuceneSettingsManager.Providers[GetSiteProviderName(siteId)];
            Analyzer analyzer = provider.GetAnalyzer();

            Query searchQuery = MultiFieldQueryParser.Parse(
                Lucene.Net.Util.Version.LUCENE_30,
                new string[] { queryText, queryText, queryText, queryText, queryText, queryText.Replace("*", string.Empty) },
                new string[] { "Title", "ModuleTitle", "contents", "PageName", "PageMetaDesc", "Keyword" },
                analyzer);

            // Filters are collected separately; they won't be used to score the results.
            BooleanQuery filterQuery = new BooleanQuery();
            if (!isAdminContentAdminOrSiteEditor) // skip role filters for these users
            {
                AddRoleFilters(userRoles, filterQuery);
                AddModuleRoleFilters(userRoles, filterQuery);
            }

            // Only published content: begin date in the past, end date in the future.
            TermRangeQuery beginDateFilter = new TermRangeQuery(
                "PublishBeginDate", DateTime.MinValue.ToString("s"), DateTime.UtcNow.ToString("s"), true, true);
            filterQuery.Add(beginDateFilter, Occur.MUST);

            TermRangeQuery endDateFilter = new TermRangeQuery(
                "PublishEndDate", DateTime.UtcNow.ToString("s"), DateTime.MaxValue.ToString("s"), true, true);
            filterQuery.Add(endDateFilter, Occur.MUST);

            if ((modifiedBeginDate.Date > DateTime.MinValue.Date) || (modifiedEndDate.Date < DateTime.MaxValue.Date))
            {
                TermRangeQuery lastModifiedDateFilter = new TermRangeQuery(
                    "LastModUtc", modifiedBeginDate.Date.ToString("s"), modifiedEndDate.Date.ToString("s"), true, true);
                filterQuery.Add(lastModifiedDateFilter, Occur.MUST);
            }

            if ((featureGuids != null) && (featureGuids.Length > 0))
            {
                BooleanQuery featureFilter = new BooleanQuery();
                foreach (Guid featureGuid in featureGuids)
                {
                    featureFilter.Add(new TermQuery(new Term("FeatureId", featureGuid.ToString())), Occur.SHOULD);
                }
                filterQuery.Add(featureFilter, Occur.MUST);
            }

            Filter filter = new QueryWrapperFilter(filterQuery); // filterQuery won't affect result scores

            using (IndexSearcher searcher = new IndexSearcher(searchDirectory))
            {
                int maxResults = int.MaxValue;
                TopDocs hits = searcher.Search(searchQuery, filter, maxResults);

                int startHit = 0;
                if (pageNumber > 1)
                {
                    startHit = ((pageNumber - 1) * pageSize);
                }

                totalHits = hits.TotalHits;
                int end = startHit + pageSize;
                if (totalHits <= end)
                {
                    end = totalHits;
                }
                int itemsAdded = 0;
                int itemsToAdd = end;

                QueryScorer scorer = new QueryScorer(searchQuery);
                SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='searchterm'>", "</span>");
                Highlighter highlighter = new Highlighter(formatter, scorer);
                highlighter.TextFragmenter = new SimpleFragmenter(highlightedFragmentSize);

                for (int i = startHit; i < itemsToAdd; i++)
                {
                    Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
                    IndexItem indexItem = new IndexItem(doc, hits.ScoreDocs[i].Score);
                    if (highlightResults)
                    {
                        // ROBUSTNESS: skip documents with no stored contents instead of
                        // relying solely on catching NullReferenceException.
                        string contents = doc.Get("contents");
                        if (contents != null)
                        {
                            try
                            {
                                TokenStream stream = analyzer.TokenStream("contents", new StringReader(contents));
                                string highlightedResult = highlighter.GetBestFragment(stream, contents);
                                if (highlightedResult != null)
                                {
                                    indexItem.Intro = highlightedResult;
                                }
                            }
                            catch (NullReferenceException) { }
                        }
                    }
                    results.Add(indexItem);
                    itemsAdded += 1;
                }

                results.ItemCount = itemsAdded;
                results.PageIndex = pageNumber;
                results.ExecutionTime = DateTime.Now.Ticks - startTicks;
            }
        }
        catch (ParseException ex)
        {
            invalidQuery = true;
            log.Error("handled error for search terms " + queryText, ex);
            // Parser exceptions are generally caused by spambots posting junk into the
            // search form; here's an option to automatically ban the IP address.
            HandleSpam(queryText, ex);
            return results;
        }
        catch (BooleanQuery.TooManyClauses ex)
        {
            invalidQuery = true;
            log.Error("handled error for search terms " + queryText, ex);
            return results;
        }
        catch (System.IO.IOException ex)
        {
            invalidQuery = true;
            log.Error("handled error for search terms " + queryText, ex);
            return results;
        }
        return results;
    }
}
/// <summary>
/// Executes the given query restricted to the current publish window, returning a
/// page of hits (offset/length) with a two-fragment highlighted excerpt each.
/// </summary>
/// <param name="metaData">Extra stored-field names to copy onto each hit.</param>
/// <param name="resultOffset">Zero-based index of the first hit to return.</param>
/// <param name="resultLength">Maximum number of hits to return.</param>
/// <param name="query">The user query; also drives excerpt highlighting.</param>
private SearchResult ExecuteQuery(string[] metaData, int resultOffset, int resultLength, Query query)
{
    var startTime = DateTime.Now;

    // Restrict to currently published documents (publishStart <= now < publishStop).
    var ticks = DateTime.Now.ToUniversalTime().Ticks;
    Query publishStartQuery = NumericRangeQuery.NewLongRange("publishStart", null, ticks, true, false);
    Query publishStopQuery = NumericRangeQuery.NewLongRange("publishStop", ticks, null, false, true);
    var booleanQuery = new BooleanQuery
    {
        {query, Occur.MUST},
        {publishStartQuery, Occur.MUST},
        {publishStopQuery, Occur.MUST}
    };

    var scoreDocs = _searcher.Search(booleanQuery, null, MaxHits, Sort.RELEVANCE).ScoreDocs;
    var result = new SearchResult {NumberOfHits = scoreDocs.Length};

    // Create highlighter.
    IFormatter formatter = new SimpleHTMLFormatter("<span class=\"search-highlight;\">", "</span>");
    var fragmenter = new SimpleFragmenter(120);
    var scorer = new QueryScorer(query);
    var highlighter = new Highlighter(formatter, scorer) {TextFragmenter = fragmenter};

    if (resultOffset < scoreDocs.Length)
    {
        var resultUpperOffset = resultOffset + resultLength;
        if (resultUpperOffset > scoreDocs.Length)
        {
            resultUpperOffset = scoreDocs.Length;
        }
        for (var i = resultOffset; i < resultUpperOffset; i++)
        {
            var doc = scoreDocs[i];
            var document = _searcher.Doc(doc.Doc);
            var content = document.Get("content");
            var excerpt = "";
            if (content != null)
            {
                // CLEANUP: reuse the already-fetched content instead of calling
                // document.Get("content") two more times.
                var stream = _analyzer.TokenStream("", new StringReader(content));
                excerpt = highlighter.GetBestFragments(stream, content, 2, "...");
            }
            Guid pageId;
            (document.Get("pageId") ?? string.Empty).TryParseGuid(out pageId);
            var hit = new SearchHit
            {
                PageId = pageId,
                Path = document.Get("path"),
                Title = document.Get("title"),
                Excerpt = excerpt
            };
            foreach (var key in metaData)
            {
                hit.MetaData.Add(key, document.Get(key));
            }
            result.Hits.Add(hit);
        }
    }

    var timeTaken = DateTime.Now - startTime;
    result.SecondsTaken = timeTaken.TotalSeconds;
    return result;
}
/// <summary>
/// Runs a filtered, creation-date-sorted search and returns the matching ids via
/// BuildSearchResult. The highlighter and token stream are stored on the instance
/// for later use.
/// </summary>
private IList<int> Search(string text, int tipodocumentoId, string startDate, string endDate)
{
    var directory = this.GetDirectory();
    var reader = this.GetIndexReader(directory);
    var indexSearcher = new IndexSearcher(reader);
    try
    {
        var luceneQuery = this.BuildQuery(text, tipodocumentoId);
        var dateFilter = this.BuildDateFilter(startDate, endDate);

        // Newest first, by creation date.
        var sortByCreation = new Sort(new SortField("dataCriacao", SortField.LONG, true));
        var topDocs = indexSearcher.Search(luceneQuery, dateFilter, this.configuracoesDaAplicacao.ResultadoMaximoConsulta, sortByCreation);

        // Keep the highlighter around so BuildSearchResult can decorate the hits.
        this.Highlighter = new Highlighter(
            new SimpleHTMLFormatter("<span class=\"result-highlight\">", "</span>"),
            new QueryScorer(luceneQuery));
        // NOTE(review): this tokenizes the *search text*, not a document's content —
        // confirm that is what the consumer of Stream expects.
        this.Stream = LuceneEngineBase.GetAnalyzer().TokenStream(string.Empty, new StringReader(text));

        return this.BuildSearchResult(topDocs, indexSearcher);
    }
    finally
    {
        indexSearcher.Dispose();
        reader.Dispose();
        directory.Dispose();
    }
}