/// <summary>
/// Searches the previously built index for theses (tesis) that match the search term
/// in either the Rubro or Texto field.
/// </summary>
/// <param name="searchTerm"></param>
/// <returns></returns>
public List<int> SearchIuses(string searchTerm)
{
    List<int> results = new List<int>();
    IndexSearcher searcher = new IndexSearcher(FSDirectory.GetDirectory(indexPath));

    // Search the Rubro field as an exact phrase.
    QueryParser parser = new QueryParser("RubroIndx", analyzer);
    parser.SetEnablePositionIncrements(false);
    Query query = parser.Parse(String.Format("\"{0}\"", searchTerm));
    Console.WriteLine(query.ToString());

    Hits hitsFound = searcher.Search(query);
    TesisIndx sampleDataFileRow = null;
    for (int i = 0; i < hitsFound.Length(); i++)
    {
        sampleDataFileRow = new TesisIndx();
        Document doc = hitsFound.Doc(i);
        sampleDataFileRow.Ius = int.Parse(doc.Get("Ius"));
        sampleDataFileRow.RubroIndx = doc.Get("RubroIndx");
        sampleDataFileRow.TextoIndx = doc.Get("TextoIndx");
        float score = hitsFound.Score(i);
        sampleDataFileRow.Score = score;
        results.Add(sampleDataFileRow.Ius);
    }

    // Repeat the same phrase search against the Texto field.
    parser = new QueryParser("TextoIndx", analyzer);
    parser.SetEnablePositionIncrements(false);
    query = parser.Parse(String.Format("\"{0}\"", searchTerm));
    Console.WriteLine(query.ToString());

    hitsFound = searcher.Search(query);
    for (int i = 0; i < hitsFound.Length(); i++)
    {
        sampleDataFileRow = new TesisIndx();
        Document doc = hitsFound.Doc(i);
        sampleDataFileRow.Ius = int.Parse(doc.Get("Ius"));
        sampleDataFileRow.RubroIndx = doc.Get("RubroIndx");
        sampleDataFileRow.TextoIndx = doc.Get("TextoIndx");
        float score = hitsFound.Score(i);
        sampleDataFileRow.Score = score;
        results.Add(sampleDataFileRow.Ius);
    }

    // Distinct() is lazy and returns a new sequence; its result must be captured,
    // otherwise duplicate Ius values from the two passes are kept.
    return results.Distinct().ToList();
}
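// The two single-field passes above can usually be collapsed into one query.
// A minimal sketch, not part of the original: it assumes the same indexPath,
// analyzer, and RubroIndx/TextoIndx fields, and uses the instance-style
// MultiFieldQueryParser that appears in other examples on this page.
public List<int> SearchIusesCombined(string searchTerm)
{
    IndexSearcher searcher = new IndexSearcher(FSDirectory.GetDirectory(indexPath));
    MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "RubroIndx", "TextoIndx" }, analyzer);
    parser.SetEnablePositionIncrements(false);
    // One phrase query matched against both fields at once.
    Query query = parser.Parse(String.Format("\"{0}\"", searchTerm));
    List<int> results = new List<int>();
    Hits hits = searcher.Search(query);
    for (int i = 0; i < hits.Length(); i++)
    {
        results.Add(int.Parse(hits.Doc(i).Get("Ius")));
    }
    searcher.Close();
    return results.Distinct().ToList();
}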
public static IDictionary<string, string> Query(string searchTerm)
{
    BuildIndexTask.Wait();
    System.Text.RegularExpressions.Regex rgx = new System.Text.RegularExpressions.Regex("[^a-zA-Z0-9]");
    searchTerm = rgx.Replace(searchTerm, " ");
    IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory);
    QueryParser parser = new QueryParser("Guideline", analyzer);
    Query query = parser.Parse(searchTerm.ToLower());
    Hits hitsFound = searcher.Search(query);
    IDictionary<string, string> results = new Dictionary<string, string>();
    for (int i = 0; i < hitsFound.Length(); i++)
    {
        Document doc = hitsFound.Doc(i);
        float score = hitsFound.Score(i);
        string CodeSnippetName = doc.Get("CodeSnippetName");
        string CodeSnippet = doc.Get("CodeSnippet");
        if (score > 0.6)
        {
            results.Add(CodeSnippetName, CodeSnippet);
        }
    }
    searcher.Close();
    return results;
}
public IEnumerable<DataFileRow> Search(string searchTerm)
{
    IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory);
    QueryParser parser = new QueryParser("LineText", analyzer);
    Query query = parser.Parse(searchTerm);
    Hits hitsFound = searcher.Search(query);
    List<DataFileRow> results = new List<DataFileRow>();
    DataFileRow sampleDataFileRow = null;
    for (int i = 0; i < hitsFound.Length(); i++)
    {
        sampleDataFileRow = new DataFileRow();
        Document doc = hitsFound.Doc(i);
        sampleDataFileRow.LineNumber = int.Parse(doc.Get("LineNumber"));
        sampleDataFileRow.LineText = doc.Get("LineText");
        float score = hitsFound.Score(i);
        sampleDataFileRow.Score = score;
        results.Add(sampleDataFileRow);
    }
    luceneIndexDirectory.Close();
    searcher.Close();
    return results.OrderByDescending(x => x.Score).ToList();
}
public static void Main(string[] args)
{
    string indexpath = args[0];
    string query = args[1];
    IndexSearcher searcher = new IndexSearcher(indexpath);
    Query parsedquery = QueryParser.Parse(query, "summary", new StandardAnalyzer());
    Hits hits = searcher.Search(parsedquery);
    Console.WriteLine("Found " + hits.Length() + " document(s) that matched query '" + query + "':\n");
    for (int i = 0; i < hits.Length(); i++)
    {
        Document doc = hits.Doc(i);
        Console.WriteLine(hits.Score(i) + ": " + doc.Get("excerpt") + "\n");
        if (i == 50)
        {
            break;
        }
    }
    searcher.Close();
}
void DoTitleMatches()
{
    string term = searchterm;
    try
    {
        string[] terms = Regex.Split(term, " +");
        term = "";
        foreach (string t in terms)
        {
            term += t + "~ ";
        }
        searchterm = "title:(" + term + ")";
        DateTime now = DateTime.UtcNow;
        Query query = state.Parse(searchterm);
        Hits hits = state.Searcher.Search(query);
        int numhits = hits.Length();
        LogRequest(searchterm, query, numhits, now);
        SendHeaders(200, "OK");
        for (int i = 0; i < numhits && i < 10; i++)
        {
            Document doc = hits.Doc(i);
            float score = hits.Score(i);
            string pageNamespace = doc.Get("namespace");
            string title = doc.Get("title");
            SendResultLine(score, pageNamespace, title);
        }
    }
    catch (Exception e)
    {
        log.Error(e.Message + e.StackTrace);
    }
}
public List<string> Search(string text)
{
    List<string> lstFilteredValue = new List<string>();
    try
    {
        IndexSearcher MyIndexSearcher = new IndexSearcher(LuceneDirectory);
        Query mainQuery = this.GetParsedQuerywc(text);

        // Do the search
        Hits hits = MyIndexSearcher.Search(mainQuery);
        int results = hits.Length();
        for (int i = 0; i < results; i++)
        {
            Document doc = hits.Doc(i);
            float score = hits.Score(i);
            lstFilteredValue.Add(doc.Get("Name") + "," + doc.Get("Id"));
        }
    }
    catch (Exception GeneralException)
    {
        // Swallows all exceptions; the caller gets an empty list on failure.
    }
    return lstFilteredValue;
}
public static IList<string> Search(string searchTerm)
{
    System.Text.RegularExpressions.Regex rgx = new System.Text.RegularExpressions.Regex("[^a-zA-Z0-9]");
    searchTerm = rgx.Replace(searchTerm, " ");
    IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory);
    QueryParser parser = new QueryParser("Data", analyzer);
    Query query = parser.Parse(searchTerm.ToLower());
    Hits hitsFound = searcher.Search(query);
    IList<string> results = new List<string>();
    for (int i = 0; i < hitsFound.Length(); i++)
    {
        Document doc = hitsFound.Doc(i);
        float score = hitsFound.Score(i);
        string fileName = doc.Get("FileName");
        if (score > 0.6)
        {
            results.Add(fileName);
        }
    }
    searcher.Close();
    return results;
}
public override List<SearchResult> Search(string searchStr)
{
    List<SearchResult> results = new List<SearchResult>();
    string cleanSearchStr = cleaner.Replace(searchStr, "").ToLower().Trim();
    IndexSearcher searcher = new IndexSearcher(directory);

    //QueryParser parser = new QueryParser("title", analyzer);
    //Query query = parser.Parse(cleanSearchStr + "~0.7");
    Query query = parser.Parse(cleanSearchStr + "~0.7");
    Hits hits = searcher.Search(query);
    int resultCount = hits.Length();
    for (int i = 0; i < resultCount; i++)
    {
        SearchResult result = new SearchResult();
        result.Item = DatabaseManager.Get<T>(int.Parse(hits.Doc(i).Get("id")));
        result.Score = hits.Score(i);
        results.Add(result);
    }
    return results;
}
/// <summary>
/// Searches the index.
/// </summary>
/// <param name="queryText"></param>
/// <param name="categoryNames"></param>
/// <param name="pageIndex"></param>
/// <param name="pageSize"></param>
/// <param name="roleIds"></param>
/// <returns></returns>
public SearchResultCollection Find(string queryText, IList<string> categoryNames, int pageIndex, int pageSize, IEnumerable<int> roleIds)
{
    long startTicks = DateTime.Now.Ticks;

    // the overall-query
    BooleanQuery query = new BooleanQuery();

    // add our parsed query
    if (!String.IsNullOrEmpty(queryText))
    {
        Query multiQuery = MultiFieldQueryParser.Parse(
            new[] { queryText, queryText, queryText },
            new[] { "title", "summary", "contents" },
            new StandardAnalyzer());
        query.Add(multiQuery, BooleanClause.Occur.MUST);
    }

    // add the security constraint - must be satisfied
    query.Add(this.BuildSecurityQuery(roleIds), BooleanClause.Occur.MUST);

    // add the category query (if available)
    if (categoryNames != null)
    {
        query.Add(this.BuildCategoryQuery(categoryNames), BooleanClause.Occur.MUST);
    }

    IndexSearcher searcher = new IndexSearcher(this._indexDirectory);
    Hits hits = searcher.Search(query);

    int start = pageIndex * pageSize;
    int end = (pageIndex + 1) * pageSize;
    if (hits.Length() <= end)
    {
        end = hits.Length();
    }

    SearchResultCollection results = new SearchResultCollection(end);
    results.TotalCount = hits.Length();
    results.PageIndex = pageIndex;

    for (int i = start; i < end; i++)
    {
        SearchResult result = new SearchResult();
        result.Title = hits.Doc(i).Get("title");
        result.Summary = hits.Doc(i).Get("summary");
        result.Author = hits.Doc(i).Get("author");
        result.ModuleType = hits.Doc(i).Get("moduletype");
        result.Path = hits.Doc(i).Get("path");
        string[] categories = hits.Doc(i).GetValues("category");
        result.Category = categories != null ? String.Join(", ", categories) : String.Empty;
        result.DateCreated = DateTime.Parse(hits.Doc(i).Get("datecreated"));
        result.Score = hits.Score(i);
        result.Boost = hits.Doc(i).GetBoost();
        result.SectionId = Int32.Parse(hits.Doc(i).Get("sectionid"));
        results.Add(result);
    }
    searcher.Close();
    results.ExecutionTime = DateTime.Now.Ticks - startTicks;
    return results;
}
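// A minimal sketch (not from the original) of the same result loop with the
// Document hoisted into a local, so each field is read from one doc reference
// instead of calling hits.Doc(i) once per field:
for (int i = start; i < end; i++)
{
    Document doc = hits.Doc(i);
    SearchResult result = new SearchResult();
    result.Title = doc.Get("title");
    result.Summary = doc.Get("summary");
    result.Score = hits.Score(i);   // the score still comes from the Hits object
    result.Boost = doc.GetBoost();
    // ...remaining fields are read the same way...
    results.Add(result);
}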
public static void Main(System.String[] args)
{
    try
    {
        Directory directory = new RAMDirectory();
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriter writer = new IndexWriter(directory, analyzer, true);
        System.String[] docs = new System.String[] { "a b c d e", "a b c d e a b c d e", "a b c d e f g h i j", "a c e", "e c a", "a c e a c e", "a c e a b c" };
        for (int j = 0; j < docs.Length; j++)
        {
            Document d = new Document();
            d.Add(Field.Text("contents", docs[j]));
            writer.AddDocument(d);
        }
        writer.Close();

        Searcher searcher = new IndexSearcher(directory);
        System.String[] queries = new System.String[] { "\"a c e\"" };
        Hits hits = null;
        QueryParsers.QueryParser parser = new QueryParsers.QueryParser("contents", analyzer);
        parser.SetPhraseSlop(4);
        for (int j = 0; j < queries.Length; j++)
        {
            Query query = parser.Parse(queries[j]);
            System.Console.Out.WriteLine("Query: " + query.ToString("contents"));
            //DateFilter filter =
            //  new DateFilter("modified", Time(1997,0,1), Time(1998,0,1));
            //DateFilter filter = DateFilter.Before("modified", Time(1997,00,01));
            //System.out.println(filter);
            hits = searcher.Search(query);
            System.Console.Out.WriteLine(hits.Length() + " total results");
            for (int i = 0; i < hits.Length() && i < 10; i++)
            {
                Document d = hits.Doc(i);
                System.Console.Out.WriteLine(i + " " + hits.Score(i) + " " + d.Get("contents"));
            }
        }
        searcher.Close();
    }
    catch (System.Exception e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
public EntityInfo Extract(Hits hits, int index)
{
    Document doc = hits.Doc(index);
    //TODO if we are only looking for score (unlikely), avoid accessing doc (lazy load)
    EntityInfo entityInfo = Extract(doc);
    object[] eip = entityInfo.Projection;

    if (eip != null && eip.Length > 0)
    {
        for (int x = 0; x < projection.Length; x++)
        {
            switch (projection[x])
            {
                case ProjectionConstants.SCORE:
                    eip[x] = hits.Score(index);
                    break;
                case ProjectionConstants.ID:
                    eip[x] = entityInfo.Id;
                    break;
                case ProjectionConstants.DOCUMENT:
                    eip[x] = doc;
                    break;
                case ProjectionConstants.DOCUMENT_ID:
                    eip[x] = hits.Id(index);
                    break;
                case ProjectionConstants.BOOST:
                    eip[x] = doc.GetBoost();
                    break;
                case ProjectionConstants.THIS:
                    //THIS could be projected more than once
                    //THIS loading delayed to the Loader phase
                    if (entityInfo.IndexesOfThis == null)
                    {
                        entityInfo.IndexesOfThis = new List<int>(1);
                    }
                    entityInfo.IndexesOfThis.Add(x);
                    break;
            }
        }
    }
    return entityInfo;
}
private void DoTestSearch(System.IO.StringWriter out_Renamed, bool useCompoundFile)
{
    Directory directory = new RAMDirectory();
    Analyzer analyzer = new SimpleAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer, true);
    writer.SetUseCompoundFile(useCompoundFile);
    System.String[] docs = new System.String[] { "a b c d e", "a b c d e a b c d e", "a b c d e f g h i j", "a c e", "e c a", "a c e a c e", "a c e a b c" };
    for (int j = 0; j < docs.Length; j++)
    {
        Document d = new Document();
        d.Add(Field.Text("contents", docs[j]));
        writer.AddDocument(d);
    }
    writer.Close();

    Searcher searcher = new IndexSearcher(directory);
    System.String[] queries = new System.String[] { "a b", "\"a b\"", "\"a b c\"", "a c", "\"a c\"", "\"a c e\"" };
    Hits hits = null;
    QueryParsers.QueryParser parser = new QueryParsers.QueryParser("contents", analyzer);
    parser.SetPhraseSlop(4);
    for (int j = 0; j < queries.Length; j++)
    {
        Query query = parser.Parse(queries[j]);
        out_Renamed.WriteLine("Query: " + query.ToString("contents"));
        //DateFilter filter =
        //  new DateFilter("modified", Time(1997,0,1), Time(1998,0,1));
        //DateFilter filter = DateFilter.Before("modified", Time(1997,00,01));
        //System.out.println(filter);
        hits = searcher.Search(query);
        out_Renamed.WriteLine(hits.Length() + " total results");
        for (int i = 0; i < hits.Length() && i < 10; i++)
        {
            Document d = hits.Doc(i);
            out_Renamed.WriteLine(i + " " + hits.Score(i) + " " + d.Get("contents"));
        }
    }
    searcher.Close();
}
public string[] Fncsearch(string strSearchContent)
{
    DateTime start = DateTime.Now;
    try
    {
        searcher = new IndexSearcher(this.pathIndex);
    }
    catch (IOException ex)
    {
        System.Windows.Forms.MessageBox.Show("The index doesn't exist or is damaged. Please rebuild the index.\r\n\r\nDetails:\r\n" + ex.Message);
        return null;
    }
    if (strSearchContent.Trim(new char[] { ' ' }) == String.Empty)
    {
        return null;
    }
    if (strSearchContent == "*")
    {
        MessageBox.Show("Sorry, cannot search files with *");
        return null; // without this, the bare wildcard was still passed to the parser
    }
    Query query = QueryParser.Parse(strSearchContent, "text", new StandardAnalyzer());
    Hits hits = searcher.Search(query);
    string[] strTempArr = new string[hits.Length()];
    for (int i = 0; i < hits.Length(); i++)
    {
        Document doc = hits.Doc(i);
        string filename = doc.Get("title");
        string path = doc.Get("path");
        string folder = System.IO.Path.GetDirectoryName(path);
        DirectoryInfo di = new DirectoryInfo(folder);
        string s = filename + " " + path + " " + hits.Score(i).ToString();
        System.Windows.Forms.ListViewItem item = new System.Windows.Forms.ListViewItem(new string[] { null, filename, di.Name, hits.Score(i).ToString() });
        item.Tag = path;
        strTempArr[i] = s;
    }
    searcher.Close();
    //string searchReport = String.Format("Search took {0}. Found {1} items.", (DateTime.Now - start), hits.Length());
    //status(searchReport);
    return strTempArr;
}
internal IList<ISbItem> OpenSearchRequest(string phrase, out int total)
{
    IList<ISbItem> versiculos = new List<ISbItem>();
    IndexSearcher searcher = this.DefaultIndex.GetIndex();
    QueryParser queryParser = new QueryParser("versiculo", new StandardAnalyzer());
    Hits hits = searcher.Search(queryParser.Parse(phrase));
    total = hits.Length();
    for (int i = 0; i < (hits.Length() > maxOpenSearchResults ? maxOpenSearchResults : hits.Length()); i++)
    {
        ISbItem item = this.DefaultContainer.Ext().GetByID(Convert.ToInt64(hits.Doc(i).Get("id"))) as ISbItem;
        item.Tag = hits.Score(i).ToString();
        this.DefaultContainer.Activate(item, 1);
        versiculos.Add(item);
    }
    return versiculos;
}
public DataTable search(string searchText)
{
    DateTime start = DateTime.Now;
    try
    {
        searcher = new IndexSearcher(this.pathIndex);
    }
    catch (IOException ex)
    {
        throw new IndexDamagedException("The index doesn't exist or is damaged. Please rebuild the index.\r\n\r\nDetails:\r\n" + ex.Message);
    }
    if (searchText.Trim() != null && searchText.Trim().Length == 0)
    {
        return new DataTable();
    }
    Query query = QueryParser.Parse(searchText, "text", new StandardAnalyzer());
    Hits hits = searcher.Search(query);
    DataTable dt = new DataTable();
    dt.Columns.Add("title", typeof(string));
    dt.Columns.Add("path", typeof(string));
    dt.Columns.Add("hits", typeof(string));
    for (int i = 0; i < hits.Length(); i++)
    {
        DataRow dr = dt.NewRow();
        Document doc = hits.Doc(i);
        dr["title"] = doc.Get("title");
        dr["path"] = doc.Get("path");
        dr["hits"] = hits.Score(i).ToString();
        dt.Rows.Add(dr);
    }
    searcher.Close();
    return dt;
}
void DoRawSearch()
{
    DateTime now = DateTime.UtcNow;
    Query query = state.Parse(searchterm);
    Hits hits = state.Searcher.Search(query);
    int numhits = hits.Length();
    LogRequest("(raw)", query, numhits, now);
    SendHeaders(200, "OK");
    for (int i = 0; i < numhits && i < 10; i++)
    {
        Document doc = hits.Doc(i);
        float score = hits.Score(i);
        string pageNamespace = doc.Get("namespace");
        string title = doc.Get("title");
        SendResultLine(score, pageNamespace, title);
    }
}
/// <summary>
/// This method searches for the search term passed by the caller.
/// </summary>
/// <param name="searchTerm">The search term as a string that the caller wants to search for within the
/// index as referenced by this object.</param>
/// <param name="ids">An out parameter that is populated by this method for the caller with document ids.</param>
/// <param name="results">An out parameter that is populated by this method for the caller with document text.</param>
/// <param name="scores">An out parameter that is populated by this method for the caller with document scores.</param>
internal int Search(string searchTerm, out Object[] ids, out string[] results, out float[] scores, WordExpander expandWithWordNet, bool expandOnNoHits)
{
    checkDbLock();
    if (!IsDbPresent)
    {
        ids = new Document[0];
        results = new string[0];
        scores = new float[0];
        return 0;
    }
    IndexSearcher indexSearcher = new IndexSearcher(_directory);
    try
    {
        QueryParser queryParser = new QueryParser(_fieldName, _analyzer);
        Query query = queryParser.Parse(searchTerm);
        Hits hits = indexSearcher.Search(query);
        int numHits = hits.Length();

        ids = new Document[numHits];
        results = new string[numHits];
        scores = new float[numHits];
        for (int i = 0; i < numHits; ++i)
        {
            float score = hits.Score(i);
            var hdoc = hits.Doc(i);
            string text = hdoc.Get(_fieldName);
            //string idAsText = hdoc.Get(MyLuceneIndexer.DOC_ID_FIELD_NAME);
            ids[i] = hdoc;
            results[i] = text;
            scores[i] = score;
        }

        if (numHits == 0 && expandOnNoHits)
        {
            // Try expansion
            //QueryParser queryParser = new QueryParser(_fieldName, _analyzer);
            MultiFieldQueryParser queryParserWN = new MultiFieldQueryParser(
                new string[] { _fieldName, MyLuceneIndexer.HYPO_FIELD_NAME }, _analyzer);
            string hypo_expand = expandWithWordNet(searchTerm, false);
            Query queryWN = queryParserWN.Parse(hypo_expand);
            Hits hitsWN = indexSearcher.Search(queryWN);
            int numHitsWN = hitsWN.Length();
            ids = new Document[numHitsWN];
            results = new string[numHitsWN];
            scores = new float[numHitsWN];
            for (int i = 0; i < numHitsWN; ++i)
            {
                float score = hitsWN.Score(i);
                string text = hitsWN.Doc(i).Get(_fieldName);
                //string idAsText = hitsWN.Doc(i).Get(MyLuceneIndexer.DOC_ID_FIELD_NAME);
                ids[i] = hitsWN.Doc(i); // UInt64.Parse(idAsText);
                results[i] = text;
                scores[i] = score;
            }
        }
    }
    finally
    {
        indexSearcher.Close();
    }
    return ids.Length;
}
internal int DeleteTopScoring0(string searchQuery, bool mustContainExact)
{
    checkDbLock();
    if (!IsDbPresent)
    {
        return 0;
    }
    // Searching:
    ulong[] ids;
    string[] results;
    float[] scores;
    int numHits;

    // find it
    writeToLog("Replacing best \"{0}\"...", searchQuery);
    //Search(query, out ids, out results, out scores);
    IndexSearcher indexSearcher = new IndexSearcher(_directory);
    int deleted = 0;
    try
    {
        QueryParser queryParser = new QueryParser(_fieldName, _analyzer);
        Query query = queryParser.Parse(searchQuery);
        Hits hits = indexSearcher.Search(query);
        string searchQueryToLower = searchQuery.ToLower();
        numHits = hits.Length();

        // if we want to do something smarter later
        ids = new ulong[numHits];
        results = new string[numHits];
        scores = new float[numHits];
        for (int i = 0; i < numHits; ++i)
        {
            float score = hits.Score(i);
            string text = hits.Doc(i).Get(_fieldName);
            string idAsText = hits.Doc(i).Get(MyLuceneIndexer.DOC_ID_FIELD_NAME);
            ids[i] = UInt64.Parse(idAsText);
            results[i] = text;
            scores[i] = score;
        }

        if (numHits > 0)
        {
            //IndexReader indexReader = indexSearcher.GetIndexReader();
            IndexWriter indexWriter = new IndexWriter(_directory, _analyzer);
            float topscore = scores[0];
            for (int i = 0; i < numHits; i++)
            {
                if (scores[i] == topscore)
                {
                    if (mustContainExact)
                    {
                        // checks word order basically?
                        if (!results[i].ToLower().Contains(searchQueryToLower))
                        {
                            writeToLog("Cannot or wont delete " + searchQueryToLower);
                            continue;
                        }
                    }
                    writeToLog("DEBUG9 deleting " + searchQueryToLower);
                    //indexSearcher.GetIndexReader().DeleteDocument(i);
                    //indexReader.DeleteDocuments(new Term(MyLuceneIndexer.DOC_ID_FIELD_NAME, ids[i].ToString()));
                    indexWriter.DeleteDocuments(new Term(MyLuceneIndexer.DOC_ID_FIELD_NAME, ids[i].ToString()));
                    deleted++;
                }
            }
            //indexReader.Commit();
            //indexReader.Close();
            indexWriter.Commit();
            indexWriter.Close();
        }
    }
    finally
    {
        indexSearcher.Close();
    }
    return deleted;
}
/// <summary>
/// Builds the result list from the hits.
/// </summary>
/// <returns></returns>
private List<ProductSimpleInfo> ProductBinding(Hits hits, string key, int order, int pageNo, int pageLen, out int recCount)
{
    recCount = hits.Length();

    // merge the hits into ProductSimpleInfo records
    int n = 0;
    ProductSimpleInfo info = null;
    IList<ProductSimpleInfo> list = new List<ProductSimpleInfo>();
    while (n < recCount)
    {
        // skip results whose match score is too low
        if (hits.Score(n) < 0.01)
        {
            n++;
            continue;
        }
        try
        {
            info = new ProductSimpleInfo()
            {
                productID = int.Parse(hits.Doc(n).Get("ProductID")),
                productCode = hits.Doc(n).Get("ProductCode"),
                chineseName = hits.Doc(n).Get("ChineseName"),
                cadn = hits.Doc(n).Get("CADN"),
                longName = hits.Doc(n).Get("LongName"),
                pinyinName = hits.Doc(n).Get("PinyinName"),
                marketPrice = decimal.Parse(hits.Doc(n).Get("MarketPrice")),
                tradePrice = decimal.Parse(hits.Doc(n).Get("TradePrice")),
                sellCount = int.Parse(hits.Doc(n).Get("SellCount")),
                favorCount = int.Parse(hits.Doc(n).Get("Favorite")),
                productType = short.Parse(hits.Doc(n).Get("ProductType")),
                specifications = hits.Doc(n).Get("Specifications"),
                images = hits.Doc(n).Get("Images"),
                actions = short.Parse(hits.Doc(n).Get("Actions")),
                comments = int.Parse(hits.Doc(n).Get("Comments")),
                selling = int.Parse(hits.Doc(n).Get("Selling")),
                manufacturer = hits.Doc(n).Get("Manufacturer"),
                sellingTime = DateTime.Parse(hits.Doc(n).Get("SellingTime")),
                recommend = float.Parse(hits.Doc(n).Get("Recommend")),
                preferential = float.Parse(hits.Doc(n).Get("Preferential")),
                brandName = hits.Doc(n).Get("BrandName")
            };
            if (order <= 0)
            {
                info.level = GetProductLevel(hits.Score(n), 1);
                info.score = GetProductLevel(info.sellCount, 0) + n;
            }
        }
        catch { }
        finally
        {
            if (info != null)
            {
                list.Add(info);
            }
            n++;
        }
    }
    recCount = list.Count();

    // page and sort the result set
    switch (order)
    {
        case 1: // popularity, descending
            return list.OrderByDescending(i => i.favorCount).Skip(pageLen * (pageNo - 1)).Take(pageLen).ToList();
        case 2: // popularity, ascending
            return list.OrderBy(i => i.favorCount).Skip(pageLen * (pageNo - 1)).Take(pageLen).ToList();
        case 3: // newest, descending
            return list.OrderByDescending(i => i.sellingTime).Skip(pageLen * (pageNo - 1)).Take(pageLen).ToList();
        case 4: // newest, ascending
            return list.OrderBy(i => i.sellingTime).Skip(pageLen * (pageNo - 1)).Take(pageLen).ToList();
        case 5: // sales, descending
            return list.OrderByDescending(i => i.sellCount).Skip(pageLen * (pageNo - 1)).Take(pageLen).ToList();
        case 6: // sales, ascending
            return list.OrderBy(i => i.sellCount).Skip(pageLen * (pageNo - 1)).Take(pageLen).ToList();
        case 7: // price, descending
            return list.OrderByDescending(i => i.tradePrice).Skip(pageLen * (pageNo - 1)).Take(pageLen).ToList();
        case 8: // price, ascending
            return list.OrderBy(i => i.tradePrice).Skip(pageLen * (pageNo - 1)).Take(pageLen).ToList();
        case 9: // composite, ascending
            return list.OrderBy(i => i.level).ThenBy(i => i.score).Skip(pageLen * (pageNo - 1)).Take(pageLen).ToList();
        default: // composite, descending
            return list.OrderByDescending(i => i.level).ThenBy(i => i.score).Skip(pageLen * (pageNo - 1)).Take(pageLen).ToList();
    }
}
/// <summary>
/// Extracts product facet parameters (brand/category) from the hits.
/// </summary>
/// <param name="hits"></param>
private IList<JXSearchEntity> ProductParaList(Hits hits)
{
    int id = 0;
    IList<JXSearchEntity> list = new List<JXSearchEntity>();
    try
    {
        int recCount = hits.Length(); // total number of hits
        for (int j = 0; j < recCount; j++)
        {
            if (hits.Score(j) < 0.01)
            {
                continue;
            }
            #region Category 1
            if (hits.Doc(j).Get("CFID1").Length > 0)
            {
                id = int.Parse(hits.Doc(j).Get("CFID1"));
                if (list.Where(g => g.id == id && g.typeID == 2).Count() <= 0)
                {
                    list.Add(new JXSearchEntity()
                    {
                        id = id,
                        chineseName = hits.Doc(j).Get("CFName1"),
                        typeID = 2,
                        parentID = int.Parse(hits.Doc(j).Get("ParentID1"))
                    });
                }
            }
            #endregion
            #region Category 2
            if (hits.Doc(j).Get("CFID2").Length > 0)
            {
                id = int.Parse(hits.Doc(j).Get("CFID2"));
                if (list.Where(g => g.id == id && g.typeID == 2).Count() <= 0)
                {
                    list.Add(new JXSearchEntity()
                    {
                        id = id,
                        chineseName = hits.Doc(j).Get("CFName2"),
                        typeID = 2,
                        parentID = int.Parse(hits.Doc(j).Get("ParentID2"))
                    });
                }
            }
            #endregion
            #region Category 3
            if (hits.Doc(j).Get("CFID3").Length > 0)
            {
                id = int.Parse(hits.Doc(j).Get("CFID3"));
                if (list.Where(g => g.id == id && g.typeID == 2).Count() <= 0)
                {
                    list.Add(new JXSearchEntity()
                    {
                        id = id,
                        chineseName = hits.Doc(j).Get("CFName3"),
                        typeID = 2,
                        parentID = int.Parse(hits.Doc(j).Get("ParentID3"))
                    });
                }
            }
            #endregion
            #region Brand
            if (hits.Doc(j).Get("BrandID").Length > 0)
            {
                id = int.Parse(hits.Doc(j).Get("BrandID"));
                if (list.Where(g => g.brandID == id && g.typeID == 5).Count() <= 0)
                {
                    list.Add(new JXSearchEntity()
                    {
                        brandID = id,
                        brandName = hits.Doc(j).Get("BrandName"),
                        letter = hits.Doc(j).Get("BrandLetter"),
                        typeID = 5
                    });
                }
            }
            #endregion
        }
    }
    catch { }
    return list;
}
/// <summary>
/// Searches the keyword index using the keywordQuery.
///
/// See http://www.dotlucene.net/documentation/QuerySyntax.html for the format of the keywordQuery.
///
/// This function will return a fully-filled array of IndexableFileInfo objects.
/// </summary>
/// <param name="keywordQuery"></param>
/// <param name="queryForHighlighter"></param>
/// <returns></returns>
public IndexableFileInfo[] doSearch(string keywordQuery, string queryForHighlighter)
{
    IndexSearcher searcher;
    IndexReader indexReader;
    try
    {
        FSDirectory indexDir = FSDirectory.GetDirectory(this.luceneIndexDir, false);
        indexReader = IndexReader.Open(indexDir);
        searcher = new IndexSearcher(indexReader);
    }
    catch
    {
        // if the luceneIndexDir does not contain index files (yet), IndexSearcher
        // throws a nice Exception.
        return new IndexableFileInfo[0];
    }

    List<IndexableFileInfo> arrayList = new List<IndexableFileInfo>();
    try
    {
        string Query = keywordQuery;
        if (Query == String.Empty)
        {
            return new IndexableFileInfo[0];
        }
        string HighlighterQuery = queryForHighlighter;
        // -- weirdly enough, when the query is empty, an exception is thrown during QueryParser.Parse;
        //    this hack gets around that.
        if (HighlighterQuery == String.Empty)
        {
            HighlighterQuery = Guid.NewGuid().ToString();
        }

        // parse the query, "text" is the default field to search
        // note: use the StandardAnalyzer! (the SimpleAnalyzer doesn't work correctly when searching by fields that are integers!)
        // MultiFieldQueryParser queryParser = new MultiFieldQueryParser(new string[] { "title", "contents" }, new hatWebPortalAnalyzer());
        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(new string[] { "title", "contents" }, new SimpleAnalyzer());
        queryParser.SetDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = queryParser.Parse(Query);
        QueryParser highlightQueryParser = new QueryParser("contents", new hatWebPortalAnalyzer());
        Query highlighterQuery = highlightQueryParser.Parse(HighlighterQuery);

        query = searcher.Rewrite(query); // is this needed?? "Expert: called to re-write queries into primitive queries."

        // search
        Hits hits = searcher.Search(query, Sort.RELEVANCE);

        // create highlighter
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), new QueryScorer(highlighterQuery));

        // -- go through hits and return results
        for (int i = 0; i < hits.Length(); i++)
        {
            Document d = hits.Doc(i);
            string filename = d.Get("filename");
            string plainText = d.Get("contents");
            string title = d.Get("title");
            string sectionName = d.Get("SectionName");
            string filenameParams = d.Get("filenameParams");
            bool contentIsPageSummary = Convert.ToBoolean(d.Get("contentIsPageSummary"));
            double score = Convert.ToDouble(hits.Score(i));
            DateTime lastModified = DateTools.StringToDate(d.Get("LastModified"));

            TokenStream tokenStream = new hatWebPortalAnalyzer().TokenStream("contents", new StringReader(plainText));
            string fragment = plainText;
            if (!contentIsPageSummary)
            {
                fragment = highlighter.GetBestFragments(tokenStream, plainText, 2, "...");
            }
            IndexableFileInfo newHit = new IndexableFileInfo(filename, filenameParams, title, fragment, sectionName, lastModified, contentIsPageSummary, score);
            arrayList.Add(newHit);
        }
    }
    finally
    {
        searcher.Close();
        indexReader.Close();
    }
    return arrayList.ToArray();
}
/// <summary>
/// search support multiple modules
/// </summary>
/// <param name="siteId"></param>
/// <param name="isAdmin"></param>
/// <param name="userRoles"></param>
/// <param name="queryText"></param>
/// <param name="highlightResults"></param>
/// <param name="highlightedFragmentSize"></param>
/// <param name="pageNumber"></param>
/// <param name="pageSize"></param>
/// <param name="totalHits"></param>
/// <param name="invalidQuery"></param>
/// <param name="moduleIDs"></param>
/// <returns></returns>
public static IndexItemCollection Search(
    int siteId,
    bool isAdmin,
    List<string> userRoles,
    string queryText,
    bool highlightResults,
    int highlightedFragmentSize,
    int pageNumber,
    int pageSize,
    out int totalHits,
    out bool invalidQuery,
    params Guid[] moduleIDs)
{
    invalidQuery = false;
    totalHits = 0;
    string indexPath = GetIndexPath(siteId);
    IndexItemCollection results = new IndexItemCollection();
    if (string.IsNullOrEmpty(queryText))
    {
        return results;
    }

    bool useBackwardCompatibilityMode = true;
    if (
        (ConfigurationManager.AppSettings["SearchUseBackwardCompatibilityMode"] != null)
        && (ConfigurationManager.AppSettings["SearchUseBackwardCompatibilityMode"] == "false")
        )
    {
        useBackwardCompatibilityMode = false;
    }

    bool IncludeModuleRoleFilters = false;
    if (
        (ConfigurationManager.AppSettings["SearchIncludeModuleRoleFilters"] != null)
        && (ConfigurationManager.AppSettings["SearchIncludeModuleRoleFilters"] == "true")
        )
    {
        IncludeModuleRoleFilters = true;
    }

    if (IndexReader.IndexExists(indexPath))
    {
        if (log.IsDebugEnabled)
        {
            log.Debug("Entered Search, indexPath = " + indexPath);
        }
        long startTicks = DateTime.Now.Ticks;
        try
        {
            BooleanQuery mainQuery = new BooleanQuery();

            if ((!isAdmin) && (!useBackwardCompatibilityMode))
            {
                AddRoleQueries(userRoles, mainQuery);
            }
            if ((!isAdmin) && (IncludeModuleRoleFilters))
            {
                AddModuleRoleQueries(userRoles, mainQuery);
            }

            Query multiQuery = MultiFieldQueryParser.Parse(
                new string[] { queryText, queryText, queryText, queryText, queryText, queryText.Replace("*", string.Empty) },
                new string[] { "Title", "ModuleTitle", "contents", "PageName", "PageMetaDesc", "Keyword" },
                new StandardAnalyzer());
            mainQuery.Add(multiQuery, BooleanClause.Occur.MUST);

            if (!useBackwardCompatibilityMode)
            {
                Term beginDateStart = new Term("PublishBeginDate", DateTime.MinValue.ToString("s"));
                Term beginDateEnd = new Term("PublishBeginDate", DateTime.UtcNow.ToString("s"));
                RangeQuery beginDateQuery = new RangeQuery(beginDateStart, beginDateEnd, true);
                mainQuery.Add(beginDateQuery, BooleanClause.Occur.MUST);

                Term endDateStart = new Term("PublishEndDate", DateTime.UtcNow.ToString("s"));
                Term endDateEnd = new Term("PublishEndDate", DateTime.MaxValue.ToString("s"));
                RangeQuery endDateQuery = new RangeQuery(endDateStart, endDateEnd, true);
                mainQuery.Add(endDateQuery, BooleanClause.Occur.MUST);
            }

            if (moduleIDs != null && moduleIDs.Length > 0)
            {
                BooleanQuery featureFilter = new BooleanQuery();
                moduleIDs.ToList().ForEach(x =>
                {
                    if (x != Guid.Empty)
                    {
                        featureFilter.Add(new TermQuery(new Term("FeatureId", x.ToString())), BooleanClause.Occur.SHOULD);
                    }
                });
                if (featureFilter.Clauses().Count > 0)
                {
                    mainQuery.Add(featureFilter, BooleanClause.Occur.MUST);
                }
            }

            IndexSearcher searcher = new IndexSearcher(indexPath);
            // a 0 based collection
            Hits hits = searcher.Search(mainQuery);

            int startHit = 0;
            if (pageNumber > 1)
            {
                startHit = ((pageNumber - 1) * pageSize);
            }
            totalHits = hits.Length();
            int end = startHit + pageSize;
            if (totalHits <= end)
            {
                end = totalHits;
            }
            int itemsAdded = 0;
            int itemsToAdd = end;

            // in backward compatibility mode, if multiple pages of results are found we may not be showing
            // every user the correct number of hits they can see, as we only filter out the current page.
            // we may decrement total hits if filtering results, so keep the original count
            int actualHits = totalHits;

            if (!useBackwardCompatibilityMode)
            {
                // this new way is much cleaner:
                // all filtering is done by query so the hitcount is true,
                // whereas with the old way it could be wrong since there
                // were possibly results filtered out after the query returned.
                QueryScorer scorer = new QueryScorer(multiQuery);
                Formatter formatter = new SimpleHTMLFormatter("<span class='searchterm'>", "</span>");
                Highlighter highlighter = new Highlighter(formatter, scorer);
                highlighter.SetTextFragmenter(new SimpleFragmenter(highlightedFragmentSize));

                for (int i = startHit; i < itemsToAdd; i++)
                {
                    IndexItem indexItem = new IndexItem(hits.Doc(i), hits.Score(i));
                    if (highlightResults)
                    {
                        try
                        {
                            TokenStream stream = new StandardAnalyzer().TokenStream("contents", new StringReader(hits.Doc(i).Get("contents")));
                            string highlightedResult = highlighter.GetBestFragment(stream, hits.Doc(i).Get("contents"));
                            if (highlightedResult != null)
                            {
                                indexItem.Intro = highlightedResult;
                            }
                        }
                        catch (NullReferenceException) { }
                    }
                    results.Add(indexItem);
                    itemsAdded += 1;
                }
            }
            else
            {
                // backward compatible with old indexes
                int filteredItems = 0;
                for (int i = startHit; i < itemsToAdd; i++)
                {
                    bool needToDecrementTotalHits = false;
                    if (
                        (isAdmin)
                        || (WebUser.IsContentAdmin)
                        || (WebUser.IsInRoles(hits.Doc(i).Get("ViewRoles")))
                        )
                    {
                        IndexItem indexItem = new IndexItem(hits.Doc(i), hits.Score(i));
                        if (
                            (DateTime.UtcNow > indexItem.PublishBeginDate)
                            && (DateTime.UtcNow < indexItem.PublishEndDate)
                            )
                        {
                            results.Add(indexItem);
                        }
                        else
                        {
                            needToDecrementTotalHits = true;
                        }
                    }
                    else
                    {
                        needToDecrementTotalHits = true;
                    }

                    // filtered out a result so need to decrement
                    if (needToDecrementTotalHits)
                    {
                        filteredItems += 1;
                        totalHits -= 1;
                        // we also are not getting as many results as the page size, so if there are more items
                        // we should increment itemsToAdd
                        if ((itemsAdded + filteredItems) < actualHits)
                        {
                            itemsToAdd += 1;
                        }
                    }
                }
            }

            searcher.Close();
            results.ItemCount = itemsAdded;
            results.PageIndex = pageNumber;
            results.ExecutionTime = DateTime.Now.Ticks - startTicks;
        }
        catch (ParseException ex)
        {
            invalidQuery = true;
            log.Error("handled error for search terms " + queryText, ex);
            // these parser exceptions are generally caused by
            // spambots posting too much junk into the search form;
            // here's an option to automatically ban the ip address
            HandleSpam(queryText, ex);
            return results;
        }
        catch (BooleanQuery.TooManyClauses ex)
        {
            invalidQuery = true;
            log.Error("handled error for search terms " + queryText, ex);
            return results;
        }
    }
    return results;
}
static void Main(string[] args)
{
    String indexPath = @"C:\Users\Brandon\Desktop\Multimedia Retrieval\W3 Files\Index";

    // Analyzers build token streams which analyze text
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = new IndexWriter(indexPath, analyzer, true);

    // Set the seed URL and initialize the crawler
    String seedUrl = "http://sydney.edu.au/engineering/it/";
    WebCrawler crawler = new WebCrawler();
    Queue<String> linkQueue = new Queue<String>();
    linkQueue.Enqueue(seedUrl);
    HashSet<String> linkSet = new HashSet<String>();
    Console.Write("Sites Explored: 0");

    // Iteratively extract links from the first URL in the frontier
    // and add its content to the index
    while (linkQueue.Count != 0 && linkSet.Count < 50)
    {
        String currentLink = linkQueue.Dequeue();
        try
        {
            if (linkSet.Contains(currentLink))
            {
                continue;
            }
            String content = crawler.getUrlContent(currentLink);
            crawler.getLinks(linkQueue, content, currentLink);
            linkSet.Add(currentLink);

            Document doc = new Document();
            doc.Add(new Field("link", currentLink, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
            Console.Write("\rSites Explored: {0}", linkSet.Count);
        }
        catch (Exception)
        {
            continue;
        }
    }
    writer.Optimize();
    writer.Close();
    Console.WriteLine();

    // Execute the search
    String search = "suits";
    QueryParser parser = new QueryParser("content", analyzer);
    Query query = parser.Parse(search);
    var searcher = new IndexSearcher(indexPath);
    Hits hits = searcher.Search(query);
    int results = hits.Length();
    Console.WriteLine("Found {0} results for \"{1}\"", results, search);
    for (int i = 0; i < results; i++)
    {
        Document doc = hits.Doc(i);
        float score = hits.Score(i);
        Console.WriteLine("Result num {0}, score {1}", i + 1, score);
        Console.WriteLine("URL: {0}", doc.Get("link"));
    }
}
public override void Search(string SearchString)
{
    base.Search(SearchString);
    SearchString = SearchString.ToLower();
    var dir = FSDirectory.GetDirectory(IndexPath, false);
    var searcher = new IndexSearcher(dir);
    var parser = new QueryParser(ContentField, new StandardAnalyzer());
    foreach (var s in SearchString.Split(new[] { ' ' }))
    {
        var query = parser.GetFuzzyQuery(ContentField, s, MinSimilarity);
        Hits hits = searcher.Search(query);
        for (int i = 0; i < hits.Length(); i++)
        {
            Document doc = hits.Doc(i);
            var result = new SearchResult
            {
                Score = hits.Score(i),
                Plugin = doc.Get(PluginField)
            };

            // Read the text of the current document
            string text = doc.Get(ContentField);

            // Read all indexed terms of this document
            var tpv = (TermPositionVector)IndexReader.Open(dir).GetTermFreqVector(hits.Id(i), ContentField);
            String[] DocTerms = tpv.GetTerms();

            // Read the occurrence counts of all terms
            int[] freq = tpv.GetTermFrequencies();
            var words = new List<string>(DocTerms);

            // Find the positions where the search term occurs
            for (int t = 0; t < freq.Length; t++)
            {
                // If the search term matches the current term...
                if (ContainsSearchString(SearchString, DocTerms[t], words))
                {
                    // ...we can read its positions
                    TermVectorOffsetInfo[] offsets = tpv.GetOffsets(t);

                    // The array now holds every occurrence of the search term, each with start and end offsets
                    for (int j = 0; j < offsets.Length; j++)
                    {
                        // Extract a small context snippet around the hit so the user can make sense of it
                        int start = offsets[j].GetStartOffset();
                        int end = offsets[j].GetEndOffset();
                        int contextStart = start - ContextLeftOffset;
                        contextStart = contextStart < 0 ? 0 : contextStart;
                        int contextEnd = end + ContextRightOffset;
                        contextEnd = contextEnd > text.Length ? text.Length : contextEnd;

                        // Read on to the end of the next word to make the result more readable
                        int nextEndSpace = text.IndexOf(" ", contextEnd);
                        contextEnd = nextEndSpace > 0 ? nextEndSpace : contextEnd;

                        // At most this many characters are scanned for a space to the left of the hit
                        int leftSpaceOffset = contextStart;

                        // Find the nearest space to the left of the hit
                        int nextStartSpace = text.LastIndexOf(" ", contextStart, leftSpaceOffset);

                        // If there is no space nearby, nothing needs to change
                        contextStart = nextStartSpace > 0 ? nextStartSpace : contextStart;
                        int contextLength = contextEnd - contextStart;
                        contextLength = contextLength > text.Length ? text.Length : contextLength;

                        // Extract the context...
                        string context = text.Substring(contextStart, contextLength);

                        // ...and add it to the search results along with the plug-in name and hit score
                        result.Contexts.Add(context);
                    }
                }
            }
            SearchResults.Add(result);
        }
    }
}
/// <summary>
/// Queries the index.
/// </summary>
/// <param name="fieldName">field name</param>
/// <param name="keywords">keywords</param>
/// <param name="pageIndex">current page</param>
/// <param name="pageSize">page size</param>
/// <param name="totalRecord">total record count</param>
/// <returns>list of index entries</returns>
/// <remarks>2013-08-15 created by 朱成果</remarks>
public List<CBPdProductIndex> QueryDoc(string fieldName, string keywords, int pageIndex, int pageSize, out int totalRecord)
{
    var search = new IndexSearcher(IndexStorePath);
    Query searchQuery;
    if (!string.IsNullOrEmpty(fieldName) && !string.IsNullOrEmpty(keywords))
    {
        #region [Keyword query]
        var query = new BooleanQuery();
        BooleanQuery childQuery;
        BooleanQuery esenQuery;
        if (fieldName == "ProductName")
        {
            #region 2016-4-6 杨浩: added fuzzy search
            childQuery = new BooleanQuery();
            esenQuery = new BooleanQuery();

            // tokenize with the PanGu analyzer
            var keyWordsSplitBySpace = GetKeyWordsSplitBySpace(keywords);
            QueryParser productNameQueryParser = new QueryParser(global::Lucene.Net.Util.Version.LUCENE_29, "ProductName", new PanGuAnalyzer(true));
            Query productNameQuery = productNameQueryParser.Parse(keyWordsSplitBySpace);
            childQuery.Add(productNameQuery, BooleanClause.Occur.SHOULD);

            // prefix match: typing "ja" finds results containing both java and javascript
            Query prefixQuery_productName = new PrefixQuery(new Term("ProductName", keywords.Trim()));
            // direct fuzzy match: searching for something like "wuzza" may return "fuzzy" and "wuzzy"
            Query fuzzyQuery_productName = new FuzzyQuery(new Term("ProductName", keywords.Trim()));
            // wildcard search
            Query wildcardQuery_productName = new WildcardQuery(new Term("ProductName", string.Format("{0}", keywords.Trim())));
            childQuery.Add(prefixQuery_productName, BooleanClause.Occur.SHOULD);
            childQuery.Add(fuzzyQuery_productName, BooleanClause.Occur.SHOULD);
            childQuery.Add(wildcardQuery_productName, BooleanClause.Occur.SHOULD);
            childQuery.SetBoost(4.0F);
            query.Add(childQuery, BooleanClause.Occur.MUST);
            #endregion
        }
        else if (fieldName == "Category")
        {
            childQuery = new BooleanQuery();
            esenQuery = new BooleanQuery();
            esenQuery.Add(new TermQuery(new Term("Category", keywords)), BooleanClause.Occur.SHOULD);
            esenQuery.SetBoost(3.0F);
            childQuery.Add(esenQuery, BooleanClause.Occur.SHOULD);
            esenQuery = new BooleanQuery();
            esenQuery.Add(new WildcardQuery(new Term("AssociationCategory", string.Format("*,{0},*", keywords))), BooleanClause.Occur.SHOULD);
            esenQuery.SetBoost(2.8F);
            childQuery.Add(esenQuery, BooleanClause.Occur.SHOULD);
            query.Add(childQuery, BooleanClause.Occur.MUST);
        }
        else if (fieldName == "BrandSysNo")
        {
            childQuery = new BooleanQuery();
            childQuery.Add(new TermQuery(new Term("BrandSysNo", keywords)), BooleanClause.Occur.SHOULD);
            childQuery.SetBoost(3.0F);
            query.Add(childQuery, BooleanClause.Occur.MUST);
        }
        else if (fieldName == "DealerSysNos")
        {
            childQuery = new BooleanQuery();
            childQuery.Add(new WildcardQuery(new Term("DealerSysNos", string.Format("*,{0},*", keywords))), BooleanClause.Occur.SHOULD);
            childQuery.SetBoost(2.8F);
            query.Add(childQuery, BooleanClause.Occur.MUST);
        }
        else if (fieldName == "ProductGroupCode")
        {
            childQuery = new BooleanQuery();
            childQuery.Add(new WildcardQuery(new Term("ProductGroupCode", string.Format("*,{0},*", keywords))), BooleanClause.Occur.SHOULD);
            childQuery.SetBoost(2.8F);
            query.Add(childQuery, BooleanClause.Occur.MUST);
        }
        else
        {
            query.Add(new TermQuery(new Term(fieldName, keywords)), BooleanClause.Occur.SHOULD);
        }
        #endregion
        searchQuery = query;
    }
    else
    {
        searchQuery = new WildcardQuery(new Term("ProductName", "*雪花秀*"));
    }

    // sort order
    var sort = new Sort();

    // search
    Hits hits = search.Search(searchQuery, sort);
    totalRecord = hits.Length(); // total record count

    // page window is [startIndex, endIndex)
    int startIndex = (pageIndex - 1) * pageSize;
    if (startIndex < 0)
    {
        startIndex = 0;
    }
    int endIndex = startIndex + pageSize;
    if (endIndex > totalRecord)
    {
        endIndex = totalRecord;
    }
    List<CBPdProductIndex> lst = new List<CBPdProductIndex>();
    for (int i = startIndex; i < endIndex; i++)
    {
        var doc = hits.Doc(i);
        lst.Add(new CBPdProductIndex
        {
            DocID = hits.Id(i),
            Score = hits.Score(i),
            AssociationCategory = doc.Get("AssociationCategory"),
            Attributes = doc.Get("Attributes"),
            Barcode = doc.Get("Barcode"),
            BrandSysNo = Convert.ToInt32(doc.Get("BrandSysNo")),
            Category = Convert.ToInt32(doc.Get("Category")),
            DisplayOrder = Convert.ToInt32(doc.Get("DisplayOrder")),
            NameAcronymy = doc.Get("NameAcronymy"),
            Prices = doc.Get("Prices"),
            ProductImage = doc.Get("ProductImage"),
            ProductName = doc.Get("ProductName"),
            QRCode = doc.Get("QRCode"),
            Status = Convert.ToInt32(doc.Get("Status")),
            SysNo = Convert.ToInt32(doc.Get("SysNo")),
            BasicPrice = Convert.ToDecimal(doc.Get("BasicPrice")),
            Price = Convert.ToDecimal(doc.Get("Price")),
            DispalySymbol = 0,
            RankPrice = 0.00M,
            ProductGroupCode = Convert.ToString(doc.Get("ProductGroupCode")),
            DealerSysNos = doc.Get("DealerSysNos"),
            WarehouseSysNos = doc.Get("WarehouseSysNos")
        });
    }
    search.Close();
    return lst;
}
public float Score(int i)
{
    return hits.Score(i);
}
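// All of the examples on this page use the Hits class, which was deprecated in
// the Lucene 2.x line and removed in 3.0. A minimal sketch (an assumption, not
// from the original) of the same doc-plus-score iteration against the TopDocs
// API of Lucene.Net 3.x, given an open IndexSearcher "searcher" and a Query "query":
TopDocs topDocs = searcher.Search(query, 10);     // request the top 10 hits up front
foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
{
    Document doc = searcher.Doc(scoreDoc.Doc);    // replaces hits.Doc(i)
    float score = scoreDoc.Score;                 // replaces hits.Score(i)
    // note: unlike Hits, these scores are raw and not normalized to a 1.0 maximum
    Console.WriteLine(score + ": " + doc.Get("title"));
}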
public static Hashtable Search(bool api)
{
    BillType type;
    int number;
    int session = -1;
    if (HttpContext.Current.Request["session"] != null && HttpContext.Current.Request["session"] != "")
    {
        session = int.Parse(HttpContext.Current.Request["session"]);
    }
    string q = HttpContext.Current.Request["q"];
    int start = 0, count = (!api ? 30 : 1000);
    if (HttpContext.Current.Request["start"] != null)
    {
        start = int.Parse(HttpContext.Current.Request["start"]);
    }
    if (HttpContext.Current.Request["count"] != null)
    {
        count = int.Parse(HttpContext.Current.Request["count"]);
    }

    BooleanQuery query = new BooleanQuery();

    Hashtable no_results = new Hashtable();
    no_results["count"] = 0;

    if (q != null && q.IndexOf("*") > -1)
    {
        return no_results;
    }

    if (!api && session == -1 && q != null)
    {
        int slash = q.IndexOf('/');
        if (slash >= q.Length - 4 && slash > 2)
        {
            try
            {
                session = int.Parse(q.Substring(slash + 1));
                // and if that worked...
                q = q.Substring(0, slash);
                HttpContext.Current.Response.Redirect("billsearch.xpd?session=" + session + "&q=" + HttpUtility.UrlEncode(q));
                return null;
            }
            catch { }
        }
    }

    if (session == -1)
    {
        session = Util.CurrentSession;
    }

    //Console.Error.WriteLine("Find: " + q);

    string search_method = "search";
    ArrayList specs = new ArrayList();
    Hashtable scores = new Hashtable();

    // Match a bill number exactly
    if (q != null && Bills.ParseID(q, out type, out number))
    {
        if (!api)
        {
            // Redirect the user right to the bill page.
            // Don't even check if bill exists.
            HttpContext.Current.Response.Redirect(Bills.BillLink2(session, type, number));
            return null;
        }
        else
        {
            search_method = "search by bill number";
            scores[session + EnumsConv.BillTypeToString(type) + number] = 1.0F;
            specs.Add(new Database.AndSpec(
                new Database.SpecEQ("session", session),
                new Database.SpecEQ("type", EnumsConv.BillTypeToString(type)),
                new Database.SpecEQ("number", number)));
        }
    }

    // Match public law number exactly
    if (!api && q != null && (q.StartsWith("P.L.") || q.StartsWith("PL")))
    {
        try
        {
            string num = null;
            if (q.StartsWith("P.L."))
            {
                num = q.Substring(4);
            }
            if (q.StartsWith("PL"))
            {
                num = q.Substring(2);
            }
            num = num.Replace(" ", "");
            int dash = num.IndexOf('-');
            int s = int.Parse(num.Substring(0, dash));
            TableRow bill = Util.Database.DBSelectFirst("billindex", "session, type, number",
                new Database.SpecEQ("idx", "publiclawnumber"),
                new Database.SpecEQ("session", s),
                new Database.SpecEQ("value", num));
            if (bill != null)
            {
                if (!api)
                {
                    HttpContext.Current.Response.Redirect(Bills.BillLink3((int)bill["session"], (string)bill["type"], (int)bill["number"]));
                    return null;
                }
                else
                {
                    search_method = "search by public law number";
                    scores[(int)bill["session"] + (string)bill["type"] + (int)bill["number"]] = 1.0F;
                    specs.Add(new Database.AndSpec(
                        new Database.SpecEQ("session", (int)bill["session"]),
                        new Database.SpecEQ("type", (string)bill["type"]),
                        new Database.SpecEQ("number", (int)bill["number"])));
                }
            }
        }
        catch { }
    }

    if (session == -1)
    {
        session = Util.CurrentSession;
    }

    // Match USC reference
    Regex uscexp = new Regex(@"(\d[0-9A-Za-z\-]*)\s+U\.?S\.?C\.?\s+(\d[0-9A-Za-z\-]*)((\s*\([^\) ]+\))*)", RegexOptions.IgnoreCase);
    Match uscmc = (q == null ? null : uscexp.Match(q));
    if (uscmc != null && uscmc.Success)
    {
        string title = uscmc.Groups[1].Value;
        string section = uscmc.Groups[2].Value;
        string paragraph = uscmc.Groups[3].Value;
        string[] ps = paragraph.Split('[', '(', ')', ' ');
        int psi = 0;
        while (psi < ps.Length - 1 && ps[psi] == "")
        {
            psi++;
        }
        int pse = ps.Length - 1;
        while (pse > 0 && ps[pse] == "")
        {
            pse--;
        }
        if (ps.Length != 0)
        {
            paragraph = "_" + String.Join("_", ps, psi, pse - psi + 1);
        }
        Table table = Util.Database.DBSelect("billusc", "session, type, number",
            new Database.SpecEQ("session", session),
            new Database.OrSpec(
                new Database.SpecEQ("ref", title + "_" + section + paragraph),
                new Database.SpecStartsWith("ref", title + "_" + section + paragraph + "_")));
        foreach (TableRow bill in table)
        {
            search_method = "search by U.S.C. section";
            scores[(int)bill["session"] + (string)bill["type"] + (int)bill["number"]] = 1.0F;
            specs.Add(new Database.AndSpec(
                new Database.SpecEQ("session", (int)bill["session"]),
                new Database.SpecEQ("type", (string)bill["type"]),
                new Database.SpecEQ("number", (int)bill["number"])));
        }
    }

    int total_count = -1;

    if (specs.Count == 0)
    {
        if (q != null && q.Trim() != "")
        {
            BooleanQuery query1 = new BooleanQuery();
            query.Add(query1, BooleanClause.Occur.MUST);
            try
            {
                /*if (!q.StartsWith("-")) {
                    PhraseQuery pq = new PhraseQuery();
                    pq.Add(new Term("shorttitles", q));
                    pq.SetBoost((float)4);
                    query1.Add(pq, false, false);
                }*/
                Query query_titles2 = new QueryParser("shorttitles", new StandardAnalyzer()).Parse(q);
                query_titles2.SetBoost((float)3);
                query1.Add(query_titles2, BooleanClause.Occur.SHOULD);
                Query query_titles1 = new QueryParser("officialtitles", new StandardAnalyzer()).Parse(q);
                query_titles1.SetBoost((float)2);
                query1.Add(query_titles1, BooleanClause.Occur.SHOULD);
                Query query_summary = new QueryParser("summary", new StandardAnalyzer()).Parse(q);
                query1.Add(query_summary, BooleanClause.Occur.SHOULD);
                Query query_text = new QueryParser("fulltext", new StandardAnalyzer()).Parse(q);
                query1.Add(query_text, BooleanClause.Occur.SHOULD);
            }
            catch (Exception e)
            {
                return no_results;
            }
        }

        string chamber = HttpContext.Current.Request["chamber"];
        string[] status = HttpContext.Current.Request["status"] == null ? null : HttpContext.Current.Request["status"].Split(',');
        string sponsor = HttpContext.Current.Request["sponsor"];
        string cosponsor = HttpContext.Current.Request["cosponsor"];
        if (chamber != null && (chamber == "s" || chamber == "h"))
        {
            query.Add(new WildcardQuery(new Term("type", chamber + "*")), BooleanClause.Occur.MUST);
        }
        if (status != null && status[0] != "")
        {
            List<Term> terms = new List<Term>();
            foreach (string s in status)
            {
                terms.Add(new Term("state", s));
            }
            MultiPhraseQuery mpq = new MultiPhraseQuery();
            mpq.Add(terms.ToArray());
            query.Add(mpq, BooleanClause.Occur.MUST);
        }
        if (sponsor != null && sponsor != "")
        {
            query.Add(new TermQuery(new Term("sponsor", sponsor)), BooleanClause.Occur.MUST);
        }
        if (cosponsor != null && cosponsor != "")
        {
            query.Add(new TermQuery(new Term("cosponsor", cosponsor)), BooleanClause.Occur.MUST);
        }

        IndexSearcher searcher = new IndexSearcher(Util.DataPath + Path.DirectorySeparatorChar + session + Path.DirectorySeparatorChar + "index.bills.lucene");

        Sort sort = null;
        if (HttpContext.Current.Request["sort"] != null && HttpContext.Current.Request["sort"] == "introduced")
        {
            sort = new Sort(new SortField("introduced", SortField.STRING, true));
        }
        if (HttpContext.Current.Request["sort"] != null && HttpContext.Current.Request["sort"] == "lastaction")
        {
            sort = new Sort(new SortField("lastaction", SortField.STRING, true));
        }

        Hits hits = searcher.Search(query, sort == null ? new Sort() : sort);
        int end = hits.Length();
        if (start + count < end)
        {
            end = start + count;
        }
        total_count = hits.Length();
        for (int i = start; i < end; i++)
        {
            Document doc = hits.Doc(i);
            string billsession = doc.Get("session");
            string billtype = doc.Get("type");
            string billnumber = doc.Get("number");
            int istatus = (int)EnumsConv.BillStatusFromString(doc.Get("status"));
            float score;
            if (sort == null) // readjust the score based on status
            {
                score = hits.Score(i) + istatus / (float)8 * (float).2;
            }
            else // keep order from Lucene
            {
                score = -i;
            }
            scores[billsession + billtype + billnumber] = score;
            specs.Add(new Database.AndSpec(
                new Database.SpecEQ("session", billsession),
                new Database.SpecEQ("type", billtype),
                new Database.SpecEQ("number", billnumber)));
        }

        if (HttpContext.Current.Request["sort"] != null && HttpContext.Current.Request["sort"] == "hits" && specs.Count > 0)
        {
            Table hitsinfo = Util.Database.DBSelect("billhits", "*", Database.OrSpec.New((Database.Spec[])specs.ToArray(typeof(Database.Spec))));
            foreach (TableRow billhits in hitsinfo)
            {
                scores["" + billhits["session"] + billhits["type"] + billhits["number"]] = (float)(int)billhits["hits1"];
            }
        }
    }

    if (specs.Count == 0)
    {
        return no_results;
    }

    Table billinfo = Util.Database.DBSelect("billstatus", "*", Database.OrSpec.New((Database.Spec[])specs.ToArray(typeof(Database.Spec))));
    if (total_count == -1)
    {
        total_count = billinfo.Rows;
    }

    ArrayList ret = new ArrayList();
    foreach (TableRow r in billinfo)
    {
        ret.Add(r);
    }
    BillHitComparer bhc = new BillHitComparer();
    bhc.scores = scores;
    ret.Sort(bhc);

    Hashtable ret2 = new Hashtable();
    ret2["count"] = total_count;
    ret2["method"] = search_method;
    ret2["results"] = ret;
    return ret2;
}
//TODO: The last param, Cache, isn't used. Remove it and update dependent projects. (Version 1.3)
public static SearchResults<Document> GetDocuments(QueryParser defaultQueryParser, QueryParser customQueryParser, IndexSearcher indexSearcher, string query, int pageNumber, int pageSize, bool shouldDocumentsBeClustered, string sort, int maximumNumberOfDocumentsToScore, Cache cache)
{
    Query query2 = customQueryParser.Parse(query);
    Hits hits = null;

    if (!string.IsNullOrEmpty(sort))
    {
        string[] sorts = sort.ToLower().Split(",".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
        List<SortField> sortFields = new List<SortField>(sorts.Length / 2);
        for (int i = 0; i < sorts.Length; i++)
        {
            if (sorts[i].Split(' ')[1] == "asc")
            {
                sortFields.Add(new SortField(sorts[i].Split(' ')[0], false));
            }
            else
            {
                sortFields.Add(new SortField(sorts[i].Split(' ')[0], true));
            }
        }
        hits = indexSearcher.Search(query2, new Sort(sortFields.ToArray()));
    }
    else
    {
        hits = indexSearcher.Search(query2);
    }

    SearchResults<Document> searchResults = new SearchResults<Document>();
    searchResults.Documents = new List<Document>();
    searchResults.Query = query2;

    if (hits.Length() != 0)
    {
        Dictionary<string, string> domains = new Dictionary<string, string>();
        PriorityQueue<Document> priorityQueue = new PriorityQueue<Document>();

        // Get the Hits!!!
        //TODO: Optimize this!!! (Version 1.3)
        for (int j = 0; j < hits.Length() && searchResults.Documents.Count < maximumNumberOfDocumentsToScore && priorityQueue.Count < maximumNumberOfDocumentsToScore; j++)
        {
            Document document = hits.Doc(j);
            float score = hits.Score(j);
            document.Add(new Field("documentid", j.ToString(), Field.Store.YES, Field.Index.NO));
            document.Add(new Field("relevancyscore", score.ToString(), Field.Store.YES, Field.Index.NO));
            if (!string.IsNullOrEmpty(sort))
            {
                if (shouldDocumentsBeClustered)
                {
                    if (document.GetField("domain") != null)
                    {
                        string domain = document.GetField("domain").StringValue();
                        if (!domains.ContainsKey(domain))
                        {
                            domains.Add(domain, null);
                            if (searchResults.Documents.Count < pageSize && j >= (pageNumber * pageSize) - pageSize)
                            {
                                searchResults.Documents.Add(document);
                            }
                        }
                    }
                }
                else
                {
                    if (searchResults.Documents.Count < pageSize && j >= (pageNumber * pageSize) - pageSize)
                    {
                        searchResults.Documents.Add(document);
                    }
                }
            }
            else
            {
                priorityQueue.Enqueue(document, score * double.Parse(document.GetField("strength").StringValue()));
            }
        }

        if (string.IsNullOrEmpty(sort))
        {
            for (int i = 0; i < hits.Length() && priorityQueue.Count != 0; i++)
            {
                Document document = priorityQueue.Dequeue();
                if (shouldDocumentsBeClustered)
                {
                    if (document.GetField("domain") != null)
                    {
                        string domain = document.GetField("domain").StringValue();
                        if (!domains.ContainsKey(domain))
                        {
                            domains.Add(domain, null);
                            if (searchResults.Documents.Count < pageSize && i >= (pageNumber * pageSize) - pageSize)
                            {
                                searchResults.Documents.Add(document);
                            }
                        }
                        else
                        {
                            i--;
                        }
                    }
                }
                else
                {
                    if (searchResults.Documents.Count < pageSize && i >= (pageNumber * pageSize) - pageSize)
                    {
                        searchResults.Documents.Add(document);
                    }
                }
            }
        }

        if (shouldDocumentsBeClustered)
        {
            searchResults.TotalNumberOfHits = domains.Count;
        }
        else
        {
            searchResults.TotalNumberOfHits = hits.Length();
        }
    }
    return searchResults;
}
private void DoNormalSearch(int offset, int limit, NamespaceFilter namespaces)
{
    string encsearchterm = String.Format("title:({0})^4 OR ({1})", searchterm, searchterm);
    DateTime now = DateTime.UtcNow;
    Query query;

    /* If we fail to parse the query, it's probably due to illegal
     * use of metacharacters, so we escape them all and try again.
     */
    try
    {
        query = state.Parse(encsearchterm);
    }
    catch (Exception e)
    {
        string escaped = "";
        for (int i = 0; i < searchterm.Length; ++i)
        {
            escaped += "\\" + searchterm[i];
        }
        encsearchterm = "title:(" + escaped + ")^4 OR (" + escaped + ")";
        try
        {
            query = state.Parse(encsearchterm);
        }
        catch (Exception e2)
        {
            log.Error("Problem parsing search term: " + e2.Message + "\n" + e2.StackTrace);
            return;
        }
    }

    Hits hits = null;
    try
    {
        hits = state.Searcher.Search(query);
    }
    catch (Exception e)
    {
        log.Error("Error searching: " + e.Message + "\n" + e.StackTrace);
        return;
    }

    SendHeaders(200, "OK");
    int numhits = hits.Length();
    LogRequest(searchterm, query, numhits, now);
    SendOutputLine(numhits.ToString());

    if (numhits == 0)
    {
        string spelfix = MakeSpelFix(searchterm);
        SendOutputLine(HttpUtility.UrlEncode(spelfix, Encoding.UTF8));
    }
    else
    {
        // Lucene's filters seem to want to run over the entire
        // document set, which is really slow. We'll do namespace
        // checks as we go along, and stop once we've seen enough.
        //
        // The good side is that we can return the first N documents
        // pretty quickly. The bad side is that the total hits
        // number we return is bogus: it's for all namespaces combined.
        int matches = 0;
        //string lastMatch = "";
        for (int i = 0; i < numhits && i < maxoffset; i++)
        {
            Document doc = hits.Doc(i);
            string pageNamespace = doc.Get("namespace");
            if (namespaces.filter(pageNamespace))
            {
                if (matches++ < offset)
                {
                    continue;
                }
                string title = doc.Get("title");
                /*
                 * string squish = pageNamespace + ":" + title;
                 * if (lastMatch.Equals(squish)) {
                 *     // skip duplicate results due to indexing bugs
                 *     maxoffset++;
                 *     matches--;
                 *     continue;
                 * }
                 * lastMatch = squish;
                 */
                float score = hits.Score(i);
                SendResultLine(score, pageNamespace, title);
                if (matches >= (limit + offset))
                {
                    break;
                }
            }
        }
    }
}
public static void Main(String[] a)
{
    String indexName = "localhost_index";
    String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
    Uri url = null;
    for (int i = 0; i < a.Length; i++)
    {
        if (a[i].Equals("-i"))
        {
            indexName = a[++i];
        }
        else if (a[i].Equals("-f"))
        {
            fn = a[++i];
        }
        else if (a[i].Equals("-url"))
        {
            url = new Uri(a[++i]);
        }
    }

    StreamWriter temp_writer;
    temp_writer = new StreamWriter(Console.OpenStandardOutput(), Console.Out.Encoding);
    temp_writer.AutoFlush = true;
    StreamWriter o = temp_writer;

    IndexReader r = IndexReader.Open(indexName);
    o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs");

    LuceneMoreLikeThis mlt = new LuceneMoreLikeThis(r);
    o.WriteLine("Query generation parameters:");
    o.WriteLine(mlt.DescribeParams());
    o.WriteLine();

    Query query = null;
    if (url != null)
    {
        o.WriteLine("Parsing URL: " + url);
        query = mlt.Like(url);
    }
    else if (fn != null)
    {
        o.WriteLine("Parsing file: " + fn);
        query = mlt.Like(new FileInfo(fn));
    }
    o.WriteLine("q: " + query);
    o.WriteLine();

    IndexSearcher searcher = new IndexSearcher(indexName);
    Hits hits = searcher.Search(query);
    int len = hits.Length();
    o.WriteLine("found: " + len + " documents matching");
    o.WriteLine();
    for (int i = 0; i < Math.Min(25, len); i++)
    {
        Document d = hits.Doc(i);
        String summary = d.Get("summary");
        o.WriteLine("score : " + hits.Score(i));
        o.WriteLine("url : " + d.Get("url"));
        o.WriteLine("\ttitle : " + d.Get("title"));
        if (summary != null)
        {
            o.WriteLine("\tsummary: " + d.Get("summary"));
        }
        o.WriteLine();
    }
}