public EntityInfo Extract(Hits hits, int index)
{
    Document doc = hits.Doc(index);
    //TODO if we are only looking for score (unlikely), avoid accessing doc (lazy load)
    EntityInfo entityInfo = Extract(doc);
    object[] eip = entityInfo.Projection;

    if (eip != null && eip.Length > 0)
    {
        for (int x = 0; x < projection.Length; x++)
        {
            switch (projection[x])
            {
                case ProjectionConstants.SCORE:
                    eip[x] = hits.Score(index);
                    break;

                case ProjectionConstants.ID:
                    eip[x] = entityInfo.Id;
                    break;

                case ProjectionConstants.DOCUMENT:
                    eip[x] = doc;
                    break;

                case ProjectionConstants.DOCUMENT_ID:
                    eip[x] = hits.Id(index);
                    break;

                case ProjectionConstants.BOOST:
                    eip[x] = doc.GetBoost();
                    break;

                case ProjectionConstants.THIS:
                    // THIS could be projected more than once
                    // THIS loading delayed to the Loader phase
                    if (entityInfo.IndexesOfThis == null)
                    {
                        entityInfo.IndexesOfThis = new List<int>(1);
                    }
                    entityInfo.IndexesOfThis.Add(x);
                    break;
            }
        }
    }
    return entityInfo;
}
/// <summary>
/// Converts storyId as used in the database to the docId as used in Lucene.
/// </summary>
/// <param name="hostId"></param>
/// <param name="storyId"></param>
/// <returns>Returns the docId for the story from Lucene. If the story is not
/// found, a null value will be returned.</returns>
protected int? ConvertStoryIdtoDocId(int hostId, int storyId)
{
    QueryParser queryParser = new QueryParser("id", new DnkAnalyzer());
    Query q = queryParser.Parse(storyId.ToString());
    IndexSearcher searcher = SearchQuery.GetSearcher(hostId);
    Hits hits = searcher.Search(q);
    if (hits.Length() > 0)
    {
        return hits.Id(0);
    }
    return null;
}
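// Hedged usage sketch, not part of the original source: hostId/storyId and
// SearchQuery.GetSearcher are assumptions carried over from the method above.
// Once the database id has been translated to a Lucene doc id, the stored
// document can be loaded through the same searcher.
protected Document LoadStoryDocument(int hostId, int storyId)
{
    int? docId = ConvertStoryIdtoDocId(hostId, storyId);
    if (docId == null)
    {
        return null; // story was never indexed
    }
    IndexSearcher searcher = SearchQuery.GetSearcher(hostId);
    return searcher.Doc(docId.Value);
}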
private int[] GetDocumentIdsForElementId(string elementId)
{
    List<int> result = new List<int>();
    IndexSearcher searcher = new IndexSearcher(CurrentDirectory);
    Hits hits = searcher.Search(
        new TermQuery(new Term(LuceneNodeIndexer.ELEMENTID_FOR_DELETING_FIELD, elementId)));
    for (int x = 0; x < hits.Length(); x++)
    {
        result.Add(hits.Id(x));
    }
    return result.ToArray();
}
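// Hedged usage sketch, not part of the original source: the field name suggests these
// doc ids are collected so the stale documents for an element can be deleted before the
// element is re-indexed. CurrentDirectory is the directory used by the method above, and
// reader.Delete(int) assumes the same legacy Lucene.Net API as the DeleteArticle snippet below.
private void DeleteDocumentsForElementId(string elementId)
{
    int[] docIds = GetDocumentIdsForElementId(elementId);
    IndexReader reader = IndexReader.Open(CurrentDirectory);
    foreach (int docId in docIds)
    {
        reader.Delete(docId);
    }
    reader.Close();
}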
/**
 * Lucene doesn't let us do deletes from an IndexWriter, only an IndexReader.
 * To avoid locking or constant open-close switches, be sure that writing
 * is only happening on an in-memory writer.
 * @param article
 * @throws IOException
 */
public void DeleteArticle(Article article)
{
    OpenReader();
    string key = article.Key;
    Hits hits = searcher.Search(new TermQuery(new Term("key", key)));
    if (hits.Length() == 0)
    {
        log.Debug("Nothing to delete for " + key);
    }
    for (int i = 0; i < hits.Length(); i++)
    {
        int id = hits.Id(i);
        log.Debug("Trying to delete article number " + id + " for " + key);
        try
        {
            reader.Delete(id);
        }
        catch (IOException e)
        {
            log.Warn("Couldn't delete article number " + id + " for " + key + "... " + e);
        }
    }
}
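// Hedged alternative sketch, not part of the original source: when per-document logging
// is not needed, the search step can be skipped, because the same legacy IndexReader API
// also deletes every document matching a term in one call. The reader field, log field,
// and the "key" field name are assumptions carried over from DeleteArticle above.
public void DeleteArticleByKey(string key)
{
    OpenReader();
    // Delete(Term) removes all documents whose "key" field equals the given value
    // and returns the number of documents deleted.
    int deleted = reader.Delete(new Term("key", key));
    log.Debug("Deleted " + deleted + " document(s) for " + key);
}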
public override void Search(string SearchString)
{
    base.Search(SearchString);
    SearchString = SearchString.ToLower();
    var dir = FSDirectory.GetDirectory(IndexPath, false);
    var searcher = new IndexSearcher(dir);
    var parser = new QueryParser(ContentField, new StandardAnalyzer());

    foreach (var s in SearchString.Split(new[] { ' ' }))
    {
        var query = parser.GetFuzzyQuery(ContentField, s, MinSimilarity);
        Hits hits = searcher.Search(query);

        for (int i = 0; i < hits.Length(); i++)
        {
            Document doc = hits.Doc(i);
            var result = new SearchResult
            {
                Score = hits.Score(i),
                Plugin = doc.Get(PluginField)
            };

            // Read the text of the current document
            string text = doc.Get(ContentField);
            // Read all indexed terms of this document
            var tpv = (TermPositionVector)IndexReader.Open(dir).GetTermFreqVector(hits.Id(i), ContentField);
            String[] DocTerms = tpv.GetTerms();
            // Read how often each term occurs
            int[] freq = tpv.GetTermFrequencies();
            var words = new List<string>(DocTerms);

            // Now find the positions at which the search word occurs
            for (int t = 0; t < freq.Length; t++)
            {
                // If the search word matches the current term...
                if (ContainsSearchString(SearchString, DocTerms[t], words))
                {
                    // ...we can read out its positions
                    TermVectorOffsetInfo[] offsets = tpv.GetOffsets(t);
                    // The array now holds every occurrence of the search word with its start and end offset
                    for (int j = 0; j < offsets.Length; j++)
                    {
                        // Extract a small context snippet around the hit so the user can make sense of it
                        int start = offsets[j].GetStartOffset();
                        int end = offsets[j].GetEndOffset();
                        int contextStart = start - ContextLeftOffset;
                        contextStart = contextStart < 0 ? 0 : contextStart;
                        int contextEnd = end + ContextRightOffset;
                        contextEnd = contextEnd > text.Length ? text.Length : contextEnd;

                        // Read up to the end of the next word to make the result easier to read
                        int nextEndSpace = text.IndexOf(" ", contextEnd);
                        contextEnd = nextEndSpace > 0 ? nextEndSpace : contextEnd;

                        // At most this many characters may be scanned for a space to the left of the hit
                        int leftSpaceOffset = contextStart;
                        // Find the nearest space to the left of the hit
                        int nextStartSpace = text.LastIndexOf(" ", contextStart, leftSpaceOffset);
                        // If there is no space nearby, nothing needs to change
                        contextStart = nextStartSpace > 0 ? nextStartSpace : contextStart;

                        int contextLength = contextEnd - contextStart;
                        contextLength = contextLength > text.Length ? text.Length : contextLength;
                        // Read the context
                        string context = text.Substring(contextStart, contextLength);
                        // ...and add it to the search results together with the owning plug-in name and the hit score
                        result.Contexts.Add(context);
                    }
                }
            }
            SearchResults.Add(result);
        }
    }
}
/// <summary>
/// Query the index
/// </summary>
/// <param name="fieldName">Field name</param>
/// <param name="keywords">Keywords</param>
/// <param name="pageIndex">Current page</param>
/// <param name="pageSize">Page size</param>
/// <param name="totalRecord">Total number of records</param>
/// <returns>List of index entries</returns>
/// <remarks>2013-08-15 朱成果 created</remarks>
public List<CBPdProductIndex> QueryDoc(string fieldName, string keywords, int pageIndex, int pageSize, out int totalRecord)
{
    var search = new IndexSearcher(IndexStorePath);
    Query searchQuery;
    if (!string.IsNullOrEmpty(fieldName) && !string.IsNullOrEmpty(keywords))
    {
        #region [Keyword query]
        var query = new BooleanQuery();
        BooleanQuery childQuery;
        BooleanQuery esenQuery;
        if (fieldName == "ProductName")
        {
            #region 2016-4-6 杨浩 added fuzzy search
            childQuery = new BooleanQuery();
            esenQuery = new BooleanQuery();
            // Fuzzy search
            //esenQuery.Add(new FuzzyQuery(new Term("ProductName", Regex.Replace(keywords, @"\s", ""))), BooleanClause.Occur.SHOULD);
            //esenQuery.SetBoost(4.0F);

            // Word segmentation with the PanGu analyzer
            var keyWordsSplitBySpace = GetKeyWordsSplitBySpace(keywords);
            //string keyWordsSplitBySpace = string.Format("{0}^{1}.0", keywords, (int)Math.Pow(3, 5)); // no segmentation, fuzzy search only
            QueryParser productNameQueryParser = new QueryParser(global::Lucene.Net.Util.Version.LUCENE_29, "ProductName", new PanGuAnalyzer(true));
            Query productNameQuery = productNameQueryParser.Parse(keyWordsSplitBySpace);
            childQuery.Add(productNameQuery, BooleanClause.Occur.SHOULD);

            // Prefix match: typing "ja" also finds documents containing java and javascript
            Query prefixQuery_productName = new PrefixQuery(new Term("ProductName", keywords.Trim()));
            // Direct fuzzy match: searching for something similar to 'wuzza' may return 'fuzzy' and 'wuzzy'
            Query fuzzyQuery_productName = new FuzzyQuery(new Term("ProductName", keywords.Trim()));
            // Wildcard search
            Query wildcardQuery_productName = new WildcardQuery(new Term("ProductName", string.Format("{0}", keywords.Trim())));
            childQuery.Add(prefixQuery_productName, BooleanClause.Occur.SHOULD);
            childQuery.Add(fuzzyQuery_productName, BooleanClause.Occur.SHOULD);
            childQuery.Add(wildcardQuery_productName, BooleanClause.Occur.SHOULD);
            childQuery.SetBoost(4.0F);
            //childQuery.Add(esenQuery, BooleanClause.Occur.SHOULD);
            query.Add(childQuery, BooleanClause.Occur.MUST);
            #endregion

            //childQuery = new BooleanQuery();
            //esenQuery = new BooleanQuery();
            //// whole keyword with whitespace stripped
            //esenQuery.Add(new TermQuery(new Term("ProductName", Regex.Replace(keywords, @"\s", ""))),
            //    BooleanClause.Occur.SHOULD);
            //esenQuery.SetBoost(3.0F);
            //childQuery.Add(esenQuery, BooleanClause.Occur.SHOULD);
            //esenQuery = new BooleanQuery();
            //// segmentation with the PanGu analyzer
            //esenQuery.Add(new QueryParser("ProductName", new PanGuAnalyzer(true)).Parse(keywords),
            //    BooleanClause.Occur.SHOULD);
            //// segmentation by whitespace
            //var keyColl = keywords.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            //foreach (var item in keyColl)
            //{
            //    esenQuery.Add(new TermQuery(new Term("ProductName", item)),
            //        BooleanClause.Occur.SHOULD);
            //}
            //esenQuery.SetBoost(2.9F);
            //childQuery.Add(esenQuery, BooleanClause.Occur.SHOULD);
            //query.Add(childQuery, BooleanClause.Occur.MUST);
        }
        else if (fieldName == "Category")
        {
            childQuery = new BooleanQuery();
            esenQuery = new BooleanQuery();
            esenQuery.Add(new TermQuery(new Term("Category", keywords)), BooleanClause.Occur.SHOULD);
            esenQuery.SetBoost(3.0F);
            childQuery.Add(esenQuery, BooleanClause.Occur.SHOULD);
            esenQuery = new BooleanQuery();
            esenQuery.Add(new WildcardQuery(new Term("AssociationCategory", string.Format("*,{0},*", keywords))), BooleanClause.Occur.SHOULD);
            esenQuery.SetBoost(2.8F);
            childQuery.Add(esenQuery, BooleanClause.Occur.SHOULD);
            query.Add(childQuery, BooleanClause.Occur.MUST);
        }
        else if (fieldName == "BrandSysNo")
        {
            childQuery = new BooleanQuery();
            childQuery.Add(new TermQuery(new Term("BrandSysNo", keywords)), BooleanClause.Occur.SHOULD);
            childQuery.SetBoost(3.0F);
            query.Add(childQuery, BooleanClause.Occur.MUST);
        }
        else if (fieldName == "DealerSysNos")
        {
            childQuery = new BooleanQuery();
            childQuery.Add(new WildcardQuery(new Term("DealerSysNos", string.Format("*,{0},*", keywords))), BooleanClause.Occur.SHOULD);
            childQuery.SetBoost(2.8F);
            query.Add(childQuery, BooleanClause.Occur.MUST);
        }
        else if (fieldName == "ProductGroupCode")
        {
            childQuery = new BooleanQuery();
            childQuery.Add(new WildcardQuery(new Term("ProductGroupCode", string.Format("*,{0},*", keywords))), BooleanClause.Occur.SHOULD);
            childQuery.SetBoost(2.8F);
            query.Add(childQuery, BooleanClause.Occur.MUST);
        }
        else
        {
            query.Add(new TermQuery(new Term(fieldName, keywords)), BooleanClause.Occur.SHOULD);
        }
        #endregion
        searchQuery = query;
    }
    else
    {
        searchQuery = new WildcardQuery(new Term("ProductName", "*雪花秀*"));
    }

    // Sort order
    var sort = new Sort();
    // Search
    Hits hits = search.Search(searchQuery, sort);
    totalRecord = hits.Length(); // total number of records
    int startIndex = (pageIndex - 1) * pageSize;
    if (startIndex < 0)
    {
        startIndex = 0;
    }
    int endIndex = startIndex + pageSize;
    if (endIndex > totalRecord - 1)
    {
        endIndex = totalRecord - 1;
    }
    List<CBPdProductIndex> lst = new List<CBPdProductIndex>();
    for (int i = startIndex; i <= endIndex; i++)
    {
        var doc = hits.Doc(i);
        lst.Add(new CBPdProductIndex
        {
            DocID = hits.Id(i),
            Score = hits.Score(i),
            AssociationCategory = doc.Get("AssociationCategory"),
            Attributes = doc.Get("Attributes"),
            Barcode = doc.Get("Barcode"),
            BrandSysNo = Convert.ToInt32(doc.Get("BrandSysNo")),
            Category = Convert.ToInt32(doc.Get("Category")),
            DisplayOrder = Convert.ToInt32(doc.Get("DisplayOrder")),
            NameAcronymy = doc.Get("NameAcronymy"),
            Prices = doc.Get("Prices"),
            ProductImage = doc.Get("ProductImage"),
            ProductName = doc.Get("ProductName"),
            QRCode = doc.Get("QRCode"),
            Status = Convert.ToInt32(doc.Get("Status")),
            SysNo = Convert.ToInt32(doc.Get("SysNo")),
            BasicPrice = Convert.ToDecimal(doc.Get("BasicPrice")),
            Price = Convert.ToDecimal(doc.Get("Price")),
            DispalySymbol = 0,
            RankPrice = 0.00M,
            ProductGroupCode = Convert.ToString(doc.Get("ProductGroupCode")),
            DealerSysNos = doc.Get("DealerSysNos"),
            WarehouseSysNos = doc.Get("WarehouseSysNos")
        });
    }
    search.Close();
    return lst;
}
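// Hedged usage sketch, not part of the original source: page through product-name hits,
// 20 per page. QueryDoc and CBPdProductIndex come from the method above; the out
// parameter reports the total hit count for building pager controls.
int totalRecord;
List<CBPdProductIndex> firstPage = QueryDoc("ProductName", "雪花秀", 1, 20, out totalRecord);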
public List<Post> Similar(int postid, int itemsToReturn)
{
    List<Post> TList = new List<Post>();
    int docId = -1;
    IndexSearcher searcher = null;
    IndexReader reader = null;
    if (rd == null)
    {
        BuildIndex();
    }
    lck.AcquireReaderLock(ReaderTimeOut);
    try
    {
        Analyzer analyzer = GetAnalyzer();
        QueryParser parser = GetQueryParser(analyzer);
        parser.SetDefaultOperator(QueryParser.AND_OPERATOR);
        Query q = parser.Parse("postid:" + postid);
        searcher = new IndexSearcher(rd, true);
        //TODO
#pragma warning disable CS0618 // Type or member is obsolete
        Hits hits = searcher.Search(q);
#pragma warning restore CS0618 // Type or member is obsolete
        if (hits != null && hits.Length() > 0)
        {
            docId = hits.Id(0);
        }
        if (docId > -1)
        {
            reader = IndexReader.Open(rd, true);
            TermFreqVector tfv = reader.GetTermFreqVector(docId, "exact");
            BooleanQuery booleanQuery = new BooleanQuery();
            for (int j = 0; j < tfv.Size(); j++)
            {
                TermQuery tq = new TermQuery(new Term("exact", tfv.GetTerms()[j]));
                booleanQuery.Add(tq, BooleanClause.Occur.SHOULD);
            }
            //TODO
#pragma warning disable CS0618 // Type or member is obsolete
            Hits similarhits = searcher.Search(booleanQuery, Sort.RELEVANCE);
#pragma warning restore CS0618 // Type or member is obsolete
            for (int i = 0; i < similarhits.Length(); i++)
            {
                Document doc = similarhits.Doc(i);
                if (similarhits.Id(i) != docId)
                {
                    TList.Add(CreateFromDocument(doc, analyzer, null));
                }
                if (TList.Count >= itemsToReturn)
                {
                    break;
                }
            }
        }
    }
    catch (Exception)
    {
    }
    finally
    {
        if (searcher != null)
        {
            searcher.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        lck.ReleaseReaderLock();
    }
    return TList;
}