Ejemplo n.º 1
0
        /// <summary>
        /// Builds the <c>EntityInfo</c> for the hit at <paramref name="index"/> and fills the
        /// projection slots whose values come from the hit itself (score, ids, document, boost).
        /// </summary>
        /// <param name="hits">Search hits from which the document, score and document id are read.</param>
        /// <param name="index">Position of the hit inside <paramref name="hits"/>.</param>
        /// <returns>The entity info extracted from the hit's document.</returns>
        public EntityInfo Extract(Hits hits, int index)
        {
            // TODO: if only the score is requested (unlikely), avoid loading the document (lazy load).
            Document document = hits.Doc(index);
            EntityInfo info = Extract(document);

            object[] values = info.Projection;
            if (values == null || values.Length == 0)
            {
                // Nothing is projected, so there is nothing to fill in.
                return info;
            }

            for (int i = 0; i < projection.Length; i++)
            {
                switch (projection[i])
                {
                    case ProjectionConstants.SCORE:
                        values[i] = hits.Score(index);
                        break;

                    case ProjectionConstants.ID:
                        values[i] = info.Id;
                        break;

                    case ProjectionConstants.DOCUMENT:
                        values[i] = document;
                        break;

                    case ProjectionConstants.DOCUMENT_ID:
                        values[i] = hits.Id(index);
                        break;

                    case ProjectionConstants.BOOST:
                        values[i] = document.GetBoost();
                        break;

                    case ProjectionConstants.THIS:
                        // THIS may be projected more than once; only the slot positions are
                        // recorded here because loading is delayed to the Loader phase.
                        if (info.IndexesOfThis == null)
                        {
                            info.IndexesOfThis = new List<int>(1);
                        }
                        info.IndexesOfThis.Add(i);
                        break;
                }
            }

            return info;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Translates a story id, as stored in the database, into the document id Lucene
        /// assigned to that story.
        /// </summary>
        /// <param name="hostId">Host passed to <c>SearchQuery.GetSearcher</c> to obtain the searcher.</param>
        /// <param name="storyId">Story id; its string form is parsed as a query on the "id" field.</param>
        /// <returns>The Lucene id of the first hit, or <c>null</c> when the query yields no hits.</returns>
        protected int? ConvertStoryIdtoDocId(int hostId, int storyId)
        {
            Query storyQuery = new QueryParser("id", new DnkAnalyzer()).Parse(storyId.ToString());
            Hits  matches    = SearchQuery.GetSearcher(hostId).Search(storyQuery);

            return matches.Length() > 0 ? matches.Id(0) : (int?)null;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Collects the Lucene document ids of all documents whose
        /// <c>LuceneNodeIndexer.ELEMENTID_FOR_DELETING_FIELD</c> term equals <paramref name="elementId"/>.
        /// </summary>
        /// <param name="elementId">Term value to match exactly.</param>
        /// <returns>The ids of the matching documents in hit order; an empty array when nothing matches.</returns>
        private int[] GetDocumentIdsForElementId(string elementId)
        {
            IndexSearcher searcher = new IndexSearcher(CurrentDirectory);
            try
            {
                Hits hits = searcher.Search(new TermQuery(new Term(LuceneNodeIndexer.ELEMENTID_FOR_DELETING_FIELD, elementId)));

                // Ids are copied out while the searcher is still open, because Hits reads
                // from the searcher on demand.
                int   hitCount = hits.Length();
                int[] ids      = new int[hitCount];
                for (int x = 0; x < hitCount; x++)
                {
                    ids[x] = hits.Id(x);
                }

                return ids;
            }
            finally
            {
                // The searcher is created by this method, so it is released here even when
                // the search throws; previously it was never closed.
                searcher.Close();
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Deletes every indexed document whose "key" term equals the key of the given article.
        /// </summary>
        /// <remarks>
        /// Lucene doesn't let us do deletes from an IndexWriter, only an IndexReader.
        /// To avoid locking or constant open-close switches, be sure that writing
        /// is only happening on an in-memory writer.
        /// An <c>IOException</c> raised while deleting one document is logged as a warning and
        /// the remaining hits are still processed.
        /// </remarks>
        /// <param name="article">Article whose <c>Key</c> selects the documents to delete.</param>
        public void DeleteArticle(Article article)
        {
            OpenReader();

            string   articleKey = article.Key;
            Term     keyTerm    = new Term("key", articleKey);
            Hits     matches    = searcher.Search(new TermQuery(keyTerm));

            if (matches.Length() == 0)
            {
                log.Debug("Nothing to delete for " + articleKey);
            }

            int position = 0;
            while (position < matches.Length())
            {
                int docId = matches.Id(position);
                log.Debug("Trying to delete article number " + docId + " for " + articleKey);
                try
                {
                    reader.Delete(docId);
                }
                catch (IOException e)
                {
                    log.Warn("Couldn't delete article number " +
                             docId + " for " + articleKey + "... " + e);
                }
                position++;
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Runs a fuzzy query for every space-separated word of <paramref name="SearchString"/>
        /// and adds one <c>SearchResult</c> per hit to <c>SearchResults</c>. Each result carries
        /// the hit score, the value of the plugin field and a context snippet for every
        /// occurrence of a matching term in the document text.
        /// </summary>
        /// <param name="SearchString">Search words separated by single spaces; lower-cased before use.</param>
        public override void Search(string SearchString)
        {
            base.Search(SearchString);
            SearchString = SearchString.ToLower();
            var dir      = FSDirectory.GetDirectory(IndexPath, false);
            var searcher = new IndexSearcher(dir);
            try
            {
                // One reader serves every term-vector lookup below. It used to be opened once
                // per hit and never closed, leaking a reader for every result.
                IndexReader reader = IndexReader.Open(dir);
                try
                {
                    var parser = new QueryParser(ContentField, new StandardAnalyzer());

                    foreach (var s in SearchString.Split(new [] { ' ' }))
                    {
                        var  query = parser.GetFuzzyQuery(ContentField, s, MinSimilarity);
                        Hits hits  = searcher.Search(query);

                        for (int i = 0; i < hits.Length(); i++)
                        {
                            Document doc    = hits.Doc(i);
                            var      result = new SearchResult {
                                Score = hits.Score(i), Plugin = doc.Get(PluginField)
                            };

                            // Text of the current document.
                            string text = doc.Get(ContentField);
                            // All indexed terms of this document.
                            var tpv = (TermPositionVector)reader.GetTermFreqVector(hits.Id(i), ContentField);

                            // Without a term vector or text no snippet can be built; the hit is
                            // still reported, just without contexts.
                            if (tpv != null && text != null)
                            {
                                String[] docTerms = tpv.GetTerms();
                                var      words    = new List<string>(docTerms);

                                // Find the positions at which the search words occur.
                                for (int t = 0; t < docTerms.Length; t++)
                                {
                                    // Only terms that match the search string are of interest...
                                    if (!ContainsSearchString(SearchString, docTerms[t], words))
                                    {
                                        continue;
                                    }

                                    // ...for those, read every occurrence with its start and end offset.
                                    TermVectorOffsetInfo[] offsets = tpv.GetOffsets(t);
                                    if (offsets == null)
                                    {
                                        continue;
                                    }

                                    for (int j = 0; j < offsets.Length; j++)
                                    {
                                        // Store a short excerpt around the occurrence so the user
                                        // can make sense of the hit.
                                        result.Contexts.Add(BuildContextSnippet(
                                                                text,
                                                                offsets[j].GetStartOffset(),
                                                                offsets[j].GetEndOffset()));
                                    }
                                }
                            }

                            SearchResults.Add(result);
                        }
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            finally
            {
                searcher.Close();
            }
        }

        /// <summary>
        /// Cuts a readable excerpt out of <paramref name="text"/> around the occurrence that
        /// spans <paramref name="start"/> to <paramref name="end"/>.
        /// </summary>
        /// <param name="text">Full document text.</param>
        /// <param name="start">Start offset of the occurrence.</param>
        /// <param name="end">End offset of the occurrence.</param>
        /// <returns>The excerpt, widened by the context offsets and extended to the neighbouring spaces.</returns>
        private string BuildContextSnippet(string text, int start, int end)
        {
            // Widen the window by the configured offsets, clamped to the text bounds.
            int contextStart = Math.Max(start - ContextLeftOffset, 0);
            int contextEnd   = Math.Min(end + ContextRightOffset, text.Length);

            // Read on to the end of the next word to make the excerpt easier to read.
            int nextEndSpace = text.IndexOf(" ", contextEnd, StringComparison.Ordinal);
            contextEnd = nextEndSpace > 0 ? nextEndSpace : contextEnd;

            // Look for the nearest space to the left; at most everything before the window
            // start is searched.
            int leftSpaceOffset = contextStart;
            int nextStartSpace  = text.LastIndexOf(" ", contextStart, leftSpaceOffset, StringComparison.Ordinal);
            // If there is no space nearby, the start stays where it is.
            contextStart = nextStartSpace > 0 ? nextStartSpace : contextStart;

            int contextLength = Math.Min(contextEnd - contextStart, text.Length);
            return text.Substring(contextStart, contextLength);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Queries the product index and returns one page of matching entries.
        /// </summary>
        /// <param name="fieldName">Name of the index field to search.</param>
        /// <param name="keywords">Keywords to search for.</param>
        /// <param name="pageIndex">1-based page number; values that lead to a negative offset are treated as the first page.</param>
        /// <param name="pageSize">Maximum number of entries returned for the page.</param>
        /// <param name="totalRecord">Receives the total number of hits of the query.</param>
        /// <returns>The index entries of the requested page; empty when the page lies beyond the last hit.</returns>
        /// <remarks>Created 2013-08-15 by 朱成果.</remarks>
        public List<CBPdProductIndex> QueryDoc(string fieldName, string keywords, int pageIndex, int pageSize, out int totalRecord)
        {
            var search = new IndexSearcher(IndexStorePath);
            try
            {
                Query searchQuery;

                if (!string.IsNullOrEmpty(fieldName) && !string.IsNullOrEmpty(keywords))
                {
                    // Keyword query: every branch wraps its clauses in a MUST child query,
                    // except the generic fallback at the end.
                    var          query = new BooleanQuery();
                    BooleanQuery childQuery;

                    if (fieldName == "ProductName")
                    {
                        // Fuzzy product-name search, added 2016-4-6 by 杨浩.
                        childQuery = new BooleanQuery();
                        string trimmedKeywords = keywords.Trim();

                        // Keywords as prepared by GetKeyWordsSplitBySpace, parsed with the PanGu analyzer.
                        var         keyWordsSplitBySpace   = GetKeyWordsSplitBySpace(keywords);
                        QueryParser productNameQueryParser = new QueryParser(global::Lucene.Net.Util.Version.LUCENE_29, "ProductName", new PanGuAnalyzer(true));
                        Query       productNameQuery       = productNameQueryParser.Parse(keyWordsSplitBySpace);
                        childQuery.Add(productNameQuery, BooleanClause.Occur.SHOULD);

                        // Prefix match: typing "ja" finds both "java" and "javascript".
                        Query prefixQuery_productName = new PrefixQuery(new Term("ProductName", trimmedKeywords));
                        // Fuzzy match: searching "wuzza" may return "fuzzy" and "wuzzy".
                        Query fuzzyQuery_productName = new FuzzyQuery(new Term("ProductName", trimmedKeywords));
                        // Wildcard match on the keywords as typed.
                        Query wildcardQuery_productName = new WildcardQuery(new Term("ProductName", trimmedKeywords));

                        childQuery.Add(prefixQuery_productName, BooleanClause.Occur.SHOULD);
                        childQuery.Add(fuzzyQuery_productName, BooleanClause.Occur.SHOULD);
                        childQuery.Add(wildcardQuery_productName, BooleanClause.Occur.SHOULD);
                        childQuery.SetBoost(4.0F);

                        query.Add(childQuery, BooleanClause.Occur.MUST);
                    }
                    else if (fieldName == "Category")
                    {
                        childQuery = new BooleanQuery();

                        // Exact category match, boosted above the associated-category match.
                        var categoryQuery = new BooleanQuery();
                        categoryQuery.Add(new TermQuery(new Term("Category", keywords)),
                                          BooleanClause.Occur.SHOULD);
                        categoryQuery.SetBoost(3.0F);
                        childQuery.Add(categoryQuery, BooleanClause.Occur.SHOULD);

                        // AssociationCategory is matched on ",keyword," anywhere in the value.
                        var associationQuery = new BooleanQuery();
                        associationQuery.Add(new WildcardQuery(new Term("AssociationCategory", string.Format("*,{0},*", keywords))),
                                             BooleanClause.Occur.SHOULD);
                        associationQuery.SetBoost(2.8F);
                        childQuery.Add(associationQuery, BooleanClause.Occur.SHOULD);

                        query.Add(childQuery, BooleanClause.Occur.MUST);
                    }
                    else if (fieldName == "BrandSysNo")
                    {
                        childQuery = new BooleanQuery();
                        childQuery.Add(new TermQuery(new Term("BrandSysNo", keywords)),
                                       BooleanClause.Occur.SHOULD);
                        childQuery.SetBoost(3.0F);
                        query.Add(childQuery, BooleanClause.Occur.MUST);
                    }
                    else if (fieldName == "DealerSysNos")
                    {
                        childQuery = new BooleanQuery();
                        childQuery.Add(new WildcardQuery(new Term("DealerSysNos", string.Format("*,{0},*", keywords))),
                                       BooleanClause.Occur.SHOULD);
                        childQuery.SetBoost(2.8F);
                        query.Add(childQuery, BooleanClause.Occur.MUST);
                    }
                    else if (fieldName == "ProductGroupCode")
                    {
                        childQuery = new BooleanQuery();
                        childQuery.Add(new WildcardQuery(new Term("ProductGroupCode", string.Format("*,{0},*", keywords))),
                                       BooleanClause.Occur.SHOULD);
                        childQuery.SetBoost(2.8F);
                        query.Add(childQuery, BooleanClause.Occur.MUST);
                    }
                    else
                    {
                        // Any other field: plain term match on the given field.
                        query.Add(new TermQuery(new Term(fieldName, keywords)),
                                  BooleanClause.Occur.SHOULD);
                    }

                    searchQuery = query;
                }
                else
                {
                    // NOTE(review): with no field or keywords the query falls back to a fixed
                    // product-name wildcard; this looks like a leftover default - confirm it is intended.
                    searchQuery = new WildcardQuery(new Term("ProductName", "*雪花秀*"));
                }

                // Sort order.
                var sort = new Sort();
                // Run the search.
                Hits hits = search.Search(searchQuery, sort);

                // Total number of hits.
                totalRecord = hits.Length();

                int startIndex = (pageIndex - 1) * pageSize;
                if (startIndex < 0)
                {
                    startIndex = 0;
                }

                // Exclusive upper bound of the page. The former inclusive bound
                // (startIndex + pageSize) returned pageSize + 1 entries and made
                // consecutive pages overlap by one entry.
                int endIndex = Math.Min(startIndex + pageSize, totalRecord);

                List<CBPdProductIndex> lst = new List<CBPdProductIndex>();
                for (int i = startIndex; i < endIndex; i++)
                {
                    var doc = hits.Doc(i);
                    lst.Add(
                        new CBPdProductIndex
                    {
                        DocID = hits.Id(i),
                        Score = hits.Score(i),
                        AssociationCategory = doc.Get("AssociationCategory"),
                        Attributes          = doc.Get("Attributes"),
                        Barcode             = doc.Get("Barcode"),
                        BrandSysNo          = Convert.ToInt32(doc.Get("BrandSysNo")),
                        Category            = Convert.ToInt32(doc.Get("Category")),
                        DisplayOrder        = Convert.ToInt32(doc.Get("DisplayOrder")),
                        NameAcronymy        = doc.Get("NameAcronymy"),
                        Prices           = doc.Get("Prices"),
                        ProductImage     = doc.Get("ProductImage"),
                        ProductName      = doc.Get("ProductName"),
                        QRCode           = doc.Get("QRCode"),
                        Status           = Convert.ToInt32(doc.Get("Status")),
                        SysNo            = Convert.ToInt32(doc.Get("SysNo")),
                        BasicPrice       = Convert.ToDecimal(doc.Get("BasicPrice")),
                        Price            = Convert.ToDecimal(doc.Get("Price")),
                        DispalySymbol    = 0,
                        RankPrice        = 0.00M,
                        ProductGroupCode = Convert.ToString(doc.Get("ProductGroupCode")),
                        DealerSysNos     = doc.Get("DealerSysNos"),
                        WarehouseSysNos  = doc.Get("WarehouseSysNos")
                    });
                }

                return lst;
            }
            finally
            {
                // Release the searcher even when building the query, searching or
                // converting a field value throws.
                search.Close();
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Finds posts similar to the post with the given id. The post's document is looked up
        /// by "postid", every term of its "exact" term vector is combined into an OR query, and
        /// the hits of that query (excluding the post itself) are returned by relevance.
        /// </summary>
        /// <param name="postid">Id of the post to find similar posts for.</param>
        /// <param name="itemsToReturn">Maximum number of posts to return.</param>
        /// <returns>
        /// Up to <paramref name="itemsToReturn"/> similar posts. The list is empty when the post
        /// is not indexed, has no term vector, or the search fails; failures are traced, not thrown.
        /// </returns>
        public List<Post> Similar(int postid, int itemsToReturn)
        {
            List<Post> TList = new List<Post>();

            int docId = -1;

            IndexSearcher searcher = null;
            IndexReader   reader   = null;

            if (rd == null)
            {
                BuildIndex();
            }

            lck.AcquireReaderLock(ReaderTimeOut);
            try
            {
                Analyzer    analyzer = GetAnalyzer();
                QueryParser parser   = GetQueryParser(analyzer);
                parser.SetDefaultOperator(QueryParser.AND_OPERATOR);

                Query q = parser.Parse("postid:" + postid);

                searcher = new IndexSearcher(rd, true);
                // TODO: Hits is obsolete; replace it once the search API is migrated.
#pragma warning disable CS0618 // Type or member is obsolete
                Hits hits = searcher.Search(q);
#pragma warning restore CS0618 // Type or member is obsolete
                if (hits != null && hits.Length() > 0)
                {
                    docId = hits.Id(0);
                }

                if (docId > -1)
                {
                    reader = IndexReader.Open(rd, true);

                    // No term vector for the "exact" field means there is nothing to compare with.
                    TermFreqVector tfv   = reader.GetTermFreqVector(docId, "exact");
                    string[]       terms = tfv != null ? tfv.GetTerms() : null;

                    if (terms != null && terms.Length > 0)
                    {
                        BooleanQuery booleanQuery = new BooleanQuery();
                        foreach (string term in terms)
                        {
                            booleanQuery.Add(new TermQuery(new Term("exact", term)), BooleanClause.Occur.SHOULD);
                        }

                        // TODO: Hits is obsolete; replace it once the search API is migrated.
#pragma warning disable CS0618 // Type or member is obsolete
                        Hits similarhits = searcher.Search(booleanQuery, Sort.RELEVANCE);
#pragma warning restore CS0618 // Type or member is obsolete

                        // The limit is checked before each hit so that itemsToReturn <= 0 yields nothing.
                        for (int i = 0; i < similarhits.Length() && TList.Count < itemsToReturn; i++)
                        {
                            // Skip the source post itself; its document is not loaded.
                            if (similarhits.Id(i) != docId)
                            {
                                TList.Add(CreateFromDocument(similarhits.Doc(i), analyzer, null));
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Similar posts are best effort: a failure still returns whatever was collected,
                // but it is traced instead of being silently discarded.
                System.Diagnostics.Trace.TraceWarning("Similar(postid=" + postid + ") failed: " + ex);
            }
            finally
            {
                try
                {
                    try
                    {
                        if (searcher != null)
                        {
                            searcher.Close();
                        }
                    }
                    finally
                    {
                        if (reader != null)
                        {
                            reader.Close();
                        }
                    }
                }
                finally
                {
                    // The lock must be released even when closing the searcher or reader throws.
                    lck.ReleaseReaderLock();
                }
            }

            return TList;
        }