Beispiel #1
0
 /// <summary>
 /// Queues a delete request for the indexed document identified by
 /// <paramref name="id"/> and <paramref name="type"/>. The actual index
 /// removal happens later when the queued model is processed.
 /// </summary>
 /// <param name="id">Identifier of the record to remove from the index.</param>
 /// <param name="type">Type discriminator of the record to remove.</param>
 public void Delete(int id, int type)
 {
     LuceneModels.Enqueue(new LuceneModel
     {
         ID = id,
         Type = type,
         IndexType = LuceneType.Delete
     });
 }
Beispiel #2
0
        /// <summary>
        /// Searches the Lucene index for documents matching <paramref name="keyword"/>
        /// in the "title" and "content" fields (optionally filtered by document type)
        /// and returns one page of results ordered by creation time descending.
        /// </summary>
        /// <param name="keyword">Search keyword; when null or empty, all documents match.</param>
        /// <param name="type">Optional document-type filter (exact match on the "type" field).</param>
        /// <param name="pageIndex">1-based page number.</param>
        /// <param name="pageSize">Number of items per page.</param>
        /// <param name="dataCount">Total hit count, capped at pageSize * 1000.</param>
        /// <returns>The requested page of <see cref="LuceneModel"/> results.</returns>
        /// <exception cref="LuceneException.LuceneException">Wraps any underlying search failure.</exception>
        public List<LuceneModel> GetList(string keyword, int? type, int pageIndex, int pageSize, out int dataCount)
        {
            var ver = Lucene.Net.Util.Version.LUCENE_29;
            dataCount = 0;
            var result = new List<LuceneModel>();
            IndexSearcher searcher = null;
            try
            {
                searcher = new IndexSearcher(FSDirectory.Open(new DirectoryInfo(LucenePath)), true);

                var bq = new BooleanQuery();
                if (!string.IsNullOrEmpty(keyword))
                {
                    // Segment the keyword with PanGu, then search both title and content.
                    // SHOULD clauses combine as "or"; MUST would mean "and", MUST_NOT "not".
                    keyword = SplitContent.SplitWords(keyword, new PanGuTokenizer());
                    var titleParser = new QueryParser(ver, "title", new PanGuAnalyzer(true));
                    bq.Add(titleParser.Parse(keyword), BooleanClause.Occur.SHOULD);
                    var contentParser = new QueryParser(ver, "content", new PanGuAnalyzer(true));
                    bq.Add(contentParser.Parse(keyword), BooleanClause.Occur.SHOULD);
                }
                else
                {
                    // No keyword: match every document via a wildcard on title.
                    // (QueryParser would throw on an empty keyword, so it is avoided here.)
                    bq.Add(new WildcardQuery(new Term("title", "*")), BooleanClause.Occur.MUST);
                }
                if (type.HasValue)
                {
                    // "type" is indexed NOT_ANALYZED, so an exact TermQuery is appropriate.
                    bq.Add(new TermQuery(new Term("type", type.Value.ToString())), BooleanClause.Occur.MUST);
                }

                // Newest first — equivalent to SQL "order by createtime desc".
                var sort = new Sort(new SortField("createtime", SortField.LONG, true));

                // Collect only as many top docs as the requested page needs.
                var docs = searcher.Search(bq, null, pageSize * pageIndex, sort);

                // Cap the reported total so callers cannot page arbitrarily deep.
                dataCount = docs.totalHits > pageSize * 1000 ? pageSize * 1000 : docs.totalHits;

                int currentCount = (pageIndex - 1) * pageSize;
                while (currentCount < dataCount && currentCount < pageSize * pageIndex)
                {
                    Document doc = searcher.Doc(docs.scoreDocs[currentCount].doc);
                    result.Add(new LuceneModel
                    {
                        Content = doc.Get("content"),
                        Title = doc.Get("title"),
                        ID = Convert.ToInt32(doc.Get("id")),
                        ClickCount = Convert.ToInt32(doc.Get("clickcount")),
                        Images = doc.Get("images"),
                        Tags = doc.Get("tags"),
                        Type = Convert.ToInt32(doc.Get("type")),
                        CreateTime = DateTime.ParseExact(doc.Get("createtime"), "yyyyMMddHHmmss", System.Globalization.CultureInfo.CurrentCulture)
                    });
                    currentCount++;
                }
                return result;
            }
            catch (Exception ex)
            {
                throw new LuceneException.LuceneException("Lucene获取列表错误", ex);
            }
            finally
            {
                // BUG FIX: the searcher (and the FSDirectory it opened) was never
                // released, leaking file handles on every call.
                if (searcher != null)
                    searcher.Close();
            }
        }
Beispiel #3
0
 /// <summary>
 /// Queues <paramref name="model"/> for re-indexing. The index type is marked
 /// as Modify so the queue consumer knows to replace the existing document.
 /// </summary>
 /// <param name="model">The record whose index entry should be updated.</param>
 public void Edit(LuceneModel model)
 {
     // Mark before enqueueing so the consumer always sees the final state.
     model.IndexType = LuceneType.Modify;
     LuceneModels.Enqueue(model);
 }
Beispiel #4
0
 /// <summary>
 /// Queues <paramref name="model"/> for insertion into the index. The index
 /// type is marked as Insert so the queue consumer adds a new document.
 /// </summary>
 /// <param name="model">The record to add to the index.</param>
 public void Add(LuceneModel model)
 {
     // Mark before enqueueing so the consumer always sees the final state.
     model.IndexType = LuceneType.Insert;
     LuceneModels.Enqueue(model);
 }
Beispiel #5
0
 /// <summary>
 /// Updates an indexed document by deleting the existing entry and inserting
 /// the new version (Lucene has no in-place update; delete + add is the idiom).
 /// </summary>
 /// <param name="model">The record to re-index.</param>
 /// <param name="writer">An open index writer to apply both operations with.</param>
 private void ModifyData(LuceneModel model, IndexWriter writer)
 {
     // Order matters: the old document must be removed before the new one is added.
     DeleteData(model, writer);
     InsertData(model,writer);
 }
Beispiel #6
0
 /// <summary>
 /// Deletes the single indexed document whose "id" AND "type" fields both match
 /// <paramref name="model"/>.
 /// </summary>
 /// <param name="model">Supplies the id/type pair identifying the document.</param>
 /// <param name="writer">An open index writer to apply the deletion with.</param>
 private void DeleteData(LuceneModel model, IndexWriter writer)
 {
     // BUG FIX: DeleteDocuments(Term[]) deletes documents matching ANY of the
     // given terms (OR semantics), so the previous call removed every document
     // with this id regardless of type AND every document of this type.
     // Combining both terms as MUST clauses restricts deletion to the exact
     // (id, type) pair.
     var query = new BooleanQuery();
     query.Add(new TermQuery(new Term("id", model.ID.ToString())), BooleanClause.Occur.MUST);
     query.Add(new TermQuery(new Term("type", model.Type.ToString())), BooleanClause.Occur.MUST);
     writer.DeleteDocuments(query);
 }
Beispiel #7
0
        /// <summary>
        /// Builds a Lucene document from <paramref name="model"/> and adds it to the
        /// index. One record maps to one document; all field values are stored as strings.
        /// </summary>
        /// <param name="model">The record to index.</param>
        /// <param name="writer">An open index writer to add the document with.</param>
        private void InsertData(LuceneModel model, IndexWriter writer)
        {
            // Field option reference:
            //   Field.Store.COMPRESS : store compressed (long text/binary; needs SharpZipLib)
            //   Field.Store.YES / NO : store / don't store the original value
            //                          (only stored fields can be read back via Document.Get)
            //   Field.Index.NO            : not indexed
            //   Field.Index.ANALYZED      : tokenized and indexed (required for fuzzy search)
            //   Field.Index.NOT_ANALYZED  : indexed as a single untokenized term (exact match)
            //   Field.Index.NO_NORMS      : untokenized, no norms stored (saves space)
            //   Field.TermVector.*        : which term-vector data (positions/offsets) to keep
            var document = new Document();

            // id/type are exact-match keys, so index them untokenized.
            document.Add(new Field("id", model.ID.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.Add(new Field("type", model.Type.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Title is tokenized for fuzzy search and boosted above the other fields;
            // WITH_POSITIONS_OFFSETS keeps token positions/offsets (e.g. for highlighting).
            var titleField = new Field("title", model.Title, Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS);
            titleField.SetBoost(TitleBoost);
            document.Add(titleField);

            if (!string.IsNullOrEmpty(model.Content))
                document.Add(new Field("content", model.Content, Field.Store.COMPRESS, Field.Index.ANALYZED,
                    Field.TermVector.WITH_POSITIONS_OFFSETS));

            // BUG FIX: the format string was "yyyyMMddhhmmss" (12-hour "hh"), which
            // collapsed afternoon hours into 01-12. That corrupted the descending
            // createtime sort in GetList and disagreed with its round-trip parse,
            // which expects "yyyyMMddHHmmss". Use the 24-hour format, and the
            // invariant culture so the stored value is culture-independent.
            document.Add(new Field("createtime",
                model.CreateTime.ToString("yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture),
                Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

            if (!string.IsNullOrEmpty(model.Images))
                document.Add(new Field("images", model.Images, Field.Store.YES, Field.Index.NO));
            if (!string.IsNullOrEmpty(model.Tags))
                document.Add(new Field("tags", model.Tags, Field.Store.YES, Field.Index.ANALYZED,
                    Field.TermVector.WITH_POSITIONS_OFFSETS));
            document.Add(new Field("clickcount", model.ClickCount.ToString(), Field.Store.YES, Field.Index.NO));

            writer.AddDocument(document);
        }