Exemplo n.º 1
0
        private uint VALUE_EXTENSION = 6;       // file extension (presumably the document value slot number; confirm against the other VALUE_* fields)
        //private uint VALUE_HASHCODE = 7;        // search hash

        /// <summary>
        /// Searches the index with an optional security-level filter and an
        /// added-time range filter. Intended for push notifications.
        /// </summary>
        /// <param name="dbname">Path of the Xapian database to open.</param>
        /// <param name="querystr">
        /// Search expression. If it contains "//" or "\\" (URL or network share)
        /// it is matched against the "A" prefix without segmentation; otherwise it
        /// is word-segmented and matched against the unprefixed, "C" and "T" terms.
        /// </param>
        /// <param name="valuefilter">
        /// Security-level flag: "0" appends the range filter "Sec0..0.5"; any
        /// other value applies no level filter.
        /// </param>
        /// <param name="addtime">
        /// Lower bound of the added-time range (presumably formatted yyyy/MM/dd
        /// like the upper bound; confirm with callers). The upper bound is
        /// today's date.
        /// </param>
        /// <returns>The result set, or null if any exception was thrown.</returns>
        public Xapian.MSet Query(string dbname, string querystr, string valuefilter, string addtime)
        {
            try
            {
                Xapian.Database database = new Xapian.Database(dbname);
                Xapian.StringValueRangeProcessor svr  = new Xapian.StringValueRangeProcessor(VALUE_SECLEVEL, "Sec", true);  // security-level range prefix
                Xapian.StringValueRangeProcessor svrt = new Xapian.StringValueRangeProcessor(VALUE_TIME, "Addt", true);     // added-time range prefix
                Xapian.Enquire enquire = new Xapian.Enquire(database);

                // Configure which term prefixes unprefixed query words map to.
                Xapian.QueryParser qp = new Xapian.QueryParser();
                if (querystr.Contains(@"//") || querystr.Contains(@"\\"))        // http://, ftp:// or a \\ip shared folder
                {
                    qp.AddPrefix("", "A");
                }
                else
                {
                    qp.AddPrefix("", "");
                    qp.AddPrefix("", "C");
                    qp.AddPrefix("", "T");
                    // Segment the query text (non-search segmentation mode).
                    ChineseSeg cs = new ChineseSeg();
                    querystr = cs.JiebaSegnotSearch(querystr);
                }

                qp.SetDatabase(database);
                qp.SetDefaultOp(Xapian.Query.op.OP_ELITE_SET);
                qp.AddValuerangeprocessor(svr);
                qp.AddValuerangeprocessor(svrt);
                qp.SetStemmingStrategy(Xapian.QueryParser.stem_strategy.STEM_NONE);

                uint flags = (uint)(Xapian.QueryParser.feature_flag.FLAG_BOOLEAN |
                                    Xapian.QueryParser.feature_flag.FLAG_PHRASE |
                                    Xapian.QueryParser.feature_flag.FLAG_LOVEHATE |
                                    Xapian.QueryParser.feature_flag.FLAG_BOOLEAN_ANY_CASE |
                                    Xapian.QueryParser.feature_flag.FLAG_WILDCARD |
                                    Xapian.QueryParser.feature_flag.FLAG_PURE_NOT);

                // "0" means the caller may only see level-0 documents, so add the level filter.
                string str = querystr;
                if (valuefilter == "0")
                {
                    str = str + " Sec0..0.5";
                }

                // Time filter. "/" in a custom date format is replaced by the current
                // culture's date separator, so format with the invariant culture to
                // always get literal slashes regardless of the machine's locale.
                string today = DateTime.Now.ToString("yyyy/MM/dd", System.Globalization.CultureInfo.InvariantCulture);
                str = str + @" Addt" + addtime + @".." + today;

                Xapian.Query query = qp.ParseQuery(str, flags);

                // NOTE(review): the parser presumably holds only a native pointer to the
                // range processors; keep the managed wrappers reachable until parsing is
                // done so they cannot be finalized while still in use.
                GC.KeepAlive(svr);
                GC.KeepAlive(svrt);

                Console.WriteLine("query is" + query.GetDescription() + "\n");

                // Run the search and return every match.
                enquire.SetQuery(query);
                Xapian.MSet XapAns = enquire.GetMSet(0, int.MaxValue);
                return(XapAns);
            }
            catch (Exception e)
            {
                log.Error(e.Message);
                Console.Error.WriteLine("Exception: " + e.ToString());
                return(null);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// For each item, looks up its hash under the "Q" prefix and, when no
        /// document matches, indexes the item as a new document.
        /// </summary>
        /// <param name="dbname">Path of the Xapian database (created if it does not exist).</param>
        /// <param name="list">Articles to insert.</param>
        /// <returns>1 on success; 0 if any exception was thrown.</returns>
        public int checkandinsertIndex(string dbname, List <xapIndex> list)
        {
            ChineseSeg cs = new ChineseSeg();

            try
            {
                Xapian.WritableDatabase database = new Xapian.WritableDatabase(dbname, Xapian.Xapian.DB_CREATE_OR_OPEN);
                try
                {
                    foreach (var item in list)
                    {
                        Xapian.Enquire enquire = new Xapian.Enquire(database);

                        // Parser that maps the bare query text onto the "Q" (hash) prefix.
                        Xapian.QueryParser qp = new Xapian.QueryParser();
                        qp.SetDatabase(database);
                        qp.SetDefaultOp(Xapian.Query.op.OP_ELITE_SET);
                        qp.SetStemmingStrategy(Xapian.QueryParser.stem_strategy.STEM_NONE);
                        qp.AddPrefix("", "Q");

                        // The existence check is done on the item's hash.
                        Xapian.Query query = qp.ParseQuery(item.hashcode);
                        Console.WriteLine("query is" + query.GetDescription() + "\n");

                        enquire.SetQuery(query);
                        // Only existence matters, so a single match is enough.
                        Xapian.MSet XapAns = enquire.GetMSet(0, 1);
                        if (XapAns != null && XapAns.Size() != 0)
                        {
                            continue;                                               // already indexed
                        }

                        Xapian.TermGenerator indexer = new Xapian.TermGenerator();
                        Xapian.Document      doc     = new Xapian.Document();
                        doc.SetData(HttpUtility.HtmlEncode(item.content));          // document payload

                        // "/" in a custom date format is culture-sensitive; use the invariant
                        // culture so the stored value always has literal slashes and stays
                        // comparable with the string range filter used at query time.
                        string today = DateTime.Now.ToString("yyyy/MM/dd", System.Globalization.CultureInfo.InvariantCulture);
                        doc.AddValue(VALUE_TIME, today);                            // insertion date
                        doc.AddValue(VALUE_AHREF, item.ahref);                      // original link
                        doc.AddValue(VALUE_LOCALINK, item.link);                    // local link
                        doc.AddValue(VALUE_TITLE, item.title);                      // article title
                        doc.AddValue(VALUE_SOURCE, item.source.ToString());         // source type
                        doc.AddValue(VALUE_SECLEVEL, item.seclevel.ToString());     // security level
                        doc.AddValue(VALUE_EXTENSION, item.extension.ToString());   // file extension
                        doc.AddValue(VALUE_HASHCODE, item.hashcode);                // hash

                        indexer.SetDocument(doc);
                        indexer.SetStemmingStrategy(Xapian.TermGenerator.stem_strategy.STEM_NONE);  // no stemming

                        string strcut   = cs.JiebaSeg(item.content);
                        string titlecut = cs.JiebaSeg(item.title);

                        indexer.IndexText(strcut, 1, "C");             // content prefix
                        indexer.IndexText(titlecut, 1, "T");           // title prefix
                        indexer.IndexText(item.hashcode, 1, "Q");      // document hash prefix
                        indexer.IndexText(item.ahref, 1, "A");         // link prefix (used for pushed folder subscriptions)

                        database.AddDocument(doc);
                    }
                    database.Commit();
                }
                finally
                {
                    // Always release the database, even when indexing fails part-way,
                    // instead of leaving it open until the wrapper is finalized.
                    database.Close();
                }
            }
            catch (Exception e)
            {
                log.Error(e.Message);
                Console.Error.WriteLine("Exception: " + e.ToString());
                return(0);
            }
            return(1);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Full-text search filtered by security level or source, and by file type.
        /// </summary>
        /// <param name="dbname">Path of the Xapian database to open.</param>
        /// <param name="querystr">Search expression; matched against the unprefixed, "C" and "T" terms.</param>
        /// <param name="secsource">
        /// Filter selector: 0 appends "Sec0..0.5"; 1 to 4 append the matching
        /// "SouN..N.5" source range; any other value applies no filter.
        /// </param>
        /// <param name="filetype">
        /// One of "ppt", "word", "excel", "txt", "pdf" or "html" to append the
        /// corresponding "Ext" range; any other value applies no file-type filter.
        /// </param>
        /// <returns>The result set, or null if any exception was thrown.</returns>
        public Xapian.MSet Query(string dbname, string querystr, int secsource, string filetype)
        {
            try
            {
                Xapian.Database database = new Xapian.Database(dbname);
                Xapian.StringValueRangeProcessor svr = new Xapian.StringValueRangeProcessor(VALUE_SECLEVEL, "Sec", true);   // security-level range prefix
                Xapian.StringValueRangeProcessor sou = new Xapian.StringValueRangeProcessor(VALUE_SOURCE, "Sou", true);     // source range prefix
                Xapian.StringValueRangeProcessor ext = new Xapian.StringValueRangeProcessor(VALUE_EXTENSION, "Ext", true);  // extension range prefix
                Xapian.Enquire enquire = new Xapian.Enquire(database);

                // Configure which term prefixes unprefixed query words map to.
                Xapian.QueryParser qp = new Xapian.QueryParser();
                qp.AddPrefix("", "");
                qp.AddPrefix("", "C");
                qp.AddPrefix("", "T");
                qp.SetDatabase(database);
                qp.SetDefaultOp(Xapian.Query.op.OP_ELITE_SET);
                qp.AddValuerangeprocessor(svr);
                qp.AddValuerangeprocessor(ext);
                qp.AddValuerangeprocessor(sou);
                qp.SetStemmingStrategy(Xapian.QueryParser.stem_strategy.STEM_NONE);

                uint flags = (uint)(Xapian.QueryParser.feature_flag.FLAG_BOOLEAN |
                                    Xapian.QueryParser.feature_flag.FLAG_PHRASE |
                                    Xapian.QueryParser.feature_flag.FLAG_LOVEHATE |
                                    Xapian.QueryParser.feature_flag.FLAG_BOOLEAN_ANY_CASE |
                                    Xapian.QueryParser.feature_flag.FLAG_WILDCARD |
                                    Xapian.QueryParser.feature_flag.FLAG_PURE_NOT);

                string str = "";
                // Security-level / source filter.
                switch (secsource)
                {
                case 0:
                    str = querystr + " Sec0..0.5";          // no security level
                    break;

                case 1:
                    str = querystr + " Sou1..1.5";          // web source
                    break;

                case 2:
                    str = querystr + " Sou2..2.5";          // FTP source
                    break;

                case 3:
                    str = querystr + " Sou3..3.5";          // share source
                    break;

                case 4:
                    str = querystr + " Sou4..4.5";          // leveled source
                    break;

                default:
                    str = querystr;                         // everything
                    break;
                }

                // File-type filter.
                switch (filetype)
                {
                case "ppt":
                    str = str + " Ext1..1.5";
                    break;

                case "word":
                    str = str + " Ext2..2.5";
                    break;

                case "excel":
                    str = str + " Ext3..3.5";
                    break;

                case "pdf":
                    str = str + " Ext5..5.5";
                    break;

                case "txt":
                    str = str + " Ext4..4.5";
                    break;

                case "html":
                    str = str + " Ext6..6.5";
                    break;

                default:
                    break;
                }

                Xapian.Query query = qp.ParseQuery(str, flags);

                // NOTE(review): the parser presumably holds only a native pointer to the
                // range processors; keep the managed wrappers reachable until parsing is
                // done so they cannot be finalized while still in use.
                GC.KeepAlive(svr);
                GC.KeepAlive(ext);
                GC.KeepAlive(sou);

                Console.WriteLine("query is" + query.GetDescription() + "\n");

                // Run the search and return every match.
                enquire.SetQuery(query);
                Xapian.MSet XapAns = enquire.GetMSet(0, int.MaxValue);
                return(XapAns);
            }
            catch (Exception e)
            {
                log.Error("Message" + e.Message);
                log.Error("InnerException" + e.InnerException);
                log.Error("StackTrace" + e.StackTrace);
                Console.Error.WriteLine("Exception: " + e.ToString());
                return(null);
            }
        }