/// <summary>
/// Deletes from the database every document whose stored hash code matches
/// one of the given hash codes.
/// </summary>
/// <param name="dbname">Database name (passed to the Xapian writable database, opened with DB_CREATE_OR_OPEN).</param>
/// <param name="hashcodelist">List of document-path hash codes to delete.</param>
/// <returns>1 on success; 0 if any exception was thrown.</returns>
public int delDocument(string dbname, List<string> hashcodelist)
{
    Xapian.WritableDatabase database = null;
    try
    {
        database = new Xapian.WritableDatabase(dbname, Xapian.Xapian.DB_CREATE_OR_OPEN);
        foreach (var item in hashcodelist)
        {
            Xapian.Enquire enquire = new Xapian.Enquire(database);

            // Configure the parser used to look the hash up.
            Xapian.QueryParser qp = new Xapian.QueryParser();
            qp.SetDatabase(database);
            qp.SetDefaultOp(Xapian.Query.op.OP_ELITE_SET);
            qp.SetStemmingStrategy(Xapian.QueryParser.stem_strategy.STEM_NONE);

            // Search by hash value; hash terms carry the "Q" prefix.
            qp.AddPrefix("", "Q");
            Xapian.Query query = qp.ParseQuery(item);
            Console.WriteLine("query is" + query.GetDescription() + "\n");

            // Run the search and walk every match.
            enquire.SetQuery(query);
            Xapian.MSet XapAns = enquire.GetMSet(0, int.MaxValue);
            for (Xapian.MSetIterator iter = XapAns.Begin(); iter != XapAns.End(); ++iter)
            {
                Xapian.Document iterdoc = iter.GetDocument();

                // Only delete when the stored hash value is exactly the requested one,
                // guarding against false matches from the term search.
                if (iterdoc.GetValue(VALUE_HASHCODE) == item)
                {
                    uint docid = iter.GetDocId();   // unique document id
                    database.DeleteDocument(docid); // delete the document
                }
            }
        }
        database.Commit(); // commit the changes
        database.Close();  // close the database
    }
    catch (Exception e)
    {
        log.Error(e.Message);
        Console.Error.WriteLine("Exception: " + e.ToString());

        // Close the handle on the failure path too, so the writable database is not
        // left open until the garbage collector finalizes the object.
        if (database != null)
        {
            try
            {
                database.Close();
            }
            catch (Exception closeError)
            {
                log.Error(closeError.Message);
            }
        }
        return(0);
    }
    return(1);
}
private uint VALUE_EXTENSION = 6; // file extension
// private uint VALUE_HASHCODE = 7; // search hash

/// <summary>
/// Searches with a level filter flag and an added-time lower bound; used for push.
/// </summary>
/// <param name="dbname">Database path.</param>
/// <param name="querystr">Query string.</param>
/// <param name="valuefilter">Level filter flag; "0" appends the "Sec0..0.5" range filter, any other value adds no level filter.</param>
/// <param name="addtime">Lower bound of the added-time range; the upper bound is today's date formatted as yyyy/MM/dd.</param>
/// <returns>The matching result set, or null if any exception was thrown.</returns>
public Xapian.MSet Query(string dbname, string querystr, string valuefilter, string addtime)
{
    try
    {
        Xapian.Database database = new Xapian.Database(dbname);
        Xapian.StringValueRangeProcessor svr = new Xapian.StringValueRangeProcessor(VALUE_SECLEVEL, "Sec", true); // level prefix
        Xapian.StringValueRangeProcessor svrt = new Xapian.StringValueRangeProcessor(VALUE_TIME, "Addt", true);   // time prefix
        Xapian.Enquire enquire = new Xapian.Enquire(database);

        // Configure the search prefixes.
        Xapian.QueryParser qp = new Xapian.QueryParser();
        if (querystr.Contains(@"//") || querystr.Contains(@"\\"))
        {
            // Looks like http://, ftp:// or a shared folder (\\ip): search the link prefix.
            qp.AddPrefix("", "A");
        }
        else
        {
            qp.AddPrefix("", "");
            qp.AddPrefix("", "C");
            qp.AddPrefix("", "T");

            // Segment the query text (non-search segmentation mode).
            ChineseSeg cs = new ChineseSeg();
            querystr = cs.JiebaSegnotSearch(querystr);
        }
        qp.SetDatabase(database);
        qp.SetDefaultOp(Xapian.Query.op.OP_ELITE_SET);
        qp.AddValuerangeprocessor(svr);
        qp.AddValuerangeprocessor(svrt);
        qp.SetStemmingStrategy(Xapian.QueryParser.stem_strategy.STEM_NONE);
        uint flags = (uint)(Xapian.QueryParser.feature_flag.FLAG_BOOLEAN
                            | Xapian.QueryParser.feature_flag.FLAG_PHRASE
                            | Xapian.QueryParser.feature_flag.FLAG_LOVEHATE
                            | Xapian.QueryParser.feature_flag.FLAG_BOOLEAN_ANY_CASE
                            | Xapian.QueryParser.feature_flag.FLAG_WILDCARD
                            | Xapian.QueryParser.feature_flag.FLAG_PURE_NOT);

        // Format with the invariant culture: in a custom format string "/" stands for the
        // current culture's date separator, so without this the range bound could come out
        // as e.g. "2024-01-05" (or use a non-Gregorian calendar) and no longer be a
        // yyyy/MM/dd string.
        string today = DateTime.Now.ToString("yyyy/MM/dd", System.Globalization.CultureInfo.InvariantCulture);

        // Level filter: "0" restricts the results with the Sec0..0.5 range.
        string str;
        if (valuefilter == "0")
        {
            str = querystr + " Sec0..0.5";
        }
        else
        {
            str = querystr;
        }

        // Time filter: from addtime up to today.
        str = str + @" Addt" + addtime + @".." + today;
        Xapian.Query query = qp.ParseQuery(str, flags);
        Console.WriteLine("query is" + query.GetDescription() + "\n");

        // Run the search and return the result set.
        enquire.SetQuery(query);
        Xapian.MSet XapAns = enquire.GetMSet(0, int.MaxValue);
        return(XapAns);
    }
    catch (Exception e)
    {
        log.Error(e.Message);
        Console.Error.WriteLine("Exception: " + e.ToString());
        return(null);
    }
}
/// <summary>
/// Updates a list of articles: every indexed document whose stored hash code matches an
/// item's hash code is replaced by a freshly built document for that item.
/// </summary>
/// <param name="dbname">Database path.</param>
/// <param name="list">Articles to update.</param>
/// <returns>1 on success; 0 if any exception was thrown while updating.</returns>
public int updateDocument(string dbname, List<xapIndex> list)
{
    ChineseSeg cs = new ChineseSeg();
    Xapian.WritableDatabase database = null;
    try
    {
        database = new Xapian.WritableDatabase(dbname, Xapian.Xapian.DB_CREATE_OR_OPEN);
        foreach (var item in list)
        {
            Xapian.Enquire enquire = new Xapian.Enquire(database);

            // Configure the parser used to look the hash up.
            Xapian.QueryParser qp = new Xapian.QueryParser();
            qp.SetDatabase(database);
            qp.SetDefaultOp(Xapian.Query.op.OP_ELITE_SET);
            qp.SetStemmingStrategy(Xapian.QueryParser.stem_strategy.STEM_NONE);

            // Find the article by its hash; hash terms carry the "Q" prefix.
            qp.AddPrefix("", "Q");
            Xapian.Query query = qp.ParseQuery(item.hashcode);
            Console.WriteLine("query is" + query.GetDescription() + "\n");

            // Run the search and walk every match.
            enquire.SetQuery(query);
            Xapian.MSet XapAns = enquire.GetMSet(0, int.MaxValue);
            for (Xapian.MSetIterator iter = XapAns.Begin(); iter != XapAns.End(); ++iter)
            {
                Xapian.Document iterdoc = iter.GetDocument();

                // Skip matches whose stored hash differs, guarding against false hits
                // from the term search.
                if (iterdoc.GetValue(VALUE_HASHCODE) != item.hashcode)
                {
                    continue;
                }

                uint docid = iter.GetDocId(); // unique document id
                Xapian.Document doc = new Xapian.Document();
                Xapian.TermGenerator indexer = new Xapian.TermGenerator();
                doc.SetData(HttpUtility.HtmlEncode(item.content)); // document payload

                // Format with the invariant culture: in a custom format string "/" stands
                // for the current culture's date separator, so without this the stored
                // value could differ from the yyyy/MM/dd form depending on the machine's
                // regional settings.
                string insertDate = DateTime.Now.ToString("yyyy/MM/dd", System.Globalization.CultureInfo.InvariantCulture);
                doc.AddValue(VALUE_TIME, insertDate);                        // insertion time
                doc.AddValue(VALUE_AHREF, item.ahref);                       // original link
                doc.AddValue(VALUE_LOCALINK, item.link);                     // local link
                doc.AddValue(VALUE_TITLE, item.title);                       // article title
                doc.AddValue(VALUE_SOURCE, item.source.ToString());          // source type
                doc.AddValue(VALUE_SECLEVEL, item.seclevel.ToString());      // level
                doc.AddValue(VALUE_EXTENSION, item.extension.ToString());    // file extension
                doc.AddValue(VALUE_HASHCODE, item.hashcode);                 // hash

                indexer.SetDocument(doc);
                indexer.SetStemmingStrategy(Xapian.TermGenerator.stem_strategy.STEM_NONE); // no stemming
                string strcut = cs.JiebaSeg(item.content);
                string titlecut = cs.JiebaSeg(item.title);
                indexer.IndexText(strcut, 1, "C");        // content prefix
                indexer.IndexText(titlecut, 1, "T");      // title prefix
                indexer.IndexText(item.hashcode, 1, "Q"); // document-name hash prefix
                indexer.IndexText(item.ahref, 1, "A");    // link prefix (used for push folder subscriptions)
                database.ReplaceDocument(docid, doc);     // replace the document
            }
        }
        database.Commit(); // commit the changes
        database.Close();  // close the database
    }
    catch (Exception e)
    {
        log.Error(e.Message);
        Console.Error.WriteLine("Exception: " + e.ToString());

        // Close the handle on the failure path too, so the writable database is not
        // left open until the garbage collector finalizes the object.
        if (database != null)
        {
            try
            {
                database.Close();
            }
            catch (Exception closeError)
            {
                log.Error(closeError.Message);
            }
        }
        return(0);
    }
    return(1);
}
/// <summary>
/// Full-text search filtered by level/source category and by file-extension type.
/// </summary>
/// <param name="dbname">Database name.</param>
/// <param name="querystr">Search terms.</param>
/// <param name="secsource">Category filter: 0 appends the "Sec0..0.5" range, 1 to 4 append the matching "Sou" range, any other value adds no filter.</param>
/// <param name="filetype">File type name ("ppt", "word", "excel", "txt", "pdf", "html"); any other value adds no extension filter.</param>
/// <returns>The matching result set, or null if any exception was thrown.</returns>
public Xapian.MSet Query(string dbname, string querystr, int secsource, string filetype)
{
    try
    {
        var db = new Xapian.Database(dbname);
        var levelRange = new Xapian.StringValueRangeProcessor(VALUE_SECLEVEL, "Sec", true);      // level prefix
        var sourceRange = new Xapian.StringValueRangeProcessor(VALUE_SOURCE, "Sou", true);       // source prefix
        var extensionRange = new Xapian.StringValueRangeProcessor(VALUE_EXTENSION, "Ext", true); // extension prefix
        var enquire = new Xapian.Enquire(db);

        // Configure the search prefixes and the range processors.
        var parser = new Xapian.QueryParser();
        parser.AddPrefix("", "");
        parser.AddPrefix("", "C");
        parser.AddPrefix("", "T");
        parser.SetDatabase(db);
        parser.SetDefaultOp(Xapian.Query.op.OP_ELITE_SET);
        parser.AddValuerangeprocessor(levelRange);
        parser.AddValuerangeprocessor(extensionRange);
        parser.AddValuerangeprocessor(sourceRange);
        parser.SetStemmingStrategy(Xapian.QueryParser.stem_strategy.STEM_NONE);

        uint parseFlags = (uint)(Xapian.QueryParser.feature_flag.FLAG_BOOLEAN
                                 | Xapian.QueryParser.feature_flag.FLAG_PHRASE
                                 | Xapian.QueryParser.feature_flag.FLAG_LOVEHATE
                                 | Xapian.QueryParser.feature_flag.FLAG_BOOLEAN_ANY_CASE
                                 | Xapian.QueryParser.feature_flag.FLAG_WILDCARD
                                 | Xapian.QueryParser.feature_flag.FLAG_PURE_NOT);

        // Category filter, indexed by secsource:
        // 0 = no level, 1 = web, 2 = FTP, 3 = share, 4 = level source.
        string[] categoryFilters =
        {
            " Sec0..0.5",
            " Sou1..1.5",
            " Sou2..2.5",
            " Sou3..3.5",
            " Sou4..4.5"
        };
        string composed = querystr;
        if (secsource >= 0 && secsource < categoryFilters.Length)
        {
            composed = querystr + categoryFilters[secsource];
        }

        // Extension filter: the two arrays are parallel (type name -> range filter).
        string[] typeNames = { "ppt", "word", "excel", "txt", "pdf", "html" };
        string[] typeFilters =
        {
            " Ext1..1.5",
            " Ext2..2.5",
            " Ext3..3.5",
            " Ext4..4.5",
            " Ext5..5.5",
            " Ext6..6.5"
        };
        int typeIndex = Array.IndexOf(typeNames, filetype);
        if (typeIndex >= 0)
        {
            composed = composed + typeFilters[typeIndex];
        }

        Xapian.Query parsed = parser.ParseQuery(composed, parseFlags);
        Console.WriteLine("query is" + parsed.GetDescription() + "\n");

        // Run the search and hand back the result set.
        enquire.SetQuery(parsed);
        return enquire.GetMSet(0, int.MaxValue);
    }
    catch (Exception e)
    {
        log.Error("Message" + e.Message);
        log.Error("InnerException" + e.InnerException);
        log.Error("StackTrace" + e.StackTrace);
        Console.Error.WriteLine("Exception: " + e.ToString());
        return null;
    }
}