Beispiel #1
0
        /// <summary>
        /// Indexes a batch of articles into a Xapian database.
        /// </summary>
        /// <param name="dbname">Name/path of the database; it is opened with DB_CREATE_OR_OPEN.</param>
        /// <param name="list">Articles to insert.</param>
        /// <returns>1 on success, 0 on failure (the failure is logged).</returns>
        public int insertIndex(string dbname, List <xapIndex> list)
        {
            ChineseSeg cs = new ChineseSeg();

            // Reject a missing batch up front instead of opening (and possibly
            // creating) the database only to fail inside the loop.
            if (list == null)
            {
                log.Error("insertIndex: list is null");
                return(0);
            }

            try
            {
                // The using block disposes the database handle on every exit path,
                // so it is released even when indexing throws part-way through.
                using (Xapian.WritableDatabase database =
                           new Xapian.WritableDatabase(dbname, Xapian.Xapian.DB_CREATE_OR_OPEN))
                {
                    foreach (var item in list)
                    {
                        Console.WriteLine("插入数据:" + item.title);
                        Xapian.TermGenerator indexer = new Xapian.TermGenerator();
                        Xapian.Document      doc     = new Xapian.Document();
                        doc.SetData(HttpUtility.HtmlEncode(item.content));              // document payload (HTML-encoded content)

                        DateTime DateTimestart = DateTime.Now;
                        doc.AddValue(VALUE_TIME, DateTimestart.ToString("yyyy/MM/dd")); // insertion date
                        doc.AddValue(VALUE_AHREF, item.ahref);                          // original link
                        doc.AddValue(VALUE_LOCALINK, item.link);                        // local link
                        doc.AddValue(VALUE_TITLE, item.title);                          // article title
                        doc.AddValue(VALUE_SOURCE, item.source.ToString());             // source type
                        doc.AddValue(VALUE_SECLEVEL, item.seclevel.ToString());         // level
                        doc.AddValue(VALUE_EXTENSION, item.extension.ToString());       // file extension
                        doc.AddValue(VALUE_HASHCODE, item.hashcode);                    // hash

                        indexer.SetDocument(doc);
                        indexer.SetStemmingStrategy(Xapian.TermGenerator.stem_strategy.STEM_NONE); // no stemming

                        string strcut   = cs.JiebaSeg(item.content);                               // Chinese word segmentation
                        string titlecut = cs.JiebaSeg(item.title);                                 // Chinese word segmentation

                        indexer.IndexText(strcut, 1, "C");                                         // content terms, prefix C
                        indexer.IndexText(titlecut, 1, "T");                                       // title terms, prefix T
                        indexer.IndexText(item.hashcode, 1, "Q");                                  // document hash, prefix Q
                        indexer.IndexText(item.ahref, 1, "A");                                     // link terms, prefix A (used for pushed folder subscriptions)

                        database.AddDocument(doc);                                                 // add to the database
                    }
                    database.Commit();                                                             // commit the batch
                    database.Close();                                                              // close the database
                }
            }
            catch (Exception e)
            {
                log.Error(e.Message);
                Console.Error.WriteLine("Exception: " + e.ToString());
                return(0);
            }
            return(1);
        }
Beispiel #2
0
        /// <summary>
        /// Deletes from the database every document whose stored hash code equals
        /// one of the given hash codes.
        /// </summary>
        /// <param name="dbname">Name/path of the database; it is opened with DB_CREATE_OR_OPEN.</param>
        /// <param name="hashcodelist">Article path hash codes of the documents to delete.</param>
        /// <returns>1 on success, 0 on failure (the failure is logged).</returns>
        public int delDocument(string dbname, List <string> hashcodelist)
        {
            // Reject a missing list up front instead of opening (and possibly
            // creating) the database only to fail inside the loop.
            if (hashcodelist == null)
            {
                log.Error("delDocument: hashcodelist is null");
                return(0);
            }

            try
            {
                // The using block disposes the database handle on every exit path,
                // so it is released even when a lookup or delete throws.
                using (Xapian.WritableDatabase database =
                           new Xapian.WritableDatabase(dbname, Xapian.Xapian.DB_CREATE_OR_OPEN))
                {
                    foreach (var item in hashcodelist)
                    {
                        Xapian.Enquire enquire = new Xapian.Enquire(database);
                        // Configure the parser used to look the hash up.
                        Xapian.QueryParser qp = new Xapian.QueryParser();
                        qp.SetDatabase(database);
                        qp.SetDefaultOp(Xapian.Query.op.OP_ELITE_SET);
                        qp.SetStemmingStrategy(Xapian.QueryParser.stem_strategy.STEM_NONE);
                        // Search for the hash value.
                        qp.AddPrefix("", "Q");  // hash terms carry the prefix Q
                        Xapian.Query query = qp.ParseQuery(item);
                        Console.WriteLine("query is" + query.GetDescription() + "\n");
                        // Run the search.
                        enquire.SetQuery(query);
                        // Fetch all matches.
                        Xapian.MSet XapAns = enquire.GetMSet(0, int.MaxValue);
                        for (Xapian.MSetIterator iter = XapAns.Begin(); iter != XapAns.End(); ++iter)
                        {
                            Xapian.Document iterdoc = iter.GetDocument();
                            // Only delete when the stored hash value is exactly the
                            // requested one; this guards against false search matches.
                            if (iterdoc.GetValue(VALUE_HASHCODE) == item)
                            {
                                uint docid = iter.GetDocId();               // unique document id
                                database.DeleteDocument(docid);             // delete the document
                            }
                        }
                    }
                    database.Commit();                                      // commit the deletions
                    database.Close();                                       // close the database
                }
            }
            catch (Exception e)
            {
                log.Error(e.Message);
                Console.Error.WriteLine("Exception: " + e.ToString());
                return(0);
            }

            return(1);
        }
Beispiel #3
0
    /// <summary>
    /// Smoke test for the Xapian C# bindings. Exercises version reporting,
    /// document and term handling, exception reporting, query parsing,
    /// MatchDecider, iterator behaviour under garbage collection, metadata and
    /// OP_SCALE_WEIGHT. Prints a diagnostic and exits with status 1 on the
    /// first failed check.
    /// </summary>
    public static void Main()
    {
        try {
            // Test the version number reporting functions give plausible
            // results.
            string v = "";
            v += Xapian.Version.Major();
            v += ".";
            v += Xapian.Version.Minor();
            v += ".";
            v += Xapian.Version.Revision();
            string v2 = Xapian.Version.String();
            if (v != v2)
            {
                System.Console.WriteLine("Unexpected version output (" + v + " != " + v2 + ")");
                System.Environment.Exit(1);
            }

            Xapian.Stem     stem = new Xapian.Stem("english");
            Xapian.Document doc  = new Xapian.Document();
            // Currently SWIG doesn't generate zero-byte clean code for
            // transferring strings between C# and C++.

            /*
             * doc.SetData("a\0b");
             * if (doc.GetData() == "a") {
             *  System.Console.WriteLine("GetData+SetData truncates at a zero byte");
             *  System.Environment.Exit(1);
             * }
             * if (doc.GetData() != "a\0b") {
             *  System.Console.WriteLine("GetData+SetData doesn't transparently handle a zero byte");
             *  System.Environment.Exit(1);
             * }
             */
            doc.SetData("is there anybody out there?");
            doc.AddTerm("XYzzy");
            doc.AddPosting(stem.Apply("is"), 1);
            doc.AddPosting(stem.Apply("there"), 2);
            doc.AddPosting(stem.Apply("anybody"), 3);
            doc.AddPosting(stem.Apply("out"), 4);
            doc.AddPosting(stem.Apply("there"), 5);

            Xapian.WritableDatabase db = new Xapian.WritableDatabase("", Xapian.Xapian.DB_BACKEND_INMEMORY);
            db.AddDocument(doc);
            if (db.GetDocCount() != 1)
            {
                System.Console.WriteLine("Unexpected db.GetDocCount() after adding one document");
                System.Environment.Exit(1);
            }

            if (doc.TermListCount() != 5)
            {
                System.Console.WriteLine("Unexpected doc.TermListCount()");
                System.Environment.Exit(1);
            }
            int count             = 0;
            Xapian.TermIterator i = doc.TermListBegin();
            while (i != doc.TermListEnd())
            {
                ++count;
                ++i;
            }
            if (count != 5)
            {
                System.Console.WriteLine("Unexpected number of terms when iterating the term list");
                System.Environment.Exit(1);
            }

            // Check exception handling for Xapian::DocNotFoundError.
            try {
                Xapian.Document doc2 = db.GetDocument(2);
                System.Console.WriteLine("Retrieved non-existent document: " + doc2.ToString());
                System.Environment.Exit(1);
            } catch (System.Exception e) {
                // We expect DocNotFoundError.  StartsWith (unlike Substring)
                // cannot throw when the message is shorter than the expected
                // prefix, so the real message is always reported.
                if (!e.Message.StartsWith("DocNotFoundError", System.StringComparison.Ordinal))
                {
                    System.Console.WriteLine("Unexpected exception from accessing non-existent document: " + e.Message);
                    System.Environment.Exit(1);
                }
            }

            Xapian.QueryParser qp = new Xapian.QueryParser();

            // Check QueryParser parsing error.
            try {
                qp.ParseQuery("test AND");
                System.Console.WriteLine("Successfully parsed bad query");
                System.Environment.Exit(1);
            } catch (System.Exception e) {
                if (e.Message != "QueryParserError: Syntax: <expression> AND <expression>")
                {
                    System.Console.WriteLine("Exception string not as expected, got: '" + e.Message + "'");
                    System.Environment.Exit(1);
                }
            }

            // FIXME: It would be better if the (uint) cast wasn't required here.
            qp.ParseQuery("hello world", (uint)Xapian.QueryParser.feature_flag.FLAG_BOOLEAN);

            // Test wrapping of null-able grouping parameter.
            qp.AddBooleanPrefix("colour", "XC");
            qp.AddBooleanPrefix("color", "XC");
            qp.AddBooleanPrefix("foo", "XFOO", null);
            qp.AddBooleanPrefix("bar", "XBAR", "XBA*");
            qp.AddBooleanPrefix("baa", "XBAA", "XBA*");
            // FIXME: It would be better if the (uint) cast wasn't required here.
            Xapian.DateRangeProcessor rpdate = new Xapian.DateRangeProcessor(1, (uint)Xapian.Xapian.RP_DATE_PREFER_MDY, 1960);
            qp.AddRangeprocessor(rpdate);
            qp.AddRangeprocessor(rpdate, null);
            qp.AddRangeprocessor(rpdate, "foo");

            if (Xapian.Query.MatchAll.GetDescription() != "Query(<alldocuments>)")
            {
                System.Console.WriteLine("Unexpected Query.MatchAll.toString()");
                System.Environment.Exit(1);
            }

            if (Xapian.Query.MatchNothing.GetDescription() != "Query()")
            {
                System.Console.WriteLine("Unexpected Query.MatchNothing.toString()");
                System.Environment.Exit(1);
            }

            // Check that OP_ELITE_SET works (in 0.9.6 and earlier it had the
            // wrong value in C#).
            try {
                Xapian.Query foo  = new Xapian.Query(Xapian.Query.op.OP_OR, "hello", "world");
                Xapian.Query foo2 = new Xapian.Query(Xapian.Query.op.OP_ELITE_SET, foo, foo);
                foo = foo2; // Avoid "unused variable" warning.
            } catch (System.Exception e) {
                System.Console.WriteLine("Using OP_ELITE_SET cause exception '" + e.Message + "'");
                System.Environment.Exit(1);
            }

            // Feature test for MatchDecider.
            doc = new Xapian.Document();
            doc.SetData("Two");
            doc.AddPosting(stem.Apply("out"), 1);
            doc.AddPosting(stem.Apply("source"), 2);
            doc.AddValue(0, "yes");
            db.AddDocument(doc);

            Xapian.Query   query   = new Xapian.Query(stem.Apply("out"));
            Xapian.Enquire enquire = new Xapian.Enquire(db);

            // Check Xapian::BAD_VALUENO is wrapped suitably.
            enquire.SetCollapseKey(Xapian.Xapian.BAD_VALUENO);

            enquire.SetQuery(query);
            Xapian.MSet mset = enquire.GetMSet(0, 10, null, new TestMatchDecider());
            if (mset.Size() != 1)
            {
                System.Console.WriteLine("MatchDecider found " + mset.Size().ToString() + " documents, expected 1");
                System.Environment.Exit(1);
            }
            if (mset.GetDocId(0) != 2)
            {
                System.Console.WriteLine("MatchDecider mset has wrong docid in");
                System.Environment.Exit(1);
            }

            mset = enquire.GetMSet(0, 10);
            for (Xapian.MSetIterator m = mset.Begin(); m != mset.End(); m++)
            {
                // In Xapian 1.2.6 and earlier, the iterator would become
                // eligible for garbage collection after being advanced.
                // It didn't actually get garbage collected often, but when
                // it did, it caused a crash.  Here we force a GC run to make
                // this issue manifest if it is present.
                System.GC.Collect();
                System.GC.WaitForPendingFinalizers();
            }

            // Test setting and getting metadata
            if (db.GetMetadata("Foo") != "")
            {
                System.Console.WriteLine("db.GetMetadata(\"Foo\") returned wrong value \"" + db.GetMetadata("Foo") + "\" - expected \"\"");
                System.Environment.Exit(1);
            }
            db.SetMetadata("Foo", "Foo");
            if (db.GetMetadata("Foo") != "Foo")
            {
                System.Console.WriteLine("db.GetMetadata(\"Foo\") returned wrong value \"" + db.GetMetadata("Foo") + "\" - expected \"Foo\"");
                System.Environment.Exit(1);
            }

            // Test OP_SCALE_WEIGHT and corresponding constructor
            Xapian.Query query4 = new Xapian.Query(Xapian.Query.op.OP_SCALE_WEIGHT, new Xapian.Query("foo"), 5.0);
            if (query4.GetDescription() != "Query(5 * foo)")
            {
                System.Console.WriteLine("Unexpected query4.GetDescription()");
                System.Environment.Exit(1);
            }
        } catch (System.Exception e) {
            System.Console.WriteLine("Exception: " + e.ToString());
            System.Environment.Exit(1);
        }
    }
Beispiel #4
0
        /// <summary>
        /// Re-indexes a list of articles: each article is looked up by its hash
        /// code and every matching document is replaced with a freshly built one.
        /// Articles with no matching document are left out (nothing is inserted).
        /// </summary>
        /// <param name="dbname">Path of the database; it is opened with DB_CREATE_OR_OPEN.</param>
        /// <param name="list">Articles to update.</param>
        /// <returns>1 on success, 0 on failure (the failure is logged).</returns>
        public int updateDocument(string dbname, List <xapIndex> list)
        {
            ChineseSeg cs = new ChineseSeg();

            // Reject a missing batch up front instead of opening (and possibly
            // creating) the database only to fail inside the loop.
            if (list == null)
            {
                log.Error("updateDocument: list is null");
                return(0);
            }

            try
            {
                // The using block disposes the database handle on every exit path,
                // so it is released even when a lookup or replace throws.
                using (Xapian.WritableDatabase database =
                           new Xapian.WritableDatabase(dbname, Xapian.Xapian.DB_CREATE_OR_OPEN))
                {
                    foreach (var item in list)
                    {
                        Xapian.Enquire enquire = new Xapian.Enquire(database);
                        // Configure the parser used to look the hash up.
                        Xapian.QueryParser qp = new Xapian.QueryParser();
                        qp.SetDatabase(database);
                        qp.SetDefaultOp(Xapian.Query.op.OP_ELITE_SET);
                        qp.SetStemmingStrategy(Xapian.QueryParser.stem_strategy.STEM_NONE);
                        // Find the article by its hash.
                        qp.AddPrefix("", "Q");  // hash terms carry the prefix Q
                        Xapian.Query query = qp.ParseQuery(item.hashcode);
                        Console.WriteLine("query is" + query.GetDescription() + "\n");
                        // Run the search.
                        enquire.SetQuery(query);
                        // Fetch all matches.
                        Xapian.MSet XapAns = enquire.GetMSet(0, int.MaxValue);
                        for (Xapian.MSetIterator iter = XapAns.Begin(); iter != XapAns.End(); ++iter)
                        {
                            Xapian.Document iterdoc = iter.GetDocument();
                            // Only replace when the stored hash value is exactly the
                            // article's hash; this guards against false search matches.
                            if (iterdoc.GetValue(VALUE_HASHCODE) == item.hashcode)
                            {
                                uint                 docid   = iter.GetDocId();                 // unique document id
                                Xapian.Document      doc     = new Xapian.Document();
                                Xapian.TermGenerator indexer = new Xapian.TermGenerator();
                                doc.SetData(HttpUtility.HtmlEncode(item.content));              // document payload (HTML-encoded content)

                                DateTime DateTimestart = DateTime.Now;
                                doc.AddValue(VALUE_TIME, DateTimestart.ToString("yyyy/MM/dd")); // insertion date
                                doc.AddValue(VALUE_AHREF, item.ahref);                          // original link
                                doc.AddValue(VALUE_LOCALINK, item.link);                        // local link
                                doc.AddValue(VALUE_TITLE, item.title);                          // article title
                                doc.AddValue(VALUE_SOURCE, item.source.ToString());             // source type
                                doc.AddValue(VALUE_SECLEVEL, item.seclevel.ToString());         // level
                                doc.AddValue(VALUE_EXTENSION, item.extension.ToString());       // file extension
                                doc.AddValue(VALUE_HASHCODE, item.hashcode);                    // hash

                                indexer.SetDocument(doc);
                                indexer.SetStemmingStrategy(Xapian.TermGenerator.stem_strategy.STEM_NONE);  // no stemming

                                string strcut   = cs.JiebaSeg(item.content);    // Chinese word segmentation
                                string titlecut = cs.JiebaSeg(item.title);      // Chinese word segmentation

                                indexer.IndexText(strcut, 1, "C");          // content terms, prefix C
                                indexer.IndexText(titlecut, 1, "T");        // title terms, prefix T
                                indexer.IndexText(item.hashcode, 1, "Q");   // document hash, prefix Q
                                indexer.IndexText(item.ahref, 1, "A");      // link terms, prefix A (used for pushed folder subscriptions)

                                database.ReplaceDocument(docid, doc);       // replace the stored document
                            }
                        }
                    }
                    database.Commit();                                      // commit the updates
                    database.Close();                                       // close the database
                }
            }
            catch (Exception e)
            {
                log.Error(e.Message);
                Console.Error.WriteLine("Exception: " + e.ToString());
                return(0);
            }
            return(1);
        }
Beispiel #5
0
    /// <summary>
    /// Smoke test for the Xapian C# bindings. Exercises version reporting,
    /// document and term handling, exception reporting, query parsing,
    /// MatchDecider, iterator behaviour under garbage collection, metadata and
    /// OP_SCALE_WEIGHT. Prints a diagnostic and exits with status 1 on the
    /// first failed check.
    /// </summary>
    public static void Main() {
	try {
	    // Test the version number reporting functions give plausible
	    // results.
	    string v = "";
	    v += Xapian.Version.Major();
	    v += ".";
	    v += Xapian.Version.Minor();
	    v += ".";
	    v += Xapian.Version.Revision();
	    string v2 = Xapian.Version.String();
	    if (v != v2) {
		System.Console.WriteLine("Unexpected version output (" + v + " != " + v2 + ")");
		System.Environment.Exit(1);
	    }

	    Xapian.Stem stem = new Xapian.Stem("english");
	    Xapian.Document doc = new Xapian.Document();
	    // Currently SWIG doesn't generate zero-byte clean code for
	    // transferring strings between C# and C++.
	    /*
	    doc.SetData("a\0b");
	    if (doc.GetData() == "a") {
		System.Console.WriteLine("GetData+SetData truncates at a zero byte");
		System.Environment.Exit(1);
	    }
	    if (doc.GetData() != "a\0b") {
		System.Console.WriteLine("GetData+SetData doesn't transparently handle a zero byte");
		System.Environment.Exit(1);
	    }
	    */
	    doc.SetData("is there anybody out there?");
	    doc.AddTerm("XYzzy");
	    doc.AddPosting(stem.Apply("is"), 1);
	    doc.AddPosting(stem.Apply("there"), 2);
	    doc.AddPosting(stem.Apply("anybody"), 3);
	    doc.AddPosting(stem.Apply("out"), 4);
	    doc.AddPosting(stem.Apply("there"), 5);

	    Xapian.WritableDatabase db = new Xapian.WritableDatabase("", Xapian.Xapian.DB_BACKEND_INMEMORY);
	    db.AddDocument(doc);
	    if (db.GetDocCount() != 1) {
		System.Console.WriteLine("Unexpected db.GetDocCount() after adding one document");
		System.Environment.Exit(1);
	    }

	    if (doc.TermListCount() != 5) {
		System.Console.WriteLine("Unexpected doc.TermListCount()");
		System.Environment.Exit(1);
	    }
	    int count = 0;
	    Xapian.TermIterator i = doc.TermListBegin();
	    while (i != doc.TermListEnd()) {
		++count;
		++i;
	    }
	    if (count != 5) {
		System.Console.WriteLine("Unexpected number of terms when iterating the term list");
		System.Environment.Exit(1);
	    }

	    // Check exception handling for Xapian::DocNotFoundError.
	    try {
		Xapian.Document doc2 = db.GetDocument(2);
		System.Console.WriteLine("Retrieved non-existent document: " + doc2.ToString());
		System.Environment.Exit(1);
	    } catch (System.Exception e) {
		// We expect DocNotFoundError.  StartsWith (unlike Substring)
		// cannot throw when the message is shorter than the expected
		// prefix, so the real message is always reported.
		if (!e.Message.StartsWith("DocNotFoundError", System.StringComparison.Ordinal)) {
		    System.Console.WriteLine("Unexpected exception from accessing non-existent document: " + e.Message);
		    System.Environment.Exit(1);
		}
	    }

	    // Check QueryParser parsing error.
	    try {
		Xapian.QueryParser qp = new Xapian.QueryParser();
		qp.ParseQuery("test AND");
		System.Console.WriteLine("Successfully parsed bad query");
		System.Environment.Exit(1);
	    } catch (System.Exception e) {
		if (e.Message != "QueryParserError: Syntax: <expression> AND <expression>") {
		    System.Console.WriteLine("Exception string not as expected, got: '" + e.Message + "'");
		    System.Environment.Exit(1);
		}
	    }

	    {
		Xapian.QueryParser qp = new Xapian.QueryParser();
		// FIXME: It would be better if the (uint) cast wasn't required
		// here.
		qp.ParseQuery("hello world", (uint)Xapian.QueryParser.feature_flag.FLAG_BOOLEAN);
	    }

	    if (Xapian.Query.MatchAll.GetDescription() != "Query(<alldocuments>)") {
		System.Console.WriteLine("Unexpected Query.MatchAll.toString()");
		System.Environment.Exit(1);
	    }

	    if (Xapian.Query.MatchNothing.GetDescription() != "Query()") {
		System.Console.WriteLine("Unexpected Query.MatchNothing.toString()");
		System.Environment.Exit(1);
	    }

	    // Check that OP_ELITE_SET works (in 0.9.6 and earlier it had the
	    // wrong value in C#).
	    try {
		Xapian.Query foo = new Xapian.Query(Xapian.Query.op.OP_OR, "hello", "world");
		Xapian.Query foo2 = new Xapian.Query(Xapian.Query.op.OP_ELITE_SET, foo, foo);
		foo = foo2; // Avoid "unused variable" warning.
	    } catch (System.Exception e) {
		System.Console.WriteLine("Using OP_ELITE_SET cause exception '" + e.Message + "'");
		System.Environment.Exit(1);
	    }

	    // Feature test for MatchDecider.
	    doc = new Xapian.Document();
	    doc.SetData("Two");
	    doc.AddPosting(stem.Apply("out"), 1);
	    doc.AddPosting(stem.Apply("source"), 2);
	    doc.AddValue(0, "yes");
	    db.AddDocument(doc);

	    Xapian.Query query = new Xapian.Query(stem.Apply("out"));
	    Xapian.Enquire enquire = new Xapian.Enquire(db);
	    enquire.SetQuery(query);
	    Xapian.MSet mset = enquire.GetMSet(0, 10, null, new TestMatchDecider());
	    if (mset.Size() != 1) {
		System.Console.WriteLine("MatchDecider found " + mset.Size().ToString() + " documents, expected 1");
		System.Environment.Exit(1);
	    }
	    if (mset.GetDocId(0) != 2) {
		System.Console.WriteLine("MatchDecider mset has wrong docid in");
		System.Environment.Exit(1);
	    }

	    mset = enquire.GetMSet(0, 10);
	    for (Xapian.MSetIterator m = mset.Begin(); m != mset.End(); m++) {
		// In Xapian 1.2.6 and earlier, the iterator would become
		// eligible for garbage collection after being advanced.
		// It didn't actually get garbage collected often, but when
		// it did, it caused a crash.  Here we force a GC run to make
		// this issue manifest if it is present.
		System.GC.Collect();
		System.GC.WaitForPendingFinalizers();
	    }

	    // Test setting and getting metadata
	    if (db.GetMetadata("Foo") !=  "") {
		System.Console.WriteLine("db.GetMetadata(\"Foo\") returned wrong value \"" + db.GetMetadata("Foo") + "\" - expected \"\"");
		System.Environment.Exit(1);
	    }
	    db.SetMetadata("Foo", "Foo");
	    if (db.GetMetadata("Foo") !=  "Foo") {
		System.Console.WriteLine("db.GetMetadata(\"Foo\") returned wrong value \"" + db.GetMetadata("Foo") + "\" - expected \"Foo\"");
		System.Environment.Exit(1);
	    }

	    // Test OP_SCALE_WEIGHT and corresponding constructor
	    Xapian.Query query4 = new Xapian.Query(Xapian.Query.op.OP_SCALE_WEIGHT, new Xapian.Query("foo"), 5.0);
	    if (query4.GetDescription() != "Query(5 * foo)") {
		System.Console.WriteLine("Unexpected query4.GetDescription()");
		System.Environment.Exit(1);
	    }

	} catch (System.Exception e) {
	    System.Console.WriteLine("Exception: " + e.ToString());
	    System.Environment.Exit(1);
	}
    }