Commit() public method

Commits all pending changes (added & deleted documents, optimizations, segment merges, added indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the changes and the index updates will survive an OS or machine crash or power loss. Note that this does not wait for any running background merges to finish. This may be a costly operation, so you should test the cost in your application and do it only when really necessary.

Note that this operation calls Directory.sync on the index files. That call should not return until the file contents & metadata are on stable storage. For FSDirectory, this calls the OS's fsync. But, beware: some hardware devices may in fact cache writes even during fsync, and return before the bits are actually on stable storage, to give the appearance of faster performance. If you have such a device, and it does not have a battery backup (for example) then on power loss it may still lose data. Lucene cannot guarantee consistency on such devices.

NOTE: if this method hits an OutOfMemoryError you should immediately close the writer. See the IndexWriter class documentation for details.

public void Commit()
Return value: void
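
For context, here is a minimal add-then-commit sketch (assuming the Lucene.Net 3.0.3-style API used by most of the examples below; the directory, analyzer, and field names are illustrative, not part of the Commit() contract):

        // Minimal sketch: open a writer, add one document, and call Commit() so that a
        // reader opened afterwards sees the change and the update survives a crash.
        // Assumes the Lucene.Net 3.0.3-style API; directory, analyzer and field names are illustrative.
        private static void AddAndCommit(Lucene.Net.Store.Directory dir)
        {
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            using (var writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                var doc = new Document();
                doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc);

                // Readers opened before this point do not see the new document;
                // readers opened after Commit() returns will.
                writer.Commit();
            }
        }
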
        public void TestGetFilterHandleNumericParseError()
        {
            NumericRangeFilterBuilder filterBuilder = new NumericRangeFilterBuilder();
            filterBuilder.SetStrictMode(false);

            String xml = "<NumericRangeFilter fieldName='AGE' type='int' lowerTerm='-1' upperTerm='NaN'/>";
            XmlDocument doc = GetDocumentFromString(xml);
            Filter filter = filterBuilder.GetFilter(doc.DocumentElement);
            Store.Directory ramDir = NewDirectory();
            IndexWriter writer = new IndexWriter(ramDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
            writer.Commit();
            try
            {
                AtomicReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(ramDir));
                try
                {
                    assertNull(filter.GetDocIdSet(reader.AtomicContext, reader.LiveDocs));
                }
                finally
                {
                    reader.Dispose();
                }
            }
            finally
            {
                writer.Commit();
                writer.Dispose();
                ramDir.Dispose();
            }
        }
        /// <summary>
        /// Index 1 document and commit.
        /// Prepare for crashing.
        /// Index 1 more document, and upon commit, creation of segments_2 will crash.
        /// </summary>
        private void IndexAndCrashOnCreateOutputSegments2()
        {
            Directory realDirectory = FSDirectory.Open(Path);
            CrashAfterCreateOutput crashAfterCreateOutput = new CrashAfterCreateOutput(realDirectory);

            // NOTE: cannot use RandomIndexWriter because it
            // sometimes commits:
            IndexWriter indexWriter = new IndexWriter(crashAfterCreateOutput, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            indexWriter.AddDocument(Document);
            // writes segments_1:
            indexWriter.Commit();

            crashAfterCreateOutput.GetCrashAfterCreateOutput = "segments_2";
            indexWriter.AddDocument(Document);
            try
            {
                // tries to write segments_2 but hits fake exc:
                indexWriter.Commit();
                Assert.Fail("should have hit CrashingException");
            }
            catch (CrashingException)
            {
                // expected
            }
            // writes segments_3
            indexWriter.Dispose();
            Assert.IsFalse(SlowFileExists(realDirectory, "segments_2"));
            crashAfterCreateOutput.Dispose();
        }
Example #3
        private static RAMDirectory MakeEmptyIndex(int numDeletedDocs)
        {
            RAMDirectory d = new RAMDirectory();
            IndexWriter  w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED, null);

            for (int i = 0; i < numDeletedDocs; i++)
            {
                w.AddDocument(new Document(), null);
            }
            w.Commit(null);
            w.DeleteDocuments(null, new MatchAllDocsQuery());
            w.Commit(null);

            if (0 < numDeletedDocs)
            {
                Assert.IsTrue(w.HasDeletions(null), "writer has no deletions");
            }

            Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs");
            Assert.AreEqual(0, w.NumDocs(null), "writer has non-deleted docs");
            w.Close();
            IndexReader r = IndexReader.Open((Directory)d, true, null);

            Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs");
            r.Close();
            return(d);
        }
        public virtual void TestExceptionDuringSave()
        {
            MockDirectoryWrapper dir = NewMockDirectory();
            dir.FailOn(new FailureAnonymousInnerClassHelper(this, dir));
            IndexWriter writer = new IndexWriter(dir, GetConfig(Random(), new PersistentSnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy(), dir, OpenMode_e.CREATE_OR_APPEND)));
            writer.AddDocument(new Document());
            writer.Commit();

            PersistentSnapshotDeletionPolicy psdp = (PersistentSnapshotDeletionPolicy)writer.Config.DelPolicy;
            try
            {
                psdp.Snapshot();
            }
            catch (IOException ioe)
            {
                if (ioe.Message.Equals("now fail on purpose"))
                {
                    // ok
                }
                else
                {
                    throw; // rethrow, preserving the original stack trace
                }
            }
            Assert.AreEqual(0, psdp.SnapshotCount);
            writer.Dispose();
            Assert.AreEqual(1, DirectoryReader.ListCommits(dir).Count);
            dir.Dispose();
        }
        public void TestReadersWriters()
        {
            Directory dir;
            
            using(dir = new RAMDirectory())
            {
                Document doc;
                IndexWriter writer;
                IndexReader reader;

                using (writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    Field field = new Field("name", "value", Field.Store.YES,Field.Index.ANALYZED);
                    doc = new Document();
                    doc.Add(field);
                    writer.AddDocument(doc);
                    writer.Commit();

                    using (reader = writer.GetReader())
                    {
                        IndexReader r1 = reader.Reopen();
                    }

                    Assert.Throws<AlreadyClosedException>(() => reader.Reopen(), "IndexReader shouldn't be open here");
                }
                
                Assert.Throws<AlreadyClosedException>(() => writer.AddDocument(doc), "IndexWriter shouldn't be open here");

                Assert.IsTrue(dir.isOpen_ForNUnit, "RAMDirectory");
            }
            Assert.IsFalse(dir.isOpen_ForNUnit, "RAMDirectory");
        }
        public void Init()
        {
            facetHandlers = new List<IFacetHandler>();

            directory = new RAMDirectory();
            analyzer = new WhitespaceAnalyzer();
            selectionProperties = new Dictionary<string, string>();
            IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

            writer.AddDocument(Doc("prop1=val1", "prop2=val1", "prop5=val1"));
            writer.AddDocument(Doc("prop1=val2", "prop3=val1", "prop7=val7"));
            writer.AddDocument(Doc("prop1=val2", "prop3=val2", "prop3=val3"));
            writer.AddDocument(Doc("prop1=val1", "prop2=val1"));
            writer.AddDocument(Doc("prop1=val1", "prop2=val1"));
            writer.AddDocument(Doc("prop1=val1", "prop2=val1", "prop4=val2", "prop4=val3"));
            writer.Commit();

            attributesFacetHandler = new AttributesFacetHandler(AttributeHandlerName, AttributeHandlerName, null, null,
                new Dictionary<string, string>());
            facetHandlers.Add(attributesFacetHandler);
            IndexReader reader = IndexReader.Open(directory, true);
            boboReader = BoboIndexReader.GetInstance(reader, facetHandlers);
            attributesFacetHandler.LoadFacetData(boboReader);
            browser = new BoboBrowser(boboReader);
        }
        public void Test_IndexReader_IsCurrent()
        {
            RAMDirectory ramDir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(ramDir, new KeywordAnalyzer(), true, new IndexWriter.MaxFieldLength(1000));
            Field field = new Field("TEST", "mytest", Field.Store.YES, Field.Index.ANALYZED);
            Document doc = new Document();
            doc.Add(field);
            writer.AddDocument(doc);

            IndexReader reader = writer.GetReader();

            writer.DeleteDocuments(new Lucene.Net.Index.Term("TEST", "mytest"));

            Assert.IsFalse(reader.IsCurrent());

            int resCount1 = new IndexSearcher(reader).Search(new TermQuery(new Term("TEST", "mytest")),100).TotalHits;
            Assert.AreEqual(1, resCount1);

            writer.Commit();

            Assert.IsFalse(reader.IsCurrent());

            int resCount2 = new IndexSearcher(reader).Search(new TermQuery(new Term("TEST", "mytest")),100).TotalHits;
            Assert.AreEqual(1, resCount2, "Reopen not invoked yet, resultCount must still be 1.");

            reader = reader.Reopen();
            Assert.IsTrue(reader.IsCurrent());

            int resCount3 = new IndexSearcher(reader).Search(new TermQuery(new Term("TEST", "mytest")), 100).TotalHits;
            Assert.AreEqual(0, resCount3, "After reopen, resultCount must be 0.");

            reader.Close();
            writer.Dispose();
        }
        public static void Run(string[] args)
        {
            IDictionary<string, string> arguments = CommandHelpers.GetArguments(args, 1);
            if (arguments == null)
            {
                PrintUsage();
                return;
            }

            Lucene.Net.Store.Directory directory = CommandHelpers.GetLuceneDirectory(arguments);
            if (directory == null)
            {
                PrintUsage();
                return;
            }

            if (IndexReader.IndexExists(directory))
            {
                using (IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    writer.DeleteAll();
                    writer.Commit(new Dictionary<string, string>());
                }
            }

            Console.WriteLine("All Done");
        }
        //END
        //this method creates document from an ObjectToIndex
        public void BuildIndex(FileToIndex file)
        {
            using (var analyzer = new Lucene.Net.Analysis.Ru.RussianAnalyzer(Version.LUCENE_30))
            {
                using (IndexWriter idxw = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    // if a document with this Id already exists, delete it before re-indexing

                    var searchQuery = new TermQuery(new Term("Id", file.Id.ToString()));
                    idxw.DeleteDocuments(searchQuery);
                    //creation
                    Document doc = new Document();
                    doc.Add(new Field("Id", file.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));//аналайзер разбивает строки на слова
                    doc.Add(new Field("Title", file.Title, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Description", file.Description, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Authors", file.Authors, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Text", file.Text, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Hashtags", file.Hashtags, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Discipline", file.Discipline, Field.Store.YES, Field.Index.ANALYZED));
                    //write the document to the index
                    idxw.AddDocument(doc);
                    // commit and optimize the index (the using block disposes the writer)
                    idxw.Commit();

                    idxw.Optimize();

                }
            }
        }
Example #10
        private IndexWriter InitIndex(IConcurrentMergeScheduler scheduler, Random random, MockDirectoryWrapper dir, bool initialCommit)
        {
            dir.LockFactory = NoLockFactory.DoNoLockFactory;

            scheduler.SetSuppressExceptions();

            IndexWriter writer = new IndexWriter(dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
                .SetMaxBufferedDocs(10)
                .SetMergeScheduler(scheduler));

            if (initialCommit)
            {
                writer.Commit();
            }

            Document doc = new Document();
            doc.Add(NewTextField("content", "aaa", Field.Store.NO));
            doc.Add(NewTextField("id", "0", Field.Store.NO));
            for (int i = 0; i < 157; i++)
            {
                writer.AddDocument(doc);
            }

            return writer;
        }
		public virtual void  TestMultiValueSource()
		{
			Directory dir = new MockRAMDirectory();
			IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
			Document doc = new Document();
			Field f = new Field("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
			doc.Add(f);
			
			for (int i = 0; i < 17; i++)
			{
				f.SetValue("" + i);
				w.AddDocument(doc);
				w.Commit();
			}
			
			IndexReader r = w.GetReader();
			w.Close();
			
			Assert.IsTrue(r.GetSequentialSubReaders().Length > 1);
			
			ValueSource s1 = new IntFieldSource("field");
			DocValues v1 = s1.GetValues(r);
			DocValues v2 = new MultiValueSource(s1).GetValues(r);
			
			for (int i = 0; i < r.MaxDoc(); i++)
			{
				Assert.AreEqual(v1.IntVal(i), i);
				Assert.AreEqual(v2.IntVal(i), i);
			}
			
			Lucene.Net.Search.FieldCache_Fields.DEFAULT.PurgeAllCaches();
			
			r.Close();
			dir.Close();
		}
        public override void WriteEndVersion(Process process, AbstractConnection input, Entity entity, bool force = false) {
            if (entity.Updates + entity.Inserts <= 0 && !force)
                return;

            var versionType = entity.Version == null ? "string" : entity.Version.SimpleType;
            var end = entity.End ?? new DefaultFactory(Logger).Convert(entity.End, versionType);

            using (var dir = LuceneDirectoryFactory.Create(this, TflBatchEntity(entity.ProcessName))) {
                using (var writer = new IndexWriter(dir, new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED)) {
                    var doc = new Document();
                    doc.Add(new NumericField("id", global::Lucene.Net.Documents.Field.Store.YES, true).SetIntValue(entity.TflBatchId));
                    doc.Add(new global::Lucene.Net.Documents.Field("process", entity.ProcessName, global::Lucene.Net.Documents.Field.Store.YES, global::Lucene.Net.Documents.Field.Index.NOT_ANALYZED_NO_NORMS));
                    doc.Add(new global::Lucene.Net.Documents.Field("connection", input.Name, global::Lucene.Net.Documents.Field.Store.YES, global::Lucene.Net.Documents.Field.Index.NOT_ANALYZED_NO_NORMS));
                    doc.Add(new global::Lucene.Net.Documents.Field("entity", entity.Alias, global::Lucene.Net.Documents.Field.Store.YES, global::Lucene.Net.Documents.Field.Index.NOT_ANALYZED_NO_NORMS));
                    doc.Add(new NumericField("updates", global::Lucene.Net.Documents.Field.Store.YES, true).SetLongValue(entity.Updates));
                    doc.Add(new NumericField("inserts", global::Lucene.Net.Documents.Field.Store.YES, true).SetLongValue(entity.Inserts));
                    doc.Add(new NumericField("deletes", global::Lucene.Net.Documents.Field.Store.YES, true).SetLongValue(entity.Deletes));
                    doc.Add(LuceneWriter.CreateField("version", versionType, new SearchType { Analyzer = "keyword" }, end));
                    doc.Add(new global::Lucene.Net.Documents.Field("version_type", versionType, global::Lucene.Net.Documents.Field.Store.YES, global::Lucene.Net.Documents.Field.Index.NOT_ANALYZED_NO_NORMS));
                    doc.Add(new NumericField("tflupdate", global::Lucene.Net.Documents.Field.Store.YES, true).SetLongValue(DateTime.UtcNow.Ticks));
                    writer.AddDocument(doc);
                    writer.Commit();
                    writer.Optimize();
                }
            }
        }
		public virtual void  TestSimpleSkip()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Term term = new Term("test", "a");
			for (int i = 0; i < 5000; i++)
			{
				Document d1 = new Document();
				d1.Add(new Field(term.Field, term.Text, Field.Store.NO, Field.Index.ANALYZED));
				writer.AddDocument(d1);
			}
			writer.Commit();
			writer.Optimize();
			writer.Close();
			
			IndexReader reader = SegmentReader.GetOnlySegmentReader(dir);
			SegmentTermPositions tp = (SegmentTermPositions) reader.TermPositions();
            tp.freqStream = new CountingStream(this, tp.freqStream);
			
			for (int i = 0; i < 2; i++)
			{
				counter = 0;
				tp.Seek(term);
				
				CheckSkipTo(tp, 14, 185); // no skips
				CheckSkipTo(tp, 17, 190); // one skip on level 0
				CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0
				
				// this test would fail if we had only one skip level,
				// because then more bytes would be read from the freqStream
				CheckSkipTo(tp, 4800, 250); // one skip on level 2
			}
		}
Example #14
 /// <summary>
 /// Create an index document.
 /// </summary>
 /// <param name="dic"></param>
 public void AddLuceneIndex(Dictionary<string, string> dic) {
     //var analyzer = new StandardAnalyzer(Version.LUCENE_30);
     var analyzer = GetAnalyzer();
     using (var directory = GetLuceneDirectory())
     using (var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) {
         var doc = new Document();
         foreach (KeyValuePair<string, string> pair in dic) {
             // add new index entry
             //Field.Store.YES: controls whether the original value is stored.
             //Only with Field.Store.YES can the value later be retrieved via doc.Get("number").
             //Field.Index.NOT_ANALYZED: index the value without tokenizing it.
             //todo:boost
             if (NotAnalyzeFields.Exists(one => one == pair.Key)) {
                 doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.NOT_ANALYZED));
             }
             else {
                 doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.ANALYZED));
             }
         }
         //doc.Boost
         writer.AddDocument(doc);
         writer.Commit();
         writer.Optimize();
         analyzer.Close();
     }
 }
		//Rolls back index to a chosen ID
		private void  RollBackLast(int id)
		{
			
			// System.out.println("Attempting to rollback to "+id);
			System.String ids = "-" + id;
			IndexCommit last = null;
			IList<IndexCommit> commits = IndexReader.ListCommits(dir);
			for (System.Collections.IEnumerator iterator = commits.GetEnumerator(); iterator.MoveNext(); )
			{
				IndexCommit commit = (IndexCommit) iterator.Current;
                System.Collections.Generic.IDictionary<string, string> ud = commit.GetUserData();
				if (ud.Count > 0)
					if (((System.String) ud["index"]).EndsWith(ids))
						last = commit;
			}
			
			if (last == null)
				throw new System.SystemException("Couldn't find commit point " + id);
			
			IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), new RollbackDeletionPolicy(this, id), MaxFieldLength.UNLIMITED, last);
            System.Collections.Generic.IDictionary<string, string> data = new System.Collections.Generic.Dictionary<string, string>();
			data["index"] = "Rolled back to 1-" + id;
			w.Commit(data);
			w.Close();
		}
        public void HelloWorldTest()
        {
            Directory directory = new RAMDirectory();
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
            IndexWriter writer = new IndexWriter(directory,
                analyzer,
                IndexWriter.MaxFieldLength.UNLIMITED);

            Document doc = new Document();
            doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("postBody", "sample test", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
            writer.Optimize();
            writer.Commit();
            writer.Close();

            QueryParser parser = new QueryParser(Version.LUCENE_29, "postBody", analyzer);
            Query query = parser.Parse("sample test");

            //Setup searcher
            IndexSearcher searcher = new IndexSearcher(directory, true);
            //Do the search
            var hits = searcher.Search(query, null, 10);

            // iterate over the returned ScoreDocs (TotalHits may exceed the number of docs returned)
            for (int i = 0; i < hits.ScoreDocs.Length; i++)
            {
                var doc1 = hits.ScoreDocs[i];
            }

            searcher.Close();
            directory.Close();
        }
        public static void CreateIndex() {
            try
            {
                var cloudAccount = Azure.GetStorageAccount();

                using (var cacheDirectory = new RAMDirectory())
                {
                    using (var azureDirectory = new AzureDirectory(cloudAccount, Azure.StorageContainerName, cacheDirectory))
                    {
                        using (Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
                        {
                            using (var indexWriter = new IndexWriter(azureDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
                            {
                                AddDocuments(indexWriter);

                                indexWriter.Commit();
                            }
                        }
                    }
                }
            }
            catch (StorageException ex)
            {
                Trace.TraceError(ex.Message);
            }
        }
        static Analyzer analyzer = new MMSegAnalyzer(); //MMSegAnalyzer //StandardAnalyzer
        static void Main(string[] args)
        {

            string[] texts = new string[] { 
                "京华时报1月23日报道 昨天,受一股来自中西伯利亚的强冷空气影响,本市出现大风降温天气,白天最高气温只有零下7摄氏度,同时伴有6到7级的偏北风。",
                "【AppsFlyer:社交平台口碑营销效果最佳http://t.cn/zTHEQRM】社交网络分享应用的方式,在新应用获取用户非常有效率。搜索方式可为移动应用带来最高玩家质量,但玩家量和转化率较低。广告可带来最大用户量,但用户质量却相对不高,转化率也不够高。刺激性流量的转化率最高,但是平均玩家质量是最低",
                "Server Core省去了Windows Server的图形界面,改为命令行的方式来管理服务器。它不仅拥有更精简的体积与更优化的性能,还可缩短50%-60%的系统更新时间。现在,SQL Server已经支持Windows Server Core,计划内停机时间的大幅缩减让企业关键数据库应用获得更高的可用性。",
                "【各移动游戏分发渠道的优势和分成比例】联通沃商店:线下资源和话费支付能力(分成比例3:7),触控:技术和运营能力(分成比例5:5),91无线:评测、运营、数据等服务能力(分成比例4:6),UC:用户接入点、WebApp的支持(分成比例5:5),斯凯网络:硬件厂商资源(分成比例3:7)http://t.cn/zTHnwJk",
                "iPod之父创办的Nest收购家居能源数据监测服务MyEnergy,将从小小恒温器进展为家居节能整套方案 |Nest公司要做的并不只是一个小小温控器,而是提供智能家居节能整套方案。而MyEnergy积累的数据能对Nest起到很大帮助,并且也为Nest带来更多能源服务商方面的联系: http://t.cn/zTHs8qQ",
                "今日,58同城将正式与支付宝达成战略合作。这既是支付宝首次为阿里系外的企业提供担保支付服务,也是58同城推动消费者保障服务在支付和结算方面迈出的重要一步,此番合作将对整个行业产生颠覆性的影响。58要做的就是不断的了解用户痛点,不断的与虚假信息斗争,建立一个人人信赖的生活服务平台。",
                "【iPhone如何征服日本】虽然日本身为现代移动技术的摇篮,智能手机和触屏设备的普及也领先于其他地区,但iPhone仍然顺利地征服这个岛国,成为该国最畅销的手机。一方面得益于女性用户的追捧,但更多地,还是源自日本移动行业的内在问题。http://t.cn/zTHENrI",
                "【东方体育中心游泳馆今起重新开放,成人票20元/场】#爱体育#“立夏”过了,夏天近了,喜欢游泳的筒子心痒难耐了吧!@965365上海体育发布 说,经过一个多月的内装修,东方体育中心室内游泳馆今天起重新对外开放,开放时间为13:00-21:00,票价详情点大图了解~今夏挥洒汗水,“玉兰桥”走起!",
                "【深圳地铁免费伞 一年借出2000把归还70把】深圳地铁站摆放了“红雨伞”,下雨时可免费借给乘客使用。但一年来,地铁借给市民2000多把雨伞,只还回来70把,有的站甚至已经没有雨伞了。工作人员尝试联系部分借伞人,发现登记电话号码常常显示是空号……地铁站的红雨伞,你借了会还吗?(南方都市报)",
                "【银行的速度,移动互联网的速度】招商银行信用卡副总经理彭千在GMIC上分享招商银行移动互联网尝试案例:先后和开心和人人推出联名信用卡,但银行动作太慢了,推出是开心网已经不开心了,人人网已经没有人了!",
                "【黑石超级公关】4月21日的新闻联播上,黑石集团主席施瓦茨曼向清华大学捐赠1亿美元,并牵头筹集2亿美元,投资3亿美元与清华大学合作筹建“苏世民书院”的新闻被列为头条。很明显“未来中国不再是选修课,而是必修课。”1亿美元投资清华,背后是扭转坑中投形象的战略公关…",
                "【传谷歌将效仿苹果开设谷歌眼镜零售店】科技博客Business Insider今天援引消息人士说法称,谷歌正计划开设零售店,销售谷歌眼镜。谷歌门店或将专为眼镜产品服务,即只展示各类品牌、型号的“谷歌眼镜”产品。早前的消息指出,谷歌拟效仿苹果和微软,计划推出自主品牌的零售门店,以展示旗下各类产品。",
                "【武汉一高中禁止学生校内用手机 现场砸毁】近期在武昌东亭二路,一所学校收缴并砸毁学生手机24部,其中包括iPhone5等较昂贵的智能手机,也有价值数百元的普通手机,并设“手机尸体展示台”展出近期砸毁的部分手机残骸,均已经无法使用。",
                "【小偷慌不择路当街撒钱 警民携手完璧归赵】日前,一男子来到青浦一小作坊佯装购买商品,后借机溜进卧室行窃。老板娘在周围群众的帮助下将男子扭获,男子见势不妙,掏出一沓钞票当街抛撒。民警到达现场后,将男子抛撒的钱一一清点,共计6600元。警察蜀黍真心想为当天帮忙捡钱的群众竖起大拇指!",
                "#瓜大活动预告#【风起云涌南山下,群雄逐鹿辩工大】经过层层奋战,软件与微电子学院和理学院最终杀入了决赛,巅峰对决,即将展开。智慧的火花,头脑的竞技,唇舌的交战,精彩,一触即发。5月13日,周一晚七点,翱翔学生中心,我们与你不见不散!via人人网@西北工业大学_学生会",
                "#GMIC#尚伦律师事务所合伙人张明若律师:在中国,发生了很多起创业者因为法律意识淡薄,在天使投融资期间甚至没有签订法律文件的创业悲剧。这份文件的目的是帮助暂时还请不起律师的创业者。这份法律文件模板简单、对买卖双方公平、且免费!",
                "【金蝶随手记创始人谷风:先自我否定,再创新!】当创业者们把目光聚焦在娱乐、社交、手游、电商这些热门品类时,相信没有多少人会料到记账这一细分领域里也有产品能做到6000万级别的用户规模,堪称“屌丝逆袭”。http://t.cn/zTQvB16",
                "【陕西回应省纪委人员开车打架致死案:车辆是私家车 车主是纪委临时工】乾县青仁村发生斗殴,一死两伤,嫌犯开的显示单位为陕西省纪委的轿车引起质疑。陕西公安厅称,陕VHA088克莱斯勒轿车系嫌犯付某借用朋友的私家车。乾县公安局此前通报,陕VHA088车主是陕西省纪委临时工范小勇http://t.cn/zTQP5kC",
                "【经典干货!25个强大的PS炫光特效教程】这些经典的特效教程是很多教PS老师们的课堂案例,更被很多出版物摘录其中。那么今天毫无保留的全盘托出,同学们一定要好好练习。完成的同学也可以到优设群交作业哟,给大家分享你的设计过程和经验心得:)@尼拉贡戈小麦穗 →http://t.cn/zTHdOCK",
                "【树莓派的三个另类“武装”玩法】树莓派(Raspberry Pi)一直以来以极低的价格和“信用卡大小”的尺寸为人追捧。对于爱折腾的发烧友来说,永远可以在常人意想不到的地方发挥出自己折腾的功力。当一般人仅仅研究其编程玩法时,另一帮人已经琢磨着要把树莓派“武装”成另一个样子。http://t.cn/zTHFxIS",
                "【媒体札记:白宫信访办】@徐达内:19年前铊中毒的清华女生朱令的同情者,找到了“白宫请愿”这个易于操作又声势浩大的方法,要求美国将朱当年的室友孙维驱逐出境。随着意见领袖和各大媒体的加入,这一“跨国抗议” 的景观搅动了对官方公信力,冤假错案判断标准的全民讨论。http://t.cn/zTHsLIC",
                "【传第七大道页游海外月流水近1亿元http://t.cn/zTQPnnv】根据消息人士的透露,第七大道目前旗下网页游戏海外月流水收入已达近1亿元人民币,实质已是国内游戏公司海外收入第一,已超过大家所熟知的端游上市公司。孟治昀如是表示:“谁能告诉我,中国网游企业出口收入哪家公司高于第七大道?”",
                "【简介:他废掉了一切不服者】弗格森执教曼联26年,夺得13个英超冠军,4个联赛杯冠军,5个足总杯冠军,2个欧冠冠军,1个世俱杯冠军,1个优胜者杯冠军,1个欧洲超级杯。如果非要用一句话来总结他的伟大,小编个人的总结是:他废掉了一切敢于“不服者”,包括小贝同学",
                "这个世界干啥最赚钱?历史证明,持续保持对一个国家进行专制统治,通过无节制的赋税和滥发货币来掠夺全体国民的私人财富是唯一的标准答案。历史在进步,这种商业模式也在改头换面,于是,党专制替代家族专制,集体世袭权利代替个体世袭权力。既然改头换面,理论体系也得改变,这个理论体系就是特色论。",
                "【拥有“全球最美海滩”的塞舌尔将对中国游客免签!】#便民提示#准备出国白相的筒子冒个泡吧~你们有福啦。拥有“全球最美丽的海滩”和“最洁净的海水”美誉的塞舌尔,将可凭我国有效护照免签入境,最多停留30天这里还是英国威廉王子和王妃的蜜月地~~所以,别再只盯着马尔代夫一处啦",
                "【用数据告诉你手游有多热】今天,作为本届GMIC 的一部分,GGS全球移动游戏峰会召开。嘉宾和游戏开发者们探讨了移动游戏的现状与发展趋势。手游则是最为重要的一大关键词。盛大游戏总裁钱东海分享了日本最大手游公司CEO预测的数据:2015年全球游戏产业的格局中80%都是手机游戏。http://t.cn/zTHdkFY"
            };

            IndexWriter iw = new IndexWriter(FSDirectory.Open(INDEX_DIR), analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            int i = 0;
            foreach (string text in texts)
            {
                Document doc = new Document();
                doc.Add(new Field("body", text, Field.Store.YES, Field.Index.ANALYZED));
                iw.AddDocument(doc);
                Console.WriteLine("Indexed doc: {0}", text);
            }
            iw.Commit();
            iw.Optimize();
            iw.Dispose();

            Console.WriteLine();

            Console.WriteLine("Building index done!\r\n\r\n");

            while (true)
            {
                Console.Write("Enter the keyword: ");
                string keyword = Console.ReadLine();
                Search(keyword);
                Console.WriteLine();
            }

            //Console.ReadLine();
        }
        private void CreateIndex(IndexWriter writer, string a, string b)
        {
            Document doc = new Document();
            doc.Add(new Field("title", a, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("content", b, Field.Store.YES, Field.Index.ANALYZED));

            writer.AddDocument(doc);
            writer.Commit();
        }
Example #20
        public void SetUp()
        {
            var analyzer = new StandardAnalyzer(Version.LUCENE_29);
            context = new TestableContext(directory, analyzer, Version.LUCENE_29, new NoOpIndexWriter(), new object());

            var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

            writer.Commit();
        }
 public int Index(Func<IndexWriter, int> indexWriterAction, bool createNewIndex)
 {
     using (var writer = new IndexWriter(IndexDirectory, Analyzer, createNewIndex, IndexWriter.MaxFieldLength.UNLIMITED))
     {
         var count = indexWriterAction(writer);
         writer.Commit();
         return count;
     }
 }
        public void TestSplit()
        {
            string format = "{0:000000000}";
            IndexWriter w;
            using (Directory dir = NewDirectory())
            {
                using (w = new IndexWriter(dir, NewIndexWriterConfig(
                    TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false))
                    .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)))
                {
                    for (int x = 0; x < 11; x++)
                    {
                        Document doc = CreateDocument(x, "1", 3, format);
                        w.AddDocument(doc);
                        if (x % 3 == 0) w.Commit();
                    }
                    for (int x = 11; x < 20; x++)
                    {
                        Document doc = CreateDocument(x, "2", 3, format);
                        w.AddDocument(doc);
                        if (x % 3 == 0) w.Commit();
                    }
                }

                Term midTerm = new Term("id", string.Format(CultureInfo.InvariantCulture, format, 11));


                CheckSplitting(dir, midTerm, 11, 9);

                // delete some documents
                using (w = new IndexWriter(dir, NewIndexWriterConfig(

                    TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false))
                        .SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)))
                {
                    w.DeleteDocuments(midTerm);
                    w.DeleteDocuments(new Term("id", string.Format(CultureInfo.InvariantCulture, format, 2)));
                }


                CheckSplitting(dir, midTerm, 10, 8);

            }
        }
Example #23
        public void FileProcessor1(bool isStart, bool isFinished, string filePath)
        {
            if (isStart)
            {
                OpenIndex(true);
                return;
            }
            if (isFinished)
            {
                writer.Commit();
                CloseIndex();
                return;
            }

            // cast to a signed type so the validity check below is meaningful
            // (an unsigned value is always >= 0)
            long ms = (long)NTFS.InfoFromPath(filePath);

            if (ms >= 0)
            {
                num_indexed_file++;

                PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer());
                analyzer.AddAnalyzer("contents", new SourceCodeAnalyzer());
                analyzer.AddAnalyzer("path2", new PathAnalyzer());

                //SourceCodeAnalyzer    analyzer = new SourceCodeAnalyzer();
                writer.AddDocument(FileDocument.Document(filePath, ms), analyzer);

                if (num_indexed_file % 20 == 1)
                {
                    writer.Commit();
                }
            }

            /*
             * lock (dispatch_queue)
             * {
             *  if (dispatch_queue == null)
             *  {
             *      dispatch_queue = new Queue<string>();
             *  }
             *  dispatch_queue.Enqueue(filePath);
             * }
             * */
        }
Example #24
 public void Index(List<Field> fields)
 {
     var analyzer = new AccentedAnalyzer();
     var directory = FSDirectory.Open(System.IO.Directory.GetParent(IndexPath));
     var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.LIMITED);
     AddDocument(writer, fields);
     writer.Optimize();
     writer.Commit();
     writer.Close();
 }
Example #25
        public void DeleteFromIndex(Term term)
        {
            Directory directory = FSDirectory.Open(new System.IO.DirectoryInfo(serverPath));
            Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            IndexWriter writer = new IndexWriter(directory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED);

            writer.DeleteDocuments(term);
            writer.Commit();
            writer.Close();
        }
 protected internal virtual void PrepareIndexAndSnapshots(SnapshotDeletionPolicy sdp, IndexWriter writer, int numSnapshots)
 {
     for (int i = 0; i < numSnapshots; i++)
     {
         // create dummy document to trigger commit.
         writer.AddDocument(new Document());
         writer.Commit();
         Snapshots.Add(sdp.Snapshot());
     }
 }
 public static void AddDocument(Document document)
 {
     var analyzer = new StandardAnalyzer(Version.LUCENE_29);
     using (var writer = new IndexWriter(Directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
     {
         writer.AddDocument(document);
         writer.Optimize();
         writer.Commit();
         analyzer.Close();
     }
 }
 public override void SetUp()
 {
     base.SetUp();
     dir = NewDirectory();
     using (IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)))
     {
         Document doc;
         for (int i = 0; i < NUM_DOCS; i++)
         {
             doc = new Document();
             doc.Add(NewStringField("id", i + "", Field.Store.YES));
             doc.Add(NewTextField("f", i + " " + i, Field.Store.YES));
             w.AddDocument(doc);
             if (i % 3 == 0) w.Commit();
         }
         w.Commit();
         w.DeleteDocuments(new Term("id", "" + (NUM_DOCS - 1)));
     }
     input = DirectoryReader.Open(dir);
 }
Example #29
        public bool ProcessFile(string filePath)
        {
            long ms = (long)NTFS.InfoFromPath(filePath);

            if (ms >= 0)
            {
                Logger.Log.Info("Indexing " + filePath);
                Document doc = _doc_factory.CreateFromPath(filePath, ms);
                if (doc != null)
                {
                    _nb_indexed++;
                    _writer.AddDocument(doc, _default_analyzer);
                    if (_nb_indexed % 20 == 1)
                    {
                        _writer.Commit();
                    }
                }
            }
            return(true);
        }
        static void Main(string[] args)
        {
            // Initialize Lucene
            Directory directory = FSDirectory.Open(new System.IO.DirectoryInfo("LuceneIndex"));
            StandardAnalyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);

            // Open DB connection
            OpenConnections();

            // Insert documents into the index

            // Clear the index
            writer.DeleteAll();
            string sql = "SELECT a.ArticleId, a.Title, a.IntroText, a.Content, a.Published, a.DateCreated, " +
                "u.UserName, a.SectionId FROM Article AS a INNER JOIN aspnet_Users AS u ON " +
                "a.CreatedBy = u.UserId";

            SqlCommand cmd = new SqlCommand(sql, dbConn);
            SqlDataReader reader = cmd.ExecuteReader();

            while (reader.Read())
            {
                // Extract the fields to be indexed
                int articleId = reader.GetInt32(reader.GetOrdinal("ArticleId"));
                string title = reader["Title"].ToString();
                string content = reader["IntroText"].ToString() + reader["Content"].ToString();
                string intro = content.Substring(0, content.Length > 100 ? 100 : content.Length);
                string author = reader["UserName"].ToString();
                int sectionId = reader.GetInt32(reader.GetOrdinal("SectionId"));
                bool published = reader.GetBoolean(reader.GetOrdinal("Published"));
                DateTime pubDate = reader.GetDateTime(reader.GetOrdinal("DateCreated"));
                string strPubDate = DateTools.DateToString(pubDate, DateTools.Resolution.DAY);

                Document doc = new Document();
                doc.Add(new Field("ArticleId", articleId.ToString(), Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("Title", title, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field("Content", content, Field.Store.NO, Field.Index.ANALYZED));
                doc.Add(new Field("Intro", title, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("Author", author, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("SectionId", sectionId.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("Published", published.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("PubDate", strPubDate, Field.Store.YES, Field.Index.NOT_ANALYZED));

                writer.AddDocument(doc);
            }

            // Close everything, shut down.
            writer.Optimize();
            writer.Commit();
            writer.Close();

            dbConn.Close();
        }
Example #31
 /// <summary>
 /// Releases one writer reference; commits and closes the writer when the last reference is released.
 /// </summary>
 /// <param name="writer">The index writer to close.</param>
 public void CloseIndexWriter(IndexWriter writer)
 {
     lock (this)
     {
         --_currentWriters;
         if (_currentWriters == 0)
         {
             writer.Commit();
             writer.Close();
         }
     }
 }
 private static void RebuildIndex(Lucene.Net.Store.Directory directory, Analyzer analyzer, List<Product> products)
 {
     IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.LIMITED);
     writer.DeleteAll();
     writer.Commit();
     foreach (Product p in products) // Add Documents to the Index.
     {
         AddDocumentToIndex(p, writer);
     }
     writer.Optimize();
     writer.Commit();
     writer.Dispose();
 }
 public void MyTestInitialize()
 {
     this.directory = new RAMDirectory();
     IndexWriter writer = new IndexWriter(this.directory, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
     for (int i = 0; i < 10; i++)
     {
         Document d = new Document();
         d.Add(new Field("Text", i.ToString(), Field.Store.YES, Field.Index.ANALYZED));
         writer.AddDocument(d);
         writer.Commit();
     }
     writer.Close();
 }
Example #34
 /// <summary>
 /// Releases the instance.
 /// </summary>
 public void Dispose()
 {
     _writer.Flush(true, true, true);
     _writer.Commit();
     _writer.ExpungeDeletes(true);
     try
     {
         if (Disposing != null)
         {
             Disposing(this, EventArgs.Empty);
         }
     }
     finally
     {
         _writer.Close();
         _resetEvent.Set();
     }
 }
        /*
         * Run one indexer and 2 searchers against single index as
         * stress test.
         */

        public virtual void RunTest(Directory directory)
        {
            TimedThread[] threads = new TimedThread[4];

            IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMaxBufferedDocs(7);

            ((TieredMergePolicy)conf.MergePolicy).MaxMergeAtOnce = 3;
            IndexWriter writer = RandomIndexWriter.MockIndexWriter(directory, conf, Random());

            // Establish a base index of 100 docs:
            for (int i = 0; i < 100; i++)
            {
                Documents.Document d = new Documents.Document();
                d.Add(NewStringField("id", Convert.ToString(i), Field.Store.YES));
                d.Add(NewTextField("contents", English.IntToEnglish(i), Field.Store.NO));
                if ((i - 1) % 7 == 0)
                {
                    writer.Commit();
                }
                writer.AddDocument(d);
            }
            writer.Commit();

            IndexReader r = DirectoryReader.Open(directory);

            Assert.AreEqual(100, r.NumDocs);
            r.Dispose();

            IndexerThread indexerThread = new IndexerThread(writer, threads);

            threads[0] = indexerThread;
            indexerThread.Start();

            IndexerThread indexerThread2 = new IndexerThread(writer, threads);

            threads[1] = indexerThread2;
            indexerThread2.Start();

            SearcherThread searcherThread1 = new SearcherThread(directory, threads);

            threads[2] = searcherThread1;
            searcherThread1.Start();

            SearcherThread searcherThread2 = new SearcherThread(directory, threads);

            threads[3] = searcherThread2;
            searcherThread2.Start();

            indexerThread.Join();
            indexerThread2.Join();
            searcherThread1.Join();
            searcherThread2.Join();

            writer.Dispose();

            Assert.IsTrue(!indexerThread.Failed, "hit unexpected exception in indexer");
            Assert.IsTrue(!indexerThread2.Failed, "hit unexpected exception in indexer2");
            Assert.IsTrue(!searcherThread1.Failed, "hit unexpected exception in search1");
            Assert.IsTrue(!searcherThread2.Failed, "hit unexpected exception in search2");
            //System.out.println("    Writer: " + indexerThread.count + " iterations");
            //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created");
            //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created");
        }
Example #36
            public override void DoWork()
            {
                var config = outerInstance.NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                             .SetMaxBufferedDocs(3)
                             .SetMergeScheduler(newScheduler1())
                             .SetMergePolicy(NewLogMergePolicy(2));
                IndexWriter writer1 = new IndexWriter(dir1, config);

                ((IConcurrentMergeScheduler)writer1.Config.MergeScheduler).SetSuppressExceptions();

                // Intentionally use different params so flush/merge
                // happen @ different times
                var config2 = outerInstance.NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                              .SetMaxBufferedDocs(2)
                              .SetMergeScheduler(newScheduler2())
                              .SetMergePolicy(NewLogMergePolicy(3));
                IndexWriter writer2 = new IndexWriter(dir2, config2);

                ((IConcurrentMergeScheduler)writer2.Config.MergeScheduler).SetSuppressExceptions();

                Update(writer1);
                Update(writer2);

                DoFail = true;
                try
                {
                    lock (@lock)
                    {
                        try
                        {
                            writer1.PrepareCommit();
                        }
                        catch (Exception)
                        {
                            writer1.Rollback();
                            writer2.Rollback();
                            return;
                        }
                        try
                        {
                            writer2.PrepareCommit();
                        }
                        catch (Exception)
                        {
                            writer1.Rollback();
                            writer2.Rollback();
                            return;
                        }

                        writer1.Commit();
                        writer2.Commit();
                    }
                }
                finally
                {
                    DoFail = false;
                }

                writer1.Dispose();
                writer2.Dispose();
            }
Example #37
        /// <summary>
        /// Add text to the existing index.
        /// </summary>
        /// <param name="writer">The index writer.</param>
        /// <param name="facetWriter">The facet index writer.</param>
        /// <param name="addTextData">The text data to add.</param>
        /// <param name="config">The facet configuration information.</param>
        public void AddText(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter, Dictionary <FacetField, AddTextData[]> addTextData, FacetsConfig config)
        {
            long totalTextLength           = 0;
            long maxTextLengthBeforeCommit = 30000000L;

            // For each text facet.
            foreach (KeyValuePair <FacetField, AddTextData[]> item in addTextData)
            {
                // If text exists.
                if (item.Value != null && item.Value.Length > 0)
                {
                    // Field type for the text name.
                    FieldType nameFieldType = new Lucene.Net.Documents.FieldType()
                    {
                        Indexed      = true,
                        Tokenized    = false,
                        Stored       = true,
                        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                    };

                    // Field type for the complete text (whether it is stored depends on data.StoreText below).
                    FieldType completeFieldType = new Lucene.Net.Documents.FieldType()
                    {
                        Indexed      = true,
                        Tokenized    = false,
                        Stored       = true,
                        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                    };

                    // Field type for the individual words added as facet content.
                    FieldType textFieldType = new Lucene.Net.Documents.FieldType()
                    {
                        Indexed      = true,
                        Tokenized    = false,
                        Stored       = false,
                        IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                    };

                    // For each text.
                    foreach (AddTextData data in item.Value)
                    {
                        // Should the data be stored.
                        completeFieldType.Stored = data.StoreText;

                        // Create the document.
                        Lucene.Net.Documents.Document document     = new Lucene.Net.Documents.Document();
                        Lucene.Net.Documents.Field    textName     = new Field("textname", data.Name.ToLower(), nameFieldType);
                        Lucene.Net.Documents.Field    textComplete = new Field("textcomplete", data.Text.ToLower(), completeFieldType);

                        document.Add(item.Key);
                        document.Add(textName);
                        document.Add(textComplete);

                        // Split the white spaces from the text.
                        string[] words = data.Text.Words();

                        // If words exist.
                        if (words != null && words.Length > 0)
                        {
                            // Add the query for each word.
                            for (int j = 0; j < words.Length; j++)
                            {
                                // Format the word.
                                string word = words[j].ToLower().RemovePunctuationFromStartAndEnd();

                                // If a word exists.
                                if (!String.IsNullOrEmpty(word))
                                {
                                    Lucene.Net.Documents.Field textData = new Field("facetcontent", word, textFieldType);
                                    document.Add(textData);
                                }
                            }
                        }

                        // Add the document.
                        writer.AddDocument(config.Build(facetWriter, document));

                        // Commit after a set number of documents.
                        totalTextLength += (long)data.Text.Length;
                        if (totalTextLength > maxTextLengthBeforeCommit)
                        {
                            // Commit the index.
                            writer.Commit();
                            facetWriter.Commit();
                            totalTextLength = 0;
                        }
                    }
                }
            }

            // Commit the index.
            writer.Commit();
            facetWriter.Commit();
        }
Example #38
        public virtual void RunTest(string testName)
        {
            m_failed.Value    = (false);
            m_addCount.Value  = 0;
            m_delCount.Value  = 0;
            m_packCount.Value = 0;

            long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

            Random random = new J2N.Randomizer(Random.NextInt64());

            using LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues);
            DirectoryInfo tempDir = CreateTempDir(testName);

            m_dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
            if (m_dir is BaseDirectoryWrapper baseDirectoryWrapper)
            {
                baseDirectoryWrapper.CheckIndexOnDispose = false; // don't double-checkIndex, we do it ourselves.
            }
            MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

            if (LuceneTestCase.TestNightly)
            {
                // newIWConfig makes smallish max seg size, which
                // results in tons and tons of segments for this test
                // when run nightly:
                MergePolicy mp = conf.MergePolicy;
                if (mp is TieredMergePolicy tieredMergePolicy)
                {
                    //tieredMergePolicy.MaxMergedSegmentMB = 5000.0;
                    tieredMergePolicy.MaxMergedSegmentMB = 2500.0; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour
                }
                else if (mp is LogByteSizeMergePolicy logByteSizeMergePolicy)
                {
                    //logByteSizeMergePolicy.MaxMergeMB = 1000.0;
                    logByteSizeMergePolicy.MaxMergeMB = 500.0; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour
                }
                else if (mp is LogMergePolicy logMergePolicy)
                {
                    //logMergePolicy.MaxMergeDocs = 100000;
                    logMergePolicy.MaxMergeDocs = 50000; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour
                }
            }

            conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousClass(this));

            if (Verbose)
            {
                conf.SetInfoStream(new PrintStreamInfoStreamAnonymousClass(Console.Out));
            }
            m_writer = new IndexWriter(m_dir, conf);
            TestUtil.ReduceOpenFiles(m_writer);

            TaskScheduler es = LuceneTestCase.Random.NextBoolean() ? null : TaskScheduler.Default;

            DoAfterWriter(es);

            int NUM_INDEX_THREADS = TestUtil.NextInt32(LuceneTestCase.Random, 2, 4);

            //int RUN_TIME_SEC = LuceneTestCase.TestNightly ? 300 : RandomMultiplier;
            // LUCENENET specific - lowered from 300 to 150 to reduce total time on Nightly
            // build to less than 1 hour.
            int RUN_TIME_SEC = LuceneTestCase.TestNightly ? 150 : RandomMultiplier;

            ISet <string>             delIDs     = new ConcurrentHashSet <string>();
            ISet <string>             delPackIDs = new ConcurrentHashSet <string>();
            ConcurrentQueue <SubDocs> allSubDocs = new ConcurrentQueue <SubDocs>();

            long stopTime = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + (RUN_TIME_SEC * 1000); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

            ThreadJob[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

            if (Verbose)
            {
                Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            }

            // Let index build up a bit
            Thread.Sleep(100);

            DoSearching(es, stopTime);

            if (Verbose)
            {
                Console.WriteLine("TEST: all searching done [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            }

            for (int thread = 0; thread < indexThreads.Length; thread++)
            {
                indexThreads[thread].Join();
            }

            if (Verbose)
            {
                Console.WriteLine("TEST: done join indexing threads [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]; addCount=" + m_addCount + " delCount=" + m_delCount); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            }

            IndexSearcher s = GetFinalSearcher();

            if (Verbose)
            {
                Console.WriteLine("TEST: finalSearcher=" + s);
            }

            assertFalse(m_failed);

            bool doFail = false;

            // Verify: make sure delIDs are in fact deleted:
            foreach (string id in delIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
                    doFail = true;
                }
            }

            // Verify: make sure delPackIDs are in fact deleted:
            foreach (string id in delPackIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
                    doFail = true;
                }
            }

            // Verify: make sure each group of sub-docs are still in docID order:
            foreach (SubDocs subDocs in allSubDocs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
                if (!subDocs.Deleted)
                {
                    // We sort by relevance but the scores should be identical so sort falls back to by docID:
                    if (hits.TotalHits != subDocs.SubIDs.Count)
                    {
                        Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                        doFail = true;
                    }
                    else
                    {
                        int lastDocID  = -1;
                        int startDocID = -1;
                        foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                        {
                            int docID = scoreDoc.Doc;
                            if (lastDocID != -1)
                            {
                                assertEquals(1 + lastDocID, docID);
                            }
                            else
                            {
                                startDocID = docID;
                            }
                            lastDocID = docID;
                            Document doc = s.Doc(docID);
                            assertEquals(subDocs.PackID, doc.Get("packID"));
                        }

                        lastDocID = startDocID - 1;
                        foreach (string subID in subDocs.SubIDs)
                        {
                            hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                            assertEquals(1, hits.TotalHits);
                            int docID = hits.ScoreDocs[0].Doc;
                            if (lastDocID != -1)
                            {
                                assertEquals(1 + lastDocID, docID);
                            }
                            lastDocID = docID;
                        }
                    }
                }
                else
                {
                    // Pack was deleted -- make sure its docs are
                    // deleted.  We can't verify packID is deleted
                    // because we can re-use packID for update:
                    foreach (string subID in subDocs.SubIDs)
                    {
                        assertEquals(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
                    }
                }
            }

            // Verify: make sure all not-deleted docs are in fact
            // not deleted:
            int endID = Convert.ToInt32(docs.NextDoc().Get("docid"), CultureInfo.InvariantCulture);

            docs.Dispose();

            for (int id = 0; id < endID; id++)
            {
                string stringID = id.ToString(CultureInfo.InvariantCulture);
                if (!delIDs.Contains(stringID))
                {
                    TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
                    if (hits.TotalHits != 1)
                    {
                        Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + Collections.ToString(delIDs));
                        doFail = true;
                    }
                }
            }
            assertFalse(doFail);

            assertEquals("index=" + m_writer.SegString() + " addCount=" + m_addCount + " delCount=" + m_delCount, m_addCount - m_delCount, s.IndexReader.NumDocs);
            ReleaseSearcher(s);

            m_writer.Commit();

            assertEquals("index=" + m_writer.SegString() + " addCount=" + m_addCount + " delCount=" + m_delCount, m_addCount - m_delCount, m_writer.NumDocs);

            DoClose();
            m_writer.Dispose(false);

            // Cannot shutdown until after writer is closed because
            // writer has merged segment warmer that uses IS to run
            // searches, and that IS may be using this es!

            /*if (es != null)
             * {
             * es.shutdown();
             * es.awaitTermination(1, TimeUnit.SECONDS);
             * }*/

            TestUtil.CheckIndex(m_dir);
            m_dir.Dispose();
            //System.IO.Directory.Delete(tempDir.FullName, true);
            TestUtil.Rm(tempDir);

            if (Verbose)
            {
                Console.WriteLine("TEST: done [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            }
        }
        private void DoTestReopenWithCommit(Random random, Directory dir, bool withReopen)
        {
            IndexWriter iwriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode_e.CREATE).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(NewLogMergePolicy()));

            iwriter.Commit();
            DirectoryReader reader = DirectoryReader.Open(dir);

            try
            {
                int       M          = 3;
                FieldType customType = new FieldType(TextField.TYPE_STORED);
                customType.Tokenized = false;
                FieldType customType2 = new FieldType(TextField.TYPE_STORED);
                customType2.Tokenized = false;
                customType2.OmitNorms = true;
                FieldType customType3 = new FieldType();
                customType3.Stored = true;
                for (int i = 0; i < 4; i++)
                {
                    for (int j = 0; j < M; j++)
                    {
                        Document doc = new Document();
                        doc.Add(NewField("id", i + "_" + j, customType));
                        doc.Add(NewField("id2", i + "_" + j, customType2));
                        doc.Add(NewField("id3", i + "_" + j, customType3));
                        iwriter.AddDocument(doc);
                        if (i > 0)
                        {
                            int      k = i - 1;
                            int      n = j + k * M;
                            Document prevIterationDoc = reader.Document(n);
                            Assert.IsNotNull(prevIterationDoc);
                            string id = prevIterationDoc.Get("id");
                            Assert.AreEqual(k + "_" + j, id);
                        }
                    }
                    iwriter.Commit();
                    if (withReopen)
                    {
                        // reopen
                        DirectoryReader r2 = DirectoryReader.OpenIfChanged(reader);
                        if (r2 != null)
                        {
                            reader.Dispose();
                            reader = r2;
                        }
                    }
                    else
                    {
                        // recreate
                        reader.Dispose();
                        reader = DirectoryReader.Open(dir);
                    }
                }
            }
            finally
            {
                iwriter.Dispose();
                reader.Dispose();
            }
        }
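DoTestReopenWithCommit drives the basic contract of Commit(): a reader opened on the Directory only sees documents that were committed, and DirectoryReader.OpenIfChanged returns a new reader only when a later commit exists. Below is a minimal standalone sketch of that contract, assuming Lucene.NET 4.8 and a StandardAnalyzer; the names are illustrative, not part of the example above.

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

// Sketch: a directory-based reader only sees changes after Commit().
public static void CommitThenReopen()
{
    using (Directory dir = new RAMDirectory())
    using (var writer = new IndexWriter(dir,
        new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48))))
    {
        writer.Commit(); // initial empty commit so a reader can be opened at all
        DirectoryReader reader = DirectoryReader.Open(dir);

        var doc = new Document();
        doc.Add(new StringField("id", "1", Field.Store.YES));
        writer.AddDocument(doc);
        // reader.NumDocs is still 0: the added document has not been committed yet.

        writer.Commit();

        // OpenIfChanged returns null if nothing was committed since `reader` was opened.
        DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
        if (newReader != null)
        {
            reader.Dispose();
            reader = newReader; // reader.NumDocs is now 1
        }
        reader.Dispose();
    }
}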
            public override void Run()
            {
                DirectoryReader reader  = null;
                bool            success = false;

                try
                {
                    Random random = Random();
                    while (NumUpdates.GetAndDecrement() > 0)
                    {
                        double group = random.NextDouble();
                        Term   t;
                        if (group < 0.1)
                        {
                            t = new Term("updKey", "g0");
                        }
                        else if (group < 0.5)
                        {
                            t = new Term("updKey", "g1");
                        }
                        else if (group < 0.8)
                        {
                            t = new Term("updKey", "g2");
                        }
                        else
                        {
                            t = new Term("updKey", "g3");
                        }
                        //              System.out.println("[" + Thread.currentThread().getName() + "] numUpdates=" + numUpdates + " updateTerm=" + t);
                        if (random.NextBoolean()) // sometimes unset a value
                        {
                            //                System.err.println("[" + Thread.currentThread().getName() + "] t=" + t + ", f=" + f + ", updValue=UNSET");
                            Writer.UpdateBinaryDocValue(t, f, null);
                            Writer.UpdateNumericDocValue(t, Cf, null);
                        }
                        else
                        {
                            long updValue = random.Next();
                            //                System.err.println("[" + Thread.currentThread().getName() + "] t=" + t + ", f=" + f + ", updValue=" + updValue);
                            Writer.UpdateBinaryDocValue(t, f, TestBinaryDocValuesUpdates.ToBytes(updValue));
                            Writer.UpdateNumericDocValue(t, Cf, updValue * 2);
                        }

                        if (random.NextDouble() < 0.2)
                        {
                            // delete a random document
                            int doc = random.Next(NumDocs);
                            //                System.out.println("[" + Thread.currentThread().getName() + "] deleteDoc=doc" + doc);
                            Writer.DeleteDocuments(new Term("id", "doc" + doc));
                        }

                        if (random.NextDouble() < 0.05) // commit every 20 updates on average
                        {
                            //                  System.out.println("[" + Thread.currentThread().getName() + "] commit");
                            Writer.Commit();
                        }

                        if (random.NextDouble() < 0.1) // reopen NRT reader (apply updates), on average once every 10 updates
                        {
                            if (reader == null)
                            {
                                //                  System.out.println("[" + Thread.currentThread().getName() + "] open NRT");
                                reader = DirectoryReader.Open(Writer, true);
                            }
                            else
                            {
                                //                  System.out.println("[" + Thread.currentThread().getName() + "] reopen NRT");
                                DirectoryReader r2 = DirectoryReader.OpenIfChanged(reader, Writer, true);
                                if (r2 != null)
                                {
                                    reader.Dispose();
                                    reader = r2;
                                }
                            }
                        }
                    }
                    //            System.out.println("[" + Thread.currentThread().getName() + "] DONE");
                    success = true;
                }
                catch (IOException e)
                {
                    throw new Exception(e.ToString(), e);
                }
                finally
                {
                    if (reader != null)
                    {
                        try
                        {
                            reader.Dispose();
                        }
                        catch (IOException e)
                        {
                            if (success) // suppress this exception only if there was another exception
                            {
                                throw new Exception(e.ToString(), e);
                            }
                        }
                    }
                    Done.Signal();
                }
            }
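The update thread above refreshes a near-real-time (NRT) reader straight from the writer via DirectoryReader.Open(writer, true) instead of waiting for Commit(); the periodic Commit() calls are only about making the updates durable. A hedged standalone sketch of that split, assuming Lucene.NET 4.8:

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

// Sketch: NRT readers see uncommitted changes; Commit() makes them durable and visible
// to readers opened on the Directory.
public static void NearRealTimeSketch()
{
    using (Directory dir = new RAMDirectory())
    using (var writer = new IndexWriter(dir,
        new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48))))
    {
        var doc = new Document();
        doc.Add(new StringField("id", "1", Field.Store.YES));
        writer.AddDocument(doc);

        // true = apply pending deletes; the NRT reader sees the uncommitted document.
        DirectoryReader nrt = DirectoryReader.Open(writer, true);

        writer.DeleteDocuments(new Term("id", "1"));

        // Refresh the NRT reader against the writer; returns null if nothing changed.
        DirectoryReader refreshed = DirectoryReader.OpenIfChanged(nrt, writer, true);
        if (refreshed != null)
        {
            nrt.Dispose();
            nrt = refreshed; // nrt.NumDocs is now 0
        }
        nrt.Dispose();

        writer.Commit(); // without this, a crash could lose all of the above changes
    }
}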
        public virtual void TestCloseUnderException()
        {
            int iters = 1000 + 1 + Random.nextInt(20);

            for (int j = 0; j < iters; j++)
            {
                Directory   dir    = NewDirectory();
                IndexWriter writer = new IndexWriter(dir,
                                                     NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
                writer.Commit();
                writer.Dispose();
                DirectoryReader    open         = DirectoryReader.Open(dir);
                bool               throwOnClose = !Rarely();
                AtomicReader       wrap         = SlowCompositeReaderWrapper.Wrap(open);
                FilterAtomicReader reader       = new FilterAtomicReaderAnonymousClass(this, wrap, throwOnClose);
                IList <IndexReader.IReaderClosedListener> listeners = new List <IndexReader.IReaderClosedListener>();
                int         listenerCount = Random.Next(20);
                AtomicInt32 count         = new AtomicInt32();
                bool        faultySet     = false;
                for (int i = 0; i < listenerCount; i++)
                {
                    if (Rarely())
                    {
                        faultySet = true;
                        reader.AddReaderClosedListener(new FaultyListener());
                    }
                    else
                    {
                        count.IncrementAndGet();
                        reader.AddReaderClosedListener(new CountListener(count));
                    }
                }
                if (!faultySet && !throwOnClose)
                {
                    reader.AddReaderClosedListener(new FaultyListener());
                }
                try
                {
                    reader.Dispose();
                    Assert.Fail("expected Exception");
                }
                catch (Exception ex) when(ex.IsIllegalStateException())
                {
                    if (throwOnClose)
                    {
                        Assert.AreEqual("BOOM!", ex.Message);
                    }
                    else
                    {
                        Assert.AreEqual("GRRRRRRRRRRRR!", ex.Message);
                    }
                }

                try
                {
                    var aaa = reader.Fields;
                    Assert.Fail("we are closed");
                }
                catch (Exception ex) when(ex.IsAlreadyClosedException())
                {
                }

                if (Random.NextBoolean())
                {
                    reader.Dispose(); // call it again
                }
                Assert.AreEqual(0, count);
                wrap.Dispose();
                dir.Dispose();
            }
        }
Example #42
        public virtual void TestAddDocumentOnDiskFull()
        {
            for (int pass = 0; pass < 2; pass++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: pass=" + pass);
                }
                bool doAbort = pass == 1;
                long diskFree = TestUtil.NextInt32(Random, 100, 300);
                while (true)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: cycle: diskFree=" + diskFree);
                    }
                    MockDirectoryWrapper dir = new MockDirectoryWrapper(Random, new RAMDirectory());
                    dir.MaxSizeInBytes = diskFree;
                    IndexWriter     writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
                    IMergeScheduler ms     = writer.Config.MergeScheduler;
                    if (ms is IConcurrentMergeScheduler)
                    {
                        // this test intentionally produces exceptions
                        // in the threads that CMS launches; we don't
                        // want to pollute test output with these.
                        ((IConcurrentMergeScheduler)ms).SetSuppressExceptions();
                    }

                    bool hitError = false;
                    try
                    {
                        for (int i = 0; i < 200; i++)
                        {
                            AddDoc(writer);
                        }
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: done adding docs; now commit");
                        }
                        writer.Commit();
                    }
                    catch (IOException e)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: exception on addDoc");
                            Console.WriteLine(e.StackTrace);
                        }
                        hitError = true;
                    }

                    if (hitError)
                    {
                        if (doAbort)
                        {
                            if (VERBOSE)
                            {
                                Console.WriteLine("TEST: now rollback");
                            }
                            writer.Rollback();
                        }
                        else
                        {
                            try
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("TEST: now close");
                                }
                                writer.Dispose();
                            }
                            catch (IOException e)
                            {
                                if (VERBOSE)
                                {
                                    Console.WriteLine("TEST: exception on close; retry w/ no disk space limit");
                                    Console.WriteLine(e.StackTrace);
                                }
                                dir.MaxSizeInBytes = 0;
                                writer.Dispose();
                            }
                        }

                        //TestUtil.SyncConcurrentMerges(ms);

                        if (TestUtil.AnyFilesExceptWriteLock(dir))
                        {
                            TestIndexWriter.AssertNoUnreferencedFiles(dir, "after disk full during addDocument");

                            // Make sure reader can open the index:
                            DirectoryReader.Open(dir).Dispose();
                        }

                        dir.Dispose();
                        // Now try again w/ more space:

                        diskFree += TEST_NIGHTLY ? TestUtil.NextInt32(Random, 400, 600) : TestUtil.NextInt32(Random, 3000, 5000);
                    }
                    else
                    {
                        //TestUtil.SyncConcurrentMerges(writer);
                        dir.MaxSizeInBytes = 0;
                        writer.Dispose();
                        dir.Dispose();
                        break;
                    }
                }
            }
        }
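TestAddDocumentOnDiskFull exercises the decision an application faces when AddDocument or Commit throws an IOException (simulated here with a disk-full MockDirectoryWrapper): roll back to the last successful commit, or close the writer and keep what was already committed. A hedged sketch of that decision; the helper name is illustrative.

using System.IO;
using Lucene.Net.Documents;
using Lucene.Net.Index;

// Sketch: recovery choices when Commit() fails (e.g. disk full).
public static void AddAndCommitSafely(IndexWriter writer, Document doc, bool abortOnError)
{
    try
    {
        writer.AddDocument(doc);
        writer.Commit();
    }
    catch (IOException)
    {
        if (abortOnError)
        {
            writer.Rollback(); // drops all changes since the last commit and closes the writer
        }
        else
        {
            writer.Dispose();  // closes the writer; previously committed data stays intact
        }
    }
}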
        public virtual void TestFutureCommit()
        {
            Directory dir = NewDirectory();

            IndexWriter w   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));
            Document    doc = new Document();

            w.AddDocument(doc);

            // commit to "first"
            IDictionary <string, string> commitData = new Dictionary <string, string>();

            commitData["tag"] = "first";
            w.SetCommitData(commitData);
            w.Commit();

            // commit to "second"
            w.AddDocument(doc);
            commitData["tag"] = "second";
            w.SetCommitData(commitData);
            w.Dispose();

            // open "first" with IndexWriter
            IndexCommit commit = null;

            foreach (IndexCommit c in DirectoryReader.ListCommits(dir))
            {
                if (c.UserData["tag"].Equals("first"))
                {
                    commit = c;
                    break;
                }
            }

            Assert.IsNotNull(commit);

            w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE).SetIndexCommit(commit));

            Assert.AreEqual(1, w.NumDocs);

            // commit IndexWriter to "third"
            w.AddDocument(doc);
            commitData["tag"] = "third";
            w.SetCommitData(commitData);
            w.Dispose();

            // make sure "second" commit is still there
            commit = null;
            foreach (IndexCommit c in DirectoryReader.ListCommits(dir))
            {
                if (c.UserData["tag"].Equals("second"))
                {
                    commit = c;
                    break;
                }
            }

            Assert.IsNotNull(commit);

            dir.Dispose();
        }
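TestFutureCommit combines two Commit()-related features: SetCommitData attaches user data that is stored with each commit, and SetIndexCommit opens a new writer on an older commit when the deletion policy has kept it. A hedged sketch of the same flow, assuming Lucene.NET 4.8; the "tag" key and method name are illustrative.

using System.Collections.Generic;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

// Sketch: tag each commit with user data, then branch a new writer off an older commit.
// Requires a deletion policy that keeps old commits around.
public static void TagAndBranch(Directory dir)
{
    var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
    var conf = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)
        .SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);

    using (var writer = new IndexWriter(dir, conf))
    {
        writer.AddDocument(new Document());
        writer.SetCommitData(new Dictionary<string, string> { { "tag", "first" } });
        writer.Commit();

        writer.AddDocument(new Document());
        writer.SetCommitData(new Dictionary<string, string> { { "tag", "second" } });
        writer.Commit();
    }

    // Locate the commit tagged "first" and open a new writer on top of it.
    IndexCommit first = null;
    foreach (IndexCommit c in DirectoryReader.ListCommits(dir))
    {
        if (c.UserData.ContainsKey("tag") && c.UserData["tag"] == "first")
        {
            first = c;
        }
    }

    var conf2 = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)
        .SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)
        .SetIndexCommit(first);
    using (var branched = new IndexWriter(dir, conf2))
    {
        // branched.NumDocs == 1: only the document from the "first" commit is visible.
    }
}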
        public virtual void TestPrepareCommitRollback()
        {
            Directory dir = NewDirectory();

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).PreventDoubleWrite = false;
            }

            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(5)));

            writer.Commit();

            for (int i = 0; i < 23; i++)
            {
                AddDoc(writer);
            }

            DirectoryReader reader = DirectoryReader.Open(dir);

            Assert.AreEqual(0, reader.NumDocs);

            writer.PrepareCommit();

            IndexReader reader2 = DirectoryReader.Open(dir);

            Assert.AreEqual(0, reader2.NumDocs);

            writer.Rollback();

            IndexReader reader3 = DirectoryReader.OpenIfChanged(reader);

            Assert.IsNull(reader3);
            Assert.AreEqual(0, reader.NumDocs);
            Assert.AreEqual(0, reader2.NumDocs);
            reader.Dispose();
            reader2.Dispose();

            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            for (int i = 0; i < 17; i++)
            {
                AddDoc(writer);
            }

            reader = DirectoryReader.Open(dir);
            Assert.AreEqual(0, reader.NumDocs);
            reader.Dispose();

            writer.PrepareCommit();

            reader = DirectoryReader.Open(dir);
            Assert.AreEqual(0, reader.NumDocs);
            reader.Dispose();

            writer.Commit();
            reader = DirectoryReader.Open(dir);
            Assert.AreEqual(17, reader.NumDocs);
            reader.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
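TestPrepareCommitRollback exercises the two-phase form of committing: PrepareCommit() flushes and syncs the pending changes without making them visible to readers, and a later Commit() finishes the operation while Rollback() abandons it. A hedged sketch of how this can keep the index in step with another transactional resource; the delegate is illustrative.

using System;
using Lucene.Net.Index;

// Sketch: two-phase commit. PrepareCommit() does the expensive work (flush + fsync) but keeps
// the changes invisible; Commit() then finishes cheaply, or Rollback() abandons them.
public static void TwoPhaseCommit(IndexWriter writer, Action commitOtherResource)
{
    writer.PrepareCommit();
    try
    {
        commitOtherResource(); // e.g. commit a database transaction in step with the index
        writer.Commit();       // second phase: the prepared changes become visible and durable
    }
    catch
    {
        writer.Rollback();     // discard the prepared commit (this also closes the writer)
        throw;
    }
}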
        public virtual void TestDeletes1()
        {
            //IndexWriter.debug2 = System.out;
            Directory         dir = new MockDirectoryWrapper(new Random(Random().Next()), new RAMDirectory());
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergeScheduler(new SerialMergeScheduler());
            iwc.SetMaxBufferedDocs(5000);
            iwc.SetRAMBufferSizeMB(100);
            RangeMergePolicy fsmp = new RangeMergePolicy(this, false);

            iwc.SetMergePolicy(fsmp);
            IndexWriter writer = new IndexWriter(dir, iwc);

            for (int x = 0; x < 5; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "1", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit1");
            writer.Commit();
            Assert.AreEqual(1, writer.SegmentCount);
            for (int x = 5; x < 10; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "2", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit2");
            writer.Commit();
            Assert.AreEqual(2, writer.SegmentCount);

            for (int x = 10; x < 15; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "3", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }

            writer.DeleteDocuments(new Term("id", "1"));

            writer.DeleteDocuments(new Term("id", "11"));

            // flushing without applying deletes means
            // there will still be deletes in the segment infos
            writer.Flush(false, false);
            Assert.IsTrue(writer.bufferedUpdatesStream.Any());

            // get reader flushes pending deletes
            // so there should not be anymore
            IndexReader r1 = writer.GetReader();

            Assert.IsFalse(writer.bufferedUpdatesStream.Any());
            r1.Dispose();

            // delete id:2 from the first segment
            // merge segments 0 and 1
            // which should apply the delete id:2
            writer.DeleteDocuments(new Term("id", "2"));
            writer.Flush(false, false);
            fsmp         = (RangeMergePolicy)writer.Config.MergePolicy;
            fsmp.DoMerge = true;
            fsmp.Start   = 0;
            fsmp.Length  = 2;
            writer.MaybeMerge();

            Assert.AreEqual(2, writer.SegmentCount);

            // id:2 shouldn't exist anymore because
            // it's been applied in the merge and now it's gone
            IndexReader r2 = writer.GetReader();

            int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2);
            Assert.IsTrue(id2docs == null);
            r2.Dispose();

            /*
             * /// // added docs are in the ram buffer
             * /// for (int x = 15; x < 20; x++) {
             * ///  writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2));
             * ///  System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
             * /// }
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * /// // delete from the ram buffer
             * /// writer.DeleteDocuments(new Term("id", Integer.toString(13)));
             * ///
             * /// Term id3 = new Term("id", Integer.toString(3));
             * ///
             * /// // delete from the 1st segment
             * /// writer.DeleteDocuments(id3);
             * ///
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * ///
             * /// //System.out
             * /// //    .println("segdels1:" + writer.docWriter.deletesToString());
             * ///
             * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
             * ///
             * /// // we cause a merge to happen
             * /// fsmp.doMerge = true;
             * /// fsmp.start = 0;
             * /// fsmp.Length = 2;
             * /// System.out.println("maybeMerge "+writer.SegmentInfos);
             * ///
             * /// SegmentInfo info0 = writer.SegmentInfos.Info(0);
             * /// SegmentInfo info1 = writer.SegmentInfos.Info(1);
             * ///
             * /// writer.MaybeMerge();
             * /// System.out.println("maybeMerge after "+writer.SegmentInfos);
             * /// // there should be docs in RAM
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * ///
             * /// // assert we've merged the 1 and 2 segments
             * /// // and still have a segment leftover == 2
             * /// Assert.AreEqual(2, writer.SegmentInfos.Size());
             * /// Assert.IsFalse(segThere(info0, writer.SegmentInfos));
             * /// Assert.IsFalse(segThere(info1, writer.SegmentInfos));
             * ///
             * /// //System.out.println("segdels2:" + writer.docWriter.deletesToString());
             * ///
             * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
             * ///
             * /// IndexReader r = writer.GetReader();
             * /// IndexReader r1 = r.getSequentialSubReaders()[0];
             * /// printDelDocs(r1.GetLiveDocs());
             * /// int[] docs = toDocsArray(id3, null, r);
             * /// System.out.println("id3 docs:"+Arrays.toString(docs));
             * /// // there shouldn't be any docs for id:3
             * /// Assert.IsTrue(docs == null);
             * /// r.Dispose();
             * ///
             * /// part2(writer, fsmp);
             * ///
             */
            // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
            //System.out.println("close");
            writer.Dispose();
            dir.Dispose();
        }
Example #46
 public void Save()
 {
     writer.Optimize();
     writer.Commit();
 }
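This Save() uses the 3.x API, where Optimize() merged the whole index before committing. Optimize was removed in Lucene 4.x in favor of ForceMerge, so against a 4.8 writer the rough equivalent would be the sketch below, assuming the same writer field as the snippet above.

// Rough Lucene.NET 4.x equivalent of the Save() above; ForceMerge(1) replaces Optimize().
public void Save()
{
    writer.ForceMerge(1);
    writer.Commit();
}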
        public virtual void TestManyReopensAndFields()
        {
            Directory         dir    = NewDirectory();
            Random            random = Random();
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
            LogMergePolicy    lmp    = NewLogMergePolicy();

            lmp.MergeFactor = 3; // merge often
            conf.SetMergePolicy(lmp);
            IndexWriter writer = new IndexWriter(dir, conf);

            bool            isNRT = random.NextBoolean();
            DirectoryReader reader;

            if (isNRT)
            {
                reader = DirectoryReader.Open(writer, true);
            }
            else
            {
                writer.Commit();
                reader = DirectoryReader.Open(dir);
            }

            int numFields    = random.Next(4) + 3;             // 3-6
            int numNDVFields = random.Next(numFields / 2) + 1; // 1-3

            long[] fieldValues   = new long[numFields];
            bool[] fieldHasValue = new bool[numFields];
            Arrays.Fill(fieldHasValue, true);
            for (int i = 0; i < fieldValues.Length; i++)
            {
                fieldValues[i] = 1;
            }

            int numRounds = AtLeast(15);
            int docID     = 0;

            for (int i = 0; i < numRounds; i++)
            {
                int numDocs = AtLeast(5);
                //      System.out.println("[" + Thread.currentThread().getName() + "]: round=" + i + ", numDocs=" + numDocs);
                for (int j = 0; j < numDocs; j++)
                {
                    Document doc = new Document();
                    doc.Add(new StringField("id", "doc-" + docID, Store.NO));
                    doc.Add(new StringField("key", "all", Store.NO)); // update key
                    // add all fields with their current value
                    for (int f = 0; f < fieldValues.Length; f++)
                    {
                        if (f < numNDVFields)
                        {
                            doc.Add(new NumericDocValuesField("f" + f, fieldValues[f]));
                        }
                        else
                        {
                            doc.Add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.ToBytes(fieldValues[f])));
                        }
                    }
                    writer.AddDocument(doc);
                    ++docID;
                }

                // if field's value was unset before, unset it from all new added documents too
                for (int field = 0; field < fieldHasValue.Length; field++)
                {
                    if (!fieldHasValue[field])
                    {
                        if (field < numNDVFields)
                        {
                            writer.UpdateNumericDocValue(new Term("key", "all"), "f" + field, null);
                        }
                        else
                        {
                            writer.UpdateBinaryDocValue(new Term("key", "all"), "f" + field, null);
                        }
                    }
                }

                int    fieldIdx    = random.Next(fieldValues.Length);
                string updateField = "f" + fieldIdx;
                if (random.NextBoolean())
                {
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: unset field '" + updateField + "'");
                    fieldHasValue[fieldIdx] = false;
                    if (fieldIdx < numNDVFields)
                    {
                        writer.UpdateNumericDocValue(new Term("key", "all"), updateField, null);
                    }
                    else
                    {
                        writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, null);
                    }
                }
                else
                {
                    fieldHasValue[fieldIdx] = true;
                    if (fieldIdx < numNDVFields)
                    {
                        writer.UpdateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]);
                    }
                    else
                    {
                        writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.ToBytes(++fieldValues[fieldIdx]));
                    }
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]);
                }

                if (random.NextDouble() < 0.2)
                {
                    int deleteDoc = random.Next(docID); // might also delete an already deleted document, ok!
                    writer.DeleteDocuments(new Term("id", "doc-" + deleteDoc));
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc);
                }

                // verify reader
                if (!isNRT)
                {
                    writer.Commit();
                }

                //      System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader);
                DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
                Assert.IsNotNull(newReader);
                reader.Dispose();
                reader = newReader;
                //      System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader);
                Assert.IsTrue(reader.NumDocs > 0); // we delete at most one document per round
                BytesRef scratch = new BytesRef();
                foreach (AtomicReaderContext context in reader.Leaves)
                {
                    AtomicReader r = context.AtomicReader;
                    //        System.out.println(((SegmentReader) r).getSegmentName());
                    IBits liveDocs = r.LiveDocs;
                    for (int field = 0; field < fieldValues.Length; field++)
                    {
                        string           f             = "f" + field;
                        BinaryDocValues  bdv           = r.GetBinaryDocValues(f);
                        NumericDocValues ndv           = r.GetNumericDocValues(f);
                        IBits            docsWithField = r.GetDocsWithField(f);
                        if (field < numNDVFields)
                        {
                            Assert.IsNotNull(ndv);
                            Assert.IsNull(bdv);
                        }
                        else
                        {
                            Assert.IsNull(ndv);
                            Assert.IsNotNull(bdv);
                        }
                        int maxDoc = r.MaxDoc;
                        for (int doc = 0; doc < maxDoc; doc++)
                        {
                            if (liveDocs == null || liveDocs.Get(doc))
                            {
                                //              System.out.println("doc=" + (doc + context.DocBase) + " f='" + f + "' vslue=" + getValue(bdv, doc, scratch));
                                if (fieldHasValue[field])
                                {
                                    Assert.IsTrue(docsWithField.Get(doc));
                                    if (field < numNDVFields)
                                    {
                                        Assert.AreEqual(fieldValues[field], ndv.Get(doc), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                    else
                                    {
                                        Assert.AreEqual(fieldValues[field], TestBinaryDocValuesUpdates.GetValue(bdv, doc, scratch), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                }
                                else
                                {
                                    Assert.IsFalse(docsWithField.Get(doc));
                                }
                            }
                        }
                    }
                }
                //      System.out.println();
            }

            IOUtils.Dispose(writer, reader, dir);
        }
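In the non-NRT branch of this test, a doc-values update only becomes visible to a directory-based reader after writer.Commit() and a reopen. A compact hedged sketch of that branch; the field and term names are illustrative.

using Lucene.Net.Index;

// Sketch: an in-place numeric doc-values update, made visible by Commit() plus a reopen.
public static void BumpCounter(IndexWriter writer, ref DirectoryReader reader, long newValue)
{
    // Update the "counter" doc-values field of every document matching key:all.
    writer.UpdateNumericDocValue(new Term("key", "all"), "counter", newValue);
    writer.Commit();

    DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
    if (newReader != null)
    {
        reader.Dispose();
        reader = newReader;
    }
}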
Example #48
        public void End(bool shouldClose)
        {
            if (!_is_started)
            {
                return;
            }
            if (!shouldClose)
            {
                return;
            }
            //build 2del file list
            if (!_job_status.Cancelled)
            {
                TermEnum term_enum = _index_reader.Terms();
                Term     path_term = new Term("path");
                int      nb_terms  = 0;
                while (term_enum.SkipTo(path_term))                 //skip to new term equal or *ABOVE* "path:" !!!
                {
                    Term term = term_enum.Term();
                    if (term.Field() != path_term.Field())
                    {
                        break;
                    }
                    if (!File.Exists(term.Text()))
                    {
                        _del_file_list.Add(term.Text());
                    }
                    if (_job_status.Cancelled)
                    {
                        break;
                    }
                    nb_terms++;
                }
                term_enum.Close();
                Logger.Log.Info("update: deletion: {0} analyzed terms, found {1} vanished files.", nb_terms, _del_file_list.Count);
            }
            _index_searcher.Close();
            _index_reader.Close();
            //--- deleting deprecated
            if ((_del_file_list.Count > 0) && (!_job_status.Cancelled))
            {
                Stopwatch watch = new Stopwatch();
                watch.Start();

                int         num_file = 0;
                int         nb_files = _del_file_list.Count;
                IndexWriter writer   = new IndexWriter(_index_path, _default_analyzer, false);

                foreach (string path in _del_file_list)
                {
                    if (((num_file++) % 101) == 1)
                    {
                        int progress = ((((num_file++) + 1)) * 100) / nb_files;
                        _job_status.Progress    = progress;
                        _job_status.Description = String.Format("upd: removing (from index) file {0}/{1} - {2}", num_file, _del_file_list.Count,
                                                                StringFu.TimeSpanToString(new TimeSpan((long)(watch.ElapsedMilliseconds) * 10000)));
                    }
                    if (_job_status.Cancelled)
                    {
                        break;
                    }
                    writer.DeleteDocuments(new Term("path", path));
                }
                writer.Commit();
                writer.Close();
                watch.Stop();
            }
            //adding new files
            if ((_add_file_list.Count > 0) && (!_job_status.Cancelled))
            {
                Stopwatch watch = new Stopwatch();
                watch.Start();

                IndexWriter writer = null;
                try
                {
                    writer = new IndexWriter(_index_path, _default_analyzer, false, new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
                    int num_file = 0;
                    int nb_files = _add_file_list.Count;
                    foreach (BasicFileInfo fi in _add_file_list)
                    {
                        if (((num_file++) % 101) == 1)
                        {
                            int progress = ((((num_file++) + 1)) * 100) / nb_files;
                            _job_status.Progress    = progress;
                            _job_status.Description = String.Format("upd: indexing new file {0}/{1} - {2}", num_file, _add_file_list.Count,
                                                                    StringFu.TimeSpanToString(new TimeSpan((long)(watch.ElapsedMilliseconds) * 10000)));
                        }
                        if (_job_status.Cancelled)
                        {
                            break;
                        }

                        writer.AddDocument(_doc_factory.CreateFromPath(fi.FilePath, fi.LastModification));
                        if (num_file % 20 == 0)
                        {
                            writer.Commit();
                        }
                    }
                    writer.Commit();
                }
                catch (System.Exception ex)
                {
                    Log.Error(ex);
                }
                finally
                {
                    if (writer != null)
                    {
                        writer.Close();
                        writer = null;
                    }
                }
                watch.Stop();
            }
            //updating modified files
            if ((_upd_file_list.Count > 0) && (!_job_status.Cancelled))
            {
                Stopwatch watch = new Stopwatch();
                watch.Start();

                int         num_file = 0;
                int         nb_files = _upd_file_list.Count;
                IndexWriter writer   = null;
                try
                {
                    writer = new IndexWriter(_index_path, _default_analyzer, false,
                                             new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));

                    foreach (BasicFileInfo fi in _upd_file_list)
                    {
                        if (((num_file++) % 101) == 1)
                        {
                            int progress = ((((num_file++) + 1)) * 100) / nb_files;
                            _job_status.Progress    = progress;
                            _job_status.Description = String.Format("upd: modified file {0}/{1} - {2}", num_file, _upd_file_list.Count,
                                                                    StringFu.TimeSpanToString(new TimeSpan((long)(watch.ElapsedMilliseconds) * 10000)));
                        }
                        if (_job_status.Cancelled)
                        {
                            break;
                        }
                        writer.UpdateDocument(new Term("path", fi.FilePath),
                                              _doc_factory.CreateFromPath(fi.FilePath, fi.LastModification));
                    }
                    writer.Commit();
                    //LittleBeagle.Properties.Settings.Default.NbIndexedFiles = num_file;
                }
                catch (System.Exception ex)
                {
                    Log.Error(ex);
                }
                finally
                {
                    if (writer != null)
                    {
                        writer.Close();
                        writer = null;
                    }
                }
                watch.Stop();
            }
        }
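End() batches its Commit() calls: deletions are committed once, newly added files every 20 documents, so the costly fsync happens per batch rather than per document and a crash loses at most the tail of the current batch. A stripped-down, hedged sketch of that batching pattern; the names and default interval are illustrative.

using System.Collections.Generic;
using Lucene.Net.Documents;
using Lucene.Net.Index;

// Sketch: commit every N added documents rather than after each one, since every Commit()
// syncs the index files and is comparatively expensive.
public static void IndexInBatches(IndexWriter writer, IEnumerable<Document> docs, int commitEvery = 20)
{
    int count = 0;
    foreach (Document doc in docs)
    {
        writer.AddDocument(doc);
        if (++count % commitEvery == 0)
        {
            writer.Commit();
        }
    }
    writer.Commit(); // final commit for the tail of the batch
}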
        public virtual void TestReopenOnCommit()
        {
            Directory   dir    = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(new KeepAllCommits <IndexCommit>()).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(10)));

            for (int i = 0; i < 4; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("id", "" + i, Field.Store.NO));
                writer.AddDocument(doc);
                IDictionary <string, string> data = new Dictionary <string, string>();
                data["index"]     = i + "";
                writer.CommitData = data;
                writer.Commit();
            }
            for (int i = 0; i < 4; i++)
            {
                writer.DeleteDocuments(new Term("id", "" + i));
                IDictionary <string, string> data = new Dictionary <string, string>();
                data["index"]     = (4 + i) + "";
                writer.CommitData = data;
                writer.Commit();
            }
            writer.Dispose();

            DirectoryReader r = DirectoryReader.Open(dir);

            Assert.AreEqual(0, r.NumDocs);

            ICollection <IndexCommit> commits = DirectoryReader.ListCommits(dir);

            foreach (IndexCommit commit in commits)
            {
                DirectoryReader r2 = DirectoryReader.OpenIfChanged(r, commit);
                Assert.IsNotNull(r2);
                Assert.IsTrue(r2 != r);

                IDictionary <string, string> s = commit.UserData;
                int v;
                if (s.Count == 0)
                {
                    // First commit created by IW
                    v = -1;
                }
                else
                {
                    v = Convert.ToInt32(s["index"]);
                }
                if (v < 4)
                {
                    Assert.AreEqual(1 + v, r2.NumDocs);
                }
                else
                {
                    Assert.AreEqual(7 - v, r2.NumDocs);
                }
                r.Dispose();
                r = r2;
            }
            r.Dispose();
            dir.Dispose();
        }
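Because this writer retains every commit (KeepAllCommits), DirectoryReader.OpenIfChanged(reader, commit) can open a point-in-time view of any of them and read back the user data that was set before each Commit(). A hedged sketch of walking the retained commits; it assumes a deletion policy that keeps them, and the "index" user-data key mirrors the test above.

using System;
using Lucene.Net.Index;
using Lucene.Net.Store;

// Sketch: walk the retained commits and open a point-in-time reader for each one.
public static void DumpCommits(Directory dir)
{
    DirectoryReader reader = DirectoryReader.Open(dir);
    foreach (IndexCommit commit in DirectoryReader.ListCommits(dir))
    {
        DirectoryReader atCommit = DirectoryReader.OpenIfChanged(reader, commit);
        if (atCommit != null)
        {
            reader.Dispose();
            reader = atCommit;
        }
        string tag = commit.UserData.ContainsKey("index") ? commit.UserData["index"] : "(none)";
        Console.WriteLine("commit generation=" + commit.Generation + " tag=" + tag + " numDocs=" + reader.NumDocs);
    }
    reader.Dispose();
}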
        public override void SetUp()
        {
            base.SetUp();

            /*
             * for (int i = 0; i < testFields.length; i++) {
             * fieldInfos.Add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
             * }
             */

            Array.Sort(testTerms);
            int tokenUpto = 0;

            for (int i = 0; i < testTerms.Length; i++)
            {
                positions[i] = new int[TERM_FREQ];
                // first position must be 0
                for (int j = 0; j < TERM_FREQ; j++)
                {
                    // positions are always sorted in increasing order
                    positions[i][j] = (int)(j * 10 + new Random(1).NextDouble() * 10);
                    TestToken token = tokens[tokenUpto++] = new TestToken(this);
                    token.text        = testTerms[i];
                    token.pos         = positions[i][j];
                    token.startOffset = j * 10;
                    token.endOffset   = j * 10 + testTerms[i].Length;
                }
            }
            Array.Sort(tokens);

            dir = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MyAnalyzer(this)).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(false, 10)).SetUseCompoundFile(false));

            Document doc = new Document();

            for (int i = 0; i < testFields.Length; i++)
            {
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                if (testFieldsStorePos[i] && testFieldsStoreOff[i])
                {
                    customType.StoreTermVectors         = true;
                    customType.StoreTermVectorPositions = true;
                    customType.StoreTermVectorOffsets   = true;
                }
                else if (testFieldsStorePos[i] && !testFieldsStoreOff[i])
                {
                    customType.StoreTermVectors         = true;
                    customType.StoreTermVectorPositions = true;
                }
                else if (!testFieldsStorePos[i] && testFieldsStoreOff[i])
                {
                    customType.StoreTermVectors       = true;
                    customType.StoreTermVectorOffsets = true;
                }
                else
                {
                    customType.StoreTermVectors = true;
                }
                doc.Add(new Field(testFields[i], "", customType));
            }

            //Create 5 documents for testing, they all have the same
            //terms
            for (int j = 0; j < 5; j++)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            seg = writer.NewestSegment();
            writer.Dispose();

            fieldInfos = SegmentReader.ReadFieldInfos(seg);
        }
Example #51
        public virtual void TestDeletedDocs()
        {
            Directory   dir    = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2));

            for (int i = 0; i < 19; i++)
            {
                Document  doc        = new Document();
                FieldType customType = new FieldType(TextField.TYPE_STORED);
                customType.StoreTermVectors         = true;
                customType.StoreTermVectorPositions = true;
                customType.StoreTermVectorOffsets   = true;
                doc.Add(NewField("field", "aaa" + i, customType));
                writer.AddDocument(doc);
            }
            writer.ForceMerge(1);
            writer.Commit();
            writer.DeleteDocuments(new Term("field", "aaa5"));
            writer.Dispose();

            ByteArrayOutputStream bos     = new ByteArrayOutputStream(1024);
            CheckIndex            checker = new CheckIndex(dir);

            checker.InfoStream = new StreamWriter(bos, Encoding.UTF8);
            if (VERBOSE)
            {
                checker.InfoStream = Console.Out;
            }
            CheckIndex.Status indexStatus = checker.DoCheckIndex();
            if (indexStatus.Clean == false)
            {
                Console.WriteLine("CheckIndex failed");
                checker.FlushInfoStream();
                Console.WriteLine(bos.ToString());
                Assert.Fail();
            }

            CheckIndex.Status.SegmentInfoStatus seg = indexStatus.SegmentInfos[0];
            Assert.IsTrue(seg.OpenReaderPassed);

            Assert.IsNotNull(seg.Diagnostics);

            Assert.IsNotNull(seg.FieldNormStatus);
            Assert.IsNull(seg.FieldNormStatus.Error);
            Assert.AreEqual(1, seg.FieldNormStatus.TotFields);

            Assert.IsNotNull(seg.TermIndexStatus);
            Assert.IsNull(seg.TermIndexStatus.Error);
            Assert.AreEqual(18, seg.TermIndexStatus.TermCount);
            Assert.AreEqual(18, seg.TermIndexStatus.TotFreq);
            Assert.AreEqual(18, seg.TermIndexStatus.TotPos);

            Assert.IsNotNull(seg.StoredFieldStatus);
            Assert.IsNull(seg.StoredFieldStatus.Error);
            Assert.AreEqual(18, seg.StoredFieldStatus.DocCount);
            Assert.AreEqual(18, seg.StoredFieldStatus.TotFields);

            Assert.IsNotNull(seg.TermVectorStatus);
            Assert.IsNull(seg.TermVectorStatus.Error);
            Assert.AreEqual(18, seg.TermVectorStatus.DocCount);
            Assert.AreEqual(18, seg.TermVectorStatus.TotVectors);

            Assert.IsTrue(seg.Diagnostics.Count > 0);
            IList <string> onlySegments = new List <string>();

            onlySegments.Add("_0");

            Assert.IsTrue(checker.DoCheckIndex(onlySegments).Clean == true);
            dir.Dispose();
        }
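After a Commit(), the on-disk index can be verified with CheckIndex, which is what this test does through the programmatic API; CheckIndex only inspects committed segments. A minimal hedged sketch of the same verification:

using System;
using Lucene.Net.Index;
using Lucene.Net.Store;

// Sketch: verify a committed index and report whether it is clean.
public static bool IsIndexClean(Directory dir)
{
    CheckIndex checker = new CheckIndex(dir);
    checker.InfoStream = Console.Out; // optional: print per-segment details
    CheckIndex.Status status = checker.DoCheckIndex();
    return status.Clean;
}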
Example #52
        // Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>());

        public virtual void RunTest(string testName)
        {
            Failed.Set(false);
            AddCount.Set(0);
            DelCount.Set(0);
            PackCount.Set(0);

            DateTime t0 = DateTime.UtcNow;

            Random        random  = new Random(Random().Next());
            LineFileDocs  docs    = new LineFileDocs(random, DefaultCodecSupportsDocValues());
            DirectoryInfo tempDir = CreateTempDir(testName);

            Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
            if (Dir is BaseDirectoryWrapper)
            {
                ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
            }
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

            if (LuceneTestCase.TEST_NIGHTLY)
            {
                // newIWConfig makes smallish max seg size, which
                // results in tons and tons of segments for this test
                // when run nightly:
                MergePolicy mp = conf.MergePolicy;
                if (mp is TieredMergePolicy)
                {
                    ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
                }
                else if (mp is LogByteSizeMergePolicy)
                {
                    ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
                }
                else if (mp is LogMergePolicy)
                {
                    ((LogMergePolicy)mp).MaxMergeDocs = 100000;
                }
            }

            conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

            if (VERBOSE)
            {
                conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
            }
            Writer = new IndexWriter(Dir, conf);
            TestUtil.ReduceOpenFiles(Writer);

            //TaskScheduler es = Random().NextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
            TaskScheduler es = null;

            DoAfterWriter(es);

            int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);

            int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

            ISet <string>   delIDs     = new ConcurrentHashSet <string>(new HashSet <string>());
            ISet <string>   delPackIDs = new ConcurrentHashSet <string>(new HashSet <string>());
            IList <SubDocs> allSubDocs = new SynchronizedCollection <SubDocs>();

            DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC);

            ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            // Let index build up a bit
            Thread.Sleep(100);

            DoSearching(es, stopTime);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }

            for (int thread = 0; thread < indexThreads.Length; thread++)
            {
                indexThreads[thread].Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
            }

            IndexSearcher s = FinalSearcher;

            if (VERBOSE)
            {
                Console.WriteLine("TEST: finalSearcher=" + s);
            }

            Assert.IsFalse(Failed.Get());

            bool doFail = false;

            // Verify: make sure delIDs are in fact deleted:
            foreach (string id in delIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
                    doFail = true;
                }
            }

            // Verify: make sure delPackIDs are in fact deleted:
            foreach (string id in delPackIDs)
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
                if (hits.TotalHits != 0)
                {
                    Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
                    doFail = true;
                }
            }

            // Verify: make sure each group of sub-docs are still in docID order:
            foreach (SubDocs subDocs in allSubDocs.ToList())
            {
                TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
                if (!subDocs.Deleted)
                {
                    // We sort by relevance but the scores should be identical so sort falls back to by docID:
                    if (hits.TotalHits != subDocs.SubIDs.Count)
                    {
                        Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                        doFail = true;
                    }
                    else
                    {
                        int lastDocID  = -1;
                        int startDocID = -1;
                        foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                        {
                            int docID = scoreDoc.Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            else
                            {
                                startDocID = docID;
                            }
                            lastDocID = docID;
                            Document doc = s.Doc(docID);
                            Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                        }

                        lastDocID = startDocID - 1;
                        foreach (string subID in subDocs.SubIDs)
                        {
                            hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                            Assert.AreEqual(1, hits.TotalHits);
                            int docID = hits.ScoreDocs[0].Doc;
                            if (lastDocID != -1)
                            {
                                Assert.AreEqual(1 + lastDocID, docID);
                            }
                            lastDocID = docID;
                        }
                    }
                }
                else
                {
                    // Pack was deleted -- make sure its docs are
                    // deleted.  We can't verify packID is deleted
                    // because we can re-use packID for update:
                    foreach (string subID in subDocs.SubIDs)
                    {
                        Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
                    }
                }
            }

            // Verify: make sure all not-deleted docs are in fact
            // not deleted:
            int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));

            docs.Dispose();

            for (int id = 0; id < endID; id++)
            {
                string stringID = "" + id;
                if (!delIDs.Contains(stringID))
                {
                    TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
                    if (hits.TotalHits != 1)
                    {
                        Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + string.Join(",", delIDs.ToArray()));
                        doFail = true;
                    }
                }
            }
            Assert.IsFalse(doFail);

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);
            ReleaseSearcher(s);

            Writer.Commit();

            Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

            DoClose();
            Writer.Dispose(false);

            // Cannot shut down the executor until after the writer is closed, because
            // the writer has a merged-segment warmer that uses an IndexSearcher to run
            // searches, and that IndexSearcher may still be using this es!

            /*if (es != null)
             * {
             * es.shutdown();
             * es.awaitTermination(1, TimeUnit.SECONDS);
             * }*/

            TestUtil.CheckIndex(Dir);
            Dir.Dispose();
            System.IO.Directory.Delete(tempDir.FullName, true);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
            }
        }
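A minimal sketch of the commit-then-verify step the stress test above ends with: commit pending changes, then open a fresh reader and confirm it sees the expected number of live documents. The helper name and the expectedDocs parameter are illustrative, not part of the test.

        // Sketch only: commit, then confirm a new point-in-time reader sees exactly
        // the documents the caller expects (e.g. adds minus deletes).
        private static void CommitAndVerifyCount(IndexWriter writer, Lucene.Net.Store.Directory dir, int expectedDocs)
        {
            writer.Commit(); // syncs the index files so new readers (and a crash) see the changes

            DirectoryReader reader = DirectoryReader.Open(dir);
            try
            {
                Assert.AreEqual(expectedDocs, reader.NumDocs);
            }
            finally
            {
                reader.Dispose();
            }
        }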
Example #53
        /// <summary>
        /// Add documents.
        /// </summary>
        /// <param name="writer">The index writer.</param>
        /// <param name="directoryInfo">The directory information where all the files that are to be added are located.</param>
        /// <param name="files">The list of files that are to be added.</param>
        /// <param name="documents">The supported documents search filter, used to indicate what files are to be added.</param>
        public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents)
        {
            System.Windows.Forms.RichTextBox textbox = new System.Windows.Forms.RichTextBox();

            FieldType pathFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed      = true,
                Tokenized    = false,
                Stored       = true,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };
            FieldType contentFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed      = true,
                Tokenized    = documents.TokenizeContent,
                Stored       = documents.StoreContent,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };

            // For each file.
            for (int i = 0; i < files.Length; i++)
            {
                // If the file exists
                if (File.Exists(files[i]))
                {
                    Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
                    System.IO.StreamReader        stream   = null;

                    try
                    {
                        FileInfo fileInfo = new FileInfo(files[i]);
                        string   file     = files[i].Replace(directoryInfo.Root.FullName, "").ToLower();

                        Lucene.Net.Documents.Field path     = new Field("path", file.ToLower().Replace("\\", "/"), pathFieldType);
                        Lucene.Net.Documents.Field modified = new Field("modified", fileInfo.LastWriteTime.ToShortDateString() + " " + fileInfo.LastWriteTime.ToShortTimeString(), pathFieldType);

                        // Add the fields.
                        document.Add(path);
                        document.Add(modified);

                        // Create the stream reader.
                        stream = new StreamReader(files[i]);
                        string contentRtf = stream.ReadToEnd();
                        textbox.Rtf = contentRtf;
                        string content = textbox.Text;

                        // If content exists.
                        if (!String.IsNullOrEmpty(content))
                        {
                            // Split the white spaces from the text.
                            string[] words = content.Words();

                            // If words exist.
                            if (words != null && words.Length > 0)
                            {
                                // Add the query for each word.
                                for (int j = 0; j < words.Length; j++)
                                {
                                    // Format the word.
                                    string word = words[j].ToLower().RemovePunctuationFromStartAndEnd();

                                    // If a word exists.
                                    if (!String.IsNullOrEmpty(word))
                                    {
                                        Lucene.Net.Documents.Field contentField = new Field("content", word, contentFieldType);
                                        document.Add(contentField);
                                    }
                                }
                            }
                        }

                        // Add the document.
                        writer.AddDocument(document.Fields);
                        stream.Close();

                        // Commit once the accumulated document size exceeds the per-commit limit.
                        documents.TotalDocumentSize += fileInfo.Length;
                        if (documents.TotalDocumentSize > documents.MaxDocumentSizePerCommit)
                        {
                            writer.Commit();
                            documents.TotalDocumentSize = 0;
                        }
                    }
                    catch (Exception)
                    {
                        throw;
                    }
                    finally
                    {
                        if (stream != null)
                        {
                            stream.Dispose();
                        }
                    }
                }
            }
        }
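The wrapper above commits by accumulated file size rather than by document count. A stripped-down sketch of that batching pattern with plain Lucene.Net calls, assuming an existing writer; the field name and byte threshold are illustrative:

        // Sketch only: add documents and commit whenever the accumulated input size
        // crosses a threshold, so a large batch is made durable in chunks instead of
        // in one huge commit at the end.
        public void AddDocumentsInBatches(Lucene.Net.Index.IndexWriter writer, string[] files, long maxBytesPerCommit)
        {
            long pendingBytes = 0;

            foreach (string file in files)
            {
                Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
                document.Add(new StringField("path", file, Field.Store.YES));
                // ... add content fields here ...
                writer.AddDocument(document);

                pendingBytes += new FileInfo(file).Length;
                if (pendingBytes > maxBytesPerCommit)
                {
                    writer.Commit(); // make this batch visible and crash-safe
                    pendingBytes = 0;
                }
            }

            writer.Commit(); // commit whatever is left over
        }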
Example #54
        public virtual void TestRollingUpdates_Mem()
        {
            Random random             = new Random(Random().Next());
            BaseDirectoryWrapper dir  = NewDirectory();
            LineFileDocs         docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());

            //provider.register(new MemoryCodec());
            // LUCENE TODO: uncomment this out once MemoryPostingsFormat is brought over
            //if ((!"Lucene3x".Equals(Codec.Default.Name)) && Random().NextBoolean())
            //{
            //    Codec.Default =
            //        TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.NextFloat()));
            //}

            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriter   w          = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            int           SIZE       = AtLeast(20);
            int           id         = 0;
            IndexReader   r          = null;
            IndexSearcher s          = null;
            int           numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble())));

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numUpdates=" + numUpdates);
            }
            int updateCount = 0;

            // TODO: sometimes update ids not in order...
            for (int docIter = 0; docIter < numUpdates; docIter++)
            {
                Documents.Document doc  = docs.NextDoc();
                string             myID = "" + id;
                if (id == SIZE - 1)
                {
                    id = 0;
                }
                else
                {
                    id++;
                }
                if (VERBOSE)
                {
                    Console.WriteLine("  docIter=" + docIter + " id=" + id);
                }
                ((Field)doc.GetField("docid")).StringValue = myID;

                Term idTerm = new Term("docid", myID);

                bool doUpdate;
                if (s != null && updateCount < SIZE)
                {
                    TopDocs hits = s.Search(new TermQuery(idTerm), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc);
                    if (VERBOSE)
                    {
                        if (doUpdate)
                        {
                            Console.WriteLine("  tryDeleteDocument failed");
                        }
                        else
                        {
                            Console.WriteLine("  tryDeleteDocument succeeded");
                        }
                    }
                }
                else
                {
                    doUpdate = true;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  no searcher: doUpdate=true");
                    }
                }

                updateCount++;

                if (doUpdate)
                {
                    w.UpdateDocument(idTerm, doc);
                }
                else
                {
                    w.AddDocument(doc);
                }

                if (docIter >= SIZE && Random().Next(50) == 17)
                {
                    if (r != null)
                    {
                        r.Dispose();
                    }

                    bool applyDeletions = Random().NextBoolean();

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions);
                    }

                    r = w.GetReader(applyDeletions);
                    if (applyDeletions)
                    {
                        s = NewSearcher(r);
                    }
                    else
                    {
                        s = null;
                    }
                    Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE);
                    updateCount = 0;
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            w.Commit();
            Assert.AreEqual(SIZE, w.NumDocs());

            w.Dispose();

            TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates");

            docs.Dispose();

            // LUCENE-4455:
            SegmentInfos infos = new SegmentInfos();

            infos.Read(dir);
            long totalBytes = 0;

            foreach (SegmentCommitInfo sipc in infos.Segments)
            {
                totalBytes += sipc.SizeInBytes();
            }
            long totalBytes2 = 0;

            foreach (string fileName in dir.ListAll())
            {
                if (!fileName.StartsWith(IndexFileNames.SEGMENTS))
                {
                    totalBytes2 += dir.FileLength(fileName);
                }
            }
            Assert.AreEqual(totalBytes2, totalBytes);
            dir.Dispose();
        }
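The rolling-updates test relies on UpdateDocument to delete the previous copy of an id and add the new one in a single operation, so after the final Commit the live document count equals the number of distinct ids. A minimal sketch of that update-by-term pattern; the field name follows the test's "docid" convention and the helper name is illustrative:

        // Sketch only: replace the current version of a logical document by its id term.
        // UpdateDocument is delete-by-term plus add in one atomic step, so only the
        // latest version of each id survives the commit.
        public void ReplaceById(IndexWriter writer, string id, Document newVersion)
        {
            Term idTerm = new Term("docid", id);
            writer.UpdateDocument(idTerm, newVersion);
            writer.Commit();
        }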
Example #55
        // builds an index with payloads in the given Directory and performs
        // different tests to verify the payload encoding
        private void PerformTest(Directory dir)
        {
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy()));

            // should be in sync with value in TermInfosWriter
            const int skipInterval = 16;

            const int    numTerms  = 5;
            const string fieldName = "f1";

            int numDocs = skipInterval + 1;

            // create content for the test documents with just a few terms
            Term[]        terms = GenerateTerms(fieldName, numTerms);
            StringBuilder sb    = new StringBuilder();

            for (int i = 0; i < terms.Length; i++)
            {
                sb.Append(terms[i].Text());
                sb.Append(" ");
            }
            string content = sb.ToString();

            int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
            var payloadData       = GenerateRandomData(payloadDataLength);

            Document d = new Document();

            d.Add(NewTextField(fieldName, content, Field.Store.NO));
            // add the same document multiple times to have the same payload lengths for all
            // occurrences within two consecutive skip intervals
            int offset = 0;

            for (int i = 0; i < 2 * numDocs; i++)
            {
                analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
                offset  += numTerms;
                writer.AddDocument(d, analyzer);
            }

            // make sure we create more than one segment to test merging
            writer.Commit();

            // now we make sure to have different payload lengths at the next skip point
            for (int i = 0; i < numDocs; i++)
            {
                analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
                offset  += i * numTerms;
                writer.AddDocument(d, analyzer);
            }

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            /*
             * Verify the index
             * first we test if all payloads are stored correctly
             */
            IndexReader reader = DirectoryReader.Open(dir);

            var verifyPayloadData = new byte[payloadDataLength];

            offset = 0;
            var tps = new DocsAndPositionsEnum[numTerms];

            for (int i = 0; i < numTerms; i++)
            {
                tps[i] = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[i].Field, new BytesRef(terms[i].Text()));
            }

            while (tps[0].NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                for (int i = 1; i < numTerms; i++)
                {
                    tps[i].NextDoc();
                }
                int freq = tps[0].Freq;

                for (int i = 0; i < freq; i++)
                {
                    for (int j = 0; j < numTerms; j++)
                    {
                        tps[j].NextPosition();
                        BytesRef br = tps[j].GetPayload();
                        if (br != null)
                        {
                            Array.Copy(br.Bytes, br.Offset, verifyPayloadData, offset, br.Length);
                            offset += br.Length;
                        }
                    }
                }
            }

            AssertByteArrayEquals(payloadData, verifyPayloadData);

            /*
             *  test lazy skipping
             */
            DocsAndPositionsEnum tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[0].Field, new BytesRef(terms[0].Text()));

            tp.NextDoc();
            tp.NextPosition();
            // NOTE: prior rev of this test was failing to first
            // call next here:
            tp.NextDoc();
            // now we don't read this payload
            tp.NextPosition();
            BytesRef payload = tp.GetPayload();

            Assert.AreEqual(1, payload.Length, "Wrong payload length.");
            Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[numTerms]);
            tp.NextDoc();
            tp.NextPosition();

            // we don't read this payload and skip to a different document
            tp.Advance(5);
            tp.NextPosition();
            payload = tp.GetPayload();
            Assert.AreEqual(1, payload.Length, "Wrong payload length.");
            Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[5 * numTerms]);

            /*
             * Test different lengths at skip points
             */
            tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[1].Field, new BytesRef(terms[1].Text()));
            tp.NextDoc();
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
            tp.Advance(skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
            tp.Advance(2 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
            tp.Advance(3 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.GetPayload().Length, "Wrong payload length.");

            reader.Dispose();

            // test long payload
            analyzer = new PayloadAnalyzer();
            writer   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE));
            string singleTerm = "lucene";

            d = new Document();
            d.Add(NewTextField(fieldName, singleTerm, Field.Store.NO));
            // add a payload whose length is greater than the buffer size of BufferedIndexOutput
            payloadData = GenerateRandomData(2000);
            analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
            writer.AddDocument(d);

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            reader = DirectoryReader.Open(dir);
            tp     = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), fieldName, new BytesRef(singleTerm));
            tp.NextDoc();
            tp.NextPosition();

            BytesRef bref = tp.GetPayload();

            verifyPayloadData = new byte[bref.Length];
            var portion = new byte[1500];

            Array.Copy(payloadData, 100, portion, 0, 1500);

            AssertByteArrayEquals(portion, bref.Bytes, bref.Offset, bref.Length);
            reader.Dispose();
        }
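A recurring trick in the payload test above is to Commit part-way through indexing purely to force a segment boundary, and then ForceMerge(1) so the merged codepath is exercised as well. A minimal sketch of that pattern, assuming the caller supplies the writer and a prepared document:

        // Sketch only: commit in the middle of indexing to guarantee at least two
        // segments exist, then force-merge down to one so merging is also covered.
        public void IndexInTwoSegmentsThenMerge(IndexWriter writer, Document doc, int docsPerSegment)
        {
            for (int i = 0; i < docsPerSegment; i++)
            {
                writer.AddDocument(doc);
            }
            writer.Commit(); // segment boundary: the first batch is now on disk

            for (int i = 0; i < docsPerSegment; i++)
            {
                writer.AddDocument(doc);
            }
            writer.ForceMerge(1); // merge everything into a single segment
            writer.Dispose();     // flushes remaining docs and closes the writer
        }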
        public virtual void TestRandom()
        {
            int                    numThreads          = 1 + Random().Next(8);
            int                    numDocumentsToIndex = 50 + AtLeast(70);
            AtomicInt32            numDocs             = new AtomicInt32(numDocumentsToIndex);
            Directory              dir         = NewDirectory();
            IndexWriterConfig      iwc         = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();

            iwc.SetFlushPolicy(flushPolicy);

            int numDWPT = 1 + Random().Next(8);
            DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);

            iwc.SetIndexerThreadPool(threadPool);

            IndexWriter writer = new IndexWriter(dir, iwc);

            flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
            DocumentsWriter docsWriter = writer.DocsWriter;

            Assert.IsNotNull(docsWriter);
            DocumentsWriterFlushControl flushControl = docsWriter.flushControl;

            Assert.AreEqual(0, flushControl.FlushBytes, " bytes must be 0 after init");

            IndexThread[] threads = new IndexThread[numThreads];
            for (int x = 0; x < threads.Length; x++)
            {
                threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, true);
                threads[x].Start();
            }

            for (int x = 0; x < threads.Length; x++)
            {
                threads[x].Join();
            }
            Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due");
            Assert.AreEqual(numDocumentsToIndex, writer.NumDocs);
            Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
            if (flushPolicy.FlushOnRAM && !flushPolicy.FlushOnDocCount && !flushPolicy.FlushOnDeleteTerms)
            {
                long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);
                Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
                if (flushPolicy.HasMarkedPending)
                {
                    assertTrue("max: " + maxRAMBytes + " " + flushControl.peakActiveBytes, maxRAMBytes <= flushControl.peakActiveBytes);
                }
            }
            AssertActiveBytesAfter(flushControl);
            writer.Commit();
            Assert.AreEqual(0, flushControl.ActiveBytes);
            IndexReader r = DirectoryReader.Open(dir);

            Assert.AreEqual(numDocumentsToIndex, r.NumDocs);
            Assert.AreEqual(numDocumentsToIndex, r.MaxDoc);
            if (!flushPolicy.FlushOnRAM)
            {
                assertFalse("never stall if we don't flush on RAM", docsWriter.flushControl.stallControl.WasStalled);
                assertFalse("never block if we don't flush on RAM", docsWriter.flushControl.stallControl.HasBlocked);
            }
            r.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
Example #57
        /// <summary>
        /// Add documents.
        /// </summary>
        /// <param name="writer">The index writer.</param>
        /// <param name="facetWriter">The facet index writer.</param>
        /// <param name="directoryInfo">The directory information where all the files that are to be added are located.</param>
        /// <param name="files">The list of files that are to be added.</param>
        /// <param name="documents">The supported documents search filter, used to indicate what files are to be added.</param>
        /// <param name="facetField">The facet field information.</param>
        /// <param name="config">The facet configuration information.</param>
        public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter,
                                 DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents, FacetField facetField, FacetsConfig config)
        {
            FieldType pathFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed      = true,
                Tokenized    = false,
                Stored       = true,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };
            FieldType contentFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed      = true,
                Tokenized    = documents.TokenizeContent,
                Stored       = documents.StoreContent,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };

            // For each file.
            for (int i = 0; i < files.Length; i++)
            {
                // If the file exists
                if (File.Exists(files[i]))
                {
                    Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();

                    try
                    {
                        FileInfo fileInfo = new FileInfo(files[i]);
                        string   file     = files[i].Replace(directoryInfo.Root.FullName, "").ToLower();

                        Lucene.Net.Documents.Field path     = new Field("path", file.ToLower().Replace("\\", "/"), pathFieldType);
                        Lucene.Net.Documents.Field modified = new Field("modified", fileInfo.LastWriteTime.ToShortDateString() + " " + fileInfo.LastWriteTime.ToShortTimeString(), pathFieldType);

                        // Add the fields.
                        document.Add(facetField);
                        document.Add(path);
                        document.Add(modified);

                        // Create the stream reader.
                        OpenDocument(files[i]);
                        string content = Nequeo.Xml.Document.ExtractContent(_xDocument);

                        // If content exists.
                        if (!String.IsNullOrEmpty(content))
                        {
                            // Split the white spaces from the text.
                            string[] words = content.Words();

                            // If words exist.
                            if (words != null && words.Length > 0)
                            {
                                // Add the query for each word.
                                for (int j = 0; j < words.Length; j++)
                                {
                                    // Format the word.
                                    string word = words[j].ToLower().RemovePunctuationFromStartAndEnd();

                                    // If a word exists.
                                    if (!String.IsNullOrEmpty(word))
                                    {
                                        Lucene.Net.Documents.Field contentField = new Field("facetcontent", word, contentFieldType);
                                        document.Add(contentField);
                                    }
                                }
                            }
                        }

                        // Add the document.
                        writer.AddDocument(config.Build(facetWriter, document));
                        _document.Close();

                        // Commit once the accumulated document size exceeds the per-commit limit.
                        documents.TotalDocumentSize += fileInfo.Length;
                        if (documents.TotalDocumentSize > documents.MaxDocumentSizePerCommit)
                        {
                            // Commit the index.
                            writer.Commit();
                            facetWriter.Commit();
                            documents.TotalDocumentSize = 0;
                        }
                    }
                    catch (Exception)
                    {
                        throw;
                    }
                    finally
                    {
                        CloseDocument();
                    }
                }
            }
        }
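When facets are indexed, the example above commits the taxonomy writer alongside the index writer so the two stay in step. A minimal sketch of that paired commit, assuming both writers and the FacetsConfig already exist; the facet dimension and value are illustrative:

        // Sketch only: a faceted document must be passed through FacetsConfig.Build
        // (which resolves facet ordinals via the taxonomy) before it is added, and the
        // taxonomy writer must be committed together with the index writer so searches
        // see a consistent pair.
        public void AddFacetedDocumentAndCommit(Lucene.Net.Index.IndexWriter writer, DirectoryTaxonomyWriter facetWriter,
                                                FacetsConfig config, Lucene.Net.Documents.Document document)
        {
            document.Add(new FacetField("category", "examples")); // illustrative facet value
            writer.AddDocument(config.Build(facetWriter, document));

            writer.Commit();
            facetWriter.Commit();
        }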
        public virtual void TestTonsOfUpdates()
        {
            // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
            Directory         dir    = NewDirectory();
            Random            random = Random();
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));

            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
            IndexWriter writer = new IndexWriter(dir, conf);

            // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
            int numDocs                  = AtLeast(20000);
            int numBinaryFields          = AtLeast(5);
            int numTerms                 = TestUtil.NextInt(random, 10, 100); // terms should affect many docs
            HashSet <string> updateTerms = new HashSet <string>();

            while (updateTerms.Count < numTerms)
            {
                updateTerms.Add(TestUtil.RandomSimpleString(random));
            }

            //    System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);

            // build a large index with many BDV fields and update terms
            for (int i = 0; i < numDocs; i++)
            {
                Document doc            = new Document();
                int      numUpdateTerms = TestUtil.NextInt(random, 1, numTerms / 10);
                for (int j = 0; j < numUpdateTerms; j++)
                {
                    doc.Add(new StringField("upd", RandomInts.RandomFrom(random, updateTerms), Store.NO));
                }
                for (int j = 0; j < numBinaryFields; j++)
                {
                    long val = random.Next();
                    doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
                    doc.Add(new NumericDocValuesField("cf" + j, val * 2));
                }
                writer.AddDocument(doc);
            }

            writer.Commit(); // commit so there's something to apply to

            // set to flush every 2048 bytes (approximately every 12 updates), so we get
            // many flushes during binary updates
            writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
            int numUpdates = AtLeast(100);

            //    System.out.println("numUpdates=" + numUpdates);
            for (int i = 0; i < numUpdates; i++)
            {
                int  field      = random.Next(numBinaryFields);
                Term updateTerm = new Term("upd", RandomInts.RandomFrom(random, updateTerms));
                long value      = random.Next();
                writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
                writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
            }

            writer.Dispose();

            DirectoryReader reader  = DirectoryReader.Open(dir);
            BytesRef        scratch = new BytesRef();

            foreach (AtomicReaderContext context in reader.Leaves)
            {
                for (int i = 0; i < numBinaryFields; i++)
                {
                    AtomicReader     r  = context.AtomicReader;
                    BinaryDocValues  f  = r.GetBinaryDocValues("f" + i);
                    NumericDocValues cf = r.GetNumericDocValues("cf" + i);
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
                    }
                }
            }
            reader.Dispose();

            dir.Dispose();
        }
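TestTonsOfUpdates commits once so the doc-values updates have segments to apply to, then issues the updates through the writer without re-adding any documents. A minimal sketch of one paired update, reusing the test's f/cf field convention and its TestBinaryDocValuesUpdates.ToBytes helper:

        // Sketch only: update a binary doc-values field and its numeric "control" field
        // in place, keyed by an indexed term, without reindexing the documents.
        public void UpdateDocValuesPair(IndexWriter writer, string updTerm, long value)
        {
            Term term = new Term("upd", updTerm);
            writer.UpdateBinaryDocValue(term, "f0", TestBinaryDocValuesUpdates.ToBytes(value));
            writer.UpdateNumericDocValue(term, "cf0", value * 2);
            writer.Commit(); // makes the updates durable and visible to newly opened readers
        }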
        public virtual void  TestErrorAfterApplyDeletes()
        {
            MockRAMDirectory.Failure failure = new AnonymousClassFailure(this);

            // create a couple of files

            System.String[] keywords  = new System.String[] { "1", "2" };
            System.String[] unindexed = new System.String[] { "Netherlands", "Italy" };
            System.String[] unstored  = new System.String[] { "Amsterdam has lots of bridges", "Venice has lots of canals" };
            System.String[] text      = new System.String[] { "Amsterdam", "Venice" };

            for (int pass = 0; pass < 2; pass++)
            {
                bool             autoCommit = (0 == pass);
                MockRAMDirectory dir        = new MockRAMDirectory();
                IndexWriter      modifier   = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
                modifier.SetUseCompoundFile(true);
                modifier.SetMaxBufferedDeleteTerms(2);

                dir.FailOn(failure.Reset());

                for (int i = 0; i < keywords.Length; i++)
                {
                    Document doc = new Document();
                    doc.Add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.Add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
                    doc.Add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
                    doc.Add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
                    modifier.AddDocument(doc);
                }
                // flush (and commit if ac)

                modifier.Optimize();
                modifier.Commit();

                // one of the two files hits

                Term term     = new Term("city", "Amsterdam");
                int  hitCount = GetHitCount(dir, term);
                Assert.AreEqual(1, hitCount);

                // open the writer again (closed above)

                // delete the doc
                // max buf del terms is two, so this is buffered

                modifier.DeleteDocuments(term);

                // add a doc (needed for the !ac case; see below)
                // doc remains buffered

                Document doc2 = new Document();
                modifier.AddDocument(doc2);

                // commit the changes, the buffered deletes, and the new doc

                // The failure object will fail on the first write after the del
                // file gets created when processing the buffered delete

                // in the ac case, this will be when writing the new segments
                // files so we really don't need the new doc, but it's harmless

                // in the !ac case, a new segments file won't be created but in
                // this case, creation of the cfs file happens next so we need
                // the doc (to test that it's okay that we don't lose deletes if
                // failing while creating the cfs file)

                bool failed = false;
                try
                {
                    modifier.Commit();
                }
                catch (System.IO.IOException ioe)
                {
                    failed = true;
                }

                Assert.IsTrue(failed);

                // The commit above failed, so we need to retry it (which will
                // succeed, because the failure is a one-shot)

                modifier.Commit();

                hitCount = GetHitCount(dir, term);

                // Make sure the delete was successfully flushed:
                Assert.AreEqual(0, hitCount);

                modifier.Close();
                dir.Close();
            }
        }
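The key behaviour this older-API example exercises is that a Commit which fails with an IOException does not discard the buffered deletes, so simply retrying the commit succeeds. A minimal sketch of that retry pattern; the single retry and the narrow exception handling are illustrative:

        // Sketch only: retry a commit once after a transient I/O failure.
        // The failed commit does not lose buffered changes, so a later
        // successful Commit() still makes them durable.
        public void CommitWithRetry(IndexWriter writer)
        {
            try
            {
                writer.Commit();
            }
            catch (System.IO.IOException)
            {
                // first attempt failed (e.g. a one-shot fault like the one injected above); try once more
                writer.Commit();
            }
        }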
Example #60
            public override void Run()
            {
                DirectoryReader currentReader = null;
                Random          random        = LuceneTestCase.Random;

                try
                {
                    Document doc = new Document();
                    doc.Add(new TextField("id", "1", Field.Store.NO));
                    Writer.AddDocument(doc);
                    Holder.Reader = currentReader = Writer.GetReader(true);
                    Term term = new Term("id");
                    for (int i = 0; i < NumOps && !Holder.Stop; i++)
                    {
                        float nextOp = (float)random.NextDouble();
                        if (nextOp < 0.3)
                        {
                            term.Set("id", new BytesRef("1"));
                            Writer.UpdateDocument(term, doc);
                        }
                        else if (nextOp < 0.5)
                        {
                            Writer.AddDocument(doc);
                        }
                        else
                        {
                            term.Set("id", new BytesRef("1"));
                            Writer.DeleteDocuments(term);
                        }
                        if (Holder.Reader != currentReader)
                        {
                            Holder.Reader = currentReader;
                            if (Countdown)
                            {
                                Countdown = false;
                                Latch.Signal();
                            }
                        }
                        if (random.NextBoolean())
                        {
                            Writer.Commit();
                            DirectoryReader newReader = DirectoryReader.OpenIfChanged(currentReader);
                            if (newReader != null)
                            {
                                currentReader.DecRef();
                                currentReader = newReader;
                            }
                            if (currentReader.NumDocs == 0)
                            {
                                Writer.AddDocument(doc);
                            }
                        }
                    }
                }
                catch (Exception e)
                {
                    Failed = e;
                }
                finally
                {
                    Holder.Reader = null;
                    if (Countdown)
                    {
                        Latch.Signal();
                    }
                    if (currentReader != null)
                    {
                        try
                        {
                            currentReader.DecRef();
                        }
#pragma warning disable 168
                        catch (IOException e)
#pragma warning restore 168
                        {
                        }
                    }
                }
                if (VERBOSE)
                {
                    Console.WriteLine("writer stopped - forced by reader: " + Holder.Stop);
                }
            }
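The worker thread above refreshes its point-in-time view by committing and then reopening the reader only if the index actually changed, releasing the old reader's reference afterwards. A minimal sketch of that refresh step, assuming the caller owns the current reader; the helper name is illustrative:

            // Sketch only: commit pending changes, then swap in a fresh reader only when
            // OpenIfChanged reports that the index has changed since the old reader.
            private DirectoryReader CommitAndRefresh(IndexWriter writer, DirectoryReader current)
            {
                writer.Commit();
                DirectoryReader newReader = DirectoryReader.OpenIfChanged(current);
                if (newReader != null)
                {
                    current.DecRef(); // release the old point-in-time view
                    return newReader;
                }
                return current;       // nothing changed; keep using the old reader
            }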