/// <summary>
/// Commits one valid document, then adds a document whose field is expected to make
/// the writer throw <see cref="ArgumentException"/>, and checks that exactly two files
/// ending in <c>.fdt</c> or <c>.fdx</c> are left in the directory afterwards.
/// </summary>
public virtual void TestDeletePartiallyWrittenFilesIfAbort()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    iwConf.SetCodec(CompressingCodec.RandomInstance(Random()));
    // disable CFS because this test checks file names
    iwConf.SetMergePolicy(NewLogMergePolicy(false));
    iwConf.SetUseCompoundFile(false);
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

    Document validDoc = new Document();
    validDoc.Add(new IntField("id", 0, Field.Store.YES));
    iw.AddDocument(validDoc);
    iw.Commit();

    // make sure that #writeField will fail to trigger an abort
    Document invalidDoc = new Document();
    FieldType fieldType = new FieldType();
    fieldType.Stored = true;
    invalidDoc.Add(new FieldAnonymousInnerClassHelper(this, fieldType));

    Assert.Throws<ArgumentException>(() =>
    {
        try
        {
            iw.AddDocument(invalidDoc);
            iw.Commit();
        }
        finally
        {
            try
            {
                int counter = 0;
                foreach (string fileName in dir.ListAll())
                {
                    // File extensions are identifiers, not linguistic text: compare ordinally
                    // so the result does not depend on the current culture.
                    if (fileName.EndsWith(".fdt", StringComparison.Ordinal)
                        || fileName.EndsWith(".fdx", StringComparison.Ordinal))
                    {
                        counter++;
                    }
                }
                // Only one .fdt and one .fdx files must have been found
                Assert.AreEqual(2, counter);
            }
            finally
            {
                // Release the writer and directory even when the count check above fails.
                iw.Dispose();
                dir.Dispose();
            }
        }
    });
}
/// <summary>
/// Builds the test index in a fresh directory: adds <c>N_DOCS</c> documents in a
/// non-sequential ID order and, unless <paramref name="doMultiSegment"/> is set,
/// force-merges the result down to a single segment.
/// </summary>
/// <remarks>
/// LUCENENET specific: non-static because NewIndexWriterConfig is now non-static.
/// </remarks>
/// <param name="doMultiSegment">
/// When <c>true</c>, a small random max-buffered-docs value (between 2 and 7) is set on
/// the writer config and the final force-merge is skipped.
/// </param>
protected internal void CreateIndex(bool doMultiSegment)
{
    if (VERBOSE)
    {
        Console.WriteLine("TEST: setUp");
    }

    // prepare a small index with just a few documents.
    dir = NewDirectory();
    anlzr = new MockAnalyzer(Random);
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, anlzr)
        .SetMergePolicy(NewLogMergePolicy());
    if (doMultiSegment)
    {
        config.SetMaxBufferedDocs(TestUtil.NextInt32(Random, 2, 7));
    }
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, config);

    // Add docs not exactly in natural ID order, to verify we do check the order of docs by scores.
    // IDs are visited in steps of 4 modulo N_DOCS; hitting an ID twice before all documents
    // have been added means N_DOCS was chosen badly, which is reported via the exception below.
    bool[] visited = new bool[N_DOCS];
    int docId = 0;
    for (int left = N_DOCS; left > 0; left--)
    {
        if (visited[docId])
        {
            throw new Exception("to set this test correctly N_DOCS=" + N_DOCS + " must be primary and greater than 2!");
        }
        AddDoc(writer, docId);
        visited[docId] = true;
        docId = (docId + 4) % N_DOCS;
    }

    if (!doMultiSegment)
    {
        if (VERBOSE)
        {
            Console.WriteLine("TEST: setUp full merge");
        }
        writer.ForceMerge(1);
    }

    writer.Dispose();
    if (VERBOSE)
    {
        Console.WriteLine("TEST: setUp done close");
    }
}
/// <summary>
/// Creates an index for sorting: indexes <paramref name="numDocs"/> documents with IDs
/// 0, 10, 20, ... in shuffled order, commits, and then deletes roughly 20% of them.
/// </summary>
/// <param name="dir">Directory the index is written to.</param>
/// <param name="numDocs">Number of documents to index.</param>
/// <param name="random">Random source used for the analyzer, the writer and the choice of deletions.</param>
public void CreateIndex(Directory dir, int numDocs, Random random)
{
    // IDs are multiples of ten: 0, 10, 20, ...
    IList<int> ids = new List<int>();
    while (ids.Count < numDocs)
    {
        ids.Add(ids.Count * 10);
    }

    // shuffle them for indexing
    // LUCENENET NOTE: Shuffle hands back the shuffled list, so the variable is reassigned with the result
    ids = CollectionsHelper.Shuffle(ids);

    if (VERBOSE)
    {
        Console.WriteLine("Shuffled IDs for indexing: " + Arrays.ToString(ids.ToArray()));
    }

    PositionsTokenStream positions = new PositionsTokenStream();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    conf.SetMaxBufferedDocs(4); // create some segments
    conf.SetSimilarity(new NormsSimilarity(conf.Similarity)); // for testing norms field

    using (RandomIndexWriter indexWriter = new RandomIndexWriter(random, dir, conf))
    {
        indexWriter.RandomForceMerge = false;

        foreach (int docId in ids)
        {
            indexWriter.AddDocument(Doc(docId, positions));
        }

        // delete some documents
        indexWriter.Commit();
        foreach (int docId in ids)
        {
            if (random.NextDouble() >= 0.2)
            {
                continue;
            }

            if (VERBOSE)
            {
                Console.WriteLine("delete doc_id " + docId);
            }
            indexWriter.DeleteDocuments(new Term(ID_FIELD, docId.ToString()));
        }
    }
}
/// <summary>
/// Applies the optional tuning values held by this instance to <paramref name="config"/>.
/// A fresh <c>LogByteSizeMergePolicy</c> is always installed; every other value is applied
/// only when the corresponding property is non-null.
/// </summary>
/// <param name="config">The writer configuration to modify in place.</param>
/// <remarks>
/// An <see cref="ArgumentOutOfRangeException"/> raised while applying a value is swallowed.
/// Values applied before the failing one stay applied; the ones after it are skipped.
/// </remarks>
public void ApplyToWriterConfig(IndexWriterConfig config)
{
    try
    {
        // possibly take in a MergePolicy or configure it elsewhere
        var mergePolicy = new LogByteSizeMergePolicy();
        if (MergeFactor != null)
        {
            mergePolicy.MergeFactor = (int)MergeFactor;
        }
        if (MaxMergeDocs != null)
        {
            mergePolicy.MaxMergeDocs = (int)MaxMergeDocs;
        }
        config.MergePolicy = mergePolicy;

        if (MaxBufferedDocs != null)
        {
            config.SetMaxBufferedDocs((int)MaxBufferedDocs);
        }
        if (RamBufferSizeMb != null)
        {
            // The RAM buffer size is a floating-point number of megabytes; converting to
            // double instead of int keeps any fractional part instead of truncating it.
            // NOTE(review): if RamBufferSizeMb is declared as an integral type this is
            // equivalent to the previous (int) cast — confirm against the property declaration.
            config.SetRAMBufferSizeMB((double)RamBufferSizeMb);
        }
        if (TermIndexInterval != null)
        {
            config.SetTermIndexInterval((int)TermIndexInterval);
        }
    }
    catch (ArgumentOutOfRangeException)
    {
        // TODO: Log it
    }
}
/// <summary>
/// Starts an "indexer" thread and a "reopener" thread, then keeps acquiring a
/// searcher/taxonomy pair from the <c>SearcherTaxonomyManager</c> until the shared stop
/// flag reads true. Whenever the acquired searcher sees at least one document, the top
/// children of the "field" dimension must be non-empty.
/// </summary>
public virtual void TestNrt()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

    // Don't allow tiny maxBufferedDocs; it can make this
    // test too slow:
    iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs));

    // MockRandom/AlcoholicMergePolicy are too slow:
    TieredMergePolicy mergePolicy = new TieredMergePolicy { FloorSegmentMB = .001 };
    iwc.SetMergePolicy(mergePolicy);

    IndexWriter writer = new IndexWriter(dir, iwc);
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.SetMultiValued("field", true);

    AtomicBoolean stop = new AtomicBoolean();

    // How many unique facets to index before stopping:
    int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

    var indexer = new IndexerThread(writer, facetsConfig, taxoWriter, null, ordLimit, stop);
    var manager = new SearcherTaxonomyManager(writer, true, null, taxoWriter);
    var reopener = new ThreadAnonymousInnerClassHelper(this, stop, manager) { Name = "reopener" };

    reopener.Start();
    indexer.Name = "indexer";
    indexer.Start();

    try
    {
        while (!stop.Get())
        {
            SearcherAndTaxonomy snapshot = manager.Acquire();
            try
            {
                FacetsCollector collector = new FacetsCollector();
                snapshot.searcher.Search(new MatchAllDocsQuery(), collector);
                Facets facets = GetTaxonomyFacetCounts(snapshot.taxonomyReader, facetsConfig, collector);
                FacetResult topChildren = facets.GetTopChildren(10, "field");

                // Facet results are only checked once the searcher can see documents.
                if (snapshot.searcher.IndexReader.NumDocs > 0)
                {
                    Assert.True(topChildren.ChildCount > 0);
                    Assert.True(topChildren.LabelValues.Length > 0);
                }
            }
            finally
            {
                // Every acquired pair is handed back to the manager.
                manager.Release(snapshot);
            }
        }
    }
    finally
    {
        // Wait for both background threads before closing anything they use.
        indexer.Join();
        reopener.Join();
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now stop");
    }

    IOUtils.Close(manager, taxoWriter, writer, taxoDir, dir);
}
/// <summary>
/// Creates an index with <c>MockCodec</c>, reopens it in append mode with
/// <c>MockCodec2</c>, adds further documents, and checks the per-term hit counts and
/// <c>MaxDoc</c> after each commit as well as after a final <c>ForceMerge(1)</c>.
/// </summary>
public virtual void TestChangeCodecAndMerge()
{
    Directory directory = NewDirectory();
    if (VERBOSE)
    {
        Console.WriteLine("TEST: make new index");
    }

    // Phase 1: create the index with MockCodec and auto-flush disabled.
    IndexWriterConfig createConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
        .SetOpenMode(OpenMode.CREATE)
        .SetCodec(new MockCodec());
    createConfig.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    IndexWriter firstWriter = NewWriter(directory, createConfig);

    AddDocs(firstWriter, 10);
    firstWriter.Commit();
    AssertQuery(new Term("content", "aaa"), directory, 10);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: addDocs3");
    }
    AddDocs3(firstWriter, 10);
    firstWriter.Commit();
    firstWriter.Dispose();

    AssertQuery(new Term("content", "ccc"), directory, 10);
    AssertQuery(new Term("content", "aaa"), directory, 10);
    Codec codec = createConfig.Codec;

    // Phase 2: append to the same index; the config starts from the previous codec
    // and is then switched to MockCodec2.
    IndexWriterConfig appendConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
        .SetOpenMode(OpenMode.APPEND)
        .SetCodec(codec);
    appendConfig.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    appendConfig.SetCodec(new MockCodec2()); // uses standard for field content
    IndexWriter secondWriter = NewWriter(directory, appendConfig);

    // swap in new codec for currently written segments
    if (VERBOSE)
    {
        Console.WriteLine("TEST: add docs w/ Standard codec for content field");
    }
    AddDocs2(secondWriter, 10);
    secondWriter.Commit();
    // Kept from the original sequence; the value is not read again in this method.
    codec = appendConfig.Codec;
    Assert.AreEqual(30, secondWriter.MaxDoc);
    AssertQuery(new Term("content", "bbb"), directory, 10);
    AssertQuery(new Term("content", "ccc"), directory, 10);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: add more docs w/ new codec");
    }
    AddDocs2(secondWriter, 10);
    secondWriter.Commit();
    AssertQuery(new Term("content", "ccc"), directory, 10);
    AssertQuery(new Term("content", "bbb"), directory, 20);
    AssertQuery(new Term("content", "aaa"), directory, 10);
    Assert.AreEqual(40, secondWriter.MaxDoc);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now optimize");
    }
    secondWriter.ForceMerge(1);
    Assert.AreEqual(40, secondWriter.MaxDoc);
    secondWriter.Dispose();

    // Hit counts must be unchanged after merging down to one segment.
    AssertQuery(new Term("content", "ccc"), directory, 10);
    AssertQuery(new Term("content", "bbb"), directory, 20);
    AssertQuery(new Term("content", "aaa"), directory, 10);
    directory.Dispose();
}