public virtual void TestFloatNorms()
{
    Directory dir = NewDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    Similarity provider = new MySimProvider(this);
    config.SetSimilarity(provider);
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
    LineFileDocs docs = new LineFileDocs(Random());
    int num = AtLeast(100);
    for (int i = 0; i < num; i++)
    {
        Document doc = docs.NextDoc();
        float nextFloat = (float)Random().NextDouble();
        // Cast to a double to get more precision output to the string.
        Field f = new TextField(FloatTestField, "" + (double)nextFloat, Field.Store.YES);
        f.Boost = nextFloat;
        doc.Add(f);
        writer.AddDocument(doc);
        doc.RemoveField(FloatTestField);
        if (Rarely())
        {
            writer.Commit();
        }
    }
    writer.Commit();
    writer.Dispose();
    AtomicReader open = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
    NumericDocValues norms = open.GetNormValues(FloatTestField);
    Assert.IsNotNull(norms);
    for (int i = 0; i < open.MaxDoc; i++)
    {
        Document document = open.Document(i);
        float expected = Convert.ToSingle(document.Get(FloatTestField));
        Assert.AreEqual(expected, Number.IntBitsToFloat((int)norms.Get(i)), 0.0f);
    }
    open.Dispose();
    dir.Dispose();
    docs.Dispose();
}
public override void Run() { // TODO: would be better if this were cross thread, so that we make sure one thread deleting anothers added docs works: IList <string> toDeleteIDs = new List <string>(); IList <SubDocs> toDeleteSubDocs = new List <SubDocs>(); while (DateTime.UtcNow < StopTime && !OuterInstance.Failed.Get()) { try { // Occasional longish pause if running // nightly if (LuceneTestCase.TEST_NIGHTLY && Random().Next(6) == 3) { if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": now long sleep"); } Thread.Sleep(TestUtil.NextInt(Random(), 50, 500)); } // Rate limit ingest rate: if (Random().Next(7) == 5) { Thread.Sleep(TestUtil.NextInt(Random(), 1, 10)); if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": done sleep"); } } Document doc = Docs.NextDoc(); if (doc == null) { break; } // Maybe add randomly named field string addedField; if (Random().NextBoolean()) { addedField = "extra" + Random().Next(40); doc.Add(NewTextField(addedField, "a random field", Field.Store.YES)); } else { addedField = null; } if (Random().NextBoolean()) { if (Random().NextBoolean()) { // Add/update doc block: string packID; SubDocs delSubDocs; if (toDeleteSubDocs.Count > 0 && Random().NextBoolean()) { delSubDocs = toDeleteSubDocs[Random().Next(toDeleteSubDocs.Count)]; Debug.Assert(!delSubDocs.Deleted); toDeleteSubDocs.Remove(delSubDocs); // Update doc block, replacing prior packID packID = delSubDocs.PackID; } else { delSubDocs = null; // Add doc block, using new packID packID = OuterInstance.PackCount.IncrementAndGet() + ""; } Field packIDField = NewStringField("packID", packID, Field.Store.YES); IList <string> docIDs = new List <string>(); SubDocs subDocs = new SubDocs(packID, docIDs); IList <Document> docsList = new List <Document>(); AllSubDocs.Add(subDocs); doc.Add(packIDField); docsList.Add(TestUtil.CloneDocument(doc)); docIDs.Add(doc.Get("docid")); int maxDocCount = TestUtil.NextInt(Random(), 1, 10); while (docsList.Count < maxDocCount) { doc = Docs.NextDoc(); if (doc == null) { break; } docsList.Add(TestUtil.CloneDocument(doc)); docIDs.Add(doc.Get("docid")); } OuterInstance.AddCount.AddAndGet(docsList.Count); Term packIDTerm = new Term("packID", packID); if (delSubDocs != null) { delSubDocs.Deleted = true; DelIDs.AddAll(delSubDocs.SubIDs); OuterInstance.DelCount.AddAndGet(delSubDocs.SubIDs.Count); if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": update pack packID=" + delSubDocs.PackID + " count=" + docsList.Count + " docs=" + docIDs); } OuterInstance.UpdateDocuments(packIDTerm, docsList); } else { if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": add pack packID=" + packID + " count=" + docsList.Count + " docs=" + docIDs); } OuterInstance.AddDocuments(packIDTerm, docsList); } doc.RemoveField("packID"); if (Random().Next(5) == 2) { if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + packID); } toDeleteSubDocs.Add(subDocs); } } else { // Add single doc string docid = doc.Get("docid"); if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": add doc docid:" + docid); } OuterInstance.AddDocument(new Term("docid", docid), doc); OuterInstance.AddCount.IncrementAndGet(); if (Random().Next(5) == 3) { if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + doc.Get("docid")); } toDeleteIDs.Add(docid); } } } else { // Update single doc, but we never re-use // and ID so the delete will never // actually happen: if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": update doc id:" + 
doc.Get("docid")); } string docid = doc.Get("docid"); OuterInstance.UpdateDocument(new Term("docid", docid), doc); OuterInstance.AddCount.IncrementAndGet(); if (Random().Next(5) == 3) { if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + doc.Get("docid")); } toDeleteIDs.Add(docid); } } if (Random().Next(30) == 17) { if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": apply " + toDeleteIDs.Count + " deletes"); } foreach (string id in toDeleteIDs) { if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": del term=id:" + id); } OuterInstance.DeleteDocuments(new Term("docid", id)); } int count = OuterInstance.DelCount.AddAndGet(toDeleteIDs.Count); if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": tot " + count + " deletes"); } DelIDs.AddAll(toDeleteIDs); toDeleteIDs.Clear(); foreach (SubDocs subDocs in toDeleteSubDocs) { Debug.Assert(!subDocs.Deleted); DelPackIDs.Add(subDocs.PackID); OuterInstance.DeleteDocuments(new Term("packID", subDocs.PackID)); subDocs.Deleted = true; if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": del subs: " + subDocs.SubIDs + " packID=" + subDocs.PackID); } DelIDs.AddAll(subDocs.SubIDs); OuterInstance.DelCount.AddAndGet(subDocs.SubIDs.Count); } toDeleteSubDocs.Clear(); } if (addedField != null) { doc.RemoveField(addedField); } } catch (Exception t) { Console.WriteLine(Thread.CurrentThread.Name + ": hit exc"); Console.WriteLine(t.ToString()); Console.Write(t.StackTrace); OuterInstance.Failed.Set(true); throw new Exception(t.Message, t); } } if (VERBOSE) { Console.WriteLine(Thread.CurrentThread.Name + ": indexing done"); } OuterInstance.DoAfterIndexingThreadDone(); }
public virtual void Test() { Random random = new Random(Random().Next()); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues()); Directory d = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); RandomIndexWriter w = new RandomIndexWriter(Random(), d, analyzer, Similarity, TimeZone); int numDocs = AtLeast(10); for (int docCount = 0; docCount < numDocs; docCount++) { w.AddDocument(docs.NextDoc()); } IndexReader r = w.Reader; w.Dispose(); List <BytesRef> terms = new List <BytesRef>(); TermsEnum termsEnum = MultiFields.GetTerms(r, "body").GetIterator(null); BytesRef term; while ((term = termsEnum.Next()) != null) { terms.Add(BytesRef.DeepCopyOf(term)); } if (VERBOSE) { Console.WriteLine("TEST: " + terms.Count + " terms"); } int upto = -1; int iters = AtLeast(200); for (int iter = 0; iter < iters; iter++) { bool isEnd; if (upto != -1 && Random().NextBoolean()) { // next if (VERBOSE) { Console.WriteLine("TEST: iter next"); } isEnd = termsEnum.Next() == null; upto++; if (isEnd) { if (VERBOSE) { Console.WriteLine(" end"); } Assert.AreEqual(upto, terms.Count); upto = -1; } else { if (VERBOSE) { Console.WriteLine(" got term=" + termsEnum.Term.Utf8ToString() + " expected=" + terms[upto].Utf8ToString()); } Assert.IsTrue(upto < terms.Count); Assert.AreEqual(terms[upto], termsEnum.Term); } } else { BytesRef target; string exists; if (Random().NextBoolean()) { // likely fake term if (Random().NextBoolean()) { target = new BytesRef(TestUtil.RandomSimpleString(Random())); } else { target = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random())); } exists = "likely not"; } else { // real term target = terms[Random().Next(terms.Count)]; exists = "yes"; } upto = terms.BinarySearch(target); if (Random().NextBoolean()) { if (VERBOSE) { Console.WriteLine("TEST: iter seekCeil target=" + target.Utf8ToString() + " exists=" + exists); } // seekCeil TermsEnum.SeekStatus status = termsEnum.SeekCeil(target); if (VERBOSE) { Console.WriteLine(" got " + status); } if (upto < 0) { upto = -(upto + 1); if (upto >= terms.Count) { Assert.AreEqual(TermsEnum.SeekStatus.END, status); upto = -1; } else { Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status); Assert.AreEqual(terms[upto], termsEnum.Term); } } else { Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status); Assert.AreEqual(terms[upto], termsEnum.Term); } } else { if (VERBOSE) { Console.WriteLine("TEST: iter seekExact target=" + target.Utf8ToString() + " exists=" + exists); } // seekExact bool result = termsEnum.SeekExact(target); if (VERBOSE) { Console.WriteLine(" got " + result); } if (upto < 0) { Assert.IsFalse(result); upto = -1; } else { Assert.IsTrue(result); Assert.AreEqual(target, termsEnum.Term); } } } } r.Dispose(); d.Dispose(); docs.Dispose(); }
public virtual void TestRollingUpdates_Mem() { Random random = new Random(Random().Next()); BaseDirectoryWrapper dir = NewDirectory(); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues()); //provider.register(new MemoryCodec()); // LUCENE TODO: uncomment this out once MemoryPostingsFormat is brought over //if ((!"Lucene3x".Equals(Codec.Default.Name)) && Random().NextBoolean()) //{ // Codec.Default = // TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.NextFloat())); //} MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); int SIZE = AtLeast(20); int id = 0; IndexReader r = null; IndexSearcher s = null; int numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble()))); if (VERBOSE) { Console.WriteLine("TEST: numUpdates=" + numUpdates); } int updateCount = 0; // TODO: sometimes update ids not in order... for (int docIter = 0; docIter < numUpdates; docIter++) { Documents.Document doc = docs.NextDoc(); string myID = "" + id; if (id == SIZE - 1) { id = 0; } else { id++; } if (VERBOSE) { Console.WriteLine(" docIter=" + docIter + " id=" + id); } ((Field)doc.GetField("docid")).StringValue = myID; Term idTerm = new Term("docid", myID); bool doUpdate; if (s != null && updateCount < SIZE) { TopDocs hits = s.Search(new TermQuery(idTerm), 1); Assert.AreEqual(1, hits.TotalHits); doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc); if (VERBOSE) { if (doUpdate) { Console.WriteLine(" tryDeleteDocument failed"); } else { Console.WriteLine(" tryDeleteDocument succeeded"); } } } else { doUpdate = true; if (VERBOSE) { Console.WriteLine(" no searcher: doUpdate=true"); } } updateCount++; if (doUpdate) { w.UpdateDocument(idTerm, doc); } else { w.AddDocument(doc); } if (docIter >= SIZE && Random().Next(50) == 17) { if (r != null) { r.Dispose(); } bool applyDeletions = Random().NextBoolean(); if (VERBOSE) { Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions); } r = w.GetReader(applyDeletions); if (applyDeletions) { s = NewSearcher(r); } else { s = null; } Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE); updateCount = 0; } } if (r != null) { r.Dispose(); } w.Commit(); Assert.AreEqual(SIZE, w.NumDocs()); w.Dispose(); TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates"); docs.Dispose(); // LUCENE-4455: SegmentInfos infos = new SegmentInfos(); infos.Read(dir); long totalBytes = 0; foreach (SegmentCommitInfo sipc in infos.Segments) { totalBytes += sipc.SizeInBytes(); } long totalBytes2 = 0; foreach (string fileName in dir.ListAll()) { if (!fileName.StartsWith(IndexFileNames.SEGMENTS)) { totalBytes2 += dir.FileLength(fileName); } } Assert.AreEqual(totalBytes2, totalBytes); dir.Dispose(); }
/// <summary>
/// Populates a writer with random stuff. This must be fully reproducible with
/// the seed!
/// </summary>
public static void CreateRandomIndex(int numdocs, RandomIndexWriter writer, Random random)
{
    LineFileDocs lineFileDocs = new LineFileDocs(random);
    for (int i = 0; i < numdocs; i++)
    {
        writer.AddDocument(lineFileDocs.NextDoc());
    }
    lineFileDocs.Dispose();
}
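// A minimal usage sketch, not part of the original source: the test-method name, the fixed seed,
// and the fixture helpers below are assumptions for illustration only. It shows the intended
// contract of CreateRandomIndex: pass the same seeded Random to the writer and to the helper so
// the generated index is fully reproducible across runs.
public virtual void ExampleCreateRandomIndexUsage()
{
    Directory dir = NewDirectory();
    Random random = new Random(42); // fixed seed => the helper regenerates identical documents
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
    CreateRandomIndex(AtLeast(50), writer, random);
    writer.Dispose();
    dir.Dispose();
}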
// Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>()); public virtual void RunTest(string testName) { Failed.Set(false); AddCount.Set(0); DelCount.Set(0); PackCount.Set(0); DateTime t0 = DateTime.UtcNow; Random random = new Random(Random().Next()); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues()); DirectoryInfo tempDir = CreateTempDir(testName); Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW if (Dir is BaseDirectoryWrapper) { ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves. } MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream()); if (LuceneTestCase.TEST_NIGHTLY) { // newIWConfig makes smallish max seg size, which // results in tons and tons of segments for this test // when run nightly: MergePolicy mp = conf.MergePolicy; if (mp is TieredMergePolicy) { ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0; } else if (mp is LogByteSizeMergePolicy) { ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0; } else if (mp is LogMergePolicy) { ((LogMergePolicy)mp).MaxMergeDocs = 100000; } } conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this)); if (VERBOSE) { conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out); } Writer = new IndexWriter(Dir, conf); TestUtil.ReduceOpenFiles(Writer); TaskScheduler es = Random().NextBoolean() ? null : TaskScheduler.Default; DoAfterWriter(es); int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4); int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 
300 : RANDOM_MULTIPLIER; ISet<string> delIDs = new ConcurrentHashSet<string>(new HashSet<string>()); ISet<string> delPackIDs = new ConcurrentHashSet<string>(new HashSet<string>()); IList<SubDocs> allSubDocs = new SynchronizedCollection<SubDocs>(); DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC); ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs); if (VERBOSE) { Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]"); } // Let index build up a bit Thread.Sleep(100); DoSearching(es, stopTime); if (VERBOSE) { Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]"); } for (int thread = 0; thread < indexThreads.Length; thread++) { indexThreads[thread].Join(); } if (VERBOSE) { Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount); } IndexSearcher s = FinalSearcher; if (VERBOSE) { Console.WriteLine("TEST: finalSearcher=" + s); } Assert.IsFalse(Failed.Get()); bool doFail = false; // Verify: make sure delIDs are in fact deleted: foreach (string id in delIDs) { TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc); doFail = true; } } // Verify: make sure delPackIDs are in fact deleted: foreach (string id in delPackIDs) { TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches"); doFail = true; } } // Verify: make sure each group of sub-docs are still in docID order: foreach (SubDocs subDocs in allSubDocs.ToList()) { TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20); if (!subDocs.Deleted) { // We sort by relevance but the scores should be identical so sort falls back to by docID: if (hits.TotalHits != subDocs.SubIDs.Count) { Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits); doFail = true; } else { int lastDocID = -1; int startDocID = -1; foreach (ScoreDoc scoreDoc in hits.ScoreDocs) { int docID = scoreDoc.Doc; if (lastDocID != -1) { Assert.AreEqual(1 + lastDocID, docID); } else { startDocID = docID; } lastDocID = docID; Document doc = s.Doc(docID); Assert.AreEqual(subDocs.PackID, doc.Get("packID")); } lastDocID = startDocID - 1; foreach (string subID in subDocs.SubIDs) { hits = s.Search(new TermQuery(new Term("docid", subID)), 1); Assert.AreEqual(1, hits.TotalHits); int docID = hits.ScoreDocs[0].Doc; if (lastDocID != -1) { Assert.AreEqual(1 + lastDocID, docID); } lastDocID = docID; } } } else { // Pack was deleted -- make sure its docs are // deleted. 
We can't verify packID is deleted // because we can re-use packID for update: foreach (string subID in subDocs.SubIDs) { Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits); } } } // Verify: make sure all not-deleted docs are in fact // not deleted: int endID = Convert.ToInt32(docs.NextDoc().Get("docid")); docs.Dispose(); for (int id = 0; id < endID; id++) { string stringID = "" + id; if (!delIDs.Contains(stringID)) { TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1); if (hits.TotalHits != 1) { Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + string.Join(",", delIDs.ToArray())); doFail = true; } } } Assert.IsFalse(doFail); Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount); ReleaseSearcher(s); Writer.Commit(); Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount); DoClose(); Writer.Dispose(false); // Cannot shutdown until after writer is closed because // writer has merged segment warmer that uses IS to run // searches, and that IS may be using this es! /*if (es != null) { es.shutdown(); es.awaitTermination(1, TimeUnit.SECONDS); }*/ TestUtil.CheckIndex(Dir); Dir.Dispose(); System.IO.Directory.Delete(tempDir.FullName, true); if (VERBOSE) { Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]"); } }
private void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool useCharFilter, bool simple, bool offsetsAreCorrect, RandomIndexWriter iw) { LineFileDocs docs = new LineFileDocs(random); Document doc = null; Field field = null, currentField = null; StringReader bogus = new StringReader(""); if (iw != null) { doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); if (random.NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = random.NextBoolean(); ft.StoreTermVectorPositions = random.NextBoolean(); if (ft.StoreTermVectorPositions && !OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { ft.StoreTermVectorPayloads = random.NextBoolean(); } } if (random.NextBoolean()) { ft.OmitNorms = true; } string pf = TestUtil.GetPostingsFormat("dummy"); bool supportsOffsets = !DoesntSupportOffsets.Contains(pf); switch (random.Next(4)) { case 0: ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; break; case 1: ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; break; case 2: ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; break; default: if (supportsOffsets && offsetsAreCorrect) { ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } else { ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } break; } currentField = field = new Field("dummy", bogus, ft); doc.Add(currentField); } try { for (int i = 0; i < iterations; i++) { string text; if (random.Next(10) == 7) { // real data from linedocs text = docs.NextDoc().Get("body"); if (text.Length > maxWordLength) { // Take a random slice from the text...: int startPos = random.Next(text.Length - maxWordLength); if (startPos > 0 && char.IsLowSurrogate(text[startPos])) { // Take care not to split up a surrogate pair: startPos--; Assert.True(char.IsHighSurrogate(text[startPos])); } int endPos = startPos + maxWordLength - 1; if (char.IsHighSurrogate(text[endPos])) { // Take care not to split up a surrogate pair: endPos--; } text = text.Substring(startPos, 1 + endPos - startPos); } } else { // synthetic text = TestUtil.RandomAnalysisString(random, maxWordLength, simple); } try { CheckAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, currentField); if (iw != null) { if (random.Next(7) == 0) { // pile up a multivalued field var ft = (FieldType)field.FieldType; currentField = new Field("dummy", bogus, ft); doc.Add(currentField); } else { iw.AddDocument(doc); if (doc.Fields.Count > 1) { // back to 1 field currentField = field; doc.RemoveFields("dummy"); doc.Add(currentField); } } } } catch (Exception t) { // TODO: really we should pass a random seed to // checkAnalysisConsistency then print it here too: Console.Error.WriteLine("TEST FAIL: useCharFilter=" + useCharFilter + " text='" + Escape(text) + "'"); throw; } } } finally { IOUtils.CloseWhileHandlingException(docs); } }
public override void Run()
{
    for (int iter = 0; iter < iters && !failed.Value; iter++)
    {
        //final int x = Random().nextInt(5);
        int x = Random.Next(3);
        try
        {
            switch (x)
            {
                case 0:
                    rollbackLock.@Lock();
                    if (Verbose) { Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now rollback"); }
                    try
                    {
                        writerRef.Value.Rollback();
                        if (Verbose) { Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": rollback done; now open new writer"); }
                        writerRef.Value = new IndexWriter(d, NewIndexWriterConfig(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                            outerInstance,
#endif
                            TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
                    }
                    finally
                    {
                        rollbackLock.Unlock();
                    }
                    break;

                case 1:
                    commitLock.@Lock();
                    if (Verbose) { Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now commit"); }
                    try
                    {
                        if (Random.NextBoolean())
                        {
                            writerRef.Value.PrepareCommit();
                        }
                        writerRef.Value.Commit();
                    }
                    catch (ObjectDisposedException)
                    {
                        // ok
                    }
                    catch (NullReferenceException)
                    {
                        // ok
                    }
                    finally
                    {
                        commitLock.Unlock();
                    }
                    break;

                case 2:
                    if (Verbose) { Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now add"); }
                    try
                    {
                        writerRef.Value.AddDocument(docs.NextDoc());
                    }
                    catch (ObjectDisposedException)
                    {
                        // ok
                    }
                    catch (NullReferenceException)
                    {
                        // ok
                    }
                    catch (InvalidOperationException)
                    {
                        // ok
                    }
                    catch (AssertionException)
                    {
                        // ok
                    }
                    break;
            }
        }
        catch (Exception t)
        {
            failed.Value = true;
            throw new Exception(t.Message, t);
        }
    }
}
public override void Run() { // TODO: would be better if this were cross thread, so that we make sure one thread deleting anothers added docs works: IList <string> toDeleteIDs = new JCG.List <string>(); IList <SubDocs> toDeleteSubDocs = new JCG.List <SubDocs>(); while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTime && !outerInstance.m_failed) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results { try { // Occasional longish pause if running // nightly if (LuceneTestCase.TestNightly && Random.Next(6) == 3) { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": now long sleep"); } //Thread.Sleep(TestUtil.NextInt32(Random, 50, 500)); // LUCENENET specific - Reduced amount of pause to keep the total // Nightly test time under 1 hour Thread.Sleep(TestUtil.NextInt32(Random, 50, 250)); } // Rate limit ingest rate: if (Random.Next(7) == 5) { Thread.Sleep(TestUtil.NextInt32(Random, 1, 10)); if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": done sleep"); } } Document doc = docs.NextDoc(); if (doc is null) { break; } // Maybe add randomly named field string addedField; if (Random.NextBoolean()) { addedField = "extra" + Random.Next(40); doc.Add(NewTextField(addedField, "a random field", Field.Store.YES)); } else { addedField = null; } if (Random.NextBoolean()) { if (Random.NextBoolean()) { // Add/update doc block: string packID; SubDocs delSubDocs; if (toDeleteSubDocs.Count > 0 && Random.NextBoolean()) { delSubDocs = toDeleteSubDocs[Random.Next(toDeleteSubDocs.Count)]; if (Debugging.AssertsEnabled) { Debugging.Assert(!delSubDocs.Deleted); } toDeleteSubDocs.Remove(delSubDocs); // Update doc block, replacing prior packID packID = delSubDocs.PackID; } else { delSubDocs = null; // Add doc block, using new packID packID = outerInstance.m_packCount.GetAndIncrement().ToString(CultureInfo.InvariantCulture); } Field packIDField = NewStringField("packID", packID, Field.Store.YES); IList <string> docIDs = new JCG.List <string>(); SubDocs subDocs = new SubDocs(packID, docIDs); IList <Document> docsList = new JCG.List <Document>(); allSubDocs.Enqueue(subDocs); doc.Add(packIDField); docsList.Add(TestUtil.CloneDocument(doc)); docIDs.Add(doc.Get("docid")); int maxDocCount = TestUtil.NextInt32(Random, 1, 10); while (docsList.Count < maxDocCount) { doc = docs.NextDoc(); if (doc is null) { break; } docsList.Add(TestUtil.CloneDocument(doc)); docIDs.Add(doc.Get("docid")); } outerInstance.m_addCount.AddAndGet(docsList.Count); Term packIDTerm = new Term("packID", packID); if (delSubDocs != null) { delSubDocs.Deleted = true; delIDs.UnionWith(delSubDocs.SubIDs); outerInstance.m_delCount.AddAndGet(delSubDocs.SubIDs.Count); if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": update pack packID=" + delSubDocs.PackID + " count=" + docsList.Count + " docs=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", docIDs)); } outerInstance.UpdateDocuments(packIDTerm, docsList); } else { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": add pack packID=" + packID + " count=" + docsList.Count + " docs=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", docIDs)); } outerInstance.AddDocuments(packIDTerm, docsList); } doc.RemoveField("packID"); if (Random.Next(5) == 2) { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + packID); } toDeleteSubDocs.Add(subDocs); } } else { // Add single doc string docid = doc.Get("docid"); if (Verbose) { 
Console.WriteLine(Thread.CurrentThread.Name + ": add doc docid:" + docid); } outerInstance.AddDocument(new Term("docid", docid), doc); outerInstance.m_addCount.GetAndIncrement(); if (Random.Next(5) == 3) { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + doc.Get("docid")); } toDeleteIDs.Add(docid); } } } else { // Update single doc, but we never re-use // and ID so the delete will never // actually happen: if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": update doc id:" + doc.Get("docid")); } string docid = doc.Get("docid"); outerInstance.UpdateDocument(new Term("docid", docid), doc); outerInstance.m_addCount.GetAndIncrement(); if (Random.Next(5) == 3) { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + doc.Get("docid")); } toDeleteIDs.Add(docid); } } if (Random.Next(30) == 17) { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": apply " + toDeleteIDs.Count + " deletes"); } foreach (string id in toDeleteIDs) { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": del term=id:" + id); } outerInstance.DeleteDocuments(new Term("docid", id)); } int count = outerInstance.m_delCount.AddAndGet(toDeleteIDs.Count); if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": tot " + count + " deletes"); } delIDs.UnionWith(toDeleteIDs); toDeleteIDs.Clear(); foreach (SubDocs subDocs in toDeleteSubDocs) { if (Debugging.AssertsEnabled) { Debugging.Assert(!subDocs.Deleted); } delPackIDs.Add(subDocs.PackID); outerInstance.DeleteDocuments(new Term("packID", subDocs.PackID)); subDocs.Deleted = true; if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": del subs: " + subDocs.SubIDs + " packID=" + subDocs.PackID); } delIDs.UnionWith(subDocs.SubIDs); outerInstance.m_delCount.AddAndGet(subDocs.SubIDs.Count); } toDeleteSubDocs.Clear(); } if (addedField != null) { doc.RemoveField(addedField); } } catch (Exception t) when(t.IsThrowable()) { Console.WriteLine(Thread.CurrentThread.Name + ": hit exc"); Console.WriteLine(t.ToString()); Console.Write(t.StackTrace); outerInstance.m_failed.Value = (true); throw RuntimeException.Create(t); } } if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": indexing done"); } outerInstance.DoAfterIndexingThreadDone(); }
public override void Run()
{
    try
    {
        while (!doStop)
        {
            w.UpdateDocument(new Term("docid", "" + Random.Next(numStartDocs)), docs.NextDoc());
            // Force deletes to apply
            w.GetReader().Dispose();
        }
    }
    catch (Exception t) when (t.IsThrowable())
    {
        throw RuntimeException.Create(t);
    }
}
public override void Run()
{
    try
    {
        LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues());
        int numDocs = 0;
        while (DateTime.UtcNow < OuterInstance.EndTime)
        {
            int what = Random().Next(3);
            NodeState node = OuterInstance.Nodes[Random().Next(OuterInstance.Nodes.Length)];
            if (numDocs == 0 || what == 0)
            {
                node.Writer.AddDocument(docs.NextDoc());
                numDocs++;
            }
            else if (what == 1)
            {
                node.Writer.UpdateDocument(new Term("docid", "" + Random().Next(numDocs)), docs.NextDoc());
                numDocs++;
            }
            else
            {
                node.Writer.DeleteDocuments(new Term("docid", "" + Random().Next(numDocs)));
            }
            // TODO: doc blocks too
            if (Random().Next(17) == 12)
            {
                node.Writer.Commit();
            }
            if (Random().Next(17) == 12)
            {
                OuterInstance.Nodes[Random().Next(OuterInstance.Nodes.Length)].Reopen();
            }
        }
    }
    catch (Exception t)
    {
        Console.WriteLine("FAILED:");
        Console.Out.WriteLine(t.StackTrace);
        throw new Exception(t.Message, t);
    }
}
public override void Run()
{
    for (int iter = 0; iter < Iters && !Failed.Get(); iter++)
    {
        //final int x = Random().nextInt(5);
        int x = Random().Next(3);
        try
        {
            switch (x)
            {
                case 0:
                    RollbackLock.@Lock();
                    if (VERBOSE) { Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now rollback"); }
                    try
                    {
                        WriterRef.Value.Rollback();
                        if (VERBOSE) { Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": rollback done; now open new writer"); }
                        WriterRef.Value = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
                    }
                    finally
                    {
                        RollbackLock.Unlock();
                    }
                    break;

                case 1:
                    CommitLock.@Lock();
                    if (VERBOSE) { Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now commit"); }
                    try
                    {
                        if (Random().NextBoolean())
                        {
                            WriterRef.Value.PrepareCommit();
                        }
                        WriterRef.Value.Commit();
                    }
                    catch (AlreadyClosedException)
                    {
                        // ok
                    }
                    catch (System.NullReferenceException)
                    {
                        // ok
                    }
                    finally
                    {
                        CommitLock.Unlock();
                    }
                    break;

                case 2:
                    if (VERBOSE) { Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now add"); }
                    try
                    {
                        WriterRef.Value.AddDocument(Docs.NextDoc());
                    }
                    catch (AlreadyClosedException)
                    {
                        // ok
                    }
                    catch (System.NullReferenceException)
                    {
                        // ok
                    }
                    catch (InvalidOperationException)
                    {
                        // ok
                    }
                    break;
            }
        }
        catch (Exception t)
        {
            Failed.Set(true);
            throw new Exception(t.Message, t);
        }
    }
}
public virtual void Test() { MockDirectoryWrapper dir = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors")); dir.PreventDoubleWrite = false; double rate = Random().NextDouble() * 0.01; //System.out.println("rate=" + rate); dir.RandomIOExceptionRateOnOpen = rate; int iters = AtLeast(20); LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues()); IndexReader r = null; DirectoryReader r2 = null; bool any = false; MockDirectoryWrapper dirCopy = null; int lastNumDocs = 0; for (int iter = 0; iter < iters; iter++) { IndexWriter w = null; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } try { MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); if (VERBOSE) { // Do this ourselves instead of relying on LTC so // we see incrementing messageID: iwc.InfoStream = new PrintStreamInfoStream(Console.Out); } MergeScheduler ms = iwc.MergeScheduler; if (ms is ConcurrentMergeScheduler) { ((ConcurrentMergeScheduler)ms).SetSuppressExceptions(); } w = new IndexWriter(dir, iwc); if (r != null && Random().Next(5) == 3) { if (Random().NextBoolean()) { if (VERBOSE) { Console.WriteLine("TEST: addIndexes IR[]"); } w.AddIndexes(new IndexReader[] { r }); } else { if (VERBOSE) { Console.WriteLine("TEST: addIndexes Directory[]"); } w.AddIndexes(new Directory[] { dirCopy }); } } else { if (VERBOSE) { Console.WriteLine("TEST: addDocument"); } w.AddDocument(docs.NextDoc()); } dir.RandomIOExceptionRateOnOpen = 0.0; w.Dispose(); w = null; // NOTE: this is O(N^2)! Only enable for temporary debugging: //dir.setRandomIOExceptionRateOnOpen(0.0); //TestUtil.CheckIndex(dir); //dir.setRandomIOExceptionRateOnOpen(rate); // Verify numDocs only increases, to catch IndexWriter // accidentally deleting the index: dir.RandomIOExceptionRateOnOpen = 0.0; Assert.IsTrue(DirectoryReader.IndexExists(dir)); if (r2 == null) { r2 = DirectoryReader.Open(dir); } else { DirectoryReader r3 = DirectoryReader.OpenIfChanged(r2); if (r3 != null) { r2.Dispose(); r2 = r3; } } Assert.IsTrue(r2.NumDocs() >= lastNumDocs, "before=" + lastNumDocs + " after=" + r2.NumDocs()); lastNumDocs = r2.NumDocs(); //System.out.println("numDocs=" + lastNumDocs); dir.RandomIOExceptionRateOnOpen = rate; any = true; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": success"); } } catch (IOException ioe) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": exception"); Console.WriteLine(ioe.ToString()); Console.Write(ioe.StackTrace); } if (w != null) { // NOTE: leave random IO exceptions enabled here, // to verify that rollback does not try to write // anything: w.Rollback(); } } if (any && r == null && Random().NextBoolean()) { // Make a copy of a non-empty index so we can use // it to addIndexes later: dir.RandomIOExceptionRateOnOpen = 0.0; r = DirectoryReader.Open(dir); dirCopy = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors.copy")); HashSet<string> files = new HashSet<string>(); foreach (string file in dir.ListAll()) { dir.Copy(dirCopy, file, file, IOContext.DEFAULT); files.Add(file); } dirCopy.Sync(files); // Have IW kiss the dir so we remove any leftover // files ... 
we can easily have leftover files at // the time we take a copy because we are holding // open a reader: (new IndexWriter(dirCopy, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())))).Dispose(); dirCopy.RandomIOExceptionRate = rate; dir.RandomIOExceptionRateOnOpen = rate; } } if (r2 != null) { r2.Dispose(); } if (r != null) { r.Dispose(); dirCopy.Dispose(); } dir.Dispose(); }
private static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool useCharFilter, bool simple, bool offsetsAreCorrect, RandomIndexWriter iw) { LineFileDocs docs = new LineFileDocs(random); Document doc = null; Field field = null, currentField = null; StringReader bogus = new StringReader(""); if (iw != null) { doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); if (random.NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = random.NextBoolean(); ft.StoreTermVectorPositions = random.NextBoolean(); if (ft.StoreTermVectorPositions && !OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { ft.StoreTermVectorPayloads = random.NextBoolean(); } } if (random.NextBoolean()) { ft.OmitNorms = true; } string pf = TestUtil.GetPostingsFormat("dummy"); bool supportsOffsets = !DoesntSupportOffsets.Contains(pf); switch (random.Next(4)) { case 0: ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; break; case 1: ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; break; case 2: ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; break; default: if (supportsOffsets && offsetsAreCorrect) { ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } else { ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } break; } currentField = field = new Field("dummy", bogus, ft); doc.Add(currentField); } try { for (int i = 0; i < iterations; i++) { string text; if (random.Next(10) == 7) { // real data from linedocs text = docs.NextDoc().Get("body"); if (text.Length > maxWordLength) { // Take a random slice from the text...: int startPos = random.Next(text.Length - maxWordLength); if (startPos > 0 && char.IsLowSurrogate(text[startPos])) { // Take care not to split up a surrogate pair: startPos--; Assert.True(char.IsHighSurrogate(text[startPos])); } int endPos = startPos + maxWordLength - 1; if (char.IsHighSurrogate(text[endPos])) { // Take care not to split up a surrogate pair: endPos--; } text = text.Substring(startPos, 1 + endPos - startPos); } } else { // synthetic text = TestUtil.RandomAnalysisString(random, maxWordLength, simple); } try { CheckAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, currentField); if (iw != null) { if (random.Next(7) == 0) { // pile up a multivalued field var ft = (FieldType)field.FieldType(); currentField = new Field("dummy", bogus, ft); doc.Add(currentField); } else { iw.AddDocument(doc); if (doc.Fields.Count > 1) { // back to 1 field currentField = field; doc.RemoveFields("dummy"); doc.Add(currentField); } } } } catch (Exception t) { // TODO: really we should pass a random seed to // checkAnalysisConsistency then print it here too: Console.Error.WriteLine("TEST FAIL: useCharFilter=" + useCharFilter + " text='" + Escape(text) + "'"); throw; } } } finally { IOUtils.CloseWhileHandlingException(docs); } }
public override void Run()
{
    try
    {
        while (!DoStop.Get())
        {
            w.UpdateDocument(new Term("docid", "" + Random.Next(NumStartDocs)), Docs.NextDoc());
            // Force deletes to apply
            w.GetReader().Dispose();
        }
    }
    catch (Exception t)
    {
        throw new Exception(t.Message, t);
    }
}
// Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>()); public virtual void RunTest(string testName) { Failed.Set(false); AddCount.Set(0); DelCount.Set(0); PackCount.Set(0); DateTime t0 = DateTime.UtcNow; Random random = new Random(Random().Next()); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues()); DirectoryInfo tempDir = CreateTempDir(testName); Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW if (Dir is BaseDirectoryWrapper) { ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves. } MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream()); if (LuceneTestCase.TEST_NIGHTLY) { // newIWConfig makes smallish max seg size, which // results in tons and tons of segments for this test // when run nightly: MergePolicy mp = conf.MergePolicy; if (mp is TieredMergePolicy) { ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0; } else if (mp is LogByteSizeMergePolicy) { ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0; } else if (mp is LogMergePolicy) { ((LogMergePolicy)mp).MaxMergeDocs = 100000; } } conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this)); if (VERBOSE) { conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out); } Writer = new IndexWriter(Dir, conf); TestUtil.ReduceOpenFiles(Writer); //TaskScheduler es = Random().NextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName)); TaskScheduler es = null; DoAfterWriter(es); int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4); int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 
300 : RANDOM_MULTIPLIER; ISet <string> delIDs = new ConcurrentHashSet <string>(new HashSet <string>()); ISet <string> delPackIDs = new ConcurrentHashSet <string>(new HashSet <string>()); IList <SubDocs> allSubDocs = new SynchronizedCollection <SubDocs>(); DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC); ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs); if (VERBOSE) { Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]"); } // Let index build up a bit Thread.Sleep(100); DoSearching(es, stopTime); if (VERBOSE) { Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]"); } for (int thread = 0; thread < indexThreads.Length; thread++) { indexThreads[thread].Join(); } if (VERBOSE) { Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount); } IndexSearcher s = FinalSearcher; if (VERBOSE) { Console.WriteLine("TEST: finalSearcher=" + s); } Assert.IsFalse(Failed.Get()); bool doFail = false; // Verify: make sure delIDs are in fact deleted: foreach (string id in delIDs) { TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc); doFail = true; } } // Verify: make sure delPackIDs are in fact deleted: foreach (string id in delPackIDs) { TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches"); doFail = true; } } // Verify: make sure each group of sub-docs are still in docID order: foreach (SubDocs subDocs in allSubDocs.ToList()) { TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20); if (!subDocs.Deleted) { // We sort by relevance but the scores should be identical so sort falls back to by docID: if (hits.TotalHits != subDocs.SubIDs.Count) { Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits); doFail = true; } else { int lastDocID = -1; int startDocID = -1; foreach (ScoreDoc scoreDoc in hits.ScoreDocs) { int docID = scoreDoc.Doc; if (lastDocID != -1) { Assert.AreEqual(1 + lastDocID, docID); } else { startDocID = docID; } lastDocID = docID; Document doc = s.Doc(docID); Assert.AreEqual(subDocs.PackID, doc.Get("packID")); } lastDocID = startDocID - 1; foreach (string subID in subDocs.SubIDs) { hits = s.Search(new TermQuery(new Term("docid", subID)), 1); Assert.AreEqual(1, hits.TotalHits); int docID = hits.ScoreDocs[0].Doc; if (lastDocID != -1) { Assert.AreEqual(1 + lastDocID, docID); } lastDocID = docID; } } } else { // Pack was deleted -- make sure its docs are // deleted. 
We can't verify packID is deleted // because we can re-use packID for update: foreach (string subID in subDocs.SubIDs) { Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits); } } } // Verify: make sure all not-deleted docs are in fact // not deleted: int endID = Convert.ToInt32(docs.NextDoc().Get("docid")); docs.Dispose(); for (int id = 0; id < endID; id++) { string stringID = "" + id; if (!delIDs.Contains(stringID)) { TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1); if (hits.TotalHits != 1) { Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + string.Join(",", delIDs.ToArray())); doFail = true; } } } Assert.IsFalse(doFail); Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount); ReleaseSearcher(s); Writer.Commit(); Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount); DoClose(); Writer.Dispose(false); // Cannot shutdown until after writer is closed because // writer has merged segment warmer that uses IS to run // searches, and that IS may be using this es! /*if (es != null) * { * es.shutdown(); * es.awaitTermination(1, TimeUnit.SECONDS); * }*/ TestUtil.CheckIndex(Dir); Dir.Dispose(); System.IO.Directory.Delete(tempDir.FullName, true); if (VERBOSE) { Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]"); } }
public virtual void Test() { MockDirectoryWrapper dir = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors")); dir.PreventDoubleWrite = false; double rate = Random().NextDouble() * 0.01; //System.out.println("rate=" + rate); dir.RandomIOExceptionRateOnOpen = rate; int iters = AtLeast(20); LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues()); IndexReader r = null; DirectoryReader r2 = null; bool any = false; MockDirectoryWrapper dirCopy = null; int lastNumDocs = 0; for (int iter = 0; iter < iters; iter++) { IndexWriter w = null; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } try { MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); if (VERBOSE) { // Do this ourselves instead of relying on LTC so // we see incrementing messageID: iwc.SetInfoStream(new TextWriterInfoStream(Console.Out)); } var ms = iwc.MergeScheduler; if (ms is IConcurrentMergeScheduler) { ((IConcurrentMergeScheduler)ms).SetSuppressExceptions(); } w = new IndexWriter(dir, iwc); if (r != null && Random().Next(5) == 3) { if (Random().NextBoolean()) { if (VERBOSE) { Console.WriteLine("TEST: addIndexes IR[]"); } w.AddIndexes(new IndexReader[] { r }); } else { if (VERBOSE) { Console.WriteLine("TEST: addIndexes Directory[]"); } w.AddIndexes(new Directory[] { dirCopy }); } } else { if (VERBOSE) { Console.WriteLine("TEST: addDocument"); } w.AddDocument(docs.NextDoc()); } dir.RandomIOExceptionRateOnOpen = 0.0; w.Dispose(); w = null; // NOTE: this is O(N^2)! Only enable for temporary debugging: //dir.setRandomIOExceptionRateOnOpen(0.0); //TestUtil.CheckIndex(dir); //dir.setRandomIOExceptionRateOnOpen(rate); // Verify numDocs only increases, to catch IndexWriter // accidentally deleting the index: dir.RandomIOExceptionRateOnOpen = 0.0; Assert.IsTrue(DirectoryReader.IndexExists(dir)); if (r2 == null) { r2 = DirectoryReader.Open(dir); } else { DirectoryReader r3 = DirectoryReader.OpenIfChanged(r2); if (r3 != null) { r2.Dispose(); r2 = r3; } } Assert.IsTrue(r2.NumDocs >= lastNumDocs, "before=" + lastNumDocs + " after=" + r2.NumDocs); lastNumDocs = r2.NumDocs; //System.out.println("numDocs=" + lastNumDocs); dir.RandomIOExceptionRateOnOpen = rate; any = true; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": success"); } } catch (IOException ioe) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": exception"); Console.WriteLine(ioe.ToString()); Console.Write(ioe.StackTrace); } if (w != null) { // NOTE: leave random IO exceptions enabled here, // to verify that rollback does not try to write // anything: w.Rollback(); } } if (any && r == null && Random().NextBoolean()) { // Make a copy of a non-empty index so we can use // it to addIndexes later: dir.RandomIOExceptionRateOnOpen = 0.0; r = DirectoryReader.Open(dir); dirCopy = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors.copy")); HashSet <string> files = new HashSet <string>(); foreach (string file in dir.ListAll()) { dir.Copy(dirCopy, file, file, IOContext.DEFAULT); files.Add(file); } dirCopy.Sync(files); // Have IW kiss the dir so we remove any leftover // files ... 
we can easily have leftover files at // the time we take a copy because we are holding // open a reader: (new IndexWriter(dirCopy, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())))).Dispose(); dirCopy.RandomIOExceptionRate = rate; dir.RandomIOExceptionRateOnOpen = rate; } } if (r2 != null) { r2.Dispose(); } if (r != null) { r.Dispose(); dirCopy.Dispose(); } dir.Dispose(); }
public virtual void RunTest(string testName) { m_failed.Value = (false); m_addCount.Value = 0; m_delCount.Value = 0; m_packCount.Value = 0; long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results Random random = new J2N.Randomizer(Random.NextInt64()); using LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues); DirectoryInfo tempDir = CreateTempDir(testName); m_dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW if (m_dir is BaseDirectoryWrapper baseDirectoryWrapper) { baseDirectoryWrapper.CheckIndexOnDispose = false; // don't double-checkIndex, we do it ourselves. } MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random); analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream()); if (LuceneTestCase.TestNightly) { // newIWConfig makes smallish max seg size, which // results in tons and tons of segments for this test // when run nightly: MergePolicy mp = conf.MergePolicy; if (mp is TieredMergePolicy tieredMergePolicy) { //tieredMergePolicy.MaxMergedSegmentMB = 5000.0; tieredMergePolicy.MaxMergedSegmentMB = 2500.0; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour } else if (mp is LogByteSizeMergePolicy logByteSizeMergePolicy) { //logByteSizeMergePolicy.MaxMergeMB = 1000.0; logByteSizeMergePolicy.MaxMergeMB = 500.0; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour } else if (mp is LogMergePolicy logMergePolicy) { //logMergePolicy.MaxMergeDocs = 100000; logMergePolicy.MaxMergeDocs = 50000; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour } } conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousClass(this)); if (Verbose) { conf.SetInfoStream(new PrintStreamInfoStreamAnonymousClass(Console.Out)); } m_writer = new IndexWriter(m_dir, conf); TestUtil.ReduceOpenFiles(m_writer); TaskScheduler es = LuceneTestCase.Random.NextBoolean() ? null : TaskScheduler.Default; DoAfterWriter(es); int NUM_INDEX_THREADS = TestUtil.NextInt32(LuceneTestCase.Random, 2, 4); //int RUN_TIME_SEC = LuceneTestCase.TestNightly ? 300 : RandomMultiplier; // LUCENENET specific - lowered from 300 to 150 to reduce total time on Nightly // build to less than 1 hour. int RUN_TIME_SEC = LuceneTestCase.TestNightly ? 
150 : RandomMultiplier; ISet <string> delIDs = new ConcurrentHashSet <string>(); ISet <string> delPackIDs = new ConcurrentHashSet <string>(); ConcurrentQueue <SubDocs> allSubDocs = new ConcurrentQueue <SubDocs>(); long stopTime = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + (RUN_TIME_SEC * 1000); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results ThreadJob[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs); if (Verbose) { Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results } // Let index build up a bit Thread.Sleep(100); DoSearching(es, stopTime); if (Verbose) { Console.WriteLine("TEST: all searching done [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results } for (int thread = 0; thread < indexThreads.Length; thread++) { indexThreads[thread].Join(); } if (Verbose) { Console.WriteLine("TEST: done join indexing threads [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]; addCount=" + m_addCount + " delCount=" + m_delCount); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results } IndexSearcher s = GetFinalSearcher(); if (Verbose) { Console.WriteLine("TEST: finalSearcher=" + s); } assertFalse(m_failed); bool doFail = false; // Verify: make sure delIDs are in fact deleted: foreach (string id in delIDs) { TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc); doFail = true; } } // Verify: make sure delPackIDs are in fact deleted: foreach (string id in delPackIDs) { TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches"); doFail = true; } } // Verify: make sure each group of sub-docs are still in docID order: foreach (SubDocs subDocs in allSubDocs) { TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20); if (!subDocs.Deleted) { // We sort by relevance but the scores should be identical so sort falls back to by docID: if (hits.TotalHits != subDocs.SubIDs.Count) { Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits); doFail = true; } else { int lastDocID = -1; int startDocID = -1; foreach (ScoreDoc scoreDoc in hits.ScoreDocs) { int docID = scoreDoc.Doc; if (lastDocID != -1) { assertEquals(1 + lastDocID, docID); } else { startDocID = docID; } lastDocID = docID; Document doc = s.Doc(docID); assertEquals(subDocs.PackID, doc.Get("packID")); } lastDocID = startDocID - 1; foreach (string subID in subDocs.SubIDs) { hits = s.Search(new TermQuery(new Term("docid", subID)), 1); assertEquals(1, hits.TotalHits); int docID = hits.ScoreDocs[0].Doc; if (lastDocID != -1) { assertEquals(1 + lastDocID, docID); } lastDocID = docID; } } } else { // Pack was deleted -- make sure its docs are // deleted. 
We can't verify packID is deleted // because we can re-use packID for update: foreach (string subID in subDocs.SubIDs) { assertEquals(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits); } } } // Verify: make sure all not-deleted docs are in fact // not deleted: int endID = Convert.ToInt32(docs.NextDoc().Get("docid"), CultureInfo.InvariantCulture); docs.Dispose(); for (int id = 0; id < endID; id++) { string stringID = id.ToString(CultureInfo.InvariantCulture); if (!delIDs.Contains(stringID)) { TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1); if (hits.TotalHits != 1) { Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + Collections.ToString(delIDs)); doFail = true; } } } assertFalse(doFail); assertEquals("index=" + m_writer.SegString() + " addCount=" + m_addCount + " delCount=" + m_delCount, m_addCount - m_delCount, s.IndexReader.NumDocs); ReleaseSearcher(s); m_writer.Commit(); assertEquals("index=" + m_writer.SegString() + " addCount=" + m_addCount + " delCount=" + m_delCount, m_addCount - m_delCount, m_writer.NumDocs); DoClose(); m_writer.Dispose(false); // Cannot shutdown until after writer is closed because // writer has merged segment warmer that uses IS to run // searches, and that IS may be using this es! /*if (es != null) * { * es.shutdown(); * es.awaitTermination(1, TimeUnit.SECONDS); * }*/ TestUtil.CheckIndex(m_dir); m_dir.Dispose(); //System.IO.Directory.Delete(tempDir.FullName, true); TestUtil.Rm(tempDir); if (Verbose) { Console.WriteLine("TEST: done [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results } }
public virtual void Test() { Random random = new Random(Random().Next()); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues()); Directory d = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); RandomIndexWriter w = new RandomIndexWriter(Random(), d, analyzer, Similarity, TimeZone); int numDocs = AtLeast(10); for (int docCount = 0; docCount < numDocs; docCount++) { w.AddDocument(docs.NextDoc()); } IndexReader r = w.Reader; w.Dispose(); List<BytesRef> terms = new List<BytesRef>(); TermsEnum termsEnum = MultiFields.GetTerms(r, "body").Iterator(null); BytesRef term; while ((term = termsEnum.Next()) != null) { terms.Add(BytesRef.DeepCopyOf(term)); } if (VERBOSE) { Console.WriteLine("TEST: " + terms.Count + " terms"); } int upto = -1; int iters = AtLeast(200); for (int iter = 0; iter < iters; iter++) { bool isEnd; if (upto != -1 && Random().NextBoolean()) { // next if (VERBOSE) { Console.WriteLine("TEST: iter next"); } isEnd = termsEnum.Next() == null; upto++; if (isEnd) { if (VERBOSE) { Console.WriteLine(" end"); } Assert.AreEqual(upto, terms.Count); upto = -1; } else { if (VERBOSE) { Console.WriteLine(" got term=" + termsEnum.Term().Utf8ToString() + " expected=" + terms[upto].Utf8ToString()); } Assert.IsTrue(upto < terms.Count); Assert.AreEqual(terms[upto], termsEnum.Term()); } } else { BytesRef target; string exists; if (Random().NextBoolean()) { // likely fake term if (Random().NextBoolean()) { target = new BytesRef(TestUtil.RandomSimpleString(Random())); } else { target = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random())); } exists = "likely not"; } else { // real term target = terms[Random().Next(terms.Count)]; exists = "yes"; } upto = terms.BinarySearch(target); if (Random().NextBoolean()) { if (VERBOSE) { Console.WriteLine("TEST: iter seekCeil target=" + target.Utf8ToString() + " exists=" + exists); } // seekCeil TermsEnum.SeekStatus status = termsEnum.SeekCeil(target); if (VERBOSE) { Console.WriteLine(" got " + status); } if (upto < 0) { upto = -(upto + 1); if (upto >= terms.Count) { Assert.AreEqual(TermsEnum.SeekStatus.END, status); upto = -1; } else { Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status); Assert.AreEqual(terms[upto], termsEnum.Term()); } } else { Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status); Assert.AreEqual(terms[upto], termsEnum.Term()); } } else { if (VERBOSE) { Console.WriteLine("TEST: iter seekExact target=" + target.Utf8ToString() + " exists=" + exists); } // seekExact bool result = termsEnum.SeekExact(target); if (VERBOSE) { Console.WriteLine(" got " + result); } if (upto < 0) { Assert.IsFalse(result); upto = -1; } else { Assert.IsTrue(result); Assert.AreEqual(target, termsEnum.Term()); } } } } r.Dispose(); d.Dispose(); docs.Dispose(); }
public virtual void TestNRTAndCommit() { Directory dir = NewDirectory(); NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0); MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); RandomIndexWriter w = new RandomIndexWriter(Random(), cachedDir, conf); LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues()); int numDocs = TestUtil.NextInt(Random(), 100, 400); if (VERBOSE) { Console.WriteLine("TEST: numDocs=" + numDocs); } IList<BytesRef> ids = new List<BytesRef>(); DirectoryReader r = null; for (int docCount = 0; docCount < numDocs; docCount++) { Document doc = docs.NextDoc(); ids.Add(new BytesRef(doc.Get("docid"))); w.AddDocument(doc); if (Random().Next(20) == 17) { if (r == null) { r = DirectoryReader.Open(w.w, false); } else { DirectoryReader r2 = DirectoryReader.OpenIfChanged(r); if (r2 != null) { r.Dispose(); r = r2; } } Assert.AreEqual(1 + docCount, r.NumDocs); IndexSearcher s = NewSearcher(r); // Just make sure search can run; we can't assert // totHits since it could be 0 TopDocs hits = s.Search(new TermQuery(new Term("body", "the")), 10); // System.out.println("tot hits " + hits.totalHits); } } if (r != null) { r.Dispose(); } // Close should force cache to clear since all files are sync'd w.Dispose(); string[] cachedFiles = cachedDir.ListCachedFiles(); foreach (string file in cachedFiles) { Console.WriteLine("FAIL: cached file " + file + " remains after sync"); } Assert.AreEqual(0, cachedFiles.Length); r = DirectoryReader.Open(dir); foreach (BytesRef id in ids) { Assert.AreEqual(1, r.DocFreq(new Term("docid", id))); } r.Dispose(); cachedDir.Dispose(); docs.Dispose(); }
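For context on the directory wrapper exercised by TestNRTAndCommit, here is a minimal sketch of the usual near-real-time setup, reusing only calls that already appear in these tests; the 2.0/25.0 arguments cap the merge size and cache size in MB, and the field values are illustrative:

Directory delegateDir = NewDirectory();
// Newly flushed files are kept in RAM up to roughly 25 MB total; merged segments over 2 MB go straight to delegateDir.
NRTCachingDirectory nrtDir = new NRTCachingDirectory(delegateDir, 2.0, 25.0);

IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
IndexWriter writer = new IndexWriter(nrtDir, conf);

Document doc = new Document();
doc.Add(NewStringField("docid", "1", Field.Store.YES));
writer.AddDocument(doc);

// An NRT reader sees the uncommitted doc even though its files may still live only in the cache.
DirectoryReader reader = DirectoryReader.Open(writer, true);
Console.WriteLine("numDocs=" + reader.NumDocs);

reader.Dispose();
// Disposing the writer syncs everything, so the cache should be empty afterwards (which is what the test asserts).
writer.Dispose();
nrtDir.Dispose();
delegateDir.Dispose();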
public virtual void Test() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); RandomIndexWriter w = new RandomIndexWriter(Random, dir, analyzer); LineFileDocs docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues); int charsToIndex = AtLeast(100000); int charsIndexed = 0; //System.out.println("bytesToIndex=" + charsToIndex); while (charsIndexed < charsToIndex) { Document doc = docs.NextDoc(); charsIndexed += doc.Get("body").Length; w.AddDocument(doc); //System.out.println(" bytes=" + charsIndexed + " add: " + doc); } IndexReader r = w.GetReader(); //System.out.println("numDocs=" + r.NumDocs); w.Dispose(); IndexSearcher s = NewSearcher(r); Terms terms = MultiFields.GetFields(r).GetTerms("body"); int termCount = 0; TermsEnum termsEnum = terms.GetEnumerator(); while (termsEnum.MoveNext()) { termCount++; } Assert.IsTrue(termCount > 0); // Target ~10 terms to search: double chance = 10.0 / termCount; termsEnum = terms.GetEnumerator(termsEnum); IDictionary <BytesRef, TopDocs> answers = new Dictionary <BytesRef, TopDocs>(); while (termsEnum.MoveNext()) { if (Random.NextDouble() <= chance) { BytesRef term = BytesRef.DeepCopyOf(termsEnum.Term); answers[term] = s.Search(new TermQuery(new Term("body", term)), 100); } } if (answers.Count > 0) { CountdownEvent startingGun = new CountdownEvent(1); int numThreads = TestUtil.NextInt32(Random, 2, 5); ThreadJob[] threads = new ThreadJob[numThreads]; for (int threadID = 0; threadID < numThreads; threadID++) { ThreadJob thread = new ThreadAnonymousClass(this, s, answers, startingGun); threads[threadID] = thread; thread.Start(); } startingGun.Signal(); foreach (ThreadJob thread in threads) { thread.Join(); } } r.Dispose(); dir.Dispose(); }
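The ThreadAnonymousClass used above is not shown in this excerpt; a plausible sketch of such a worker, assuming it waits on the CountdownEvent and then re-runs each baseline query to compare TotalHits (the class name, fields, and constructor are hypothetical):

internal class SearchWorker : ThreadJob
{
    private readonly IndexSearcher searcher;
    private readonly IDictionary<BytesRef, TopDocs> answers;
    private readonly CountdownEvent startingGun;

    public SearchWorker(IndexSearcher searcher, IDictionary<BytesRef, TopDocs> answers, CountdownEvent startingGun)
    {
        this.searcher = searcher;
        this.answers = answers;
        this.startingGun = startingGun;
    }

    public override void Run()
    {
        startingGun.Wait(); // all workers start hammering the searcher at the same time
        foreach (KeyValuePair<BytesRef, TopDocs> answer in answers)
        {
            TopDocs hits = searcher.Search(new TermQuery(new Term("body", answer.Key)), 100);
            // The reader is frozen, so concurrent searches must reproduce the single-threaded baseline.
            Assert.AreEqual(answer.Value.TotalHits, hits.TotalHits);
        }
    }
}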
// TODO: create a testNormsNotPresent ourselves by adding/deleting/merging docs public virtual void BuildIndex(Directory dir) { Random random = Random(); MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); Similarity provider = new MySimProvider(this); config.SetSimilarity(provider); RandomIndexWriter writer = new RandomIndexWriter(random, dir, config); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues()); int num = AtLeast(100); for (int i = 0; i < num; i++) { Document doc = docs.NextDoc(); int boost = Random().Next(255); Field f = new TextField(ByteTestField, "" + boost, Field.Store.YES); f.Boost = boost; doc.Add(f); writer.AddDocument(doc); doc.RemoveField(ByteTestField); if (Rarely()) { writer.Commit(); } } writer.Commit(); writer.Dispose(); docs.Dispose(); }
/// <summary> /// Populates a writer with random stuff. This must be fully reproducible with the seed! /// </summary> public static void CreateRandomIndex(int numdocs, RandomIndexWriter writer, long seed) { Random random = new Random((int)seed); // primary source for our data is LineFileDocs; it's realistic. LineFileDocs lineFileDocs = new LineFileDocs(random); // LUCENENET: compile a regex so we don't have to do it in each loop (for regex.split()) Regex whiteSpace = new Regex("\\s+", RegexOptions.Compiled); // TODO: we should add other fields that use things like docs & freqs but omit positions, // because LineFileDocs doesn't cover all the possibilities. for (int i = 0; i < numdocs; i++) { Document document = lineFileDocs.NextDoc(); // grab the title and add some SortedSet instances for fun string title = document.Get("titleTokenized"); string[] split = whiteSpace.Split(title); foreach (string trash in split) { document.Add(new SortedSetDocValuesField("sortedset", new BytesRef(trash))); } // add a numeric dv field sometimes document.RemoveFields("sparsenumeric"); if (random.Next(4) == 2) { document.Add(new NumericDocValuesField("sparsenumeric", random.Next())); } writer.AddDocument(document); } lineFileDocs.Dispose(); }
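A hypothetical usage of the helper, leaning on the documented guarantee that the same seed reproduces the same document stream (the writer setup mirrors the tests above; the doc count and seed are arbitrary):

Directory dirA = NewDirectory();
Directory dirB = NewDirectory();
MockAnalyzer analyzer = new MockAnalyzer(Random());
IndexWriterConfig configA = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
IndexWriterConfig configB = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
RandomIndexWriter writerA = new RandomIndexWriter(Random(), dirA, configA);
RandomIndexWriter writerB = new RandomIndexWriter(Random(), dirB, configB);

// Same doc count and same seed => both writers are fed an identical sequence of documents,
// which is what lets index-comparison tests diff the two directories deterministically.
long seed = Random().Next();
CreateRandomIndex(100, writerA, seed);
CreateRandomIndex(100, writerB, seed);

writerA.Dispose();
writerB.Dispose();
dirA.Dispose();
dirB.Dispose();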