public MockRandomLookaheadTokenFilter(Random random, TokenStream @in)
    : base(@in)
{
    this.termAtt = AddAttribute<ICharTermAttribute>();
    this.seed = random.NextInt64();
    this.random = new J2N.Randomizer(seed);
}
private readonly Codec codec;

// sugar
public static IndexWriter MockIndexWriter(Directory dir, IndexWriterConfig conf, Random r)
{
    // Randomly calls Thread.Yield() so we mix up thread scheduling
    Random random = new J2N.Randomizer(r.NextInt64());
    return MockIndexWriter(dir, conf, new TestPointAnonymousClass(random));
}
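// A minimal sketch (not from the source; name is hypothetical) of the
// seed-forking pattern used above: drawing NextInt64() from the master test
// Random to seed a child J2N.Randomizer gives each consumer an independent
// stream while keeping the whole run reproducible from the single master seed.
public static Random ForkRandom(Random master)
{
    // The child is fully determined by the master's state at this call.
    return new J2N.Randomizer(master.NextInt64());
}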
private void TestRandomWords(int maxNumWords, int numIter)
{
    Random random = new J2N.Randomizer(Random.NextInt64());
    for (int iter = 0; iter < numIter; iter++)
    {
        if (Verbose) Console.WriteLine("\nTEST: iter " + iter);
        for (int inputMode = 0; inputMode < 2; inputMode++)
        {
            int numWords = random.nextInt(maxNumWords + 1);
            ISet<Int32sRef> termsSet = new JCG.HashSet<Int32sRef>();
            //Int32sRef[] terms = new Int32sRef[numWords]; // LUCENENET: Not used
            while (termsSet.size() < numWords)
            {
                string term = FSTTester<object>.GetRandomString(random);
                termsSet.Add(FSTTester<object>.ToInt32sRef(term, inputMode));
            }
            DoTest(inputMode, termsSet.ToArray());
        }
    }
}
/// <summary>
/// Initializes the randomized context and seed for the test fixture.
/// </summary>
/// <param name="fixture">The test fixture.</param>
/// <param name="testAssembly">The assembly that contains the tests.</param>
/// <param name="seedOffset">Offset that will be added to the initial seed. This should be different for SetUpFixture and TestFixture attributes
/// so they have different seeds that are deterministically based on the initial seed.</param>
/// <returns>The randomized context.</returns>
public RandomizedContext InitializeTestFixture(Test fixture, Assembly testAssembly, int seedOffset = 0)
{
    if (fixture is null)
        throw new ArgumentNullException(nameof(fixture));

    if (!TryGetRandomSeedsFromContext(fixture, out initialSeed, out testSeed)) // NOTE: This sets the initialSeed and testSeed fields for this class.
    {
        initialSeed = new J2N.Randomizer().NextInt64(); // Seed not configured or explicitly set to "random", so auto-generate
    }
    random = new J2N.Randomizer(initialSeed + seedOffset);
    int goodFastHashSeed = (int)initialSeed * 31; // LUCENENET: Multiplying by 31 to remove the possibility of a collision with the test framework while still using a deterministic number.
    if (StringHelper.goodFastHashSeed != goodFastHashSeed)
    {
        StringHelper.goodFastHashSeed = goodFastHashSeed;
    }

    // Now we need to generate the first seed for our test fixture
    // which will be used during OneTimeSetUp and OneTimeTearDown.

    // Assumption: The passed in fixture doesn't have any tests added.
    // The tests are added in a later step to prevent differences in the
    // result when there are filters applied.

    // Generate a new long value that is the seed for this specific test.
    return InitializeTestFixture(fixture, new RandomizedContext(fixture, testAssembly, initialSeed, testSeed ?? random.NextInt64()));
}
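// A minimal sketch (assumed, not from the source) of the seedOffset idea above:
// fixtures that share one initialSeed but use different offsets get distinct yet
// fully deterministic random streams, so a whole run replays from one seed.
long initialSeed = 12345L;                                    // hypothetical configured seed
var setUpFixtureRandom = new J2N.Randomizer(initialSeed + 1); // e.g. SetUpFixture offset
var testFixtureRandom = new J2N.Randomizer(initialSeed + 2);  // e.g. TestFixture offset
// Re-running with the same initialSeed reproduces both sequences exactly.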
public ThreadAnonymousClass2(TestStressNRT outerInstance, string str, int ndocs, bool tombstones, AtomicInt64 operations)
    : base(str)
{
    this.outerInstance = outerInstance;
    this.ndocs = ndocs;
    this.tombstones = tombstones;
    this.operations = operations;
    rand = new J2N.Randomizer(Random.NextInt64());
}
private void TestCaseIterator(int itrsWithVal, int specifiedValsOnItr, bool removeDups)
{
    // Build a random number of lists
    IList<int> expected = new JCG.List<int>();
    Random random = new J2N.Randomizer(Random.NextInt64());
    int numLists = itrsWithVal + random.Next(1000 - itrsWithVal);
    IList<int>[] lists = new IList<int>[numLists];
    for (int i = 0; i < numLists; i++)
    {
        lists[i] = new JCG.List<int>();
    }
    int start = random.Next(1000000);
    int end = start + VALS_TO_MERGE / itrsWithVal / Math.Abs(specifiedValsOnItr);
    for (int i = start; i < end; i++)
    {
        int maxList = lists.Length;
        int maxValsOnItr = 0;
        int sumValsOnItr = 0;
        for (int itrWithVal = 0; itrWithVal < itrsWithVal; itrWithVal++)
        {
            int list = random.Next(maxList);
            int valsOnItr = specifiedValsOnItr < 0 ? (1 + random.Next(-specifiedValsOnItr)) : specifiedValsOnItr;
            maxValsOnItr = Math.Max(maxValsOnItr, valsOnItr);
            sumValsOnItr += valsOnItr;
            for (int valOnItr = 0; valOnItr < valsOnItr; valOnItr++)
            {
                lists[list].Add(i);
            }
            maxList--;
            ArrayUtil.Swap(lists, list, maxList);
        }
        int maxCount = removeDups ? maxValsOnItr : sumValsOnItr;
        for (int count = 0; count < maxCount; count++)
        {
            expected.Add(i);
        }
    }
    // Now check that they get merged cleanly
    IEnumerator<int>[] itrs = new IEnumerator<int>[numLists];
    for (int i = 0; i < numLists; i++)
    {
        itrs[i] = lists[i].GetEnumerator();
    }
    MergedIterator<int> mergedItr = new MergedIterator<int>(removeDups, itrs);
    using IEnumerator<int> expectedItr = expected.GetEnumerator();
    while (expectedItr.MoveNext())
    {
        Assert.IsTrue(mergedItr.MoveNext());
        Assert.AreEqual(expectedItr.Current, mergedItr.Current);
    }
    Assert.IsFalse(mergedItr.MoveNext());
}
public virtual void Test()
{
    Directory dir = NewDirectory();
    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
    IList<long> numbers = new JCG.List<long>();
    IList<BytesRef> binary = new JCG.List<BytesRef>();
    IList<BytesRef> sorted = new JCG.List<BytesRef>();
    int numDocs = AtLeast(100);
    for (int i = 0; i < numDocs; i++)
    {
        Document d = new Document();
        long number = Random.NextInt64();
        d.Add(new NumericDocValuesField("number", number));
        BytesRef bytes = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random));
        d.Add(new BinaryDocValuesField("bytes", bytes));
        binary.Add(bytes);
        bytes = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random));
        d.Add(new SortedDocValuesField("sorted", bytes));
        sorted.Add(bytes);
        w.AddDocument(d);
        numbers.Add(number);
    }

    w.ForceMerge(1);
    IndexReader r = w.GetReader();
    w.Dispose();

    Assert.AreEqual(1, r.Leaves.Count);
    AtomicReader ar = (AtomicReader)r.Leaves[0].Reader;

    int numThreads = TestUtil.NextInt32(Random, 2, 5);
    IList<ThreadJob> threads = new JCG.List<ThreadJob>();
    CountdownEvent startingGun = new CountdownEvent(1);
    for (int t = 0; t < numThreads; t++)
    {
        Random threadRandom = new J2N.Randomizer(Random.NextInt64());
        ThreadJob thread = new ThreadAnonymousClass(this, numbers, binary, sorted, numDocs, ar, startingGun, threadRandom);
        thread.Start();
        threads.Add(thread);
    }

    startingGun.Signal();

    foreach (ThreadJob thread in threads)
    {
        thread.Join();
    }

    r.Dispose();
    dir.Dispose();
}
public virtual void Test()
{
    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("test2BPagedBytes"));
    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
    }
    PagedBytes pb = new PagedBytes(15);
    IndexOutput dataOutput = dir.CreateOutput("foo", IOContext.DEFAULT);
    long netBytes = 0;
    long seed = Random.NextInt64();
    long lastFP = 0;
    Random r2 = new J2N.Randomizer(seed);
    while (netBytes < 1.1 * int.MaxValue)
    {
        int numBytes = TestUtil.NextInt32(r2, 1, 32768);
        byte[] bytes = new byte[numBytes];
        r2.NextBytes(bytes);
        dataOutput.WriteBytes(bytes, bytes.Length);
        long fp = dataOutput.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        if (Debugging.AssertsEnabled) Debugging.Assert(fp == lastFP + numBytes);
        lastFP = fp;
        netBytes += numBytes;
    }
    dataOutput.Dispose();
    IndexInput input = dir.OpenInput("foo", IOContext.DEFAULT);
    pb.Copy(input, input.Length);
    input.Dispose();
    PagedBytes.Reader reader = pb.Freeze(true);

    r2 = new J2N.Randomizer(seed);
    netBytes = 0;
    while (netBytes < 1.1 * int.MaxValue)
    {
        int numBytes = TestUtil.NextInt32(r2, 1, 32768);
        var bytes = new byte[numBytes];
        r2.NextBytes(bytes);
        BytesRef expected = new BytesRef(bytes);
        BytesRef actual = new BytesRef();
        reader.FillSlice(actual, netBytes, numBytes);
        Assert.AreEqual(expected, actual);
        netBytes += numBytes;
    }
    dir.Dispose();
}
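// A minimal, self-contained sketch (assumption: a plain console app with only
// J2N referenced) of the write-then-replay idiom above: capture one seed,
// generate data from a J2N.Randomizer, then rebuild the randomizer from the
// same seed to regenerate the identical bytes for verification, without
// buffering the written data.
using System;

public static class ReplayVerifyDemo
{
    public static void Main()
    {
        long seed = new J2N.Randomizer().NextInt64();

        var writeRandom = new J2N.Randomizer(seed);
        byte[] written = new byte[16];
        writeRandom.NextBytes(written); // "write" phase

        var verifyRandom = new J2N.Randomizer(seed);
        byte[] expected = new byte[16];
        verifyRandom.NextBytes(expected); // "verify" phase replays the stream

        bool same = true;
        for (int i = 0; i < written.Length; i++)
        {
            same &= written[i] == expected[i];
        }
        Console.WriteLine(same); // True: same seed, same sequence
    }
}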
public virtual void Test() { Directory dir = NewFSDirectory(CreateTempDir("livefieldupdates")); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); IndexWriter w = new IndexWriter(dir, iwc); SearcherManager mgr = new SearcherManager(w, true, new SearcherFactoryAnonymousClass()); const int missing = -1; LiveFieldValues <IndexSearcher, int?> rt = new LiveFieldValuesAnonymousClass(mgr, missing); int numThreads = TestUtil.NextInt32(Random, 2, 5); if (Verbose) { Console.WriteLine(numThreads + " threads"); } CountdownEvent startingGun = new CountdownEvent(1); IList <ThreadJob> threads = new JCG.List <ThreadJob>(); int iters = AtLeast(1000); int idCount = TestUtil.NextInt32(Random, 100, 10000); double reopenChance = Random.NextDouble() * 0.01; double deleteChance = Random.NextDouble() * 0.25; double addChance = Random.NextDouble() * 0.5; for (int t = 0; t < numThreads; t++) { int threadID = t; Random threadRandom = new J2N.Randomizer(Random.NextInt64()); ThreadJob thread = new ThreadAnonymousClass(w, mgr, missing, rt, startingGun, iters, idCount, reopenChance, deleteChance, addChance, t, threadID, threadRandom); threads.Add(thread); thread.Start(); } startingGun.Signal(); foreach (ThreadJob thread in threads) { thread.Join(); } mgr.MaybeRefresh(); Assert.AreEqual(0, rt.Count); rt.Dispose(); mgr.Dispose(); w.Dispose(); dir.Dispose(); }
public void TestBasic()
{
    Input[] keys = new Input[]
    {
        new Input("foo", 50),
        new Input("bar", 10),
        new Input("barbar", 12),
        new Input("barbara", 6)
    };
    Random random = new J2N.Randomizer(Random.NextInt64());
    WFSTCompletionLookup suggester = new WFSTCompletionLookup();
    suggester.Build(new InputArrayEnumerator(keys));

    // top N of 2, but only foo is available
    IList<Lookup.LookupResult> results = suggester.DoLookup(TestUtil.StringToCharSequence("f", random).ToString(), false, 2);
    assertEquals(1, results.size());
    assertEquals("foo", results[0].Key.toString());
    assertEquals(50, results[0].Value, 0.01F);

    // make sure we don't get a dup exact suggestion:
    results = suggester.DoLookup(TestUtil.StringToCharSequence("foo", random).ToString(), false, 2);
    assertEquals(1, results.size());
    assertEquals("foo", results[0].Key.toString());
    assertEquals(50, results[0].Value, 0.01F);

    // top N of 1 for 'bar': we return this even though barbar is higher
    results = suggester.DoLookup(TestUtil.StringToCharSequence("bar", random).ToString(), false, 1);
    assertEquals(1, results.size());
    assertEquals("bar", results[0].Key.toString());
    assertEquals(10, results[0].Value, 0.01F);

    // top N of 2 for 'b'
    results = suggester.DoLookup(TestUtil.StringToCharSequence("b", random).ToString(), false, 2);
    assertEquals(2, results.size());
    assertEquals("barbar", results[0].Key.toString());
    assertEquals(12, results[0].Value, 0.01F);
    assertEquals("bar", results[1].Key.toString());
    assertEquals(10, results[1].Value, 0.01F);

    // top N of 3 for 'ba'
    results = suggester.DoLookup(TestUtil.StringToCharSequence("ba", random).ToString(), false, 3);
    assertEquals(3, results.size());
    assertEquals("barbar", results[0].Key.toString());
    assertEquals(12, results[0].Value, 0.01F);
    assertEquals("bar", results[1].Key.toString());
    assertEquals(10, results[1].Value, 0.01F);
    assertEquals("barbara", results[2].Key.toString());
    assertEquals(6, results[2].Value, 0.01F);
}
/// <summary>
/// Populates a writer with random stuff. This must be fully reproducible with
/// the seed!
/// </summary>
public static void CreateRandomIndex(int numdocs, RandomIndexWriter writer, long seed)
{
    Random random = new J2N.Randomizer(seed);
    // primary source for our data is from linefiledocs, it's realistic.
    LineFileDocs lineFileDocs = new LineFileDocs(random, false); // no docvalues in 4x
    // TODO: we should add other fields that use things like docs&freqs but omit
    // positions, because linefiledocs doesn't cover all the possibilities.
    for (int i = 0; i < numdocs; i++)
    {
        writer.AddDocument(lineFileDocs.NextDoc());
    }
    lineFileDocs.Dispose();
}
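// A hypothetical usage sketch (the writers and counts below are invented for
// illustration): because CreateRandomIndex seeds its own J2N.Randomizer, two
// calls with the same seed add the same documents in the same order, so tests
// can build two "equal" indexes and compare them.
// CreateRandomIndex(100, leftWriter, 42L);
// CreateRandomIndex(100, rightWriter, 42L); // identical content to leftWriter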
public ThreadAnonymousClass(TestStressNRT outerInstance, string str, int commitPercent, int softCommitPercent, int deletePercent, int deleteByQueryPercent, int ndocs, int maxConcurrentCommits, bool tombstones, AtomicInt64 operations, FieldType storedOnlyType, AtomicInt32 numCommitting, RandomIndexWriter writer)
    : base(str)
{
    this.outerInstance = outerInstance;
    this.commitPercent = commitPercent;
    this.softCommitPercent = softCommitPercent;
    this.deletePercent = deletePercent;
    this.deleteByQueryPercent = deleteByQueryPercent;
    this.ndocs = ndocs;
    this.maxConcurrentCommits = maxConcurrentCommits;
    this.tombstones = tombstones;
    this.operations = operations;
    this.storedOnlyType = storedOnlyType;
    this.numCommitting = numCommitting;
    this.writer = writer;
    rand = new J2N.Randomizer(Random.NextInt64());
}
/// <summary>
/// Creates a new iterator, wrapping the specified iterator and
/// returning elements in a random order.
/// </summary>
public UnsortedInputEnumerator(IInputEnumerator source)
    : base(source)
{
    ords = new int[m_entries.Length];
    Random random = new J2N.Randomizer();
    for (int i = 0; i < ords.Length; i++)
    {
        ords[i] = i;
    }
    for (int i = 0; i < ords.Length; i++)
    {
        int randomPosition = random.Next(ords.Length);
        int temp = ords[i];
        ords[i] = ords[randomPosition];
        ords[randomPosition] = temp;
    }
}
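// A side-note sketch (hypothetical helper, not in the source): the swap loop
// above produces a random permutation, but it is not a uniform Fisher-Yates
// shuffle, since each position is swapped with an index drawn from the whole
// array. A uniform variant would draw from the unshuffled prefix only:
private static void FisherYatesShuffle(int[] ords, Random random)
{
    for (int i = ords.Length - 1; i > 0; i--)
    {
        int j = random.Next(i + 1); // j in [0, i], inclusive
        (ords[i], ords[j]) = (ords[j], ords[i]); // tuple swap
    }
}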
public void TestEqualsHashCode()
{
    CommonTermsQuery query = new CommonTermsQuery(RandomOccur(Random), RandomOccur(Random), Random.NextSingle(), Random.NextBoolean());
    int terms = AtLeast(2);
    for (int i = 0; i < terms; i++)
    {
        query.Add(new Term(TestUtil.RandomRealisticUnicodeString(Random), TestUtil.RandomRealisticUnicodeString(Random)));
    }
    QueryUtils.CheckHashEquals(query);
    QueryUtils.CheckUnequal(new CommonTermsQuery(RandomOccur(Random), RandomOccur(Random), Random.NextSingle(), Random.NextBoolean()), query);

    {
        long seed = Random.NextInt64();
        Random r = new J2N.Randomizer(seed);
        CommonTermsQuery left = new CommonTermsQuery(RandomOccur(r), RandomOccur(r), r.NextSingle(), r.NextBoolean());
        int leftTerms = AtLeast(r, 2);
        for (int i = 0; i < leftTerms; i++)
        {
            left.Add(new Term(TestUtil.RandomRealisticUnicodeString(r), TestUtil.RandomRealisticUnicodeString(r)));
        }
        left.HighFreqMinimumNumberShouldMatch = r.nextInt(4);
        left.LowFreqMinimumNumberShouldMatch = r.nextInt(4);

        // Rebuild the randomizer from the same seed so 'right' replays the
        // exact same sequence of random choices as 'left':
        r = new J2N.Randomizer(seed);
        CommonTermsQuery right = new CommonTermsQuery(RandomOccur(r), RandomOccur(r), r.NextSingle(), r.NextBoolean());
        int rightTerms = AtLeast(r, 2);
        for (int i = 0; i < rightTerms; i++)
        {
            right.Add(new Term(TestUtil.RandomRealisticUnicodeString(r), TestUtil.RandomRealisticUnicodeString(r)));
        }
        right.HighFreqMinimumNumberShouldMatch = r.nextInt(4);
        right.LowFreqMinimumNumberShouldMatch = r.nextInt(4);
        QueryUtils.CheckEqual(left, right);
    }
}
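// A minimal sketch (assumption: plain console app, only J2N referenced) of the
// twin-stream pattern used in TestEqualsHashCode: two J2N.Randomizer instances
// built from one seed make two independent code paths draw identical values.
using System;

public static class TwinStreamDemo
{
    public static void Main()
    {
        long seed = 1234L; // hypothetical fixed seed
        var left = new J2N.Randomizer(seed);
        var right = new J2N.Randomizer(seed);

        // Any equal sequence of calls yields equal results...
        Console.WriteLine(left.Next(100) == right.Next(100));     // True
        Console.WriteLine(left.NextInt64() == right.NextInt64()); // True
        // ...so two objects configured from the two streams compare equal.
    }
}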
public virtual int DoTest(int iter, int ndocs, int maxTF, float percentDocs)
{
    Directory dir = NewDirectory();

    long start = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    AddDocs(LuceneTestCase.Random, dir, ndocs, "foo", "val", maxTF, percentDocs);
    long end = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond;
    if (Verbose) Console.WriteLine("milliseconds for creation of " + ndocs + " docs = " + (end - start));

    IndexReader reader = DirectoryReader.Open(dir);
    TermsEnum tenum = MultiFields.GetTerms(reader, "foo").GetEnumerator();

    start = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond;
    int ret = 0;
    DocsEnum tdocs = null;
    Random random = new J2N.Randomizer(Random.NextInt64());
    for (int i = 0; i < iter; i++)
    {
        tenum.SeekCeil(new BytesRef("val"));
        tdocs = TestUtil.Docs(random, tenum, MultiFields.GetLiveDocs(reader), tdocs, DocsFlags.NONE);
        while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
        {
            ret += tdocs.DocID;
        }
    }
    end = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond;
    if (Verbose) Console.WriteLine("milliseconds for " + iter + " TermDocs iteration: " + (end - start));

    return ret;
}
private MultiPhraseQuery RandomPhraseQuery(long seed)
{
    Random random = new J2N.Randomizer(seed);
    int length = TestUtil.NextInt32(random, 2, 5);
    MultiPhraseQuery pq = new MultiPhraseQuery();
    int position = 0;
    for (int i = 0; i < length; i++)
    {
        int depth = TestUtil.NextInt32(random, 1, 3);
        Term[] terms = new Term[depth];
        for (int j = 0; j < depth; j++)
        {
            terms[j] = new Term("field", "" + (char)TestUtil.NextInt32(random, 'a', 'z'));
        }
        pq.Add(terms, position);
        position += TestUtil.NextInt32(random, 1, 3);
    }
    return pq;
}
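// A minimal usage sketch (assumed, not from the source; it also assumes
// MultiPhraseQuery compares equal by structure) of the seed-parameterized
// factory above: passing the seed instead of a Random lets callers rebuild
// the exact same query later, e.g. to compare two executions of "the same"
// random query.
public virtual void DemoSameSeedSameQuery()
{
    long seed = Random.NextInt64();
    MultiPhraseQuery q1 = RandomPhraseQuery(seed);
    MultiPhraseQuery q2 = RandomPhraseQuery(seed);
    Assert.AreEqual(q1, q2); // equal because both derive from one seed
}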
public virtual void Test()
{
    IList<string> postingsList = new JCG.List<string>();
    int numTerms = AtLeast(300);
    int maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20);

    bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal);

    IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random));

    if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TestNightly || RandomMultiplier > 1))
    {
        // Otherwise test can take way too long (> 2 hours)
        //numTerms /= 2;
        // LUCENENET specific - To keep this under the 1 hour free limit
        // of Azure DevOps, this was reduced from /2 to /6.
        numTerms /= 6;
    }
    if (Verbose)
    {
        Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
        Console.WriteLine("numTerms=" + numTerms);
    }
    for (int i = 0; i < numTerms; i++)
    {
        string term = Convert.ToString(i, CultureInfo.InvariantCulture);
        for (int j = 0; j < i; j++)
        {
            postingsList.Add(term);
        }
    }

    postingsList.Shuffle(Random);

    ConcurrentQueue<string> postings = new ConcurrentQueue<string>(postingsList);

    Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));

    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

    int threadCount = TestUtil.NextInt32(Random, 1, 5);
    if (Verbose)
    {
        Console.WriteLine("config: " + iw.IndexWriter.Config);
        Console.WriteLine("threadCount=" + threadCount);
    }

    Field prototype = NewTextField("field", "", Field.Store.NO);
    FieldType fieldType = new FieldType(prototype.FieldType);
    if (Random.NextBoolean())
    {
        fieldType.OmitNorms = true;
    }
    int options = Random.Next(3);
    if (options == 0)
    {
        fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS; // we don't actually need positions
        fieldType.StoreTermVectors = true; // but enforce term vectors when we do this so we check SOMETHING
    }
    else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
    {
        fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    }
    // else just positions

    ThreadJob[] threads = new ThreadJob[threadCount];
    CountdownEvent startingGun = new CountdownEvent(1);

    for (int threadID = 0; threadID < threadCount; threadID++)
    {
        Random threadRandom = new J2N.Randomizer(Random.NextInt64());
        Document document = new Document();
        Field field = new Field("field", "", fieldType);
        document.Add(field);
        threads[threadID] = new ThreadAnonymousClass(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field);
        threads[threadID].Start();
    }
    startingGun.Signal();
    foreach (ThreadJob t in threads)
    {
        t.Join();
    }

    iw.ForceMerge(1);
    DirectoryReader ir = iw.GetReader();
    Assert.AreEqual(1, ir.Leaves.Count);
    AtomicReader air = (AtomicReader)ir.Leaves[0].Reader;
    Terms terms = air.GetTerms("field");
    // numTerms-1 because there cannot be a term 0 with 0 postings:
    Assert.AreEqual(numTerms - 1, terms.Count);
    TermsEnum termsEnum = terms.GetEnumerator();
    while (termsEnum.MoveNext())
    {
        int value = Convert.ToInt32(termsEnum.Term.Utf8ToString(), CultureInfo.InvariantCulture);
        Assert.AreEqual(value, termsEnum.TotalTermFreq);
        // don't really need to check more than this, as CheckIndex
        // will verify that totalTermFreq == total number of positions seen
        // from a docsAndPositionsEnum.
    }
    ir.Dispose();
    iw.Dispose();
    dir.Dispose();
}
public virtual void RunTest(string testName)
{
    m_failed.Value = false;
    m_addCount.Value = 0;
    m_delCount.Value = 0;
    m_packCount.Value = 0;

    long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

    Random random = new J2N.Randomizer(Random.NextInt64());
    using LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues);
    DirectoryInfo tempDir = CreateTempDir(testName);
    m_dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
    if (m_dir is BaseDirectoryWrapper baseDirectoryWrapper)
    {
        baseDirectoryWrapper.CheckIndexOnDispose = false; // don't double-checkIndex, we do it ourselves.
    }
    MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);
    analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

    if (LuceneTestCase.TestNightly)
    {
        // newIWConfig makes smallish max seg size, which
        // results in tons and tons of segments for this test
        // when run nightly:
        MergePolicy mp = conf.MergePolicy;
        if (mp is TieredMergePolicy tieredMergePolicy)
        {
            //tieredMergePolicy.MaxMergedSegmentMB = 5000.0;
            tieredMergePolicy.MaxMergedSegmentMB = 2500.0; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour
        }
        else if (mp is LogByteSizeMergePolicy logByteSizeMergePolicy)
        {
            //logByteSizeMergePolicy.MaxMergeMB = 1000.0;
            logByteSizeMergePolicy.MaxMergeMB = 500.0; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour
        }
        else if (mp is LogMergePolicy logMergePolicy)
        {
            //logMergePolicy.MaxMergeDocs = 100000;
            logMergePolicy.MaxMergeDocs = 50000; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour
        }
    }

    conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousClass(this));

    if (Verbose)
    {
        conf.SetInfoStream(new PrintStreamInfoStreamAnonymousClass(Console.Out));
    }
    m_writer = new IndexWriter(m_dir, conf);
    TestUtil.ReduceOpenFiles(m_writer);

    TaskScheduler es = LuceneTestCase.Random.NextBoolean() ? null : TaskScheduler.Default;

    DoAfterWriter(es);

    int NUM_INDEX_THREADS = TestUtil.NextInt32(LuceneTestCase.Random, 2, 4);

    //int RUN_TIME_SEC = LuceneTestCase.TestNightly ? 300 : RandomMultiplier;
    // LUCENENET specific - lowered from 300 to 150 to reduce total time on Nightly
    // build to less than 1 hour.
    int RUN_TIME_SEC = LuceneTestCase.TestNightly ? 150 : RandomMultiplier;

    ISet<string> delIDs = new ConcurrentHashSet<string>();
    ISet<string> delPackIDs = new ConcurrentHashSet<string>();
    ConcurrentQueue<SubDocs> allSubDocs = new ConcurrentQueue<SubDocs>();

    long stopTime = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + (RUN_TIME_SEC * 1000);

    ThreadJob[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

    if (Verbose)
    {
        Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]");
    }

    // Let index build up a bit
    Thread.Sleep(100);

    DoSearching(es, stopTime);

    if (Verbose)
    {
        Console.WriteLine("TEST: all searching done [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]");
    }

    for (int thread = 0; thread < indexThreads.Length; thread++)
    {
        indexThreads[thread].Join();
    }

    if (Verbose)
    {
        Console.WriteLine("TEST: done join indexing threads [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]; addCount=" + m_addCount + " delCount=" + m_delCount);
    }

    IndexSearcher s = GetFinalSearcher();
    if (Verbose)
    {
        Console.WriteLine("TEST: finalSearcher=" + s);
    }

    assertFalse(m_failed);

    bool doFail = false;

    // Verify: make sure delIDs are in fact deleted:
    foreach (string id in delIDs)
    {
        TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
        if (hits.TotalHits != 0)
        {
            Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
            doFail = true;
        }
    }

    // Verify: make sure delPackIDs are in fact deleted:
    foreach (string id in delPackIDs)
    {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
        if (hits.TotalHits != 0)
        {
            Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
            doFail = true;
        }
    }

    // Verify: make sure each group of sub-docs are still in docID order:
    foreach (SubDocs subDocs in allSubDocs)
    {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
        if (!subDocs.Deleted)
        {
            // We sort by relevance but the scores should be identical so sort falls back to by docID:
            if (hits.TotalHits != subDocs.SubIDs.Count)
            {
                Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                doFail = true;
            }
            else
            {
                int lastDocID = -1;
                int startDocID = -1;
                foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                {
                    int docID = scoreDoc.Doc;
                    if (lastDocID != -1)
                    {
                        assertEquals(1 + lastDocID, docID);
                    }
                    else
                    {
                        startDocID = docID;
                    }
                    lastDocID = docID;
                    Document doc = s.Doc(docID);
                    assertEquals(subDocs.PackID, doc.Get("packID"));
                }

                lastDocID = startDocID - 1;
                foreach (string subID in subDocs.SubIDs)
                {
                    hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                    assertEquals(1, hits.TotalHits);
                    int docID = hits.ScoreDocs[0].Doc;
                    if (lastDocID != -1)
                    {
                        assertEquals(1 + lastDocID, docID);
                    }
                    lastDocID = docID;
                }
            }
        }
        else
        {
            // Pack was deleted -- make sure its docs are deleted.
            // We can't verify packID is deleted because we can
            // re-use packID for update:
            foreach (string subID in subDocs.SubIDs)
            {
                assertEquals(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
            }
        }
    }

    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    int endID = Convert.ToInt32(docs.NextDoc().Get("docid"), CultureInfo.InvariantCulture);
    docs.Dispose();

    for (int id = 0; id < endID; id++)
    {
        string stringID = id.ToString(CultureInfo.InvariantCulture);
        if (!delIDs.Contains(stringID))
        {
            TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
            if (hits.TotalHits != 1)
            {
                Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + Collections.ToString(delIDs));
                doFail = true;
            }
        }
    }
    assertFalse(doFail);

    assertEquals("index=" + m_writer.SegString() + " addCount=" + m_addCount + " delCount=" + m_delCount, m_addCount - m_delCount, s.IndexReader.NumDocs);
    ReleaseSearcher(s);

    m_writer.Commit();

    assertEquals("index=" + m_writer.SegString() + " addCount=" + m_addCount + " delCount=" + m_delCount, m_addCount - m_delCount, m_writer.NumDocs);

    DoClose();
    m_writer.Dispose(false);

    // Cannot shutdown until after writer is closed because
    // writer has merged segment warmer that uses IS to run
    // searches, and that IS may be using this es!
    /*if (es != null)
    {
        es.shutdown();
        es.awaitTermination(1, TimeUnit.SECONDS);
    }*/

    TestUtil.CheckIndex(m_dir);
    m_dir.Dispose();
    //System.IO.Directory.Delete(tempDir.FullName, true);
    TestUtil.Rm(tempDir);

    if (Verbose)
    {
        Console.WriteLine("TEST: done [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]");
    }
}
public virtual void Test()
{
    Random random = new J2N.Randomizer(Random.NextInt64());
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues);
    Directory d = NewDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);
    analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);
    RandomIndexWriter w = new RandomIndexWriter(LuceneTestCase.Random, d, analyzer);
    int numDocs = AtLeast(10);
    for (int docCount = 0; docCount < numDocs; docCount++)
    {
        w.AddDocument(docs.NextDoc());
    }
    IndexReader r = w.GetReader();
    w.Dispose();

    JCG.List<BytesRef> terms = new JCG.List<BytesRef>();
    TermsEnum termsEnum = MultiFields.GetTerms(r, "body").GetEnumerator();
    while (termsEnum.MoveNext())
    {
        terms.Add(BytesRef.DeepCopyOf(termsEnum.Term));
    }
    if (Verbose) Console.WriteLine("TEST: " + terms.Count + " terms");

    int upto = -1;
    int iters = AtLeast(200);
    for (int iter = 0; iter < iters; iter++)
    {
        bool isEnd;
        if (upto != -1 && LuceneTestCase.Random.NextBoolean())
        {
            // next
            if (Verbose) Console.WriteLine("TEST: iter next");
            isEnd = termsEnum.MoveNext() == false;
            upto++;
            if (isEnd)
            {
                if (Verbose) Console.WriteLine("  end");
                Assert.AreEqual(upto, terms.Count);
                upto = -1;
            }
            else
            {
                if (Verbose) Console.WriteLine("  got term=" + termsEnum.Term.Utf8ToString() + " expected=" + terms[upto].Utf8ToString());
                Assert.IsTrue(upto < terms.Count);
                Assert.AreEqual(terms[upto], termsEnum.Term);
            }
        }
        else
        {
            BytesRef target;
            string exists;
            if (LuceneTestCase.Random.NextBoolean())
            {
                // likely fake term
                if (LuceneTestCase.Random.NextBoolean())
                {
                    target = new BytesRef(TestUtil.RandomSimpleString(LuceneTestCase.Random));
                }
                else
                {
                    target = new BytesRef(TestUtil.RandomRealisticUnicodeString(LuceneTestCase.Random));
                }
                exists = "likely not";
            }
            else
            {
                // real term
                target = terms[LuceneTestCase.Random.Next(terms.Count)];
                exists = "yes";
            }

            upto = terms.BinarySearch(target);

            if (LuceneTestCase.Random.NextBoolean())
            {
                if (Verbose) Console.WriteLine("TEST: iter seekCeil target=" + target.Utf8ToString() + " exists=" + exists);
                // seekCeil
                TermsEnum.SeekStatus status = termsEnum.SeekCeil(target);
                if (Verbose) Console.WriteLine("  got " + status);

                if (upto < 0)
                {
                    upto = -(upto + 1);
                    if (upto >= terms.Count)
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.END, status);
                        upto = -1;
                    }
                    else
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);
                        Assert.AreEqual(terms[upto], termsEnum.Term);
                    }
                }
                else
                {
                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status);
                    Assert.AreEqual(terms[upto], termsEnum.Term);
                }
            }
            else
            {
                if (Verbose) Console.WriteLine("TEST: iter seekExact target=" + target.Utf8ToString() + " exists=" + exists);
                // seekExact
                bool result = termsEnum.SeekExact(target);
                if (Verbose) Console.WriteLine("  got " + result);
                if (upto < 0)
                {
                    Assert.IsFalse(result);
                    upto = -1;
                }
                else
                {
                    Assert.IsTrue(result);
                    Assert.AreEqual(target, termsEnum.Term);
                }
            }
        }
    }

    r.Dispose();
    d.Dispose();
    docs.Dispose();
}
public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
{
    int minSkipInterval;
    if (state.SegmentInfo.DocCount > 1000000)
    {
        // Test2BPostings can OOME otherwise:
        minSkipInterval = 3;
    }
    else
    {
        minSkipInterval = 2;
    }

    // we pull this before the seed intentionally: because it's not consumed at runtime
    // (the skipInterval is written into postings header)
    int skipInterval = TestUtil.NextInt32(seedRandom, minSkipInterval, 10);
    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: skipInterval=" + skipInterval);
    }

    long seed = seedRandom.NextInt64();
    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: writing to seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed);
    }

    string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
    IndexOutput @out = state.Directory.CreateOutput(seedFileName, state.Context);
    try
    {
        @out.WriteInt64(seed);
    }
    finally
    {
        @out.Dispose();
    }

    Random random = new J2N.Randomizer(seed);

    random.Next(); // consume a random for buffersize

    PostingsWriterBase postingsWriter;
    if (random.nextBoolean())
    {
        postingsWriter = new SepPostingsWriter(state, new MockInt32StreamFactory(random), skipInterval);
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing Standard postings");
        }
        // TODO: randomize variables like acceptableOverhead?!
        postingsWriter = new Lucene41PostingsWriter(state, skipInterval);
    }

    if (random.NextBoolean())
    {
        int totTFCutoff = TestUtil.NextInt32(random, 1, 20);
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
        }
        postingsWriter = new PulsingPostingsWriter(state, totTFCutoff, postingsWriter);
    }

    FieldsConsumer fields;
    int t1 = random.Next(4);

    if (t1 == 0)
    {
        bool success = false;
        try
        {
            fields = new FSTTermsWriter(state, postingsWriter);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }
    }
    else if (t1 == 1)
    {
        bool success = false;
        try
        {
            fields = new FSTOrdTermsWriter(state, postingsWriter);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }
    }
    else if (t1 == 2)
    {
        // Use BlockTree terms dict
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing BlockTree terms dict");
        }

        // TODO: would be nice to allow 1 but this is very
        // slow to write
        int minTermsInBlock = TestUtil.NextInt32(random, 2, 100);
        int maxTermsInBlock = Math.Max(2, (minTermsInBlock - 1) * 2 + random.Next(100));

        bool success = false;
        try
        {
            fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing Block terms dict");
        }

        bool success = false;
        TermsIndexWriterBase indexWriter;
        try
        {
            if (random.NextBoolean())
            {
                state.TermIndexInterval = TestUtil.NextInt32(random, 1, 100);
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: fixed-gap terms index (tii=" + state.TermIndexInterval + ")");
                }
                indexWriter = new FixedGapTermsIndexWriter(state);
            }
            else
            {
                VariableGapTermsIndexWriter.IndexTermSelector selector;
                int n2 = random.Next(3);
                if (n2 == 0)
                {
                    int tii = TestUtil.NextInt32(random, 1, 100);
                    selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
                    if (LuceneTestCase.Verbose)
                    {
                        Console.WriteLine("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
                    }
                }
                else if (n2 == 1)
                {
                    int docFreqThresh = TestUtil.NextInt32(random, 2, 100);
                    int tii = TestUtil.NextInt32(random, 1, 100);
                    selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
                }
                else
                {
                    long seed2 = random.NextInt64();
                    int gap = TestUtil.NextInt32(random, 2, 40);
                    if (LuceneTestCase.Verbose)
                    {
                        Console.WriteLine("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
                    }
                    selector = new IndexTermSelectorAnonymousClass(seed2, gap);
                }
                indexWriter = new VariableGapTermsIndexWriter(state, selector);
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }

        success = false;
        try
        {
            fields = new BlockTermsWriter(indexWriter, state, postingsWriter);
            success = true;
        }
        finally
        {
            if (!success)
            {
                try
                {
                    postingsWriter.Dispose();
                }
                finally
                {
                    indexWriter.Dispose();
                }
            }
        }
    }

    return fields;
}
public virtual void TestRandomStringSort()
{
    Random random = new J2N.Randomizer(Random.NextInt64());

    int NUM_DOCS = AtLeast(100);
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    bool allowDups = random.NextBoolean();
    ISet<string> seen = new JCG.HashSet<string>();
    int maxLength = TestUtil.NextInt32(random, 5, 100);
    if (Verbose)
    {
        Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
    }

    int numDocs = 0;
    IList<BytesRef> docValues = new JCG.List<BytesRef>();
    // TODO: deletions
    while (numDocs < NUM_DOCS)
    {
        Document doc = new Document();

        // 10% of the time, the document is missing the value:
        BytesRef br;
        if (LuceneTestCase.Random.Next(10) != 7)
        {
            string s;
            if (random.NextBoolean())
            {
                s = TestUtil.RandomSimpleString(random, maxLength);
            }
            else
            {
                s = TestUtil.RandomUnicodeString(random, maxLength);
            }

            if (!allowDups)
            {
                if (seen.Contains(s))
                {
                    continue;
                }
                seen.Add(s);
            }

            if (Verbose)
            {
                Console.WriteLine("  " + numDocs + ": s=" + s);
            }

            br = new BytesRef(s);
            if (DefaultCodecSupportsDocValues)
            {
                doc.Add(new SortedDocValuesField("stringdv", br));
                doc.Add(new NumericDocValuesField("id", numDocs));
            }
            else
            {
                doc.Add(NewStringField("id", Convert.ToString(numDocs), Field.Store.NO));
            }
            doc.Add(NewStringField("string", s, Field.Store.NO));
            docValues.Add(br);
        }
        else
        {
            br = null;
            if (Verbose)
            {
                Console.WriteLine("  " + numDocs + ": <missing>");
            }
            docValues.Add(null);
            if (DefaultCodecSupportsDocValues)
            {
                doc.Add(new NumericDocValuesField("id", numDocs));
            }
            else
            {
                doc.Add(NewStringField("id", Convert.ToString(numDocs), Field.Store.NO));
            }
        }

        doc.Add(new StoredField("id", numDocs));
        writer.AddDocument(doc);
        numDocs++;

        if (random.Next(40) == 17)
        {
            // force flush
            writer.GetReader().Dispose();
        }
    }

    IndexReader r = writer.GetReader();
    writer.Dispose();
    if (Verbose)
    {
        Console.WriteLine("  reader=" + r);
    }

    IndexSearcher idxS = NewSearcher(r, false);
    int ITERS = AtLeast(100);
    for (int iter = 0; iter < ITERS; iter++)
    {
        bool reverse = random.NextBoolean();

        TopFieldDocs hits;
        SortField sf;
        bool sortMissingLast;
        bool missingIsNull;
        if (DefaultCodecSupportsDocValues && random.NextBoolean())
        {
            sf = new SortField("stringdv", SortFieldType.STRING, reverse);
            // Can only use sort missing if the DVFormat
            // supports docsWithField:
            sortMissingLast = DefaultCodecSupportsDocsWithField && Random.NextBoolean();
            missingIsNull = DefaultCodecSupportsDocsWithField;
        }
        else
        {
            sf = new SortField("string", SortFieldType.STRING, reverse);
            sortMissingLast = Random.NextBoolean();
            missingIsNull = true;
        }
        if (sortMissingLast)
        {
            sf.SetMissingValue(SortField.STRING_LAST);
        }

        Sort sort;
        if (random.NextBoolean())
        {
            sort = new Sort(sf);
        }
        else
        {
            sort = new Sort(sf, SortField.FIELD_DOC);
        }
        int hitCount = TestUtil.NextInt32(random, 1, r.MaxDoc + 20);
        RandomFilter f = new RandomFilter(random, (float)random.NextDouble(), docValues);
        int queryType = random.Next(3);
        if (queryType == 0)
        {
            // force out of order
            BooleanQuery bq = new BooleanQuery();
            // Add a Query with SHOULD, since bq.Scorer() returns BooleanScorer2
            // which delegates to BS if there are no mandatory clauses.
            bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
            // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return
            // the clause instead of BQ.
            bq.MinimumNumberShouldMatch = 1;
            hits = idxS.Search(bq, f, hitCount, sort, random.NextBoolean(), random.NextBoolean());
        }
        else if (queryType == 1)
        {
            hits = idxS.Search(new ConstantScoreQuery(f), null, hitCount, sort, random.NextBoolean(), random.NextBoolean());
        }
        else
        {
            hits = idxS.Search(new MatchAllDocsQuery(), f, hitCount, sort, random.NextBoolean(), random.NextBoolean());
        }

        if (Verbose)
        {
            Console.WriteLine("\nTEST: iter=" + iter + " " + hits.TotalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
        }

        // Compute expected results:
        var expected = f.matchValues.ToList();
        expected.Sort(Comparer<BytesRef>.Create((a, b) =>
        {
            if (a is null)
            {
                if (b is null)
                {
                    return 0;
                }
                return sortMissingLast ? 1 : -1;
            }
            else if (b is null)
            {
                return sortMissingLast ? -1 : 1;
            }
            else
            {
                return a.CompareTo(b);
            }
        }));
        if (reverse)
        {
            expected.Reverse();
        }

        if (Verbose)
        {
            Console.WriteLine("  expected:");
            for (int idx = 0; idx < expected.Count; idx++)
            {
                BytesRef br = expected[idx];
                if (br is null && missingIsNull == false)
                {
                    br = new BytesRef();
                }
                Console.WriteLine("    " + idx + ": " + (br is null ? "<missing>" : br.Utf8ToString()));
                if (idx == hitCount - 1)
                {
                    break;
                }
            }
        }

        if (Verbose)
        {
            Console.WriteLine("  actual:");
            for (int hitIDX = 0; hitIDX < hits.ScoreDocs.Length; hitIDX++)
            {
                FieldDoc fd = (FieldDoc)hits.ScoreDocs[hitIDX];
                BytesRef br = (BytesRef)fd.Fields[0];
                Console.WriteLine("    " + hitIDX + ": " + (br is null ? "<missing>" : br.Utf8ToString()) + " id=" + idxS.Doc(fd.Doc).Get("id"));
            }
        }

        for (int hitIDX = 0; hitIDX < hits.ScoreDocs.Length; hitIDX++)
        {
            FieldDoc fd = (FieldDoc)hits.ScoreDocs[hitIDX];
            BytesRef br = expected[hitIDX];
            if (br is null && missingIsNull == false)
            {
                br = new BytesRef();
            }

            // Normally, the old codecs (that don't support
            // docsWithField via doc values) will always return
            // an empty BytesRef for the missing case; however,
            // if all docs in a given segment were missing, in
            // that case it will return null! So we must map
            // null here, too:
            BytesRef br2 = (BytesRef)fd.Fields[0];
            if (br2 is null && missingIsNull == false)
            {
                br2 = new BytesRef();
            }

            Assert.AreEqual(br, br2, "hit=" + hitIDX + " has wrong sort value");
        }
    }

    r.Dispose();
    dir.Dispose();
}
public virtual void TestRollingUpdates_Mem()
{
    Random random = new J2N.Randomizer(Random.NextInt64());
    BaseDirectoryWrapper dir = NewDirectory();
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues);

    //provider.register(new MemoryCodec());
    if ((!"Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal)) && LuceneTestCase.Random.NextBoolean())
    {
        Codec.Default = TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(LuceneTestCase.Random.nextBoolean(), random.NextSingle()));
    }

    MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);
    analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);

    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    int SIZE = AtLeast(20);
    int id = 0;
    IndexReader r = null;
    IndexSearcher s = null;
    int numUpdates = (int)(SIZE * (2 + (TestNightly ? 200 * LuceneTestCase.Random.NextDouble() : 5 * LuceneTestCase.Random.NextDouble())));
    if (Verbose)
    {
        Console.WriteLine("TEST: numUpdates=" + numUpdates);
    }
    int updateCount = 0;

    // TODO: sometimes update ids not in order...
    for (int docIter = 0; docIter < numUpdates; docIter++)
    {
        Documents.Document doc = docs.NextDoc();
        string myID = "" + id;
        if (id == SIZE - 1)
        {
            id = 0;
        }
        else
        {
            id++;
        }
        if (Verbose)
        {
            Console.WriteLine("  docIter=" + docIter + " id=" + id);
        }
        ((Field)doc.GetField("docid")).SetStringValue(myID);

        Term idTerm = new Term("docid", myID);

        bool doUpdate;
        if (s != null && updateCount < SIZE)
        {
            TopDocs hits = s.Search(new TermQuery(idTerm), 1);
            Assert.AreEqual(1, hits.TotalHits);
            doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc);
            if (Verbose)
            {
                if (doUpdate)
                {
                    Console.WriteLine("  tryDeleteDocument failed");
                }
                else
                {
                    Console.WriteLine("  tryDeleteDocument succeeded");
                }
            }
        }
        else
        {
            doUpdate = true;
            if (Verbose)
            {
                Console.WriteLine("  no searcher: doUpdate=true");
            }
        }

        updateCount++;

        if (doUpdate)
        {
            w.UpdateDocument(idTerm, doc);
        }
        else
        {
            w.AddDocument(doc);
        }

        if (docIter >= SIZE && LuceneTestCase.Random.Next(50) == 17)
        {
            if (r != null)
            {
                r.Dispose();
            }

            bool applyDeletions = LuceneTestCase.Random.NextBoolean();

            if (Verbose)
            {
                Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions);
            }

            r = w.GetReader(applyDeletions);
            if (applyDeletions)
            {
                s = NewSearcher(r);
            }
            else
            {
                s = null;
            }
            Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE);
            updateCount = 0;
        }
    }

    if (r != null)
    {
        r.Dispose();
    }

    w.Commit();
    Assert.AreEqual(SIZE, w.NumDocs);

    w.Dispose();

    TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates");

    docs.Dispose();

    // LUCENE-4455:
    SegmentInfos infos = new SegmentInfos();
    infos.Read(dir);
    long totalBytes = 0;
    foreach (SegmentCommitInfo sipc in infos.Segments)
    {
        totalBytes += sipc.GetSizeInBytes();
    }
    long totalBytes2 = 0;
    foreach (string fileName in dir.ListAll())
    {
        if (!fileName.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal))
        {
            totalBytes2 += dir.FileLength(fileName);
        }
    }
    Assert.AreEqual(totalBytes2, totalBytes);
    dir.Dispose();
}
public void TestRandom()
{
    int numWords = AtLeast(1000);

    IDictionary<string, long> slowCompletor = new JCG.SortedDictionary<string, long>(StringComparer.Ordinal);
    ISet<string> allPrefixes = new JCG.SortedSet<string>(StringComparer.Ordinal);

    Input[] keys = new Input[numWords];

    for (int i = 0; i < numWords; i++)
    {
        string s;
        while (true)
        {
            // TODO: would be nice to fix this slowCompletor/comparer to
            // use full range, but we might lose some coverage too...
            s = TestUtil.RandomSimpleString(LuceneTestCase.Random);
            if (!slowCompletor.ContainsKey(s))
            {
                break;
            }
        }

        for (int j = 1; j < s.Length; j++)
        {
            allPrefixes.add(s.Substring(0, j));
        }
        // we can probably do Integer.MAX_VALUE here, but why worry.
        int weight = LuceneTestCase.Random.nextInt(1 << 24);
        slowCompletor.Put(s, (long)weight);
        keys[i] = new Input(s, weight);
    }

    WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
    suggester.Build(new InputArrayEnumerator(keys));

    assertEquals(numWords, suggester.Count);
    Random random = new J2N.Randomizer(Random.NextInt64());
    foreach (string prefix in allPrefixes)
    {
        int topN = TestUtil.NextInt32(random, 1, 10);
        IList<Lookup.LookupResult> r = suggester.DoLookup(TestUtil.StringToCharSequence(prefix, random).ToString(), false, topN);

        // 2. go through the whole treemap (slowCompletor) and check that it's actually the best suggestion
        JCG.List<Lookup.LookupResult> matches = new JCG.List<Lookup.LookupResult>();

        // TODO: could be faster... but it's slowCompletor for a reason
        foreach (KeyValuePair<string, long> e in slowCompletor)
        {
            if (e.Key.StartsWith(prefix, StringComparison.Ordinal))
            {
                matches.Add(new Lookup.LookupResult(e.Key, e.Value));
            }
        }

        assertTrue(matches.size() > 0);
        matches.Sort(new TestRandomComparer());
        if (matches.size() > topN)
        {
            //matches.SubList(topN, matches.size()).clear();
            matches.RemoveRange(topN, matches.size() - topN); // LUCENENET: Converted end index to length
        }
        assertEquals(matches.size(), r.size());

        for (int hit = 0; hit < r.size(); hit++)
        {
            //System.out.println("  check hit " + hit);
            assertEquals(matches[hit].Key.toString(), r[hit].Key.toString());
            assertEquals(matches[hit].Value, r[hit].Value, 0f);
        }
    }
}
public override FieldsProducer FieldsProducer(SegmentReadState state)
{
    string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
    IndexInput @in = state.Directory.OpenInput(seedFileName, state.Context);
    long seed = @in.ReadInt64();
    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: reading from seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed);
    }
    @in.Dispose();

    Random random = new J2N.Randomizer(seed);

    int readBufferSize = TestUtil.NextInt32(random, 1, 4096);
    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: readBufferSize=" + readBufferSize);
    }

    PostingsReaderBase postingsReader;
    if (random.NextBoolean())
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading Sep postings");
        }
        postingsReader = new SepPostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, new MockInt32StreamFactory(random), state.SegmentSuffix);
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading Standard postings");
        }
        postingsReader = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
    }

    if (random.NextBoolean())
    {
        int totTFCutoff = TestUtil.NextInt32(random, 1, 20);
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
        }
        postingsReader = new PulsingPostingsReader(state, postingsReader);
    }

    FieldsProducer fields;
    int t1 = random.Next(4);

    if (t1 == 0)
    {
        bool success = false;
        try
        {
            fields = new FSTTermsReader(state, postingsReader);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else if (t1 == 1)
    {
        bool success = false;
        try
        {
            fields = new FSTOrdTermsReader(state, postingsReader);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else if (t1 == 2)
    {
        // Use BlockTree terms dict
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading BlockTree terms dict");
        }
        bool success = false;
        try
        {
            fields = new BlockTreeTermsReader(state.Directory, state.FieldInfos, state.SegmentInfo, postingsReader, state.Context, state.SegmentSuffix, state.TermsIndexDivisor);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading Block terms dict");
        }
        TermsIndexReaderBase indexReader;
        bool success = false;
        try
        {
            bool doFixedGap = random.NextBoolean();

            // randomness diverges from writer, here.
            // if termsIndexDivisor is set to -1, we should not touch it. It means a
            // test explicitly instructed not to load the terms index.
            if (state.TermsIndexDivisor != -1)
            {
                state.TermsIndexDivisor = TestUtil.NextInt32(random, 1, 10);
            }

            if (doFixedGap)
            {
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: fixed-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
                }
                indexReader = new FixedGapTermsIndexReader(state.Directory, state.FieldInfos, state.SegmentInfo.Name, state.TermsIndexDivisor, BytesRef.UTF8SortedAsUnicodeComparer, state.SegmentSuffix, state.Context);
            }
            else
            {
                int n2 = random.Next(3);
                if (n2 == 1)
                {
                    random.Next();
                }
                else if (n2 == 2)
                {
                    random.NextInt64();
                }
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: variable-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
                }
                indexReader = new VariableGapTermsIndexReader(state.Directory, state.FieldInfos, state.SegmentInfo.Name, state.TermsIndexDivisor, state.SegmentSuffix, state.Context);
            }

            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }

        success = false;
        try
        {
            fields = new BlockTermsReader(indexReader, state.Directory, state.FieldInfos, state.SegmentInfo, postingsReader, state.Context, state.SegmentSuffix);
            success = true;
        }
        finally
        {
            if (!success)
            {
                try
                {
                    postingsReader.Dispose();
                }
                finally
                {
                    indexReader.Dispose();
                }
            }
        }
    }
    return fields;
}
public virtual void Test()
{
    int[] ints = new int[7];
    Int32sRef input = new Int32sRef(ints, 0, ints.Length);
    long seed = Random.NextInt64();

    Directory dir = new MMapDirectory(CreateTempDir("2BFST"));

    for (int doPackIter = 0; doPackIter < 2; doPackIter++)
    {
        bool doPack = doPackIter == 1;

        // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
        if (!doPack)
        {
            Console.WriteLine("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
            Outputs<object> outputs = NoOutputs.Singleton;
            object NO_OUTPUT = outputs.NoOutput;
            Builder<object> b = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15);

            int count = 0;
            Random r = new J2N.Randomizer(seed);
            int[] ints2 = new int[200];
            Int32sRef input2 = new Int32sRef(ints2, 0, ints2.Length);
            while (true)
            {
                //System.out.println("add: " + input + " -> " + output);
                for (int i = 10; i < ints2.Length; i++)
                {
                    ints2[i] = r.Next(256);
                }
                b.Add(input2, NO_OUTPUT);
                count++;
                if (count % 100000 == 0)
                {
                    Console.WriteLine(count + ": " + b.GetFstSizeInBytes() + " bytes; " + b.TotStateCount + " nodes");
                }
                if (b.TotStateCount > int.MaxValue + 100L * 1024 * 1024)
                {
                    break;
                }
                NextInput(r, ints2);
            }

            FST<object> fst = b.Finish();

            for (int verify = 0; verify < 2; verify++)
            {
                Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                Arrays.Fill(ints2, 0);
                r = new J2N.Randomizer(seed);

                for (int i = 0; i < count; i++)
                {
                    if (i % 1000000 == 0)
                    {
                        Console.WriteLine(i + "...: ");
                    }
                    for (int j = 10; j < ints2.Length; j++)
                    {
                        ints2[j] = r.Next(256);
                    }
                    Assert.AreEqual(NO_OUTPUT, Util.Get(fst, input2));
                    NextInput(r, ints2);
                }

                Console.WriteLine("\nTEST: enum all input/outputs");
                Int32sRefFSTEnum<object> fstEnum = new Int32sRefFSTEnum<object>(fst);

                Arrays.Fill(ints2, 0);
                r = new J2N.Randomizer(seed);
                int upto = 0;
                while (fstEnum.MoveNext())
                {
                    Int32sRefFSTEnum.InputOutput<object> pair = fstEnum.Current;
                    for (int j = 10; j < ints2.Length; j++)
                    {
                        ints2[j] = r.Next(256);
                    }
                    Assert.AreEqual(input2, pair.Input);
                    Assert.AreEqual(NO_OUTPUT, pair.Output);
                    upto++;
                    NextInput(r, ints2);
                }
                Assert.AreEqual(count, upto);

                if (verify == 0)
                {
                    Console.WriteLine("\nTEST: save/load FST and re-verify");
                    IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                    fst.Save(@out);
                    @out.Dispose();
                    IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                    fst = new FST<object>(@in, outputs);
                    @in.Dispose();
                }
                else
                {
                    dir.DeleteFile("fst");
                }
            }
        }

        // Build FST w/ ByteSequenceOutputs and stop when FST
        // size = 3GB
        {
            Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
            Outputs<BytesRef> outputs = ByteSequenceOutputs.Singleton;
            Builder<BytesRef> b = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15);

            var outputBytes = new byte[20];
            BytesRef output = new BytesRef(outputBytes);
            Arrays.Fill(ints, 0);
            int count = 0;
            Random r = new J2N.Randomizer(seed);
            while (true)
            {
                r.NextBytes(outputBytes);
                //System.out.println("add: " + input + " -> " + output);
                b.Add(input, BytesRef.DeepCopyOf(output));
                count++;
                if (count % 1000000 == 0)
                {
                    Console.WriteLine(count + "...: " + b.GetFstSizeInBytes() + " bytes");
                }
                if (b.GetFstSizeInBytes() > LIMIT)
                {
                    break;
                }
                NextInput(r, ints);
            }

            FST<BytesRef> fst = b.Finish();

            for (int verify = 0; verify < 2; verify++)
            {
                Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                r = new J2N.Randomizer(seed);
                Arrays.Fill(ints, 0);
                for (int i = 0; i < count; i++)
                {
                    if (i % 1000000 == 0)
                    {
                        Console.WriteLine(i + "...: ");
                    }
                    r.NextBytes(outputBytes);
                    Assert.AreEqual(output, Util.Get(fst, input));
                    NextInput(r, ints);
                }

                Console.WriteLine("\nTEST: enum all input/outputs");
                Int32sRefFSTEnum<BytesRef> fstEnum = new Int32sRefFSTEnum<BytesRef>(fst);

                Arrays.Fill(ints, 0);
                r = new J2N.Randomizer(seed);
                int upto = 0;
                while (fstEnum.MoveNext())
                {
                    Int32sRefFSTEnum.InputOutput<BytesRef> pair = fstEnum.Current;
                    Assert.AreEqual(input, pair.Input);
                    r.NextBytes(outputBytes);
                    Assert.AreEqual(output, pair.Output);
                    upto++;
                    NextInput(r, ints);
                }
                Assert.AreEqual(count, upto);

                if (verify == 0)
                {
                    Console.WriteLine("\nTEST: save/load FST and re-verify");
                    IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                    fst.Save(@out);
                    @out.Dispose();
                    IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                    fst = new FST<BytesRef>(@in, outputs);
                    @in.Dispose();
                }
                else
                {
                    dir.DeleteFile("fst");
                }
            }
        }

        // Build FST w/ PositiveIntOutputs and stop when FST
        // size = 3GB
        {
            Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
            Outputs<Int64> outputs = PositiveInt32Outputs.Singleton;
            Builder<Int64> b = new Builder<Int64>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15);

            long output = 1;

            Arrays.Fill(ints, 0);
            int count = 0;
            Random r = new J2N.Randomizer(seed);
            while (true)
            {
                //System.out.println("add: " + input + " -> " + output);
                b.Add(input, output);
                output += 1 + r.Next(10);
                count++;
                if (count % 1000000 == 0)
                {
                    Console.WriteLine(count + "...: " + b.GetFstSizeInBytes() + " bytes");
                }
                if (b.GetFstSizeInBytes() > LIMIT)
                {
                    break;
                }
                NextInput(r, ints);
            }

            FST<Int64> fst = b.Finish();

            for (int verify = 0; verify < 2; verify++)
            {
                Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                Arrays.Fill(ints, 0);

                output = 1;
                r = new J2N.Randomizer(seed);
                for (int i = 0; i < count; i++)
                {
                    if (i % 1000000 == 0)
                    {
                        Console.WriteLine(i + "...: ");
                    }

                    // forward lookup:
                    Assert.AreEqual(output, (long)Util.Get(fst, input));
                    // reverse lookup:
                    Assert.AreEqual(input, Util.GetByOutput(fst, output));
                    output += 1 + r.Next(10);
                    NextInput(r, ints);
                }

                Console.WriteLine("\nTEST: enum all input/outputs");
                Int32sRefFSTEnum<Int64> fstEnum = new Int32sRefFSTEnum<Int64>(fst);

                Arrays.Fill(ints, 0);
                r = new J2N.Randomizer(seed);
                int upto = 0;
                output = 1;
                while (fstEnum.MoveNext())
                {
                    Int32sRefFSTEnum.InputOutput<J2N.Numerics.Int64> pair = fstEnum.Current;
                    Assert.AreEqual(input, pair.Input);
                    Assert.AreEqual(output, pair.Output);
                    output += 1 + r.Next(10);
                    upto++;
                    NextInput(r, ints);
                }
                Assert.AreEqual(count, upto);

                if (verify == 0)
                {
                    Console.WriteLine("\nTEST: save/load FST and re-verify");
                    IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                    fst.Save(@out);
                    @out.Dispose();
                    IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                    fst = new FST<Int64>(@in, outputs);
                    @in.Dispose();
                }
                else
                {
                    dir.DeleteFile("fst");
                }
            }
        }
    }
    dir.Dispose();
}