/// <summary>
/// Opens the postings reader and terms dictionary reader for a segment. The seed stored in
/// the segment's seed file (extension <c>SEED_EXT</c>) drives every random choice made here.
/// </summary>
/// <param name="state">Read state naming the segment, directory and I/O context. When the
/// Block terms dictionary is selected and <c>TermsIndexDivisor</c> is not -1, the divisor is
/// overwritten with a random value in [1, 10].</param>
/// <returns>The terms reader wrapping the selected postings reader.</returns>
public override FieldsProducer FieldsProducer(SegmentReadState state)
{
    string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
    IndexInput @in = state.Directory.OpenInput(seedFileName, state.Context);
    long seed;
    try
    {
        seed = @in.ReadInt64();
    }
    finally
    {
        // Always release the seed file, even if the read fails.
        @in.Dispose();
    }
    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: reading from seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed);
    }

    Random random = new J2N.Randomizer(seed);

    // The value is only logged; the draw itself advances the random sequence.
    int readBufferSize = TestUtil.NextInt32(random, 1, 4096);
    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: readBufferSize=" + readBufferSize);
    }

    PostingsReaderBase postingsReader;
    if (random.NextBoolean())
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading Sep postings");
        }
        postingsReader = new SepPostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, new MockInt32StreamFactory(random), state.SegmentSuffix);
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading Standard postings");
        }
        postingsReader = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
    }

    if (random.NextBoolean())
    {
        // The cutoff is only logged on the read side; the draw still advances the sequence.
        int totTFCutoff = TestUtil.NextInt32(random, 1, 20);
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
        }
        postingsReader = new PulsingPostingsReader(state, postingsReader);
    }

    FieldsProducer fields;
    int t1 = random.Next(4);

    if (t1 == 0)
    {
        bool success = false;
        try
        {
            fields = new FSTTermsReader(state, postingsReader);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else if (t1 == 1)
    {
        bool success = false;
        try
        {
            fields = new FSTOrdTermsReader(state, postingsReader);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else if (t1 == 2)
    {
        // Use BlockTree terms dict
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading BlockTree terms dict");
        }

        bool success = false;
        try
        {
            fields = new BlockTreeTermsReader(state.Directory, state.FieldInfos, state.SegmentInfo, postingsReader, state.Context, state.SegmentSuffix, state.TermsIndexDivisor);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading Block terms dict");
        }

        TermsIndexReaderBase indexReader;
        bool success = false;
        try
        {
            bool doFixedGap = random.NextBoolean();

            // randomness diverges from writer, here:
            // if termsIndexDivisor is set to -1, we should not touch it. It means a
            // test explicitly instructed not to load the terms index.
            if (state.TermsIndexDivisor != -1)
            {
                state.TermsIndexDivisor = TestUtil.NextInt32(random, 1, 10);
            }

            if (doFixedGap)
            {
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: fixed-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
                }
                indexReader = new FixedGapTermsIndexReader(state.Directory, state.FieldInfos, state.SegmentInfo.Name, state.TermsIndexDivisor, BytesRef.UTF8SortedAsUnicodeComparer, state.SegmentSuffix, state.Context);
            }
            else
            {
                // The drawn values are discarded; only the selector index matters here.
                int n2 = random.Next(3);
                if (n2 == 1)
                {
                    random.Next();
                }
                else if (n2 == 2)
                {
                    random.NextInt64();
                }
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: variable-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
                }
                indexReader = new VariableGapTermsIndexReader(state.Directory, state.FieldInfos, state.SegmentInfo.Name, state.TermsIndexDivisor, state.SegmentSuffix, state.Context);
            }

            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }

        success = false;
        try
        {
            fields = new BlockTermsReader(indexReader, state.Directory, state.FieldInfos, state.SegmentInfo, postingsReader, state.Context, state.SegmentSuffix);
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Both readers are open at this point; close both even if the first close throws.
                try
                {
                    postingsReader.Dispose();
                }
                finally
                {
                    indexReader.Dispose();
                }
            }
        }
    }

    return fields;
}
/// <summary>
/// Indexes random strings (roughly one document in ten has no value), then runs many
/// randomly configured, filtered, sorted searches and checks every returned sort value
/// against an ordering computed in memory from the filter's matched values.
/// </summary>
public virtual void TestRandomStringSort()
{
    Random random = new J2N.Randomizer(Random.NextInt64());

    int targetDocCount = AtLeast(100);
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    bool allowDups = random.NextBoolean();
    ISet<string> seen = new JCG.HashSet<string>();
    int maxLength = TestUtil.NextInt32(random, 5, 100);
    if (Verbose)
    {
        Console.WriteLine("TEST: NUM_DOCS=" + targetDocCount + " maxLength=" + maxLength + " allowDups=" + allowDups);
    }

    int docCount = 0;
    IList<BytesRef> docValues = new JCG.List<BytesRef>();

    // TODO: deletions
    while (docCount < targetDocCount)
    {
        Document doc = new Document();

        // 10% of the time, the document is missing the value:
        bool hasValue = LuceneTestCase.Random.Next(10) != 7;
        if (hasValue)
        {
            string s = random.NextBoolean()
                ? TestUtil.RandomSimpleString(random, maxLength)
                : TestUtil.RandomUnicodeString(random, maxLength);

            // When duplicates are disallowed, retry this document with a new string.
            if (!allowDups && !seen.Add(s))
            {
                continue;
            }

            if (Verbose)
            {
                Console.WriteLine(" " + docCount + ": s=" + s);
            }

            BytesRef value = new BytesRef(s);
            if (DefaultCodecSupportsDocValues)
            {
                doc.Add(new SortedDocValuesField("stringdv", value));
                doc.Add(new NumericDocValuesField("id", docCount));
            }
            else
            {
                doc.Add(NewStringField("id", Convert.ToString(docCount), Field.Store.NO));
            }
            doc.Add(NewStringField("string", s, Field.Store.NO));
            docValues.Add(value);
        }
        else
        {
            if (Verbose)
            {
                Console.WriteLine(" " + docCount + ": <missing>");
            }
            docValues.Add(null);
            if (DefaultCodecSupportsDocValues)
            {
                doc.Add(new NumericDocValuesField("id", docCount));
            }
            else
            {
                doc.Add(NewStringField("id", Convert.ToString(docCount), Field.Store.NO));
            }
        }

        doc.Add(new StoredField("id", docCount));
        writer.AddDocument(doc);
        docCount++;

        if (random.Next(40) == 17)
        {
            // force flush
            writer.GetReader().Dispose();
        }
    }

    IndexReader r = writer.GetReader();
    writer.Dispose();
    if (Verbose)
    {
        Console.WriteLine(" reader=" + r);
    }

    IndexSearcher searcher = NewSearcher(r, false);
    int iterations = AtLeast(100);
    for (int iter = 0; iter < iterations; iter++)
    {
        bool reverse = random.NextBoolean();

        bool useDocValues = DefaultCodecSupportsDocValues && random.NextBoolean();
        SortField sf = new SortField(useDocValues ? "stringdv" : "string", SortFieldType.STRING, reverse);

        // For the doc-values field, sort-missing can only be used if the DVFormat
        // supports docsWithField; the indexed string field always reports missing as null.
        bool missingIsNull = !useDocValues || DefaultCodecSupportsDocsWithField;
        bool sortMissingLast = missingIsNull && Random.NextBoolean();
        if (sortMissingLast)
        {
            sf.SetMissingValue(SortField.STRING_LAST);
        }

        Sort sort = random.NextBoolean() ? new Sort(sf) : new Sort(sf, SortField.FIELD_DOC);

        int hitCount = TestUtil.NextInt32(random, 1, r.MaxDoc + 20);
        RandomFilter f = new RandomFilter(random, (float)random.NextDouble(), docValues);

        TopFieldDocs hits;
        switch (random.Next(3))
        {
            case 0:
                // force out of order
                BooleanQuery bq = new BooleanQuery();
                // Add a Query with SHOULD, since bw.Scorer() returns BooleanScorer2
                // which delegates to BS if there are no mandatory clauses.
                bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
                // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return
                // the clause instead of BQ.
                bq.MinimumNumberShouldMatch = 1;
                hits = searcher.Search(bq, f, hitCount, sort, random.NextBoolean(), random.NextBoolean());
                break;

            case 1:
                hits = searcher.Search(new ConstantScoreQuery(f), null, hitCount, sort, random.NextBoolean(), random.NextBoolean());
                break;

            default:
                hits = searcher.Search(new MatchAllDocsQuery(), f, hitCount, sort, random.NextBoolean(), random.NextBoolean());
                break;
        }

        if (Verbose)
        {
            Console.WriteLine("\nTEST: iter=" + iter + " " + hits.TotalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
        }

        // Compute expected results:
        var expected = f.matchValues.ToList();
        expected.Sort(Comparer<BytesRef>.Create((x, y) =>
        {
            if (x is null || y is null)
            {
                if (x is null && y is null)
                {
                    return 0;
                }
                // Exactly one side is missing: it sorts first unless sortMissingLast is set.
                int whenMissingFirst = x is null ? -1 : 1;
                return sortMissingLast ? -whenMissingFirst : whenMissingFirst;
            }
            return x.CompareTo(y);
        }));
        if (reverse)
        {
            expected.Reverse();
        }

        if (Verbose)
        {
            Console.WriteLine(" expected:");
            for (int idx = 0; idx < expected.Count; idx++)
            {
                BytesRef shown = expected[idx];
                if (shown is null && !missingIsNull)
                {
                    shown = new BytesRef();
                }
                Console.WriteLine(" " + idx + ": " + (shown is null ? "<missing>" : shown.Utf8ToString()));
                if (idx == hitCount - 1)
                {
                    break;
                }
            }

            Console.WriteLine(" actual:");
            for (int hitIDX = 0; hitIDX < hits.ScoreDocs.Length; hitIDX++)
            {
                FieldDoc shownHit = (FieldDoc)hits.ScoreDocs[hitIDX];
                BytesRef shownValue = (BytesRef)shownHit.Fields[0];
                Console.WriteLine(" " + hitIDX + ": " + (shownValue is null ? "<missing>" : shownValue.Utf8ToString()) + " id=" + searcher.Doc(shownHit.Doc).Get("id"));
            }
        }

        for (int hitIDX = 0; hitIDX < hits.ScoreDocs.Length; hitIDX++)
        {
            FieldDoc fd = (FieldDoc)hits.ScoreDocs[hitIDX];

            BytesRef want = expected[hitIDX];
            if (want is null && !missingIsNull)
            {
                want = new BytesRef();
            }

            // Normally, the old codecs (that don't support
            // docsWithField via doc values) will always return
            // an empty BytesRef for the missing case; however,
            // if all docs in a given segment were missing, in
            // that case it will return null! So we must map
            // null here, too:
            BytesRef got = (BytesRef)fd.Fields[0];
            if (got is null && !missingIsNull)
            {
                got = new BytesRef();
            }

            Assert.AreEqual(want, got, "hit=" + hitIDX + " has wrong sort value");
        }
    }

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Creates a randomly selected postings writer and terms dictionary writer for a segment.
/// A seed drawn from <c>seedRandom</c> is written to the segment's seed file (extension
/// <c>SEED_EXT</c>) and then drives every random choice below.
/// </summary>
/// <param name="state">Write state for the segment. Its <c>TermIndexInterval</c> is overwritten
/// with a random value in [1, 100] when the fixed-gap terms index is selected.</param>
/// <returns>The terms writer wrapping the selected postings writer.</returns>
public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
{
    int minSkipInterval;
    if (state.SegmentInfo.DocCount > 1000000)
    {
        // Test2BPostings can OOME otherwise:
        minSkipInterval = 3;
    }
    else
    {
        minSkipInterval = 2;
    }

    // we pull this before the seed intentionally: because its not consumed at runtime
    // (the skipInterval is written into postings header)
    int skipInterval = TestUtil.NextInt32(seedRandom, minSkipInterval, 10);

    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: skipInterval=" + skipInterval);
    }

    long seed = seedRandom.NextInt64();

    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: writing to seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed);
    }

    string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
    IndexOutput @out = state.Directory.CreateOutput(seedFileName, state.Context);
    try
    {
        @out.WriteInt64(seed);
    }
    finally
    {
        @out.Dispose();
    }

    Random random = new J2N.Randomizer(seed);

    random.Next(); // consume a random for buffersize

    PostingsWriterBase postingsWriter;
    // Use the same NextBoolean() call as the rest of this method rather than a
    // Java-cased nextBoolean(), so every decision goes through one API.
    if (random.NextBoolean())
    {
        postingsWriter = new SepPostingsWriter(state, new MockInt32StreamFactory(random), skipInterval);
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing Standard postings");
        }
        // TODO: randomize variables like acceptibleOverHead?!
        postingsWriter = new Lucene41PostingsWriter(state, skipInterval);
    }

    if (random.NextBoolean())
    {
        int totTFCutoff = TestUtil.NextInt32(random, 1, 20);
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
        }
        postingsWriter = new PulsingPostingsWriter(state, totTFCutoff, postingsWriter);
    }

    FieldsConsumer fields;
    int t1 = random.Next(4);

    if (t1 == 0)
    {
        bool success = false;
        try
        {
            fields = new FSTTermsWriter(state, postingsWriter);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }
    }
    else if (t1 == 1)
    {
        bool success = false;
        try
        {
            fields = new FSTOrdTermsWriter(state, postingsWriter);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }
    }
    else if (t1 == 2)
    {
        // Use BlockTree terms dict
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing BlockTree terms dict");
        }

        // TODO: would be nice to allow 1 but this is very
        // slow to write
        int minTermsInBlock = TestUtil.NextInt32(random, 2, 100);
        int maxTermsInBlock = Math.Max(2, (minTermsInBlock - 1) * 2 + random.Next(100));

        bool success = false;
        try
        {
            fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing Block terms dict");
        }

        bool success = false;

        TermsIndexWriterBase indexWriter;
        try
        {
            if (random.NextBoolean())
            {
                state.TermIndexInterval = TestUtil.NextInt32(random, 1, 100);
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: fixed-gap terms index (tii=" + state.TermIndexInterval + ")");
                }
                indexWriter = new FixedGapTermsIndexWriter(state);
            }
            else
            {
                VariableGapTermsIndexWriter.IndexTermSelector selector;
                int n2 = random.Next(3);
                if (n2 == 0)
                {
                    int tii = TestUtil.NextInt32(random, 1, 100);
                    selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
                    if (LuceneTestCase.Verbose)
                    {
                        Console.WriteLine("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
                    }
                }
                else if (n2 == 1)
                {
                    int docFreqThresh = TestUtil.NextInt32(random, 2, 100);
                    int tii = TestUtil.NextInt32(random, 1, 100);
                    selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
                }
                else
                {
                    long seed2 = random.NextInt64();
                    int gap = TestUtil.NextInt32(random, 2, 40);
                    if (LuceneTestCase.Verbose)
                    {
                        Console.WriteLine("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
                    }
                    selector = new IndexTermSelectorAnonymousClass(seed2, gap);
                }
                indexWriter = new VariableGapTermsIndexWriter(state, selector);
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }

        success = false;
        try
        {
            fields = new BlockTermsWriter(indexWriter, state, postingsWriter);
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Both writers are open at this point; close both even if the first close throws.
                try
                {
                    postingsWriter.Dispose();
                }
                finally
                {
                    indexWriter.Dispose();
                }
            }
        }
    }

    return fields;
}
/// <summary>
/// Exercises equality and hashing of <c>CommonTermsQuery</c>: a populated query passes
/// <c>QueryUtils.CheckHashEquals</c>, differs from a freshly constructed query with no
/// terms, and two queries built from identically seeded generators compare equal.
/// </summary>
public void TestEqualsHashCode()
{
    CommonTermsQuery query = new CommonTermsQuery(RandomOccur(Random), RandomOccur(Random), Random.NextSingle(), Random.NextBoolean());
    int terms = AtLeast(2);
    for (int i = 0; i < terms; i++)
    {
        query.Add(new Term(TestUtil.RandomRealisticUnicodeString(Random), TestUtil.RandomRealisticUnicodeString(Random)));
    }

    QueryUtils.CheckHashEquals(query);
    QueryUtils.CheckUnequal(new CommonTermsQuery(RandomOccur(Random), RandomOccur(Random), Random.NextSingle(), Random.NextBoolean()), query);

    // Build the same query twice from the same seed; the two results must be equal.
    long seed = Random.NextInt64();
    CommonTermsQuery left = BuildSeededQuery(seed);
    CommonTermsQuery right = BuildSeededQuery(seed);
    QueryUtils.CheckEqual(left, right);

    // Builds a query whose every random choice comes from a generator seeded with querySeed.
    CommonTermsQuery BuildSeededQuery(long querySeed)
    {
        Random r = new J2N.Randomizer(querySeed);
        CommonTermsQuery built = new CommonTermsQuery(RandomOccur(r), RandomOccur(r), r.NextSingle(), r.NextBoolean());
        int termCount = AtLeast(r, 2);
        for (int i = 0; i < termCount; i++)
        {
            built.Add(new Term(TestUtil.RandomRealisticUnicodeString(r), TestUtil.RandomRealisticUnicodeString(r)));
        }
        // Random.Next(int) is the standard .NET call; no Java-cased nextInt shim is needed.
        built.HighFreqMinimumNumberShouldMatch = r.Next(4);
        built.LowFreqMinimumNumberShouldMatch = r.Next(4);
        return built;
    }
}