public void TestRandomChainsWithLargeStrings() { int numIterations = AtLeast(20); Random random = Random; for (int i = 0; i < numIterations; i++) { MockRandomAnalyzer a = new MockRandomAnalyzer(random.Next()); if (Verbose) { Console.WriteLine("Creating random analyzer:" + a); } try { CheckRandomData(random, a, 50 * RandomMultiplier, 128, false, false /* We already validate our own offsets... */); } catch (Exception /*e*/) { Console.WriteLine("Exception from random analyzer: " + a); throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) } } }
// constructor public PerfRunData(Config config) { this.config = config; // analyzer (default is standard analyzer) analyzer = NewAnalyzerTask.CreateAnalyzer(config.Get("analyzer", typeof(Lucene.Net.Analysis.Standard.StandardAnalyzer).AssemblyQualifiedName)); // content source string sourceClass = config.Get("content.source", typeof(SingleDocSource).AssemblyQualifiedName); contentSource = (ContentSource)Activator.CreateInstance(Type.GetType(sourceClass)); //Class.forName(sourceClass).asSubclass(typeof(ContentSource)).newInstance(); contentSource.SetConfig(config); // doc maker docMaker = (DocMaker)Activator.CreateInstance(Type.GetType(config.Get("doc.maker", typeof(DocMaker).AssemblyQualifiedName))); // "org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance(); docMaker.SetConfig(config, contentSource); // facet source facetSource = (FacetSource)Activator.CreateInstance(Type.GetType(config.Get("facet.source", typeof(RandomFacetSource).AssemblyQualifiedName))); // "org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance(); facetSource.SetConfig(config); // query makers readTaskQueryMaker = new Dictionary <Type, IQueryMaker>(); qmkrClass = Type.GetType(config.Get("query.maker", typeof(SimpleQueryMaker).AssemblyQualifiedName)); // index stuff Reinit(false); // statistic points points = new Points(config); if (bool.Parse(config.Get("log.queries", "false"))) { Console.WriteLine("------------> queries:"); Console.WriteLine(GetQueryMaker(new SearchTask(this)).PrintQueries()); } }
public override void Warm(AtomicReader reader) { if (Verbose) { Console.WriteLine("TEST: now warm merged reader=" + reader); } #if FEATURE_CONDITIONALWEAKTABLE_ADDORUPDATE outerInstance.warmed.AddOrUpdate(((SegmentReader)reader).core, true); #else outerInstance.warmed[((SegmentReader)reader).core] = true; #endif int maxDoc = reader.MaxDoc; IBits liveDocs = reader.LiveDocs; int sum = 0; int inc = Math.Max(1, maxDoc / 50); for (int docID = 0; docID < maxDoc; docID += inc) { if (liveDocs == null || liveDocs.Get(docID)) { Document doc = reader.Document(docID); sum += doc.Fields.Count; } } IndexSearcher searcher = #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION outerInstance. #endif NewSearcher(reader); sum += searcher.Search(new TermQuery(new Term("body", "united")), 10).TotalHits; if (Verbose) { Console.WriteLine("TEST: warm visited " + sum + " fields"); } }
public override FieldsConsumer FieldsConsumer(SegmentWriteState state) { int minSkipInterval; if (state.SegmentInfo.DocCount > 1000000) { // Test2BPostings can OOME otherwise: minSkipInterval = 3; } else { minSkipInterval = 2; } // we pull this before the seed intentionally: because it's not consumed at runtime // (the skipInterval is written into postings header) int skipInterval = TestUtil.NextInt32(seedRandom, minSkipInterval, 10); if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: skipInterval=" + skipInterval); } long seed = seedRandom.NextInt64(); if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: writing to seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed); } string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT); IndexOutput @out = state.Directory.CreateOutput(seedFileName, state.Context); try { @out.WriteInt64(seed); } finally { @out.Dispose(); } Random random = new J2N.Randomizer(seed); random.Next(); // consume a random for buffersize PostingsWriterBase postingsWriter; if (random.NextBoolean()) { postingsWriter = new SepPostingsWriter(state, new MockInt32StreamFactory(random), skipInterval); } else { if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: writing Standard postings"); } // TODO: randomize variables like acceptableOverhead?! postingsWriter = new Lucene41PostingsWriter(state, skipInterval); } if (random.NextBoolean()) { int totTFCutoff = TestUtil.NextInt32(random, 1, 20); if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff); } postingsWriter = new PulsingPostingsWriter(state, totTFCutoff, postingsWriter); } FieldsConsumer fields; int t1 = random.Next(4); if (t1 == 0) { bool success = false; try { fields = new FSTTermsWriter(state, postingsWriter); success = true; } finally { if (!success) { postingsWriter.Dispose(); } } } else if (t1 == 1) { bool success = false; try { fields = new FSTOrdTermsWriter(state, postingsWriter); success = true; } finally { if (!success) { postingsWriter.Dispose(); } } } else if (t1 == 2) { // Use BlockTree terms dict if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: writing BlockTree terms dict"); } // TODO: would be nice to allow 1 but this is very // slow to write int minTermsInBlock = TestUtil.NextInt32(random, 2, 100); int maxTermsInBlock = Math.Max(2, (minTermsInBlock - 1) * 2 + random.Next(100)); bool success = false; try { fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock); success = true; } finally { if (!success) { postingsWriter.Dispose(); } } } else { if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: writing Block terms dict"); } bool success = false; TermsIndexWriterBase indexWriter; try { if (random.NextBoolean()) { state.TermIndexInterval = TestUtil.NextInt32(random, 1, 100); if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: fixed-gap terms index (tii=" + state.TermIndexInterval + ")"); } indexWriter = new FixedGapTermsIndexWriter(state); } else { VariableGapTermsIndexWriter.IndexTermSelector selector; int n2 = random.Next(3); if (n2 == 0) { int tii = TestUtil.NextInt32(random, 1, 100); selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii); if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: variable-gap terms index (tii=" + tii + ")"); } } else if (n2 == 1) { int docFreqThresh = TestUtil.NextInt32(random, 2, 
100); int tii = TestUtil.NextInt32(random, 1, 100); selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii); } else { long seed2 = random.NextInt64(); int gap = TestUtil.NextInt32(random, 2, 40); if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: random-gap terms index (max gap=" + gap + ")"); } selector = new IndexTermSelectorAnonymousClass(seed2, gap); } indexWriter = new VariableGapTermsIndexWriter(state, selector); } success = true; } finally { if (!success) { postingsWriter.Dispose(); } } success = false; try { fields = new BlockTermsWriter(indexWriter, state, postingsWriter); success = true; } finally { if (!success) { try { postingsWriter.Dispose(); } finally { indexWriter.Dispose(); } } } } return(fields); }
public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField) { if (Verbose) { Console.WriteLine("\nr1 docs:"); PrintDocs(r1); Console.WriteLine("\nr2 docs:"); PrintDocs(r2); } if (r1.NumDocs != r2.NumDocs) { if (Debugging.AssertsEnabled) { Debugging.Assert(false, () => "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs); } } bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc); int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping // create mapping from id2 space to id2 based on idField Fields f1 = MultiFields.GetFields(r1); if (f1 == null) { // make sure r2 is empty Assert.IsNull(MultiFields.GetFields(r2)); return; } Terms terms1 = f1.GetTerms(idField); if (terms1 == null) { Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).GetTerms(idField) == null); return; } TermsEnum termsEnum = terms1.GetIterator(null); IBits liveDocs1 = MultiFields.GetLiveDocs(r1); IBits liveDocs2 = MultiFields.GetLiveDocs(r2); Fields fields = MultiFields.GetFields(r2); if (fields == null) { // make sure r1 is in fact empty (eg has only all // deleted docs): IBits liveDocs = MultiFields.GetLiveDocs(r1); DocsEnum docs = null; while (termsEnum.Next() != null) { docs = TestUtil.Docs(Random, termsEnum, liveDocs, docs, DocsFlags.NONE); while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { Assert.Fail("r1 is not empty but r2 is"); } } return; } Terms terms2 = fields.GetTerms(idField); TermsEnum termsEnum2 = terms2.GetIterator(null); DocsEnum termDocs1 = null; DocsEnum termDocs2 = null; while (true) { BytesRef term = termsEnum.Next(); //System.out.println("TEST: match id term=" + term); if (term == null) { break; } termDocs1 = TestUtil.Docs(Random, termsEnum, liveDocs1, termDocs1, DocsFlags.NONE); if (termsEnum2.SeekExact(term)) { termDocs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, termDocs2, DocsFlags.NONE); } else { termDocs2 = null; } if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS) { // this doc is deleted and wasn't replaced Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS); continue; } int id1 = termDocs1.DocID; Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc()); Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int id2 = termDocs2.DocID; Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc()); r2r1[id2] = id1; // verify stored fields are equivalent try { VerifyEquals(r1.Document(id1), r2.Document(id2)); } catch (Exception /*t*/) { Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term); Console.WriteLine(" d1=" + r1.Document(id1)); Console.WriteLine(" d2=" + r2.Document(id2)); throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) } try { // verify term vectors are equivalent VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2)); } catch (Exception /*e*/) { Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2); Fields tv1 = r1.GetTermVectors(id1); Console.WriteLine(" d1=" + tv1); if (tv1 != null) { DocsAndPositionsEnum dpEnum = null; DocsEnum dEnum = null; foreach (string field in tv1) { Console.WriteLine(" " + field + ":"); Terms terms3 = tv1.GetTerms(field); Assert.IsNotNull(terms3); TermsEnum termsEnum3 = terms3.GetIterator(null); BytesRef term2; while ((term2 = termsEnum3.Next()) != null) { Console.WriteLine(" " + term2.Utf8ToString() + ": freq=" + 
termsEnum3.TotalTermFreq); dpEnum = termsEnum3.DocsAndPositions(null, dpEnum); if (dpEnum != null) { Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = dpEnum.Freq; Console.WriteLine(" doc=" + dpEnum.DocID + " freq=" + freq); for (int posUpto = 0; posUpto < freq; posUpto++) { Console.WriteLine(" pos=" + dpEnum.NextPosition()); } } else { dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS); Assert.IsNotNull(dEnum); Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = dEnum.Freq; Console.WriteLine(" doc=" + dEnum.DocID + " freq=" + freq); } } } } Fields tv2 = r2.GetTermVectors(id2); Console.WriteLine(" d2=" + tv2); if (tv2 != null) { DocsAndPositionsEnum dpEnum = null; DocsEnum dEnum = null; foreach (string field in tv2) { Console.WriteLine(" " + field + ":"); Terms terms3 = tv2.GetTerms(field); Assert.IsNotNull(terms3); TermsEnum termsEnum3 = terms3.GetIterator(null); BytesRef term2; while ((term2 = termsEnum3.Next()) != null) { Console.WriteLine(" " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq); dpEnum = termsEnum3.DocsAndPositions(null, dpEnum); if (dpEnum != null) { Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = dpEnum.Freq; Console.WriteLine(" doc=" + dpEnum.DocID + " freq=" + freq); for (int posUpto = 0; posUpto < freq; posUpto++) { Console.WriteLine(" pos=" + dpEnum.NextPosition()); } } else { dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS); Assert.IsNotNull(dEnum); Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = dEnum.Freq; Console.WriteLine(" doc=" + dEnum.DocID + " freq=" + freq); } } } } throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) } } //System.out.println("TEST: done match id"); // Verify postings //System.out.println("TEST: create te1"); Fields fields1 = MultiFields.GetFields(r1); IEnumerator <string> fields1Enum = fields1.GetEnumerator(); Fields fields2 = MultiFields.GetFields(r2); IEnumerator <string> fields2Enum = fields2.GetEnumerator(); string field1 = null, field2 = null; TermsEnum termsEnum1 = null; termsEnum2 = null; DocsEnum docs1 = null, docs2 = null; // pack both doc and freq into single element for easy sorting long[] info1 = new long[r1.NumDocs]; long[] info2 = new long[r2.NumDocs]; for (; ;) { BytesRef term1 = null, term2 = null; // iterate until we get some docs int len1; for (; ;) { len1 = 0; if (termsEnum1 == null) { if (!fields1Enum.MoveNext()) { break; } field1 = fields1Enum.Current; Terms terms = fields1.GetTerms(field1); if (terms == null) { continue; } termsEnum1 = terms.GetIterator(null); } term1 = termsEnum1.Next(); if (term1 == null) { // no more terms in this field termsEnum1 = null; continue; } //System.out.println("TEST: term1=" + term1); docs1 = TestUtil.Docs(Random, termsEnum1, liveDocs1, docs1, DocsFlags.FREQS); while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int d = docs1.DocID; int f = docs1.Freq; info1[len1] = (((long)d) << 32) | (uint)f; len1++; } if (len1 > 0) { break; } } // iterate until we get some docs int len2; for (; ;) { len2 = 0; if (termsEnum2 == null) { if (!fields2Enum.MoveNext()) { break; } field2 = fields2Enum.Current; Terms terms = fields2.GetTerms(field2); if (terms == null) { continue; } termsEnum2 = terms.GetIterator(null); } term2 = termsEnum2.Next(); if (term2 == null) { // no more terms in this field termsEnum2 = null; continue; } 
//System.out.println("TEST: term1=" + term1); docs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, docs2, DocsFlags.FREQS); while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int d = r2r1[docs2.DocID]; int f = docs2.Freq; info2[len2] = (((long)d) << 32) | (uint)f; len2++; } if (len2 > 0) { break; } } Assert.AreEqual(len1, len2); if (len1 == 0) // no more terms { break; } Assert.AreEqual(field1, field2); Assert.IsTrue(term1.BytesEquals(term2)); if (!hasDeletes) { Assert.AreEqual(termsEnum1.DocFreq, termsEnum2.DocFreq); } Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes); // sort info2 to get it into ascending docid Array.Sort(info2, 0, len2); // now compare for (int i = 0; i < len1; i++) { Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString()); } } }
private void DoTestSeekDoesNotExist(Random r, int numField, IList <Term> fieldTerms, Term[] fieldTermsArray, IndexReader reader) { IDictionary <string, TermsEnum> tes = new Dictionary <string, TermsEnum>(); if (Verbose) { Console.WriteLine("TEST: top random seeks"); } { int num = AtLeast(100); for (int iter = 0; iter < num; iter++) { // seek to random spot string field = ("f" + r.Next(numField)).Intern(); Term tx = new Term(field, GetRandomString(r)); int spot = Array.BinarySearch(fieldTermsArray, tx); if (spot < 0) { if (Verbose) { Console.WriteLine("TEST: non-exist seek to " + field + ":" + UnicodeUtil.ToHexString(tx.Text)); } // term does not exist: if (!tes.TryGetValue(field, out TermsEnum te)) { te = MultiFields.GetTerms(reader, field).GetEnumerator(); tes[field] = te; } if (Verbose) { Console.WriteLine(" got enum"); } spot = -spot - 1; if (spot == fieldTerms.Count || !fieldTerms[spot].Field.Equals(field, StringComparison.Ordinal)) { Assert.AreEqual(TermsEnum.SeekStatus.END, te.SeekCeil(tx.Bytes)); } else { Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(tx.Bytes)); if (Verbose) { Console.WriteLine(" got term=" + UnicodeUtil.ToHexString(te.Term.Utf8ToString())); Console.WriteLine(" exp term=" + UnicodeUtil.ToHexString(fieldTerms[spot].Text)); } Assert.AreEqual(fieldTerms[spot].Bytes, te.Term); // now .next() this many times: int ct = TestUtil.NextInt32(r, 5, 100); for (int i = 0; i < ct; i++) { if (Verbose) { Console.WriteLine("TEST: now next()"); } if (1 + spot + i >= fieldTerms.Count) { break; } Term term = fieldTerms[1 + spot + i]; if (!term.Field.Equals(field, StringComparison.Ordinal)) { Assert.IsFalse(te.MoveNext()); break; } else { Assert.IsTrue(te.MoveNext()); BytesRef t = te.Term; if (Verbose) { Console.WriteLine(" got term=" + (t == null ? null : UnicodeUtil.ToHexString(t.Utf8ToString()))); Console.WriteLine(" exp=" + UnicodeUtil.ToHexString(term.Text.ToString())); } Assert.AreEqual(term.Bytes, t); } } } } } } }
// Look for seek type 3 ("pop"): if the delta from // prev -> current was replacing an S with an E, // we must now seek to beyond that E. this seek // "finishes" the dance at this character // position. private bool DoPop() { if (DEBUG_SURROGATES) { Console.WriteLine(" try pop"); } if (Debugging.AssertsEnabled) { Debugging.Assert(newSuffixStart <= prevTerm.Length); Debugging.Assert(newSuffixStart < scratchTerm.Length || newSuffixStart == 0); } if (prevTerm.Length > newSuffixStart && IsNonBMPChar(prevTerm.Bytes, newSuffixStart) && IsHighBMPChar(scratchTerm.Bytes, newSuffixStart)) { // Seek type 2 -- put 0xFF at this position: scratchTerm.Bytes[newSuffixStart] = 0xff; scratchTerm.Length = newSuffixStart + 1; if (DEBUG_SURROGATES) { Console.WriteLine(" seek to term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()) + " " + scratchTerm.ToString()); } // TODO: more efficient seek? can we simply swap // the enums? outerInstance.TermsDict.SeekEnum(termEnum, new Term(fieldInfo.Name, scratchTerm), true); Term t2 = termEnum.Term(); // We could hit EOF or different field since this // was a seek "forward": if (t2 != null && t2.Field == internedFieldName) { if (DEBUG_SURROGATES) { Console.WriteLine(" got term=" + UnicodeUtil.ToHexString(t2.Text) + " " + t2.Bytes); } BytesRef b2 = t2.Bytes; if (Debugging.AssertsEnabled) { Debugging.Assert(b2.Offset == 0); } // Set newSuffixStart -- we can't use // termEnum's since the above seek may have // done no scanning (eg, term was precisely // and index term, or, was in the term seek // cache): scratchTerm.CopyBytes(b2); SetNewSuffixStart(prevTerm, scratchTerm); return(true); } else if (newSuffixStart != 0 || scratchTerm.Length != 0) { if (DEBUG_SURROGATES) { Console.WriteLine(" got term=null (or next field)"); } newSuffixStart = 0; scratchTerm.Length = 0; return(true); } } return(false); }
public virtual void TestChangeCodecAndMerge() { Directory dir = NewDirectory(); if (VERBOSE) { Console.WriteLine("TEST: make new index"); } IndexWriterConfig iwconf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetCodec(new MockCodec()); iwconf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // ((LogMergePolicy)iwconf.getMergePolicy()).setMergeFactor(10); IndexWriter writer = NewWriter(dir, iwconf); AddDocs(writer, 10); writer.Commit(); AssertQuery(new Term("content", "aaa"), dir, 10); if (VERBOSE) { Console.WriteLine("TEST: addDocs3"); } AddDocs3(writer, 10); writer.Commit(); writer.Dispose(); AssertQuery(new Term("content", "ccc"), dir, 10); AssertQuery(new Term("content", "aaa"), dir, 10); Codec codec = iwconf.Codec; iwconf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetOpenMode(OpenMode.APPEND).SetCodec(codec); // ((LogMergePolicy)iwconf.getMergePolicy()).setNoCFSRatio(0.0); // ((LogMergePolicy)iwconf.getMergePolicy()).setMergeFactor(10); iwconf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwconf.SetCodec(new MockCodec2()); // uses standard for field content writer = NewWriter(dir, iwconf); // swap in new codec for currently written segments if (VERBOSE) { Console.WriteLine("TEST: add docs w/ Standard codec for content field"); } AddDocs2(writer, 10); writer.Commit(); codec = iwconf.Codec; Assert.AreEqual(30, writer.MaxDoc); AssertQuery(new Term("content", "bbb"), dir, 10); AssertQuery(new Term("content", "ccc"), dir, 10); //// AssertQuery(new Term("content", "aaa"), dir, 10); if (VERBOSE) { Console.WriteLine("TEST: add more docs w/ new codec"); } AddDocs2(writer, 10); writer.Commit(); AssertQuery(new Term("content", "ccc"), dir, 10); AssertQuery(new Term("content", "bbb"), dir, 20); AssertQuery(new Term("content", "aaa"), dir, 10); Assert.AreEqual(40, writer.MaxDoc); if (VERBOSE) { Console.WriteLine("TEST: now optimize"); } writer.ForceMerge(1); Assert.AreEqual(40, writer.MaxDoc); writer.Dispose(); AssertQuery(new Term("content", "ccc"), dir, 10); AssertQuery(new Term("content", "bbb"), dir, 20); AssertQuery(new Term("content", "aaa"), dir, 10); dir.Dispose(); }
public virtual void TestRandom() { int num = AtLeast(2); for (int iter = 0; iter < num; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } Directory dir = NewDirectory(); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); // we can do this because we use NoMergePolicy (and dont merge to "nothing") w.KeepFullyDeletedSegments = true; IDictionary <BytesRef, IList <int?> > docs = new Dictionary <BytesRef, IList <int?> >(); ISet <int?> deleted = new JCG.HashSet <int?>(); IList <BytesRef> terms = new List <BytesRef>(); int numDocs = TestUtil.NextInt32(Random, 1, 100 * RANDOM_MULTIPLIER); Documents.Document doc = new Documents.Document(); Field f = NewStringField("field", "", Field.Store.NO); doc.Add(f); Field id = NewStringField("id", "", Field.Store.NO); doc.Add(id); bool onlyUniqueTerms = Random.NextBoolean(); if (VERBOSE) { Console.WriteLine("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs); } ISet <BytesRef> uniqueTerms = new JCG.HashSet <BytesRef>(); for (int i = 0; i < numDocs; i++) { if (!onlyUniqueTerms && Random.NextBoolean() && terms.Count > 0) { // re-use existing term BytesRef term = terms[Random.Next(terms.Count)]; docs[term].Add(i); f.SetStringValue(term.Utf8ToString()); } else { string s = TestUtil.RandomUnicodeString(Random, 10); BytesRef term = new BytesRef(s); if (!docs.TryGetValue(term, out IList <int?> docsTerm)) { docs[term] = docsTerm = new List <int?>(); } docsTerm.Add(i); terms.Add(term); uniqueTerms.Add(term); f.SetStringValue(s); } id.SetStringValue("" + i); w.AddDocument(doc); if (Random.Next(4) == 1) { w.Commit(); } if (i > 0 && Random.Next(20) == 1) { int delID = Random.Next(i); deleted.Add(delID); w.DeleteDocuments(new Term("id", "" + delID)); if (VERBOSE) { Console.WriteLine("TEST: delete " + delID); } } } if (VERBOSE) { List <BytesRef> termsList = new List <BytesRef>(uniqueTerms); #pragma warning disable 612, 618 termsList.Sort(BytesRef.UTF8SortedAsUTF16Comparer); #pragma warning restore 612, 618 Console.WriteLine("TEST: terms in UTF16 order:"); foreach (BytesRef b in termsList) { Console.WriteLine(" " + UnicodeUtil.ToHexString(b.Utf8ToString()) + " " + b); foreach (int docID in docs[b]) { if (deleted.Contains(docID)) { Console.WriteLine(" " + docID + " (deleted)"); } else { Console.WriteLine(" " + docID); } } } } IndexReader reader = w.GetReader(); w.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: reader=" + reader); } IBits liveDocs = MultiFields.GetLiveDocs(reader); foreach (int delDoc in deleted) { Assert.IsFalse(liveDocs.Get(delDoc)); } for (int i = 0; i < 100; i++) { BytesRef term = terms[Random.Next(terms.Count)]; if (VERBOSE) { Console.WriteLine("TEST: seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " " + term); } DocsEnum docsEnum = TestUtil.Docs(Random, reader, "field", term, liveDocs, null, DocsFlags.NONE); Assert.IsNotNull(docsEnum); foreach (int docID in docs[term]) { if (!deleted.Contains(docID)) { Assert.AreEqual(docID, docsEnum.NextDoc()); } } Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc()); } reader.Dispose(); dir.Dispose(); } }
/// <summary> /// Prints the filename and size of each file within a given compound file. /// Add the -extract flag to extract files to the current working directory. /// In order to make the extracted version of the index work, you have to copy /// the segments file from the compound index into the directory where the extracted files are stored. </summary> ///// <param name="args"> Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile> </param> public static void Main(string[] args) { string filename = null; bool extract = false; string dirImpl = null; int j = 0; while (j < args.Length) { string arg = args[j]; if ("-extract".Equals(arg, StringComparison.Ordinal)) { extract = true; } else if ("-dir-impl".Equals(arg, StringComparison.Ordinal)) { if (j == args.Length - 1) { // LUCENENET specific - our wrapper console shows the correct usage throw new ArgumentException("ERROR: missing value for --directory-type option"); //Console.WriteLine("ERROR: missing value for -dir-impl option"); //Environment.Exit(1); } j++; dirImpl = args[j]; } else if (filename == null) { filename = arg; } j++; } if (filename == null) { // LUCENENET specific - our wrapper console shows the correct usage throw new ArgumentException("ERROR: CFS-FILE is required"); //Console.WriteLine("Usage: org.apache.lucene.index.CompoundFileExtractor [-extract] [-dir-impl X] <cfsfile>"); //return; } Store.Directory dir = null; CompoundFileDirectory cfr = null; IOContext context = IOContext.READ; try { FileInfo file = new FileInfo(filename); string dirname = file.DirectoryName; filename = file.Name; if (dirImpl == null) { dir = FSDirectory.Open(new DirectoryInfo(dirname)); } else { dir = CommandLineUtil.NewFSDirectory(dirImpl, new DirectoryInfo(dirname)); } cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false); string[] files = cfr.ListAll(); ArrayUtil.TimSort(files); // sort the array of filename so that the output is more readable for (int i = 0; i < files.Length; ++i) { long len = cfr.FileLength(files[i]); if (extract) { Console.WriteLine("extract " + files[i] + " with " + len + " bytes to local directory..."); using (IndexInput ii = cfr.OpenInput(files[i], context)) { using (FileStream f = new FileStream(files[i], FileMode.Open, FileAccess.ReadWrite)) { // read and write with a small buffer, which is more effective than reading byte by byte byte[] buffer = new byte[1024]; int chunk = buffer.Length; while (len > 0) { int bufLen = (int)Math.Min(chunk, len); ii.ReadBytes(buffer, 0, bufLen); f.Write(buffer, 0, bufLen); len -= bufLen; } } } } else { Console.WriteLine(files[i] + ": " + len + " bytes"); } } } catch (IOException ioe) { Console.WriteLine(ioe.ToString()); //Console.Write(ioe.StackTrace); } finally { try { if (dir != null) { dir.Dispose(); } if (cfr != null) { cfr.Dispose(); } } catch (IOException ioe) { Console.WriteLine(ioe.ToString()); //Console.Write(ioe.StackTrace); } } }
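// NOTE (hypothetical invocation, shown only to illustrate the argument handling above; the file name
// and directory implementation are made up for the example): run against a compound file to list its
// contents, or add -extract to write each sub-file into the current working directory:
//
//     CompoundFileExtractor _0.cfs
//     CompoundFileExtractor -extract -dir-impl MMapDirectory _0.cfs
//
// -dir-impl selects the FSDirectory implementation passed to CommandLineUtil.NewFSDirectory; when it
// is omitted, FSDirectory.Open chooses a default for the platform.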
public override void Run() { Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; customType.StoreTermVectorOffsets = true; doc.Add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType)); doc.Add(new NumericDocValuesField("dv", 5)); int idUpto = 0; int fullCount = 0; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results long stopTime = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + timeToRunInMilliseconds; // LUCENENET specific: added the ability to change how much time to alot do { try { writer.UpdateDocument(new Term("id", "" + (idUpto++)), doc); addCount++; } catch (Exception ioe) when(ioe.IsIOException()) { if (Verbose) { Console.WriteLine("TEST: expected exc:"); Console.WriteLine(ioe.StackTrace); } //System.out.println(Thread.currentThread().getName() + ": hit exc"); //ioConsole.WriteLine(e.StackTrace); if (ioe.Message.StartsWith("fake disk full at", StringComparison.Ordinal) || ioe.Message.Equals("now failing on purpose", StringComparison.Ordinal)) { diskFull = true; try { Thread.Sleep(1); } catch (Exception ie) when(ie.IsInterruptedException()) { throw new Util.ThreadInterruptedException(ie); } if (fullCount++ >= 5) { break; } } else { if (noErrors) { Console.WriteLine(Thread.CurrentThread.Name + ": ERROR: unexpected IOException:"); Console.WriteLine(ioe.StackTrace); error = ioe; } break; } } catch (Exception t) when(t.IsThrowable()) { //Console.WriteLine(t.StackTrace); if (noErrors) { Console.WriteLine(Thread.CurrentThread.Name + ": ERROR: unexpected Throwable:"); Console.WriteLine(t.StackTrace); error = t; } break; } } while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTime); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results }
public override void Run() { for (int iter = 0; iter < iters && !failed.Value; iter++) { //final int x = Random().nextInt(5); int x = Random.Next(3); try { switch (x) { case 0: rollbackLock.@Lock(); if (Verbose) { Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now rollback"); } try { writerRef.Value.Rollback(); if (Verbose) { Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": rollback done; now open new writer"); } writerRef.Value = new IndexWriter(d, NewIndexWriterConfig( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION outerInstance, #endif TEST_VERSION_CURRENT, new MockAnalyzer(Random))); } finally { rollbackLock.Unlock(); } break; case 1: commitLock.@Lock(); if (Verbose) { Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now commit"); } try { if (Random.NextBoolean()) { writerRef.Value.PrepareCommit(); } writerRef.Value.Commit(); } catch (Exception ace) when(ace.IsAlreadyClosedException()) { // ok } catch (NullReferenceException) // LUCENENET TODO: - NullReferenceException must be allowed to propagate so we can defensively avoid it in .NET { // ok } finally { commitLock.Unlock(); } break; case 2: if (Verbose) { Console.WriteLine("\nTEST: " + Thread.CurrentThread.Name + ": now add"); } try { writerRef.Value.AddDocument(docs.NextDoc()); } catch (Exception ace) when(ace.IsAlreadyClosedException()) { // ok } catch (NullReferenceException) // LUCENENET TODO: - NullReferenceException must be allowed to propagate so we can defensively avoid it in .NET { // ok } catch (Exception ae) when(ae.IsAssertionError()) { // ok } break; } } catch (Exception t) when(t.IsThrowable()) { failed.Value = (true); throw RuntimeException.Create(t); } } }
// Runs test, with multiple threads, using the specific // failure to trigger an IOException public virtual void TestMultipleThreadsFailure(Failure failure) { int NUM_THREADS = 3; for (int iter = 0; iter < 2; iter++) { if (Verbose) { Console.WriteLine("TEST: iter=" + iter); } MockDirectoryWrapper dir = NewMockDirectory(); IndexWriter writer = new IndexWriter( dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(2) .SetMergeScheduler(new ConcurrentMergeScheduler()) .SetMergePolicy(NewLogMergePolicy(4))); ((IConcurrentMergeScheduler)writer.Config.MergeScheduler).SetSuppressExceptions(); IndexerThread[] threads = new IndexerThread[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { threads[i] = new IndexerThread(writer, true, NewField); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Start(); } Thread.Sleep(10); dir.FailOn(failure); failure.SetDoFail(); for (int i = 0; i < NUM_THREADS; i++) { threads[i].Join(); Assert.IsTrue(threads[i].error == null, "hit unexpected Throwable"); } bool success = false; try { writer.Dispose(false); success = true; } catch (Exception ioe) when(ioe.IsIOException()) { failure.ClearDoFail(); writer.Dispose(false); } if (Verbose) { Console.WriteLine("TEST: success=" + success); } if (success) { IndexReader reader = DirectoryReader.Open(dir); IBits delDocs = MultiFields.GetLiveDocs(reader); for (int j = 0; j < reader.MaxDoc; j++) { if (delDocs == null || !delDocs.Get(j)) { reader.Document(j); reader.GetTermVectors(j); } } reader.Dispose(); } dir.Dispose(); } }
public virtual void TestCloseWithThreads() { int NUM_THREADS = 3; int numIterations = TestNightly ? 7 : 3; for (int iter = 0; iter < numIterations; iter++) { if (Verbose) { Console.WriteLine("\nTEST: iter=" + iter); } Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter( dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(10) .SetMergeScheduler(new ConcurrentMergeScheduler()) .SetMergePolicy(NewLogMergePolicy(4))); ((IConcurrentMergeScheduler)writer.Config.MergeScheduler).SetSuppressExceptions(); IndexerThread[] threads = new IndexerThread[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { threads[i] = new IndexerThread(writer, false, NewField) // LUCENENET NOTE - ConcurrentMergeScheduler // used to take too long for this test to index a single document // so, increased the time from 200 to 300 ms. // But it has now been restored to 200 ms like Lucene. { timeToRunInMilliseconds = 200 }; } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Start(); } bool done = false; while (!done) { Thread.Sleep(100); for (int i = 0; i < NUM_THREADS; i++) // only stop when at least one thread has added a doc { if (threads[i].addCount > 0) { done = true; break; } else if (!threads[i].IsAlive) { Assert.Fail("thread failed before indexing a single document"); } } } if (Verbose) { Console.WriteLine("\nTEST: now close"); } writer.Dispose(false); // Make sure threads that are adding docs are not hung: for (int i = 0; i < NUM_THREADS; i++) { // Without fix for LUCENE-1130: one of the // threads will hang threads[i].Join(); if (threads[i].IsAlive) { Assert.Fail("thread seems to be hung"); } } // Quick test to make sure index is not corrupt: IndexReader reader = DirectoryReader.Open(dir); DocsEnum tdocs = TestUtil.Docs(Random, reader, "field", new BytesRef("aaa"), MultiFields.GetLiveDocs(reader), null, 0); int count = 0; while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { count++; } Assert.IsTrue(count > 0); reader.Dispose(); dir.Dispose(); } }
[AwaitsFix(BugUrl = "https://github.com/apache/lucenenet/issues/269")] // LUCENENET TODO: this test fails on x86 on .NET Framework in Release mode only #endif public virtual void TestBS2DisjunctionNextVsAdvance() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, d); int numDocs = AtLeast(300); for (int docUpto = 0; docUpto < numDocs; docUpto++) { string contents = "a"; if (Random.Next(20) <= 16) { contents += " b"; } if (Random.Next(20) <= 8) { contents += " c"; } if (Random.Next(20) <= 4) { contents += " d"; } if (Random.Next(20) <= 2) { contents += " e"; } if (Random.Next(20) <= 1) { contents += " f"; } Document doc = new Document(); doc.Add(new TextField("field", contents, Field.Store.NO)); w.AddDocument(doc); } w.ForceMerge(1); IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); w.Dispose(); for (int iter = 0; iter < 10 * RandomMultiplier; iter++) { if (Verbose) { Console.WriteLine("iter=" + iter); } IList <string> terms = new List <string> { "a", "b", "c", "d", "e", "f" }; int numTerms = TestUtil.NextInt32(Random, 1, terms.Count); while (terms.Count > numTerms) { terms.RemoveAt(Random.Next(terms.Count)); } if (Verbose) { Console.WriteLine(" terms=" + terms); } BooleanQuery q = new BooleanQuery(); foreach (string term in terms) { q.Add(new BooleanClause(new TermQuery(new Term("field", term)), Occur.SHOULD)); } Weight weight = s.CreateNormalizedWeight(q); Scorer scorer = weight.GetScorer(s.m_leafContexts[0], null); // First pass: just use .NextDoc() to gather all hits IList <ScoreDoc> hits = new List <ScoreDoc>(); while (scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { hits.Add(new ScoreDoc(scorer.DocID, scorer.GetScore())); } if (Verbose) { Console.WriteLine(" " + hits.Count + " hits"); } // Now, randomly next/advance through the list and // verify exact match: for (int iter2 = 0; iter2 < 10; iter2++) { weight = s.CreateNormalizedWeight(q); scorer = weight.GetScorer(s.m_leafContexts[0], null); if (Verbose) { Console.WriteLine(" iter2=" + iter2); } int upto = -1; while (upto < hits.Count) { int nextUpto; int nextDoc; int left = hits.Count - upto; if (left == 1 || Random.NextBoolean()) { // next nextUpto = 1 + upto; nextDoc = scorer.NextDoc(); } else { // advance int inc = TestUtil.NextInt32(Random, 1, left - 1); nextUpto = inc + upto; nextDoc = scorer.Advance(hits[nextUpto].Doc); } if (nextUpto == hits.Count) { Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, nextDoc); } else { ScoreDoc hit = hits[nextUpto]; Assert.AreEqual(hit.Doc, nextDoc); // Test for precise float equality: Assert.IsTrue(hit.Score == scorer.GetScore(), "doc " + hit.Doc + " has wrong score: expected=" + hit.Score + " actual=" + scorer.GetScore()); } upto = nextUpto; } } } r.Dispose(); d.Dispose(); }
private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex) { for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++) { if (Verbose) { Console.WriteLine("indexIter=" + indexIter); } Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false)) .SetMergePolicy(NewLogMergePolicy())); bool scoreDocsInOrder = TestJoinUtil.Random.NextBoolean(); IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, scoreDocsInOrder); IndexReader topLevelReader = w.GetReader(); w.Dispose(); for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++) { if (Verbose) { Console.WriteLine("searchIter=" + searchIter); } IndexSearcher indexSearcher = NewSearcher(topLevelReader); int r = Random.Next(context.RandomUniqueValues.Length); bool from = context.RandomFrom[r]; string randomValue = context.RandomUniqueValues[r]; FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader, context); Query actualQuery = new TermQuery(new Term("value", randomValue)); if (Verbose) { Console.WriteLine("actualQuery=" + actualQuery); } var scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length; ScoreMode scoreMode = (ScoreMode)Random.Next(scoreModeLength); if (Verbose) { Console.WriteLine("scoreMode=" + scoreMode); } Query joinQuery; if (from) { joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode); } else { joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode); } if (Verbose) { Console.WriteLine("joinQuery=" + joinQuery); } // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector... FixedBitSet actualResult = new FixedBitSet(indexSearcher.IndexReader.MaxDoc); TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false); indexSearcher.Search(joinQuery, new CollectorAnonymousClass2(scoreDocsInOrder, actualResult, topScoreDocCollector)); // Asserting bit set... if (Verbose) { Console.WriteLine("expected cardinality:" + expectedResult.Cardinality); DocIdSetIterator iterator = expectedResult.GetIterator(); for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc()) { Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id"))); } Console.WriteLine("actual cardinality:" + actualResult.Cardinality); iterator = actualResult.GetIterator(); for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc()) { Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id"))); } } assertEquals(expectedResult, actualResult); // Asserting TopDocs... 
TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context); TopDocs actualTopDocs = topScoreDocCollector.GetTopDocs(); assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits); assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length); if (scoreMode == ScoreMode.None) { continue; } assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f); for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++) { if (Verbose) { Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc)); Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score)); } assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc); assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f); Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc); assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f); } } topLevelReader.Dispose(); dir.Dispose(); } }
private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, bool multipleValuesPerDocument, bool scoreDocsInOrder) { IndexIterationContext context = new IndexIterationContext(); int numRandomValues = nDocs / 2; context.RandomUniqueValues = new string[numRandomValues]; ISet <string> trackSet = new JCG.HashSet <string>(); context.RandomFrom = new bool[numRandomValues]; for (int i = 0; i < numRandomValues; i++) { string uniqueRandomValue; do { uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random); // uniqueRandomValue = TestUtil.randomSimpleString(random); } while ("".Equals(uniqueRandomValue, StringComparison.Ordinal) || trackSet.Contains(uniqueRandomValue)); // Generate unique values and empty strings aren't allowed. trackSet.Add(uniqueRandomValue); context.RandomFrom[i] = Random.NextBoolean(); context.RandomUniqueValues[i] = uniqueRandomValue; } RandomDoc[] docs = new RandomDoc[nDocs]; for (int i = 0; i < nDocs; i++) { string id = Convert.ToString(i, CultureInfo.InvariantCulture); int randomI = Random.Next(context.RandomUniqueValues.Length); string value = context.RandomUniqueValues[randomI]; Document document = new Document(); document.Add(NewTextField(Random, "id", id, Field.Store.NO)); document.Add(NewTextField(Random, "value", value, Field.Store.NO)); bool from = context.RandomFrom[randomI]; int numberOfLinkValues = multipleValuesPerDocument ? 2 + Random.Next(10) : 1; docs[i] = new RandomDoc(id, numberOfLinkValues, value, from); for (int j = 0; j < numberOfLinkValues; j++) { string linkValue = context.RandomUniqueValues[Random.Next(context.RandomUniqueValues.Length)]; docs[i].linkValues.Add(linkValue); if (from) { if (!context.FromDocuments.TryGetValue(linkValue, out IList <RandomDoc> fromDocs)) { context.FromDocuments[linkValue] = fromDocs = new JCG.List <RandomDoc>(); } if (!context.RandomValueFromDocs.TryGetValue(value, out IList <RandomDoc> randomValueFromDocs)) { context.RandomValueFromDocs[value] = randomValueFromDocs = new JCG.List <RandomDoc>(); } fromDocs.Add(docs[i]); randomValueFromDocs.Add(docs[i]); document.Add(NewTextField(Random, "from", linkValue, Field.Store.NO)); } else { if (!context.ToDocuments.TryGetValue(linkValue, out IList <RandomDoc> toDocuments)) { context.ToDocuments[linkValue] = toDocuments = new JCG.List <RandomDoc>(); } if (!context.RandomValueToDocs.TryGetValue(value, out IList <RandomDoc> randomValueToDocs)) { context.RandomValueToDocs[value] = randomValueToDocs = new JCG.List <RandomDoc>(); } toDocuments.Add(docs[i]); randomValueToDocs.Add(docs[i]); document.Add(NewTextField(Random, "to", linkValue, Field.Store.NO)); } } RandomIndexWriter w; if (from) { w = fromWriter; } else { w = toWriter; } w.AddDocument(document); if (Random.Next(10) == 4) { w.Commit(); } if (Verbose) { Console.WriteLine("Added document[" + docs[i].id + "]: " + document); } } // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for // any ScoreMode. 
IndexSearcher fromSearcher = NewSearcher(fromWriter.GetReader()); IndexSearcher toSearcher = NewSearcher(toWriter.GetReader()); for (int i = 0; i < context.RandomUniqueValues.Length; i++) { string uniqueRandomValue = context.RandomUniqueValues[i]; string fromField; string toField; IDictionary <string, IDictionary <int, JoinScore> > queryVals; if (context.RandomFrom[i]) { fromField = "from"; toField = "to"; queryVals = context.FromHitsToJoinScore; } else { fromField = "to"; toField = "from"; queryVals = context.ToHitsToJoinScore; } IDictionary <BytesRef, JoinScore> joinValueToJoinScores = new Dictionary <BytesRef, JoinScore>(); if (multipleValuesPerDocument) { fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousClass3(fromField, joinValueToJoinScores)); } else { fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousClass4(fromField, joinValueToJoinScores)); } IDictionary <int, JoinScore> docToJoinScore = new Dictionary <int, JoinScore>(); if (multipleValuesPerDocument) { if (scoreDocsInOrder) { AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader); Terms terms = slowCompositeReader.GetTerms(toField); if (terms != null) { DocsEnum docsEnum = null; TermsEnum termsEnum = null; JCG.SortedSet <BytesRef> joinValues = new JCG.SortedSet <BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer); joinValues.UnionWith(joinValueToJoinScores.Keys); foreach (BytesRef joinValue in joinValues) { termsEnum = terms.GetEnumerator(termsEnum); if (termsEnum.SeekExact(joinValue)) { docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsFlags.NONE); JoinScore joinScore = joinValueToJoinScores[joinValue]; for (int doc = docsEnum.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.NextDoc()) { // First encountered join value determines the score. // Something to keep in mind for many-to-many relations. if (!docToJoinScore.ContainsKey(doc)) { docToJoinScore[doc] = joinScore; } } } } } } else { toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousClass5(toField, joinValueToJoinScores, docToJoinScore)); } } else { toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousClass6(toField, joinValueToJoinScores, docToJoinScore)); } queryVals[uniqueRandomValue] = docToJoinScore; } fromSearcher.IndexReader.Dispose(); toSearcher.IndexReader.Dispose(); return(context); }
public override CollectionStatistics CollectionStatistics(string field) { // TODO: we could compute this on init and cache, // since we are re-inited whenever any nodes have a // new reader long docCount = 0; long sumTotalTermFreq = 0; long sumDocFreq = 0; long maxDoc = 0; for (int nodeID = 0; nodeID < nodeVersions.Length; nodeID++) { FieldAndShardVersion key = new FieldAndShardVersion(nodeID, nodeVersions[nodeID], field); CollectionStatistics nodeStats; if (nodeID == MyNodeID) { nodeStats = base.CollectionStatistics(field); } else { outerInstance.collectionStatsCache.TryGetValue(key, out nodeStats); } if (nodeStats is null) { Console.WriteLine("coll stats myNodeID=" + MyNodeID + ": " + Collections.ToString(outerInstance.collectionStatsCache.Keys)); } // Collection stats are pre-shared on reopen, so, // we better not have a cache miss: if (Debugging.AssertsEnabled) { Debugging.Assert(nodeStats != null, "myNodeID={0} nodeID={1} version={2} field={3}", MyNodeID, nodeID, nodeVersions[nodeID], field); } long nodeDocCount = nodeStats.DocCount; if (docCount >= 0 && nodeDocCount >= 0) { docCount += nodeDocCount; } else { docCount = -1; } long nodeSumTotalTermFreq = nodeStats.SumTotalTermFreq; if (sumTotalTermFreq >= 0 && nodeSumTotalTermFreq >= 0) { sumTotalTermFreq += nodeSumTotalTermFreq; } else { sumTotalTermFreq = -1; } long nodeSumDocFreq = nodeStats.SumDocFreq; if (sumDocFreq >= 0 && nodeSumDocFreq >= 0) { sumDocFreq += nodeSumDocFreq; } else { sumDocFreq = -1; } if (Debugging.AssertsEnabled) { Debugging.Assert(nodeStats.MaxDoc >= 0); } maxDoc += nodeStats.MaxDoc; } return(new CollectionStatistics(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq)); }
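// NOTE (illustrative sketch, not from the original source; the helper name is hypothetical):
// CollectionStatistics uses -1 to mean "statistic not available", which is why each running total
// above only keeps accumulating while both the total and the per-node value are non-negative. The
// same rule as a small helper:
//
//     static long AddOrUnknown(long total, long nodeValue)
//         => (total >= 0 && nodeValue >= 0) ? total + nodeValue : -1;
//
// e.g. docCount = AddOrUnknown(docCount, nodeStats.DocCount); once any node reports -1, the aggregate
// stays -1 for the remaining nodes, while maxDoc is always non-negative and is summed unconditionally.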
public virtual void Test2BTerms_Mem() { if ("Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal)) { throw RuntimeException.Create("this test cannot run with PreFlex codec"); } Console.WriteLine("Starting Test2B"); long TERM_COUNT = ((long)int.MaxValue) + 100000000; int TERMS_PER_DOC = TestUtil.NextInt32(Random, 100000, 1000000); IList <BytesRef> savedTerms = null; BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms")); //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER; } dir.CheckIndexOnDispose = false; // don't double-checkindex if (true) { IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(new ConcurrentMergeScheduler()) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(OpenMode.CREATE)); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Documents.Document doc = new Documents.Document(); MyTokenStream ts = new MyTokenStream(Random, TERMS_PER_DOC); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.IndexOptions = IndexOptions.DOCS_ONLY; customType.OmitNorms = true; Field field = new Field("field", ts, customType); doc.Add(field); //w.setInfoStream(System.out); int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC); Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC); Console.WriteLine("numDocs=" + numDocs); for (int i = 0; i < numDocs; i++) { long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results w.AddDocument(doc); Console.WriteLine(i + " of " + numDocs + " " + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " msec"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results } savedTerms = ts.savedTerms; Console.WriteLine("TEST: full merge"); w.ForceMerge(1); Console.WriteLine("TEST: close writer"); w.Dispose(); } Console.WriteLine("TEST: open reader"); IndexReader r = DirectoryReader.Open(dir); if (savedTerms == null) { savedTerms = FindTerms(r); } int numSavedTerms = savedTerms.Count; IList <BytesRef> bigOrdTerms = new JCG.List <BytesRef>(savedTerms.GetView(numSavedTerms - 10, 10)); // LUCENENET: Converted end index to length Console.WriteLine("TEST: test big ord terms..."); TestSavedTerms(r, bigOrdTerms); Console.WriteLine("TEST: test all saved terms..."); TestSavedTerms(r, savedTerms); r.Dispose(); Console.WriteLine("TEST: now CheckIndex..."); CheckIndex.Status status = TestUtil.CheckIndex(dir); long tc = status.SegmentInfos[0].TermIndexStatus.TermCount; Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue); dir.Dispose(); Console.WriteLine("TEST: done!"); }
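// NOTE (worked numbers, derived from the constants above): TERM_COUNT is int.MaxValue + 100,000,000
// (about 2.25 billion term occurrences) and TERMS_PER_DOC is drawn from [100,000 .. 1,000,000], so
// numDocs = TERM_COUNT / TERMS_PER_DOC lands roughly between 2,200 and 22,500 documents -- few enough
// to index one very large document at a time while still pushing the checked term count past
// int.MaxValue, which is what the final CheckIndex assertion verifies.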
public override void Run() { // TODO: would be better if this were cross thread, so that we make sure one thread deleting anothers added docs works: IList <string> toDeleteIDs = new List <string>(); IList <SubDocs> toDeleteSubDocs = new List <SubDocs>(); while (Environment.TickCount < stopTime && !outerInstance.m_failed) { try { // Occasional longish pause if running // nightly if (LuceneTestCase.TestNightly && Random.Next(6) == 3) { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": now long sleep"); } //Thread.Sleep(TestUtil.NextInt32(Random, 50, 500)); // LUCENENET specific - Reduced amount of pause to keep the total // Nightly test time under 1 hour Thread.Sleep(TestUtil.NextInt32(Random, 50, 250)); } // Rate limit ingest rate: if (Random.Next(7) == 5) { Thread.Sleep(TestUtil.NextInt32(Random, 1, 10)); if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": done sleep"); } } Document doc = docs.NextDoc(); if (doc == null) { break; } // Maybe add randomly named field string addedField; if (Random.NextBoolean()) { addedField = "extra" + Random.Next(40); doc.Add(NewTextField(addedField, "a random field", Field.Store.YES)); } else { addedField = null; } if (Random.NextBoolean()) { if (Random.NextBoolean()) { // Add/update doc block: string packID; SubDocs delSubDocs; if (toDeleteSubDocs.Count > 0 && Random.NextBoolean()) { delSubDocs = toDeleteSubDocs[Random.Next(toDeleteSubDocs.Count)]; if (Debugging.AssertsEnabled) { Debugging.Assert(!delSubDocs.Deleted); } toDeleteSubDocs.Remove(delSubDocs); // Update doc block, replacing prior packID packID = delSubDocs.PackID; } else { delSubDocs = null; // Add doc block, using new packID packID = outerInstance.m_packCount.GetAndIncrement().ToString(CultureInfo.InvariantCulture); } Field packIDField = NewStringField("packID", packID, Field.Store.YES); IList <string> docIDs = new List <string>(); SubDocs subDocs = new SubDocs(packID, docIDs); IList <Document> docsList = new List <Document>(); allSubDocs.Enqueue(subDocs); doc.Add(packIDField); docsList.Add(TestUtil.CloneDocument(doc)); docIDs.Add(doc.Get("docid")); int maxDocCount = TestUtil.NextInt32(Random, 1, 10); while (docsList.Count < maxDocCount) { doc = docs.NextDoc(); if (doc == null) { break; } docsList.Add(TestUtil.CloneDocument(doc)); docIDs.Add(doc.Get("docid")); } outerInstance.m_addCount.AddAndGet(docsList.Count); Term packIDTerm = new Term("packID", packID); if (delSubDocs != null) { delSubDocs.Deleted = true; delIDs.UnionWith(delSubDocs.SubIDs); outerInstance.m_delCount.AddAndGet(delSubDocs.SubIDs.Count); if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": update pack packID=" + delSubDocs.PackID + " count=" + docsList.Count + " docs=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", docIDs)); } outerInstance.UpdateDocuments(packIDTerm, docsList); } else { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": add pack packID=" + packID + " count=" + docsList.Count + " docs=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", docIDs)); } outerInstance.AddDocuments(packIDTerm, docsList); } doc.RemoveField("packID"); if (Random.Next(5) == 2) { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + packID); } toDeleteSubDocs.Add(subDocs); } } else { // Add single doc string docid = doc.Get("docid"); if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": add doc docid:" + docid); } outerInstance.AddDocument(new Term("docid", docid), doc); outerInstance.m_addCount.GetAndIncrement(); 
if (Random.Next(5) == 3) { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + doc.Get("docid")); } toDeleteIDs.Add(docid); } } } else { // Update single doc, but we never re-use // and ID so the delete will never // actually happen: if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": update doc id:" + doc.Get("docid")); } string docid = doc.Get("docid"); outerInstance.UpdateDocument(new Term("docid", docid), doc); outerInstance.m_addCount.GetAndIncrement(); if (Random.Next(5) == 3) { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + doc.Get("docid")); } toDeleteIDs.Add(docid); } } if (Random.Next(30) == 17) { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": apply " + toDeleteIDs.Count + " deletes"); } foreach (string id in toDeleteIDs) { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": del term=id:" + id); } outerInstance.DeleteDocuments(new Term("docid", id)); } int count = outerInstance.m_delCount.AddAndGet(toDeleteIDs.Count); if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": tot " + count + " deletes"); } delIDs.UnionWith(toDeleteIDs); toDeleteIDs.Clear(); foreach (SubDocs subDocs in toDeleteSubDocs) { if (Debugging.AssertsEnabled) { Debugging.Assert(!subDocs.Deleted); } delPackIDs.Add(subDocs.PackID); outerInstance.DeleteDocuments(new Term("packID", subDocs.PackID)); subDocs.Deleted = true; if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": del subs: " + subDocs.SubIDs + " packID=" + subDocs.PackID); } delIDs.UnionWith(subDocs.SubIDs); outerInstance.m_delCount.AddAndGet(subDocs.SubIDs.Count); } toDeleteSubDocs.Clear(); } if (addedField != null) { doc.RemoveField(addedField); } } catch (Exception t) { Console.WriteLine(Thread.CurrentThread.Name + ": hit exc"); Console.WriteLine(t.ToString()); Console.Write(t.StackTrace); outerInstance.m_failed.Value = (true); throw new Exception(t.ToString(), t); } } if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": indexing done"); } outerInstance.DoAfterIndexingThreadDone(); }
// Swap in S, in place of E: private bool SeekToNonBMP(SegmentTermEnum te, BytesRef term, int pos) { int savLength = term.Length; if (Debugging.AssertsEnabled) { Debugging.Assert(term.Offset == 0); } // The 3 bytes starting at downTo make up 1 // unicode character: if (Debugging.AssertsEnabled) { Debugging.Assert(IsHighBMPChar(term.Bytes, pos)); } // NOTE: we cannot make this assert, because // AutomatonQuery legitimately sends us malformed UTF8 // (eg the UTF8 bytes with just 0xee) // assert term.length >= pos + 3: "term.length=" + term.length + " pos+3=" + (pos+3) + " byte=" + Integer.toHexString(term.bytes[pos]) + " term=" + term.toString(); // Save the bytes && length, since we need to // restore this if seek "back" finds no matching // terms if (term.Bytes.Length < 4 + pos) { term.Grow(4 + pos); } scratch[0] = (sbyte)term.Bytes[pos]; scratch[1] = (sbyte)term.Bytes[pos + 1]; scratch[2] = (sbyte)term.Bytes[pos + 2]; term.Bytes[pos] = 0xf0; term.Bytes[pos + 1] = 0x90; term.Bytes[pos + 2] = 0x80; term.Bytes[pos + 3] = 0x80; term.Length = 4 + pos; if (DEBUG_SURROGATES) { Console.WriteLine(" try seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString())); } // Seek "back": outerInstance.TermsDict.SeekEnum(te, new Term(fieldInfo.Name, term), true); // Test if the term we seek'd to in fact found a // surrogate pair at the same position as the E: Term t2 = te.Term(); // Cannot be null (or move to next field) because at // "worst" it'd seek to the same term we are on now, // unless we are being called from seek if (t2 is null || t2.Field != internedFieldName) { return(false); } if (DEBUG_SURROGATES) { Console.WriteLine(" got term=" + UnicodeUtil.ToHexString(t2.Text)); } // Now test if prefix is identical and we found // a non-BMP char at the same position: BytesRef b2 = t2.Bytes; if (Debugging.AssertsEnabled) { Debugging.Assert(b2.Offset == 0); } bool matches; if (b2.Length >= term.Length && IsNonBMPChar(b2.Bytes, pos)) { matches = true; for (int i = 0; i < pos; i++) { if (term.Bytes[i] != b2.Bytes[i]) { matches = false; break; } } } else { matches = false; } // Restore term: term.Length = savLength; term.Bytes[pos] = (byte)scratch[0]; term.Bytes[pos + 1] = (byte)scratch[1]; term.Bytes[pos + 2] = (byte)scratch[2]; return(matches); }
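The byte juggling in SeekToNonBMP relies on how UTF-8 encodes the relevant ranges: characters in U+E000..U+FFFF take three bytes with lead byte 0xEE or 0xEF, while supplementary (non-BMP) characters take four bytes with lead byte 0xF0..0xF4, and the smallest one, U+10000, is exactly 0xF0 0x90 0x80 0x80 — the sequence the method splices in before seeking. A small standalone illustration of those encodings (plain .NET, no Lucene types):

using System;
using System.Text;

public static class Utf8SurrogateSketch
{
    public static void Main()
    {
        // A "high BMP" character (>= U+E000): three UTF-8 bytes, lead byte 0xEE/0xEF.
        byte[] highBmp = Encoding.UTF8.GetBytes("\uE000");
        Console.WriteLine(BitConverter.ToString(highBmp)); // EE-80-80

        // The first supplementary character U+10000 (a surrogate pair in UTF-16):
        // four UTF-8 bytes, exactly the bytes the seek method patches in.
        byte[] nonBmp = Encoding.UTF8.GetBytes(char.ConvertFromUtf32(0x10000));
        Console.WriteLine(BitConverter.ToString(nonBmp)); // F0-90-80-80
    }
}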
public override void Run() { if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": launch search thread"); } while (Environment.TickCount < stopTimeMS) { try { IndexSearcher s = outerInstance.GetCurrentSearcher(); try { // Verify 1) IW is correctly setting // diagnostics, and 2) segment warming for // merged segments is actually happening: foreach (AtomicReaderContext sub in s.IndexReader.Leaves) { SegmentReader segReader = (SegmentReader)sub.Reader; IDictionary <string, string> diagnostics = segReader.SegmentInfo.Info.Diagnostics; assertNotNull(diagnostics); string source; diagnostics.TryGetValue("source", out source); assertNotNull(source); if (source.Equals("merge", StringComparison.Ordinal)) { assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + outerInstance.warmed + " diagnostics=" + diagnostics + " si=" + segReader.SegmentInfo, // LUCENENET: ConditionalWeakTable doesn't have ContainsKey, so we normalize to TryGetValue !outerInstance.m_assertMergedSegmentsWarmed || outerInstance.warmed.TryGetValue(segReader.core, out BooleanRef _)); } } if (s.IndexReader.NumDocs > 0) { outerInstance.SmokeTestSearcher(s); Fields fields = MultiFields.GetFields(s.IndexReader); if (fields == null) { continue; } Terms terms = fields.GetTerms("body"); if (terms == null) { continue; } TermsEnum termsEnum = terms.GetEnumerator(); int seenTermCount = 0; int shift; int trigger; if (totTermCount < 30) { shift = 0; trigger = 1; } else { trigger = totTermCount / 30; shift = Random.Next(trigger); } while (Environment.TickCount < stopTimeMS) { if (!termsEnum.MoveNext()) { totTermCount.Value = seenTermCount; break; } seenTermCount++; // search 30 terms if ((seenTermCount + shift) % trigger == 0) { //if (VERBOSE) { //System.out.println(Thread.currentThread().getName() + " now search body:" + term.Utf8ToString()); //} totHits.AddAndGet(outerInstance.RunQuery(s, new TermQuery(new Term("body", termsEnum.Term)))); } } //if (VERBOSE) { //System.out.println(Thread.currentThread().getName() + ": search done"); //} } } finally { outerInstance.ReleaseSearcher(s); } } catch (Exception t) { Console.WriteLine(Thread.CurrentThread.Name + ": hit exc"); outerInstance.m_failed.Value = (true); Console.WriteLine(t.ToString()); throw new Exception(t.ToString(), t); } } }
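The search thread above asserts that every merged segment carries a "source" diagnostic and was warmed. Isolated from the test plumbing, reading those per-segment diagnostics looks like the following sketch (PrintSources is a hypothetical helper name; it assumes a DirectoryReader whose leaves are SegmentReaders, which holds for a normal on-disk index):

using System;
using Lucene.Net.Index;

public static class SegmentDiagnosticsSketch
{
    // Prints the "source" diagnostic ("flush", "merge", ...) recorded for each segment.
    public static void PrintSources(DirectoryReader reader)
    {
        foreach (AtomicReaderContext ctx in reader.Leaves)
        {
            var segReader = (SegmentReader)ctx.Reader;
            var diagnostics = segReader.SegmentInfo.Info.Diagnostics;
            diagnostics.TryGetValue("source", out string source);
            Console.WriteLine(segReader.SegmentInfo.Info.Name + ": source=" + (source ?? "<none>"));
        }
    }
}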
// randomly seeks to term that we know exists, then next's // from there private void DoTestSeekExists(Random r, IList <Term> fieldTerms, IndexReader reader) { IDictionary <string, TermsEnum> tes = new Dictionary <string, TermsEnum>(); // Test random seek to existing term, then enum: if (Verbose) { Console.WriteLine("\nTEST: top now seek"); } int num = AtLeast(100); for (int iter = 0; iter < num; iter++) { // pick random field+term int spot = r.Next(fieldTerms.Count); Term term = fieldTerms[spot]; string field = term.Field; if (Verbose) { Console.WriteLine("TEST: exist seek field=" + field + " term=" + UnicodeUtil.ToHexString(term.Text)); } // seek to it if (!tes.TryGetValue(field, out TermsEnum te)) { te = MultiFields.GetTerms(reader, field).GetEnumerator(); tes[field] = te; } if (Verbose) { Console.WriteLine(" done get enum"); } // seek should find the term Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(term.Bytes)); // now .next() this many times: int ct = TestUtil.NextInt32(r, 5, 100); for (int i = 0; i < ct; i++) { if (Verbose) { Console.WriteLine("TEST: now next()"); } if (1 + spot + i >= fieldTerms.Count) { break; } term = fieldTerms[1 + spot + i]; if (!term.Field.Equals(field, StringComparison.Ordinal)) { Assert.IsFalse(te.MoveNext()); break; } else { Assert.IsTrue(te.MoveNext()); BytesRef t = te.Term; if (Verbose) { Console.WriteLine(" got term=" + (t == null ? null : UnicodeUtil.ToHexString(t.Utf8ToString()))); Console.WriteLine(" exp=" + UnicodeUtil.ToHexString(term.Text.ToString())); } Assert.AreEqual(term.Bytes, t); } } } }
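Without the randomization, the seek-then-enumerate pattern DoTestSeekExists verifies is simply SeekCeil followed by MoveNext on a TermsEnum. A minimal sketch, assuming an already-open IndexReader; the field name "body" and term "lucene" are placeholders:

using System;
using Lucene.Net.Index;
using Lucene.Net.Util;

public static class SeekThenNextSketch
{
    public static void Run(IndexReader reader)
    {
        Terms terms = MultiFields.GetTerms(reader, "body");
        if (terms == null) return; // field not present in this index

        TermsEnum te = terms.GetEnumerator();
        if (te.SeekCeil(new BytesRef("lucene")) == TermsEnum.SeekStatus.FOUND)
        {
            Console.WriteLine("found: " + te.Term.Utf8ToString());
            // Subsequent terms come back in BytesRef (UTF-8 / code point) order.
            for (int i = 0; i < 5 && te.MoveNext(); i++)
            {
                Console.WriteLine("next:  " + te.Term.Utf8ToString());
            }
        }
    }
}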
public virtual void TestFlushExceptions() { MockDirectoryWrapper directory = NewMockDirectory(); FailOnlyOnFlush failure = new FailOnlyOnFlush(this); directory.FailOn(failure); IndexWriter writer = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2)); Document doc = new Document(); Field idField = NewStringField("id", "", Field.Store.YES); doc.Add(idField); int extraCount = 0; for (int i = 0; i < 10; i++) { if (Verbose) { Console.WriteLine("TEST: iter=" + i); } for (int j = 0; j < 20; j++) { idField.SetStringValue(Convert.ToString(i * 20 + j)); writer.AddDocument(doc); } // must cycle here because sometimes the merge flushes // the doc we just added and so there's nothing to // flush, and we don't hit the exception while (true) { writer.AddDocument(doc); failure.SetDoFail(); try { writer.Flush(true, true); if (failure.hitExc) { Assert.Fail("failed to hit IOException"); } extraCount++; } catch (IOException ioe) { if (Verbose) { Console.WriteLine(ioe.StackTrace); } failure.ClearDoFail(); break; } } Assert.AreEqual(20 * (i + 1) + extraCount, writer.NumDocs); } writer.Dispose(); IndexReader reader = DirectoryReader.Open(directory); Assert.AreEqual(200 + extraCount, reader.NumDocs); reader.Dispose(); directory.Dispose(); }
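TestFlushExceptions drives failures through a MockDirectoryWrapper failure hook. A minimal sketch of such a hook, assuming the test framework's Lucene.Net.Store.Failure base class with its protected m_doFail flag, SetDoFail()/ClearDoFail(), and virtual Eval method (the class name here is made up, and unlike the real FailOnlyOnFlush it does not inspect the call stack to restrict failures to flushes):

using System.IO;
using Lucene.Net.Store;

// A toy Failure: once armed via SetDoFail(), the first Eval call throws an IOException.
internal class FailOnceFailure : Failure
{
    internal bool hitExc;

    public override void Eval(MockDirectoryWrapper dir)
    {
        if (m_doFail && !hitExc) // m_doFail is assumed to be the base class's armed flag
        {
            hitExc = true;
            throw new IOException("now failing on purpose");
        }
    }
}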
public virtual void TestSurrogatesOrder() { Directory dir = NewDirectory(); var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); config.Codec = new PreFlexRWCodec(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, config); int numField = TestUtil.NextInt32(Random, 2, 5); int uniqueTermCount = 0; int tc = 0; var fieldTerms = new JCG.List <Term>(); for (int f = 0; f < numField; f++) { string field = "f" + f; int numTerms = AtLeast(200); ISet <string> uniqueTerms = new JCG.HashSet <string>(); for (int i = 0; i < numTerms; i++) { string term = GetRandomString(Random) + "_ " + (tc++); uniqueTerms.Add(term); fieldTerms.Add(new Term(field, term)); Documents.Document doc = new Documents.Document(); doc.Add(NewStringField(field, term, Field.Store.NO)); w.AddDocument(doc); } uniqueTermCount += uniqueTerms.Count; } IndexReader reader = w.GetReader(); if (Verbose) { fieldTerms.Sort(termAsUTF16Comparer); Console.WriteLine("\nTEST: UTF16 order"); foreach (Term t in fieldTerms) { Console.WriteLine(" " + ToHexString(t)); } } // sorts in code point order: fieldTerms.Sort(); if (Verbose) { Console.WriteLine("\nTEST: codepoint order"); foreach (Term t in fieldTerms) { Console.WriteLine(" " + ToHexString(t)); } } Term[] fieldTermsArray = fieldTerms.ToArray(); //SegmentInfo si = makePreFlexSegment(r, "_0", dir, fieldInfos, codec, fieldTerms); //FieldsProducer fields = codec.fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 1024, 1)); //Assert.IsNotNull(fields); DoTestStraightEnum(fieldTerms, reader, uniqueTermCount); DoTestSeekExists(Random, fieldTerms, reader); DoTestSeekDoesNotExist(Random, numField, fieldTerms, fieldTermsArray, reader); reader.Dispose(); w.Dispose(); dir.Dispose(); }
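TestSurrogatesOrder hinges on the fact that sorting terms by UTF-16 code units and sorting them by Unicode code points (equivalently, by their UTF-8 bytes, which is the index order the test checks) disagree once supplementary characters are involved. A small standalone demonstration of that divergence (plain .NET):

using System;
using System.Text;

public static class TermOrderSketch
{
    public static void Main()
    {
        string bmp = "\uFB01";                        // U+FB01, a BMP character
        string supp = char.ConvertFromUtf32(0x10400); // U+10400, a supplementary character

        // UTF-16 code-unit order: the surrogate lead unit 0xD801 sorts before 0xFB01,
        // so the supplementary character comes first.
        Console.WriteLine(string.CompareOrdinal(bmp, supp)); // positive: bmp > supp

        // Code point / UTF-8 byte order: U+FB01 < U+10400, so the BMP character comes first.
        byte[] a = Encoding.UTF8.GetBytes(bmp);  // EF AC 81
        byte[] b = Encoding.UTF8.GetBytes(supp); // F0 90 90 80
        Console.WriteLine(a[0] < b[0]);          // True: 0xEF < 0xF0
    }
}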
public virtual void TestRandom() { int alphabetSize = TestUtil.NextInt32(Random, 2, 7); int docLen = AtLeast(3000); //final int docLen = 50; string document = GetRandomString('a', alphabetSize, docLen); if (Verbose) { Console.WriteLine("TEST: doc=" + document); } int numSyn = AtLeast(5); //final int numSyn = 2; IDictionary <string, OneSyn> synMap = new Dictionary <string, OneSyn>(); IList <OneSyn> syns = new JCG.List <OneSyn>(); bool dedup = Random.NextBoolean(); if (Verbose) { Console.WriteLine(" dedup=" + dedup); } b = new SynonymMap.Builder(dedup); for (int synIDX = 0; synIDX < numSyn; synIDX++) { string synIn = GetRandomString('a', alphabetSize, TestUtil.NextInt32(Random, 1, 5)).Trim(); if (!synMap.TryGetValue(synIn, out OneSyn s) || s is null) { s = new OneSyn(); s.@in = synIn; syns.Add(s); s.@out = new JCG.List <string>(); synMap[synIn] = s; s.keepOrig = Random.NextBoolean(); } string synOut = GetRandomString('0', 10, TestUtil.NextInt32(Random, 1, 5)).Trim(); s.@out.Add(synOut); Add(synIn, synOut, s.keepOrig); if (Verbose) { Console.WriteLine(" syns[" + synIDX + "] = " + s.@in + " -> " + s.@out + " keepOrig=" + s.keepOrig); } } tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true); tokensIn.Reset(); assertTrue(tokensIn.IncrementToken()); assertFalse(tokensIn.IncrementToken()); tokensIn.End(); tokensIn.Dispose(); tokensOut = new SynonymFilter(tokensIn, b.Build(), true); termAtt = tokensOut.AddAttribute <ICharTermAttribute>(); posIncrAtt = tokensOut.AddAttribute <IPositionIncrementAttribute>(); posLenAtt = tokensOut.AddAttribute <IPositionLengthAttribute>(); offsetAtt = tokensOut.AddAttribute <IOffsetAttribute>(); if (dedup) { PruneDups(syns); } string expected = SlowSynMatcher(document, syns, 5); if (Verbose) { Console.WriteLine("TEST: expected=" + expected); } Verify(document, expected); }
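Stripped of the randomness, the setup above is: build a SynonymMap, wrap a TokenStream in a SynonymFilter, and consume it. A minimal sketch with hardcoded single-word synonyms (the "fast" -> "quick" pair and the input text are made up; multi-word rules would additionally need SynonymMap.WORD_SEPARATOR between words):

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Synonym;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;

public static class SynonymSketch
{
    public static void Main()
    {
        // "fast" -> "quick", keeping the original token (includeOrig = true).
        var builder = new SynonymMap.Builder(true); // true = dedup
        builder.Add(new CharsRef("fast"), new CharsRef("quick"), true);
        SynonymMap map = builder.Build();

        TokenStream ts = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, new StringReader("a fast fox"));
        ts = new SynonymFilter(ts, map, true); // true = ignoreCase
        var termAtt = ts.AddAttribute<ICharTermAttribute>();
        var posIncAtt = ts.AddAttribute<IPositionIncrementAttribute>();

        ts.Reset();
        while (ts.IncrementToken())
        {
            // "quick" is emitted at the same position as "fast" (position increment 0).
            Console.WriteLine(termAtt.ToString() + " (+" + posIncAtt.PositionIncrement + ")");
        }
        ts.End();
        ts.Dispose();
    }
}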
public virtual void IndexDoc() { Document d = new Document(); FieldType customType1 = new FieldType(TextField.TYPE_STORED); customType1.IsTokenized = false; customType1.OmitNorms = true; List <Field> fields = new List <Field>(); string idString = IdString; Field idField = NewField("id", idString, customType1); fields.Add(idField); int nFields = NextInt(maxFields); for (int i = 0; i < nFields; i++) { FieldType customType = new FieldType(); switch (NextInt(4)) { case 0: break; case 1: customType.StoreTermVectors = true; break; case 2: customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; break; case 3: customType.StoreTermVectors = true; customType.StoreTermVectorOffsets = true; break; } switch (NextInt(4)) { case 0: customType.IsStored = true; customType.OmitNorms = true; customType.IsIndexed = true; fields.Add(NewField("f" + NextInt(100), GetString(1), customType)); break; case 1: customType.IsIndexed = true; customType.IsTokenized = true; fields.Add(NewField("f" + NextInt(100), GetString(0), customType)); break; case 2: customType.IsStored = true; customType.StoreTermVectors = false; customType.StoreTermVectorOffsets = false; customType.StoreTermVectorPositions = false; fields.Add(NewField("f" + NextInt(100), GetString(0), customType)); break; case 3: customType.IsStored = true; customType.IsIndexed = true; customType.IsTokenized = true; fields.Add(NewField("f" + NextInt(100), GetString(bigFieldSize), customType)); break; } } if (sameFieldOrder) { fields.Sort(fieldNameComparer); } else { // random placement of id field also fields.Swap(NextInt(fields.Count), 0); } for (int i = 0; i < fields.Count; i++) { d.Add(fields[i]); } if (Verbose) { Console.WriteLine(Thread.CurrentThread.Name + ": indexing id:" + idString); } w.UpdateDocument(new Term("id", idString), d); //System.out.println(Thread.currentThread().getName() + ": indexing "+d); docs[idString] = d; }
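The switch statements in IndexDoc randomize FieldType flags; written out plainly, configuring a stored, indexed field with full term vectors looks like the following sketch (the field name and value are placeholders):

using Lucene.Net.Documents;

public static class FieldTypeSketch
{
    public static Document BuildDoc()
    {
        // Start from the stored-text defaults, then opt in to term vectors.
        var type = new FieldType(TextField.TYPE_STORED)
        {
            StoreTermVectors = true,
            StoreTermVectorPositions = true,
            StoreTermVectorOffsets = true
        };
        type.Freeze(); // prevent further changes once the field is in use

        var doc = new Document();
        doc.Add(new Field("body", "some example text", type));
        return doc;
    }
}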
// For the output string: separate positions with a space, // and separate multiple tokens at each position with a // /. If a token should have end offset != the input // token's end offset then add :X to it: // TODO: we should probably refactor this guy to use/take analyzer, // the tests are a little messy private void Verify(string input, string output) { if (Verbose) { Console.WriteLine("TEST: verify input=" + input + " expectedOutput=" + output); } tokensIn.SetReader(new StringReader(input)); tokensOut.Reset(); string[] expected = output.Split(' ').TrimEnd(); int expectedUpto = 0; while (tokensOut.IncrementToken()) { if (Verbose) { Console.WriteLine(" incr token=" + termAtt.ToString() + " posIncr=" + posIncrAtt.PositionIncrement + " startOff=" + offsetAtt.StartOffset + " endOff=" + offsetAtt.EndOffset); } assertTrue(expectedUpto < expected.Length); int startOffset = offsetAtt.StartOffset; int endOffset = offsetAtt.EndOffset; string[] expectedAtPos = expected[expectedUpto++].Split('/').TrimEnd(); for (int atPos = 0; atPos < expectedAtPos.Length; atPos++) { if (atPos > 0) { assertTrue(tokensOut.IncrementToken()); if (Verbose) { Console.WriteLine(" incr token=" + termAtt.ToString() + " posIncr=" + posIncrAtt.PositionIncrement + " startOff=" + offsetAtt.StartOffset + " endOff=" + offsetAtt.EndOffset); } } int colonIndex = expectedAtPos[atPos].IndexOf(':'); int underbarIndex = expectedAtPos[atPos].IndexOf('_'); string expectedToken; int expectedEndOffset; int expectedPosLen; if (colonIndex != -1) { expectedToken = expectedAtPos[atPos].Substring(0, colonIndex - 0); if (underbarIndex != -1) { expectedEndOffset = int.Parse(expectedAtPos[atPos].Substring(1 + colonIndex, underbarIndex - (1 + colonIndex)), CultureInfo.InvariantCulture); expectedPosLen = int.Parse(expectedAtPos[atPos].Substring(1 + underbarIndex), CultureInfo.InvariantCulture); } else { expectedEndOffset = int.Parse(expectedAtPos[atPos].Substring(1 + colonIndex), CultureInfo.InvariantCulture); expectedPosLen = 1; } } else { expectedToken = expectedAtPos[atPos]; expectedEndOffset = endOffset; expectedPosLen = 1; } assertEquals(expectedToken, termAtt.ToString()); assertEquals(atPos == 0 ? 1 : 0, posIncrAtt.PositionIncrement); // start/end offset of all tokens at same pos should // be the same: assertEquals(startOffset, offsetAtt.StartOffset); assertEquals(expectedEndOffset, offsetAtt.EndOffset); assertEquals(expectedPosLen, posLenAtt.PositionLength); } } tokensOut.End(); tokensOut.Dispose(); if (Verbose) { Console.WriteLine(" incr: END"); } assertEquals(expectedUpto, expected.Length); }
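For readers decoding the grammar the loop above parses, the same notation can be restated in a few lines of plain C#: positions are space-separated, stacked tokens at one position are '/'-separated, ':N' pins a token's end offset, and '_M' pins its position length. The example string below is illustrative only, not taken from any test case:

using System;

public static class NotationSketch
{
    public static void Main()
    {
        // "a b/c:7_2": two positions; the second holds "b" and "c",
        // where "c" must end at offset 7 and span 2 positions.
        string expected = "a b/c:7_2";
        foreach (string position in expected.Split(' '))
        {
            foreach (string token in position.Split('/'))
            {
                int colon = token.IndexOf(':');
                int underscore = token.IndexOf('_');
                string text = colon == -1 ? token : token.Substring(0, colon);
                string endOffset = colon == -1 ? "(input token's)" :
                    (underscore == -1 ? token.Substring(colon + 1) : token.Substring(colon + 1, underscore - colon - 1));
                string posLen = underscore == -1 ? "1" : token.Substring(underscore + 1);
                Console.WriteLine($"token={text} endOffset={endOffset} posLen={posLen}");
            }
        }
    }
}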
public override FieldsProducer FieldsProducer(SegmentReadState state) { string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT); IndexInput @in = state.Directory.OpenInput(seedFileName, state.Context); long seed = @in.ReadInt64(); if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: reading from seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed); } @in.Dispose(); Random random = new J2N.Randomizer(seed); int readBufferSize = TestUtil.NextInt32(random, 1, 4096); if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: readBufferSize=" + readBufferSize); } PostingsReaderBase postingsReader; if (random.NextBoolean()) { if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: reading Sep postings"); } postingsReader = new SepPostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, new MockInt32StreamFactory(random), state.SegmentSuffix); } else { if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: reading Standard postings"); } postingsReader = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix); } if (random.NextBoolean()) { int totTFCutoff = TestUtil.NextInt32(random, 1, 20); if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff); } postingsReader = new PulsingPostingsReader(state, postingsReader); } FieldsProducer fields; int t1 = random.Next(4); if (t1 == 0) { bool success = false; try { fields = new FSTTermsReader(state, postingsReader); success = true; } finally { if (!success) { postingsReader.Dispose(); } } } else if (t1 == 1) { bool success = false; try { fields = new FSTOrdTermsReader(state, postingsReader); success = true; } finally { if (!success) { postingsReader.Dispose(); } } } else if (t1 == 2) { // Use BlockTree terms dict if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: reading BlockTree terms dict"); } bool success = false; try { fields = new BlockTreeTermsReader(state.Directory, state.FieldInfos, state.SegmentInfo, postingsReader, state.Context, state.SegmentSuffix, state.TermsIndexDivisor); success = true; } finally { if (!success) { postingsReader.Dispose(); } } } else { if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: reading Block terms dict"); } TermsIndexReaderBase indexReader; bool success = false; try { bool doFixedGap = random.NextBoolean(); // randomness diverges from writer, here: if (state.TermsIndexDivisor != -1) { state.TermsIndexDivisor = TestUtil.NextInt32(random, 1, 10); } if (doFixedGap) { // if termsIndexDivisor is set to -1, we should not touch it. It means a // test explicitly instructed not to load the terms index. 
if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: fixed-gap terms index (divisor=" + state.TermsIndexDivisor + ")"); } indexReader = new FixedGapTermsIndexReader(state.Directory, state.FieldInfos, state.SegmentInfo.Name, state.TermsIndexDivisor, BytesRef.UTF8SortedAsUnicodeComparer, state.SegmentSuffix, state.Context); } else { int n2 = random.Next(3); if (n2 == 1) { random.Next(); } else if (n2 == 2) { random.NextInt64(); } if (LuceneTestCase.Verbose) { Console.WriteLine("MockRandomCodec: variable-gap terms index (divisor=" + state.TermsIndexDivisor + ")"); } indexReader = new VariableGapTermsIndexReader(state.Directory, state.FieldInfos, state.SegmentInfo.Name, state.TermsIndexDivisor, state.SegmentSuffix, state.Context); } success = true; } finally { if (!success) { postingsReader.Dispose(); } } success = false; try { fields = new BlockTermsReader(indexReader, state.Directory, state.FieldInfos, state.SegmentInfo, postingsReader, state.Context, state.SegmentSuffix); success = true; } finally { if (!success) { try { postingsReader.Dispose(); } finally { indexReader.Dispose(); } } } } return(fields); }
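Both halves of this mock codec agree on their per-segment random decisions through a tiny side file: the writer persists a seed with IndexOutput.WriteInt64 and the reader recovers it with IndexInput.ReadInt64, then replays the same J2N.Randomizer sequence. A reduced sketch of that handshake (the directory, file name, and seed value are placeholders):

using System;
using Lucene.Net.Store;

public static class SeedFileSketch
{
    public static void Main()
    {
        using Directory dir = new RAMDirectory();

        // Writer side: persist the seed next to the segment files.
        long seed = 42L;
        using (IndexOutput @out = dir.CreateOutput("_0_mock.seed", IOContext.DEFAULT))
        {
            @out.WriteInt64(seed);
        }

        // Reader side: recover the seed and rebuild the same random sequence.
        long readSeed;
        using (IndexInput @in = dir.OpenInput("_0_mock.seed", IOContext.DEFAULT))
        {
            readSeed = @in.ReadInt64();
        }
        var random = new J2N.Randomizer(readSeed); // same decisions as the writer from here on
        Console.WriteLine(readSeed == seed);       // True
    }
}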
/* * public void testWikipedia() { * final FileInputStream fis = new FileInputStream("/q/lucene/jawiki-20120220-pages-articles.xml"); * final Reader r = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8)); * * final long startTimeNS = System.nanoTime(); * boolean done = false; * long compoundCount = 0; * long nonCompoundCount = 0; * long netOffset = 0; * while (!done) { * final TokenStream ts = analyzer.tokenStream("ignored", r); * ts.reset(); * final PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); * final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); * int count = 0; * while (true) { * if (!ts.incrementToken()) { * done = true; * break; * } * count++; * if (posIncAtt.getPositionIncrement() == 0) { * compoundCount++; * } else { * nonCompoundCount++; * if (nonCompoundCount % 1000000 == 0) { * System.out.println(String.format("%.2f msec [pos=%d, %d, %d]", * (System.nanoTime()-startTimeNS)/1000000.0, * netOffset + offsetAtt.startOffset(), * nonCompoundCount, * compoundCount)); * } * } * if (count == 100000000) { * System.out.println(" again..."); * break; * } * } * ts.end(); * netOffset += offsetAtt.endOffset(); * } * System.out.println("compoundCount=" + compoundCount + " nonCompoundCount=" + nonCompoundCount); * r.close(); * } */ private void doTestBocchan(int numIterations) { TextReader reader = new StreamReader( this.GetType().getResourceAsStream("bocchan.utf-8"), Encoding.UTF8); String line = reader.ReadLine(); reader.Dispose(); if (Verbose) { Console.WriteLine("Test for Bocchan without pre-splitting sentences"); } /* * if (numIterations > 1) { * // warmup * for (int i = 0; i < numIterations; i++) { * final TokenStream ts = analyzer.tokenStream("ignored", line); * ts.reset(); * while(ts.incrementToken()); * } * } */ long totalStart = Environment.TickCount; for (int i = 0; i < numIterations; i++) { TokenStream ts = analyzer.GetTokenStream("ignored", line); try { ts.Reset(); while (ts.IncrementToken()) { ; } ts.End(); } finally { IOUtils.DisposeWhileHandlingException(ts); } } String[] sentences = Regex.Split(line, "、|。").TrimEnd(); if (Verbose) { Console.WriteLine("Total time : " + (Environment.TickCount - totalStart)); Console.WriteLine("Test for Bocchan with pre-splitting sentences (" + sentences.Length + " sentences)"); } totalStart = Environment.TickCount; for (int i = 0; i < numIterations; i++) { foreach (String sentence in sentences) { TokenStream ts = analyzer.GetTokenStream("ignored", sentence); try { ts.Reset(); while (ts.IncrementToken()) { ; } ts.End(); } finally { IOUtils.DisposeWhileHandlingException(ts); } } } if (Verbose) { Console.WriteLine("Total time : " + (Environment.TickCount - totalStart)); } }
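The benchmark loops in doTestBocchan follow the standard TokenStream consumption contract: Reset, IncrementToken until it returns false, End, then Dispose (there via IOUtils.DisposeWhileHandlingException). Spelled out once with a concrete analyzer (StandardAnalyzer is a stand-in here; the Japanese test uses its own analyzer instance and discards the tokens):

using System;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;

public static class TokenStreamConsumeSketch
{
    public static void Main()
    {
        Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
        TokenStream ts = analyzer.GetTokenStream("ignored", "some text to tokenize");
        try
        {
            var termAtt = ts.AddAttribute<ICharTermAttribute>();
            ts.Reset();                      // must be called before the first IncrementToken
            while (ts.IncrementToken())
            {
                Console.WriteLine(termAtt.ToString());
            }
            ts.End();                        // records final offset state
        }
        finally
        {
            IOUtils.DisposeWhileHandlingException(ts);
        }
    }
}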