public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); for (int i = 0; i < DocFields.Length; i++) { Document doc = new Document(); doc.Add(NewTextField(FIELD, DocFields[i], Field.Store.NO)); writer.AddDocument(doc); } Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()).SetSimilarity(new DefaultSimilarity())); for (int i = 0; i < Values.Length; i++) { Document doc = new Document(); doc.Add(NewTextField(FIELD, Values[i], Field.Store.YES)); writer.AddDocument(doc); } IndexReader = SlowCompositeReaderWrapper.Wrap(writer.Reader); writer.Dispose(); IndexSearcher = NewSearcher(IndexReader); IndexSearcher.Similarity = new DefaultSimilarity(); }
public virtual void Test() { Random random = new Random(Random.Next()); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues); Directory d = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random); analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif LuceneTestCase.Random, d, analyzer); int numDocs = AtLeast(10); for (int docCount = 0; docCount < numDocs; docCount++) { w.AddDocument(docs.NextDoc()); } IndexReader r = w.GetReader(); w.Dispose(); List <BytesRef> terms = new List <BytesRef>(); TermsEnum termsEnum = MultiFields.GetTerms(r, "body").GetIterator(null); BytesRef term; while ((term = termsEnum.Next()) != null) { terms.Add(BytesRef.DeepCopyOf(term)); } if (Verbose) { Console.WriteLine("TEST: " + terms.Count + " terms"); } int upto = -1; int iters = AtLeast(200); for (int iter = 0; iter < iters; iter++) { bool isEnd; if (upto != -1 && LuceneTestCase.Random.NextBoolean()) { // next if (Verbose) { Console.WriteLine("TEST: iter next"); } isEnd = termsEnum.Next() == null; upto++; if (isEnd) { if (Verbose) { Console.WriteLine(" end"); } Assert.AreEqual(upto, terms.Count); upto = -1; } else { if (Verbose) { Console.WriteLine(" got term=" + termsEnum.Term.Utf8ToString() + " expected=" + terms[upto].Utf8ToString()); } Assert.IsTrue(upto < terms.Count); Assert.AreEqual(terms[upto], termsEnum.Term); } } else { BytesRef target; string exists; if (LuceneTestCase.Random.NextBoolean()) { // likely fake term if (LuceneTestCase.Random.NextBoolean()) { target = new BytesRef(TestUtil.RandomSimpleString(LuceneTestCase.Random)); } else { target = new BytesRef(TestUtil.RandomRealisticUnicodeString(LuceneTestCase.Random)); } exists = "likely not"; } else { // real term target = terms[LuceneTestCase.Random.Next(terms.Count)]; exists = "yes"; } upto = terms.BinarySearch(target); if (LuceneTestCase.Random.NextBoolean()) { if (Verbose) { Console.WriteLine("TEST: iter seekCeil target=" + target.Utf8ToString() + " exists=" + exists); } // seekCeil TermsEnum.SeekStatus status = termsEnum.SeekCeil(target); if (Verbose) { Console.WriteLine(" got " + status); } if (upto < 0) { upto = -(upto + 1); if (upto >= terms.Count) { Assert.AreEqual(TermsEnum.SeekStatus.END, status); upto = -1; } else { Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status); Assert.AreEqual(terms[upto], termsEnum.Term); } } else { Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status); Assert.AreEqual(terms[upto], termsEnum.Term); } } else { if (Verbose) { Console.WriteLine("TEST: iter seekExact target=" + target.Utf8ToString() + " exists=" + exists); } // seekExact bool result = termsEnum.SeekExact(target); if (Verbose) { Console.WriteLine(" got " + result); } if (upto < 0) { Assert.IsFalse(result); upto = -1; } else { Assert.IsTrue(result); Assert.AreEqual(target, termsEnum.Term); } } } } r.Dispose(); d.Dispose(); docs.Dispose(); }
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass public virtual void TestNumericField() { Directory dir = NewDirectory(); var w = new RandomIndexWriter(Random(), dir, ClassEnvRule.similarity, ClassEnvRule.timeZone); var numDocs = AtLeast(500); var answers = new object[numDocs]; NumericType[] typeAnswers = new NumericType[numDocs]; for (int id = 0; id < numDocs; id++) { Document doc = new Document(); Field nf; Field sf; object answer; NumericType typeAnswer; if (Random().NextBoolean()) { // float/double if (Random().NextBoolean()) { float f = Random().NextFloat(); answer = Convert.ToSingle(f, CultureInfo.InvariantCulture); nf = new SingleField("nf", f, Field.Store.NO); sf = new StoredField("nf", f); typeAnswer = NumericType.SINGLE; } else { double d = Random().NextDouble(); answer = Convert.ToDouble(d, CultureInfo.InvariantCulture); nf = new DoubleField("nf", d, Field.Store.NO); sf = new StoredField("nf", d); typeAnswer = NumericType.DOUBLE; } } else { // int/long if (Random().NextBoolean()) { int i = Random().Next(); answer = Convert.ToInt32(i, CultureInfo.InvariantCulture); nf = new Int32Field("nf", i, Field.Store.NO); sf = new StoredField("nf", i); typeAnswer = NumericType.INT32; } else { long l = Random().NextLong(); answer = Convert.ToInt64(l, CultureInfo.InvariantCulture); nf = new Int64Field("nf", l, Field.Store.NO); sf = new StoredField("nf", l); typeAnswer = NumericType.INT64; } } doc.Add(nf); doc.Add(sf); answers[id] = answer; typeAnswers[id] = typeAnswer; FieldType ft = new FieldType(Int32Field.TYPE_STORED); ft.NumericPrecisionStep = int.MaxValue; doc.Add(new Int32Field("id", id, ft)); w.AddDocument(doc); } DirectoryReader r = w.Reader; w.Dispose(); Assert.AreEqual(numDocs, r.NumDocs); foreach (AtomicReaderContext ctx in r.Leaves) { AtomicReader sub = (AtomicReader)ctx.Reader; FieldCache.Int32s ids = FieldCache.DEFAULT.GetInt32s(sub, "id", false); for (int docID = 0; docID < sub.NumDocs; docID++) { Document doc = sub.Document(docID); Field f = doc.GetField <Field>("nf"); Assert.IsTrue(f is StoredField, "got f=" + f); #pragma warning disable 612, 618 Assert.AreEqual(answers[ids.Get(docID)], f.GetNumericValue()); #pragma warning restore 612, 618 } } r.Dispose(); dir.Dispose(); }
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass public virtual void TestRandomStoredFields() { Directory dir = NewDirectory(); Random rand = Random(); RandomIndexWriter w = new RandomIndexWriter(rand, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(rand, 5, 20))); //w.w.setNoCFSRatio(0.0); int docCount = AtLeast(200); int fieldCount = TestUtil.NextInt(rand, 1, 5); IList <int?> fieldIDs = new List <int?>(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.IsTokenized = false; Field idField = NewField("id", "", customType); for (int i = 0; i < fieldCount; i++) { fieldIDs.Add(i); } IDictionary <string, Document> docs = new Dictionary <string, Document>(); if (VERBOSE) { Console.WriteLine("TEST: build index docCount=" + docCount); } FieldType customType2 = new FieldType(); customType2.IsStored = true; for (int i = 0; i < docCount; i++) { Document doc = new Document(); doc.Add(idField); string id = "" + i; idField.SetStringValue(id); docs[id] = doc; if (VERBOSE) { Console.WriteLine("TEST: add doc id=" + id); } foreach (int field in fieldIDs) { string s; if (rand.Next(4) != 3) { s = TestUtil.RandomUnicodeString(rand, 1000); doc.Add(NewField("f" + field, s, customType2)); } else { s = null; } } w.AddDocument(doc); if (rand.Next(50) == 17) { // mixup binding of field name -> Number every so often Collections.Shuffle(fieldIDs); } if (rand.Next(5) == 3 && i > 0) { string delID = "" + rand.Next(i); if (VERBOSE) { Console.WriteLine("TEST: delete doc id=" + delID); } w.DeleteDocuments(new Term("id", delID)); docs.Remove(delID); } } if (VERBOSE) { Console.WriteLine("TEST: " + docs.Count + " docs in index; now load fields"); } if (docs.Count > 0) { string[] idsList = docs.Keys.ToArray(/*new string[docs.Count]*/); for (int x = 0; x < 2; x++) { IndexReader r = w.Reader; IndexSearcher s = NewSearcher(r); if (VERBOSE) { Console.WriteLine("TEST: cycle x=" + x + " r=" + r); } int num = AtLeast(1000); for (int iter = 0; iter < num; iter++) { string testID = idsList[rand.Next(idsList.Length)]; if (VERBOSE) { Console.WriteLine("TEST: test id=" + testID); } TopDocs hits = s.Search(new TermQuery(new Term("id", testID)), 1); Assert.AreEqual(1, hits.TotalHits); Document doc = r.Document(hits.ScoreDocs[0].Doc); Document docExp = docs[testID]; for (int i = 0; i < fieldCount; i++) { assertEquals("doc " + testID + ", field f" + fieldCount + " is wrong", docExp.Get("f" + i), doc.Get("f" + i)); } } r.Dispose(); w.ForceMerge(1); } } w.Dispose(); dir.Dispose(); }
public virtual void TestEnforceDeletions() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(NewLogMergePolicy(10))); // asserts below requires no unexpected merges: // NOTE: cannot use writer.getReader because RIW (on // flipping a coin) may give us a newly opened reader, // but we use .reopen on this reader below and expect to // (must) get an NRT reader: DirectoryReader reader = DirectoryReader.Open(writer.w, true); // same reason we don't wrap? IndexSearcher searcher = NewSearcher(reader, false); // add a doc, refresh the reader, and check that it's there Document doc = new Document(); doc.Add(NewStringField("id", "1", Field.Store.YES)); writer.AddDocument(doc); reader = RefreshReader(reader); searcher = NewSearcher(reader, false); TopDocs docs = searcher.Search(new MatchAllDocsQuery(), 1); Assert.AreEqual(1, docs.TotalHits, "Should find a hit..."); Filter startFilter = new QueryWrapperFilter(new TermQuery(new Term("id", "1"))); CachingWrapperFilter filter = new CachingWrapperFilter(startFilter); docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.IsTrue(filter.SizeInBytes() > 0); Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit..."); Query constantScore = new ConstantScoreQuery(filter); docs = searcher.Search(constantScore, 1); Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit..."); // make sure we get a cache hit when we reopen reader // that had no change to deletions // fake delete (deletes nothing): writer.DeleteDocuments(new Term("foo", "bar")); IndexReader oldReader = reader; reader = RefreshReader(reader); Assert.IsTrue(reader == oldReader); int missCount = filter.MissCount; docs = searcher.Search(constantScore, 1); Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit..."); // cache hit: Assert.AreEqual(missCount, filter.MissCount); // now delete the doc, refresh the reader, and see that it's not there writer.DeleteDocuments(new Term("id", "1")); // NOTE: important to hold ref here so GC doesn't clear // the cache entry! Else the assert below may sometimes // fail: oldReader = reader; reader = RefreshReader(reader); searcher = NewSearcher(reader, false); missCount = filter.MissCount; docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit..."); // cache hit Assert.AreEqual(missCount, filter.MissCount); docs = searcher.Search(constantScore, 1); Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit..."); // apply deletes dynamically: filter = new CachingWrapperFilter(startFilter); writer.AddDocument(doc); reader = RefreshReader(reader); searcher = NewSearcher(reader, false); docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit..."); missCount = filter.MissCount; Assert.IsTrue(missCount > 0); constantScore = new ConstantScoreQuery(filter); docs = searcher.Search(constantScore, 1); Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit..."); Assert.AreEqual(missCount, filter.MissCount); writer.AddDocument(doc); // NOTE: important to hold ref here so GC doesn't clear // the cache entry! Else the assert below may sometimes // fail: oldReader = reader; reader = RefreshReader(reader); searcher = NewSearcher(reader, false); docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.AreEqual(2, docs.TotalHits, "[query + filter] Should find 2 hits..."); Assert.IsTrue(filter.MissCount > missCount); missCount = filter.MissCount; constantScore = new ConstantScoreQuery(filter); docs = searcher.Search(constantScore, 1); Assert.AreEqual(2, docs.TotalHits, "[just filter] Should find a hit..."); Assert.AreEqual(missCount, filter.MissCount); // now delete the doc, refresh the reader, and see that it's not there writer.DeleteDocuments(new Term("id", "1")); reader = RefreshReader(reader); searcher = NewSearcher(reader, false); docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit..."); // CWF reused the same entry (it dynamically applied the deletes): Assert.AreEqual(missCount, filter.MissCount); docs = searcher.Search(constantScore, 1); Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit..."); // CWF reused the same entry (it dynamically applied the deletes): Assert.AreEqual(missCount, filter.MissCount); // NOTE: silliness to make sure JRE does not eliminate // our holding onto oldReader to prevent // CachingWrapperFilter's WeakHashMap from dropping the // entry: Assert.IsTrue(oldReader != null); reader.Dispose(); writer.Dispose(); dir.Dispose(); }
public virtual void TestSortedSetWithDups() { AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, null); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); int numValues = Random.Next(5); for (int j = 0; j < numValues; j++) { doc.Add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.RandomSimpleString(Random, 2)))); } iw.AddDocument(doc); if (Random.Next(17) == 0) { iw.Commit(); } } DirectoryReader ir = iw.GetReader(); iw.ForceMerge(1); DirectoryReader ir2 = iw.GetReader(); AtomicReader merged = GetOnlySegmentReader(ir2); iw.Dispose(); SortedSetDocValues multi = MultiDocValues.GetSortedSetValues(ir, "bytes"); SortedSetDocValues single = merged.GetSortedSetDocValues("bytes"); if (multi == null) { Assert.IsNull(single); } else { Assert.AreEqual(single.ValueCount, multi.ValueCount); BytesRef actual = new BytesRef(); BytesRef expected = new BytesRef(); // check values for (long i = 0; i < single.ValueCount; i++) { single.LookupOrd(i, expected); multi.LookupOrd(i, actual); Assert.AreEqual(expected, actual); } // check ord list for (int i = 0; i < numDocs; i++) { single.SetDocument(i); List <long?> expectedList = new List <long?>(); long ord; while ((ord = single.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { expectedList.Add(ord); } multi.SetDocument(i); int upto = 0; while ((ord = multi.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { Assert.AreEqual((long)expectedList[upto], ord); upto++; } Assert.AreEqual(expectedList.Count, upto); } } ir.Dispose(); ir2.Dispose(); dir.Dispose(); }
public virtual void Test2() { Random random = Random; int NUM_DOCS = AtLeast(100); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif random, dir); bool allowDups = random.NextBoolean(); ISet <string> seen = new JCG.HashSet <string>(); if (VERBOSE) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups); } int numDocs = 0; IList <BytesRef> docValues = new List <BytesRef>(); // TODO: deletions while (numDocs < NUM_DOCS) { string s; if (random.NextBoolean()) { s = TestUtil.RandomSimpleString(random); } else { s = TestUtil.RandomUnicodeString(random); } BytesRef br = new BytesRef(s); if (!allowDups) { if (seen.Contains(s)) { continue; } seen.Add(s); } if (VERBOSE) { Console.WriteLine(" " + numDocs + ": s=" + s); } Document doc = new Document(); doc.Add(new SortedDocValuesField("stringdv", br)); doc.Add(new NumericDocValuesField("id", numDocs)); docValues.Add(br); writer.AddDocument(doc); numDocs++; if (random.Next(40) == 17) { // force flush writer.GetReader().Dispose(); } } writer.ForceMerge(1); DirectoryReader r = writer.GetReader(); writer.Dispose(); AtomicReader sr = GetOnlySegmentReader(r); long END_TIME = Environment.TickCount + (TEST_NIGHTLY ? 30 : 1); int NUM_THREADS = TestUtil.NextInt32(LuceneTestCase.Random, 1, 10); ThreadJob[] threads = new ThreadJob[NUM_THREADS]; for (int thread = 0; thread < NUM_THREADS; thread++) { threads[thread] = new ThreadAnonymousInnerClassHelper2(random, docValues, sr, END_TIME); threads[thread].Start(); } foreach (ThreadJob thread in threads) { thread.Join(); } r.Dispose(); dir.Dispose(); }
public static void BeforeClass() { Dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir); int numDocs = AtLeast(300); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); AddSome(doc, AlwaysTerms); if (Random().Next(100) < 90) { AddSome(doc, CommonTerms); } if (Random().Next(100) < 50) { AddSome(doc, MediumTerms); } if (Random().Next(100) < 10) { AddSome(doc, RareTerms); } iw.AddDocument(doc); } iw.ForceMerge(1); iw.Dispose(); r = DirectoryReader.Open(Dir); atomicReader = GetOnlySegmentReader(r); Searcher = new IndexSearcher(atomicReader); Searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper(); }
public override void TearDown() { Writer.Dispose(); Directory.Dispose(); base.TearDown(); }
public override void SetUp() { base.SetUp(); Index = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Index, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetSimilarity(Sim).SetMergePolicy(NewLogMergePolicy())); // hed is the most important field, dek is secondary // d1 is an "ok" match for: albino elephant { Document d1 = new Document(); d1.Add(NewField("id", "d1", NonAnalyzedType)); // Field.Keyword("id", // "d1")); d1.Add(NewTextField("hed", "elephant", Field.Store.YES)); // Field.Text("hed", "elephant")); d1.Add(NewTextField("dek", "elephant", Field.Store.YES)); // Field.Text("dek", "elephant")); writer.AddDocument(d1); } // d2 is a "good" match for: albino elephant { Document d2 = new Document(); d2.Add(NewField("id", "d2", NonAnalyzedType)); // Field.Keyword("id", // "d2")); d2.Add(NewTextField("hed", "elephant", Field.Store.YES)); // Field.Text("hed", "elephant")); d2.Add(NewTextField("dek", "albino", Field.Store.YES)); // Field.Text("dek", // "albino")); d2.Add(NewTextField("dek", "elephant", Field.Store.YES)); // Field.Text("dek", "elephant")); writer.AddDocument(d2); } // d3 is a "better" match for: albino elephant { Document d3 = new Document(); d3.Add(NewField("id", "d3", NonAnalyzedType)); // Field.Keyword("id", // "d3")); d3.Add(NewTextField("hed", "albino", Field.Store.YES)); // Field.Text("hed", // "albino")); d3.Add(NewTextField("hed", "elephant", Field.Store.YES)); // Field.Text("hed", "elephant")); writer.AddDocument(d3); } // d4 is the "best" match for: albino elephant { Document d4 = new Document(); d4.Add(NewField("id", "d4", NonAnalyzedType)); // Field.Keyword("id", // "d4")); d4.Add(NewTextField("hed", "albino", Field.Store.YES)); // Field.Text("hed", // "albino")); d4.Add(NewField("hed", "elephant", NonAnalyzedType)); // Field.Text("hed", "elephant")); d4.Add(NewTextField("dek", "albino", Field.Store.YES)); // Field.Text("dek", // "albino")); writer.AddDocument(d4); } r = SlowCompositeReaderWrapper.Wrap(writer.Reader); writer.Dispose(); s = NewSearcher(r); s.Similarity = Sim; }
public virtual void TestCrazySpans() { // The problem: "normal" lucene queries create scorers, returning null if terms dont exist // this means they never score a term that does not exist. // however with spans, there is only one scorer for the whole hierarchy: // inner queries are not real queries, their boosts are ignored, etc. Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); doc.Add(NewField("foo", "bar", ft)); iw.AddDocument(doc); IndexReader ir = iw.Reader; iw.Dispose(); IndexSearcher @is = NewSearcher(ir); foreach (Similarity sim in Sims) { @is.Similarity = sim; SpanTermQuery s1 = new SpanTermQuery(new Term("foo", "bar")); SpanTermQuery s2 = new SpanTermQuery(new Term("foo", "baz")); Query query = new SpanOrQuery(s1, s2); TopDocs td = @is.Search(query, 10); Assert.AreEqual(1, td.TotalHits); float score = td.ScoreDocs[0].Score; Assert.IsTrue(score >= 0.0f); Assert.IsFalse(float.IsInfinity(score), "inf score for " + sim); } ir.Dispose(); dir.Dispose(); }
public void TestNumericField() { Directory dir = NewDirectory(); var w = new RandomIndexWriter(Random(), dir); var numDocs = AtLeast(500); var answers = new object[numDocs]; FieldType.NumericType[] typeAnswers = new FieldType.NumericType[numDocs]; for (int id = 0; id < numDocs; id++) { Document doc = new Document(); Field nf; Field sf; object answer; FieldType.NumericType typeAnswer; if (Random().NextBoolean()) { // float/double if (Random().NextBoolean()) { float f = Random().NextFloat(); answer = Convert.ToSingle(f); nf = new FloatField("nf", f, Field.Store.NO); sf = new StoredField("nf", f); typeAnswer = FieldType.NumericType.FLOAT; } else { double d = Random().NextDouble(); answer = Convert.ToDouble(d); nf = new DoubleField("nf", d, Field.Store.NO); sf = new StoredField("nf", d); typeAnswer = FieldType.NumericType.DOUBLE; } } else { // int/long if (Random().NextBoolean()) { int i = Random().Next(); answer = Convert.ToInt32(i); nf = new IntField("nf", i, Field.Store.NO); sf = new StoredField("nf", i); typeAnswer = FieldType.NumericType.INT; } else { long l = Random().NextLong(); answer = Convert.ToInt64(l); nf = new LongField("nf", l, Field.Store.NO); sf = new StoredField("nf", l); typeAnswer = FieldType.NumericType.LONG; } } doc.Add(nf); doc.Add(sf); answers[id] = answer; typeAnswers[id] = typeAnswer; FieldType ft = new FieldType(IntField.TYPE_STORED); ft.NumericPrecisionStep = int.MaxValue; doc.Add(new IntField("id", id, ft)); w.AddDocument(doc); } DirectoryReader r = w.Reader; w.Dispose(); Assert.AreEqual(numDocs, r.NumDocs); foreach (AtomicReaderContext ctx in r.Leaves) { AtomicReader sub = (AtomicReader)ctx.Reader; FieldCache.Ints ids = FieldCache.DEFAULT.GetInts(sub, "id", false); for (int docID = 0; docID < sub.NumDocs; docID++) { Document doc = sub.Document(docID); Field f = (Field)doc.GetField("nf"); Assert.IsTrue(f is StoredField, "got f=" + f); Assert.AreEqual(answers[ids.Get(docID)], f.NumericValue); } } r.Dispose(); dir.Dispose(); }
public virtual void TestOmitTFAndNorms() { Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; ft.OmitNorms = true; ft.Freeze(); Field f = NewField("foo", "bar", ft); doc.Add(f); iw.AddDocument(doc); IndexReader ir = iw.Reader; iw.Dispose(); IndexSearcher @is = NewSearcher(ir); foreach (Similarity sim in Sims) { @is.Similarity = sim; BooleanQuery query = new BooleanQuery(true); query.Add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD); Assert.AreEqual(1, @is.Search(query, 10).TotalHits); } ir.Dispose(); dir.Dispose(); }
public virtual void TestEmptyTerm() { Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir); Document doc = new Document(); doc.Add(NewTextField("foo", "bar", Field.Store.NO)); iw.AddDocument(doc); IndexReader ir = iw.Reader; iw.Dispose(); IndexSearcher @is = NewSearcher(ir); foreach (Similarity sim in Sims) { @is.Similarity = sim; BooleanQuery query = new BooleanQuery(true); query.Add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD); query.Add(new TermQuery(new Term("foo", "baz")), BooleanClause.Occur.SHOULD); Assert.AreEqual(1, @is.Search(query, 10).TotalHits); } ir.Dispose(); dir.Dispose(); }
public virtual void TestEmptyIndex() { Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir); IndexReader ir = iw.Reader; iw.Dispose(); IndexSearcher @is = NewSearcher(ir); foreach (Similarity sim in Sims) { @is.Similarity = sim; Assert.AreEqual(0, @is.Search(new TermQuery(new Term("foo", "bar")), 10).TotalHits); } ir.Dispose(); dir.Dispose(); }
public virtual void TestIndexing() { DirectoryInfo tmpDir = CreateTempDir("TestNeverDelete"); BaseDirectoryWrapper d = NewFSDirectory(tmpDir); // We want to "see" files removed if Lucene removed // them. this is still worth running on Windows since // some files the IR opens and closes. if (d is MockDirectoryWrapper) { ((MockDirectoryWrapper)d).NoDeleteOpenFile = false; } RandomIndexWriter w = new RandomIndexWriter(Random(), d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)); w.w.Config.SetMaxBufferedDocs(TestUtil.NextInt(Random(), 5, 30)); w.Commit(); ThreadClass[] indexThreads = new ThreadClass[Random().Next(4)]; long stopTime = Environment.TickCount + AtLeast(1000); for (int x = 0; x < indexThreads.Length; x++) { indexThreads[x] = new ThreadAnonymousInnerClassHelper(w, stopTime, NewStringField, NewTextField); indexThreads[x].Name = "Thread " + x; indexThreads[x].Start(); } HashSet <string> allFiles = new HashSet <string>(); DirectoryReader r = DirectoryReader.Open(d); while (Environment.TickCount < stopTime) { IndexCommit ic = r.IndexCommit; if (VERBOSE) { Console.WriteLine("TEST: check files: " + ic.FileNames); } allFiles.AddAll(ic.FileNames); // Make sure no old files were removed foreach (string fileName in allFiles) { Assert.IsTrue(SlowFileExists(d, fileName), "file " + fileName + " does not exist"); } DirectoryReader r2 = DirectoryReader.OpenIfChanged(r); if (r2 != null) { r.Dispose(); r = r2; } Thread.Sleep(1); } r.Dispose(); foreach (ThreadClass t in indexThreads) { t.Join(); } w.Dispose(); d.Dispose(); System.IO.Directory.Delete(tmpDir.FullName, true); }
public virtual void TestIndexing() { DirectoryInfo tmpDir = CreateTempDir("TestNeverDelete"); BaseDirectoryWrapper d = NewFSDirectory(tmpDir); // We want to "see" files removed if Lucene removed // them. this is still worth running on Windows since // some files the IR opens and closes. if (d is MockDirectoryWrapper) { ((MockDirectoryWrapper)d).NoDeleteOpenFile = false; } RandomIndexWriter w = new RandomIndexWriter(Random, d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)); w.IndexWriter.Config.SetMaxBufferedDocs(TestUtil.NextInt32(Random, 5, 30)); w.Commit(); ThreadJob[] indexThreads = new ThreadJob[Random.Next(4)]; long stopTime = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + AtLeast(1000); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results for (int x = 0; x < indexThreads.Length; x++) { indexThreads[x] = new ThreadAnonymousClass(w, stopTime, NewStringField, NewTextField); indexThreads[x].Name = "Thread " + x; indexThreads[x].Start(); } ISet <string> allFiles = new JCG.HashSet <string>(); DirectoryReader r = DirectoryReader.Open(d); while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTime) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results { IndexCommit ic = r.IndexCommit; if (Verbose) { Console.WriteLine("TEST: check files: " + ic.FileNames); } allFiles.UnionWith(ic.FileNames); // Make sure no old files were removed foreach (string fileName in allFiles) { Assert.IsTrue(SlowFileExists(d, fileName), "file " + fileName + " does not exist"); } DirectoryReader r2 = DirectoryReader.OpenIfChanged(r); if (r2 != null) { r.Dispose(); r = r2; } Thread.Sleep(1); } r.Dispose(); foreach (ThreadJob t in indexThreads) { t.Join(); } w.Dispose(); d.Dispose(); System.IO.Directory.Delete(tmpDir.FullName, true); }
public virtual void TestRandomWithPrefix() { Directory dir = NewDirectory(); HashSet <string> prefixes = new HashSet <string>(); int numPrefix = TestUtil.NextInt(Random(), 2, 7); if (VERBOSE) { Console.WriteLine("TEST: use " + numPrefix + " prefixes"); } while (prefixes.Count < numPrefix) { prefixes.Add(TestUtil.RandomRealisticUnicodeString(Random())); //prefixes.Add(TestUtil.RandomSimpleString(random)); } string[] prefixesArray = prefixes.ToArray(/*new string[prefixes.Count]*/); int NUM_TERMS = AtLeast(20); HashSet <BytesRef> terms = new HashSet <BytesRef>(); while (terms.Count < NUM_TERMS) { string s = prefixesArray[Random().Next(prefixesArray.Length)] + TestUtil.RandomRealisticUnicodeString(Random()); //final String s = prefixesArray[random.nextInt(prefixesArray.Length)] + TestUtil.RandomSimpleString(random); if (s.Length > 0) { terms.Add(new BytesRef(s)); } } BytesRef[] termsArray = terms.ToArray(); Array.Sort(termsArray); int NUM_DOCS = AtLeast(100); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); // Sometimes swap in codec that impls ord(): if (Random().Next(10) == 7) { Codec codec = TestUtil.AlwaysPostingsFormat(PostingsFormat.ForName("Lucene41WithOrds")); conf.SetCodec(codec); } RandomIndexWriter w = new RandomIndexWriter(Random(), dir, conf); int[][] idToOrds = new int[NUM_DOCS][]; HashSet <int?> ordsForDocSet = new HashSet <int?>(); for (int id = 0; id < NUM_DOCS; id++) { Document doc = new Document(); doc.Add(new Int32Field("id", id, Field.Store.NO)); int termCount = TestUtil.NextInt(Random(), 0, 20 * RANDOM_MULTIPLIER); while (ordsForDocSet.Count < termCount) { ordsForDocSet.Add(Random().Next(termsArray.Length)); } int[] ordsForDoc = new int[termCount]; int upto = 0; if (VERBOSE) { Console.WriteLine("TEST: doc id=" + id); } foreach (int ord in ordsForDocSet) { ordsForDoc[upto++] = ord; Field field = NewStringField("field", termsArray[ord].Utf8ToString(), Field.Store.NO); if (VERBOSE) { Console.WriteLine(" f=" + termsArray[ord].Utf8ToString()); } doc.Add(field); } ordsForDocSet.Clear(); Array.Sort(ordsForDoc); idToOrds[id] = ordsForDoc; w.AddDocument(doc); } DirectoryReader r = w.Reader; w.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: reader=" + r); } AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(r); foreach (string prefix in prefixesArray) { BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix); int[][] idToOrdsPrefix = new int[NUM_DOCS][]; for (int id = 0; id < NUM_DOCS; id++) { int[] docOrds = idToOrds[id]; IList <int?> newOrds = new List <int?>(); foreach (int ord in idToOrds[id]) { if (StringHelper.StartsWith(termsArray[ord], prefixRef)) { newOrds.Add(ord); } } int[] newOrdsArray = new int[newOrds.Count]; int upto = 0; foreach (int ord in newOrds) { newOrdsArray[upto++] = ord; } idToOrdsPrefix[id] = newOrdsArray; } foreach (AtomicReaderContext ctx in r.Leaves) { if (VERBOSE) { Console.WriteLine("\nTEST: sub=" + ctx.Reader); } Verify((AtomicReader)ctx.Reader, idToOrdsPrefix, termsArray, prefixRef); } // Also test top-level reader: its enum does not support // ord, so this forces the OrdWrapper to run: if (VERBOSE) { Console.WriteLine("TEST: top reader"); } Verify(slowR, idToOrdsPrefix, termsArray, prefixRef); } FieldCache.DEFAULT.PurgeByCacheKey(slowR.CoreCacheKey); r.Dispose(); dir.Dispose(); }
public virtual void TestIllegalIndexableField() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.StoreTermVectors = true; ft.StoreTermVectorPayloads = true; Document doc = new Document(); doc.Add(new Field("field", "value", ft)); try { w.AddDocument(doc); Assert.Fail("did not hit exception"); } catch (ArgumentException iae) { // Expected Assert.AreEqual("cannot index term vector payloads without term vector positions (field=\"field\")", iae.Message); } ft = new FieldType(TextField.TYPE_NOT_STORED); ft.StoreTermVectors = false; ft.StoreTermVectorOffsets = true; doc = new Document(); doc.Add(new Field("field", "value", ft)); try { w.AddDocument(doc); Assert.Fail("did not hit exception"); } catch (ArgumentException iae) { // Expected Assert.AreEqual("cannot index term vector offsets when term vectors are not indexed (field=\"field\")", iae.Message); } ft = new FieldType(TextField.TYPE_NOT_STORED); ft.StoreTermVectors = false; ft.StoreTermVectorPositions = true; doc = new Document(); doc.Add(new Field("field", "value", ft)); try { w.AddDocument(doc); Assert.Fail("did not hit exception"); } catch (ArgumentException iae) { // Expected Assert.AreEqual("cannot index term vector positions when term vectors are not indexed (field=\"field\")", iae.Message); } ft = new FieldType(TextField.TYPE_NOT_STORED); ft.StoreTermVectors = false; ft.StoreTermVectorPayloads = true; doc = new Document(); doc.Add(new Field("field", "value", ft)); try { w.AddDocument(doc); Assert.Fail("did not hit exception"); } catch (ArgumentException iae) { // Expected Assert.AreEqual("cannot index term vector payloads when term vectors are not indexed (field=\"field\")", iae.Message); } w.Dispose(); dir.Dispose(); }
public static void BeforeClass() { NoDocs = AtLeast(4096); Distance = (1L << 60) / NoDocs; Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 100, 1000)).SetMergePolicy(NewLogMergePolicy())); FieldType storedLong = new FieldType(LongField.TYPE_NOT_STORED); storedLong.Stored = true; storedLong.Freeze(); FieldType storedLong8 = new FieldType(storedLong); storedLong8.NumericPrecisionStep = 8; FieldType storedLong4 = new FieldType(storedLong); storedLong4.NumericPrecisionStep = 4; FieldType storedLong6 = new FieldType(storedLong); storedLong6.NumericPrecisionStep = 6; FieldType storedLong2 = new FieldType(storedLong); storedLong2.NumericPrecisionStep = 2; FieldType storedLongNone = new FieldType(storedLong); storedLongNone.NumericPrecisionStep = int.MaxValue; FieldType unstoredLong = LongField.TYPE_NOT_STORED; FieldType unstoredLong8 = new FieldType(unstoredLong); unstoredLong8.NumericPrecisionStep = 8; FieldType unstoredLong6 = new FieldType(unstoredLong); unstoredLong6.NumericPrecisionStep = 6; FieldType unstoredLong4 = new FieldType(unstoredLong); unstoredLong4.NumericPrecisionStep = 4; FieldType unstoredLong2 = new FieldType(unstoredLong); unstoredLong2.NumericPrecisionStep = 2; LongField field8 = new LongField("field8", 0L, storedLong8), field6 = new LongField("field6", 0L, storedLong6), field4 = new LongField("field4", 0L, storedLong4), field2 = new LongField("field2", 0L, storedLong2), fieldNoTrie = new LongField("field" + int.MaxValue, 0L, storedLongNone), ascfield8 = new LongField("ascfield8", 0L, unstoredLong8), ascfield6 = new LongField("ascfield6", 0L, unstoredLong6), ascfield4 = new LongField("ascfield4", 0L, unstoredLong4), ascfield2 = new LongField("ascfield2", 0L, unstoredLong2); Document doc = new Document(); // add fields, that have a distance to test general functionality doc.Add(field8); doc.Add(field6); doc.Add(field4); doc.Add(field2); doc.Add(fieldNoTrie); // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive doc.Add(ascfield8); doc.Add(ascfield6); doc.Add(ascfield4); doc.Add(ascfield2); // Add a series of noDocs docs with increasing long values, by updating the fields for (int l = 0; l < NoDocs; l++) { long val = Distance * l + StartOffset; field8.LongValue = val; field6.LongValue = val; field4.LongValue = val; field2.LongValue = val; fieldNoTrie.LongValue = val; val = l - (NoDocs / 2); ascfield8.LongValue = val; ascfield6.LongValue = val; ascfield4.LongValue = val; ascfield2.LongValue = val; writer.AddDocument(doc); } Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); }
public virtual void DoTestLongPostingsNoPositions(FieldInfo.IndexOptions options) { // Don't use TestUtil.getTempDir so that we own the // randomness (ie same seed will point to same dir): Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random().NextLong())); int NUM_DOCS = AtLeast(2000); if (VERBOSE) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS); } string s1 = GetRandomTerm(null); string s2 = GetRandomTerm(s1); if (VERBOSE) { Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2); /* * for(int idx=0;idx<s1.Length();idx++) { * System.out.println(" s1 ch=0x" + Integer.toHexString(s1.charAt(idx))); * } * for(int idx=0;idx<s2.Length();idx++) { * System.out.println(" s2 ch=0x" + Integer.toHexString(s2.charAt(idx))); * } */ } FixedBitSet isS1 = new FixedBitSet(NUM_DOCS); for (int idx = 0; idx < NUM_DOCS; idx++) { if (Random().NextBoolean()) { isS1.Set(idx); } } IndexReader r; if (true) { IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE).SetMergePolicy(NewLogMergePolicy()); iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random().NextDouble()); iwc.SetMaxBufferedDocs(-1); RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, iwc); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = options; for (int idx = 0; idx < NUM_DOCS; idx++) { Document doc = new Document(); string s = isS1.Get(idx) ? s1 : s2; Field f = NewField("field", s, ft); int count = TestUtil.NextInt(Random(), 1, 4); for (int ct = 0; ct < count; ct++) { doc.Add(f); } riw.AddDocument(doc); } r = riw.Reader; riw.Dispose(); } else { r = DirectoryReader.Open(dir); } /* * if (VERBOSE) { * System.out.println("TEST: terms"); * TermEnum termEnum = r.Terms(); * while(termEnum.Next()) { * System.out.println(" term=" + termEnum.Term() + " len=" + termEnum.Term().Text().Length()); * Assert.IsTrue(termEnum.DocFreq() > 0); * System.out.println(" s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.Length()); * System.out.println(" s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.Length()); * final String s = termEnum.Term().Text(); * for(int idx=0;idx<s.Length();idx++) { * System.out.println(" ch=0x" + Integer.toHexString(s.charAt(idx))); * } * } * } */ Assert.AreEqual(NUM_DOCS, r.NumDocs); Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0); Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0); int num = AtLeast(1000); for (int iter = 0; iter < num; iter++) { string term; bool doS1; if (Random().NextBoolean()) { term = s1; doS1 = true; } else { term = s2; doS1 = false; } if (VERBOSE) { Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1 + " term=" + term); } DocsEnum docs; DocsEnum postings; if (options == FieldInfo.IndexOptions.DOCS_ONLY) { docs = TestUtil.Docs(Random(), r, "field", new BytesRef(term), null, null, DocsEnum.FLAG_NONE); postings = null; } else { docs = postings = TestUtil.Docs(Random(), r, "field", new BytesRef(term), null, null, DocsEnum.FLAG_FREQS); Debug.Assert(postings != null); } Debug.Assert(docs != null); int docID = -1; while (docID < DocIdSetIterator.NO_MORE_DOCS) { int what = Random().Next(3); if (what == 0) { if (VERBOSE) { Console.WriteLine("TEST: docID=" + docID + "; do next()"); } // nextDoc int expected = docID + 1; while (true) { if (expected == NUM_DOCS) { expected = int.MaxValue; break; } else if (isS1.Get(expected) == doS1) { break; } else { expected++; } } docID = docs.NextDoc(); if (VERBOSE) { Console.WriteLine(" got docID=" + docID); } Assert.AreEqual(expected, docID); if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } if (Random().Next(6) == 3 && postings != null) { int freq = postings.Freq(); Assert.IsTrue(freq >= 1 && freq <= 4); } } else { // advance int targetDocID; if (docID == -1) { targetDocID = Random().Next(NUM_DOCS + 1); } else { targetDocID = docID + TestUtil.NextInt(Random(), 1, NUM_DOCS - docID); } if (VERBOSE) { Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")"); } int expected = targetDocID; while (true) { if (expected == NUM_DOCS) { expected = int.MaxValue; break; } else if (isS1.Get(expected) == doS1) { break; } else { expected++; } } docID = docs.Advance(targetDocID); if (VERBOSE) { Console.WriteLine(" got docID=" + docID); } Assert.AreEqual(expected, docID); if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } if (Random().Next(6) == 3 && postings != null) { int freq = postings.Freq(); Assert.IsTrue(freq >= 1 && freq <= 4, "got invalid freq=" + freq); } } } } r.Dispose(); dir.Dispose(); }
public virtual void TestNullDocIdSetIterator() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); writer.Dispose(); IndexReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir)); AtomicReaderContext context = (AtomicReaderContext)reader.Context; Filter filter = new FilterAnonymousInnerClassHelper2(this, context); CachingWrapperFilter cacher = new CachingWrapperFilter(filter); // the caching filter should return the empty set constant Assert.IsNull(cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs)); reader.Dispose(); dir.Dispose(); }
public virtual void Test2() { Random random = Random; int NUM_DOCS = AtLeast(100); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir); bool allowDups = random.NextBoolean(); ISet <string> seen = new JCG.HashSet <string>(); if (Verbose) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups); } int numDocs = 0; IList <BytesRef> docValues = new JCG.List <BytesRef>(); // TODO: deletions while (numDocs < NUM_DOCS) { string s; if (random.NextBoolean()) { s = TestUtil.RandomSimpleString(random); } else { s = TestUtil.RandomUnicodeString(random); } BytesRef br = new BytesRef(s); if (!allowDups) { if (seen.Contains(s)) { continue; } seen.Add(s); } if (Verbose) { Console.WriteLine(" " + numDocs + ": s=" + s); } Document doc = new Document(); doc.Add(new SortedDocValuesField("stringdv", br)); doc.Add(new NumericDocValuesField("id", numDocs)); docValues.Add(br); writer.AddDocument(doc); numDocs++; if (random.Next(40) == 17) { // force flush writer.GetReader().Dispose(); } } writer.ForceMerge(1); DirectoryReader r = writer.GetReader(); writer.Dispose(); AtomicReader sr = GetOnlySegmentReader(r); long END_TIME = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + (TestNightly ? 30 : 1); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results int NUM_THREADS = TestUtil.NextInt32(LuceneTestCase.Random, 1, 10); ThreadJob[] threads = new ThreadJob[NUM_THREADS]; for (int thread = 0; thread < NUM_THREADS; thread++) { threads[thread] = new ThreadAnonymousClass2(random, docValues, sr, END_TIME); threads[thread].Start(); } foreach (ThreadJob thread in threads) { thread.Join(); } r.Dispose(); dir.Dispose(); }
public virtual void Test() { IList <string> postingsList = new List <string>(); int numTerms = AtLeast(300); int maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20); bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field")); IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random())); if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1)) { // Otherwise test can take way too long (> 2 hours) numTerms /= 2; } if (VERBOSE) { Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc); Console.WriteLine("numTerms=" + numTerms); } for (int i = 0; i < numTerms; i++) { string term = Convert.ToString(i); for (int j = 0; j < i; j++) { postingsList.Add(term); } } Collections.Shuffle(postingsList); ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList); Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName())); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); int threadCount = TestUtil.NextInt(Random(), 1, 5); if (VERBOSE) { Console.WriteLine("config: " + iw.w.Config); Console.WriteLine("threadCount=" + threadCount); } Field prototype = NewTextField("field", "", Field.Store.NO); FieldType fieldType = new FieldType(prototype.FieldType); if (Random().NextBoolean()) { fieldType.OmitNorms = true; } int options = Random().Next(3); if (options == 0) { fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS; // we dont actually need positions fieldType.StoreTermVectors = true; // but enforce term vectors when we do this so we check SOMETHING } else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field"))) { fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } // else just positions ThreadClass[] threads = new ThreadClass[threadCount]; CountdownEvent startingGun = new CountdownEvent(1); for (int threadID = 0; threadID < threadCount; threadID++) { Random threadRandom = new Random(Random().Next()); Document document = new Document(); Field field = new Field("field", "", fieldType); document.Add(field); threads[threadID] = new ThreadAnonymousInnerClassHelper(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field); threads[threadID].Start(); } startingGun.Signal(); foreach (ThreadClass t in threads) { t.Join(); } iw.ForceMerge(1); DirectoryReader ir = iw.Reader; Assert.AreEqual(1, ir.Leaves.Count); AtomicReader air = (AtomicReader)ir.Leaves[0].Reader; Terms terms = air.GetTerms("field"); // numTerms-1 because there cannot be a term 0 with 0 postings: Assert.AreEqual(numTerms - 1, terms.Count); TermsEnum termsEnum = terms.GetIterator(null); BytesRef termBR; while ((termBR = termsEnum.Next()) != null) { int value = Convert.ToInt32(termBR.Utf8ToString()); Assert.AreEqual(value, termsEnum.TotalTermFreq); // don't really need to check more than this, as CheckIndex // will verify that totalTermFreq == total number of positions seen // from a docsAndPositionsEnum. } ir.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void DoTestNumbers(bool withPayloads) { Directory dir = NewDirectory(); Analyzer analyzer = withPayloads ? (Analyzer) new MockPayloadAnalyzer() : new MockAnalyzer(Random()); Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); Iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc); FieldType ft = new FieldType(TextField.TYPE_STORED); ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; if (Random().NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = Random().NextBoolean(); ft.StoreTermVectorPositions = Random().NextBoolean(); } int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(new Field("numbers", English.IntToEnglish(i), ft)); doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft)); doc.Add(new StringField("id", "" + i, Field.Store.NO)); w.AddDocument(doc); } IndexReader reader = w.Reader; w.Dispose(); string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" }; foreach (string term in terms) { DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term)); int doc; while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { string storedNumbers = reader.Document(doc).Get("numbers"); int freq = dp.Freq; for (int i = 0; i < freq; i++) { dp.NextPosition(); int start = dp.StartOffset; Debug.Assert(start >= 0); int end = dp.EndOffset; Debug.Assert(end >= 0 && end >= start); // check that the offsets correspond to the term in the src text Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term, StringComparison.Ordinal)); if (withPayloads) { // check that we have a payload and it starts with "pos" Assert.IsNotNull(dp.GetPayload()); BytesRef payload = dp.GetPayload(); Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal)); } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer! } } } // check we can skip correctly int numSkippingTests = AtLeast(50); for (int j = 0; j < numSkippingTests; j++) { int num = TestUtil.NextInt(Random(), 100, Math.Min(numDocs - 1, 999)); DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred")); int doc = dp.Advance(num); Assert.AreEqual(num, doc); int freq = dp.Freq; for (int i = 0; i < freq; i++) { string storedNumbers = reader.Document(doc).Get("numbers"); dp.NextPosition(); int start = dp.StartOffset; Debug.Assert(start >= 0); int end = dp.EndOffset; Debug.Assert(end >= 0 && end >= start); // check that the offsets correspond to the term in the src text Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred", StringComparison.Ordinal)); if (withPayloads) { // check that we have a payload and it starts with "pos" Assert.IsNotNull(dp.GetPayload()); BytesRef payload = dp.GetPayload(); Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal)); } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer! } } // check that other fields (without offsets) work correctly for (int i = 0; i < numDocs; i++) { DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0); Assert.AreEqual(i, dp.NextDoc()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); } reader.Dispose(); dir.Dispose(); }
public virtual void TestDocsWithField() { AssumeTrue("codec does not support docsWithField", DefaultCodecSupportsDocsWithField); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, null); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); if (Random.Next(4) >= 0) { doc.Add(new NumericDocValuesField("numbers", Random.NextInt64())); } doc.Add(new NumericDocValuesField("numbersAlways", Random.NextInt64())); iw.AddDocument(doc); if (Random.Next(17) == 0) { iw.Commit(); } } DirectoryReader ir = iw.GetReader(); iw.ForceMerge(1); DirectoryReader ir2 = iw.GetReader(); AtomicReader merged = GetOnlySegmentReader(ir2); iw.Dispose(); IBits multi = MultiDocValues.GetDocsWithField(ir, "numbers"); IBits single = merged.GetDocsWithField("numbers"); if (multi == null) { Assert.IsNull(single); } else { Assert.AreEqual(single.Length, multi.Length); for (int i = 0; i < numDocs; i++) { Assert.AreEqual(single.Get(i), multi.Get(i)); } } multi = MultiDocValues.GetDocsWithField(ir, "numbersAlways"); single = merged.GetDocsWithField("numbersAlways"); Assert.AreEqual(single.Length, multi.Length); for (int i = 0; i < numDocs; i++) { Assert.AreEqual(single.Get(i), multi.Get(i)); } ir.Dispose(); ir2.Dispose(); dir.Dispose(); }
public virtual void TestRandom() { // token -> docID -> tokens IDictionary <string, IDictionary <int?, IList <Token> > > actualTokens = new Dictionary <string, IDictionary <int?, IList <Token> > >(); Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc); int numDocs = AtLeast(20); //final int numDocs = AtLeast(5); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); // TODO: randomize what IndexOptions we use; also test // changing this up in one IW buffered segment...: ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; if (Random().NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = Random().NextBoolean(); ft.StoreTermVectorPositions = Random().NextBoolean(); } for (int docCount = 0; docCount < numDocs; docCount++) { Document doc = new Document(); doc.Add(new Int32Field("id", docCount, Field.Store.NO)); IList <Token> tokens = new List <Token>(); int numTokens = AtLeast(100); //final int numTokens = AtLeast(20); int pos = -1; int offset = 0; //System.out.println("doc id=" + docCount); for (int tokenCount = 0; tokenCount < numTokens; tokenCount++) { string text; if (Random().NextBoolean()) { text = "a"; } else if (Random().NextBoolean()) { text = "b"; } else if (Random().NextBoolean()) { text = "c"; } else { text = "d"; } int posIncr = Random().NextBoolean() ? 1 : Random().Next(5); if (tokenCount == 0 && posIncr == 0) { posIncr = 1; } int offIncr = Random().NextBoolean() ? 0 : Random().Next(5); int tokenOffset = Random().Next(5); Token token = MakeToken(text, posIncr, offset + offIncr, offset + offIncr + tokenOffset); if (!actualTokens.ContainsKey(text)) { actualTokens[text] = new Dictionary <int?, IList <Token> >(); } IDictionary <int?, IList <Token> > postingsByDoc = actualTokens[text]; if (!postingsByDoc.ContainsKey(docCount)) { postingsByDoc[docCount] = new List <Token>(); } postingsByDoc[docCount].Add(token); tokens.Add(token); pos += posIncr; // stuff abs position into type: token.Type = "" + pos; offset += offIncr + tokenOffset; //System.out.println(" " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.StartOffset + "/" + token.EndOffset + " (freq=" + postingsByDoc.Get(docCount).Size() + ")"); } doc.Add(new Field("content", new CannedTokenStream(tokens.ToArray()), ft)); w.AddDocument(doc); } DirectoryReader r = w.Reader; w.Dispose(); string[] terms = new string[] { "a", "b", "c", "d" }; foreach (AtomicReaderContext ctx in r.Leaves) { // TODO: improve this AtomicReader sub = (AtomicReader)ctx.Reader; //System.out.println("\nsub=" + sub); TermsEnum termsEnum = sub.Fields.GetTerms("content").GetIterator(null); DocsEnum docs = null; DocsAndPositionsEnum docsAndPositions = null; DocsAndPositionsEnum docsAndPositionsAndOffsets = null; FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(sub, "id", false); foreach (string term in terms) { //System.out.println(" term=" + term); if (termsEnum.SeekExact(new BytesRef(term))) { docs = termsEnum.Docs(null, docs); Assert.IsNotNull(docs); int doc; //System.out.println(" doc/freq"); while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { IList <Token> expected = actualTokens[term][docIDToID.Get(doc)]; //System.out.println(" doc=" + docIDToID.Get(doc) + " docID=" + doc + " " + expected.Size() + " freq"); Assert.IsNotNull(expected); Assert.AreEqual(expected.Count, docs.Freq); } // explicitly exclude offsets here docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions, DocsAndPositionsFlags.PAYLOADS); Assert.IsNotNull(docsAndPositions); //System.out.println(" doc/freq/pos"); while ((doc = docsAndPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { IList <Token> expected = actualTokens[term][docIDToID.Get(doc)]; //System.out.println(" doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq"); Assert.IsNotNull(expected); Assert.AreEqual(expected.Count, docsAndPositions.Freq); foreach (Token token in expected) { int pos = Convert.ToInt32(token.Type); //System.out.println(" pos=" + pos); Assert.AreEqual(pos, docsAndPositions.NextPosition()); } } docsAndPositionsAndOffsets = termsEnum.DocsAndPositions(null, docsAndPositions); Assert.IsNotNull(docsAndPositionsAndOffsets); //System.out.println(" doc/freq/pos/offs"); while ((doc = docsAndPositionsAndOffsets.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { IList <Token> expected = actualTokens[term][docIDToID.Get(doc)]; //System.out.println(" doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq"); Assert.IsNotNull(expected); Assert.AreEqual(expected.Count, docsAndPositionsAndOffsets.Freq); foreach (Token token in expected) { int pos = Convert.ToInt32(token.Type); //System.out.println(" pos=" + pos); Assert.AreEqual(pos, docsAndPositionsAndOffsets.NextPosition()); Assert.AreEqual(token.StartOffset, docsAndPositionsAndOffsets.StartOffset); Assert.AreEqual(token.EndOffset, docsAndPositionsAndOffsets.EndOffset); } } } } // TODO: test advance: } r.Dispose(); dir.Dispose(); }
// [Test, LongRunningTest, Timeout(int.MaxValue)] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass public virtual void TestBigDocuments() { // "big" as "much bigger than the chunk size" // for this test we force a FS dir // we can't just use newFSDirectory, because this test doesn't really index anything. // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484) Directory dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments"))); IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30)); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } Document emptyDoc = new Document(); // emptyDoc Document bigDoc1 = new Document(); // lot of small fields Document bigDoc2 = new Document(); // 1 very big field Field idField = new StringField("id", "", Field.Store.NO); emptyDoc.Add(idField); bigDoc1.Add(idField); bigDoc2.Add(idField); FieldType onlyStored = new FieldType(StringField.TYPE_STORED); onlyStored.IsIndexed = false; Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored); int numFields = RandomInts.NextIntBetween(Random(), 500000, 1000000); for (int i = 0; i < numFields; ++i) { bigDoc1.Add(smallField); } Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored); bigDoc2.Add(bigField); int numDocs = AtLeast(5); Document[] docs = new Document[numDocs]; for (int i = 0; i < numDocs; ++i) { docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2)); } for (int i = 0; i < numDocs; ++i) { idField.SetStringValue("" + i); iw.AddDocument(docs[i]); if (Random().Next(numDocs) == 0) { iw.Commit(); } } iw.Commit(); iw.ForceMerge(1); // look at what happens when big docs are merged DirectoryReader rd = DirectoryReader.Open(dir); IndexSearcher searcher = new IndexSearcher(rd); for (int i = 0; i < numDocs; ++i) { Query query = new TermQuery(new Term("id", "" + i)); TopDocs topDocs = searcher.Search(query, 1); Assert.AreEqual(1, topDocs.TotalHits, "" + i); Document doc = rd.Document(topDocs.ScoreDocs[0].Doc); Assert.IsNotNull(doc); IIndexableField[] fieldValues = doc.GetFields("fld"); Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length); if (fieldValues.Length > 0) { Assert.AreEqual(docs[i].GetFields("fld")[0].GetBinaryValue(), fieldValues[0].GetBinaryValue()); } } rd.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void Test() { int NUM_DOCS = AtLeast(1000); Directory dir = NewDirectory(); RandomIndexWriter w = null; int docsLeftInthisSegment = 0; int docUpto = 0; while (docUpto < NUM_DOCS) { if (Verbose) { Console.WriteLine("TEST: " + docUpto + " of " + NUM_DOCS); } if (docsLeftInthisSegment == 0) { IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); if (Random.NextBoolean()) { // Make sure we aggressively mix in SimpleText // since it has different impls for all codec // formats... iwc.SetCodec(Codec.ForName("Lucene46")); } if (w != null) { w.Dispose(); } w = new RandomIndexWriter(Random, dir, iwc); docsLeftInthisSegment = TestUtil.NextInt32(Random, 10, 100); } Document doc = new Document(); doc.Add(NewStringField("id", Convert.ToString(docUpto), Field.Store.YES)); w.AddDocument(doc); docUpto++; docsLeftInthisSegment--; } if (Verbose) { Console.WriteLine("\nTEST: now delete..."); } // Random delete half the docs: ISet <int?> deleted = new JCG.HashSet <int?>(); while (deleted.Count < NUM_DOCS / 2) { int?toDelete = Random.Next(NUM_DOCS); if (!deleted.Contains(toDelete)) { deleted.Add(toDelete); w.DeleteDocuments(new Term("id", Convert.ToString(toDelete))); if (Random.Next(17) == 6) { IndexReader r = w.GetReader(); Assert.AreEqual(NUM_DOCS - deleted.Count, r.NumDocs); r.Dispose(); } } } w.Dispose(); dir.Dispose(); }
public virtual void TestIntersectRandom() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); int numTerms = AtLeast(300); //final int numTerms = 50; ISet <string> terms = new JCG.HashSet <string>(); ICollection <string> pendingTerms = new List <string>(); IDictionary <BytesRef, int?> termToID = new Dictionary <BytesRef, int?>(); int id = 0; while (terms.Count != numTerms) { string s = RandomString; if (!terms.Contains(s)) { terms.Add(s); pendingTerms.Add(s); if (Random.Next(20) == 7) { AddDoc(w, pendingTerms, termToID, id++); } } } AddDoc(w, pendingTerms, termToID, id++); BytesRef[] termsArray = new BytesRef[terms.Count]; ISet <BytesRef> termsSet = new JCG.HashSet <BytesRef>(); { int upto = 0; foreach (string s in terms) { BytesRef b = new BytesRef(s); termsArray[upto++] = b; termsSet.Add(b); } Array.Sort(termsArray); } if (Verbose) { Console.WriteLine("\nTEST: indexed terms (unicode order):"); foreach (BytesRef t in termsArray) { Console.WriteLine(" " + t.Utf8ToString() + " -> id:" + termToID[t]); } } IndexReader r = w.GetReader(); w.Dispose(); // NOTE: intentional insanity!! FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false); for (int iter = 0; iter < 10 * RandomMultiplier; iter++) { // TODO: can we also test infinite As here...? // From the random terms, pick some ratio and compile an // automaton: ISet <string> acceptTerms = new JCG.HashSet <string>(); JCG.SortedSet <BytesRef> sortedAcceptTerms = new JCG.SortedSet <BytesRef>(); double keepPct = Random.NextDouble(); Automaton a; if (iter == 0) { if (Verbose) { Console.WriteLine("\nTEST: empty automaton"); } a = BasicAutomata.MakeEmpty(); } else { if (Verbose) { Console.WriteLine("\nTEST: keepPct=" + keepPct); } foreach (string s in terms) { string s2; if (Random.NextDouble() <= keepPct) { s2 = s; } else { s2 = RandomString; } acceptTerms.Add(s2); sortedAcceptTerms.Add(new BytesRef(s2)); } a = BasicAutomata.MakeStringUnion(sortedAcceptTerms); } if (Random.NextBoolean()) { if (Verbose) { Console.WriteLine("TEST: reduce the automaton"); } a.Reduce(); } CompiledAutomaton c = new CompiledAutomaton(a, true, false); BytesRef[] acceptTermsArray = new BytesRef[acceptTerms.Count]; ISet <BytesRef> acceptTermsSet = new JCG.HashSet <BytesRef>(); int upto = 0; foreach (string s in acceptTerms) { BytesRef b = new BytesRef(s); acceptTermsArray[upto++] = b; acceptTermsSet.Add(b); Assert.IsTrue(Accepts(c, b)); } Array.Sort(acceptTermsArray); if (Verbose) { Console.WriteLine("\nTEST: accept terms (unicode order):"); foreach (BytesRef t in acceptTermsArray) { Console.WriteLine(" " + t.Utf8ToString() + (termsSet.Contains(t) ? " (exists)" : "")); } Console.WriteLine(a.ToDot()); } for (int iter2 = 0; iter2 < 100; iter2++) { BytesRef startTerm = acceptTermsArray.Length == 0 || Random.NextBoolean() ? null : acceptTermsArray[Random.Next(acceptTermsArray.Length)]; if (Verbose) { Console.WriteLine("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.Utf8ToString())); if (startTerm != null) { int state = c.RunAutomaton.InitialState; for (int idx = 0; idx < startTerm.Length; idx++) { int label = startTerm.Bytes[startTerm.Offset + idx] & 0xff; Console.WriteLine(" state=" + state + " label=" + label); state = c.RunAutomaton.Step(state, label); Assert.IsTrue(state != -1); } Console.WriteLine(" state=" + state); } } TermsEnum te = MultiFields.GetTerms(r, "f").Intersect(c, startTerm); int loc; if (startTerm == null) { loc = 0; } else { loc = Array.BinarySearch(termsArray, BytesRef.DeepCopyOf(startTerm)); if (loc < 0) { loc = -(loc + 1); } else { // startTerm exists in index loc++; } } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc])) { loc++; } DocsEnum docsEnum = null; while (loc < termsArray.Length) { BytesRef expected = termsArray[loc]; BytesRef actual = te.Next(); if (Verbose) { Console.WriteLine("TEST: next() expected=" + expected.Utf8ToString() + " actual=" + (actual == null ? "null" : actual.Utf8ToString())); } Assert.AreEqual(expected, actual); Assert.AreEqual(1, te.DocFreq); docsEnum = TestUtil.Docs(Random, te, null, docsEnum, DocsFlags.NONE); int docID = docsEnum.NextDoc(); Assert.IsTrue(docID != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(docIDToID.Get(docID), (int)termToID[expected]); do { loc++; } while (loc < termsArray.Length && !acceptTermsSet.Contains(termsArray[loc])); } Assert.IsNull(te.Next()); } } r.Dispose(); dir.Dispose(); }
public virtual void TestStressAdvance_Mem() { for (int iter = 0; iter < 3; iter++) { if (Verbose) { Console.WriteLine("\nTEST: iter=" + iter); } Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); ISet <int> aDocs = new JCG.HashSet <int>(); Documents.Document doc = new Documents.Document(); Field f = NewStringField("field", "", Field.Store.NO); doc.Add(f); Field idField = NewStringField("id", "", Field.Store.YES); doc.Add(idField); int num = AtLeast(4097); if (Verbose) { Console.WriteLine("\nTEST: numDocs=" + num); } for (int id = 0; id < num; id++) { if (Random.Next(4) == 3) { f.SetStringValue("a"); aDocs.Add(id); } else { f.SetStringValue("b"); } idField.SetStringValue("" + id); w.AddDocument(doc); if (Verbose) { Console.WriteLine("\nTEST: doc upto " + id); } } w.ForceMerge(1); IList <int> aDocIDs = new List <int>(); IList <int> bDocIDs = new List <int>(); DirectoryReader r = w.GetReader(); int[] idToDocID = new int[r.MaxDoc]; for (int docID = 0; docID < idToDocID.Length; docID++) { int id = Convert.ToInt32(r.Document(docID).Get("id")); if (aDocs.Contains(id)) { aDocIDs.Add(docID); } else { bDocIDs.Add(docID); } } TermsEnum te = GetOnlySegmentReader(r).Fields.GetTerms("field").GetIterator(null); DocsEnum de = null; for (int iter2 = 0; iter2 < 10; iter2++) { if (Verbose) { Console.WriteLine("\nTEST: iter=" + iter + " iter2=" + iter2); } Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("a"))); de = TestUtil.Docs(Random, te, null, de, DocsFlags.NONE); TestOne(de, aDocIDs); Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("b"))); de = TestUtil.Docs(Random, te, null, de, DocsFlags.NONE); TestOne(de, bDocIDs); } w.Dispose(); r.Dispose(); dir.Dispose(); } }
public virtual void TestIntersectStartTerm() { Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); iwc.SetMergePolicy(new LogDocMergePolicy()); RandomIndexWriter w = new RandomIndexWriter(Random, dir, iwc); Document doc = new Document(); doc.Add(NewStringField("field", "abc", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("field", "abd", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("field", "acd", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("field", "bcd", Field.Store.NO)); w.AddDocument(doc); w.ForceMerge(1); DirectoryReader r = w.GetReader(); w.Dispose(); AtomicReader sub = GetOnlySegmentReader(r); Terms terms = sub.Fields.GetTerms("field"); Automaton automaton = (new RegExp(".*d", RegExpSyntax.NONE)).ToAutomaton(); CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false); TermsEnum te; // should seek to startTerm te = terms.Intersect(ca, new BytesRef("aad")); Assert.AreEqual("abd", te.Next().Utf8ToString()); Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.AreEqual("acd", te.Next().Utf8ToString()); Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.AreEqual("bcd", te.Next().Utf8ToString()); Assert.AreEqual(3, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.IsNull(te.Next()); // should fail to find ceil label on second arc, rewind te = terms.Intersect(ca, new BytesRef("add")); Assert.AreEqual("bcd", te.Next().Utf8ToString()); Assert.AreEqual(3, te.Docs(null, null, DocsFlags.NONE).NextDoc()); Assert.IsNull(te.Next()); // should reach end te = terms.Intersect(ca, new BytesRef("bcd")); Assert.IsNull(te.Next()); te = terms.Intersect(ca, new BytesRef("ddd")); Assert.IsNull(te.Next()); r.Dispose(); dir.Dispose(); }
public virtual void TestRandom() { Directory dir = NewDirectory(); int NUM_TERMS = AtLeast(20); ISet <BytesRef> terms = new JCG.HashSet <BytesRef>(); while (terms.Count < NUM_TERMS) { string s = TestUtil.RandomRealisticUnicodeString(Random); //final String s = TestUtil.RandomSimpleString(random); if (s.Length > 0) { terms.Add(new BytesRef(s)); } } BytesRef[] termsArray = terms.ToArray(/*new BytesRef[terms.Count]*/); Array.Sort(termsArray); int NUM_DOCS = AtLeast(100); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); // Sometimes swap in codec that impls ord(): if (Random.Next(10) == 7) { // Make sure terms index has ords: Codec codec = TestUtil.AlwaysPostingsFormat(PostingsFormat.ForName("Lucene41WithOrds")); conf.SetCodec(codec); } RandomIndexWriter w = new RandomIndexWriter(Random, dir, conf); int[][] idToOrds = new int[NUM_DOCS][]; ISet <int?> ordsForDocSet = new JCG.HashSet <int?>(); for (int id = 0; id < NUM_DOCS; id++) { Document doc = new Document(); doc.Add(new Int32Field("id", id, Field.Store.NO)); int termCount = TestUtil.NextInt32(Random, 0, 20 * RandomMultiplier); while (ordsForDocSet.Count < termCount) { ordsForDocSet.Add(Random.Next(termsArray.Length)); } int[] ordsForDoc = new int[termCount]; int upto = 0; if (Verbose) { Console.WriteLine("TEST: doc id=" + id); } foreach (int ord in ordsForDocSet) { ordsForDoc[upto++] = ord; Field field = NewStringField("field", termsArray[ord].Utf8ToString(), Field.Store.NO); if (Verbose) { Console.WriteLine(" f=" + termsArray[ord].Utf8ToString()); } doc.Add(field); } ordsForDocSet.Clear(); Array.Sort(ordsForDoc); idToOrds[id] = ordsForDoc; w.AddDocument(doc); } DirectoryReader r = w.GetReader(); w.Dispose(); if (Verbose) { Console.WriteLine("TEST: reader=" + r); } foreach (AtomicReaderContext ctx in r.Leaves) { if (Verbose) { Console.WriteLine("\nTEST: sub=" + ctx.Reader); } Verify((AtomicReader)ctx.Reader, idToOrds, termsArray, null); } // Also test top-level reader: its enum does not support // ord, so this forces the OrdWrapper to run: if (Verbose) { Console.WriteLine("TEST: top reader"); } AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(r); Verify(slowR, idToOrds, termsArray, null); FieldCache.DEFAULT.PurgeByCacheKey(slowR.CoreCacheKey); r.Dispose(); dir.Dispose(); }
public virtual void Test() { IList <string> postingsList = new List <string>(); int numTerms = AtLeast(300); int maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20); bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal); IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random)); if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TestNightly || RandomMultiplier > 1)) { // Otherwise test can take way too long (> 2 hours) //numTerms /= 2; // LUCENENET specific - To keep this under the 1 hour free limit // of Azure DevOps, this was reduced from /2 to /6. numTerms /= 6; } if (Verbose) { Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc); Console.WriteLine("numTerms=" + numTerms); } for (int i = 0; i < numTerms; i++) { string term = Convert.ToString(i, CultureInfo.InvariantCulture); for (int j = 0; j < i; j++) { postingsList.Add(term); } } postingsList.Shuffle(Random); ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList); Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName())); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); int threadCount = TestUtil.NextInt32(Random, 1, 5); if (Verbose) { Console.WriteLine("config: " + iw.IndexWriter.Config); Console.WriteLine("threadCount=" + threadCount); } Field prototype = NewTextField("field", "", Field.Store.NO); FieldType fieldType = new FieldType(prototype.FieldType); if (Random.NextBoolean()) { fieldType.OmitNorms = true; } int options = Random.Next(3); if (options == 0) { fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS; // we dont actually need positions fieldType.StoreTermVectors = true; // but enforce term vectors when we do this so we check SOMETHING } else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field"))) { fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } // else just positions ThreadJob[] threads = new ThreadJob[threadCount]; CountdownEvent startingGun = new CountdownEvent(1); for (int threadID = 0; threadID < threadCount; threadID++) { Random threadRandom = new Random(Random.Next()); Document document = new Document(); Field field = new Field("field", "", fieldType); document.Add(field); threads[threadID] = new ThreadAnonymousClass(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field); threads[threadID].Start(); } startingGun.Signal(); foreach (ThreadJob t in threads) { t.Join(); } iw.ForceMerge(1); DirectoryReader ir = iw.GetReader(); Assert.AreEqual(1, ir.Leaves.Count); AtomicReader air = (AtomicReader)ir.Leaves[0].Reader; Terms terms = air.GetTerms("field"); // numTerms-1 because there cannot be a term 0 with 0 postings: Assert.AreEqual(numTerms - 1, terms.Count); TermsEnum termsEnum = terms.GetEnumerator(); while (termsEnum.MoveNext()) { int value = Convert.ToInt32(termsEnum.Term.Utf8ToString(), CultureInfo.InvariantCulture); Assert.AreEqual(value, termsEnum.TotalTermFreq); // don't really need to check more than this, as CheckIndex // will verify that totalTermFreq == total number of positions seen // from a docsAndPositionsEnum. } ir.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void Test() { IList <string> postingsList = new List <string>(); int numTerms = AtLeast(300); int maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20); bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field")); IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random())); if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1)) { // Otherwise test can take way too long (> 2 hours) numTerms /= 2; } if (VERBOSE) { Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc); Console.WriteLine("numTerms=" + numTerms); } for (int i = 0; i < numTerms; i++) { string term = Convert.ToString(i); for (int j = 0; j < i; j++) { postingsList.Add(term); } } postingsList = CollectionsHelper.Shuffle(postingsList); ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList); Directory dir = NewFSDirectory(CreateTempDir("bagofpostings")); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); int threadCount = TestUtil.NextInt(Random(), 1, 5); if (VERBOSE) { Console.WriteLine("config: " + iw.w.Config); Console.WriteLine("threadCount=" + threadCount); } ThreadClass[] threads = new ThreadClass[threadCount]; CountDownLatch startingGun = new CountDownLatch(1); for (int threadID = 0; threadID < threadCount; threadID++) { threads[threadID] = new ThreadAnonymousInnerClassHelper(this, maxTermsPerDoc, postings, iw, startingGun); threads[threadID].Start(); } startingGun.countDown(); foreach (ThreadClass t in threads) { t.Join(); } iw.ForceMerge(1); DirectoryReader ir = iw.Reader; Assert.AreEqual(1, ir.Leaves.Count); AtomicReader air = (AtomicReader)ir.Leaves[0].Reader; Terms terms = air.Terms("field"); // numTerms-1 because there cannot be a term 0 with 0 postings: Assert.AreEqual(numTerms - 1, air.Fields.UniqueTermCount); if (iwc.Codec is Lucene3xCodec == false) { Assert.AreEqual(numTerms - 1, terms.Size()); } TermsEnum termsEnum = terms.Iterator(null); BytesRef term_; while ((term_ = termsEnum.Next()) != null) { int value = Convert.ToInt32(term_.Utf8ToString()); Assert.AreEqual(value, termsEnum.DocFreq()); // don't really need to check more than this, as CheckIndex // will verify that docFreq == actual number of documents seen // from a docsAndPositionsEnum. } ir.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void TestSortedTermsEnum() { Directory directory = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random()); IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwconfig.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, iwconfig); Document doc = new Document(); doc.Add(new StringField("field", "hello", Field.Store.NO)); iwriter.AddDocument(doc); doc = new Document(); doc.Add(new StringField("field", "world", Field.Store.NO)); iwriter.AddDocument(doc); doc = new Document(); doc.Add(new StringField("field", "beer", Field.Store.NO)); iwriter.AddDocument(doc); iwriter.ForceMerge(1); DirectoryReader ireader = iwriter.Reader; iwriter.Dispose(); AtomicReader ar = GetOnlySegmentReader(ireader); SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field"); Assert.AreEqual(3, dv.ValueCount); TermsEnum termsEnum = dv.GetTermsEnum(); // next() Assert.AreEqual("beer", termsEnum.Next().Utf8ToString()); Assert.AreEqual(0, termsEnum.Ord); Assert.AreEqual("hello", termsEnum.Next().Utf8ToString()); Assert.AreEqual(1, termsEnum.Ord); Assert.AreEqual("world", termsEnum.Next().Utf8ToString()); Assert.AreEqual(2, termsEnum.Ord); // seekCeil() Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!"))); Assert.AreEqual("hello", termsEnum.Term.Utf8ToString()); Assert.AreEqual(1, termsEnum.Ord); Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer"))); Assert.AreEqual("beer", termsEnum.Term.Utf8ToString()); Assert.AreEqual(0, termsEnum.Ord); Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz"))); // seekExact() Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer"))); Assert.AreEqual("beer", termsEnum.Term.Utf8ToString()); Assert.AreEqual(0, termsEnum.Ord); Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello"))); Assert.AreEqual("hello", termsEnum.Term.Utf8ToString()); Assert.AreEqual(1, termsEnum.Ord); Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world"))); Assert.AreEqual("world", termsEnum.Term.Utf8ToString()); Assert.AreEqual(2, termsEnum.Ord); Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus"))); // seek(ord) termsEnum.SeekExact(0); Assert.AreEqual("beer", termsEnum.Term.Utf8ToString()); Assert.AreEqual(0, termsEnum.Ord); termsEnum.SeekExact(1); Assert.AreEqual("hello", termsEnum.Term.Utf8ToString()); Assert.AreEqual(1, termsEnum.Ord); termsEnum.SeekExact(2); Assert.AreEqual("world", termsEnum.Term.Utf8ToString()); Assert.AreEqual(2, termsEnum.Ord); ireader.Dispose(); directory.Dispose(); }
public virtual void TestArbitraryFields() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); int NUM_DOCS = AtLeast(27); if (VERBOSE) { Console.WriteLine("TEST: " + NUM_DOCS + " docs"); } int[] fieldsPerDoc = new int[NUM_DOCS]; int baseCount = 0; for (int docCount = 0; docCount < NUM_DOCS; docCount++) { int fieldCount = TestUtil.NextInt(Random(), 1, 17); fieldsPerDoc[docCount] = fieldCount - 1; int finalDocCount = docCount; if (VERBOSE) { Console.WriteLine("TEST: " + fieldCount + " fields in doc " + docCount); } int finalBaseCount = baseCount; baseCount += fieldCount - 1; w.AddDocument(new IterableAnonymousInnerClassHelper(this, fieldCount, finalDocCount, finalBaseCount)); } IndexReader r = w.Reader; w.Dispose(); IndexSearcher s = NewSearcher(r); int counter = 0; for (int id = 0; id < NUM_DOCS; id++) { if (VERBOSE) { Console.WriteLine("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter); } TopDocs hits = s.Search(new TermQuery(new Term("id", "" + id)), 1); Assert.AreEqual(1, hits.TotalHits); int docID = hits.ScoreDocs[0].Doc; Document doc = s.Doc(docID); int endCounter = counter + fieldsPerDoc[id]; while (counter < endCounter) { string name = "f" + counter; int fieldID = counter % 10; bool stored = (counter & 1) == 0 || fieldID == 3; bool binary = fieldID == 3; bool indexed = fieldID != 3; string stringValue; if (fieldID != 3 && fieldID != 9) { stringValue = "text " + counter; } else { stringValue = null; } // stored: if (stored) { IIndexableField f = doc.GetField(name); Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter); if (binary) { Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter); BytesRef b = f.GetBinaryValue(); Assert.IsNotNull(b); Assert.AreEqual(10, b.Length); for (int idx = 0; idx < 10; idx++) { Assert.AreEqual((byte)(idx + counter), b.Bytes[b.Offset + idx]); } } else { Debug.Assert(stringValue != null); Assert.AreEqual(stringValue, f.GetStringValue()); } } if (indexed) { bool tv = counter % 2 == 1 && fieldID != 9; if (tv) { Terms tfv = r.GetTermVectors(docID).GetTerms(name); Assert.IsNotNull(tfv); TermsEnum termsEnum = tfv.GetIterator(null); Assert.AreEqual(new BytesRef("" + counter), termsEnum.Next()); Assert.AreEqual(1, termsEnum.TotalTermFreq); DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(1, dpEnum.Freq); Assert.AreEqual(1, dpEnum.NextPosition()); Assert.AreEqual(new BytesRef("text"), termsEnum.Next()); Assert.AreEqual(1, termsEnum.TotalTermFreq); dpEnum = termsEnum.DocsAndPositions(null, dpEnum); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(1, dpEnum.Freq); Assert.AreEqual(0, dpEnum.NextPosition()); Assert.IsNull(termsEnum.Next()); // TODO: offsets } else { Fields vectors = r.GetTermVectors(docID); Assert.IsTrue(vectors == null || vectors.GetTerms(name) == null); } BooleanQuery bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST); bq.Add(new TermQuery(new Term(name, "text")), Occur.MUST); TopDocs hits2 = s.Search(bq, 1); Assert.AreEqual(1, hits2.TotalHits); Assert.AreEqual(docID, hits2.ScoreDocs[0].Doc); bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST); bq.Add(new TermQuery(new Term(name, "" + counter)), Occur.MUST); TopDocs hits3 = s.Search(bq, 1); Assert.AreEqual(1, hits3.TotalHits); Assert.AreEqual(docID, hits3.ScoreDocs[0].Doc); } counter++; } } r.Dispose(); dir.Dispose(); }
public virtual void TestSimple() { Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir); Document doc = new Document(); Field field = NewTextField("foo", "", Field.Store.NO); doc.Add(field); Field dvField = new FloatDocValuesField("foo_boost", 0.0F); doc.Add(dvField); Field field2 = NewTextField("bar", "", Field.Store.NO); doc.Add(field2); field.StringValue = "quick brown fox"; field2.StringValue = "quick brown fox"; dvField.FloatValue = 2f; // boost x2 iw.AddDocument(doc); field.StringValue = "jumps over lazy brown dog"; field2.StringValue = "jumps over lazy brown dog"; dvField.FloatValue = 4f; // boost x4 iw.AddDocument(doc); IndexReader ir = iw.Reader; iw.Dispose(); // no boosting IndexSearcher searcher1 = NewSearcher(ir, false); Similarity @base = searcher1.Similarity; // boosting IndexSearcher searcher2 = NewSearcher(ir, false); searcher2.Similarity = new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this, field, @base); // in this case, we searched on field "foo". first document should have 2x the score. TermQuery tq = new TermQuery(new Term("foo", "quick")); QueryUtils.Check(Random(), tq, searcher1); QueryUtils.Check(Random(), tq, searcher2); TopDocs noboost = searcher1.Search(tq, 10); TopDocs boost = searcher2.Search(tq, 10); Assert.AreEqual(1, noboost.TotalHits); Assert.AreEqual(1, boost.TotalHits); //System.out.println(searcher2.Explain(tq, boost.ScoreDocs[0].Doc)); Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 2f, SCORE_EPSILON); // this query matches only the second document, which should have 4x the score. tq = new TermQuery(new Term("foo", "jumps")); QueryUtils.Check(Random(), tq, searcher1); QueryUtils.Check(Random(), tq, searcher2); noboost = searcher1.Search(tq, 10); boost = searcher2.Search(tq, 10); Assert.AreEqual(1, noboost.TotalHits); Assert.AreEqual(1, boost.TotalHits); Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 4f, SCORE_EPSILON); // search on on field bar just for kicks, nothing should happen, since we setup // our sim provider to only use foo_boost for field foo. tq = new TermQuery(new Term("bar", "quick")); QueryUtils.Check(Random(), tq, searcher1); QueryUtils.Check(Random(), tq, searcher2); noboost = searcher1.Search(tq, 10); boost = searcher2.Search(tq, 10); Assert.AreEqual(1, noboost.TotalHits); Assert.AreEqual(1, boost.TotalHits); Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score, SCORE_EPSILON); ir.Dispose(); dir.Dispose(); }
public virtual void TestChangeGaps() { // LUCENE-5324: check that it is possible to change the wrapper's gaps int positionGap = Random().Next(1000); int offsetGap = Random().Next(1000); Analyzer @delegate = new MockAnalyzer(Random()); Analyzer a = new AnalyzerWrapperAnonymousInnerClassHelper2(this, @delegate.Strategy, positionGap, offsetGap, @delegate); RandomIndexWriter writer = new RandomIndexWriter(Random(), NewDirectory()); Document doc = new Document(); FieldType ft = new FieldType(); ft.Indexed = true; ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; ft.Tokenized = true; ft.StoreTermVectors = true; ft.StoreTermVectorPositions = true; ft.StoreTermVectorOffsets = true; doc.Add(new Field("f", "a", ft)); doc.Add(new Field("f", "a", ft)); writer.AddDocument(doc, a); AtomicReader reader = GetOnlySegmentReader(writer.Reader); Fields fields = reader.GetTermVectors(0); Terms terms = fields.Terms("f"); TermsEnum te = terms.Iterator(null); Assert.AreEqual(new BytesRef("a"), te.Next()); DocsAndPositionsEnum dpe = te.DocsAndPositions(null, null); Assert.AreEqual(0, dpe.NextDoc()); Assert.AreEqual(2, dpe.Freq()); Assert.AreEqual(0, dpe.NextPosition()); Assert.AreEqual(0, dpe.StartOffset()); int endOffset = dpe.EndOffset(); Assert.AreEqual(1 + positionGap, dpe.NextPosition()); Assert.AreEqual(1 + endOffset + offsetGap, dpe.EndOffset()); Assert.AreEqual(null, te.Next()); reader.Dispose(); writer.Dispose(); writer.w.Directory.Dispose(); }
public virtual void TestInfiniteValues() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); Document doc = new Document(); doc.Add(new DoubleField("double", double.NegativeInfinity, Field.Store.NO)); doc.Add(new LongField("long", long.MinValue, Field.Store.NO)); writer.AddDocument(doc); doc = new Document(); doc.Add(new DoubleField("double", double.PositiveInfinity, Field.Store.NO)); doc.Add(new LongField("long", long.MaxValue, Field.Store.NO)); writer.AddDocument(doc); doc = new Document(); doc.Add(new DoubleField("double", 0.0, Field.Store.NO)); doc.Add(new LongField("long", 0L, Field.Store.NO)); writer.AddDocument(doc); foreach (double d in TestNumericUtils.DOUBLE_NANs) { doc = new Document(); doc.Add(new DoubleField("double", d, Field.Store.NO)); writer.AddDocument(doc); } writer.Dispose(); IndexReader r = DirectoryReader.Open(dir); IndexSearcher s = NewSearcher(r); Query q = NumericRangeQuery.NewLongRange("long", null, null, true, true); TopDocs topDocs = s.Search(q, 10); Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewLongRange("long", null, null, false, false); topDocs = s.Search(q, 10); Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewLongRange("long", long.MinValue, long.MaxValue, true, true); topDocs = s.Search(q, 10); Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewLongRange("long", long.MinValue, long.MaxValue, false, false); topDocs = s.Search(q, 10); Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewDoubleRange("double", null, null, true, true); topDocs = s.Search(q, 10); Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewDoubleRange("double", null, null, false, false); topDocs = s.Search(q, 10); Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, true, true); topDocs = s.Search(q, 10); Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, false, false); topDocs = s.Search(q, 10); Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count"); q = NumericRangeQuery.NewDoubleRange("double", double.NaN, double.NaN, true, true); topDocs = s.Search(q, 10); Assert.AreEqual(TestNumericUtils.DOUBLE_NANs.Length, topDocs.ScoreDocs.Length, "Score doc count"); r.Dispose(); dir.Dispose(); }
public virtual void TestPostings() { Directory dir = NewFSDirectory(CreateTempDir("postings")); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetCodec(Codec.ForName("Lucene40")); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); Document doc = new Document(); // id field FieldType idType = new FieldType(StringField.TYPE_NOT_STORED); idType.StoreTermVectors = true; Field idField = new Field("id", "", idType); doc.Add(idField); // title field: short text field FieldType titleType = new FieldType(TextField.TYPE_NOT_STORED); titleType.StoreTermVectors = true; titleType.StoreTermVectorPositions = true; titleType.StoreTermVectorOffsets = true; titleType.IndexOptions = IndexOptions(); Field titleField = new Field("title", "", titleType); doc.Add(titleField); // body field: long text field FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED); bodyType.StoreTermVectors = true; bodyType.StoreTermVectorPositions = true; bodyType.StoreTermVectorOffsets = true; bodyType.IndexOptions = IndexOptions(); Field bodyField = new Field("body", "", bodyType); doc.Add(bodyField); int numDocs = AtLeast(1000); for (int i = 0; i < numDocs; i++) { idField.StringValue = Convert.ToString(i); titleField.StringValue = FieldValue(1); bodyField.StringValue = FieldValue(3); iw.AddDocument(doc); if (Random().Next(20) == 0) { iw.DeleteDocuments(new Term("id", Convert.ToString(i))); } } if (Random().NextBoolean()) { // delete 1-100% of docs iw.DeleteDocuments(new Term("title", Terms[Random().Next(Terms.Length)])); } iw.Dispose(); dir.Dispose(); // checkindex }
public virtual void Test() { IList <string> postingsList = new List <string>(); int numTerms = AtLeast(300); int maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20); bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal); IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random)); if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TestNightly || RandomMultiplier > 1)) { // Otherwise test can take way too long (> 2 hours) numTerms /= 2; } if (Verbose) { Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc); Console.WriteLine("numTerms=" + numTerms); } for (int i = 0; i < numTerms; i++) { string term = Convert.ToString(i, CultureInfo.InvariantCulture); for (int j = 0; j < i; j++) { postingsList.Add(term); } } postingsList.Shuffle(Random); ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList); Directory dir = NewFSDirectory(CreateTempDir("bagofpostings")); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); int threadCount = TestUtil.NextInt32(Random, 1, 5); if (Verbose) { Console.WriteLine("config: " + iw.IndexWriter.Config); Console.WriteLine("threadCount=" + threadCount); } ThreadJob[] threads = new ThreadJob[threadCount]; CountdownEvent startingGun = new CountdownEvent(1); for (int threadID = 0; threadID < threadCount; threadID++) { threads[threadID] = new ThreadAnonymousInnerClassHelper(this, maxTermsPerDoc, postings, iw, startingGun); threads[threadID].Start(); } startingGun.Signal(); foreach (ThreadJob t in threads) { t.Join(); } iw.ForceMerge(1); DirectoryReader ir = iw.GetReader(); Assert.AreEqual(1, ir.Leaves.Count); AtomicReader air = (AtomicReader)ir.Leaves[0].Reader; Terms terms = air.GetTerms("field"); // numTerms-1 because there cannot be a term 0 with 0 postings: #pragma warning disable 612, 618 Assert.AreEqual(numTerms - 1, air.Fields.UniqueTermCount); if (iwc.Codec is Lucene3xCodec == false) #pragma warning restore 612, 618 { Assert.AreEqual(numTerms - 1, terms.Count); } TermsEnum termsEnum = terms.GetEnumerator(); while (termsEnum.MoveNext()) { int value = Convert.ToInt32(termsEnum.Term.Utf8ToString(), CultureInfo.InvariantCulture); Assert.AreEqual(value, termsEnum.DocFreq); // don't really need to check more than this, as CheckIndex // will verify that docFreq == actual number of documents seen // from a docsAndPositionsEnum. } ir.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void TestIsCacheAble() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); writer.AddDocument(new Document()); writer.Dispose(); IndexReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir)); // not cacheable: AssertDocIdSetCacheable(reader, new QueryWrapperFilter(new TermQuery(new Term("test", "value"))), false); // returns default empty docidset, always cacheable: AssertDocIdSetCacheable(reader, NumericRangeFilter.NewIntRange("test", Convert.ToInt32(10000), Convert.ToInt32(-10000), true, true), true); // is cacheable: AssertDocIdSetCacheable(reader, FieldCacheRangeFilter.NewIntRange("test", Convert.ToInt32(10), Convert.ToInt32(20), true, true), true); // a fixedbitset filter is always cacheable AssertDocIdSetCacheable(reader, new FilterAnonymousInnerClassHelper3(this), true); reader.Dispose(); dir.Dispose(); }
public virtual void Test() { Directory dir = NewDirectory(); Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, Analyzer.PER_FIELD_REUSE_STRATEGY); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat())); // TODO we could actually add more fields implemented with different PFs // or, just put this test into the usual rotation? RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwc.Clone()); Document doc = new Document(); FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED); // turn this on for a cross-check docsOnlyType.StoreTermVectors = true; docsOnlyType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED); // turn this on for a cross-check docsAndFreqsType.StoreTermVectors = true; docsAndFreqsType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED); // turn these on for a cross-check positionsType.StoreTermVectors = true; positionsType.StoreTermVectorPositions = true; positionsType.StoreTermVectorOffsets = true; positionsType.StoreTermVectorPayloads = true; FieldType offsetsType = new FieldType(positionsType); offsetsType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; Field field1 = new Field("field1docs", "", docsOnlyType); Field field2 = new Field("field2freqs", "", docsAndFreqsType); Field field3 = new Field("field3positions", "", positionsType); Field field4 = new Field("field4offsets", "", offsetsType); Field field5 = new Field("field5payloadsFixed", "", positionsType); Field field6 = new Field("field6payloadsVariable", "", positionsType); Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType); Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType); doc.Add(field1); doc.Add(field2); doc.Add(field3); doc.Add(field4); doc.Add(field5); doc.Add(field6); doc.Add(field7); doc.Add(field8); for (int i = 0; i < MAXDOC; i++) { string stringValue = Convert.ToString(i) + " verycommon " + English.IntToEnglish(i).Replace('-', ' ') + " " + TestUtil.RandomSimpleString(Random()); field1.StringValue = stringValue; field2.StringValue = stringValue; field3.StringValue = stringValue; field4.StringValue = stringValue; field5.StringValue = stringValue; field6.StringValue = stringValue; field7.StringValue = stringValue; field8.StringValue = stringValue; iw.AddDocument(doc); } iw.Dispose(); Verify(dir); TestUtil.CheckIndex(dir); // for some extra coverage, checkIndex before we forceMerge iwc.SetOpenMode(OpenMode_e.APPEND); IndexWriter iw2 = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone()); iw2.ForceMerge(1); iw2.Dispose(); Verify(dir); dir.Dispose(); }
public virtual void TestTermUTF16SortOrder() { Random rnd = Random(); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(rnd, dir); Document d = new Document(); // Single segment Field f = NewStringField("f", "", Field.Store.NO); d.Add(f); char[] chars = new char[2]; HashSet <string> allTerms = new HashSet <string>(); int num = AtLeast(200); for (int i = 0; i < num; i++) { string s; if (rnd.NextBoolean()) { // Single char if (rnd.NextBoolean()) { // Above surrogates chars[0] = (char)GetInt(rnd, 1 + UnicodeUtil.UNI_SUR_LOW_END, 0xffff); } else { // Below surrogates chars[0] = (char)GetInt(rnd, 0, UnicodeUtil.UNI_SUR_HIGH_START - 1); } s = new string(chars, 0, 1); } else { // Surrogate pair chars[0] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_HIGH_START, UnicodeUtil.UNI_SUR_HIGH_END); Assert.IsTrue(((int)chars[0]) >= UnicodeUtil.UNI_SUR_HIGH_START && ((int)chars[0]) <= UnicodeUtil.UNI_SUR_HIGH_END); chars[1] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_LOW_START, UnicodeUtil.UNI_SUR_LOW_END); s = new string(chars, 0, 2); } allTerms.Add(s); f.StringValue = s; writer.AddDocument(d); if ((1 + i) % 42 == 0) { writer.Commit(); } } IndexReader r = writer.Reader; // Test each sub-segment foreach (AtomicReaderContext ctx in r.Leaves) { CheckTermsOrder(ctx.Reader, allTerms, false); } CheckTermsOrder(r, allTerms, true); // Test multi segment r.Dispose(); writer.ForceMerge(1); // Test single segment r = writer.Reader; CheckTermsOrder(r, allTerms, true); r.Dispose(); writer.Dispose(); dir.Dispose(); }
public virtual void Test() { // update variables int commitPercent = Random.Next(20); int softCommitPercent = Random.Next(100); // what percent of the commits are soft int deletePercent = Random.Next(50); int deleteByQueryPercent = Random.Next(25); int ndocs = AtLeast(50); int nWriteThreads = TestUtil.NextInt32(Random, 1, TestNightly ? 10 : 5); int maxConcurrentCommits = TestUtil.NextInt32(Random, 1, TestNightly ? 10 : 5); // number of committers at a time... needed if we want to avoid commit errors due to exceeding the max bool tombstones = Random.NextBoolean(); // query variables AtomicInt64 operations = new AtomicInt64(AtLeast(10000)); // number of query operations to perform in total int nReadThreads = TestUtil.NextInt32(Random, 1, TestNightly ? 10 : 5); InitModel(ndocs); FieldType storedOnlyType = new FieldType(); storedOnlyType.IsStored = true; if (Verbose) { Console.WriteLine("\n"); Console.WriteLine("TEST: commitPercent=" + commitPercent); Console.WriteLine("TEST: softCommitPercent=" + softCommitPercent); Console.WriteLine("TEST: deletePercent=" + deletePercent); Console.WriteLine("TEST: deleteByQueryPercent=" + deleteByQueryPercent); Console.WriteLine("TEST: ndocs=" + ndocs); Console.WriteLine("TEST: nWriteThreads=" + nWriteThreads); Console.WriteLine("TEST: nReadThreads=" + nReadThreads); Console.WriteLine("TEST: maxConcurrentCommits=" + maxConcurrentCommits); Console.WriteLine("TEST: tombstones=" + tombstones); Console.WriteLine("TEST: operations=" + operations); Console.WriteLine("\n"); } AtomicInt32 numCommitting = new AtomicInt32(); IList <ThreadJob> threads = new List <ThreadJob>(); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); writer.DoRandomForceMergeAssert = false; writer.Commit(); reader = DirectoryReader.Open(dir); for (int i = 0; i < nWriteThreads; i++) { ThreadJob thread = new ThreadAnonymousClass(this, "WRITER" + i, commitPercent, softCommitPercent, deletePercent, deleteByQueryPercent, ndocs, maxConcurrentCommits, tombstones, operations, storedOnlyType, numCommitting, writer); threads.Add(thread); } for (int i = 0; i < nReadThreads; i++) { ThreadJob thread = new ThreadAnonymousClass2(this, "READER" + i, ndocs, tombstones, operations); threads.Add(thread); } foreach (ThreadJob thread in threads) { thread.Start(); } foreach (ThreadJob thread in threads) { thread.Join(); } writer.Dispose(); if (Verbose) { Console.WriteLine("TEST: close reader=" + reader); } reader.Dispose(); dir.Dispose(); }
public virtual void TestCachingWorks() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); writer.Dispose(); IndexReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir)); AtomicReaderContext context = (AtomicReaderContext)reader.Context; MockFilter filter = new MockFilter(); CachingWrapperFilter cacher = new CachingWrapperFilter(filter); // first time, nested filter is called DocIdSet strongRef = cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs); Assert.IsTrue(filter.WasCalled(), "first time"); // make sure no exception if cache is holding the wrong docIdSet cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs); // second time, nested filter should not be called filter.Clear(); cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs); Assert.IsFalse(filter.WasCalled(), "second time"); reader.Dispose(); dir.Dispose(); }