/// <summary>
/// Creates a single-document index (one empty "id" StringField) and opens a
/// searcher over it for the tests in this fixture.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    config.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
    Document doc = new Document();
    Field id = new StringField("id", "", Field.Store.NO);
    doc.Add(id);
    writer.AddDocument(doc);
    ir = writer.Reader;
    @is = NewSearcher(ir);
    writer.Dispose();
}
/// <summary>
/// Builds the test index from the shared <c>documents</c> table: each row
/// supplies the values for one document's id/byte/double/float/int/long/
/// short/string/text columns. A single reusable Field instance per column is
/// mutated between AddDocument calls.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    config.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
    Document doc = new Document();
    // one reusable Field per column; values are swapped in per row below
    Field fId = new StringField("id", "", Field.Store.NO);
    doc.Add(fId);
    Field fByte = new StringField("byte", "", Field.Store.NO);
    doc.Add(fByte);
    Field fDouble = new StringField("double", "", Field.Store.NO);
    doc.Add(fDouble);
    Field fFloat = new StringField("float", "", Field.Store.NO);
    doc.Add(fFloat);
    Field fInt = new StringField("int", "", Field.Store.NO);
    doc.Add(fInt);
    Field fLong = new StringField("long", "", Field.Store.NO);
    doc.Add(fLong);
    Field fShort = new StringField("short", "", Field.Store.NO);
    doc.Add(fShort);
    Field fString = new StringField("string", "", Field.Store.NO);
    doc.Add(fString);
    Field fText = new TextField("text", "", Field.Store.NO);
    doc.Add(fText);
    foreach (string[] row in documents)
    {
        fId.StringValue = row[0];
        fByte.StringValue = row[1];
        fDouble.StringValue = row[2];
        fFloat.StringValue = row[3];
        fInt.StringValue = row[4];
        fLong.StringValue = row[5];
        fShort.StringValue = row[6];
        fString.StringValue = row[7];
        fText.StringValue = row[8];
        writer.AddDocument(doc);
    }
    reader = writer.Reader;
    searcher = NewSearcher(reader);
    writer.Dispose();
}
/// <summary>
/// One-time setup: picks a random single-character stopword, indexes at least
/// 1000 docs of random contents, deletes roughly 5% of them (half by Term,
/// half by TermQuery), and opens two searchers (S1/S2) over the same reader.
/// </summary>
public void BeforeClass()
{
    Random random = Random();
    Directory = NewDirectory();
    Stopword = "" + RandomChar();
    CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.MakeString(Stopword));
    Analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
    RandomIndexWriter writer = new RandomIndexWriter(random, Directory, Analyzer, ClassEnvRule.Similarity, ClassEnvRule.TimeZone);
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    Field contentField = new TextField("field", "", Field.Store.NO);
    doc.Add(idField);
    doc.Add(contentField);

    // index some docs
    int docCount = AtLeast(1000);
    for (int i = 0; i < docCount; i++)
    {
        idField.StringValue = Convert.ToString(i);
        contentField.StringValue = RandomFieldContents();
        writer.AddDocument(doc);
    }

    // delete some docs: randomly alternate delete-by-term and delete-by-query
    int deleteCount = docCount / 20;
    for (int i = 0; i < deleteCount; i++)
    {
        Term toDelete = new Term("id", Convert.ToString(random.Next(docCount)));
        if (random.NextBoolean())
        {
            writer.DeleteDocuments(toDelete);
        }
        else
        {
            writer.DeleteDocuments(new TermQuery(toDelete));
        }
    }

    Reader = writer.Reader;
    S1 = NewSearcher(Reader);
    S2 = NewSearcher(Reader);
    writer.Dispose();
}
/// <summary>
/// Indexes 500 docs with ids 0..499, then deletes 20 random ones.
/// The writer field <c>Iw</c> stays open here — presumably released in
/// TearDown (not visible in this chunk).
/// </summary>
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();
    Iw = new RandomIndexWriter(Random(), Dir);
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    doc.Add(idField);
    // add 500 docs with id 0..499
    for (int docId = 0; docId < 500; docId++)
    {
        idField.StringValue = Convert.ToString(docId);
        Iw.AddDocument(doc);
    }
    // delete 20 of them
    for (int i = 0; i < 20; i++)
    {
        Iw.DeleteDocuments(new Term("id", Convert.ToString(Random().Next(Iw.MaxDoc()))));
    }
    Ir = Iw.Reader;
    @is = NewSearcher(Ir);
}
/// <summary>
/// Stresses the stored-fields format with documents much larger than the
/// chunk size: an empty doc, a doc with 500k-1M small stored fields, and a
/// doc with one 1-5MB stored field, committed at random points and then
/// force-merged. Each doc is then fetched by id and its "fld" values compared.
/// </summary>
public void TestBigDocuments()
{
    // "big" as "much bigger than the chunk size"
    // for this test we force a FS dir
    // we can't just use newFSDirectory, because this test doesn't really index anything.
    // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
    Directory dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments")));
    IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);
    if (dir is MockDirectoryWrapper)
    {
        // throttling would make the huge writes below unbearably slow
        ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }
    Document emptyDoc = new Document(); // emptyDoc
    Document bigDoc1 = new Document(); // lot of small fields
    Document bigDoc2 = new Document(); // 1 very big field
    // the id field instance is shared by all three documents
    Field idField = new StringField("id", "", Field.Store.NO);
    emptyDoc.Add(idField);
    bigDoc1.Add(idField);
    bigDoc2.Add(idField);
    FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
    onlyStored.Indexed = false;
    Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored);
    int numFields = RandomInts.NextIntBetween(Random(), 500000, 1000000);
    for (int i = 0; i < numFields; ++i)
    {
        bigDoc1.Add(smallField);
    }
    Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored);
    bigDoc2.Add(bigField);
    int numDocs = AtLeast(5);
    Document[] docs = new Document[numDocs];
    for (int i = 0; i < numDocs; ++i)
    {
        // each slot randomly picks one of the three prototype documents
        docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2));
    }
    for (int i = 0; i < numDocs; ++i)
    {
        idField.StringValue = "" + i;
        iw.AddDocument(docs[i]);
        if (Random().Next(numDocs) == 0)
        {
            iw.Commit();
        }
    }
    iw.Commit();
    iw.ForceMerge(1); // look at what happens when big docs are merged
    DirectoryReader rd = DirectoryReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(rd);
    for (int i = 0; i < numDocs; ++i)
    {
        Query query = new TermQuery(new Term("id", "" + i));
        TopDocs topDocs = searcher.Search(query, 1);
        Assert.AreEqual(1, topDocs.TotalHits, "" + i);
        Document doc = rd.Document(topDocs.ScoreDocs[0].Doc);
        Assert.IsNotNull(doc);
        IndexableField[] fieldValues = doc.GetFields("fld");
        Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length);
        if (fieldValues.Length > 0)
        {
            // spot-check: the first stored value must round-trip intact
            Assert.AreEqual(docs[i].GetFields("fld")[0].BinaryValue(), fieldValues[0].BinaryValue());
        }
    }
    rd.Dispose();
    iw.Dispose();
    dir.Dispose();
}
/// <summary>
/// Writes at least 1000 small docs, then hammers the reader from several
/// concurrent threads to verify stored-fields readers are properly cloned per
/// thread. Any exception recorded by a reader thread is rethrown at the end.
/// </summary>
public void TestConcurrentReads()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);
    // make sure the readers are properly cloned
    Document doc = new Document();
    Field field = new StringField("fld", "", Field.Store.YES);
    doc.Add(field);
    int numDocs = AtLeast(1000);
    for (int i = 0; i < numDocs; ++i)
    {
        field.StringValue = "" + i;
        iw.AddDocument(doc);
    }
    iw.Commit();
    DirectoryReader rd = DirectoryReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(rd);
    int concurrentReads = AtLeast(5);
    int readsPerThread = AtLeast(50);
    IList<ThreadClass> readThreads = new List<ThreadClass>();
    AtomicReference<Exception> ex = new AtomicReference<Exception>();
    for (int i = 0; i < concurrentReads; ++i)
    {
        readThreads.Add(new ThreadAnonymousInnerClassHelper(numDocs, rd, searcher, readsPerThread, ex, i));
    }
    foreach (ThreadClass thread in readThreads)
    {
        thread.Start();
    }
    foreach (ThreadClass thread in readThreads)
    {
        thread.Join();
    }
    rd.Dispose();
    // BUGFIX: dispose the writer and directory BEFORE rethrowing a thread
    // failure; previously `throw ex.Value` skipped both Dispose calls and
    // leaked iw/dir whenever a reader thread had recorded an exception.
    iw.Dispose();
    dir.Dispose();
    if (ex.Value != null)
    {
        throw ex.Value;
    }
}
/// <summary>
/// Indexes NUM_VALS docs with decreasing "value" strings, sorts them
/// ascending via an IntFieldSource-backed SortField, then verifies that
/// IndexSearcher.SearchAfter resumes correctly after a chosen FieldDoc.
/// </summary>
public void TestSearchAfterWhenSortingByFunctionValues()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
    iwc.SetMergePolicy(NewLogMergePolicy()); // depends on docid order
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    Document doc = new Document();
    Field valueField = new StringField("value", "", Field.Store.YES);
    doc.Add(valueField);

    // Save docs unsorted (decreasing value n, n-1, ...)
    const int NUM_VALS = 5;
    for (int val = NUM_VALS; val > 0; val--)
    {
        valueField.StringValue = Convert.ToString(val);
        writer.AddDocument(doc);
    }

    // Open index
    IndexReader reader = writer.Reader;
    writer.Dispose();
    IndexSearcher searcher = NewSearcher(reader);

    // Get ValueSource from FieldCache and make it a sort criterion
    IntFieldSource valueSource = new IntFieldSource("value");
    SortField sortField = valueSource.GetSortField(false).Rewrite(searcher);
    Sort sortOrder = new Sort(sortField);

    // Get hits sorted by our FunctionValues (ascending values)
    Query matchAll = new MatchAllDocsQuery();
    TopDocs hits = searcher.Search(matchAll, reader.MaxDoc, sortOrder);
    assertEquals(NUM_VALS, hits.ScoreDocs.Length);

    // Verify that sorting works in general
    int expectedValue = 0;
    foreach (ScoreDoc hit in hits.ScoreDocs)
    {
        int valueFromDoc = Convert.ToInt32(reader.Document(hit.Doc).Get("value"));
        assertEquals(++expectedValue, valueFromDoc);
    }

    // Now get hits after hit #2 using IS.searchAfter()
    int afterIdx = 1;
    FieldDoc afterHit = (FieldDoc)hits.ScoreDocs[afterIdx];
    hits = searcher.SearchAfter(afterHit, matchAll, reader.MaxDoc, sortOrder);

    // Expected # of hits: NUM_VALS - 2
    assertEquals(NUM_VALS - (afterIdx + 1), hits.ScoreDocs.Length);

    // Verify that hits are actually "after" the pivot doc
    int afterValue = (int)((double?)afterHit.Fields[0]);
    foreach (ScoreDoc hit in hits.ScoreDocs)
    {
        int val = Convert.ToInt32(reader.Document(hit.Doc).Get("value"));
        assertTrue(afterValue <= val);
        assertFalse(hit.Doc == afterHit.Doc);
    }
    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Verifies that mutating a Field's value between AddDocument calls produces
/// distinct documents: three docs are indexed as id1/id2/id3 and all three
/// must come back from a search on the shared "keyword" field.
/// </summary>
public virtual void TestFieldSetValue()
{
    Field idField = new StringField("id", "id1", Field.Store.YES);
    Documents.Document doc = new Documents.Document();
    doc.Add(idField);
    doc.Add(new StringField("keyword", "test", Field.Store.YES));
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    writer.AddDocument(doc);
    idField.StringValue = "id2";
    writer.AddDocument(doc);
    idField.StringValue = "id3";
    writer.AddDocument(doc);

    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    Query query = new TermQuery(new Term("keyword", "test"));

    // ensure that queries return expected results without DateFilter first
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);

    // each id sets one bit; all three bits must be present at the end
    int seenMask = 0;
    for (int i = 0; i < 3; i++)
    {
        Documents.Document stored = searcher.Doc(hits[i].Doc);
        Field f = (Field)stored.GetField("id");
        if (f.StringValue.Equals("id1"))
        {
            seenMask |= 1;
        }
        else if (f.StringValue.Equals("id2"))
        {
            seenMask |= 2;
        }
        else if (f.StringValue.Equals("id3"))
        {
            seenMask |= 4;
        }
        else
        {
            Assert.Fail("unexpected id field");
        }
    }
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
    Assert.AreEqual(7, seenMask, "did not see all IDs");
}
/// <summary>
/// Builds one spellchecker document: the original word (indexed + stored)
/// plus its n-gram fields for every gram size between ng1 and ng2.
/// </summary>
private static Document CreateDocument(string text, int ng1, int ng2)
{
    var doc = new Document();
    // the word field is never queried on... its indexed so it can be quickly
    // checked for rebuild (and stored for retrieval). Doesn't need norms or TF/pos
    doc.Add(new StringField(F_WORD, text, Field.Store.YES)); // orig term
    AddGram(text, doc, ng1, ng2);
    return doc;
}
/// <summary>
/// Indexes ~200 parent blocks (each parent preceded by 0-9 children), sorts
/// the single merged segment via a BlockJoinComparatorSource, and verifies
/// that the resulting doc map keeps every child's next parent bit correct,
/// children sorted by child_val, and parents sorted by parent_val — with
/// stability (old docID order) as the tiebreak in both cases.
/// </summary>
public void Test()
{
    RandomIndexWriter writer;
    DirectoryReader indexReader;
    int numParents = AtLeast(200);
    IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    cfg.SetMergePolicy(NewLogMergePolicy());
    using (writer = new RandomIndexWriter(Random(), NewDirectory(), cfg))
    {
        Document parentDoc = new Document();
        NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L);
        parentDoc.Add(parentVal);
        StringField parent = new StringField("parent", "true", Field.Store.YES);
        parentDoc.Add(parent);
        for (int i = 0; i < numParents; ++i)
        {
            List<Document> documents = new List<Document>();
            int numChildren = Random().nextInt(10);
            for (int j = 0; j < numChildren; ++j)
            {
                Document childDoc = new Document();
                childDoc.Add(new NumericDocValuesField("child_val", Random().nextInt(5)));
                documents.Add(childDoc);
            }
            parentVal.LongValue = (Random().nextInt(50));
            // parent is appended last so each block is [children..., parent]
            documents.Add(parentDoc);
            writer.AddDocuments(documents);
        }
        writer.ForceMerge(1);
        indexReader = writer.Reader;
    }
    AtomicReader reader = GetOnlySegmentReader(indexReader);
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
    FixedBitSet parentBits = (FixedBitSet)parentsFilter.GetDocIdSet(reader.AtomicContext, null);
    NumericDocValues parentValues = reader.GetNumericDocValues("parent_val");
    NumericDocValues childValues = reader.GetNumericDocValues("child_val");
    Sort parentSort = new Sort(new SortField("parent_val", SortField.Type_e.LONG));
    Sort childSort = new Sort(new SortField("child_val", SortField.Type_e.LONG));
    Sort sort = new Sort(new SortField("custom", new BlockJoinComparatorSource(parentsFilter, parentSort, childSort)));
    Sorter sorter = new Sorter(sort);
    Sorter.DocMap docMap = sorter.Sort(reader);
    assertEquals(reader.MaxDoc, docMap.Count);
    // children collects the old IDs of the docs seen since the last parent
    int[] children = new int[1];
    int numChildren2 = 0;
    int previousParent = -1;
    for (int i = 0; i < docMap.Count; ++i)
    {
        int oldID = docMap.NewToOld(i);
        if (parentBits.Get(oldID))
        {
            // check that we have the right children
            for (int j = 0; j < numChildren2; ++j)
            {
                assertEquals(oldID, parentBits.NextSetBit(children[j]));
            }
            // check that children are sorted
            for (int j = 1; j < numChildren2; ++j)
            {
                int doc1 = children[j - 1];
                int doc2 = children[j];
                if (childValues.Get(doc1) == childValues.Get(doc2))
                {
                    assertTrue(doc1 < doc2); // sort is stable
                }
                else
                {
                    assertTrue(childValues.Get(doc1) < childValues.Get(doc2));
                }
            }
            // check that parents are sorted
            if (previousParent != -1)
            {
                if (parentValues.Get(previousParent) == parentValues.Get(oldID))
                {
                    assertTrue(previousParent < oldID);
                }
                else
                {
                    assertTrue(parentValues.Get(previousParent) < parentValues.Get(oldID));
                }
            }
            // reset
            previousParent = oldID;
            numChildren2 = 0;
        }
        else
        {
            children = ArrayUtil.Grow(children, numChildren2 + 1);
            children[numChildren2++] = oldID;
        }
    }
    indexReader.Dispose();
    writer.w.Directory.Dispose();
}
/// <summary>
/// Indexes random sets of strings both as stored fields (added in sorted
/// order) and as a SortedSetDocValues field (added shuffled), then verifies
/// per document that the dv ords enumerate exactly the stored values.
/// </summary>
private void DoTestSortedSetVsStoredFields(int minLength, int maxLength, int maxValuesPerDoc)
{
    Directory dir = NewDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
    // index some docs
    int numDocs = AtLeast(300);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        Field idField = new StringField("id", Convert.ToString(i), Field.Store.NO);
        doc.Add(idField);
        int length;
        if (minLength == maxLength)
        {
            length = minLength; // fixed length
        }
        else
        {
            length = TestUtil.NextInt(Random(), minLength, maxLength);
        }
        int numValues = TestUtil.NextInt(Random(), 0, maxValuesPerDoc);
        // create a random set of strings
        SortedSet<string> values = new SortedSet<string>();
        for (int v = 0; v < numValues; v++)
        {
            values.Add(TestUtil.RandomSimpleString(Random(), length));
        }
        // add ordered to the stored field
        foreach (string v in values)
        {
            doc.Add(new StoredField("stored", v));
        }
        // add in any order to the dv field
        IList<string> unordered = new List<string>(values);
        unordered = CollectionsHelper.Shuffle(unordered);
        foreach (string v in unordered)
        {
            doc.Add(new SortedSetDocValuesField("dv", new BytesRef(v)));
        }
        writer.AddDocument(doc);
        if (Random().Next(31) == 0)
        {
            writer.Commit();
        }
    }
    // delete some docs
    int numDeletions = Random().Next(numDocs / 10);
    for (int i = 0; i < numDeletions; i++)
    {
        int id = Random().Next(numDocs);
        writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
    }
    writer.Dispose();
    // compare
    DirectoryReader ir = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext context in ir.Leaves)
    {
        AtomicReader r = context.AtomicReader;
        SortedSetDocValues docValues = r.GetSortedSetDocValues("dv");
        BytesRef scratch = new BytesRef();
        for (int i = 0; i < r.MaxDoc; i++)
        {
            string[] stringValues = r.Document(i).GetValues("stored");
            if (docValues != null)
            {
                docValues.Document = i;
            }
            for (int j = 0; j < stringValues.Length; j++)
            {
                // docValues can only be null if no doc had stored values either
                Debug.Assert(docValues != null);
                long ord = docValues.NextOrd();
                Debug.Assert(ord != SortedSetDocValues.NO_MORE_ORDS);
                docValues.LookupOrd(ord, scratch);
                Assert.AreEqual(stringValues[j], scratch.Utf8ToString());
            }
            // every dv ord must have been consumed by the stored-values loop
            Debug.Assert(docValues == null || docValues.NextOrd() == SortedSetDocValues.NO_MORE_ORDS);
        }
    }
    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Writes the same random strings to an indexed StringField and a
/// SortedDocValuesField, then asserts that FieldCache's uninverted terms
/// index matches the native sorted doc values, segment by segment.
/// </summary>
private void DoTestSortedVsFieldCache(int minLength, int maxLength)
{
    Directory dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, config);
    Document doc = new Document();
    Field id = new StringField("id", "", Field.Store.NO);
    Field indexed = new StringField("indexed", "", Field.Store.NO);
    Field dv = new SortedDocValuesField("dv", new BytesRef());
    doc.Add(id);
    doc.Add(indexed);
    doc.Add(dv);

    // index some docs
    int docCount = AtLeast(300);
    for (int i = 0; i < docCount; i++)
    {
        id.StringValue = Convert.ToString(i);
        // minLength == maxLength means a fixed-length test
        int length = minLength == maxLength ? minLength : TestUtil.NextInt(Random(), minLength, maxLength);
        string value = TestUtil.RandomSimpleString(Random(), length);
        indexed.StringValue = value;
        dv.BytesValue = new BytesRef(value);
        iw.AddDocument(doc);
        if (Random().Next(31) == 0)
        {
            iw.Commit(); // occasionally commit to get multiple segments
        }
    }

    // delete some docs
    int deleteCount = Random().Next(docCount / 10);
    for (int i = 0; i < deleteCount; i++)
    {
        int victim = Random().Next(docCount);
        iw.DeleteDocuments(new Term("id", Convert.ToString(victim)));
    }
    iw.Dispose();

    // compare field cache vs dv, per segment
    DirectoryReader ir = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext context in ir.Leaves)
    {
        AtomicReader r = context.AtomicReader;
        SortedDocValues expected = FieldCache.DEFAULT.GetTermsIndex(r, "indexed");
        SortedDocValues actual = r.GetSortedDocValues("dv");
        AssertEquals(r.MaxDoc, expected, actual);
    }
    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Writes identical random bytes to a stored field and a
/// SortedDocValuesField, then checks each document's dv bytes against the
/// stored copy, segment by segment.
/// </summary>
private void DoTestSortedVsStoredFields(int minLength, int maxLength)
{
    Directory dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, config);
    Document doc = new Document();
    Field id = new StringField("id", "", Field.Store.NO);
    Field stored = new StoredField("stored", new byte[0]);
    Field dv = new SortedDocValuesField("dv", new BytesRef());
    doc.Add(id);
    doc.Add(stored);
    doc.Add(dv);

    // index some docs
    int docCount = AtLeast(300);
    for (int i = 0; i < docCount; i++)
    {
        id.StringValue = Convert.ToString(i);
        // minLength == maxLength means a fixed-length test
        int length = minLength == maxLength ? minLength : TestUtil.NextInt(Random(), minLength, maxLength);
        var buffer = new byte[length];
        Random().NextBytes(buffer);
        stored.BytesValue = new BytesRef(buffer);
        dv.BytesValue = new BytesRef(buffer);
        iw.AddDocument(doc);
        if (Random().Next(31) == 0)
        {
            iw.Commit(); // occasionally commit to get multiple segments
        }
    }

    // delete some docs
    int deleteCount = Random().Next(docCount / 10);
    for (int i = 0; i < deleteCount; i++)
    {
        int victim = Random().Next(docCount);
        iw.DeleteDocuments(new Term("id", Convert.ToString(victim)));
    }
    iw.Dispose();

    // compare stored bytes vs dv bytes for every doc in every segment
    DirectoryReader ir = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext context in ir.Leaves)
    {
        AtomicReader r = context.AtomicReader;
        BinaryDocValues docValues = r.GetSortedDocValues("dv");
        for (int i = 0; i < r.MaxDoc; i++)
        {
            BytesRef binaryValue = r.Document(i).GetBinaryValue("stored");
            BytesRef scratch = new BytesRef();
            docValues.Get(i, scratch);
            Assert.AreEqual(binaryValue, scratch);
        }
    }
    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Randomly omits the indexed/dv pair on ~1/4 of the docs, then asserts that
/// FieldCache reports the same docs-with-field bits for the indexed field and
/// the NumericDocValues field.
/// </summary>
private void DoTestMissingVsFieldCache(LongProducer longs)
{
    AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField());
    Directory dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, config);
    Field id = new StringField("id", "", Field.Store.NO);
    Field indexed = NewStringField("indexed", "", Field.Store.NO);
    Field dv = new NumericDocValuesField("dv", 0);

    // index some docs
    int docCount = AtLeast(300);
    // docCount should be always > 256 so that in case of a codec that optimizes
    // for numbers of values <= 256, all storage layouts are tested
    Debug.Assert(docCount > 256);
    for (int i = 0; i < docCount; i++)
    {
        id.StringValue = Convert.ToString(i);
        long value = longs.Next();
        indexed.StringValue = Convert.ToString(value);
        dv.LongValue = value;
        Document doc = new Document();
        doc.Add(id);
        // 1/4 of the time we neglect to add the fields
        if (Random().Next(4) > 0)
        {
            doc.Add(indexed);
            doc.Add(dv);
        }
        iw.AddDocument(doc);
        if (Random().Next(31) == 0)
        {
            iw.Commit();
        }
    }

    // delete some docs
    int deleteCount = Random().Next(docCount / 10);
    for (int i = 0; i < deleteCount; i++)
    {
        int victim = Random().Next(docCount);
        iw.DeleteDocuments(new Term("id", Convert.ToString(victim)));
    }

    // merge some segments and ensure that at least one of them has more than
    // 256 values
    iw.ForceMerge(docCount / 256);
    iw.Dispose();

    // compare docs-with-field bits per segment
    DirectoryReader ir = DirectoryReader.Open(dir);
    foreach (var context in ir.Leaves)
    {
        AtomicReader r = context.AtomicReader;
        Bits expected = FieldCache.DEFAULT.GetDocsWithField(r, "indexed");
        Bits actual = FieldCache.DEFAULT.GetDocsWithField(r, "dv");
        AssertEquals(expected, actual);
    }
    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Writes each produced long both as a stored string and as a
/// NumericDocValuesField, then asserts the dv value equals the parsed stored
/// value for every doc in every segment.
/// </summary>
private void DoTestNumericsVsStoredFields(LongProducer longs)
{
    Directory dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, config);
    Document doc = new Document();
    Field id = new StringField("id", "", Field.Store.NO);
    Field stored = NewStringField("stored", "", Field.Store.YES);
    Field dv = new NumericDocValuesField("dv", 0);
    doc.Add(id);
    doc.Add(stored);
    doc.Add(dv);

    // index some docs
    int docCount = AtLeast(300);
    // docCount should be always > 256 so that in case of a codec that optimizes
    // for numbers of values <= 256, all storage layouts are tested
    Debug.Assert(docCount > 256);
    for (int i = 0; i < docCount; i++)
    {
        id.StringValue = Convert.ToString(i);
        long value = longs.Next();
        stored.StringValue = Convert.ToString(value);
        dv.LongValue = value;
        iw.AddDocument(doc);
        if (Random().Next(31) == 0)
        {
            iw.Commit();
        }
    }

    // delete some docs
    int deleteCount = Random().Next(docCount / 10);
    for (int i = 0; i < deleteCount; i++)
    {
        int victim = Random().Next(docCount);
        iw.DeleteDocuments(new Term("id", Convert.ToString(victim)));
    }

    // merge some segments and ensure that at least one of them has more than
    // 256 values
    iw.ForceMerge(docCount / 256);
    iw.Dispose();

    // compare stored vs dv per segment
    DirectoryReader ir = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext context in ir.Leaves)
    {
        AtomicReader r = context.AtomicReader;
        NumericDocValues docValues = r.GetNumericDocValues("dv");
        for (int i = 0; i < r.MaxDoc; i++)
        {
            long storedValue = Convert.ToInt64(r.Document(i).Get("stored"));
            Assert.AreEqual(storedValue, docValues.Get(i));
        }
    }
    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes docs whose binary/sorted/numeric doc values mirror stored fields,
/// then has several threads (released together via a CountdownEvent) verify
/// the dv/stored agreement concurrently.
/// </summary>
public void TestThreads()
{
    Directory dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, config);
    Document doc = new Document();
    Field id = new StringField("id", "", Field.Store.NO);
    Field storedBin = new StoredField("storedBin", new byte[0]);
    Field dvBin = new BinaryDocValuesField("dvBin", new BytesRef());
    Field dvSorted = new SortedDocValuesField("dvSorted", new BytesRef());
    Field storedNum = new StoredField("storedNum", "");
    Field dvNum = new NumericDocValuesField("dvNum", 0);
    doc.Add(id);
    doc.Add(storedBin);
    doc.Add(dvBin);
    doc.Add(dvSorted);
    doc.Add(storedNum);
    doc.Add(dvNum);

    // index some docs
    int docCount = AtLeast(300);
    for (int i = 0; i < docCount; i++)
    {
        id.StringValue = Convert.ToString(i);
        int length = TestUtil.NextInt(Random(), 0, 8);
        var buffer = new byte[length];
        Random().NextBytes(buffer);
        // binary and sorted dv fields carry the same bytes as the stored field
        storedBin.BytesValue = new BytesRef(buffer);
        dvBin.BytesValue = new BytesRef(buffer);
        dvSorted.BytesValue = new BytesRef(buffer);
        long numericValue = Random().NextLong();
        storedNum.StringValue = Convert.ToString(numericValue);
        dvNum.LongValue = numericValue;
        iw.AddDocument(doc);
        if (Random().Next(31) == 0)
        {
            iw.Commit(); // occasionally commit to get multiple segments
        }
    }

    // delete some docs
    int deleteCount = Random().Next(docCount / 10);
    for (int i = 0; i < deleteCount; i++)
    {
        int victim = Random().Next(docCount);
        iw.DeleteDocuments(new Term("id", Convert.ToString(victim)));
    }
    iw.Dispose();

    // compare from several concurrent reader threads
    DirectoryReader ir = DirectoryReader.Open(dir);
    int threadCount = TestUtil.NextInt(Random(), 2, 7);
    ThreadClass[] threads = new ThreadClass[threadCount];
    CountdownEvent startingGun = new CountdownEvent(1);
    for (int i = 0; i < threads.Length; i++)
    {
        threads[i] = new ThreadAnonymousInnerClassHelper(this, ir, startingGun);
        threads[i].Start();
    }
    startingGun.Signal(); // release all threads at once
    foreach (ThreadClass t in threads)
    {
        t.Join();
    }
    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes the same random strings as indexed terms and as
/// SortedSetDocValues, then checks that FieldCache.GetDocTermOrds over the
/// indexed field agrees with the native dv — first per segment, then again
/// after a ForceMerge(1).
/// </summary>
private void DoTestSortedSetVsUninvertedField(int minLength, int maxLength)
{
    Directory dir = NewDirectory();
    IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);

    // index some docs
    int docCount = AtLeast(300);
    for (int i = 0; i < docCount; i++)
    {
        Document doc = new Document();
        Field id = new StringField("id", Convert.ToString(i), Field.Store.NO);
        doc.Add(id);
        // minLength == maxLength means a fixed-length test
        int length = minLength == maxLength ? minLength : TestUtil.NextInt(Random(), minLength, maxLength);
        int numValues = Random().Next(17);
        // create a random list of strings
        IList<string> values = new List<string>();
        for (int v = 0; v < numValues; v++)
        {
            values.Add(TestUtil.RandomSimpleString(Random(), length));
        }
        // add in any order to the indexed field
        IList<string> shuffledIndexed = new List<string>(values);
        shuffledIndexed = CollectionsHelper.Shuffle(shuffledIndexed);
        foreach (string v in shuffledIndexed)
        {
            doc.Add(NewStringField("indexed", v, Field.Store.NO));
        }
        // add in any order to the dv field
        IList<string> shuffledDv = new List<string>(values);
        shuffledDv = CollectionsHelper.Shuffle(shuffledDv);
        foreach (string v in shuffledDv)
        {
            doc.Add(new SortedSetDocValuesField("dv", new BytesRef(v)));
        }
        writer.AddDocument(doc);
        if (Random().Next(31) == 0)
        {
            writer.Commit(); // occasionally commit to get multiple segments
        }
    }

    // delete some docs
    int deleteCount = Random().Next(docCount / 10);
    for (int i = 0; i < deleteCount; i++)
    {
        int victim = Random().Next(docCount);
        writer.DeleteDocuments(new Term("id", Convert.ToString(victim)));
    }

    // compare per-segment
    DirectoryReader ir = writer.Reader;
    foreach (AtomicReaderContext context in ir.Leaves)
    {
        AtomicReader r = context.AtomicReader;
        SortedSetDocValues expected = FieldCache.DEFAULT.GetDocTermOrds(r, "indexed");
        SortedSetDocValues actual = r.GetSortedSetDocValues("dv");
        AssertEquals(r.MaxDoc, expected, actual);
    }
    ir.Dispose();

    // now compare again after the merge
    writer.ForceMerge(1);
    ir = writer.Reader;
    AtomicReader ar = GetOnlySegmentReader(ir);
    SortedSetDocValues mergedExpected = FieldCache.DEFAULT.GetDocTermOrds(ar, "indexed");
    SortedSetDocValues mergedActual = ar.GetSortedSetDocValues("dv");
    AssertEquals(ir.MaxDoc, mergedExpected, mergedActual);
    ir.Dispose();
    writer.Dispose();
    dir.Dispose();
}
/// <summary>
/// Tests dv against stored fields with threads (all types + missing)
/// </summary>
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestThreads2()
{
    AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField());
    AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedBinField = new StoredField("storedBin", new byte[0]);
    Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
    Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
    Field storedNumericField = new StoredField("storedNum", "");
    Field dvNumericField = new NumericDocValuesField("dvNum", 0);
    // index some docs
    int numDocs = AtLeast(300);
    for (int i = 0; i < numDocs; i++)
    {
        idField.StringValue = Convert.ToString(i);
        int length = TestUtil.NextInt(Random(), 0, 8);
        var buffer = new byte[length];
        Random().NextBytes(buffer);
        // binary and sorted dv fields mirror the stored binary field
        storedBinField.BytesValue = new BytesRef(buffer);
        dvBinField.BytesValue = new BytesRef(buffer);
        dvSortedField.BytesValue = new BytesRef(buffer);
        long numericValue = Random().NextLong();
        storedNumericField.StringValue = Convert.ToString(numericValue);
        dvNumericField.LongValue = numericValue;
        Document doc = new Document();
        doc.Add(idField);
        // randomly omit the binary/sorted pair to exercise "missing" coverage
        if (Random().Next(4) > 0)
        {
            doc.Add(storedBinField);
            doc.Add(dvBinField);
            doc.Add(dvSortedField);
        }
        // randomly omit the numeric pair as well
        if (Random().Next(4) > 0)
        {
            doc.Add(storedNumericField);
            doc.Add(dvNumericField);
        }
        int numSortedSetFields = Random().Next(3);
        SortedSet<string> values = new SortedSet<string>();
        for (int j = 0; j < numSortedSetFields; j++)
        {
            values.Add(TestUtil.RandomSimpleString(Random()));
        }
        foreach (string v in values)
        {
            doc.Add(new SortedSetDocValuesField("dvSortedSet", new BytesRef(v)));
            doc.Add(new StoredField("storedSortedSet", v));
        }
        writer.AddDocument(doc);
        if (Random().Next(31) == 0)
        {
            writer.Commit();
        }
    }
    // delete some docs
    int numDeletions = Random().Next(numDocs / 10);
    for (int i = 0; i < numDeletions; i++)
    {
        int id = Random().Next(numDocs);
        writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
    }
    writer.Dispose();
    // compare from several concurrent reader threads
    DirectoryReader ir = DirectoryReader.Open(dir);
    int numThreads = TestUtil.NextInt(Random(), 2, 7);
    ThreadClass[] threads = new ThreadClass[numThreads];
    CountdownEvent startingGun = new CountdownEvent(1);
    for (int i = 0; i < threads.Length; i++)
    {
        threads[i] = new ThreadAnonymousInnerClassHelper2(this, ir, startingGun);
        threads[i].Start();
    }
    // release all threads at once
    startingGun.Signal();
    foreach (ThreadClass t in threads)
    {
        t.Join();
    }
    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Converts a bounding box into its five index fields: the four double-valued
/// edges plus a "T"/"F" flag field recording whether the box crosses the
/// dateline.
/// </summary>
public Field[] CreateIndexableFields(Rectangle bbox)
{
    return new Field[]
    {
        DoubleField(field_minX, bbox.GetMinX()),
        DoubleField(field_maxX, bbox.GetMaxX()),
        DoubleField(field_minY, bbox.GetMinY()),
        DoubleField(field_maxY, bbox.GetMaxY()),
        new StringField(field_xdl, bbox.GetCrossesDateLine() ? "T" : "F", Field.Store.NO),
    };
}
/// <summary>
/// Adds n-gram fields for <paramref name="text"/> to <paramref name="doc"/>
/// for every gram size from ng1 through ng2: each gram goes into "gram{n}",
/// the first gram also into "start{n}", and the last one into "end{n}".
/// </summary>
private static void AddGram(string text, Document doc, int ng1, int ng2)
{
    int textLen = text.Length;
    for (int size = ng1; size <= ng2; size++)
    {
        string gramFieldName = "gram" + size;
        string lastGram = null;
        for (int pos = 0; pos < textLen - size + 1; pos++)
        {
            string gram = text.Substring(pos, size);
            FieldType gramType = new FieldType(StringField.TYPE_NOT_STORED);
            // spellchecker does not use positional queries, but we want freqs
            // for scoring these multivalued n-gram fields.
            gramType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            doc.Add(new Field(gramFieldName, gram, gramType));
            if (pos == 0)
            {
                // only one term possible in the startXXField, TF/pos and norms aren't needed.
                doc.Add(new StringField("start" + size, gram, Field.Store.NO));
            }
            lastGram = gram;
        }
        if (lastGram != null) // may not be present if len==ng1
        {
            // only one term possible in the endXXField, TF/pos and norms aren't needed.
            doc.Add(new StringField("end" + size, lastGram, Field.Store.NO));
        }
    }
}