/// <summary>
/// Asserts the hit count matches <paramref name="expectedCount"/>, then loads
/// every matching document's stored fields and term vectors to verify they are
/// readable without error.
/// </summary>
private void DoTestHits(ScoreDoc[] hits, int expectedCount, IndexReader reader)
{
    Assert.AreEqual(expectedCount, hits.Length, "wrong number of hits");
    foreach (ScoreDoc hit in hits)
    {
        reader.Document(hit.Doc);
        reader.GetTermVectors(hit.Doc);
    }
}
/// <summary>
/// Indexes the same CachingTokenFilter-backed field twice in one document and
/// verifies the term vector for "abcd" records both occurrences with the
/// second occurrence's offsets shifted past the first.
/// </summary>
public virtual void TestEndOffsetPositionWithCachingTokenFilter()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random);
    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    Exception priorException = null; // LUCENENET: No need to cast to IOException
    TokenStream stream = analyzer.GetTokenStream("field", new StringReader("abcd "));
    try
    {
        stream.Reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct?
        TokenStream cachedStream = new CachingTokenFilter(stream);
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.StoreTermVectors = true;
        customType.StoreTermVectorPositions = true;
        customType.StoreTermVectorOffsets = true;
        Field f = new Field("field", cachedStream, customType);
        // Same Field instance added twice: the cached tokens are replayed for
        // the second instance.
        doc.Add(f);
        doc.Add(f);
        w.AddDocument(doc);
    }
    catch (Exception e) when (e.IsIOException())
    {
        priorException = e;
    }
    finally
    {
        // Dispose the stream even on failure, preserving any prior exception.
        IOUtils.DisposeWhileHandlingException(priorException, stream);
    }
    w.Dispose();
    IndexReader r = DirectoryReader.Open(dir);
    TermsEnum termsEnum = r.GetTermVectors(0).GetTerms("field").GetEnumerator();
    Assert.IsTrue(termsEnum.MoveNext());
    DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
    // One occurrence per Add() call above.
    Assert.AreEqual(2, termsEnum.TotalTermFreq);
    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    dpEnum.NextPosition();
    Assert.AreEqual(0, dpEnum.StartOffset);
    Assert.AreEqual(4, dpEnum.EndOffset);
    dpEnum.NextPosition();
    // Second occurrence starts after the first field instance's text plus the
    // analyzer's offset gap — presumably 8 here; TODO confirm gap accounting.
    Assert.AreEqual(8, dpEnum.StartOffset);
    Assert.AreEqual(12, dpEnum.EndOffset);
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Regression test: merging segments where some documents have term vectors
/// and some do not must not corrupt the index. Runs twice so the second
/// iteration also exercises AddIndexes over the index built by the first.
/// </summary>
public virtual void TestTermVectorCorruption()
{
    // LUCENENET specific - log the current locking strategy used and HResult values
    // for assistance troubleshooting problems on Linux/macOS
    LogNativeFSFactoryDebugInfo();
    Directory dir = NewDirectory();
    for (int iter = 0; iter < 2; iter++)
    {
        // Small buffer + serial scheduler + doc-count merge policy make the
        // flush/merge sequence deterministic.
        IndexWriter writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));
        Document document = new Document();
        FieldType customType = new FieldType();
        customType.IsStored = true;
        Field storedField = NewField("stored", "stored", customType);
        document.Add(storedField);
        // Two docs with only a stored field (no term vectors)...
        writer.AddDocument(document);
        writer.AddDocument(document);
        // ...then one doc that also carries term vectors, so the merge mixes
        // vector-less and vector-bearing documents.
        document = new Document();
        document.Add(storedField);
        FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED);
        customType2.StoreTermVectors = true;
        customType2.StoreTermVectorPositions = true;
        customType2.StoreTermVectorOffsets = true;
        Field termVectorField = NewField("termVector", "termVector", customType2);
        document.Add(termVectorField);
        writer.AddDocument(document);
        writer.ForceMerge(1);
        writer.Dispose();
        // Reading stored fields and vectors for every doc must succeed.
        IndexReader reader = DirectoryReader.Open(dir);
        for (int i = 0; i < reader.NumDocs; i++)
        {
            reader.Document(i);
            reader.GetTermVectors(i);
        }
        reader.Dispose();
        // Re-open a writer and add a RAM copy of the index to itself via
        // AddIndexes, then force-merge again.
        writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));
        Directory[] indexDirs = new Directory[] { new MockDirectoryWrapper(Random, new RAMDirectory(dir, NewIOContext(Random))) };
        writer.AddIndexes(indexDirs);
        writer.ForceMerge(1);
        writer.Dispose();
    }
    dir.Dispose();
}
/// <summary>
/// Companion to TestTermVectorCorruption: after merging docs without vectors
/// (docs 0 and 1) with one doc that has them (doc 2), GetTermVectors must
/// return null for the vector-less docs and non-null for the vector-bearing
/// doc — not garbage.
/// </summary>
public virtual void TestTermVectorCorruption2()
{
    Directory dir = NewDirectory();
    for (int iter = 0; iter < 2; iter++)
    {
        // Deterministic flush/merge behavior (see TestTermVectorCorruption).
        IndexWriter writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));
        Document document = new Document();
        FieldType customType = new FieldType();
        customType.IsStored = true;
        Field storedField = NewField("stored", "stored", customType);
        document.Add(storedField);
        // Docs 0 and 1: stored field only, no term vectors.
        writer.AddDocument(document);
        writer.AddDocument(document);
        // Doc 2: stored field plus a term-vector field.
        document = new Document();
        document.Add(storedField);
        FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED);
        customType2.StoreTermVectors = true;
        customType2.StoreTermVectorPositions = true;
        customType2.StoreTermVectorOffsets = true;
        Field termVectorField = NewField("termVector", "termVector", customType2);
        document.Add(termVectorField);
        writer.AddDocument(document);
        writer.ForceMerge(1);
        writer.Dispose();
        IndexReader reader = DirectoryReader.Open(dir);
        Assert.IsNull(reader.GetTermVectors(0));
        Assert.IsNull(reader.GetTermVectors(1));
        Assert.IsNotNull(reader.GetTermVectors(2));
        reader.Dispose();
    }
    dir.Dispose();
}
/// <summary>
/// Stress test: one document carrying a very large number of distinct fields
/// (factory allows up to 5000 field names), run once per supported
/// term-vector option, then read back and compared field-by-field.
/// </summary>
public virtual void TestLotsOfFields()
{
    RandomDocumentFactory factory = new RandomDocumentFactory(this, 5000, 10);
    foreach (Options options in ValidOptions())
    {
        using (Directory directory = NewDirectory())
        using (RandomIndexWriter indexWriter = new RandomIndexWriter(Random, directory))
        {
            RandomDocument randomDoc = factory.NewDocument(AtLeast(100), 5, options);
            indexWriter.AddDocument(randomDoc.ToDocument());
            using (IndexReader indexReader = indexWriter.GetReader())
            {
                AssertEquals(randomDoc, indexReader.GetTermVectors(0));
            }
        }
    }
}
/// <summary>
/// Indexes random documents with intermittent commits, deletes a random
/// subset by id, force-merges, and verifies the term vectors of every
/// surviving document are intact after the merge applied the deletes.
/// </summary>
public virtual void TestMerge()
{
    RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 5, 20);
    int numDocs = AtLeast(100);
    int numDeletes = Random.Next(numDocs);
    // Set semantics de-duplicate the random picks until we have numDeletes
    // distinct doc ids to delete.
    HashSet<int?> deletes = new HashSet<int?>();
    while (deletes.Count < numDeletes)
    {
        deletes.Add(Random.Next(numDocs));
    }
    foreach (Options options in ValidOptions())
    {
        RandomDocument[] docs = new RandomDocument[numDocs];
        for (int i = 0; i < numDocs; ++i)
        {
            docs[i] = docFactory.NewDocument(TestUtil.NextInt32(Random, 1, 3), AtLeast(10), options);
        }
        using (Directory dir = NewDirectory())
        using (RandomIndexWriter writer = new RandomIndexWriter(Random, dir, ClassEnvRule.similarity, ClassEnvRule.timeZone))
        {
            for (int i = 0; i < numDocs; ++i)
            {
                writer.AddDocument(AddId(docs[i].ToDocument(), "" + i));
                // Occasional commits create multiple segments to merge.
                if (Rarely())
                {
                    writer.Commit();
                }
            }
            foreach (int delete in deletes)
            {
                writer.DeleteDocuments(new Term("id", "" + delete));
            }
            // merge with deletes
            writer.ForceMerge(1);
            using (IndexReader reader = writer.GetReader())
            {
                for (int i = 0; i < numDocs; ++i)
                {
                    if (!deletes.Contains(i))
                    {
                        int docID = DocID(reader, "" + i);
                        AssertEquals(docs[i], reader.GetTermVectors(docID));
                    }
                }
            }
            // reader.Dispose();
        } // writer.Dispose();, dir.Dispose();
    }
}
/// <summary>
/// When the same field name appears twice in a document with conflicting
/// term-vector settings (one instance with vectors, one without), the
/// "store term vectors" setting must win regardless of which instance came
/// first: both f1 and f2 end up with a 2-term vector.
/// </summary>
public virtual void TestMixedTermVectorSettingsSameField()
{
    Document doc = new Document();
    // f1 first without tv then with tv
    doc.Add(NewStringField("f1", "v1", Field.Store.YES));
    FieldType customType2 = new FieldType(StringField.TYPE_STORED);
    customType2.StoreTermVectors = true;
    customType2.StoreTermVectorOffsets = true;
    customType2.StoreTermVectorPositions = true;
    doc.Add(NewField("f1", "v2", customType2));
    // f2 first with tv then without tv
    doc.Add(NewField("f2", "v1", customType2));
    doc.Add(NewStringField("f2", "v2", Field.Store.YES));
    IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    writer.AddDocument(doc);
    writer.Dispose();
    // Sanity-check index integrity before reading it back.
    TestUtil.CheckIndex(Dir);
    IndexReader reader = DirectoryReader.Open(Dir);
    // f1
    Terms tfv1 = reader.GetTermVectors(0).GetTerms("f1");
    Assert.IsNotNull(tfv1);
    Assert.AreEqual(2, tfv1.Count, "the 'with_tv' setting should rule!");
    // f2
    Terms tfv2 = reader.GetTermVectors(0).GetTerms("f2");
    Assert.IsNotNull(tfv2);
    Assert.AreEqual(2, tfv2.Count, "the 'with_tv' setting should rule!");
    reader.Dispose();
}
/// <summary>
/// Thread body: repeatedly reads term vectors for random documents through a
/// shared reader, recording any failure so the launching test can assert on it
/// (an exception thrown from a background thread would otherwise be lost).
/// </summary>
public override void Run()
{
    try
    {
        // NOTE(review): AtLeast(100) is re-evaluated every iteration of the
        // loop condition — confirm this is intended rather than a hoisted
        // iteration count.
        for (int i = 0; i < AtLeast(100); ++i)
        {
            int idx = Random().Next(NumDocs);
            int docID = OuterInstance.DocID(Reader, "" + idx);
            OuterInstance.AssertEquals(Docs[idx], Reader.GetTermVectors(docID));
        }
    }
    catch (Exception t)
    {
        // Stash the failure for the main test thread to observe.
        this.ARException.Value = t;
    }
}
/// <summary>
/// Thread body: repeatedly reads term vectors for random documents through a
/// shared reader, recording any failure so the launching test can assert on it
/// (an exception thrown from a background thread would otherwise be lost).
/// </summary>
public override void Run()
{
    try
    {
        // NOTE(review): AtLeast(100) is re-evaluated every iteration of the
        // loop condition — confirm this is intended rather than a hoisted
        // iteration count.
        for (int i = 0; i < AtLeast(100); ++i)
        {
            int idx = Random.Next(numDocs);
            int docID = outerInstance.DocID(reader, "" + idx);
            outerInstance.AssertEquals(docs[idx], reader.GetTermVectors(docID));
        }
    }
    catch (Exception t) when (t.IsThrowable())
    {
        // Stash the failure for the main test thread to observe.
        this.exception.Value = t;
    }
}
/// <summary>
/// Stress test: one document carrying many distinct fields (factory allows up
/// to 500 field names), run once per supported term-vector option, then read
/// back and compared field-by-field.
/// </summary>
public virtual void TestLotsOfFields()
{
    RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 500, 10);
    foreach (Options options in ValidOptions())
    {
        Directory dir = NewDirectory();
        RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, ClassEnvRule.Similarity, ClassEnvRule.TimeZone);
        RandomDocument doc = docFactory.NewDocument(AtLeast(100), 5, options);
        writer.AddDocument(doc.ToDocument());
        IndexReader reader = writer.Reader;
        AssertEquals(doc, reader.GetTermVectors(0));
        // Dispose in reverse order of acquisition.
        reader.Dispose();
        writer.Dispose();
        dir.Dispose();
    }
}
// run random tests from different threads to make sure the per-thread clones
// don't share mutable data
public virtual void TestClone()
{
    RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 5, 20);
    int numDocs = AtLeast(100);
    foreach (Options options in ValidOptions())
    {
        RandomDocument[] docs = new RandomDocument[numDocs];
        for (int i = 0; i < numDocs; ++i)
        {
            docs[i] = docFactory.NewDocument(TestUtil.NextInt32(Random, 1, 3), AtLeast(10), options);
        }
        // Shared slot for a failure thrown on a worker thread (see the
        // anonymous thread class' Run()).
        AtomicObject<Exception> exception = new AtomicObject<Exception>();
        using (Directory dir = NewDirectory())
        using (RandomIndexWriter writer = new RandomIndexWriter(Random, dir, ClassEnvRule.similarity, ClassEnvRule.timeZone))
        {
            for (int i = 0; i < numDocs; ++i)
            {
                writer.AddDocument(AddId(docs[i].ToDocument(), "" + i));
            }
            using (IndexReader reader = writer.GetReader())
            {
                // First verify single-threaded, then hammer the same reader
                // from two concurrent threads.
                for (int i = 0; i < numDocs; ++i)
                {
                    int docID = DocID(reader, "" + i);
                    AssertEquals(docs[i], reader.GetTermVectors(docID));
                }
                ThreadClass[] threads = new ThreadClass[2];
                for (int i = 0; i < threads.Length; ++i)
                {
                    threads[i] = new ThreadAnonymousInnerClassHelper(this, numDocs, docs, reader, exception, i);
                }
                foreach (ThreadClass thread in threads)
                {
                    thread.Start();
                }
                foreach (ThreadClass thread in threads)
                {
                    thread.Join();
                }
            }
            // reader.Dispose();
        } // writer.Dispose();, dir.Dispose();
        Assert.IsNull(exception.Value, "One thread threw an exception");
    }
}
/// <summary>
/// Verifies term-vector offsets when two field instances of the same field
/// are analyzed: the second instance's token offsets must continue after the
/// first instance's text ("abcd the " is 9 chars, so "crunch" starts at 11
/// given the analyzer's offset handling — terms enumerate in sorted order:
/// "abcd", "crunch", "man", then "the").
/// </summary>
public virtual void TestEndOffsetPositionStandard()
{
    Directory dir = NewDirectory();
    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorPositions = true;
    customType.StoreTermVectorOffsets = true;
    Field f = NewField("field", "abcd the ", customType);
    Field f2 = NewField("field", "crunch man", customType);
    doc.Add(f);
    doc.Add(f2);
    w.AddDocument(doc);
    w.Dispose();
    IndexReader r = DirectoryReader.Open(dir);
    TermsEnum termsEnum = r.GetTermVectors(0).GetTerms("field").GetEnumerator();
    Assert.IsTrue(termsEnum.MoveNext());
    DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
    // First term: "abcd" at offsets [0, 4).
    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    dpEnum.NextPosition();
    Assert.AreEqual(0, dpEnum.StartOffset);
    Assert.AreEqual(4, dpEnum.EndOffset);
    // Second term: "crunch" — offsets shifted into the second field instance.
    Assert.IsTrue(termsEnum.MoveNext());
    dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    dpEnum.NextPosition();
    Assert.AreEqual(11, dpEnum.StartOffset);
    Assert.AreEqual(17, dpEnum.EndOffset);
    // Third term: "man".
    Assert.IsTrue(termsEnum.MoveNext());
    dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    dpEnum.NextPosition();
    Assert.AreEqual(18, dpEnum.StartOffset);
    Assert.AreEqual(21, dpEnum.EndOffset);
    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Stress test for very high term frequencies: a couple of fields each with
/// at least 20000 terms, for every option that actually stores vectors
/// (Options.NONE is skipped since there is nothing to read back).
/// </summary>
public virtual void TestHighFreqs()
{
    RandomDocumentFactory factory = new RandomDocumentFactory(this, 3, 5);
    foreach (Options options in ValidOptions())
    {
        if (options == Options.NONE)
        {
            continue;
        }
        using (Directory directory = NewDirectory())
        using (RandomIndexWriter indexWriter = new RandomIndexWriter(Random, directory))
        {
            RandomDocument randomDoc = factory.NewDocument(TestUtil.NextInt32(Random, 1, 2), AtLeast(20000), options);
            indexWriter.AddDocument(randomDoc.ToDocument());
            using (IndexReader indexReader = indexWriter.GetReader())
            {
                AssertEquals(randomDoc, indexReader.GetTermVectors(0));
            }
        }
    }
}
/// <summary>
/// Stress test for very high term frequencies: a couple of fields each with
/// at least 20000 terms, for every option that actually stores vectors
/// (Options.NONE is skipped since there is nothing to read back).
/// </summary>
public virtual void TestHighFreqs()
{
    RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 3, 5);
    foreach (Options options in ValidOptions())
    {
        if (options == Options.NONE)
        {
            continue;
        }
        using (Directory dir = NewDirectory())
        using (RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, ClassEnvRule.Similarity, ClassEnvRule.TimeZone))
        {
            RandomDocument doc = docFactory.NewDocument(TestUtil.NextInt(Random(), 1, 2), AtLeast(20000), options);
            writer.AddDocument(doc.ToDocument());
            using (IndexReader reader = writer.Reader)
                AssertEquals(doc, reader.GetTermVectors(0));
        }
    }
}
/// <summary>
/// Verifies term-vector offsets when a stop filter removes tokens: "the" is
/// dropped by MockTokenFilter.ENGLISH_STOPSET, but offsets of the surviving
/// "abcd" occurrences still reflect their real character positions in the
/// two concatenated field instances.
/// </summary>
public virtual void TestEndOffsetPositionStopFilter()
{
    Directory dir = NewDirectory();
    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorPositions = true;
    customType.StoreTermVectorOffsets = true;
    Field f = NewField("field", "abcd the", customType);
    // Same field added twice — two occurrences of "abcd".
    doc.Add(f);
    doc.Add(f);
    w.AddDocument(doc);
    w.Dispose();
    IndexReader r = DirectoryReader.Open(dir);
    TermsEnum termsEnum = r.GetTermVectors(0).Terms("field").Iterator(null);
    Assert.IsNotNull(termsEnum.Next());
    DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
    // "abcd" occurs twice; "the" was filtered out.
    Assert.AreEqual(2, termsEnum.TotalTermFreq());
    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    dpEnum.NextPosition();
    Assert.AreEqual(0, dpEnum.StartOffset());
    Assert.AreEqual(4, dpEnum.EndOffset());
    dpEnum.NextPosition();
    // Second occurrence: shifted past the first field instance's text.
    Assert.AreEqual(9, dpEnum.StartOffset());
    Assert.AreEqual(13, dpEnum.EndOffset());
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Randomized round-trip: indexes many random documents (each with random
/// term-vector options) tagged with an "id" field, then reads every document
/// back by id and compares its stored term vectors against the source.
/// </summary>
public virtual void TestRandom()
{
    RandomDocumentFactory factory = new RandomDocumentFactory(this, 5, 20);
    int numDocs = AtLeast(100);
    RandomDocument[] randomDocs = new RandomDocument[numDocs];
    for (int d = 0; d < numDocs; ++d)
    {
        randomDocs[d] = factory.NewDocument(TestUtil.NextInt32(Random, 1, 3), TestUtil.NextInt32(Random, 10, 50), RandomOptions());
    }
    using (Directory directory = NewDirectory())
    using (RandomIndexWriter indexWriter = new RandomIndexWriter(Random, directory))
    {
        for (int d = 0; d < numDocs; ++d)
        {
            indexWriter.AddDocument(AddId(randomDocs[d].ToDocument(), "" + d));
        }
        using (IndexReader indexReader = indexWriter.GetReader())
        {
            for (int d = 0; d < numDocs; ++d)
            {
                int docID = DocID(indexReader, "" + d);
                AssertEquals(randomDocs[d], indexReader.GetTermVectors(docID));
            }
        }
    }
}
/// <summary>
/// Back-compat verification of a pre-built index: checks stored fields
/// (including UTF-8 surrogate handling and a non-ASCII field name), term
/// vectors, doc values (4.0+/4.2+ features detected by probing for marker
/// fields), and search results against known fixture contents (35 docs, doc
/// with id 7 deleted).
/// </summary>
public virtual void SearchIndex(Directory dir, string oldName)
{
    //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
    //Query query = parser.parse("handle:1");
    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = NewSearcher(reader);
    TestUtil.CheckIndex(dir);
    // true if this is a 4.0+ index
    bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;
    // true if this is a 4.2+ index
    bool is42Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("dvSortedSet") != null;
    Debug.Assert(is40Index); // NOTE: currently we can only do this on trunk!
    Bits liveDocs = MultiFields.GetLiveDocs(reader);
    for (int i = 0; i < 35; i++)
    {
        if (liveDocs.Get(i))
        {
            Document d = reader.Document(i);
            IList<IndexableField> fields = d.Fields;
            // "content3" is absent in the positions-enabled fixture docs.
            bool isProxDoc = d.GetField("content3") == null;
            if (isProxDoc)
            {
                int numFields = is40Index ? 7 : 5;
                Assert.AreEqual(numFields, fields.Count);
                IndexableField f = d.GetField("id");
                Assert.AreEqual("" + i, f.StringValue);
                // Surrogate pairs and an unpaired-range test string must
                // round-trip byte-for-byte through stored fields.
                f = d.GetField("utf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);
                f = d.GetField("autf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);
                f = d.GetField("content2");
                Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);
                f = d.GetField("fie\u2C77ld");
                Assert.AreEqual("field with non-ascii name", f.StringValue);
            }
            Fields tfvFields = reader.GetTermVectors(i);
            Assert.IsNotNull(tfvFields, "i=" + i);
            Terms tfv = tfvFields.Terms("utf8");
            Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName);
        }
        else
        {
            // Only ID 7 is deleted
            Assert.AreEqual(7, i);
        }
    }
    if (is40Index)
    {
        // check docvalues fields
        NumericDocValues dvByte = MultiDocValues.GetNumericValues(reader, "dvByte");
        BinaryDocValues dvBytesDerefFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed");
        BinaryDocValues dvBytesDerefVar = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar");
        SortedDocValues dvBytesSortedFixed = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed");
        SortedDocValues dvBytesSortedVar = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar");
        BinaryDocValues dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed");
        BinaryDocValues dvBytesStraightVar = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar");
        NumericDocValues dvDouble = MultiDocValues.GetNumericValues(reader, "dvDouble");
        NumericDocValues dvFloat = MultiDocValues.GetNumericValues(reader, "dvFloat");
        NumericDocValues dvInt = MultiDocValues.GetNumericValues(reader, "dvInt");
        NumericDocValues dvLong = MultiDocValues.GetNumericValues(reader, "dvLong");
        NumericDocValues dvPacked = MultiDocValues.GetNumericValues(reader, "dvPacked");
        NumericDocValues dvShort = MultiDocValues.GetNumericValues(reader, "dvShort");
        SortedSetDocValues dvSortedSet = null;
        if (is42Index)
        {
            dvSortedSet = MultiDocValues.GetSortedSetValues(reader, "dvSortedSet");
        }
        for (int i = 0; i < 35; i++)
        {
            int id = Convert.ToInt32(reader.Document(i).Get(\u0022id\u0022));
            Assert.AreEqual(id, dvByte.Get(i));
            // Expected binary payload: the id encoded big-endian into 4 bytes.
            sbyte[] bytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
            BytesRef expectedRef = new BytesRef((byte[])(Array)bytes);
            BytesRef scratch = new BytesRef();
            dvBytesDerefFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesDerefVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesSortedFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesSortedVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesStraightFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesStraightVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            // Numeric doc values store the raw bit patterns for double/float.
            Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(dvDouble.Get(i)), 0D);
            Assert.AreEqual((float)id, Number.IntBitsToFloat((int)dvFloat.Get(i)), 0F);
            Assert.AreEqual(id, dvInt.Get(i));
            Assert.AreEqual(id, dvLong.Get(i));
            Assert.AreEqual(id, dvPacked.Get(i));
            Assert.AreEqual(id, dvShort.Get(i));
            if (is42Index)
            {
                // Each doc has exactly one ord in the sorted set.
                dvSortedSet.Document = i;
                long ord = dvSortedSet.NextOrd();
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.NextOrd());
                dvSortedSet.LookupOrd(ord, scratch);
                Assert.AreEqual(expectedRef, scratch);
            }
        }
    }
    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    // First document should be #0
    Document doc = searcher.IndexReader.Document(hits[0].Doc);
    assertEquals("didn't get the right document first", "0", doc.Get("id"));
    DoTestHits(hits, 34, searcher.IndexReader);
    if (is40Index)
    {
        hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs;
        DoTestHits(hits, 34, searcher.IndexReader);
        hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs;
        DoTestHits(hits, 34, searcher.IndexReader);
    }
    hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);
    hits = searcher.Search(new TermQuery(new Term("utf8", "lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);
    hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);
    reader.Dispose();
}
// Runs test, with multiple threads, using the specific
// failure to trigger an IOException
public virtual void TestMultipleThreadsFailure(Func<IConcurrentMergeScheduler> newScheduler, Failure failure)
{
    int NUM_THREADS = 3;
    for (int iter = 0; iter < 2; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("TEST: iter=" + iter);
        }
        MockDirectoryWrapper dir = NewMockDirectory();
        var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
            .SetMaxBufferedDocs(2)
            .SetMergeScheduler(newScheduler())
            .SetMergePolicy(NewLogMergePolicy(4));
        IndexWriter writer = new IndexWriter(dir, config);
        // Merge-thread exceptions are expected here; keep them from being
        // rethrown so the test controls failure handling itself.
        var scheduler = config.mergeScheduler as IConcurrentMergeScheduler;
        if (scheduler != null)
        {
            scheduler.SetSuppressExceptions();
        }
        IndexerThread[] threads = new IndexerThread[NUM_THREADS];
        for (int i = 0; i < NUM_THREADS; i++)
        {
            threads[i] = new IndexerThread(writer, true, NewField);
        }
        for (int i = 0; i < NUM_THREADS; i++)
        {
            threads[i].Start();
        }
        // Let the indexer threads make some progress before arming the
        // injected failure.
        Thread.Sleep(10);
        dir.FailOn(failure);
        failure.SetDoFail();
        for (int i = 0; i < NUM_THREADS; i++)
        {
            threads[i].Join();
            Assert.IsTrue(threads[i].Error == null, "hit unexpected Throwable");
        }
        bool success = false;
        try
        {
            writer.Dispose(false);
            success = true;
        }
        catch (IOException)
        {
            // The injected failure fired during close; disarm it and retry.
            failure.ClearDoFail();
            writer.Dispose(false);
        }
        if (VERBOSE)
        {
            Console.WriteLine("TEST: success=" + success);
        }
        if (success)
        {
            IndexReader reader = DirectoryReader.Open(dir);
            IBits delDocs = MultiFields.GetLiveDocs(reader);
            for (int j = 0; j < reader.MaxDoc; j++)
            {
                // NOTE(review): GetLiveDocs returns LIVE docs (Get(j)==true
                // means live), so "!delDocs.Get(j)" selects deleted docs; this
                // mirrors the upstream port but the variable name suggests the
                // opposite — confirm the intended polarity.
                if (delDocs == null || !delDocs.Get(j))
                {
                    reader.Document(j);
                    reader.GetTermVectors(j);
                }
            }
            reader.Dispose();
        }
        dir.Dispose();
    }
}
/// <summary>
/// checks that term vectors across all fields are equivalent
/// </summary>
public void AssertTermVectorsEquals(string info, IndexReader leftReader, IndexReader rightReader)
{
    // Both readers must span the same doc-id range for a per-doc comparison.
    Debug.Assert(leftReader.MaxDoc == rightReader.MaxDoc);
    for (int docID = 0; docID < leftReader.MaxDoc; docID++)
    {
        AssertFieldsEquals(info, leftReader, leftReader.GetTermVectors(docID), rightReader.GetTermVectors(docID), Rarely());
    }
}
/// <summary>
/// Indexes documents built from custom IIndexableField implementations with a
/// deterministic per-field recipe keyed off a running counter (fieldID =
/// counter % 10 selects stored/binary/indexed/term-vector behavior), then
/// verifies stored values, binary payloads, term vectors, and searchability
/// of every generated field.
/// </summary>
public virtual void TestArbitraryFields()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    int NUM_DOCS = AtLeast(27);
    if (Verbose)
    {
        Console.WriteLine("TEST: " + NUM_DOCS + " docs");
    }
    int[] fieldsPerDoc = new int[NUM_DOCS];
    int baseCount = 0;
    for (int docCount = 0; docCount < NUM_DOCS; docCount++)
    {
        int fieldCount = TestUtil.NextInt32(Random, 1, 17);
        // One of the fieldCount fields is the "id" field, hence the -1.
        fieldsPerDoc[docCount] = fieldCount - 1;
        int finalDocCount = docCount;
        if (Verbose)
        {
            Console.WriteLine("TEST: " + fieldCount + " fields in doc " + docCount);
        }
        int finalBaseCount = baseCount;
        baseCount += fieldCount - 1;
        w.AddDocument(new IterableAnonymousInnerClassHelper(this, fieldCount, finalDocCount, finalBaseCount));
    }
    IndexReader r = w.GetReader();
    w.Dispose();
    IndexSearcher s = NewSearcher(r);
    int counter = 0;
    for (int id = 0; id < NUM_DOCS; id++)
    {
        if (Verbose)
        {
            Console.WriteLine("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter);
        }
        TopDocs hits = s.Search(new TermQuery(new Term("id", "" + id)), 1);
        Assert.AreEqual(1, hits.TotalHits);
        int docID = hits.ScoreDocs[0].Doc;
        Document doc = s.Doc(docID);
        int endCounter = counter + fieldsPerDoc[id];
        while (counter < endCounter)
        {
            string name = "f" + counter;
            // Recipe: fieldID 3 => stored binary, not indexed; fieldID 9 =>
            // indexed but no string value; otherwise "text <counter>".
            int fieldID = counter % 10;
            bool stored = (counter & 1) == 0 || fieldID == 3;
            bool binary = fieldID == 3;
            bool indexed = fieldID != 3;
            string stringValue;
            if (fieldID != 3 && fieldID != 9)
            {
                stringValue = "text " + counter;
            }
            else
            {
                stringValue = null;
            }
            // stored:
            if (stored)
            {
                IIndexableField f = doc.GetField(name);
                Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter);
                if (binary)
                {
                    Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter);
                    // Binary payload is 10 bytes of (idx + counter).
                    BytesRef b = f.GetBinaryValue();
                    Assert.IsNotNull(b);
                    Assert.AreEqual(10, b.Length);
                    for (int idx = 0; idx < 10; idx++)
                    {
                        Assert.AreEqual((byte)(idx + counter), b.Bytes[b.Offset + idx]);
                    }
                }
                else
                {
                    Debug.Assert(stringValue != null);
                    Assert.AreEqual(stringValue, f.GetStringValue());
                }
            }
            if (indexed)
            {
                // Term vectors exist only for odd counters outside fieldID 9.
                bool tv = counter % 2 == 1 && fieldID != 9;
                if (tv)
                {
                    Terms tfv = r.GetTermVectors(docID).GetTerms(name);
                    Assert.IsNotNull(tfv);
                    TermsEnum termsEnum = tfv.GetIterator(null);
                    // Terms enumerate in sorted order: "<counter>" then "text".
                    Assert.AreEqual(new BytesRef("" + counter), termsEnum.Next());
                    Assert.AreEqual(1, termsEnum.TotalTermFreq);
                    DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                    Assert.AreEqual(1, dpEnum.Freq);
                    Assert.AreEqual(1, dpEnum.NextPosition());
                    Assert.AreEqual(new BytesRef("text"), termsEnum.Next());
                    Assert.AreEqual(1, termsEnum.TotalTermFreq);
                    dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                    Assert.AreEqual(1, dpEnum.Freq);
                    Assert.AreEqual(0, dpEnum.NextPosition());
                    Assert.IsNull(termsEnum.Next());
                    // TODO: offsets
                }
                else
                {
                    Fields vectors = r.GetTermVectors(docID);
                    Assert.IsTrue(vectors == null || vectors.GetTerms(name) == null);
                }
                // Both terms of the field must be searchable, AND-ed with the id.
                BooleanQuery bq = new BooleanQuery();
                bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST);
                bq.Add(new TermQuery(new Term(name, "text")), Occur.MUST);
                TopDocs hits2 = s.Search(bq, 1);
                Assert.AreEqual(1, hits2.TotalHits);
                Assert.AreEqual(docID, hits2.ScoreDocs[0].Doc);
                bq = new BooleanQuery();
                bq.Add(new TermQuery(new Term("id", "" + id)), Occur.MUST);
                bq.Add(new TermQuery(new Term(name, "" + counter)), Occur.MUST);
                TopDocs hits3 = s.Search(bq, 1);
                Assert.AreEqual(1, hits3.TotalHits);
                Assert.AreEqual(docID, hits3.ScoreDocs[0].Doc);
            }
            counter++;
        }
    }
    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Verifies offset accounting when the same field is added multiple times,
/// including an empty-valued instance: the empty token gets a zero-length
/// offset at position 8, and the three "abcd" occurrences get consecutive
/// 4-char offset ranges.
/// </summary>
public virtual void TestDoubleOffsetCounting()
{
    Directory dir = NewDirectory();
    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    Document doc = new Document();
    FieldType customType = new FieldType(StringField.TYPE_NOT_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorPositions = true;
    customType.StoreTermVectorOffsets = true;
    Field f = NewField("field", "abcd", customType);
    doc.Add(f);
    doc.Add(f);
    // An empty-valued instance between the second and third "abcd".
    Field f2 = NewField("field", "", customType);
    doc.Add(f2);
    doc.Add(f);
    w.AddDocument(doc);
    w.Dispose();
    IndexReader r = DirectoryReader.Open(dir);
    Terms vector = r.GetTermVectors(0).GetTerms("field");
    Assert.IsNotNull(vector);
    TermsEnum termsEnum = vector.GetEnumerator();
    Assert.IsTrue(termsEnum.MoveNext());
    Assert.AreEqual("", termsEnum.Term.Utf8ToString());
    // Token "" occurred once
    Assert.AreEqual(1, termsEnum.TotalTermFreq);
    DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    dpEnum.NextPosition();
    // Zero-length token: start == end == 8.
    Assert.AreEqual(8, dpEnum.StartOffset);
    Assert.AreEqual(8, dpEnum.EndOffset);
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
    // Token "abcd" occurred three times
    Assert.IsTrue(termsEnum.MoveNext());
    Assert.AreEqual(new BytesRef("abcd"), termsEnum.Term);
    dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
    Assert.AreEqual(3, termsEnum.TotalTermFreq);
    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    dpEnum.NextPosition();
    Assert.AreEqual(0, dpEnum.StartOffset);
    Assert.AreEqual(4, dpEnum.EndOffset);
    dpEnum.NextPosition();
    Assert.AreEqual(4, dpEnum.StartOffset);
    Assert.AreEqual(8, dpEnum.EndOffset);
    dpEnum.NextPosition();
    Assert.AreEqual(8, dpEnum.StartOffset);
    Assert.AreEqual(12, dpEnum.EndOffset);
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
    // Only the two terms "" and "abcd" exist.
    Assert.IsFalse(termsEnum.MoveNext());
    r.Dispose();
    dir.Dispose();
}