public virtual void TestNoOrds()
{
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir);
    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.StoreTermVectors = true;
    doc.Add(new Field("foo", "this is a test", ft));
    iw.AddDocument(doc);
    AtomicReader ir = GetOnlySegmentReader(iw.GetReader());
    Terms terms = ir.GetTermVector(0, "foo");
    Assert.IsNotNull(terms);
    TermsEnum termsEnum = terms.GetEnumerator();
    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("this")));

    try
    {
        var _ = termsEnum.Ord;
        Assert.Fail();
    }
    catch (Exception expected) when (expected.IsUnsupportedOperationException())
    {
        // expected exception
    }

    try
    {
        termsEnum.SeekExact(0);
        Assert.Fail();
    }
    catch (Exception expected) when (expected.IsUnsupportedOperationException())
    {
        // expected exception
    }

    ir.Dispose();
    iw.Dispose();
    dir.Dispose();
}
/// <summary>
/// Alternate the scorer between skipTo(), skipTo(), next(), next(), skipTo(), skipTo(), etc.
/// and ensure a hit collector receives the same docs and scores.
/// </summary>
public static void CheckSkipTo(Query q, IndexSearcher s)
{
    //System.out.println("Checking "+q);
    IList<AtomicReaderContext> readerContextArray = s.TopReaderContext.Leaves;
    if (s.CreateNormalizedWeight(q).ScoresDocsOutOfOrder())
    {
        // in this case order of skipTo() might differ from that of next().
        return;
    }

    const int skip_op = 0;
    const int next_op = 1;
    int[][] orders = new int[][]
    {
        new int[] { next_op },
        new int[] { skip_op },
        new int[] { skip_op, next_op },
        new int[] { next_op, skip_op },
        new int[] { skip_op, skip_op, next_op, next_op },
        new int[] { next_op, next_op, skip_op, skip_op },
        new int[] { skip_op, skip_op, skip_op, next_op, next_op }
    };
    for (int k = 0; k < orders.Length; k++)
    {
        int[] order = orders[k];
        // System.out.print("Order:");
        // for (int i = 0; i < order.Length; i++)
        //     System.out.print(order[i]==skip_op ? " skip()":" next()");
        // System.out.println();
        int[] opidx = new int[] { 0 };
        int[] lastDoc = new int[] { -1 };

        // FUTURE: ensure scorer.Doc()==-1

        const float maxDiff = 1e-5f;
        AtomicReader[] lastReader = new AtomicReader[] { null };

        s.Search(q, new CollectorAnonymousInnerClassHelper(q, s, readerContextArray, skip_op, order, opidx, lastDoc, maxDiff, lastReader));

        if (lastReader[0] != null)
        {
            // confirm that skipping beyond the last doc, on the
            // previous reader, hits NO_MORE_DOCS
            AtomicReader previousReader = lastReader[0];
            IndexSearcher indexSearcher = LuceneTestCase.NewSearcher(previousReader, false);
            indexSearcher.Similarity = s.Similarity;
            Weight w = indexSearcher.CreateNormalizedWeight(q);
            AtomicReaderContext ctx = (AtomicReaderContext)previousReader.Context;
            Scorer scorer = w.Scorer(ctx, ((AtomicReader)ctx.Reader).LiveDocs);
            if (scorer != null)
            {
                bool more = scorer.Advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
                Assert.IsFalse(more, "query's last doc was " + lastDoc[0] + " but skipTo(" + (lastDoc[0] + 1) + ") got to " + scorer.DocID());
            }
        }
    }
}
/// <summary>
/// Check that the first skip on just-created scorers always goes to the right doc.
/// </summary>
public static void CheckFirstSkipTo(Query q, IndexSearcher s)
{
    //System.out.println("checkFirstSkipTo: "+q);
    const float maxDiff = 1e-3f;
    int[] lastDoc = new int[] { -1 };
    AtomicReader[] lastReader = new AtomicReader[] { null };
    IList<AtomicReaderContext> context = s.TopReaderContext.Leaves;
    s.Search(q, new CollectorAnonymousInnerClassHelper2(q, s, maxDiff, lastDoc, lastReader, context));

    if (lastReader[0] != null)
    {
        // confirm that skipping beyond the last doc, on the
        // previous reader, hits NO_MORE_DOCS
        AtomicReader previousReader = lastReader[0];
        IndexSearcher indexSearcher = LuceneTestCase.NewSearcher(previousReader);
        indexSearcher.Similarity = s.Similarity;
        Weight w = indexSearcher.CreateNormalizedWeight(q);
        Scorer scorer = w.Scorer((AtomicReaderContext)indexSearcher.TopReaderContext, previousReader.LiveDocs);
        if (scorer != null)
        {
            bool more = scorer.Advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
            Assert.IsFalse(more, "query's last doc was " + lastDoc[0] + " but skipTo(" + (lastDoc[0] + 1) + ") got to " + scorer.DocID());
        }
    }
}
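// Hedged usage sketch (not taken from the original sources): how a test would
// typically drive CheckSkipTo/CheckFirstSkipTo. The field name, sample text, and
// the assumption that LuceneTestCase helpers (NewDirectory, NewTextField,
// NewSearcher, Random) are in scope are illustrative only.
//
// Directory dir = NewDirectory();
// RandomIndexWriter iw = new RandomIndexWriter(Random, dir);
// Document doc = new Document();
// doc.Add(NewTextField("field", "one two three", Field.Store.NO));
// iw.AddDocument(doc);
// IndexReader reader = iw.GetReader();
// iw.Dispose();
//
// IndexSearcher searcher = NewSearcher(reader);
// Query q = new TermQuery(new Term("field", "two"));
// CheckSkipTo(q, searcher);      // exercises alternating skipTo()/next() orders
// CheckFirstSkipTo(q, searcher); // exercises the first advance on fresh scorers
//
// reader.Dispose();
// dir.Dispose();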
protected override DocIdSet CacheImpl(DocIdSetIterator iterator, AtomicReader reader)
{
    var cached = new FixedBitSet(reader.MaxDoc);
    filterWasUsed.Set(true);
    cached.Or(iterator);
    return cached;
}
public override void SetUp()
{
    base.SetUp();
    DirA = NewDirectory();
    DirB = NewDirectory();

    IndexWriter wA = new IndexWriter(DirA, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    IndexWriter wB = new IndexWriter(DirB, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

    long theLong = long.MaxValue;
    double theDouble = double.MaxValue;
    sbyte theByte = sbyte.MaxValue;
    short theShort = short.MaxValue;
    int theInt = int.MaxValue;
    float theFloat = float.MaxValue;
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("theLong", Convert.ToString(theLong--), Field.Store.NO));
        doc.Add(NewStringField("theDouble", theDouble.ToString("R"), Field.Store.NO));
        theDouble--;
        doc.Add(NewStringField("theByte", Convert.ToString(theByte--), Field.Store.NO));
        doc.Add(NewStringField("theShort", Convert.ToString(theShort--), Field.Store.NO));
        doc.Add(NewStringField("theInt", Convert.ToString(theInt--), Field.Store.NO));
        doc.Add(NewStringField("theFloat", Convert.ToString(theFloat--), Field.Store.NO));
        if (0 == i % 3)
        {
            wA.AddDocument(doc);
        }
        else
        {
            wB.AddDocument(doc);
        }
    }
    wA.Dispose();
    wB.Dispose();

    DirectoryReader rA = DirectoryReader.Open(DirA);
    ReaderA = SlowCompositeReaderWrapper.Wrap(rA);
    ReaderAclone = SlowCompositeReaderWrapper.Wrap(rA);
    ReaderA = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(DirA));
    ReaderB = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(DirB));
    ReaderX = SlowCompositeReaderWrapper.Wrap(new MultiReader(ReaderA, ReaderB));

    // LUCENENET specific: Ensure we have an InfoStream attached to the default FieldCache
    // when running the tests. In Java, this was done in the Core.Search.TestFieldCache.TestInfoStream()
    // method (which polluted the state of these tests), but we need to make the tests self-contained
    // so they can be run correctly regardless of order. Not setting the InfoStream skips an execution
    // path within these tests, so we should do it to make sure we test all of the code.
    FieldCache.DEFAULT.InfoStream = new StringWriter();
}
public DocIdSetAnonymousInnerClassHelper(FilterAnonymousInnerClassHelper3 outerInstance, bool nullBitset, AtomicReader reader, BitArray bitSet)
{
    this.OuterInstance = outerInstance;
    this.NullBitset = nullBitset;
    this.Reader = reader;
    this.BitSet = bitSet;
}
/// <summary>
/// Provide the DocIdSet to be cached, using the DocIdSet provided
/// by the wrapped Filter.
/// <para/>
/// This implementation returns the given <seealso cref="DocIdSet"/> if
/// <seealso cref="DocIdSet.Cacheable"/> returns <code>true</code>, else it calls
/// <seealso cref="CacheImpl(DocIdSetIterator, AtomicReader)"/>.
/// <para/>
/// Note: this method returns <seealso cref="EMPTY_DOCIDSET"/> if the given docIdSet
/// is <code>null</code> or if <seealso cref="DocIdSet.GetIterator()"/> returns <code>null</code>.
/// The empty instance is used as a placeholder in the cache instead of the <code>null</code> value.
/// </summary>
protected internal virtual DocIdSet DocIdSetToCache(DocIdSet docIdSet, AtomicReader reader)
{
    if (docIdSet == null)
    {
        // this is better than returning null, as the nonnull result can be cached
        return EMPTY_DOCIDSET;
    }
    else if (docIdSet.Cacheable)
    {
        return docIdSet;
    }
    else
    {
        DocIdSetIterator it = docIdSet.GetIterator();
        // null is allowed to be returned by iterator(),
        // in this case we wrap with the sentinel set,
        // which is cacheable.
        if (it == null)
        {
            return EMPTY_DOCIDSET;
        }
        else
        {
            return CacheImpl(it, reader);
        }
    }
}
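// Hedged sketch (not the actual CachingWrapperFilter implementation): one way a
// caching filter's GetDocIdSet can use DocIdSetToCache above. The "_filter" and
// "_cache" members are illustrative assumptions, and acceptDocs handling
// (e.g. wrapping with BitsFilteredDocIdSet) is omitted for brevity.
private readonly Filter _filter;                          // wrapped filter (illustrative)
private readonly IDictionary<object, DocIdSet> _cache
    = new Dictionary<object, DocIdSet>();                 // per-core cache (illustrative)

public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
{
    AtomicReader reader = (AtomicReader)context.Reader;
    object key = reader.CoreCacheKey;

    DocIdSet docIdSet;
    if (!_cache.TryGetValue(key, out docIdSet))
    {
        // not cached yet: compute from the wrapped filter, then normalize the
        // result into a cacheable (possibly EMPTY_DOCIDSET) instance
        docIdSet = DocIdSetToCache(_filter.GetDocIdSet(context, null), reader);
        _cache[key] = docIdSet;
    }
    return docIdSet;
}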
public static void AfterClass()
{
    Reader.Dispose();
    Reader = null;
    Directory.Dispose();
    Directory = null;
    UnicodeStrings = null;
    MultiValued = null;
}
private int CopyVectorsNoDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
{
    int maxDoc = reader.MaxDoc;
    if (matchingVectorsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        int docCount = 0;
        while (docCount < maxDoc)
        {
            int len = Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
            AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
            docCount += len;
            mergeState.checkAbort.Work(300 * len);
        }
    }
    else
    {
        for (int docNum = 0; docNum < maxDoc; docNum++)
        {
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Fields vectors = reader.GetTermVectors(docNum);
            AddAllDocVectors(vectors, mergeState);
            mergeState.checkAbort.Work(300);
        }
    }
    return maxDoc;
}
private int CopyFieldsWithDeletions(MergeState mergeState, AtomicReader reader, Lucene40StoredFieldsReader matchingFieldsReader, int[] rawDocLengths)
{
    int docCount = 0;
    int maxDoc = reader.MaxDoc;
    Bits liveDocs = reader.LiveDocs;
    Debug.Assert(liveDocs != null);
    if (matchingFieldsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        for (int j = 0; j < maxDoc; )
        {
            if (!liveDocs.Get(j))
            {
                // skip deleted docs
                ++j;
                continue;
            }
            // We can optimize this case (doing a bulk byte copy) since the field
            // numbers are identical
            int start = j, numDocs = 0;
            do
            {
                j++;
                numDocs++;
                if (j >= maxDoc)
                {
                    break;
                }
                if (!liveDocs.Get(j))
                {
                    j++;
                    break;
                }
            } while (numDocs < MAX_RAW_MERGE_DOCS);

            IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
            AddRawDocuments(stream, rawDocLengths, numDocs);
            docCount += numDocs;
            mergeState.checkAbort.Work(300 * numDocs);
        }
    }
    else
    {
        for (int j = 0; j < maxDoc; j++)
        {
            if (!liveDocs.Get(j))
            {
                // skip deleted docs
                continue;
            }
            // TODO: this could be more efficient using
            // FieldVisitor instead of loading/writing entire
            // doc; ie we just have to renumber the field number
            // on the fly?
            // NOTE: it's very important to first assign to doc then pass it to
            // fieldsWriter.addDocument; see LUCENE-1282
            Document doc = reader.Document(j);
            AddDocument(doc, mergeState.FieldInfos);
            docCount++;
            mergeState.checkAbort.Work(300);
        }
    }
    return docCount;
}
private int CopyFieldsNoDeletions(MergeState mergeState, AtomicReader reader, Lucene40StoredFieldsReader matchingFieldsReader, int[] rawDocLengths)
{
    int maxDoc = reader.MaxDoc;
    int docCount = 0;
    if (matchingFieldsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        while (docCount < maxDoc)
        {
            int len = Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
            AddRawDocuments(stream, rawDocLengths, len);
            docCount += len;
            mergeState.checkAbort.Work(300 * len);
        }
    }
    else
    {
        for (; docCount < maxDoc; docCount++)
        {
            // NOTE: it's very important to first assign to doc then pass it to
            // fieldsWriter.addDocument; see LUCENE-1282
            Document doc = reader.Document(docCount);
            AddDocument(doc, mergeState.FieldInfos);
            mergeState.checkAbort.Work(300);
        }
    }
    return docCount;
}
internal bool TermNotInReader(AtomicReader reader, Term term)
{
    // only called from assert
    //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString());
    return reader.DocFreq(term) == 0;
}
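// Hedged sketch of the typical call site: TermNotInReader is only consulted from a
// Debug.Assert when no TermState was found for the term in a given segment. The
// surrounding names (termState, context, Term) are assumptions about the enclosing
// weight/scorer code, not quoted source.
//
// if (termState == null)
// {
//     // the term is supposed to exist but has no state in this segment;
//     // assert that it really does not occur in this reader
//     Debug.Assert(TermNotInReader(context.AtomicReader, Term), "no termstate found but termExists=true");
//     return null;
// }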
internal virtual void MakeIndex()
{
    // we use RAMDirectory here, because we don't want to leave open files on Windows:
    d = new RAMDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), d);
    Document doc = new Document();
    doc.Add(NewField("ints", "1", StringField.TYPE_NOT_STORED));
    w.AddDocument(doc);
    w.ForceMerge(1);
    r = w.Reader;
    w.Dispose();
    SubR = (AtomicReader)(r.Leaves()[0]).Reader();
}
/// <summary>
/// Default cache implementation: uses <seealso cref="WAH8DocIdSet"/>.
/// </summary>
protected virtual DocIdSet CacheImpl(DocIdSetIterator iterator, AtomicReader reader)
{
    var builder = new WAH8DocIdSet.Builder();
    builder.Add(iterator);
    return builder.Build();
}
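// Hedged sketch: whatever CacheImpl builds (WAH8DocIdSet by default, FixedBitSet in
// the test override shown earlier) is consumed through the ordinary DocIdSetIterator
// contract. "cached", "docIdSet", and "reader" are illustrative locals, not quoted source.
//
// DocIdSet cached = CacheImpl(docIdSet.GetIterator(), reader);
// DocIdSetIterator disi = cached.GetIterator();
// if (disi != null)
// {
//     int doc;
//     while ((doc = disi.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
//     {
//         // process matching doc id
//     }
// }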
public CollectorAnonymousInnerClassHelper(Query q, IndexSearcher s, IList<AtomicReaderContext> readerContextArray, int skip_op, int[] order, int[] opidx, int[] lastDoc, float maxDiff, AtomicReader[] lastReader)
{
    this.q = q;
    this.s = s;
    this.ReaderContextArray = readerContextArray;
    this.Skip_op = skip_op;
    this.Order = order;
    this.Opidx = opidx;
    this.LastDoc = lastDoc;
    this.MaxDiff = maxDiff;
    this.LastReader = lastReader;
}
public CollectorAnonymousInnerClassHelper2(Query q, IndexSearcher s, float maxDiff, int[] lastDoc, AtomicReader[] lastReader, IList<AtomicReaderContext> context)
{
    this.q = q;
    this.s = s;
    this.MaxDiff = maxDiff;
    this.LastDoc = lastDoc;
    this.LastReader = lastReader;
    this.Context = context;
}
private int CopyVectorsWithDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
{
    int maxDoc = reader.MaxDoc;
    Bits liveDocs = reader.LiveDocs;
    int totalNumDocs = 0;
    if (matchingVectorsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        for (int docNum = 0; docNum < maxDoc; )
        {
            if (!liveDocs.Get(docNum))
            {
                // skip deleted docs
                ++docNum;
                continue;
            }
            // We can optimize this case (doing a bulk byte copy) since the field
            // numbers are identical
            int start = docNum, numDocs = 0;
            do
            {
                docNum++;
                numDocs++;
                if (docNum >= maxDoc)
                {
                    break;
                }
                if (!liveDocs.Get(docNum))
                {
                    docNum++;
                    break;
                }
            } while (numDocs < MAX_RAW_MERGE_DOCS);

            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
            AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
            totalNumDocs += numDocs;
            mergeState.checkAbort.Work(300 * numDocs);
        }
    }
    else
    {
        for (int docNum = 0; docNum < maxDoc; docNum++)
        {
            if (!liveDocs.Get(docNum))
            {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Fields vectors = reader.GetTermVectors(docNum);
            AddAllDocVectors(vectors, mergeState);
            totalNumDocs++;
            mergeState.checkAbort.Work(300);
        }
    }
    return totalNumDocs;
}
public static void BeforeClass()
{
    NUM_DOCS = AtLeast(500);
    NUM_ORDS = AtLeast(2);
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
    long theLong = long.MaxValue;
    double theDouble = double.MaxValue;
    sbyte theByte = sbyte.MaxValue;
    short theShort = short.MaxValue;
    int theInt = int.MaxValue;
    float theFloat = float.MaxValue;
    UnicodeStrings = new string[NUM_DOCS];
    // jagged rows so each per-doc row can be sorted with Array.Sort below
    MultiValued = new BytesRef[NUM_DOCS][];
    for (int i = 0; i < NUM_DOCS; i++)
    {
        MultiValued[i] = new BytesRef[NUM_ORDS];
    }
    if (VERBOSE)
    {
        Console.WriteLine("TEST: setUp");
    }
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("theLong", Convert.ToString(theLong--), Field.Store.NO));
        doc.Add(NewStringField("theDouble", Convert.ToString(theDouble--), Field.Store.NO));
        doc.Add(NewStringField("theByte", Convert.ToString(theByte--), Field.Store.NO));
        doc.Add(NewStringField("theShort", Convert.ToString(theShort--), Field.Store.NO));
        doc.Add(NewStringField("theInt", Convert.ToString(theInt--), Field.Store.NO));
        doc.Add(NewStringField("theFloat", Convert.ToString(theFloat--), Field.Store.NO));
        if (i % 2 == 0)
        {
            doc.Add(NewStringField("sparse", Convert.ToString(i), Field.Store.NO));
        }
        if (i % 2 == 0)
        {
            doc.Add(new IntField("numInt", i, Field.Store.NO));
        }

        // sometimes skip the field:
        if (Random().Next(40) != 17)
        {
            UnicodeStrings[i] = GenerateString(i);
            doc.Add(NewStringField("theRandomUnicodeString", UnicodeStrings[i], Field.Store.YES));
        }

        // sometimes skip the field:
        if (Random().Next(10) != 8)
        {
            for (int j = 0; j < NUM_ORDS; j++)
            {
                string newValue = GenerateString(i);
                MultiValued[i][j] = new BytesRef(newValue);
                doc.Add(NewStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
            }
            Array.Sort(MultiValued[i]);
        }
        writer.AddDocument(doc);
    }
    IndexReader r = writer.Reader;
    Reader = SlowCompositeReaderWrapper.Wrap(r);
    writer.Dispose();
}