public DocIdSetAnonymousInnerClassHelper(QueryWrapperFilter outerInstance, Bits acceptDocs, AtomicReaderContext privateContext, Lucene.Net.Search.Weight weight)
{
    this.OuterInstance = outerInstance;
    this.AcceptDocs = acceptDocs;
    this.PrivateContext = privateContext;
    this.Weight = weight;
}
public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
{
    // get a private context that is used to rewrite, createWeight and score eventually
    AtomicReaderContext privateContext = context.AtomicReader.AtomicContext;
    Weight weight = (new IndexSearcher(privateContext)).CreateNormalizedWeight(Query_Renamed);
    return new DocIdSetAnonymousInnerClassHelper(this, acceptDocs, privateContext, weight);
}
public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
{
    Bits docsWithField = FieldCache.DEFAULT.GetDocsWithField((AtomicReader)context.Reader, Field_Renamed);
    if (Negate_Renamed)
    {
        if (docsWithField is Bits_MatchAllBits)
        {
            return null;
        }
        return new FieldCacheDocIdSetAnonymousInnerClassHelper(this, context.AtomicReader.MaxDoc, acceptDocs, docsWithField);
    }
    else
    {
        if (docsWithField is Bits_MatchNoBits)
        {
            return null;
        }
        if (docsWithField is DocIdSet)
        {
            // UweSays: this is always the case for our current impl - but who knows :-)
            return BitsFilteredDocIdSet.Wrap((DocIdSet)docsWithField, acceptDocs);
        }
        return new FieldCacheDocIdSetAnonymousInnerClassHelper2(this, context.AtomicReader.MaxDoc, acceptDocs, docsWithField);
    }
}
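// Usage sketch (not from the source): applying a docs-with-field filter
// through IndexSearcher. Assumes an already-open IndexSearcher 's'; the
// FieldValueFilter(field, negate) constructor matches Lucene 4.x.
Filter onlyWithBody = new FieldValueFilter("body", /*negate:*/ false);
TopDocs hits = s.Search(new MatchAllDocsQuery(), onlyWithBody, 10);
// hits now contains only documents that have an indexed value for "body"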
// start is inclusive; end is exclusive (length = end-start)
public BitsSlice(Bits parent, ReaderSlice slice)
{
    this.Parent = parent;
    this.Start = slice.Start;
    this.Length_Renamed = slice.Length;
    Debug.Assert(Length_Renamed >= 0, "length=" + Length_Renamed);
}
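// Usage sketch (not from the source): a BitsSlice exposes a window over a
// parent Bits. Assumes the ReaderSlice(start, length, readerIndex)
// constructor of Lucene 4.x and that FixedBitSet implements Bits.
FixedBitSet parent = new FixedBitSet(100);
parent.Set(42);
Bits window = new BitsSlice(parent, new ReaderSlice(40, 20, 0)); // covers docs 40..59
bool hit = window.Get(2); // true: index 2 of the slice maps to parent bit 42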
/// <summary>
/// Merges in the stored fields from the readers in
/// <code>mergeState</code>. The default implementation skips
/// over deleted documents, and uses <seealso cref="StartDocument(int)"/>,
/// <seealso cref="WriteField(FieldInfo, IndexableField)"/>, and <seealso cref="Finish(FieldInfos, int)"/>,
/// returning the number of documents that were written.
/// Implementations can override this method for more sophisticated
/// merging (bulk-byte copying, etc).
/// </summary>
public virtual int Merge(MergeState mergeState)
{
    int docCount = 0;
    foreach (AtomicReader reader in mergeState.Readers)
    {
        int maxDoc = reader.MaxDoc;
        Bits liveDocs = reader.LiveDocs;
        for (int i = 0; i < maxDoc; i++)
        {
            if (liveDocs != null && !liveDocs.Get(i))
            {
                // skip deleted docs
                continue;
            }
            // TODO: this could be more efficient using
            // FieldVisitor instead of loading/writing entire
            // doc; ie we just have to renumber the field number
            // on the fly?
            // NOTE: it's very important to first assign to doc then pass it to
            // fieldsWriter.addDocument; see LUCENE-1282
            Document doc = reader.Document(i);
            AddDocument(doc, mergeState.FieldInfos);
            docCount++;
            mergeState.checkAbort.Work(300);
        }
    }
    Finish(mergeState.FieldInfos, docCount);
    return docCount;
}
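// The deleted-doc handling above is the standard liveDocs pattern. A
// minimal standalone sketch (assumes an already-open AtomicReader):
int CountLiveDocs(AtomicReader reader)
{
    int count = 0;
    Bits liveDocs = reader.LiveDocs; // null means the segment has no deletions
    for (int i = 0; i < reader.MaxDoc; i++)
    {
        if (liveDocs == null || liveDocs.Get(i))
        {
            count++; // doc i is live
        }
    }
    return count;
}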
public override BulkScorer BulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, Bits acceptDocs)
{
    // if the caller asks for in-order scoring or if the weight does not support
    // out-of-order scoring then collection will have to happen in-order.
    BulkScorer inScorer = @in.BulkScorer(context, scoreDocsInOrder, acceptDocs);
    if (inScorer == null)
    {
        return null;
    }
    if (AssertingBulkScorer.ShouldWrap(inScorer))
    {
        // The incoming scorer already has a specialized
        // implementation for BulkScorer, so we should use it:
        inScorer = AssertingBulkScorer.Wrap(new Random(Random.Next()), inScorer);
    }
    else if (Random.NextBoolean())
    {
        // Let super wrap this.scorer instead, so we use
        // AssertingScorer:
        inScorer = base.BulkScorer(context, scoreDocsInOrder, acceptDocs);
    }
    if (scoreDocsInOrder == false && Random.NextBoolean())
    {
        // The caller claims it can handle out-of-order
        // docs; let's confirm that by pulling docs and
        // randomly shuffling them before collection:
        inScorer = new AssertingBulkOutOfOrderScorer(new Random(Random.Next()), inScorer);
    }
    return inScorer;
}
private SlowCompositeReaderWrapper(CompositeReader reader)
    : base()
{
    @in = reader;
    fields = MultiFields.GetFields(@in);
    liveDocs = MultiFields.GetLiveDocs(@in);
    @in.RegisterParentReader(this);
}
/// <summary>
/// Constructor. </summary>
/// <param name="innerSet"> Underlying DocIdSet </param>
/// <param name="acceptDocs"> Allowed docs, all docids not in this set will not be returned by this DocIdSet </param>
public BitsFilteredDocIdSet(DocIdSet innerSet, Bits acceptDocs)
    : base(innerSet)
{
    if (acceptDocs == null)
    {
        throw new System.ArgumentNullException("acceptDocs");
    }
    this.AcceptDocs = acceptDocs;
}
public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
{
    FixedBitSet bits = new FixedBitSet(context.Reader.MaxDoc);
    bits.Set(Doc);
    if (acceptDocs != null && !acceptDocs.Get(Doc))
    {
        bits.Clear(Doc);
    }
    return bits;
}
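// How a consumer typically drains the DocIdSet returned above (a sketch,
// not from the source; 'filter' and 'context' are assumed to be in scope):
DocIdSet set = filter.GetDocIdSet(context, context.AtomicReader.LiveDocs);
if (set != null)
{
    DocIdSetIterator it = set.GetIterator();
    int doc;
    while ((doc = it.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
    {
        Console.WriteLine("matched doc " + doc);
    }
}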
public static void PrintDelDocs(Bits bits)
{
    if (bits == null)
    {
        return;
    }
    for (int x = 0; x < bits.Length(); x++)
    {
        Console.WriteLine(x + ":" + bits.Get(x));
    }
}
public virtual DocsAndPositionsEnum GetDocsAndPositions(AtomicReader reader, BytesRef bytes, Bits liveDocs)
{
    Terms terms = reader.Terms(FieldName);
    if (terms != null)
    {
        TermsEnum te = terms.Iterator(null);
        if (te.SeekExact(bytes))
        {
            return te.DocsAndPositions(liveDocs, null);
        }
    }
    return null;
}
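// Iterating the enum returned above (a sketch, not from the source;
// 'reader', 'bytes' and 'liveDocs' are assumed to be in scope):
DocsAndPositionsEnum dpe = GetDocsAndPositions(reader, bytes, liveDocs);
if (dpe != null)
{
    while (dpe.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        int freq = dpe.Freq();
        for (int i = 0; i < freq; i++)
        {
            int pos = dpe.NextPosition(); // positions arrive in increasing order
        }
    }
}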
public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
{
    SortedDocValues fcsi = FieldCache.GetTermsIndex(context.AtomicReader, Field);
    FixedBitSet bits = new FixedBitSet(fcsi.ValueCount);
    for (int i = 0; i < Terms.Length; i++)
    {
        int ord = fcsi.LookupTerm(Terms[i]);
        if (ord >= 0)
        {
            bits.Set(ord);
        }
    }
    return new FieldCacheDocIdSetAnonymousInnerClassHelper(this, context.Reader.MaxDoc, acceptDocs, fcsi, bits);
}
private static void CheckSortedDocValues(string fieldName, AtomicReader reader, SortedDocValues dv, Bits docsWithField)
{
    CheckBinaryDocValues(fieldName, reader, dv, docsWithField);
    int maxOrd = dv.ValueCount - 1;
    FixedBitSet seenOrds = new FixedBitSet(dv.ValueCount);
    int maxOrd2 = -1;
    for (int i = 0; i < reader.MaxDoc; i++)
    {
        int ord = dv.GetOrd(i);
        if (ord == -1)
        {
            if (docsWithField.Get(i))
            {
                throw new Exception("dv for field: " + fieldName + " has -1 ord but is not marked missing for doc: " + i);
            }
        }
        else if (ord < -1 || ord > maxOrd)
        {
            throw new Exception("ord out of bounds: " + ord);
        }
        else
        {
            if (!docsWithField.Get(i))
            {
                throw new Exception("dv for field: " + fieldName + " is missing but has ord=" + ord + " for doc: " + i);
            }
            maxOrd2 = Math.Max(maxOrd2, ord);
            seenOrds.Set(ord);
        }
    }
    if (maxOrd != maxOrd2)
    {
        throw new Exception("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2);
    }
    if (seenOrds.Cardinality() != dv.ValueCount)
    {
        throw new Exception("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.ValueCount + " but only used: " + seenOrds.Cardinality());
    }
    BytesRef lastValue = null;
    BytesRef scratch = new BytesRef();
    for (int i = 0; i <= maxOrd; i++)
    {
        dv.LookupOrd(i, scratch);
        Debug.Assert(scratch.Valid);
        if (lastValue != null)
        {
            if (scratch.CompareTo(lastValue) <= 0)
            {
                throw new Exception("dv for field: " + fieldName + " has ords out of order: " + lastValue + " >=" + scratch);
            }
        }
        lastValue = BytesRef.DeepCopyOf(scratch);
    }
}
private static void CheckNumericDocValues(string fieldName, AtomicReader reader, NumericDocValues ndv, Bits docsWithField)
{
    for (int i = 0; i < reader.MaxDoc; i++)
    {
        long value = ndv.Get(i);
        if (docsWithField.Get(i) == false && value != 0)
        {
            throw new Exception("dv for field: " + fieldName + " is marked missing but has value=" + value + " for doc: " + i);
        }
    }
}
/// <summary>
/// checks Fields api is consistent with itself.
/// searcher is optional, to verify with queries. Can be null.
/// </summary>
private static Status.TermIndexStatus CheckFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, bool doPrint, bool isVectors, TextWriter infoStream, bool verbose)
{
    // TODO: we should probably return our own stats thing...?!
    Status.TermIndexStatus status = new Status.TermIndexStatus();
    int computedFieldCount = 0;
    if (fields == null)
    {
        Msg(infoStream, "OK [no fields/terms]");
        return status;
    }
    DocsEnum docs = null;
    DocsEnum docsAndFreqs = null;
    DocsAndPositionsEnum postings = null;
    string lastField = null;
    foreach (string field in fields)
    {
        // MultiFieldsEnum relies upon this order...
        if (lastField != null && field.CompareTo(lastField) <= 0)
        {
            throw new Exception("fields out of order: lastField=" + lastField + " field=" + field);
        }
        lastField = field;
        // check that the field is in fieldinfos, and is indexed.
        // TODO: add a separate test to check this for different reader impls
        FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
        if (fieldInfo == null)
        {
            throw new Exception("fieldsEnum inconsistent with fieldInfos, no fieldInfos for: " + field);
        }
        if (!fieldInfo.Indexed)
        {
            throw new Exception("fieldsEnum inconsistent with fieldInfos, isIndexed == false for: " + field);
        }
        // TODO: really the codec should not return a field
        // from FieldsEnum if it has no Terms... but we do
        // this today:
        // assert fields.terms(field) != null;
        computedFieldCount++;
        Terms terms = fields.Terms(field);
        if (terms == null)
        {
            continue;
        }
        bool hasFreqs = terms.HasFreqs();
        bool hasPositions = terms.HasPositions();
        bool hasPayloads = terms.HasPayloads();
        bool hasOffsets = terms.HasOffsets();
        // term vectors cannot omit TF:
        bool expectedHasFreqs = (isVectors || fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS);
        if (hasFreqs != expectedHasFreqs)
        {
            throw new Exception("field \"" + field + "\" should have hasFreqs=" + expectedHasFreqs + " but got " + hasFreqs);
        }
        if (hasFreqs == false)
        {
            if (terms.SumTotalTermFreq != -1)
            {
                throw new Exception("field \"" + field + "\" hasFreqs is false, but Terms.getSumTotalTermFreq()=" + terms.SumTotalTermFreq + " (should be -1)");
            }
        }
        if (!isVectors)
        {
            bool expectedHasPositions = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            if (hasPositions != expectedHasPositions)
            {
                throw new Exception("field \"" + field + "\" should have hasPositions=" + expectedHasPositions + " but got " + hasPositions);
            }
            bool expectedHasPayloads = fieldInfo.HasPayloads();
            if (hasPayloads != expectedHasPayloads)
            {
                throw new Exception("field \"" + field + "\" should have hasPayloads=" + expectedHasPayloads + " but got " + hasPayloads);
            }
            bool expectedHasOffsets = fieldInfo.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (hasOffsets != expectedHasOffsets)
            {
                throw new Exception("field \"" + field + "\" should have hasOffsets=" + expectedHasOffsets + " but got " + hasOffsets);
            }
        }
        TermsEnum termsEnum = terms.Iterator(null);
        bool hasOrd = true;
        long termCountStart = status.DelTermCount + status.TermCount;
        BytesRef lastTerm = null;
        IComparer<BytesRef> termComp = terms.Comparator;
        long sumTotalTermFreq = 0;
        long sumDocFreq = 0;
        FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
        while (true)
        {
            BytesRef term = termsEnum.Next();
            if (term == null)
            {
                break;
            }
            Debug.Assert(term.Valid);
            // make sure terms arrive in order according to
            // the comp
            if (lastTerm == null)
            {
                lastTerm = BytesRef.DeepCopyOf(term);
            }
            else
            {
                if (termComp.Compare(lastTerm, term) >= 0)
                {
                    throw new Exception("terms out of order: lastTerm=" + lastTerm + " term=" + term);
                }
                lastTerm.CopyBytes(term);
            }
            int docFreq = termsEnum.DocFreq();
            if (docFreq <= 0)
            {
                throw new Exception("docfreq: " + docFreq + " is out of bounds");
            }
            sumDocFreq += docFreq;
            docs = termsEnum.Docs(liveDocs, docs);
            postings = termsEnum.DocsAndPositions(liveDocs, postings);
            if (hasFreqs == false)
            {
                if (termsEnum.TotalTermFreq() != -1)
                {
                    throw new Exception("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.TotalTermFreq() + " (should be -1)");
                }
            }
            if (hasOrd)
            {
                long ord = -1;
                try
                {
                    ord = termsEnum.Ord();
                }
                catch (System.NotSupportedException)
                {
                    hasOrd = false;
                }
                if (hasOrd)
                {
                    long ordExpected = status.DelTermCount + status.TermCount - termCountStart;
                    if (ord != ordExpected)
                    {
                        throw new Exception("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected);
                    }
                }
            }
            DocsEnum docs2;
            if (postings != null)
            {
                docs2 = postings;
            }
            else
            {
                docs2 = docs;
            }
            int lastDoc = -1;
            int docCount = 0;
            long totalTermFreq = 0;
            while (true)
            {
                int doc = docs2.NextDoc();
                if (doc == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }
                status.TotFreq++;
                visitedDocs.Set(doc);
                int freq = -1;
                if (hasFreqs)
                {
                    freq = docs2.Freq();
                    if (freq <= 0)
                    {
                        throw new Exception("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                    }
                    status.TotPos += freq;
                    totalTermFreq += freq;
                }
                else
                {
                    // When a field didn't index freq, it must
                    // consistently "lie" and pretend that freq was
                    // 1:
                    if (docs2.Freq() != 1)
                    {
                        throw new Exception("term " + term + ": doc " + doc + ": freq " + docs2.Freq() + " != 1 when Terms.hasFreqs() is false");
                    }
                }
                docCount++;
                if (doc <= lastDoc)
                {
                    throw new Exception("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                }
                if (doc >= maxDoc)
                {
                    throw new Exception("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                }
                lastDoc = doc;
                int lastPos = -1;
                int lastOffset = 0;
                if (hasPositions)
                {
                    for (int j = 0; j < freq; j++)
                    {
                        int pos = postings.NextPosition();
                        if (pos < 0)
                        {
                            throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                        }
                        if (pos < lastPos)
                        {
                            throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                        }
                        lastPos = pos;
                        BytesRef payload = postings.Payload;
                        if (payload != null)
                        {
                            Debug.Assert(payload.Valid);
                        }
                        if (payload != null && payload.Length < 1)
                        {
                            throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + " payload length is out of bounds " + payload.Length);
                        }
                        if (hasOffsets)
                        {
                            int startOffset = postings.StartOffset();
                            int endOffset = postings.EndOffset();
                            // NOTE: we cannot enforce any bounds whatsoever on vectors... they were a free-for-all before?
                            // but for offsets in the postings lists these checks are fine: they were always enforced by IndexWriter
                            if (!isVectors)
                            {
                                if (startOffset < 0)
                                {
                                    throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds");
                                }
                                if (startOffset < lastOffset)
                                {
                                    throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset);
                                }
                                if (endOffset < 0)
                                {
                                    throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds");
                                }
                                if (endOffset < startOffset)
                                {
                                    throw new Exception("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset);
                                }
                            }
                            lastOffset = startOffset;
                        }
                    }
                }
            }
            if (docCount != 0)
            {
                status.TermCount++;
            }
            else
            {
                status.DelTermCount++;
            }
            long totalTermFreq2 = termsEnum.TotalTermFreq();
            bool hasTotalTermFreq = hasFreqs && totalTermFreq2 != -1;
            // Re-count if there are deleted docs:
            if (liveDocs != null)
            {
                if (hasFreqs)
                {
                    DocsEnum docsNoDel = termsEnum.Docs(null, docsAndFreqs);
                    docCount = 0;
                    totalTermFreq = 0;
                    while (docsNoDel.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        visitedDocs.Set(docsNoDel.DocID());
                        docCount++;
                        totalTermFreq += docsNoDel.Freq();
                    }
                }
                else
                {
                    DocsEnum docsNoDel = termsEnum.Docs(null, docs, DocsEnum.FLAG_NONE);
                    docCount = 0;
                    totalTermFreq = -1;
                    while (docsNoDel.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        visitedDocs.Set(docsNoDel.DocID());
                        docCount++;
                    }
                }
            }
            if (docCount != docFreq)
            {
                throw new Exception("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount);
            }
            if (hasTotalTermFreq)
            {
                if (totalTermFreq2 <= 0)
                {
                    throw new Exception("totalTermFreq: " + totalTermFreq2 + " is out of bounds");
                }
                sumTotalTermFreq += totalTermFreq;
                if (totalTermFreq != totalTermFreq2)
                {
                    throw new Exception("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
                }
            }
            // Test skipping
            if (hasPositions)
            {
                for (int idx = 0; idx < 7; idx++)
                {
                    int skipDocID = (int)(((idx + 1) * (long)maxDoc) / 8);
                    postings = termsEnum.DocsAndPositions(liveDocs, postings);
                    int docID = postings.Advance(skipDocID);
                    if (docID == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        break;
                    }
                    else
                    {
                        if (docID < skipDocID)
                        {
                            throw new Exception("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
                        }
                        int freq = postings.Freq();
                        if (freq <= 0)
                        {
                            throw new Exception("termFreq " + freq + " is out of bounds");
                        }
                        int lastPosition = -1;
                        int lastOffset = 0;
                        for (int posUpto = 0; posUpto < freq; posUpto++)
                        {
                            int pos = postings.NextPosition();
                            if (pos < 0)
                            {
                                throw new Exception("position " + pos + " is out of bounds");
                            }
                            if (pos < lastPosition)
                            {
                                throw new Exception("position " + pos + " is < lastPosition " + lastPosition);
                            }
                            lastPosition = pos;
                            if (hasOffsets)
                            {
                                int startOffset = postings.StartOffset();
                                int endOffset = postings.EndOffset();
                                // NOTE: we cannot enforce any bounds whatsoever on vectors... they were a free-for-all before?
                                // but for offsets in the postings lists these checks are fine: they were always enforced by IndexWriter
                                if (!isVectors)
                                {
                                    if (startOffset < 0)
                                    {
                                        throw new Exception("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds");
                                    }
                                    if (startOffset < lastOffset)
                                    {
                                        throw new Exception("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset);
                                    }
                                    if (endOffset < 0)
                                    {
                                        throw new Exception("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds");
                                    }
                                    if (endOffset < startOffset)
                                    {
                                        throw new Exception("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset);
                                    }
                                }
                                lastOffset = startOffset;
                            }
                        }
                        int nextDocID = postings.NextDoc();
                        if (nextDocID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }
                        if (nextDocID <= docID)
                        {
                            throw new Exception("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
                        }
                    }
                }
            }
            else
            {
                for (int idx = 0; idx < 7; idx++)
                {
                    int skipDocID = (int)(((idx + 1) * (long)maxDoc) / 8);
                    docs = termsEnum.Docs(liveDocs, docs, DocsEnum.FLAG_NONE);
                    int docID = docs.Advance(skipDocID);
                    if (docID == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        break;
                    }
                    else
                    {
                        if (docID < skipDocID)
                        {
                            throw new Exception("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
                        }
                        int nextDocID = docs.NextDoc();
                        if (nextDocID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }
                        if (nextDocID <= docID)
                        {
                            throw new Exception("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
                        }
                    }
                }
            }
        }
        Terms fieldTerms = fields.Terms(field);
        if (fieldTerms == null)
        {
            // Unusual: the FieldsEnum returned a field but
            // the Terms for that field is null; this should
            // only happen if it's a ghost field (field with
            // no terms, eg there used to be terms but all
            // docs got deleted and then merged away):
        }
        else
        {
            if (fieldTerms is BlockTreeTermsReader.FieldReader)
            {
                BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader)fieldTerms).ComputeStats();
                Debug.Assert(stats != null);
                if (status.BlockTreeStats == null)
                {
                    status.BlockTreeStats = new Dictionary<string, BlockTreeTermsReader.Stats>();
                }
                status.BlockTreeStats[field] = stats;
            }
            if (sumTotalTermFreq != 0)
            {
                long v = fields.Terms(field).SumTotalTermFreq;
                if (v != -1 && sumTotalTermFreq != v)
                {
                    throw new Exception("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
                }
            }
            if (sumDocFreq != 0)
            {
                long v = fields.Terms(field).SumDocFreq;
                if (v != -1 && sumDocFreq != v)
                {
                    throw new Exception("sumDocFreq for field " + field + "=" + v + " != recomputed sumDocFreq=" + sumDocFreq);
                }
            }
            if (fieldTerms != null)
            {
                int v = fieldTerms.DocCount;
                if (v != -1 && visitedDocs.Cardinality() != v)
                {
                    throw new Exception("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.Cardinality());
                }
            }
            // Test seek to last term:
            if (lastTerm != null)
            {
                if (termsEnum.SeekCeil(lastTerm) != TermsEnum.SeekStatus.FOUND)
                {
                    throw new Exception("seek to last term " + lastTerm + " failed");
                }
                int expectedDocFreq = termsEnum.DocFreq();
                DocsEnum d = termsEnum.Docs(null, null, DocsEnum.FLAG_NONE);
                int docFreq = 0;
                while (d.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    docFreq++;
                }
                if (docFreq != expectedDocFreq)
                {
                    throw new Exception("docFreq for last term " + lastTerm + "=" + expectedDocFreq + " != recomputed docFreq=" + docFreq);
                }
            }
            // check unique term count
            long termCount = -1;
            if ((status.DelTermCount + status.TermCount) - termCountStart > 0)
            {
                termCount = fields.Terms(field).Size();
                if (termCount != -1 && termCount != status.DelTermCount + status.TermCount - termCountStart)
                {
                    throw new Exception("termCount mismatch " + (status.DelTermCount + termCount) + " vs " + (status.TermCount - termCountStart));
                }
            }
            // Test seeking by ord
            if (hasOrd && status.TermCount - termCountStart > 0)
            {
                int seekCount = (int)Math.Min(10000L, termCount);
                if (seekCount > 0)
                {
                    BytesRef[] seekTerms = new BytesRef[seekCount];
                    // Seek by ord
                    for (int i = seekCount - 1; i >= 0; i--)
                    {
                        long ord = i * (termCount / seekCount);
                        termsEnum.SeekExact(ord);
                        seekTerms[i] = BytesRef.DeepCopyOf(termsEnum.Term());
                    }
                    // Seek by term
                    long totDocCount = 0;
                    for (int i = seekCount - 1; i >= 0; i--)
                    {
                        if (termsEnum.SeekCeil(seekTerms[i]) != TermsEnum.SeekStatus.FOUND)
                        {
                            throw new Exception("seek to existing term " + seekTerms[i] + " failed");
                        }
                        docs = termsEnum.Docs(liveDocs, docs, DocsEnum.FLAG_NONE);
                        if (docs == null)
                        {
                            throw new Exception("null DocsEnum from to existing term " + seekTerms[i]);
                        }
                        while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            totDocCount++;
                        }
                    }
                    long totDocCountNoDeletes = 0;
                    long totDocFreq = 0;
                    for (int i = 0; i < seekCount; i++)
                    {
                        if (!termsEnum.SeekExact(seekTerms[i]))
                        {
                            throw new Exception("seek to existing term " + seekTerms[i] + " failed");
                        }
                        totDocFreq += termsEnum.DocFreq();
                        docs = termsEnum.Docs(null, docs, DocsEnum.FLAG_NONE);
                        if (docs == null)
                        {
                            throw new Exception("null DocsEnum from to existing term " + seekTerms[i]);
                        }
                        while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            totDocCountNoDeletes++;
                        }
                    }
                    if (totDocCount > totDocCountNoDeletes)
                    {
                        throw new Exception("more postings with deletes=" + totDocCount + " than without=" + totDocCountNoDeletes);
                    }
                    if (totDocCountNoDeletes != totDocFreq)
                    {
                        throw new Exception("docfreqs=" + totDocFreq + " != recomputed docfreqs=" + totDocCountNoDeletes);
                    }
                }
            }
        }
    }
    int fieldCount = fields.Size;
    if (fieldCount != -1)
    {
        if (fieldCount < 0)
        {
            throw new Exception("invalid fieldCount: " + fieldCount);
        }
        if (fieldCount != computedFieldCount)
        {
            throw new Exception("fieldCount mismatch " + fieldCount + " vs recomputed field count " + computedFieldCount);
        }
    }
    // for most implementations, this is boring (just the sum across all fields)
    // but codecs that don't work per-field like preflex actually implement this,
    // but don't implement it on Terms, so the check isn't redundant.
    long uniqueTermCountAllFields = fields.UniqueTermCount;
    if (uniqueTermCountAllFields != -1 && status.TermCount + status.DelTermCount != uniqueTermCountAllFields)
    {
        throw new Exception("termCount mismatch " + uniqueTermCountAllFields + " vs " + (status.TermCount + status.DelTermCount));
    }
    if (doPrint)
    {
        Msg(infoStream, "OK [" + status.TermCount + " terms; " + status.TotFreq + " terms/docs pairs; " + status.TotPos + " tokens]");
    }
    if (verbose && status.BlockTreeStats != null && infoStream != null && status.TermCount > 0)
    {
        foreach (KeyValuePair<string, BlockTreeTermsReader.Stats> ent in status.BlockTreeStats)
        {
            infoStream.WriteLine(" field \"" + ent.Key + "\":");
            infoStream.WriteLine(" " + ent.Value.ToString().Replace("\n", "\n "));
        }
    }
    return status;
}
public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs) { throw new System.NotSupportedException(); }
public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
{
    WasCalled_Renamed = true;
    return new FixedBitSet(context.Reader.MaxDoc);
}
public FieldCacheDocIdSetAnonymousInnerClassHelper(FieldCacheTermsFilter outerInstance, int maxDoc, Bits acceptDocs, SortedDocValues fcsi, FixedBitSet bits)
    : base(maxDoc, acceptDocs)
{
    this.OuterInstance = outerInstance;
    this.Fcsi = fcsi;
    this.Bits = bits;
}
public override Bits GetBits()
{
    Bits bits = _innerSet.GetBits();
    return (bits == null) ? null : new BitsAnonymousInnerClassHelper(this, bits);
}
public BitsAnonymousInnerClassHelper(FilteredDocIdSet outerInstance, Bits bits)
{
    this.OuterInstance = outerInstance;
    this.Bits = bits;
}
public FieldCacheDocIdSetAnonymousInnerClassHelper2(FieldValueFilter outerInstance, int maxDoc, Bits acceptDocs, Bits docsWithField)
    : base(maxDoc, acceptDocs)
{
    this.OuterInstance = outerInstance;
    this.DocsWithField = docsWithField;
}
private static void CheckSortedSetDocValues(string fieldName, AtomicReader reader, SortedSetDocValues dv, Bits docsWithField)
{
    long maxOrd = dv.ValueCount - 1;
    LongBitSet seenOrds = new LongBitSet(dv.ValueCount);
    long maxOrd2 = -1;
    for (int i = 0; i < reader.MaxDoc; i++)
    {
        dv.Document = i;
        long lastOrd = -1;
        long ord;
        if (docsWithField.Get(i))
        {
            int ordCount = 0;
            while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
            {
                if (ord <= lastOrd)
                {
                    throw new Exception("ords out of order: " + ord + " <= " + lastOrd + " for doc: " + i);
                }
                if (ord < 0 || ord > maxOrd)
                {
                    throw new Exception("ord out of bounds: " + ord);
                }
                if (dv is RandomAccessOrds)
                {
                    long ord2 = ((RandomAccessOrds)dv).OrdAt(ordCount);
                    if (ord != ord2)
                    {
                        throw new Exception("ordAt(" + ordCount + ") inconsistent, expected=" + ord + ",got=" + ord2 + " for doc: " + i);
                    }
                }
                lastOrd = ord;
                maxOrd2 = Math.Max(maxOrd2, ord);
                seenOrds.Set(ord);
                ordCount++;
            }
            if (ordCount == 0)
            {
                throw new Exception("dv for field: " + fieldName + " has no ordinals but is not marked missing for doc: " + i);
            }
            if (dv is RandomAccessOrds)
            {
                long ordCount2 = ((RandomAccessOrds)dv).Cardinality();
                if (ordCount != ordCount2)
                {
                    throw new Exception("cardinality inconsistent, expected=" + ordCount + ",got=" + ordCount2 + " for doc: " + i);
                }
            }
        }
        else
        {
            long o = dv.NextOrd();
            if (o != SortedSetDocValues.NO_MORE_ORDS)
            {
                throw new Exception("dv for field: " + fieldName + " is marked missing but has ord=" + o + " for doc: " + i);
            }
            if (dv is RandomAccessOrds)
            {
                long ordCount2 = ((RandomAccessOrds)dv).Cardinality();
                if (ordCount2 != 0)
                {
                    throw new Exception("dv for field: " + fieldName + " is marked missing but has cardinality " + ordCount2 + " for doc: " + i);
                }
            }
        }
    }
    if (maxOrd != maxOrd2)
    {
        throw new Exception("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2);
    }
    if (seenOrds.Cardinality() != dv.ValueCount)
    {
        throw new Exception("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.ValueCount + " but only used: " + seenOrds.Cardinality());
    }
    BytesRef lastValue = null;
    BytesRef scratch = new BytesRef();
    for (long i = 0; i <= maxOrd; i++)
    {
        dv.LookupOrd(i, scratch);
        Debug.Assert(scratch.Valid);
        if (lastValue != null)
        {
            if (scratch.CompareTo(lastValue) <= 0)
            {
                throw new Exception("dv for field: " + fieldName + " has ords out of order: " + lastValue + " >=" + scratch);
            }
        }
        lastValue = BytesRef.DeepCopyOf(scratch);
    }
}
public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
{
    DocIdSet innerNullIteratorSet = new DocIdSetAnonymousInnerClassHelper2(this);
    return new FilteredDocIdSetAnonymousInnerClassHelper2(this, innerNullIteratorSet);
}
public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
{
    SortedSetDocValues docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(context.AtomicReader, Field);
    long lowerPoint = LowerVal == null ? -1 : docTermOrds.LookupTerm(LowerVal);
    long upperPoint = UpperVal == null ? -1 : docTermOrds.LookupTerm(UpperVal);
    long inclusiveLowerPoint, inclusiveUpperPoint;
    // Hints:
    // * binarySearchLookup returns -1, if value was null.
    // * the value is <0 if no exact hit was found, the returned value
    //   is (-(insertion point) - 1)
    if (lowerPoint == -1 && LowerVal == null)
    {
        inclusiveLowerPoint = 0;
    }
    else if (IncludeLower && lowerPoint >= 0)
    {
        inclusiveLowerPoint = lowerPoint;
    }
    else if (lowerPoint >= 0)
    {
        inclusiveLowerPoint = lowerPoint + 1;
    }
    else
    {
        inclusiveLowerPoint = Math.Max(0, -lowerPoint - 1);
    }
    if (upperPoint == -1 && UpperVal == null)
    {
        inclusiveUpperPoint = long.MaxValue;
    }
    else if (IncludeUpper && upperPoint >= 0)
    {
        inclusiveUpperPoint = upperPoint;
    }
    else if (upperPoint >= 0)
    {
        inclusiveUpperPoint = upperPoint - 1;
    }
    else
    {
        inclusiveUpperPoint = -upperPoint - 2;
    }
    if (inclusiveUpperPoint < 0 || inclusiveLowerPoint > inclusiveUpperPoint)
    {
        return null;
    }
    Debug.Assert(inclusiveLowerPoint >= 0 && inclusiveUpperPoint >= 0);
    return new FieldCacheDocIdSetAnonymousInnerClassHelper(this, context.AtomicReader.MaxDoc, acceptDocs, docTermOrds, inclusiveLowerPoint, inclusiveUpperPoint);
}
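// Worked example of the LookupTerm encoding referenced in the hints above
// (illustrative values, not from the source): with a terms dictionary
// {"apple"=0, "pear"=1}, LookupTerm("apple") returns 0 (exact hit), while
// LookupTerm("banana") returns -2, i.e. -(insertion point) - 1 with
// insertion point 1. The first ord >= "banana" is therefore
// -(-2) - 1 = 1 ("pear"), which is exactly what the
// Math.Max(0, -lowerPoint - 1) branch above computes for an inexact miss.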
public FieldCacheDocIdSet(int maxDoc, Bits acceptDocs)
{
    this.MaxDoc = maxDoc;
    this.AcceptDocs = acceptDocs;
}
public override BulkScorer BulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, Bits acceptDocs) { return new BulkScorerAnonymousInnerClassHelper(this); }
private static void CheckBinaryDocValues(string fieldName, AtomicReader reader, BinaryDocValues dv, Bits docsWithField)
{
    BytesRef scratch = new BytesRef();
    for (int i = 0; i < reader.MaxDoc; i++)
    {
        dv.Get(i, scratch);
        Debug.Assert(scratch.Valid);
        if (docsWithField.Get(i) == false && scratch.Length > 0)
        {
            throw new Exception("dv for field: " + fieldName + " is missing but has value=" + scratch + " for doc: " + i);
        }
    }
}
/// <summary>
/// Convenience wrapper method: If <c>acceptDocs == null</c> it returns the original set without wrapping. </summary>
/// <param name="set"> Underlying DocIdSet. If <c>null</c>, this method returns <c>null</c> </param>
/// <param name="acceptDocs"> Allowed docs, all docids not in this set will not be returned by this DocIdSet.
/// If <c>null</c>, this method returns the original set without wrapping. </param>
public static DocIdSet Wrap(DocIdSet set, Bits acceptDocs)
{
    return (set == null || acceptDocs == null) ? set : new BitsFilteredDocIdSet(set, acceptDocs);
}
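// Typical use of Wrap (a sketch, not from the source; 'innerSet' and
// 'acceptDocs' are assumed to be in scope). Wrap is null-safe on both
// arguments, so callers never need to branch:
DocIdSet filtered = BitsFilteredDocIdSet.Wrap(innerSet, acceptDocs);
// filtered == innerSet when acceptDocs == null, and null when innerSet == null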
/// <summary>
/// Must fully consume state, since after this call that
/// TermState may be reused.
/// </summary>
public abstract DocsAndPositionsEnum DocsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse, int flags);
public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs) { return new PayloadTermSpanScorer(this, (TermSpans)query.GetSpans(context, acceptDocs, TermContexts), this, Similarity.DoSimScorer(Stats, context)); }
public override DocsAndPositionsEnum DocsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { throw new System.NotSupportedException(); }
public virtual DocsEnum RandomDocsEnum(string field, BytesRef term, IList<AtomicReaderContext> readers, Bits bits)
{
    if (Random().Next(10) == 0)
    {
        return null;
    }
    AtomicReader indexReader = (AtomicReader)readers[Random().Next(readers.Count)].Reader;
    Terms terms = indexReader.Terms(field);
    if (terms == null)
    {
        return null;
    }
    TermsEnum iterator = terms.Iterator(null);
    if (iterator.SeekExact(term))
    {
        return iterator.Docs(bits, null, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
    }
    return null;
}
public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs) { return null; }
public override BulkScorer BulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, Bits acceptDocs)
{
    // TODO: it could be better if we take acceptDocs
    // into account instead of baseScorer?
    Scorer baseScorer = baseWeight.Scorer(context, acceptDocs);
    DrillSidewaysScorer.DocsAndCost[] dims = new DrillSidewaysScorer.DocsAndCost[drillDowns.Length];
    int nullCount = 0;
    for (int dim = 0; dim < dims.Length; dim++)
    {
        dims[dim] = new DrillSidewaysScorer.DocsAndCost();
        dims[dim].sidewaysCollector = outerInstance.drillSidewaysCollectors[dim];
        if (drillDowns[dim] is Filter)
        {
            // Pass null for acceptDocs because we already
            // passed it to baseScorer and baseScorer is
            // MUST'd here
            DocIdSet dis = ((Filter)drillDowns[dim]).GetDocIdSet(context, null);
            if (dis == null)
            {
                continue;
            }
            Bits bits = dis.GetBits();
            if (bits != null)
            {
                // TODO: this logic is too naive: the
                // existence of bits() in DIS today means
                // either "I'm a cheap FixedBitSet so apply me down
                // low as you decode the postings" or "I'm so
                // horribly expensive so apply me after all
                // other Query/Filter clauses pass"
                // Filter supports random access; use that to
                // prevent .advance() on costly filters:
                dims[dim].bits = bits;
                // TODO: Filter needs to express its expected
                // cost somehow, before pulling the iterator;
                // we should use that here to set the order to
                // check the filters:
            }
            else
            {
                DocIdSetIterator disi = dis.GetIterator();
                if (disi == null)
                {
                    nullCount++;
                    continue;
                }
                dims[dim].disi = disi;
            }
        }
        else
        {
            DocIdSetIterator disi = ((Weight)drillDowns[dim]).Scorer(context, null);
            if (disi == null)
            {
                nullCount++;
                continue;
            }
            dims[dim].disi = disi;
        }
    }
    // If more than one dim has no matches, then there
    // are no hits nor drill-sideways counts. Or, if we
    // have only one dim and that dim has no matches,
    // same thing.
    //if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) {
    if (nullCount > 1)
    {
        return null;
    }
    // Sort drill-downs by most restrictive first:
    Array.Sort(dims);
    if (baseScorer == null)
    {
        return null;
    }
    return new DrillSidewaysScorer(context, baseScorer, outerInstance.drillDownCollector, dims, outerInstance.scoreSubDocsAtOnce);
}
/// <summary>
/// This method is implemented for each data type. </summary>
public abstract override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs);
public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs)
{
    // We can only run as a top scorer:
    throw new System.NotSupportedException();
}
public FieldCacheDocIdSetAnonymousInnerClassHelper(DocTermOrdsRangeFilterAnonymousInnerClassHelper outerInstance, int maxDoc, Bits acceptDocs, SortedSetDocValues docTermOrds, long inclusiveLowerPoint, long inclusiveUpperPoint)
    : base(maxDoc, acceptDocs)
{
    this.OuterInstance = outerInstance;
    this.DocTermOrds = docTermOrds;
    this.InclusiveLowerPoint = inclusiveLowerPoint;
    this.InclusiveUpperPoint = inclusiveUpperPoint;
}