internal MappingMultiDocsEnum Reset(MultiDocsEnum docsEnum) { this.NumSubs_Renamed = docsEnum.NumSubs; this.Subs_Renamed = docsEnum.Subs; Upto = -1; Current = null; return this; }
/// <summary> /// Look up the given category in the cache and/or the on-disk storage, /// returning the category's ordinal, or a negative number in case the /// category does not yet exist in the taxonomy. /// </summary> protected virtual int FindCategory(FacetLabel categoryPath) { lock (this) { // If we can find the category in the cache, or we know the cache is // complete, we can return the response directly from it int res = cache.Get(categoryPath); if (res >= 0 || cacheIsComplete) { return(res); } cacheMisses.IncrementAndGet(); // After a few cache misses, it makes sense to read all the categories // from disk and into the cache. The reason not to do this on the first // cache miss (or even when opening the writer) is that it will // significantly slow down the case when a taxonomy is opened just to // add one category. The idea of only spending a long time on reading // after enough time was spent on cache misses is known as an "online // algorithm". PerhapsFillCache(); res = cache.Get(categoryPath); if (res >= 0 || cacheIsComplete) { // if after filling the cache from the info on disk, the category is in it // or the cache is complete, return whatever cache.get returned. return(res); } // if we get here, it means the category is not in the cache and the cache is not // complete, and therefore we must look for the category on disk. // We need to get an answer from the on-disk index. InitReaderManager(); int doc = -1; DirectoryReader reader = readerManager.Acquire(); try { BytesRef catTerm = new BytesRef(FacetsConfig.PathToString(categoryPath.Components, categoryPath.Length)); TermsEnum termsEnum = null; // reuse DocsEnum docs = null; // reuse foreach (AtomicReaderContext ctx in reader.Leaves) { Terms terms = ctx.AtomicReader.GetTerms(Consts.FULL); if (terms != null) { termsEnum = terms.GetIterator(termsEnum); if (termsEnum.SeekExact(catTerm)) { // liveDocs=null because the taxonomy has no deletes docs = termsEnum.Docs(null, docs, 0); // freqs not required // if the term was found, we know it has exactly one document. doc = docs.NextDoc() + ctx.DocBase; break; } } } } finally { readerManager.Release(reader); } if (doc > 0) { AddToCache(categoryPath, doc); } return(doc); } }
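The lookup above works because every category is indexed as a single term in the Consts.FULL field, produced by FacetsConfig.PathToString. A minimal sketch of that round trip, assuming only that the Lucene.Net.Facet package is referenced (the class name and label components are made up for illustration):

using System;
using Lucene.Net.Facet;
using Lucene.Net.Facet.Taxonomy;

public static class PathRoundTripSketch
{
    public static void Main()
    {
        // A category path with two components, as FindCategory receives it (illustrative values).
        FacetLabel label = new FacetLabel("Author", "Mark Twain");

        // PathToString flattens the components into the single term string that
        // FindCategory seeks in the Consts.FULL field.
        string flat = FacetsConfig.PathToString(label.Components, label.Length);

        // StringToPath is the inverse mapping, used when the cache is refilled from disk.
        string[] components = FacetsConfig.StringToPath(flat);

        Console.WriteLine(string.Join("/", components)); // Author/Mark Twain
    }
}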
/// <summary> /// For a DocsEnum, gets the 'other' reused enum. /// Example: Pulsing(Standard). /// When doing a term range query you are switching back and forth /// between Pulsing and Standard. /// /// The way the reuse works is that Pulsing.other = Standard and /// Standard.other = Pulsing. /// </summary> private DocsEnum GetOther(DocsEnum de) { if (de == null) return null; var atts = de.Attributes(); DocsEnum result; atts.AddAttribute<IPulsingEnumAttribute>().Enums().TryGetValue(this, out result); return result; }
private void DuellReaders(CompositeReader other, AtomicReader memIndexReader) { AtomicReader competitor = SlowCompositeReaderWrapper.Wrap(other); Fields memFields = memIndexReader.Fields; foreach (string field in competitor.Fields) { Terms memTerms = memFields.GetTerms(field); Terms iwTerms = memIndexReader.GetTerms(field); if (iwTerms == null) { assertNull(memTerms); } else { NumericDocValues normValues = competitor.GetNormValues(field); NumericDocValues memNormValues = memIndexReader.GetNormValues(field); if (normValues != null) { // mem idx always computes norms on the fly assertNotNull(memNormValues); assertEquals(normValues.Get(0), memNormValues.Get(0)); } assertNotNull(memTerms); assertEquals(iwTerms.DocCount, memTerms.DocCount); assertEquals(iwTerms.SumDocFreq, memTerms.SumDocFreq); assertEquals(iwTerms.SumTotalTermFreq, memTerms.SumTotalTermFreq); TermsEnum iwTermsIter = iwTerms.GetIterator(null); TermsEnum memTermsIter = memTerms.GetIterator(null); if (iwTerms.HasPositions) { bool offsets = iwTerms.HasOffsets && memTerms.HasOffsets; while (iwTermsIter.Next() != null) { assertNotNull(memTermsIter.Next()); assertEquals(iwTermsIter.Term, memTermsIter.Term); DocsAndPositionsEnum iwDocsAndPos = iwTermsIter.DocsAndPositions(null, null); DocsAndPositionsEnum memDocsAndPos = memTermsIter.DocsAndPositions(null, null); while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) { assertEquals(iwDocsAndPos.DocID, memDocsAndPos.NextDoc()); assertEquals(iwDocsAndPos.Freq, memDocsAndPos.Freq); for (int i = 0; i < iwDocsAndPos.Freq; i++) { assertEquals("term: " + iwTermsIter.Term.Utf8ToString(), iwDocsAndPos.NextPosition(), memDocsAndPos.NextPosition()); if (offsets) { assertEquals(iwDocsAndPos.StartOffset, memDocsAndPos.StartOffset); assertEquals(iwDocsAndPos.EndOffset, memDocsAndPos.EndOffset); } } } } } else { while (iwTermsIter.Next() != null) { assertEquals(iwTermsIter.Term, memTermsIter.Term); DocsEnum iwDocsAndPos = iwTermsIter.Docs(null, null); DocsEnum memDocsAndPos = memTermsIter.Docs(null, null); while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) { assertEquals(iwDocsAndPos.DocID, memDocsAndPos.NextDoc()); assertEquals(iwDocsAndPos.Freq, memDocsAndPos.Freq); } } } } } }
private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, bool multipleValuesPerDocument, bool scoreDocsInOrder) { IndexIterationContext context = new IndexIterationContext(); int numRandomValues = nDocs / 2; context.RandomUniqueValues = new string[numRandomValues]; ISet <string> trackSet = new HashSet <string>(); context.RandomFrom = new bool[numRandomValues]; for (int i = 0; i < numRandomValues; i++) { string uniqueRandomValue; do { uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random()); // uniqueRandomValue = TestUtil.randomSimpleString(random); } while ("".Equals(uniqueRandomValue, StringComparison.Ordinal) || trackSet.Contains(uniqueRandomValue)); // Generate unique values and empty strings aren't allowed. trackSet.Add(uniqueRandomValue); context.RandomFrom[i] = Random().NextBoolean(); context.RandomUniqueValues[i] = uniqueRandomValue; } RandomDoc[] docs = new RandomDoc[nDocs]; for (int i = 0; i < nDocs; i++) { string id = Convert.ToString(i); int randomI = Random().Next(context.RandomUniqueValues.Length); string value = context.RandomUniqueValues[randomI]; Document document = new Document(); document.Add(NewTextField(Random(), "id", id, Field.Store.NO)); document.Add(NewTextField(Random(), "value", value, Field.Store.NO)); bool from = context.RandomFrom[randomI]; int numberOfLinkValues = multipleValuesPerDocument ? 2 + Random().Next(10) : 1; docs[i] = new RandomDoc(id, numberOfLinkValues, value, from); for (int j = 0; j < numberOfLinkValues; j++) { string linkValue = context.RandomUniqueValues[Random().Next(context.RandomUniqueValues.Length)]; docs[i].LinkValues.Add(linkValue); if (from) { if (!context.FromDocuments.ContainsKey(linkValue)) { context.FromDocuments[linkValue] = new List <RandomDoc>(); } if (!context.RandomValueFromDocs.ContainsKey(value)) { context.RandomValueFromDocs[value] = new List <RandomDoc>(); } context.FromDocuments[linkValue].Add(docs[i]); context.RandomValueFromDocs[value].Add(docs[i]); document.Add(NewTextField(Random(), "from", linkValue, Field.Store.NO)); } else { if (!context.ToDocuments.ContainsKey(linkValue)) { context.ToDocuments[linkValue] = new List <RandomDoc>(); } if (!context.RandomValueToDocs.ContainsKey(value)) { context.RandomValueToDocs[value] = new List <RandomDoc>(); } context.ToDocuments[linkValue].Add(docs[i]); context.RandomValueToDocs[value].Add(docs[i]); document.Add(NewTextField(Random(), "to", linkValue, Field.Store.NO)); } } RandomIndexWriter w; if (from) { w = fromWriter; } else { w = toWriter; } w.AddDocument(document); if (Random().Next(10) == 4) { w.Commit(); } if (VERBOSE) { Console.WriteLine("Added document[" + docs[i].Id + "]: " + document); } } // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for // any ScoreMode. 
IndexSearcher fromSearcher = NewSearcher(fromWriter.Reader); IndexSearcher toSearcher = NewSearcher(toWriter.Reader); for (int i = 0; i < context.RandomUniqueValues.Length; i++) { string uniqueRandomValue = context.RandomUniqueValues[i]; string fromField; string toField; IDictionary <string, IDictionary <int, JoinScore> > queryVals; if (context.RandomFrom[i]) { fromField = "from"; toField = "to"; queryVals = context.FromHitsToJoinScore; } else { fromField = "to"; toField = "from"; queryVals = context.ToHitsToJoinScore; } IDictionary <BytesRef, JoinScore> joinValueToJoinScores = new Dictionary <BytesRef, JoinScore>(); if (multipleValuesPerDocument) { fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousInnerClassHelper3(this, context, fromField, joinValueToJoinScores)); } else { fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousInnerClassHelper4(this, context, fromField, joinValueToJoinScores)); } IDictionary <int, JoinScore> docToJoinScore = new Dictionary <int, JoinScore>(); if (multipleValuesPerDocument) { if (scoreDocsInOrder) { AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader); Terms terms = slowCompositeReader.GetTerms(toField); if (terms != null) { DocsEnum docsEnum = null; TermsEnum termsEnum = null; SortedSet <BytesRef> joinValues = new SortedSet <BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer); joinValues.AddAll(joinValueToJoinScores.Keys); foreach (BytesRef joinValue in joinValues) { termsEnum = terms.GetIterator(termsEnum); if (termsEnum.SeekExact(joinValue)) { docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsFlags.NONE); JoinScore joinScore = joinValueToJoinScores[joinValue]; for (int doc = docsEnum.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.NextDoc()) { // First encountered join value determines the score. // Something to keep in mind for many-to-many relations. if (!docToJoinScore.ContainsKey(doc)) { docToJoinScore[doc] = joinScore; } } } } } } else { toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousInnerClassHelper5(this, context, toField, joinValueToJoinScores, docToJoinScore)); } } else { toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousInnerClassHelper6(this, toField, joinValueToJoinScores, docToJoinScore)); } queryVals[uniqueRandomValue] = docToJoinScore; } fromSearcher.IndexReader.Dispose(); toSearcher.IndexReader.Dispose(); return(context); }
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) { DecodeMetaData(); return(_blockTermsReader._postingsReader.Docs(_fieldReader._fieldInfo, _state, liveDocs, reuse, flags)); }
public override int NextDoc() { while (true) { if (Current == null) { if (Upto == NumSubs_Renamed - 1) { return this.Doc = NO_MORE_DOCS; } else { Upto++; int reader = Subs_Renamed[Upto].Slice.ReaderIndex; Current = Subs_Renamed[Upto].DocsEnum; CurrentBase = MergeState_Renamed.DocBase[reader]; CurrentMap = MergeState_Renamed.DocMaps[reader]; Debug.Assert(CurrentMap.MaxDoc == Subs_Renamed[Upto].Slice.Length, "readerIndex=" + reader + " subs.len=" + Subs_Renamed.Length + " len1=" + CurrentMap.MaxDoc + " vs " + Subs_Renamed[Upto].Slice.Length); } } int doc = Current.NextDoc(); if (doc != NO_MORE_DOCS) { // compact deletions doc = CurrentMap.Get(doc); if (doc == -1) { continue; } return this.Doc = CurrentBase + doc; } else { Current = null; } } }
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) { DecodeMetaData(); return _blockTermsReader._postingsReader.Docs(_fieldReader._fieldInfo, _state, liveDocs, reuse, flags); }
public override DocsEnum Docs(Bits skipDocs, DocsEnum reuse, int flags) { Debug.Assert(!Eof); //if (DEBUG) { //System.out.println("BTTR.docs seg=" + segment); //} CurrentFrame.DecodeMetaData(); //if (DEBUG) { //System.out.println(" state=" + currentFrame.state); //} return OuterInstance.OuterInstance.PostingsReader.Docs(OuterInstance.fieldInfo, CurrentFrame.State, skipDocs, reuse, flags); }
public override DocsEnum Docs(Bits skipDocs, DocsEnum reuse, int flags) { CurrentFrame.DecodeMetaData(); return OuterInstance.OuterInstance.PostingsReader.Docs(OuterInstance.fieldInfo, CurrentFrame.TermState, skipDocs, reuse, flags); }
/// <summary> /// For a DocsEnum, sets the 'other' reused enum. /// See GetOther for an example. /// </summary> private DocsEnum SetOther(DocsEnum de, DocsEnum other) { var atts = de.Attributes(); return atts.AddAttribute<IPulsingEnumAttribute>().Enums()[this] = other; }
/// <summary> /// For a DocsEnum, gets the 'other' reused enum. /// Example: Pulsing(Standard). /// When doing a term range query you are switching back and forth /// between Pulsing and Standard. /// /// The way the reuse works is that Pulsing.other = Standard and /// Standard.other = Pulsing. /// </summary> private DocsEnum GetOther(DocsEnum de) { if (de == null) return null; var atts = de.Attributes(); return atts.AddAttribute<IPulsingEnumAttribute>().Enums()[this]; }
public override DocsEnum Docs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, int flags) { var termState = (PulsingTermState) _termState; if (termState.PostingsSize != -1) { PulsingDocsEnum postings; if (reuse is PulsingDocsEnum) { postings = (PulsingDocsEnum) reuse; if (!postings.CanReuse(field)) { postings = new PulsingDocsEnum(field); } } else { // the 'reuse' is actually the wrapped enum var previous = (PulsingDocsEnum) GetOther(reuse); if (previous != null && previous.CanReuse(field)) { postings = previous; } else { postings = new PulsingDocsEnum(field); } } if (reuse != postings) SetOther(postings, reuse); // postings.other = reuse return postings.Reset(liveDocs, termState); } if (!(reuse is PulsingDocsEnum)) return _wrappedPostingsReader.Docs(field, termState.WrappedTermState, liveDocs, reuse, flags); var wrapped = _wrappedPostingsReader.Docs(field, termState.WrappedTermState, liveDocs, GetOther(reuse), flags); SetOther(wrapped, reuse); // wrapped.other = reuse return wrapped; }
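The GetOther/SetOther pairing above only pays off when callers keep handing the previously returned enum back as the reuse argument. A hedged sketch of that calling pattern against the public API (the reader, field name, and class name are illustrative; the DocsFlags-based signature is assumed, matching the newer snippets in this set):

using Lucene.Net.Index;
using Lucene.Net.Search;

public static class ReusePatternSketch
{
    // Iterates every posting of every term in one field, always offering the last
    // enum back for reuse. With the pulsing codec, low-frequency terms come back as a
    // PulsingDocsEnum and high-frequency terms as the wrapped codec's enum;
    // GetOther/SetOther keep the idle counterpart reachable between the two.
    public static void ConsumeAllPostings(IndexReader reader, string field)
    {
        Terms terms = MultiFields.GetTerms(reader, field);
        if (terms == null) return;

        TermsEnum termsEnum = terms.GetIterator(null);
        DocsEnum reuse = null;
        while (termsEnum.Next() != null)
        {
            reuse = termsEnum.Docs(null, reuse, DocsFlags.NONE);
            while (reuse.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                // consume the document id
            }
        }
    }
}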
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) { DocsEnum inReuse; SortingDocsEnum wrapReuse; if (reuse != null && reuse is SortingDocsEnum) { // if we're asked to reuse the given DocsEnum and it is Sorting, return // the wrapped one, since some Codecs expect it. wrapReuse = (SortingDocsEnum)reuse; inReuse = wrapReuse.Wrapped; } else { wrapReuse = null; inReuse = reuse; } DocsEnum inDocs = @in.Docs(NewToOld(liveDocs), inReuse, flags); bool withFreqs = indexOptions.GetValueOrDefault().CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS) >= 0 && (flags & DocsEnum.FLAG_FREQS) != 0; return new SortingDocsEnum(docMap.Count, wrapReuse, inDocs, withFreqs, docMap); }
public override int GetOrdinal(FacetLabel cp) { EnsureOpen(); if (cp.Length == 0) { return(ROOT_ORDINAL); } // First try to find the answer in the LRU cache: // LUCENENET: Despite LRUHashMap being thread-safe, we get much better performance // if reads are separated from writes. ordinalCacheLock.EnterReadLock(); try { if (ordinalCache.TryGetValue(cp, out Int32Class res)) { if (res < indexReader.MaxDoc) { // Since the cache is shared with DTR instances allocated from // doOpenIfChanged, we need to ensure that the ordinal is one that // this DTR instance recognizes. return(res); } else { // if we get here, it means that the category was found in the cache, // but is not recognized by this TR instance. Therefore there's no // need to continue searching for the path on disk, because we won't find // it there either. return(TaxonomyReader.INVALID_ORDINAL); } } } finally { ordinalCacheLock.ExitReadLock(); } // If we're still here, we have a cache miss. We need to fetch the // value from disk, and then also put it in the cache: int ret = TaxonomyReader.INVALID_ORDINAL; DocsEnum docs = MultiFields.GetTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(FacetsConfig.PathToString(cp.Components, cp.Length)), 0); if (docs != null && docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { ret = docs.DocID; // we only store the fact that a category exists, not its nonexistence. // This is required because the caches are shared with new DTR instances // that are allocated from doOpenIfChanged. Therefore, if we only store // information about found categories, we cannot accidentally tell a new // generation of DTR that a category does not exist. ordinalCacheLock.EnterWriteLock(); try { ordinalCache[cp] = ret; } finally { ordinalCacheLock.ExitWriteLock(); } } return(ret); }
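A short, hedged usage sketch of this public entry point (the taxonomy directory path, category components, and wrapper class are made up):

using System.IO;
using Lucene.Net.Facet.Taxonomy;
using Lucene.Net.Facet.Taxonomy.Directory;
using Lucene.Net.Store;

public static class OrdinalLookupSketch
{
    public static int LookUp(string taxoPath)
    {
        // Illustrative path and category; GetOrdinal returns TaxonomyReader.INVALID_ORDINAL
        // when the category has never been added to the taxonomy.
        using (var taxoDir = FSDirectory.Open(new DirectoryInfo(taxoPath)))
        using (var taxoReader = new DirectoryTaxonomyReader(taxoDir))
        {
            return taxoReader.GetOrdinal(new FacetLabel("Author", "Lisa"));
        }
    }
}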
/// <summary> /// Checks the terms enum sequentially. /// If deep is false, it does a 'shallow' test that doesn't go down to the docs enums /// </summary> public virtual void AssertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, bool deep) { BytesRef term; Bits randomBits = new RandomBits(MAXDOC, Random().NextDouble(), Random()); DocsAndPositionsEnum leftPositions = null; DocsAndPositionsEnum rightPositions = null; DocsEnum leftDocs = null; DocsEnum rightDocs = null; while ((term = leftTermsEnum.Next()) != null) { Assert.AreEqual(term, rightTermsEnum.Next()); AssertTermStats(leftTermsEnum, rightTermsEnum); if (deep) { // with payloads + off AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions)); AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions)); AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions)); AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions)); // with payloads only AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS)); AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS)); AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS)); AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS)); // with offsets only AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS)); AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS)); AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS)); AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS)); // with positions only
AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsEnum.FLAG_NONE)); AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_NONE)); AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsEnum.FLAG_NONE)); AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_NONE)); // with freqs: AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs)); AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs)); // w/o freqs: AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsEnum.FLAG_NONE)); AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsEnum.FLAG_NONE)); // with freqs: AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs)); AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs)); // w/o freqs: AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsEnum.FLAG_NONE)); AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsEnum.FLAG_NONE)); } } Assert.IsNull(rightTermsEnum.Next()); }
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) { DecodeMetaData(); FSTDocsEnum docsEnum; if (reuse == null || !(reuse is FSTDocsEnum)) { docsEnum = new FSTDocsEnum(field.IndexOptions, field.HasPayloads); } else { docsEnum = (FSTDocsEnum) reuse; if (!docsEnum.CanReuse(field.IndexOptions, field.HasPayloads)) { docsEnum = new FSTDocsEnum(field.IndexOptions, field.HasPayloads); } } return docsEnum.Reset(this.postingsSpare, liveDocs, docFreq_Renamed); }
internal SortingDocsEnum(int maxDoc, SortingDocsEnum reuse, DocsEnum @in, bool withFreqs, Sorter.DocMap docMap) : base(@in) { this.maxDoc = maxDoc; this.withFreqs = withFreqs; if (reuse != null) { if (reuse.maxDoc == maxDoc) { sorter = reuse.sorter; } else { sorter = new DocFreqSorter(maxDoc); } docs = reuse.docs; freqs = reuse.freqs; // maybe null } else { docs = new int[64]; sorter = new DocFreqSorter(maxDoc); } docIt = -1; int i = 0; int doc; if (withFreqs) { if (freqs == null || freqs.Length < docs.Length) { freqs = new int[docs.Length]; } while ((doc = @in.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (i >= docs.Length) { docs = ArrayUtil.Grow(docs, docs.Length + 1); freqs = ArrayUtil.Grow(freqs, freqs.Length + 1); } docs[i] = docMap.OldToNew(doc); freqs[i] = @in.Freq; ++i; } } else { freqs = null; while ((doc = @in.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (i >= docs.Length) { docs = ArrayUtil.Grow(docs, docs.Length + 1); } docs[i++] = docMap.OldToNew(doc); } } // TimSort can save much time compared to other sorts in case of // reverse sorting, or when sorting a concatenation of sorted readers sorter.Reset(docs, freqs); sorter.Sort(0, i); upto = i; }
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) { // TODO: reuse var e = new SimpleTVDocsEnum(); e.Reset(liveDocs, (flags & DocsEnum.FLAG_FREQS) == 0 ? 1 : _current.Value.FREQ); return e; }
public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags) { return(actualEnum.Docs(liveDocs, reuse, flags)); }
public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory) { #if FEATURE_STRING_INTERN string field = string.Intern(fieldName); #else string field = fieldName; #endif int maxDoc = reader.MaxDoc; int dictValueCount = GetDictValueCount(reader, fieldName); BigSegmentedArray order = NewInstance(dictValueCount, maxDoc); this.m_orderArray = order; List <int> minIDList = new List <int>(); List <int> maxIDList = new List <int>(); List <int> freqList = new List <int>(); int length = maxDoc + 1; ITermValueList list = listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList(); int negativeValueCount = GetNegativeValueCount(reader, field); int t = 1; // valid term id starts from 1 list.Add(null); minIDList.Add(-1); maxIDList.Add(-1); freqList.Add(0); int totalFreq = 0; Terms terms = reader.GetTerms(field); if (terms != null) { TermsEnum termsEnum = terms.GetIterator(null); BytesRef text; while ((text = termsEnum.Next()) != null) { // store term text // we expect that there is at most one term per document if (t >= length) { throw new RuntimeException("there are more terms than " + "documents in field \"" + field + "\", but it's impossible to sort on " + "tokenized fields"); } string strText = text.Utf8ToString(); list.Add(strText); Term term = new Term(field, strText); DocsEnum docsEnum = reader.GetTermDocsEnum(term); // freqList.add(termEnum.docFreq()); // doesn't take into account // deldocs int minID = -1; int maxID = -1; int docID = -1; int df = 0; int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t; while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS) { df++; order.Add(docID, valId); minID = docID; while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS) { docID = docsEnum.DocID; df++; order.Add(docID, valId); } maxID = docID; } freqList.Add(df); totalFreq += df; minIDList.Add(minID); maxIDList.Add(maxID); t++; } } list.Seal(); this.m_valArray = list; this.m_freqs = freqList.ToArray(); this.m_minIDs = minIDList.ToArray(); this.m_maxIDs = maxIDList.ToArray(); int doc = 0; while (doc < maxDoc && order.Get(doc) != 0) { ++doc; } if (doc < maxDoc) { this.m_minIDs[0] = doc; // Try to get the max doc = maxDoc - 1; while (doc >= 0 && order.Get(doc) != 0) { --doc; } this.m_maxIDs[0] = doc; } this.m_freqs[0] = reader.NumDocs - totalFreq; }
private bool CanReuse(DocsEnum reuse, Bits liveDocs) { if (reuse != null && (reuse is SegmentDocsEnumBase)) { SegmentDocsEnumBase docsEnum = (SegmentDocsEnumBase)reuse; // If you are using ParallelReader, and pass in a // reused DocsEnum, it could have come from another // reader also using standard codec if (docsEnum.StartFreqIn == FreqIn) { // we only reuse if the incoming enum has the same liveDocs as the given liveDocs return liveDocs == docsEnum.LiveDocs; } } return false; }
public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState bTermState, Bits liveDocs, DocsEnum reuse, int flags) { var termState = (SepTermState)bTermState; SepDocsEnum docsEnum; if (!(reuse is SepDocsEnum)) { docsEnum = new SepDocsEnum(this); } else { docsEnum = (SepDocsEnum)reuse; if (docsEnum.START_DOC_IN != _docIn) { // If you are using ParallelReader, and pass in a // reused DocsEnum, it could have come // from another reader also using sep codec docsEnum = new SepDocsEnum(this); } } return(docsEnum.Init(fieldInfo, termState, liveDocs)); }
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) { DecodeMetaData(); return(outerInstance.outerInstance.postingsReader.Docs(outerInstance.fieldInfo, state, liveDocs, reuse, flags)); }
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) { PreDocsEnum docsEnum; if (reuse == null || !(reuse is PreDocsEnum)) { docsEnum = new PreDocsEnum(OuterInstance); } else { docsEnum = (PreDocsEnum)reuse; if (docsEnum.FreqStream != OuterInstance.FreqStream) { docsEnum = new PreDocsEnum(OuterInstance); } } return docsEnum.Reset(TermEnum, liveDocs); }
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) // ignored { TVDocsEnum docsEnum; if (reuse != null && reuse is TVDocsEnum) { docsEnum = (TVDocsEnum)reuse; } else { docsEnum = new TVDocsEnum(); } docsEnum.Reset(liveDocs, Freq); return docsEnum; }
public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState termState, IBits liveDocs, DocsEnum reuse, DocsFlags flags) { SepTermState termState_ = (SepTermState)termState; // If you are using ParallelReader, and pass in a // reused DocsEnum, it could have come // from another reader also using sep codec if (reuse is null || !(reuse is SepDocsEnum docsEnum) || docsEnum.startDocIn != docIn) { docsEnum = new SepDocsEnum(this); } return(docsEnum.Init(fieldInfo, termState_, liveDocs)); }
public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags) { return(new RAMDocsEnum(ramField.termToDocs[current], liveDocs)); }
public DocIdSetIteratorAnonymousInnerClassHelper(DocIdSetAnonymousInnerClassHelper2 outerInstance, DocsEnum termDocsEnum) { this.OuterInstance = outerInstance; this.TermDocsEnum = termDocsEnum; }
// we need to guarantee that if several threads call this concurrently, only // one executes it, and after it returns, the cache is updated and is either // complete or not. private void PerhapsFillCache() { lock (this) { if (cacheMisses < cacheMissesUntilFill) { return; } if (!shouldFillCache) { // we already filled the cache once, there's no need to re-fill it return; } shouldFillCache = false; InitReaderManager(); bool aborted = false; DirectoryReader reader = readerManager.Acquire(); try { TermsEnum termsEnum = null; DocsEnum docsEnum = null; foreach (AtomicReaderContext ctx in reader.Leaves) { Terms terms = ctx.AtomicReader.GetTerms(Consts.FULL); if (terms != null) // cannot really happen, but be on the safe side { termsEnum = terms.GetIterator(termsEnum); while (termsEnum.Next() != null) { if (!cache.IsFull) { BytesRef t = termsEnum.Term; // Since we guarantee uniqueness of categories, each term has exactly // one document. Also, since we do not allow removing categories (and // hence documents), there are no deletions in the index. Therefore, it // is sufficient to call next(), and then doc(), exactly once with no // 'validation' checks. FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(t.Utf8ToString())); docsEnum = termsEnum.Docs(null, docsEnum, DocsFlags.NONE); bool res = cache.Put(cp, docsEnum.NextDoc() + ctx.DocBase); Debug.Assert(!res, "entries should not have been evicted from the cache"); } else { // the cache is full and the next put() will evict entries from it, therefore abort the iteration. aborted = true; break; } } } if (aborted) { break; } } } finally { readerManager.Release(reader); } cacheIsComplete = !aborted; if (cacheIsComplete) { lock (this) { // everything is in the cache, so no need to keep readerManager open. // this block is executed in a sync block so that it works well with // initReaderManager called in parallel. readerManager.Dispose(); readerManager = null; initializedReaderManager = false; } } } }
/// <summary> /// Returns a <see cref="DocsEnum"/>, but randomly sometimes uses a /// <see cref="MultiDocsEnum"/> or <see cref="DocsAndPositionsEnum"/>. Returns null /// if the field/term doesn't exist. /// </summary> public static DocsEnum Docs(Random random, IndexReader r, string field, BytesRef term, IBits liveDocs, DocsEnum reuse, DocsFlags flags) { Terms terms = MultiFields.GetTerms(r, field); if (terms == null) { return(null); } TermsEnum termsEnum = terms.GetEnumerator(); if (!termsEnum.SeekExact(term)) { return(null); } return(Docs(random, termsEnum, liveDocs, reuse, flags)); }
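A hedged sketch of how a test might call this helper (the field and term values are illustrative): it behaves like a plain term-docs lookup over MultiFields, but randomly routes through the positions-capable enum so that more codec paths get exercised without changing the test's assertions.

using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Util;

public class DocsHelperUsageSketch : LuceneTestCase
{
    // 'reader' would be built by the test; the field name and term are illustrative only.
    private void ConsumePostings(IndexReader reader)
    {
        IBits liveDocs = MultiFields.GetLiveDocs(reader);
        DocsEnum de = TestUtil.Docs(Random, reader, "body", new BytesRef("lucene"), liveDocs, null, DocsFlags.FREQS);
        if (de == null) return; // field or term does not exist
        while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
        {
            int freq = de.Freq; // valid because DocsFlags.FREQS was requested
        }
    }
}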
public virtual void Test10kPulsed() { // we always run this test with pulsing codec. Codec cp = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1)); DirectoryInfo f = CreateTempDir("10kpulsed"); BaseDirectoryWrapper dir = NewFSDirectory(f); dir.CheckIndexOnDispose = false; // we do this ourselves explicitly RandomIndexWriter iw = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp)); Document document = new Document(); FieldType ft = new FieldType(TextField.TYPE_STORED); switch (TestUtil.NextInt32(Random, 0, 2)) { case 0: ft.IndexOptions = IndexOptions.DOCS_ONLY; break; case 1: ft.IndexOptions = IndexOptions.DOCS_AND_FREQS; break; default: ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; break; } Field field = NewField("field", "", ft); document.Add(field); //NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 10050; i++) { //field.StringValue = df.format(i); field.SetStringValue(i.ToString("00000", CultureInfo.InvariantCulture)); iw.AddDocument(document); } IndexReader ir = iw.GetReader(); iw.Dispose(); TermsEnum te = MultiFields.GetTerms(ir, "field").GetIterator(null); DocsEnum de = null; for (int i = 0; i < 10050; i++) { //string expected = df.format(i); string expected = i.ToString("00000", CultureInfo.InvariantCulture); assertEquals(expected, te.Next().Utf8ToString()); de = TestUtil.Docs(Random, te, null, de, DocsFlags.NONE); assertTrue(de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.NextDoc()); } ir.Dispose(); TestUtil.CheckIndex(dir); dir.Dispose(); }
/// <summary> /// Returns a <see cref="DocsEnum"/> from a positioned <see cref="TermsEnum"/>, but /// randomly sometimes uses a <see cref="MultiDocsEnum"/> or <see cref="DocsAndPositionsEnum"/>. /// </summary> public static DocsEnum Docs(Random random, TermsEnum termsEnum, IBits liveDocs, DocsEnum reuse, DocsFlags flags) { if (random.NextBoolean()) { if (random.NextBoolean()) { DocsAndPositionsFlags posFlags; switch (random.Next(4)) { case 0: posFlags = 0; break; case 1: posFlags = DocsAndPositionsFlags.OFFSETS; break; case 2: posFlags = DocsAndPositionsFlags.PAYLOADS; break; default: posFlags = DocsAndPositionsFlags.OFFSETS | DocsAndPositionsFlags.PAYLOADS; break; } // TODO: cast to DocsAndPositionsEnum? DocsAndPositionsEnum docsAndPositions = termsEnum.DocsAndPositions(liveDocs, null, posFlags); if (docsAndPositions != null) { return(docsAndPositions); } } flags |= DocsFlags.FREQS; } return(termsEnum.Docs(liveDocs, reuse, flags)); }
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) { return(new RAMDocsEnum(RamField.TermToDocs[Current], liveDocs)); }
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) { DocsEnum inReuse; SortingDocsEnum wrapReuse; if (reuse != null && reuse is SortingDocsEnum) { // if we're asked to reuse the given DocsEnum and it is Sorting, return // the wrapped one, since some Codecs expect it. wrapReuse = (SortingDocsEnum) reuse; inReuse = wrapReuse.Wrapped; } else { wrapReuse = null; inReuse = reuse; } DocsEnum inDocs = @in.Docs(NewToOld(liveDocs), inReuse, flags); bool withFreqs = indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS) >= 0 && (flags & DocsEnum.FLAG_FREQS) != 0; return new SortingDocsEnum(docMap.Count, wrapReuse, inDocs, withFreqs, docMap); }
/// <summary> /// Must fully consume state, since after this call that /// <see cref="TermState"/> may be reused. /// </summary> public abstract DocsEnum Docs(FieldInfo fieldInfo, BlockTermState state, IBits skipDocs, DocsEnum reuse, DocsFlags flags);
// for testing internal virtual bool Reused(DocsEnum other) { if (other == null || !(other is SortingDocsEnum)) { return false; } return docs == ((SortingDocsEnum) other).docs; }
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) { return Delegate().Docs(liveDocs, reuse, flags); }
public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags) { return(CreatePagesEnum()); }
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) { // TODO: implement reuse, something like Pulsing: // it's hairy! if (outerInstance.terms[termOrd] is LowFreqTerm) { int[] postings = ((LowFreqTerm) outerInstance.terms[termOrd]).postings; if (outerInstance.hasFreq) { if (outerInstance.hasPos) { int posLen; if (outerInstance.hasOffsets_Renamed) { posLen = 3; } else { posLen = 1; } if (outerInstance.hasPayloads_Renamed) { posLen++; } LowFreqDocsEnum docsEnum; if (reuse is LowFreqDocsEnum) { docsEnum = (LowFreqDocsEnum) reuse; if (!docsEnum.CanReuse(liveDocs, posLen)) { docsEnum = new LowFreqDocsEnum(liveDocs, posLen); } } else { docsEnum = new LowFreqDocsEnum(liveDocs, posLen); } return docsEnum.Reset(postings); } else { LowFreqDocsEnumNoPos docsEnum; if (reuse is LowFreqDocsEnumNoPos) { docsEnum = (LowFreqDocsEnumNoPos) reuse; if (!docsEnum.CanReuse(liveDocs)) { docsEnum = new LowFreqDocsEnumNoPos(liveDocs); } } else { docsEnum = new LowFreqDocsEnumNoPos(liveDocs); } return docsEnum.Reset(postings); } } else { LowFreqDocsEnumNoTF docsEnum; if (reuse is LowFreqDocsEnumNoTF) { docsEnum = (LowFreqDocsEnumNoTF) reuse; if (!docsEnum.CanReuse(liveDocs)) { docsEnum = new LowFreqDocsEnumNoTF(liveDocs); } } else { docsEnum = new LowFreqDocsEnumNoTF(liveDocs); } return docsEnum.Reset(postings); } } else { HighFreqTerm term = (HighFreqTerm) outerInstance.terms[termOrd]; HighFreqDocsEnum docsEnum; if (reuse is HighFreqDocsEnum) { docsEnum = (HighFreqDocsEnum) reuse; if (!docsEnum.canReuse(liveDocs)) { docsEnum = new HighFreqDocsEnum(liveDocs); } } else { docsEnum = new HighFreqDocsEnum(liveDocs); } //System.out.println(" DE for term=" + new BytesRef(terms[termOrd].term).utf8ToString() + ": " + term.docIDs.length + " docs"); return docsEnum.Reset(term.docIDs, term.freqs); } }
public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags) { throw new System.NotSupportedException(); }
/// <summary> /// Default merge impl: append documents, mapping around /// deletes. /// </summary> public virtual TermStats Merge(MergeState mergeState, IndexOptions indexOptions, DocsEnum postings, FixedBitSet visitedDocs) { int df = 0; long totTF = 0; if (indexOptions == IndexOptions.DOCS_ONLY) { while (true) { int doc = postings.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } visitedDocs.Set(doc); this.StartDoc(doc, -1); this.FinishDoc(); df++; } totTF = -1; } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) { while (true) { int doc = postings.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } visitedDocs.Set(doc); int freq = postings.Freq; this.StartDoc(doc, freq); this.FinishDoc(); df++; totTF += freq; } } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { var postingsEnum = (DocsAndPositionsEnum)postings; while (true) { int doc = postingsEnum.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } visitedDocs.Set(doc); int freq = postingsEnum.Freq; this.StartDoc(doc, freq); totTF += freq; for (int i = 0; i < freq; i++) { int position = postingsEnum.NextPosition(); BytesRef payload = postingsEnum.GetPayload(); this.AddPosition(position, payload, -1, -1); } this.FinishDoc(); df++; } } else { Debug.Assert(indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); var postingsEnum = (DocsAndPositionsEnum)postings; while (true) { int doc = postingsEnum.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } visitedDocs.Set(doc); int freq = postingsEnum.Freq; this.StartDoc(doc, freq); totTF += freq; for (int i = 0; i < freq; i++) { int position = postingsEnum.NextPosition(); BytesRef payload = postingsEnum.GetPayload(); this.AddPosition(position, payload, postingsEnum.StartOffset, postingsEnum.EndOffset); } this.FinishDoc(); df++; } } return(new TermStats(df, indexOptions == IndexOptions.DOCS_ONLY ? -1 : totTF)); }
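For context, a hedged sketch of the per-term loop that typically drives this method during a merge. This is illustrative rather than the verbatim TermsConsumer.Merge: StartTerm, FinishTerm and TermStats.DocFreq are assumed from the Lucene 4.x codec API, and the sketch is restricted to a DOCS_AND_FREQS field so a plain DocsEnum suffices.

using Lucene.Net.Codecs;
using Lucene.Net.Index;
using Lucene.Net.Util;

internal static class MergeDriverSketch
{
    // Sketch only (assumes a DOCS_AND_FREQS field): feed each term's postings to the
    // PostingsConsumer.Merge default implementation above and record the returned stats.
    internal static void MergeFreqsOnlyField(TermsConsumer termsConsumer, TermsEnum termsEnum,
        MergeState mergeState, FixedBitSet visitedDocs)
    {
        DocsEnum docsEnum = null;
        BytesRef term;
        while ((term = termsEnum.Next()) != null)
        {
            // Reuse the enum between terms, as the other snippets in this set do.
            docsEnum = termsEnum.Docs(null, docsEnum, DocsFlags.FREQS);
            PostingsConsumer postingsConsumer = termsConsumer.StartTerm(term);
            TermStats stats = postingsConsumer.Merge(mergeState, IndexOptions.DOCS_AND_FREQS, docsEnum, visitedDocs);
            if (stats.DocFreq > 0)
            {
                termsConsumer.FinishTerm(term, stats);
            }
        }
    }
}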
public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags) { if (CanReuse(reuse, liveDocs)) { // if (DEBUG) System.out.println("SPR.docs ts=" + termState); return ((SegmentDocsEnumBase)reuse).Reset(fieldInfo, (StandardTermState)termState); } return NewDocsEnum(liveDocs, fieldInfo, (StandardTermState)termState); }
/// <summary> /// Construct a <see cref="TermScorer"/>. /// </summary> /// <param name="weight"> /// The weight of the <see cref="Index.Term"/> in the query. </param> /// <param name="td"> /// An iterator over the documents matching the <see cref="Index.Term"/>. </param> /// <param name="docScorer"> /// The <see cref="Similarity.SimScorer"/> implementation /// to be used for score computations. </param> internal TermScorer(Weight weight, DocsEnum td, Similarity.SimScorer docScorer) : base(weight) { this.docScorer = docScorer; this.docsEnum = td; }
public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags) { return(Delegate.Docs(liveDocs, reuse, flags)); }
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) { throw new System.NotSupportedException(); }
public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags) { throw UnsupportedOperationException.Create(); }
/// <summary> /// loads multi-value facet data. This method uses a workarea to prepare loading. /// </summary> /// <param name="fieldName"></param> /// <param name="reader"></param> /// <param name="listFactory"></param> /// <param name="workArea"></param> public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory, BoboSegmentReader.WorkArea workArea) { #if FEATURE_STRING_INTERN string field = string.Intern(fieldName); #else string field = fieldName; #endif int maxdoc = reader.MaxDoc; BigNestedInt32Array.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea); ITermValueList list = (listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList()); List <int> minIDList = new List <int>(); List <int> maxIDList = new List <int>(); List <int> freqList = new List <int>(); OpenBitSet bitset = new OpenBitSet(maxdoc + 1); int negativeValueCount = GetNegativeValueCount(reader, field); int t = 1; // valid term id starts from 1 list.Add(null); minIDList.Add(-1); maxIDList.Add(-1); freqList.Add(0); m_overflow = false; Terms terms = reader.GetTerms(field); if (terms != null) { TermsEnum termsEnum = terms.GetIterator(null); BytesRef text; while ((text = termsEnum.Next()) != null) { string strText = text.Utf8ToString(); list.Add(strText); Term term = new Term(field, strText); DocsEnum docsEnum = reader.GetTermDocsEnum(term); int df = 0; int minID = -1; int maxID = -1; int docID = -1; int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t; while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS) { df++; if (!loader.Add(docID, valId)) { LogOverflow(fieldName); } minID = docID; bitset.FastSet(docID); while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS) { docID = docsEnum.DocID; df++; if (!loader.Add(docID, valId)) { LogOverflow(fieldName); } bitset.FastSet(docID); } maxID = docID; } freqList.Add(df); minIDList.Add(minID); maxIDList.Add(maxID); t++; } } list.Seal(); try { m_nestedArray.Load(maxdoc + 1, loader); } catch (Exception e) { throw new RuntimeException("failed to load due to " + e.ToString(), e); } this.m_valArray = list; this.m_freqs = freqList.ToArray(); this.m_minIDs = minIDList.ToArray(); this.m_maxIDs = maxIDList.ToArray(); int doc = 0; while (doc < maxdoc && !m_nestedArray.Contains(doc, 0, true)) { ++doc; } if (doc < maxdoc) { this.m_minIDs[0] = doc; doc = maxdoc - 1; while (doc >= 0 && !m_nestedArray.Contains(doc, 0, true)) { --doc; } this.m_maxIDs[0] = doc; } this.m_freqs[0] = maxdoc - (int)bitset.Cardinality(); }
public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState termState, IBits liveDocs, DocsEnum reuse, DocsFlags flags) { if (CanReuse(reuse, liveDocs)) { // if (DEBUG) System.out.println("SPR.docs ts=" + termState2); return(((SegmentDocsEnumBase)reuse).Reset(fieldInfo, (StandardTermState)termState)); } return(NewDocsEnum(liveDocs, fieldInfo, (StandardTermState)termState)); }
public override DocsEnum Docs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, int flags) { var termState = (PulsingTermState)_termState; if (termState.PostingsSize != -1) { PulsingDocsEnum postings; if (reuse is PulsingDocsEnum) { postings = (PulsingDocsEnum)reuse; if (!postings.CanReuse(field)) { postings = new PulsingDocsEnum(field); } } else { // the 'reuse' is actually the wrapped enum var previous = (PulsingDocsEnum)GetOther(reuse); if (previous != null && previous.CanReuse(field)) { postings = previous; } else { postings = new PulsingDocsEnum(field); } } if (reuse != postings) { SetOther(postings, reuse); // postings.other = reuse } return(postings.Reset(liveDocs, termState)); } if (!(reuse is PulsingDocsEnum)) { return(_wrappedPostingsReader.Docs(field, termState.WrappedTermState, liveDocs, reuse, flags)); } var wrapped = _wrappedPostingsReader.Docs(field, termState.WrappedTermState, liveDocs, GetOther(reuse), flags); SetOther(wrapped, reuse); // wrapped.other = reuse return(wrapped); }
internal SortingDocsEnum(int maxDoc, SortingDocsEnum reuse, DocsEnum input, bool withFreqs, Sorter.DocMap docMap) : base(input) { this.maxDoc = maxDoc; this.withFreqs = withFreqs; if (reuse != null) { if (reuse.maxDoc == maxDoc) { sorter = reuse.sorter; } else { sorter = new DocFreqSorter(maxDoc); } docs = reuse.docs; freqs = reuse.freqs; // maybe null } else { docs = new int[64]; sorter = new DocFreqSorter(maxDoc); } docIt = -1; int i = 0; int doc; if (withFreqs) { if (freqs == null || freqs.Length < docs.Length) { freqs = new int[docs.Length]; } while ((doc = input.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (i >= docs.Length) { docs = ArrayUtil.Grow(docs, docs.Length + 1); freqs = ArrayUtil.Grow(freqs, freqs.Length + 1); } docs[i] = docMap.OldToNew(doc); freqs[i] = input.Freq; ++i; } } else { freqs = null; while ((doc = input.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (i >= docs.Length) { docs = ArrayUtil.Grow(docs, docs.Length + 1); } docs[i++] = docMap.OldToNew(doc); } } // TimSort can save much time compared to other sorts in case of // reverse sorting, or when sorting a concatenation of sorted readers sorter.Reset(docs, freqs); sorter.Sort(0, i); upto = i; }
/// <summary> /// For a DocsEnum, sets the 'other' reused enum. /// See GetOther for an example. /// </summary> private DocsEnum SetOther(DocsEnum de, DocsEnum other) { var atts = de.Attributes(); return(atts.AddAttribute <IPulsingEnumAttribute>().Enums()[this] = other); }
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) { return ActualEnum.Docs(liveDocs, reuse, flags); }
public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState bTermState, Bits liveDocs, DocsEnum reuse, int flags) { var termState = (SepTermState)bTermState; SepDocsEnum docsEnum; if (!(reuse is SepDocsEnum)) { docsEnum = new SepDocsEnum(this); } else { docsEnum = (SepDocsEnum) reuse; if (docsEnum.START_DOC_IN != _docIn) { // If you are using ParallelReader, and pass in a // reused DocsEnum, it could have come // from another reader also using sep codec docsEnum = new SepDocsEnum(this); } } return docsEnum.Init(fieldInfo, termState, liveDocs); }