public _VisitorTemplate_55(IntersectsPrefixTreeFilter _enclosing, AtomicReaderContext baseArg1, IBits baseArg2, bool baseArg3) : base(_enclosing, baseArg1, baseArg2, baseArg3) { this._enclosing = _enclosing; }
public DistanceDocValues(DistanceValueSource enclosingInstance, IndexReader reader)
{
    this.enclosingInstance = enclosingInstance;
    ptX = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.GetFieldNameX()/*, true*/);
    ptY = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.GetFieldNameY()/*, true*/);
    validX = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.GetFieldNameX());
    validY = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.GetFieldNameY());
}
public BBoxSimilarityValueSourceDocValues(IndexReader reader, BBoxSimilarityValueSource enclosingInstance)
{
    _enclosingInstance = enclosingInstance;
    minX = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minX/*, true*/);
    minY = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minY/*, true*/);
    maxX = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxX/*, true*/);
    maxY = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxY/*, true*/);
    validMinX = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_minX);
    validMaxX = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_maxX);
}
public DistanceDocValues(DistanceValueSource enclosingInstance, IndexReader reader)
{
    this.enclosingInstance = enclosingInstance;
    ptX = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.GetFieldNameX()/*, true*/);
    ptY = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.GetFieldNameY()/*, true*/);
    validX = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.GetFieldNameX());
    validY = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.GetFieldNameY());
    from = enclosingInstance.from;
    calculator = enclosingInstance.strategy.GetSpatialContext().GetDistCalc();
    nullValue = (enclosingInstance.strategy.GetSpatialContext().IsGeo() ? 180 : double.MaxValue);
}
public BBoxSimilarityValueSourceDocValues(IndexReader reader, BBoxSimilarityValueSource enclosingInstance)
{
    _enclosingInstance = enclosingInstance;
    rect = _enclosingInstance.strategy.GetSpatialContext().MakeRectangle(0, 0, 0, 0); // reused
    minX = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minX/*, true*/);
    minY = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minY/*, true*/);
    maxX = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxX/*, true*/);
    maxY = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxY/*, true*/);
    validMinX = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_minX);
    validMaxX = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_maxX);
}
public BBoxSimilarityValueSourceFunctionValue(AtomicReader reader, BBoxSimilarityValueSource enclosingInstance)
{
    _enclosingInstance = enclosingInstance;
    rect = _enclosingInstance.strategy.SpatialContext.MakeRectangle(0, 0, 0, 0); // reused
    minX = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minX, true);
    minY = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minY, true);
    maxX = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxX, true);
    maxY = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxY, true);
    validMinX = FieldCache.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_minX);
    validMaxX = FieldCache.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_maxX);
}
/// <exception cref="System.IO.IOException"></exception> public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs ) { return new _VisitorTemplate_121(this, context, acceptDocs, true).GetDocIdSet(); }
/// <exception cref="System.IO.IOException"></exception> public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs ) { return new ContainsVisitor(this, context, acceptDocs).Visit(grid.WorldCell, acceptDocs); }
// see getLeafDocs
/// <summary>This is the primary algorithm; recursive.</summary>
/// <remarks>This is the primary algorithm; recursive. Returns null if it finds none.</remarks>
/// <exception cref="System.IO.IOException"></exception>
internal SmallDocSet Visit(Cell cell, IBits acceptContains)
{
    if (termsEnum == null)
    {
        // signals all done
        return null;
    }

    // Leaf docs match all query shape
    SmallDocSet leafDocs = GetLeafDocs(cell, acceptContains);

    // Get the AND of all child results
    SmallDocSet combinedSubResults = null;
    ICollection<Cell> subCells = cell.GetSubCells(_enclosing.queryShape);
    foreach (Cell subCell in subCells)
    {
        if (!SeekExact(subCell))
        {
            combinedSubResults = null;
        }
        else if (subCell.Level == _enclosing.detailLevel)
        {
            combinedSubResults = GetDocs(subCell, acceptContains);
        }
        else if (subCell.GetShapeRel() == SpatialRelation.WITHIN)
        {
            combinedSubResults = GetLeafDocs(subCell, acceptContains);
        }
        else
        {
            combinedSubResults = Visit(subCell, acceptContains); // recursion
        }

        if (combinedSubResults == null)
        {
            break;
        }
        acceptContains = combinedSubResults; // has the 'AND' effect on next iteration
    }

    // Result: OR the leaf docs with AND of all child results
    if (combinedSubResults != null)
    {
        if (leafDocs == null)
        {
            return combinedSubResults;
        }
        return leafDocs.Union(combinedSubResults);
    }
    return leafDocs;
}
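// --- Added illustrative sketch (not from the Lucene.NET sources above). Shows how a prefix-tree
// --- filter such as the Intersects/Contains filters above is typically obtained and used from
// --- application code; the field name, tree depth, and query rectangle are invented, and exact
// --- namespaces vary across Spatial4n / Lucene.Net.Spatial versions.
static TopDocs FindDocsInBox(IndexSearcher searcher)
{
    var ctx = SpatialContext.GEO;                                       // geodetic context
    var grid = new GeohashPrefixTree(ctx, 11);                          // assumed tree depth
    var strategy = new RecursivePrefixTreeStrategy(grid, "geoField");   // hypothetical field name

    // Intersects (or Contains) yields a prefix-tree filter driven by visitors like the ones above.
    var args = new SpatialArgs(SpatialOperation.Intersects, ctx.MakeRectangle(-10, 10, -10, 10));
    Filter filter = strategy.MakeFilter(args);
    return searcher.Search(new MatchAllDocsQuery(), filter, 10);
}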
/// <exception cref="System.IO.IOException"></exception> public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs ) { IBits docsWithField; if (field == null) { docsWithField = null; } else { //all docs //NOTE By using the FieldCache we re-use a cache // which is nice but loading it in this way might be slower than say using an // intersects filter against the world bounds. So do we add a method to the // strategy, perhaps? But the strategy can't cache it. docsWithField = FieldCache.DEFAULT.GetDocsWithField((context.AtomicReader), field); int maxDoc = context.AtomicReader.MaxDoc; if (docsWithField.Length != maxDoc) { throw new InvalidOperationException("Bits length should be maxDoc (" + maxDoc + ") but wasn't: " + docsWithField); } if (docsWithField is Bits.MatchNoBits) { return null; } else { //match nothing if (docsWithField is Bits.MatchAllBits) { docsWithField = null; } } } //all docs //not so much a chain but a way to conveniently invert the Filter DocIdSet docIdSet = new ChainedFilter(new[] { intersectsFilter }, ChainedFilter.ANDNOT).GetDocIdSet(context, acceptDocs); return BitsFilteredDocIdSet.Wrap(docIdSet, docsWithField); }
/// <exception cref="System.IO.IOException"></exception> private ContainsPrefixTreeFilter.SmallDocSet GetDocs(Cell cell, IBits acceptContains ) { System.Diagnostics.Debug.Assert(new BytesRef(cell.GetTokenBytes().ToSByteArray()).Equals(this.termBytes )); return this.CollectDocs(acceptContains); }
public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs) { Assert.IsNull(acceptDocs, "acceptDocs should be null, as we have an index without deletions"); return(new DocIdBitSet(Rnd)); }
public override DocsAndPositionsEnum DocsAndPositions(IBits liveDocs, DocsAndPositionsEnum reuse, DocsAndPositionsFlags flags) { return(m_input.DocsAndPositions(liveDocs, reuse, flags)); }
/// <exception cref="System.IO.IOException"/> public AllScorer(FunctionQuery outerInstance, AtomicReaderContext context, IBits acceptDocs, FunctionWeight w, float qWeight) : base(w) { this.outerInstance = outerInstance; this.weight = w; this.qWeight = qWeight; this.reader = context.Reader; this.maxDoc = reader.MaxDoc; this.acceptDocs = acceptDocs; vals = outerInstance.func.GetValues(weight.m_context, context); }
public FieldCacheDocIdSetAnonymousInnerClassHelper(MultiTermQueryDocTermOrdsWrapperFilter outerInstance, int maxDoc, IBits acceptDocs, SortedSetDocValues docTermOrds, Int64BitSet termSet) : base(maxDoc, acceptDocs) { this.outerInstance = outerInstance; this.docTermOrds = docTermOrds; this.termSet = termSet; }
public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, IBits acceptDocs, IDictionary<Term, TermContext> termContexts, bool collectPayloads)
{
    // LUCENENET: Added guard clauses for null
    if (spanNearQuery is null)
    {
        throw new ArgumentNullException(nameof(spanNearQuery));
    }
    sorter = new InPlaceMergeSorterAnonymousClass(this);
    if (spanNearQuery.GetClauses().Length < 2)
    {
        throw new ArgumentException("Less than 2 clauses: " + spanNearQuery);
    }
    this.collectPayloads = collectPayloads;
    allowedSlop = spanNearQuery.Slop;
    SpanQuery[] clauses = spanNearQuery.GetClauses();
    subSpans = new Spans[clauses.Length];
    matchPayload = new JCG.List<byte[]>();
    subSpansByDoc = new Spans[clauses.Length];
    for (int i = 0; i < clauses.Length; i++)
    {
        subSpans[i] = clauses[i].GetSpans(context, acceptDocs, termContexts);
        subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
    }
    query = spanNearQuery; // kept for toString() only.
}
public Int32DocValuesAnonymousInnerClassHelper(EnumFieldSource outerInstance, EnumFieldSource @this, FieldCache.Int32s arr, IBits valid) : base(@this) { this.outerInstance = outerInstance; this.arr = arr; this.valid = valid; val = new MutableValueInt32(); }
private readonly bool collectPayloads = true; // LUCENENET: marked readonly

public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, IBits acceptDocs, IDictionary<Term, TermContext> termContexts)
    : this(spanNearQuery, context, acceptDocs, termContexts, true)
{
}
public override DocsAndPositionsEnum DocsAndPositions(IBits bits, DocsAndPositionsEnum reuse, DocsAndPositionsFlags flags) { return(tenum.DocsAndPositions(bits, reuse, flags)); }
public override DocsEnum Docs(IBits bits, DocsEnum reuse, DocsFlags flags) { return(tenum.Docs(bits, reuse, flags)); }
/// <summary>
/// Assert that the content of the <see cref="DocIdSet"/> is the same as the content of the <see cref="BitArray"/>.
/// </summary>
#pragma warning disable xUnit1013
public virtual void AssertEquals(int numBits, BitArray ds1, T ds2)
#pragma warning restore xUnit1013
{
    // nextDoc
    DocIdSetIterator it2 = ds2.GetIterator();
    if (it2 == null)
    {
        Assert.AreEqual(-1, ds1.NextSetBit(0));
    }
    else
    {
        Assert.AreEqual(-1, it2.DocID);
        for (int doc = ds1.NextSetBit(0); doc != -1; doc = ds1.NextSetBit(doc + 1))
        {
            Assert.AreEqual(doc, it2.NextDoc());
            Assert.AreEqual(doc, it2.DocID);
        }
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, it2.NextDoc());
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, it2.DocID);
    }

    // nextDoc / advance
    it2 = ds2.GetIterator();
    if (it2 == null)
    {
        Assert.AreEqual(-1, ds1.NextSetBit(0));
    }
    else
    {
        for (int doc = -1; doc != DocIdSetIterator.NO_MORE_DOCS;)
        {
            if (Random.NextBoolean())
            {
                doc = ds1.NextSetBit(doc + 1);
                if (doc == -1)
                {
                    doc = DocIdSetIterator.NO_MORE_DOCS;
                }
                Assert.AreEqual(doc, it2.NextDoc());
                Assert.AreEqual(doc, it2.DocID);
            }
            else
            {
                int target = doc + 1 + Random.Next(Random.NextBoolean() ? 64 : Math.Max(numBits / 8, 1));
                doc = ds1.NextSetBit(target);
                if (doc == -1)
                {
                    doc = DocIdSetIterator.NO_MORE_DOCS;
                }
                Assert.AreEqual(doc, it2.Advance(target));
                Assert.AreEqual(doc, it2.DocID);
            }
        }
    }

    // bits()
    IBits bits = ds2.Bits;
    if (bits != null)
    {
        // test consistency between bits and iterator
        it2 = ds2.GetIterator();
        for (int previousDoc = -1, doc = it2.NextDoc(); ; previousDoc = doc, doc = it2.NextDoc())
        {
            int max = doc == DocIdSetIterator.NO_MORE_DOCS ? bits.Length : doc;
            for (int i = previousDoc + 1; i < max; ++i)
            {
                Assert.AreEqual(false, bits.Get(i));
            }
            if (doc == DocIdSetIterator.NO_MORE_DOCS)
            {
                break;
            }
            Assert.AreEqual(true, bits.Get(doc));
        }
    }
}
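// --- Added illustrative sketch (not from the Lucene.NET sources above). Shows how the assertion
// --- helper above might be driven from a test; FixedBitSet stands in for the DocIdSet
// --- implementation under test (the generic T), and the bit count is arbitrary.
int numBits = 1 + Random.Next(1000);
var expected = new BitArray(numBits);           // reference bits
var actual = new FixedBitSet(numBits);          // implementation under test
for (int i = 0; i < numBits; i++)
{
    if (Random.NextBoolean())
    {
        expected.Set(i, true);
        actual.Set(i);
    }
}
AssertEquals(numBits, expected, actual);        // compares iterator and Bits views against the BitArray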
public override IMutableBits NewLiveDocs(IBits existing) { BitVector liveDocs = (BitVector)existing; return((BitVector)liveDocs.Clone()); }
public override Scorer GetScorer(AtomicReaderContext ctx, IBits acceptDocs) { return(new AllScorer(outerInstance, ctx, acceptDocs, this, m_queryWeight)); }
// Runs test, with multiple threads, using the specific // failure to trigger an IOException public virtual void TestMultipleThreadsFailure(Func <IConcurrentMergeScheduler> newScheduler, MockDirectoryWrapper.Failure failure) { int NUM_THREADS = 3; for (int iter = 0; iter < 2; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } MockDirectoryWrapper dir = NewMockDirectory(); var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(2) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(4)); IndexWriter writer = new IndexWriter(dir, config); var scheduler = config.mergeScheduler as IConcurrentMergeScheduler; if (scheduler != null) { scheduler.SetSuppressExceptions(); } IndexerThread[] threads = new IndexerThread[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { threads[i] = new IndexerThread(writer, true, NewField); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Start(); } Thread.Sleep(10); dir.FailOn(failure); failure.SetDoFail(); for (int i = 0; i < NUM_THREADS; i++) { threads[i].Join(); Assert.IsTrue(threads[i].Error == null, "hit unexpected Throwable"); } bool success = false; try { writer.Dispose(false); success = true; } catch (IOException) { failure.ClearDoFail(); writer.Dispose(false); } if (VERBOSE) { Console.WriteLine("TEST: success=" + success); } if (success) { IndexReader reader = DirectoryReader.Open(dir); IBits delDocs = MultiFields.GetLiveDocs(reader); for (int j = 0; j < reader.MaxDoc; j++) { if (delDocs == null || !delDocs.Get(j)) { reader.Document(j); reader.GetTermVectors(j); } } reader.Dispose(); } dir.Dispose(); } }
public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags) { return(termsEnum.Docs(liveDocs, reuse, flags)); }
public override int DoLogic() { int res = 0; // open reader or use existing one IndexSearcher searcher = RunData.GetIndexSearcher(); IndexReader reader; bool closeSearcher; if (searcher == null) { // open our own reader Directory dir = RunData.Directory; reader = DirectoryReader.Open(dir); searcher = new IndexSearcher(reader); closeSearcher = true; } else { // use existing one; this passes +1 ref to us reader = searcher.IndexReader; closeSearcher = false; } // optionally warm and add num docs traversed to count if (WithWarm) { Document doc = null; IBits liveDocs = MultiFields.GetLiveDocs(reader); for (int m = 0; m < reader.MaxDoc; m++) { if (null == liveDocs || liveDocs.Get(m)) { doc = reader.Document(m); res += (doc == null ? 0 : 1); } } } if (WithSearch) { res++; Query q = queryMaker.MakeQuery(); Sort sort = Sort; TopDocs hits = null; int numHits = NumHits; if (numHits > 0) { if (WithCollector == false) { if (sort != null) { // TODO: instead of always passing false we // should detect based on the query; if we make // the IndexSearcher search methods that take // Weight public again, we can go back to // pulling the Weight ourselves: TopFieldCollector collector = TopFieldCollector.Create(sort, numHits, true, WithScore, WithMaxScore, false); searcher.Search(q, null, collector); hits = collector.GetTopDocs(); } else { hits = searcher.Search(q, numHits); } } else { ICollector collector = CreateCollector(); searcher.Search(q, null, collector); //hits = collector.topDocs(); } string printHitsField = RunData.Config.Get("print.hits.field", null); if (hits != null && printHitsField != null && printHitsField.Length > 0) { Console.WriteLine("totalHits = " + hits.TotalHits); Console.WriteLine("maxDoc() = " + reader.MaxDoc); Console.WriteLine("numDocs() = " + reader.NumDocs); for (int i = 0; i < hits.ScoreDocs.Length; i++) { int docID = hits.ScoreDocs[i].Doc; Document doc = reader.Document(docID); Console.WriteLine(" " + i + ": doc=" + docID + " score=" + hits.ScoreDocs[i].Score + " " + printHitsField + " =" + doc.Get(printHitsField)); } } if (WithTraverse) { ScoreDoc[] scoreDocs = hits.ScoreDocs; int traversalSize = Math.Min(scoreDocs.Length, TraversalSize); if (traversalSize > 0) { bool retrieve = WithRetrieve; int numHighlight = Math.Min(NumToHighlight, scoreDocs.Length); Analyzer analyzer = RunData.Analyzer; BenchmarkHighlighter highlighter = null; if (numHighlight > 0) { highlighter = GetBenchmarkHighlighter(q); } for (int m = 0; m < traversalSize; m++) { int id = scoreDocs[m].Doc; res++; if (retrieve) { Document document = RetrieveDoc(reader, id); res += document != null ? 1 : 0; if (numHighlight > 0 && m < numHighlight) { ICollection <string> fieldsToHighlight = GetFieldsToHighlight(document); foreach (string field in fieldsToHighlight) { string text = document.Get(field); res += highlighter.DoHighlight(reader, id, field, document, analyzer, text); } } } } } } } } if (closeSearcher) { reader.Dispose(); } else { // Release our +1 ref from above reader.DecRef(); } return(res); }
/// <summary>
/// Fills a <see cref="T:IDictionary{string, WeightedSpanTerm}"/> with <see cref="WeightedSpanTerm"/>s using the terms from the supplied <see cref="SpanQuery"/>.
/// </summary>
/// <param name="terms"><see cref="T:IDictionary{string, WeightedSpanTerm}"/> to place created <see cref="WeightedSpanTerm"/>s in</param>
/// <param name="spanQuery"><see cref="SpanQuery"/> to extract Terms from</param>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
protected virtual void ExtractWeightedSpanTerms(IDictionary<string, WeightedSpanTerm> terms, SpanQuery spanQuery)
{
    ISet<string> fieldNames;
    if (fieldName == null)
    {
        fieldNames = new JCG.HashSet<string>();
        CollectSpanQueryFields(spanQuery, fieldNames);
    }
    else
    {
        fieldNames = new JCG.HashSet<string> { fieldName };
    }
    // To support the use of the default field name
    if (defaultField != null)
    {
        fieldNames.Add(defaultField);
    }

    IDictionary<string, SpanQuery> queries = new JCG.Dictionary<string, SpanQuery>();
    var nonWeightedTerms = new JCG.HashSet<Term>();
    bool mustRewriteQuery = MustRewriteQuery(spanQuery);
    if (mustRewriteQuery)
    {
        foreach (string field in fieldNames)
        {
            SpanQuery rewrittenQuery = (SpanQuery)spanQuery.Rewrite(GetLeafContext().Reader);
            queries[field] = rewrittenQuery;
            rewrittenQuery.ExtractTerms(nonWeightedTerms);
        }
    }
    else
    {
        spanQuery.ExtractTerms(nonWeightedTerms);
    }

    List<PositionSpan> spanPositions = new List<PositionSpan>();

    foreach (string field in fieldNames)
    {
        SpanQuery q;
        q = mustRewriteQuery ? queries[field] : spanQuery;
        AtomicReaderContext context = GetLeafContext();
        var termContexts = new JCG.Dictionary<Term, TermContext>();
        ISet<Term> extractedTerms = new JCG.SortedSet<Term>();
        q.ExtractTerms(extractedTerms);
        foreach (Term term in extractedTerms)
        {
            termContexts[term] = TermContext.Build(context, term);
        }
        IBits acceptDocs = context.AtomicReader.LiveDocs;
        Spans.Spans spans = q.GetSpans(context, acceptDocs, termContexts);

        // collect span positions
        while (spans.MoveNext())
        {
            spanPositions.Add(new PositionSpan(spans.Start, spans.End - 1));
        }
    }

    if (spanPositions.Count == 0)
    {
        // no spans found
        return;
    }

    foreach (Term queryTerm in nonWeightedTerms)
    {
        if (FieldNameComparer(queryTerm.Field))
        {
            if (!terms.TryGetValue(queryTerm.Text(), out WeightedSpanTerm weightedSpanTerm) || weightedSpanTerm == null)
            {
                weightedSpanTerm = new WeightedSpanTerm(spanQuery.Boost, queryTerm.Text());
                weightedSpanTerm.AddPositionSpans(spanPositions);
                weightedSpanTerm.IsPositionSensitive = true;
                terms[queryTerm.Text()] = weightedSpanTerm;
            }
            else
            {
                if (spanPositions.Count > 0)
                {
                    weightedSpanTerm.AddPositionSpans(spanPositions);
                }
            }
        }
    }
}
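// --- Added illustrative sketch (not from the Lucene.NET sources above). Re-uses the same
// --- TermContext.Build / GetSpans pattern as ExtractWeightedSpanTerms; `context` is an assumed
// --- AtomicReaderContext for the segment being inspected, and the "body" field/terms are invented.
var spanQuery = new SpanNearQuery(
    new SpanQuery[]
    {
        new SpanTermQuery(new Term("body", "lucene")),
        new SpanTermQuery(new Term("body", "net"))
    },
    2,      // slop
    true);  // in order

var termContexts = new Dictionary<Term, TermContext>();
ISet<Term> extractedTerms = new HashSet<Term>();
spanQuery.ExtractTerms(extractedTerms);
foreach (Term term in extractedTerms)
{
    termContexts[term] = TermContext.Build(context, term);
}

IBits acceptDocs = context.AtomicReader.LiveDocs;
Spans.Spans spans = spanQuery.GetSpans(context, acceptDocs, termContexts);
while (spans.MoveNext())
{
    Console.WriteLine("doc=" + spans.Doc + " start=" + spans.Start + " end=" + spans.End);
}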
public virtual void TestRandom() { int num = AtLeast(2); for (int iter = 0; iter < num; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } Directory dir = NewDirectory(); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); // we can do this because we use NoMergePolicy (and dont merge to "nothing") w.KeepFullyDeletedSegments = true; IDictionary <BytesRef, IList <int?> > docs = new Dictionary <BytesRef, IList <int?> >(); HashSet <int?> deleted = new HashSet <int?>(); IList <BytesRef> terms = new List <BytesRef>(); int numDocs = TestUtil.NextInt(Random(), 1, 100 * RANDOM_MULTIPLIER); Documents.Document doc = new Documents.Document(); Field f = NewStringField("field", "", Field.Store.NO); doc.Add(f); Field id = NewStringField("id", "", Field.Store.NO); doc.Add(id); bool onlyUniqueTerms = Random().NextBoolean(); if (VERBOSE) { Console.WriteLine("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs); } HashSet <BytesRef> uniqueTerms = new HashSet <BytesRef>(); for (int i = 0; i < numDocs; i++) { if (!onlyUniqueTerms && Random().NextBoolean() && terms.Count > 0) { // re-use existing term BytesRef term = terms[Random().Next(terms.Count)]; docs[term].Add(i); f.SetStringValue(term.Utf8ToString()); } else { string s = TestUtil.RandomUnicodeString(Random(), 10); BytesRef term = new BytesRef(s); if (!docs.ContainsKey(term)) { docs[term] = new List <int?>(); } docs[term].Add(i); terms.Add(term); uniqueTerms.Add(term); f.SetStringValue(s); } id.SetStringValue("" + i); w.AddDocument(doc); if (Random().Next(4) == 1) { w.Commit(); } if (i > 0 && Random().Next(20) == 1) { int delID = Random().Next(i); deleted.Add(delID); w.DeleteDocuments(new Term("id", "" + delID)); if (VERBOSE) { Console.WriteLine("TEST: delete " + delID); } } } if (VERBOSE) { List <BytesRef> termsList = new List <BytesRef>(uniqueTerms); #pragma warning disable 612, 618 termsList.Sort(BytesRef.UTF8SortedAsUTF16Comparer); #pragma warning restore 612, 618 Console.WriteLine("TEST: terms in UTF16 order:"); foreach (BytesRef b in termsList) { Console.WriteLine(" " + UnicodeUtil.ToHexString(b.Utf8ToString()) + " " + b); foreach (int docID in docs[b]) { if (deleted.Contains(docID)) { Console.WriteLine(" " + docID + " (deleted)"); } else { Console.WriteLine(" " + docID); } } } } IndexReader reader = w.GetReader(); w.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: reader=" + reader); } IBits liveDocs = MultiFields.GetLiveDocs(reader); foreach (int delDoc in deleted) { Assert.IsFalse(liveDocs.Get(delDoc)); } for (int i = 0; i < 100; i++) { BytesRef term = terms[Random().Next(terms.Count)]; if (VERBOSE) { Console.WriteLine("TEST: seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " " + term); } DocsEnum docsEnum = TestUtil.Docs(Random(), reader, "field", term, liveDocs, null, DocsFlags.NONE); Assert.IsNotNull(docsEnum); foreach (int docID in docs[term]) { if (!deleted.Contains(docID)) { Assert.AreEqual(docID, docsEnum.NextDoc()); } } Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc()); } reader.Dispose(); dir.Dispose(); } }
public override DocsAndPositionsEnum DocsAndPositions(IBits liveDocs, DocsAndPositionsEnum reuse, DocsAndPositionsFlags flags) { throw new System.NotSupportedException(); }
public override BulkScorer GetBulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, IBits acceptDocs)
{
    if (scoreDocsInOrder || outerInstance.m_minNrShouldMatch > 1)
    {
        // TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch
        // but the same is even true of pure conjunctions...
        return base.GetBulkScorer(context, scoreDocsInOrder, acceptDocs);
    }

    IList<BulkScorer> prohibited = new List<BulkScorer>();
    IList<BulkScorer> optional = new List<BulkScorer>();
    using (IEnumerator<BooleanClause> cIter = outerInstance.clauses.GetEnumerator())
    {
        foreach (Weight w in m_weights)
        {
            cIter.MoveNext();
            BooleanClause c = cIter.Current;
            BulkScorer subScorer = w.GetBulkScorer(context, false, acceptDocs);
            if (subScorer == null)
            {
                if (c.IsRequired)
                {
                    return null;
                }
            }
            else if (c.IsRequired)
            {
                // TODO: there are some cases where BooleanScorer
                // would handle conjunctions faster than
                // BooleanScorer2...
                return base.GetBulkScorer(context, scoreDocsInOrder, acceptDocs);
            }
            else if (c.IsProhibited)
            {
                prohibited.Add(subScorer);
            }
            else
            {
                optional.Add(subScorer);
            }
        }
    }

    // Check if we can and should return a BooleanScorer
    return new BooleanScorer(this, disableCoord, outerInstance.m_minNrShouldMatch, optional, prohibited, m_maxCoord);
}
/// <exception cref="System.IO.IOException"></exception> private ContainsPrefixTreeFilter.SmallDocSet CollectDocs(IBits acceptContains) { ContainsPrefixTreeFilter.SmallDocSet set = null; this.docsEnum = this.termsEnum.Docs(acceptContains, this.docsEnum, DocsEnum.FLAG_NONE ); int docid; while ((docid = this.docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (set == null) { int size = this.termsEnum.DocFreq; if (size <= 0) { size = 16; } set = new ContainsPrefixTreeFilter.SmallDocSet(size); } set.Set(docid); } return set; }
public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
{
    IList<Scorer> required = new List<Scorer>();
    IList<Scorer> prohibited = new List<Scorer>();
    IList<Scorer> optional = new List<Scorer>();
    IEnumerator<BooleanClause> cIter = outerInstance.clauses.GetEnumerator();
    foreach (Weight w in m_weights)
    {
        cIter.MoveNext();
        BooleanClause c = cIter.Current;
        Scorer subScorer = w.GetScorer(context, acceptDocs);
        if (subScorer == null)
        {
            if (c.IsRequired)
            {
                return null;
            }
        }
        else if (c.IsRequired)
        {
            required.Add(subScorer);
        }
        else if (c.IsProhibited)
        {
            prohibited.Add(subScorer);
        }
        else
        {
            optional.Add(subScorer);
        }
    }

    if (required.Count == 0 && optional.Count == 0)
    {
        // no required and optional clauses.
        return null;
    }
    else if (optional.Count < outerInstance.m_minNrShouldMatch)
    {
        // either >1 req scorer, or there are 0 req scorers and at least 1
        // optional scorer. Therefore if there are not enough optional scorers
        // no documents will be matched by the query
        return null;
    }

    // simple conjunction
    if (optional.Count == 0 && prohibited.Count == 0)
    {
        float coord = disableCoord ? 1.0f : Coord(required.Count, m_maxCoord);
        return new ConjunctionScorer(this, required.ToArray(), coord);
    }

    // simple disjunction
    if (required.Count == 0 && prohibited.Count == 0 && outerInstance.m_minNrShouldMatch <= 1 && optional.Count > 1)
    {
        var coord = new float[optional.Count + 1];
        for (int i = 0; i < coord.Length; i++)
        {
            coord[i] = disableCoord ? 1.0f : Coord(i, m_maxCoord);
        }
        return new DisjunctionSumScorer(this, optional.ToArray(), coord);
    }

    // Return a BooleanScorer2
    return new BooleanScorer2(this, disableCoord, outerInstance.m_minNrShouldMatch, required, prohibited, optional, m_maxCoord);
}
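// --- Added illustrative sketch (not from the Lucene.NET sources above). The clause mix and
// --- MinimumNumberShouldMatch on the query side decide which scorer GetScorer above ends up
// --- building; the field and terms below are invented for illustration.
static TopDocs SearchWithMixedClauses(IndexSearcher searcher)
{
    var bq = new BooleanQuery();
    bq.Add(new TermQuery(new Term("body", "lucene")), Occur.MUST);     // required clause
    bq.Add(new TermQuery(new Term("body", "search")), Occur.SHOULD);   // optional clause
    bq.Add(new TermQuery(new Term("body", "spam")), Occur.MUST_NOT);   // prohibited clause
    bq.MinimumNumberShouldMatch = 1;                                   // constrains the optional clauses
    return searcher.Search(bq, 10);
}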
/// <exception cref="System.IO.IOException"></exception> private ContainsPrefixTreeFilter.SmallDocSet GetLeafDocs(Cell leafCell, IBits acceptContains) { System.Diagnostics.Debug.Assert(new BytesRef(leafCell.GetTokenBytes().ToSByteArray()).Equals(this .termBytes)); BytesRef nextTerm = this.termsEnum.Next(); if (nextTerm == null) { this.termsEnum = null; //signals all done return null; } this.nextCell = this._enclosing.grid.GetCell(nextTerm.bytes.ToByteArray(), nextTerm.offset, nextTerm .length, this.nextCell); if (this.nextCell.Level == leafCell.Level && this.nextCell.IsLeaf()) { return this.CollectDocs(acceptContains); } else { return null; } }
/// <summary>
/// Must fully consume state, since after this call that
/// <see cref="TermState"/> may be reused.
/// </summary>
public abstract DocsAndPositionsEnum DocsAndPositions(FieldInfo fieldInfo, BlockTermState state, IBits skipDocs, DocsAndPositionsEnum reuse, DocsAndPositionsFlags flags);
/// <exception cref="System.IO.IOException"></exception> public ContainsVisitor(ContainsPrefixTreeFilter _enclosing, AtomicReaderContext context , IBits acceptDocs) : base(_enclosing, context, acceptDocs) { this._enclosing = _enclosing; }
/// <summary>
/// Inverts all terms.
/// </summary>
public DocTermOrds(AtomicReader reader, IBits liveDocs, string field)
    : this(reader, liveDocs, field, null, int.MaxValue)
{
}
public SingleDocValuesAnonymousClass(SingleFieldSource @this, FieldCache.Singles arr, IBits valid) : base(@this) { this.arr = arr; this.valid = valid; }
/// <summary>
/// Inverts only terms starting w/ prefix.
/// </summary>
public DocTermOrds(AtomicReader reader, IBits liveDocs, string field, BytesRef termPrefix)
    : this(reader, liveDocs, field, termPrefix, int.MaxValue)
{
}
/// <exception cref="System.IO.IOException"></exception> public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs ) { return new _VisitorTemplate_55(this, context, acceptDocs, hasIndexedLeaves).GetDocIdSet (); }
public DistanceFunctionValue(DistanceValueSource enclosingInstance, AtomicReader reader)
{
    this.enclosingInstance = enclosingInstance;
    ptX = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.FieldNameX, true);
    ptY = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.FieldNameY, true);
    validX = FieldCache.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.FieldNameX);
    validY = FieldCache.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.FieldNameY);
    from = enclosingInstance.from;
    calculator = enclosingInstance.strategy.SpatialContext.GetDistCalc();
    nullValue = (enclosingInstance.strategy.SpatialContext.IsGeo() ? 180 : double.MaxValue);
}
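// --- Added illustrative sketch (not from the Lucene.NET sources above). The per-document
// --- distances computed by the snippets above come from a strategy's MakeDistanceValueSource;
// --- the field name and query point are invented, and the GetSortField(bool) call is assumed
// --- from the 4.x ValueSource API.
static TopDocs SortByDistance(IndexSearcher searcher)
{
    var ctx = SpatialContext.GEO;
    var strategy = new PointVectorStrategy(ctx, "location");                        // hypothetical field name
    ValueSource distances = strategy.MakeDistanceValueSource(ctx.MakePoint(-73.98, 40.75));

    Sort sort = new Sort(distances.GetSortField(false));                            // ascending: nearest first
    return searcher.Search(new MatchAllDocsQuery(), 10, sort);
}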
/// <exception cref="System.IO.IOException"></exception> public BaseTermsEnumTraverser(AbstractPrefixTreeFilter _enclosing, AtomicReaderContext context, IBits acceptDocs) { this._enclosing = _enclosing; //remember to check for null in getDocIdSet this.context = context; AtomicReader reader = context.AtomicReader; this.acceptDocs = acceptDocs; maxDoc = reader.MaxDoc; Terms terms = reader.Terms(this._enclosing.fieldName); if (terms != null) { termsEnum = terms.Iterator(null); } }
public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, bool doScores, IBits acceptDocs) : base(weight) { _doScores = doScores; _parentBits = parentBits; _parentScorer = parentScorer; _acceptDocs = acceptDocs; }
/// <summary>
/// Inverts only terms starting w/ prefix, and only terms
/// whose docFreq (not taking deletions into account) is
/// &lt;= <paramref name="maxTermDocFreq"/>
/// </summary>
public DocTermOrds(AtomicReader reader, IBits liveDocs, string field, BytesRef termPrefix, int maxTermDocFreq)
    : this(reader, liveDocs, field, termPrefix, maxTermDocFreq, DEFAULT_INDEX_INTERVAL_BITS)
{
}
public override int Merge(MergeState mergeState) { int docCount = 0; int idx = 0; foreach (AtomicReader reader in mergeState.Readers) { SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++]; CompressingStoredFieldsReader matchingFieldsReader = null; if (matchingSegmentReader != null) { StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader; // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader if (fieldsReader != null && fieldsReader is CompressingStoredFieldsReader) { matchingFieldsReader = (CompressingStoredFieldsReader)fieldsReader; } } int maxDoc = reader.MaxDoc; IBits liveDocs = reader.LiveDocs; if (matchingFieldsReader == null || matchingFieldsReader.Version != VERSION_CURRENT || matchingFieldsReader.CompressionMode != compressionMode || matchingFieldsReader.ChunkSize != chunkSize) // the way data is decompressed depends on the chunk size - means reader version is not the same as the writer version { // naive merge... for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc)) { Document doc = reader.Document(i); AddDocument(doc, mergeState.FieldInfos); ++docCount; mergeState.CheckAbort.Work(300); } } else { int docID = NextLiveDoc(0, liveDocs, maxDoc); if (docID < maxDoc) { // not all docs were deleted CompressingStoredFieldsReader.ChunkIterator it = matchingFieldsReader.GetChunkIterator(docID); int[] startOffsets = new int[0]; do { // go to the next chunk that contains docID it.Next(docID); // transform lengths into offsets if (startOffsets.Length < it.chunkDocs) { startOffsets = new int[ArrayUtil.Oversize(it.chunkDocs, 4)]; } for (int i = 1; i < it.chunkDocs; ++i) { startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1]; } if (numBufferedDocs == 0 && startOffsets[it.chunkDocs - 1] < chunkSize && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize && NextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) // no deletion in the chunk - chunk is large enough - chunk is small enough - starting a new chunk { Debug.Assert(docID == it.docBase); // no need to decompress, just copy data indexWriter.WriteIndex(it.chunkDocs, fieldsStream.GetFilePointer()); WriteHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths); it.CopyCompressedData(fieldsStream); this.docBase += it.chunkDocs; docID = NextLiveDoc(it.docBase + it.chunkDocs, liveDocs, maxDoc); docCount += it.chunkDocs; mergeState.CheckAbort.Work(300 * it.chunkDocs); } else { // decompress it.Decompress(); if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.Length) { throw new CorruptIndexException("Corrupted: expected chunk size=" + startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] + ", got " + it.bytes.Length); } // copy non-deleted docs for (; docID < it.docBase + it.chunkDocs; docID = NextLiveDoc(docID + 1, liveDocs, maxDoc)) { int diff = docID - it.docBase; StartDocument(it.numStoredFields[diff]); bufferedDocs.WriteBytes(it.bytes.Bytes, it.bytes.Offset + startOffsets[diff], it.lengths[diff]); FinishDocument(); ++docCount; mergeState.CheckAbort.Work(300); } } } while (docID < maxDoc); it.CheckIntegrity(); } } } Finish(mergeState.FieldInfos, docCount); return(docCount); }
/// <summary>
/// Inverts only terms starting w/ prefix, and only terms
/// whose docFreq (not taking deletions into account) is
/// &lt;= <paramref name="maxTermDocFreq"/>, with a custom indexing interval
/// (default is every 128th term).
/// </summary>
public DocTermOrds(AtomicReader reader, IBits liveDocs, string field, BytesRef termPrefix, int maxTermDocFreq, int indexIntervalBits)
    : this(field, maxTermDocFreq, indexIntervalBits)
{
    Uninvert(reader, liveDocs, termPrefix);
}
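// --- Added illustrative sketch (not from the Lucene.NET sources above). Constructs DocTermOrds
// --- against a single (wrapped) segment; the "category" field is hypothetical, and the actual
// --- un-inversion happens inside the constructor via Uninvert (shown next).
static DocTermOrds UninvertCategoryField(DirectoryReader directoryReader)
{
    AtomicReader atomicReader = SlowCompositeReaderWrapper.Wrap(directoryReader);
    IBits liveDocs = atomicReader.LiveDocs;   // null when the segment has no deletions
    return new DocTermOrds(atomicReader, liveDocs, "category");
}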
/// <summary> /// Call this only once (if you subclass!) </summary> protected virtual void Uninvert(AtomicReader reader, IBits liveDocs, BytesRef termPrefix) { FieldInfo info = reader.FieldInfos.FieldInfo(m_field); if (info != null && info.HasDocValues) { throw IllegalStateException.Create("Type mismatch: " + m_field + " was indexed as " + info.DocValuesType); } //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix); long startTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results m_prefix = termPrefix == null ? null : BytesRef.DeepCopyOf(termPrefix); int maxDoc = reader.MaxDoc; int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number int[] lastTerm = new int[maxDoc]; // last term we saw for this document var bytes = new sbyte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts) Fields fields = reader.Fields; if (fields == null) { // No terms return; } Terms terms = fields.GetTerms(m_field); if (terms == null) { // No terms return; } TermsEnum te = terms.GetEnumerator(); BytesRef seekStart = termPrefix ?? new BytesRef(); //System.out.println("seekStart=" + seekStart.utf8ToString()); if (te.SeekCeil(seekStart) == TermsEnum.SeekStatus.END) { // No terms match return; } // If we need our "term index wrapper", these will be // init'd below: IList <BytesRef> indexedTerms = null; PagedBytes indexedTermsBytes = null; bool testedOrd = false; // we need a minimum of 9 bytes, but round up to 12 since the space would // be wasted with most allocators anyway. var tempArr = new sbyte[12]; // // enumerate all terms, and build an intermediate form of the un-inverted field. // // During this intermediate form, every document has a (potential) byte[] // and the int[maxDoc()] array either contains the termNumber list directly // or the *end* offset of the termNumber list in it's byte array (for faster // appending and faster creation of the final form). // // idea... if things are too large while building, we could do a range of docs // at a time (but it would be a fair amount slower to build) // could also do ranges in parallel to take advantage of multiple CPUs // OPTIONAL: remap the largest df terms to the lowest 128 (single byte) // values. this requires going over the field first to find the most // frequent terms ahead of time. 
int termNum = 0; m_docsEnum = null; // Loop begins with te positioned to first term (we call // seek above): for (; ;) { BytesRef t = te.Term; if (t == null || (termPrefix != null && !StringHelper.StartsWith(t, termPrefix))) { break; } //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum); if (!testedOrd) { try { m_ordBase = (int)te.Ord; //System.out.println("got ordBase=" + ordBase); } catch (Exception uoe) when(uoe.IsUnsupportedOperationException()) { // Reader cannot provide ord support, so we wrap // our own support by creating our own terms index: indexedTerms = new JCG.List <BytesRef>(); indexedTermsBytes = new PagedBytes(15); //System.out.println("NO ORDS"); } testedOrd = true; } VisitTerm(te, termNum); if (indexedTerms != null && (termNum & indexIntervalMask) == 0) { // Index this term m_sizeOfIndexedStrings += t.Length; BytesRef indexedTerm = new BytesRef(); indexedTermsBytes.Copy(t, indexedTerm); // TODO: really should 1) strip off useless suffix, // and 2) use FST not array/PagedBytes indexedTerms.Add(indexedTerm); } int df = te.DocFreq; if (df <= m_maxTermDocFreq) { m_docsEnum = te.Docs(liveDocs, m_docsEnum, DocsFlags.NONE); // dF, but takes deletions into account int actualDF = 0; for (; ;) { int doc = m_docsEnum.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } //System.out.println(" chunk=" + chunk + " docs"); actualDF++; m_termInstances++; //System.out.println(" docID=" + doc); // add TNUM_OFFSET to the term number to make room for special reserved values: // 0 (end term) and 1 (index into byte array follows) int delta = termNum - lastTerm[doc] + TNUM_OFFSET; lastTerm[doc] = termNum; int val = index[doc]; if ((val & 0xff) == 1) { // index into byte array (actually the end of // the doc-specific byte[] when building) int pos = val.TripleShift(8); int ilen = VInt32Size(delta); var arr = bytes[doc]; int newend = pos + ilen; if (newend > arr.Length) { // We avoid a doubling strategy to lower memory usage. // this faceting method isn't for docs with many terms. // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary. // TODO: figure out what array lengths we can round up to w/o actually using more memory // (how much space does a byte[] take up? Is data preceded by a 32 bit length only? // It should be safe to round up to the nearest 32 bits in any case. int newLen = (newend + 3) & unchecked ((int)0xfffffffc); // 4 byte alignment var newarr = new sbyte[newLen]; Array.Copy(arr, 0, newarr, 0, pos); arr = newarr; bytes[doc] = newarr; } pos = WriteInt32(delta, arr, pos); index[doc] = (pos << 8) | 1; // update pointer to end index in byte[] } else { // OK, this int has data in it... find the end (a zero starting byte - not // part of another number, hence not following a byte with the high bit set). int ipos; if (val == 0) { ipos = 0; } else if ((val & 0x0000ff80) == 0) { ipos = 1; } else if ((val & 0x00ff8000) == 0) { ipos = 2; } else if ((val & 0xff800000) == 0) { ipos = 3; } else { ipos = 4; } //System.out.println(" ipos=" + ipos); int endPos = WriteInt32(delta, tempArr, ipos); //System.out.println(" endpos=" + endPos); if (endPos <= 4) { //System.out.println(" fits!"); // value will fit in the integer... move bytes back for (int j = ipos; j < endPos; j++) { val |= (tempArr[j] & 0xff) << (j << 3); } index[doc] = val; } else { // value won't fit... 
move integer into byte[] for (int j = 0; j < ipos; j++) { tempArr[j] = (sbyte)val; val = val.TripleShift(8); } // point at the end index in the byte[] index[doc] = (endPos << 8) | 1; bytes[doc] = tempArr; tempArr = new sbyte[12]; } } } SetActualDocFreq(termNum, actualDF); } termNum++; if (!te.MoveNext()) { break; } } m_numTermsInField = termNum; long midPoint = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results if (m_termInstances == 0) { // we didn't invert anything // lower memory consumption. m_tnums = null; } else { this.m_index = index; // // transform intermediate form into the final form, building a single byte[] // at a time, and releasing the intermediate byte[]s as we go to avoid // increasing the memory footprint. // for (int pass = 0; pass < 256; pass++) { var target = m_tnums[pass]; var pos = 0; // end in target; if (target != null) { pos = target.Length; } else { target = new sbyte[4096]; } // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx // where pp is the pass (which array we are building), and xx is all values. // each pass shares the same byte[] for termNumber lists. for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24)) { int lim = Math.Min(docbase + (1 << 16), maxDoc); for (int doc = docbase; doc < lim; doc++) { //System.out.println(" pass="******" process docID=" + doc); int val = index[doc]; if ((val & 0xff) == 1) { int len = val.TripleShift(8); //System.out.println(" ptr pos=" + pos); index[doc] = (pos << 8) | 1; // change index to point to start of array if ((pos & 0xff000000) != 0) { // we only have 24 bits for the array index throw IllegalStateException.Create("Too many values for UnInvertedField faceting on field " + m_field); } var arr = bytes[doc]; /* * for(byte b : arr) { * //System.out.println(" b=" + Integer.toHexString((int) b)); * } */ bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM if (target.Length <= pos + len) { int newlen = target.Length; //* we don't have to worry about the array getting too large // since the "pos" param will overflow first (only 24 bits available) // if ((newlen<<1) <= 0) { // // overflow... // newlen = Integer.MAX_VALUE; // if (newlen <= pos + len) { // throw new SolrException(400,"Too many terms to uninvert field!"); // } // } else { // while (newlen <= pos + len) newlen<<=1; // doubling strategy // } // while (newlen <= pos + len) // doubling strategy { newlen <<= 1; } var newtarget = new sbyte[newlen]; Array.Copy(target, 0, newtarget, 0, pos); target = newtarget; } Array.Copy(arr, 0, target, pos, len); pos += len + 1; // skip single byte at end and leave it 0 for terminator } } } // shrink array if (pos < target.Length) { var newtarget = new sbyte[pos]; Array.Copy(target, 0, newtarget, 0, pos); target = newtarget; } m_tnums[pass] = target; if ((pass << 16) > maxDoc) { break; } } } if (indexedTerms != null) { m_indexedTermsArray = new BytesRef[indexedTerms.Count]; indexedTerms.CopyTo(m_indexedTermsArray, 0); } long endTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results m_total_time = (int)(endTime - startTime); m_phase1_time = (int)(midPoint - startTime); }
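// --- Added worked illustration (not from the Lucene.NET sources above) of the delta encoding
// --- used in Uninvert: TNUM_OFFSET is 2 in the Java original, stated here as an assumption.
const int TNUM_OFFSET = 2;
int lastTerm = 0;
foreach (int termNum in new[] { 5, 9 })             // term numbers seen for one document
{
    int delta = termNum - lastTerm + TNUM_OFFSET;   // 7, then 6
    lastTerm = termNum;
    Console.WriteLine(delta);                       // each delta is then vInt-encoded into bytes[doc]
}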
public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs) { var values = source.GetValues(null, context); return new ValueSourceFilteredDocIdSet(startingFilter.GetDocIdSet(context, acceptDocs), values, this); }
public DocIdSetAnonymousInnerClassHelper(IBits acceptDocs, TermsEnum termsEnum) { this.acceptDocs = acceptDocs; this.termsEnum = termsEnum; }