Example #1
        public void TestConcurrentSpan()
        {
            String      TEXT        = "the fox jumped";
            Directory   directory   = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(directory,
                                                      NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));

            try
            {
                Document document = new Document();

                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.StoreTermVectorOffsets   = true;
                customType.StoreTermVectorPositions = true;
                customType.StoreTermVectors         = true;
                document.Add(new Field(FIELD, new TokenStreamConcurrent(), customType));
                indexWriter.AddDocument(document);
            }
            finally
            {
                indexWriter.Dispose();
            }
            IndexReader indexReader = DirectoryReader.Open(directory);

            try
            {
                assertEquals(1, indexReader.NumDocs);
                IndexSearcher indexSearcher = NewSearcher(indexReader);
                Query         phraseQuery   = new SpanNearQuery(new SpanQuery[] {
                    new SpanTermQuery(new Term(FIELD, "fox")),
                    new SpanTermQuery(new Term(FIELD, "jumped"))
                }, 0, true);
                FixedBitSet bitset = new FixedBitSet(indexReader.MaxDoc);
                indexSearcher.Search(phraseQuery, new ConcurrentSpanCollectorAnonymousHelper(this, bitset));

                assertEquals(1, bitset.Cardinality());
                int         maxDoc      = indexReader.MaxDoc;
                Highlighter highlighter = new Highlighter(
                    new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
                    new QueryScorer(phraseQuery));
                for (int position = bitset.NextSetBit(0);
                     position >= 0 && position < maxDoc - 1;
                     position = bitset.NextSetBit(position + 1))
                {
                    assertEquals(0, position);
                    TokenStream tokenStream = TokenSources.GetTokenStream(
                        indexReader.GetTermVector(position, FIELD), false);
                    assertEquals(highlighter.GetBestFragment(new TokenStreamConcurrent(), TEXT),
                                 highlighter.GetBestFragment(tokenStream, TEXT));
                }
            }
            finally
            {
                indexReader.Dispose();
                directory.Dispose();
            }
        }
Example #2
        void doNextSetBit(BitArray a, FixedBitSet b)
        {
            int aa = -1, bb = -1;

            do
            {
                aa = a.NextSetBit(aa + 1);
                bb = bb < b.Length() - 1 ? b.NextSetBit(bb + 1) : -1;
                Assert.AreEqual(aa, bb);
            } while (aa >= 0);
        }
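For orientation, the comparison above exercises the basic iteration idiom: NextSetBit(i) returns the index of the first set bit at or after i, or -1 when none remains, and the argument must stay below the set's length. A minimal standalone sketch of that idiom follows; the set contents and size are illustrative, not taken from the test above.

            FixedBitSet bits = new FixedBitSet(16);   // hypothetical set
            bits.Set(3);
            bits.Set(9);

            int size = 16;
            for (int i = bits.NextSetBit(0); i >= 0; i = (i + 1 < size) ? bits.NextSetBit(i + 1) : -1)
            {
                Console.WriteLine(i);                  // prints 3, then 9
            }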
Example #4
        /// <param name="targetMaxSaturation">
        /// A number between 0 and 1 describing the % of bits that would ideally be set in the result.
        /// Lower values have better accuracy but require more space.
        /// </param>
        /// <returns>A smaller <see cref="FuzzySet"/> or <c>null</c> if the current set is already over-saturated.</returns>
        public virtual FuzzySet Downsize(float targetMaxSaturation)
        {
            var         numBitsSet = _filter.Cardinality();
            FixedBitSet rightSizedBitSet;
            var         rightSizedBitSetSize = _bloomSize;

            // Hopefully find a smaller bit set size into which we can project the accumulated values while maintaining the desired saturation level
            for (int i = 0; i < _usableBitSetSizes.Length; i++)
            {
                int   candidateBitsetSize = _usableBitSetSizes[i];
                float candidateSaturation = (float)numBitsSet
                                            / (float)candidateBitsetSize;
                if (candidateSaturation <= targetMaxSaturation)
                {
                    rightSizedBitSetSize = candidateBitsetSize;
                    break;
                }
            }
            // Re-project the numbers to a smaller space if necessary
            if (rightSizedBitSetSize < _bloomSize)
            {
                // Reset the choice of bitset to the smaller version
                rightSizedBitSet = new FixedBitSet(rightSizedBitSetSize + 1);
                // Map across the bits from the large set to the smaller one
                var bitIndex = 0;
                do
                {
                    bitIndex = _filter.NextSetBit(bitIndex);
                    if (bitIndex < 0)
                    {
                        continue;
                    }

                    // Project the larger number into a smaller one effectively
                    // modulo-ing by using the target bitset size as a mask
                    var downSizedBitIndex = bitIndex & rightSizedBitSetSize;
                    rightSizedBitSet.Set(downSizedBitIndex);
                    bitIndex++;
                } while ((bitIndex >= 0) && (bitIndex <= _bloomSize));
            }
            else
            {
                return(null);
            }
            return(new FuzzySet(rightSizedBitSet, rightSizedBitSetSize, _hashFunction));
        }
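The projection loop in Downsize appears to rely on the candidate sizes having the form 2^n - 1, so the chosen size doubles as a bit mask and bitIndex & rightSizedBitSetSize acts as a cheap modulo. A reduced standalone sketch of that remapping, with hypothetical sizes picked only for illustration:

            int smallMask = 63;                        // assumed 2^6 - 1 target size
            int largeSize = 1024;
            FixedBitSet large = new FixedBitSet(largeSize);
            large.Set(5);
            large.Set(700);

            FixedBitSet small = new FixedBitSet(smallMask + 1);
            for (int i = large.NextSetBit(0); i >= 0; i = (i + 1 < largeSize) ? large.NextSetBit(i + 1) : -1)
            {
                small.Set(i & smallMask);              // 5 -> 5, 700 -> 60
            }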
        internal virtual int Parent(int doc)
        {
            return(parentBits.NextSetBit(doc));
        }
            public override int NextDoc()
            {
                //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
                // Loop until we hit a parentDoc that's accepted
                while (true)
                {
                    if (_nextChildDoc == NO_MORE_DOCS)
                    {
                        //System.out.println("  end");
                        return(_parentDoc = NO_MORE_DOCS);
                    }

                    // Gather all children sharing the same parent as
                    // nextChildDoc

                    _parentDoc = _parentBits.NextSetBit(_nextChildDoc);

                    // Parent & child docs are supposed to be
                    // orthogonal:
                    if (_nextChildDoc == _parentDoc)
                    {
                        throw IllegalStateException.Create("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
                    }

                    //System.out.println("  parentDoc=" + parentDoc);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(_parentDoc != -1);
                    }

                    //System.out.println("  nextChildDoc=" + nextChildDoc);
                    if (_acceptDocs != null && !_acceptDocs.Get(_parentDoc))
                    {
                        // Parent doc not accepted; skip child docs until
                        // we hit a new parent doc:
                        do
                        {
                            _nextChildDoc = _childScorer.NextDoc();
                        } while (_nextChildDoc < _parentDoc);

                        // Parent & child docs are supposed to be
                        // orthogonal:
                        if (_nextChildDoc == _parentDoc)
                        {
                            throw IllegalStateException.Create("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
                        }

                        continue;
                    }

                    float totalScore = 0;
                    float maxScore   = float.NegativeInfinity;

                    _childDocUpto = 0;
                    _parentFreq   = 0;
                    do
                    {
                        //System.out.println("  c=" + nextChildDoc);
                        if (_pendingChildDocs != null && _pendingChildDocs.Length == _childDocUpto)
                        {
                            _pendingChildDocs = ArrayUtil.Grow(_pendingChildDocs);
                        }
                        if (_pendingChildScores != null && _scoreMode != ScoreMode.None && _pendingChildScores.Length == _childDocUpto)
                        {
                            _pendingChildScores = ArrayUtil.Grow(_pendingChildScores);
                        }
                        if (_pendingChildDocs != null)
                        {
                            _pendingChildDocs[_childDocUpto] = _nextChildDoc;
                        }
                        if (_scoreMode != ScoreMode.None)
                        {
                            // TODO: specialize this into dedicated classes per-scoreMode
                            float childScore = _childScorer.GetScore();
                            int   childFreq  = _childScorer.Freq;
                            if (_pendingChildScores != null)
                            {
                                _pendingChildScores[_childDocUpto] = childScore;
                            }
                            maxScore     = Math.Max(childScore, maxScore);
                            totalScore  += childScore;
                            _parentFreq += childFreq;
                        }
                        _childDocUpto++;
                        _nextChildDoc = _childScorer.NextDoc();
                    } while (_nextChildDoc < _parentDoc);

                    // Parent & child docs are supposed to be
                    // orthogonal:
                    if (_nextChildDoc == _parentDoc)
                    {
                        throw IllegalStateException.Create("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType());
                    }

                    switch (_scoreMode)
                    {
                    case ScoreMode.Avg:
                        _parentScore = totalScore / _childDocUpto;
                        break;

                    case ScoreMode.Max:
                        _parentScore = maxScore;
                        break;

                    case ScoreMode.Total:
                        _parentScore = totalScore;
                        break;

                    case ScoreMode.None:
                        break;
                    }

                    //System.out.println("  return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
                    return(_parentDoc);
                }
            }
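Both the Parent helper and the NextDoc loop above lean on the block-join indexing convention: each run of child documents is immediately followed by its parent, and only parent positions are set in the parent bit set, so NextSetBit(childDoc) yields the enclosing parent. A tiny illustrative sketch (the doc IDs are hypothetical):

            FixedBitSet parentBits = new FixedBitSet(8);
            parentBits.Set(3);                                // parent of child docs 0..2
            parentBits.Set(7);                                // parent of child docs 4..6

            int childDoc = 5;
            int parentDoc = parentBits.NextSetBit(childDoc);  // == 7; a parent doc maps to itself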
Example #7
        public void Test()
        {
            RandomIndexWriter writer;
            DirectoryReader   indexReader;
            int numParents        = AtLeast(200);
            IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            cfg.SetMergePolicy(NewLogMergePolicy());
            using (writer = new RandomIndexWriter(Random(), NewDirectory(), cfg))
            {
                Document parentDoc = new Document();
                NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L);
                parentDoc.Add(parentVal);
                StringField parent = new StringField("parent", "true", Field.Store.YES);
                parentDoc.Add(parent);
                for (int i = 0; i < numParents; ++i)
                {
                    List<Document> documents   = new List<Document>();
                    int            numChildren = Random().nextInt(10);
                    for (int j = 0; j < numChildren; ++j)
                    {
                        Document childDoc = new Document();
                        childDoc.Add(new NumericDocValuesField("child_val", Random().nextInt(5)));
                        documents.Add(childDoc);
                    }
                    parentVal.SetInt64Value(Random().nextInt(50));
                    documents.Add(parentDoc);
                    writer.AddDocuments(documents);
                }
                writer.ForceMerge(1);
                indexReader = writer.Reader;
            }

            AtomicReader     reader        = GetOnlySegmentReader(indexReader);
            Filter           parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
            FixedBitSet      parentBits    = (FixedBitSet)parentsFilter.GetDocIdSet(reader.AtomicContext, null);
            NumericDocValues parentValues  = reader.GetNumericDocValues("parent_val");

            NumericDocValues childValues = reader.GetNumericDocValues("child_val");

            Sort parentSort = new Sort(new SortField("parent_val", SortFieldType.INT64));
            Sort childSort  = new Sort(new SortField("child_val", SortFieldType.INT64));

            Sort   sort   = new Sort(new SortField("custom", new BlockJoinComparerSource(parentsFilter, parentSort, childSort)));
            Sorter sorter = new Sorter(sort);

            Sorter.DocMap docMap = sorter.Sort(reader);
            assertEquals(reader.MaxDoc, docMap.Count);

            int[] children       = new int[1];
            int   numChildren2   = 0;
            int   previousParent = -1;

            for (int i = 0; i < docMap.Count; ++i)
            {
                int oldID = docMap.NewToOld(i);
                if (parentBits.Get(oldID))
                {
                    // check that we have the right children
                    for (int j = 0; j < numChildren2; ++j)
                    {
                        assertEquals(oldID, parentBits.NextSetBit(children[j]));
                    }
                    // check that children are sorted
                    for (int j = 1; j < numChildren2; ++j)
                    {
                        int doc1 = children[j - 1];
                        int doc2 = children[j];
                        if (childValues.Get(doc1) == childValues.Get(doc2))
                        {
                            assertTrue(doc1 < doc2); // sort is stable
                        }
                        else
                        {
                            assertTrue(childValues.Get(doc1) < childValues.Get(doc2));
                        }
                    }
                    // check that parents are sorted
                    if (previousParent != -1)
                    {
                        if (parentValues.Get(previousParent) == parentValues.Get(oldID))
                        {
                            assertTrue(previousParent < oldID);
                        }
                        else
                        {
                            assertTrue(parentValues.Get(previousParent) < parentValues.Get(oldID));
                        }
                    }
                    // reset
                    previousParent = oldID;
                    numChildren2   = 0;
                }
                else
                {
                    children = ArrayUtil.Grow(children, numChildren2 + 1);
                    children[numChildren2++] = oldID;
                }
            }
            indexReader.Dispose();
            writer.w.Directory.Dispose();
        }
Example #8
        /// <summary>
        /// Used when drill downs are highly constraining vs
        /// baseQuery.
        /// </summary>
        private void DoDrillDownAdvanceScoring(ICollector collector, DocIdSetIterator[] disis, ICollector[] sidewaysCollectors)
        {
            int maxDoc  = context.Reader.MaxDoc;
            int numDims = dims.Length;

            //if (DEBUG) {
            //  System.out.println("  doDrillDownAdvanceScoring");
            //}

            // TODO: maybe a class like BS, instead of parallel arrays
            int[]   filledSlots = new int[CHUNK];
            int[]   docIDs      = new int[CHUNK];
            float[] scores      = new float[CHUNK];
            int[]   missingDims = new int[CHUNK];
            int[]   counts      = new int[CHUNK];

            docIDs[0] = -1;
            int nextChunkStart = CHUNK;

            FixedBitSet seen = new FixedBitSet(CHUNK);

            while (true)
            {
                //if (DEBUG) {
                //  System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
                //}

                // First dim:
                //if (DEBUG) {
                //  System.out.println("  dim0");
                //}
                DocIdSetIterator disi = disis[0];
                if (disi != null)
                {
                    int docID = disi.DocID;
                    while (docID < nextChunkStart)
                    {
                        int slot = docID & MASK;

                        if (docIDs[slot] != docID)
                        {
                            seen.Set(slot);
                            // Mark slot as valid:
                            //if (DEBUG) {
                            //  System.out.println("    set docID=" + docID + " id=" + context.reader().document(docID).get("id"));
                            //}
                            docIDs[slot]      = docID;
                            missingDims[slot] = 1;
                            counts[slot]      = 1;
                        }

                        docID = disi.NextDoc();
                    }
                }

                // Second dim:
                //if (DEBUG) {
                //  System.out.println("  dim1");
                //}
                disi = disis[1];
                if (disi != null)
                {
                    int docID = disi.DocID;
                    while (docID < nextChunkStart)
                    {
                        int slot = docID & MASK;

                        if (docIDs[slot] != docID)
                        {
                            // Mark slot as valid:
                            seen.Set(slot);
                            //if (DEBUG) {
                            //  System.out.println("    set docID=" + docID + " missingDim=0 id=" + context.reader().document(docID).get("id"));
                            //}
                            docIDs[slot]      = docID;
                            missingDims[slot] = 0;
                            counts[slot]      = 1;
                        }
                        else
                        {
                            // TODO: single-valued dims will always be true
                            // below; we could somehow specialize
                            if (missingDims[slot] >= 1)
                            {
                                missingDims[slot] = 2;
                                counts[slot]      = 2;
                                //if (DEBUG) {
                                //  System.out.println("    set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id"));
                                //}
                            }
                            else
                            {
                                counts[slot] = 1;
                                //if (DEBUG) {
                                //  System.out.println("    set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id"));
                                //}
                            }
                        }

                        docID = disi.NextDoc();
                    }
                }

                // After this we can "upgrade" to conjunction, because
                // any doc not seen by either dim 0 or dim 1 cannot be
                // a hit or a near miss:

                //if (DEBUG) {
                //  System.out.println("  baseScorer");
                //}

                // Fold in baseScorer, using advance:
                int filledCount = 0;
                int slot0       = 0;
                while (slot0 < CHUNK && (slot0 = seen.NextSetBit(slot0)) != -1)
                {
                    int ddDocID = docIDs[slot0];
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(ddDocID != -1);
                    }

                    int baseDocID = baseScorer.DocID;
                    if (baseDocID < ddDocID)
                    {
                        baseDocID = baseScorer.Advance(ddDocID);
                    }
                    if (baseDocID == ddDocID)
                    {
                        //if (DEBUG) {
                        //  System.out.println("    keep docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
                        //}
                        scores[slot0] = baseScorer.GetScore();
                        filledSlots[filledCount++] = slot0;
                        counts[slot0]++;
                    }
                    else
                    {
                        //if (DEBUG) {
                        //  System.out.println("    no docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
                        //}
                        docIDs[slot0] = -1;

                        // TODO: we could jump slot0 forward to the
                        // baseDocID ... but we'd need to set docIDs for
                        // intervening slots to -1
                    }
                    slot0++;
                }
                seen.Clear(0, CHUNK);

                if (filledCount == 0)
                {
                    if (nextChunkStart >= maxDoc)
                    {
                        break;
                    }
                    nextChunkStart += CHUNK;
                    continue;
                }

                // TODO: factor this out & share w/ union scorer,
                // except we start from dim=2 instead:
                for (int dim = 2; dim < numDims; dim++)
                {
                    //if (DEBUG) {
                    //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
                    //}
                    disi = disis[dim];
                    if (disi != null)
                    {
                        int docID = disi.DocID;
                        while (docID < nextChunkStart)
                        {
                            int slot = docID & MASK;
                            if (docIDs[slot] == docID && counts[slot] >= dim)
                            {
                                // TODO: single-valued dims will always be true
                                // below; we could somehow specialize
                                if (missingDims[slot] >= dim)
                                {
                                    //if (DEBUG) {
                                    //  System.out.println("    set docID=" + docID + " count=" + (dim+2));
                                    //}
                                    missingDims[slot] = dim + 1;
                                    counts[slot]      = dim + 2;
                                }
                                else
                                {
                                    //if (DEBUG) {
                                    //  System.out.println("    set docID=" + docID + " missing count=" + (dim+1));
                                    //}
                                    counts[slot] = dim + 1;
                                }
                            }

                            // TODO: sometimes use advance?
                            docID = disi.NextDoc();
                        }
                    }
                }

                // Collect:
                //if (DEBUG) {
                //  System.out.println("  now collect: " + filledCount + " hits");
                //}
                for (int i = 0; i < filledCount; i++)
                {
                    int slot = filledSlots[i];
                    collectDocID = docIDs[slot];
                    collectScore = scores[slot];
                    //if (DEBUG) {
                    //  System.out.println("    docID=" + docIDs[slot] + " count=" + counts[slot]);
                    //}
                    if (counts[slot] == 1 + numDims)
                    {
                        CollectHit(collector, sidewaysCollectors);
                    }
                    else if (counts[slot] == numDims)
                    {
                        CollectNearMiss(sidewaysCollectors[missingDims[slot]]);
                    }
                }

                if (nextChunkStart >= maxDoc)
                {
                    break;
                }

                nextChunkStart += CHUNK;
            }
        }
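The chunked scoring above assumes CHUNK is a power of two with MASK = CHUNK - 1, so docID & MASK gives every document in the current chunk a unique slot; the seen bit set plus NextSetBit then enumerate only the slots that were actually filled. A reduced sketch of that bookkeeping (the constant value and method names are illustrative, not part of the original class):

        private const int CHUNK = 2048;                // assumed power of two
        private const int MASK = CHUNK - 1;

        private readonly int[] docIDs = new int[CHUNK];
        private readonly FixedBitSet seen = new FixedBitSet(CHUNK);

        private void MarkSeen(int docID)
        {
            int slot = docID & MASK;                   // unique within a CHUNK-sized window
            docIDs[slot] = docID;
            seen.Set(slot);
        }

        private void VisitFilledSlots()
        {
            // Walk only the slots marked in 'seen', then reset for the next chunk.
            int slot = 0;
            while (slot < CHUNK && (slot = seen.NextSetBit(slot)) != -1)
            {
                int docID = docIDs[slot];
                // ... process docID ...
                slot++;
            }
            seen.Clear(0, CHUNK);
        }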