예제 #1
0
 /// <summary>
 /// Makes sure <c>CurrentIdIterator</c> is set, advancing <c>Docs</c> to the
 /// next entry for as long as it is still <c>null</c>. For each entry visited,
 /// <c>CurrentIdIterator</c>, <c>CurrentScorer</c> and <c>CurrentReader</c> are
 /// re-assigned from that entry.
 /// </summary>
 /// <returns>
 /// <c>true</c> if <c>CurrentIdIterator</c> is non-null on exit; <c>false</c>
 /// when <c>Docs</c> was exhausted without producing one.
 /// </returns>
 /// <exception cref="Exception">
 /// Wraps any <see cref="IOException"/> raised while setting up the current
 /// entry; the original exception is available as <c>InnerException</c>.
 /// </exception>
 internal bool EnsureValidDisi()
 {
     while (CurrentIdIterator == null && Docs.MoveNext())
     {
         MatchingDocs matchingDocs = Docs.Current;
         try
         {
             CurrentIdIterator = matchingDocs.DocIdSet.GetEnumerator();
             if (KeepScores)
             {
                 CurrentScorer = new ReplayingScorer(matchingDocs.Scores);
             }
             else
             {
                 // Scores are not kept, so the scorer is given NaN as its score value.
                 CurrentScorer = new ConstantScoreScorer(null, float.NaN, CurrentIdIterator);
             }
             // NOTE(review): `reader()` looks like a leftover from the Java source;
             // confirm the accessor name against the Context type.
             CurrentReader = matchingDocs.Context.reader();
         }
         catch (IOException e)
         {
             // Keep the original failure as the inner exception so the cause is not lost.
             throw new Exception(e.Message, e);
         }
     }
     return CurrentIdIterator != null;
 }
예제 #2
0
        /// <summary>
        /// Collects the <c>Context</c> of every entry in <paramref name="matchingDocs"/>
        /// into a new array, preserving the list order.
        /// </summary>
        /// <param name="matchingDocs">The entries whose contexts are extracted.</param>
        /// <returns>An array with one context per list entry.</returns>
        private static LeafReaderContext[] GetLeafReaderContexts(IList <MatchingDocs> matchingDocs)
        {
            var result = new LeafReaderContext[matchingDocs.Count];

            int index = 0;
            while (index < result.Length)
            {
                result[index] = matchingDocs[index].Context;
                index++;
            }

            return result;
        }
예제 #3
0
        /// <summary>
        /// Creates a sample of the given hits.
        /// <para>
        /// Hits are walked in iteration order and grouped into bins of
        /// <c>(int)(1.0 / samplingRate)</c> documents; one randomly chosen position
        /// per bin is marked in the returned bit set. A bin left unfinished when the
        /// iterator is exhausted is carried over through <c>leftoverBin</c> and
        /// <c>leftoverIndex</c> so the next call can continue it.
        /// </para>
        /// </summary>
        /// <param name="docs">The hits to sample from.</param>
        /// <returns>
        /// A new <c>MatchingDocs</c> with the same context and total hit count as
        /// <paramref name="docs"/>, the sampled bit set, and <c>null</c> scores.
        /// </returns>
        /// <exception cref="Exception">
        /// Wraps any <see cref="IOException"/> raised while iterating; the original
        /// exception is available as <c>InnerException</c>.
        /// </exception>
        private MatchingDocs CreateSample(MatchingDocs docs)
        {
            int maxdoc = docs.Context.Reader.MaxDoc;

            // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
            FixedBitSet sampleDocs = new FixedBitSet(maxdoc);

            int binSize = (int)(1.0 / samplingRate);

            try
            {
                int counter = 0;
                int limit, randomIndex;
                if (leftoverBin != NOT_CALCULATED)
                {
                    // Continue the bin that the previous call left unfinished.
                    limit = leftoverBin;
                    // either NOT_CALCULATED, which means we already sampled from that bin,
                    // or the next document to sample
                    randomIndex = leftoverIndex;
                }
                else
                {
                    limit       = binSize;
                    randomIndex = random.NextInt32(binSize);
                }

                DocIdSetIterator it = docs.Bits.GetIterator();
                for (int doc = it.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.NextDoc())
                {
                    if (counter == randomIndex)
                    {
                        sampleDocs.Set(doc);
                    }
                    counter++;
                    if (counter >= limit)
                    {
                        // Bin finished: start a fresh full-size bin with a new random pick.
                        counter     = 0;
                        limit       = binSize;
                        randomIndex = random.NextInt32(binSize);
                    }
                }

                if (counter == 0)
                {
                    // we either exhausted the bin and the iterator at the same time, or
                    // this segment had no results. in the latter case we might want to
                    // carry leftover to the next segment as is, but that complicates the
                    // code and doesn't seem so important.
                    leftoverBin = leftoverIndex = NOT_CALCULATED;
                }
                else
                {
                    leftoverBin = limit - counter;
                    if (randomIndex >= counter)
                    {
                        // the document to sample has not been reached yet. This includes
                        // randomIndex == counter, where it is the very next document and
                        // the carried-over index must be 0 rather than a stale value.
                        leftoverIndex = randomIndex - counter;
                    }
                    else
                    {
                        // we sampled a document from the bin, so just skip over remaining
                        // documents in the bin in the next segment.
                        leftoverIndex = NOT_CALCULATED;
                    }
                }

                return new MatchingDocs(docs.Context, sampleDocs, docs.TotalHits, null);
            }
            catch (IOException e)
            {
                // Keep the original failure as the inner exception so the cause is not lost.
                throw new Exception(e.Message, e);
            }
        }
        /// <summary>
        /// Creates a sample of the given hits.
        /// <para>
        /// Hits are walked in iteration order and grouped into bins of
        /// <c>(int)(1.0 / samplingRate)</c> documents; one randomly chosen position
        /// per bin is marked in the returned bit set. A bin left unfinished when the
        /// iterator is exhausted is carried over through <c>leftoverBin</c> and
        /// <c>leftoverIndex</c> so the next call can continue it.
        /// </para>
        /// </summary>
        /// <param name="docs">The hits to sample from.</param>
        /// <returns>
        /// A new <c>MatchingDocs</c> with the same context and total hit count as
        /// <paramref name="docs"/>, the sampled bit set, and <c>null</c> scores.
        /// </returns>
        /// <exception cref="Exception">
        /// Wraps any <see cref="IOException"/> raised while iterating; the original
        /// exception is available as <c>InnerException</c>.
        /// </exception>
        private MatchingDocs CreateSample(MatchingDocs docs)
        {
            int maxdoc = docs.context.Reader.MaxDoc;

            // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
            FixedBitSet sampleDocs = new FixedBitSet(maxdoc);

            int binSize = (int)(1.0 / samplingRate);

            try
            {
                int counter = 0;
                int limit, randomIndex;
                if (leftoverBin != NOT_CALCULATED)
                {
                    // Continue the bin that the previous call left unfinished.
                    limit = leftoverBin;
                    // either NOT_CALCULATED, which means we already sampled from that bin,
                    // or the next document to sample
                    randomIndex = leftoverIndex;
                }
                else
                {
                    limit = binSize;
                    randomIndex = random.NextInt(binSize);
                }

                DocIdSetIterator it = docs.bits.GetIterator();
                for (int doc = it.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.NextDoc())
                {
                    if (counter == randomIndex)
                    {
                        sampleDocs.Set(doc);
                    }
                    counter++;
                    if (counter >= limit)
                    {
                        // Bin finished: start a fresh full-size bin with a new random pick.
                        counter = 0;
                        limit = binSize;
                        randomIndex = random.NextInt(binSize);
                    }
                }

                if (counter == 0)
                {
                    // we either exhausted the bin and the iterator at the same time, or
                    // this segment had no results. in the latter case we might want to
                    // carry leftover to the next segment as is, but that complicates the
                    // code and doesn't seem so important.
                    leftoverBin = leftoverIndex = NOT_CALCULATED;
                }
                else
                {
                    leftoverBin = limit - counter;
                    if (randomIndex >= counter)
                    {
                        // the document to sample has not been reached yet. This includes
                        // randomIndex == counter, where it is the very next document and
                        // the carried-over index must be 0 rather than a stale value.
                        leftoverIndex = randomIndex - counter;
                    }
                    else
                    {
                        // we sampled a document from the bin, so just skip over remaining
                        // documents in the bin in the next segment.
                        leftoverIndex = NOT_CALCULATED;
                    }
                }

                return new MatchingDocs(docs.context, sampleDocs, docs.totalHits, null);
            }
            catch (IOException e)
            {
                // Keep the original failure as the inner exception so the cause is not lost.
                throw new Exception(e.Message, e);
            }
        }