Exemplo n.º 1
0
        /// <summary>
        /// Merge a list of sorted temporary files (partitions) into an output file. </summary>
        internal void MergePartitions(IList <FileInfo> merges, FileInfo outputFile)
        {
            long start = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

            var @out = new ByteSequencesWriter(outputFile);

            PriorityQueue <FileAndTop> queue = new PriorityQueueAnonymousClass(this, merges.Count);

            var streams = new ByteSequencesReader[merges.Count];

            try
            {
                // Open streams and read the top for each file
                for (int i = 0; i < merges.Count; i++)
                {
                    streams[i] = new ByteSequencesReader(merges[i]);
                    byte[] line = streams[i].Read();
                    if (line != null)
                    {
                        queue.InsertWithOverflow(new FileAndTop(i, line));
                    }
                }

                // Unix utility sort() uses ordered array of files to pick the next line from, updating
                // it as it reads new lines. The PQ used here is a more elegant solution and has
                // a nicer theoretical complexity bound :) The entire sorting process is I/O bound anyway
                // so it shouldn't make much of a difference (didn't check).
                FileAndTop top;
                while ((top = queue.Top) != null)
                {
                    @out.Write(top.Current);
                    if (!streams[top.Fd].Read(top.Current))
                    {
                        queue.Pop();
                    }
                    else
                    {
                        queue.UpdateTop();
                    }
                }

                sortInfo.MergeTime += (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - start; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                sortInfo.MergeRounds++;
            }
            finally
            {
                // The logic below is: if an exception occurs in closing out, it has a priority over exceptions
                // happening in closing streams.
                try
                {
                    IOUtils.Dispose(streams);
                }
                finally
                {
                    IOUtils.Dispose(@out);
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Compute the union of the provided sets. This method is much faster than
        /// computing the union manually since it operates directly at the byte level.
        /// </summary>
        public static WAH8DocIdSet Union(ICollection <WAH8DocIdSet> docIdSets, int indexInterval)
        {
            switch (docIdSets.Count)
            {
            case 0:
                return(EMPTY);

            case 1:
                return(docIdSets.First());
            }
            // The logic below is very similar to DisjunctionScorer
            int numSets = docIdSets.Count;
            PriorityQueue <Iterator> iterators = new PriorityQueueAnonymousClass(numSets);

            foreach (WAH8DocIdSet set in docIdSets)
            {
                Iterator iterator = (Iterator)set.GetIterator();
                iterator.NextWord();
                iterators.Add(iterator);
            }

            Iterator top = iterators.Top;

            if (top.wordNum == int.MaxValue)
            {
                return(EMPTY);
            }
            int         wordNum = top.wordNum;
            byte        word    = top.word;
            WordBuilder builder = (WordBuilder)(new WordBuilder()).SetIndexInterval(indexInterval);

            while (true)
            {
                top.NextWord();
                iterators.UpdateTop();
                top = iterators.Top;
                if (top.wordNum == wordNum)
                {
                    word |= top.word;
                }
                else
                {
                    builder.AddWord(wordNum, word);
                    if (top.wordNum == int.MaxValue)
                    {
                        break;
                    }
                    wordNum = top.wordNum;
                    word    = top.word;
                }
            }
            return(builder.Build());
        }
Exemplo n.º 3
0
        public void TestRandomIndex()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);

            CreateRandomIndex(AtLeast(50), w, Random.NextInt64());
            DirectoryReader reader        = w.GetReader();
            AtomicReader    wrapper       = SlowCompositeReaderWrapper.Wrap(reader);
            string          field         = @"body";
            Terms           terms         = wrapper.GetTerms(field);
            var             lowFreqQueue  = new PriorityQueueAnonymousClass(5);
            var             highFreqQueue = new PriorityQueueAnonymousClass1(5);

            try
            {
                TermsEnum iterator = terms.GetEnumerator();
                while (iterator.MoveNext())
                {
                    if (highFreqQueue.Count < 5)
                    {
                        highFreqQueue.Add(new TermAndFreq(
                                              BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                        lowFreqQueue.Add(new TermAndFreq(
                                             BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                    }
                    else
                    {
                        if (highFreqQueue.Top.freq < iterator.DocFreq)
                        {
                            highFreqQueue.Top.freq = iterator.DocFreq;
                            highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            highFreqQueue.UpdateTop();
                        }

                        if (lowFreqQueue.Top.freq > iterator.DocFreq)
                        {
                            lowFreqQueue.Top.freq = iterator.DocFreq;
                            lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            lowFreqQueue.UpdateTop();
                        }
                    }
                }

                int lowFreq  = lowFreqQueue.Top.freq;
                int highFreq = highFreqQueue.Top.freq;
                AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq);
                List <TermAndFreq> highTerms = QueueToList(highFreqQueue);
                List <TermAndFreq> lowTerms  = QueueToList(lowFreqQueue);

                IndexSearcher    searcher     = NewSearcher(reader);
                Occur            lowFreqOccur = RandomOccur(Random);
                BooleanQuery     verifyQuery  = new BooleanQuery();
                CommonTermsQuery cq           = new CommonTermsQuery(RandomOccur(Random),
                                                                     lowFreqOccur, highFreq - 1, Random.NextBoolean());
                foreach (TermAndFreq termAndFreq in lowTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                    verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field,
                                                                             termAndFreq.term)), lowFreqOccur));
                }
                foreach (TermAndFreq termAndFreq in highTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                }

                TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc);

                TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc);
                assertEquals(verifySearch.TotalHits, cqSearch.TotalHits);
                var hits = new JCG.HashSet <int>();
                foreach (ScoreDoc doc in verifySearch.ScoreDocs)
                {
                    hits.Add(doc.Doc);
                }

                foreach (ScoreDoc doc in cqSearch.ScoreDocs)
                {
                    assertTrue(hits.Remove(doc.Doc));
                }

                assertTrue(hits.Count == 0);

                /*
                 *  need to force merge here since QueryUtils adds checks based
                 *  on leave readers which have different statistics than the top
                 *  level reader if we have more than one segment. This could
                 *  result in a different query / results.
                 */
                w.ForceMerge(1);
                DirectoryReader reader2 = w.GetReader();
                QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, cq, NewSearcher(reader2));
                reader2.Dispose();
            }
            finally
            {
                reader.Dispose();
                wrapper.Dispose();
                w.Dispose();
                dir.Dispose();
            }
        }