NumDocs() private method

private NumDocs ( ) : int
return int
		/// <summary>Add an IndexReader whose stored fields will not be returned.  This can
		/// accelerate search when stored fields are only needed from a subset of
		/// the IndexReaders.
		/// </summary>
		/// <throws>  IllegalArgumentException if not all indexes contain the same number of documents </throws>
		/// <throws>  IllegalArgumentException if not all indexes have the same value of {@link IndexReader#MaxDoc()} </throws>
		public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
		{
			
			if (readers.Count == 0)
			{
				this.maxDoc = reader.MaxDoc();
				this.numDocs = reader.NumDocs();
				this.hasDeletions = reader.HasDeletions();
			}
			
			if (reader.MaxDoc() != maxDoc) // check compatibility
				throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
			if (reader.NumDocs() != numDocs)
				throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
			
			System.Collections.IEnumerator i = reader.GetFieldNames(IndexReader.FieldOption.ALL).GetEnumerator();
			while (i.MoveNext())
			{
                System.Collections.DictionaryEntry fi = (System.Collections.DictionaryEntry) i.Current;

				// update fieldToReader map
				System.String field = fi.Key.ToString();
				if (fieldToReader[field] == null)
					fieldToReader[field] = reader;
			}
			
			if (!ignoreStoredFields)
				storedFieldReaders.Add(reader); // add to storedFieldReaders
			readers.Add(reader);
		}
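For orientation, a minimal usage sketch of the Add overload shown above; the directory variables are hypothetical, and both indexes must hold the same documents in the same order (split across different fields), or Add throws as shown:

        ParallelReader pr = new ParallelReader();
        pr.Add(IndexReader.Open(metaDir));          // stored fields will be served from here
        pr.Add(IndexReader.Open(textDir), true);    // ignoreStoredFields: searched, never stored
        int n = pr.NumDocs();                       // equals each sub-reader's NumDocs()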
Example #2
        public void TestDeletesNumDocs()
        {
            Directory   dir = new MockRAMDirectory();
            IndexWriter w   = new IndexWriter(dir, new WhitespaceAnalyzer(),
                                              IndexWriter.MaxFieldLength.LIMITED);
            Document doc = new Document();

            doc.Add(new Field("field", "a b c", Field.Store.NO, Field.Index.ANALYZED));
            Field id = new Field("id", "", Field.Store.NO, Field.Index.NOT_ANALYZED);

            doc.Add(id);
            id.SetValue("0");
            w.AddDocument(doc);
            id.SetValue("1");
            w.AddDocument(doc);
            IndexReader r = w.GetReader();

            Assert.AreEqual(2, r.NumDocs());
            r.Close();

            w.DeleteDocuments(new Term("id", "0"));
            r = w.GetReader();
            Assert.AreEqual(1, r.NumDocs());
            r.Close();

            w.DeleteDocuments(new Term("id", "1"));
            r = w.GetReader();
            Assert.AreEqual(0, r.NumDocs());
            r.Close();

            w.Close();
            dir.Close();
        }
Example #3
        public virtual void  TestDocCount()
        {
            Directory dir = new RAMDirectory();

            IndexWriter writer = null;
            IndexReader reader = null;
            int         i;

            try
            {
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

                // add 100 documents
                for (i = 0; i < 100; i++)
                {
                    AddDoc(writer);
                }
                Assert.AreEqual(100, writer.DocCount());
                writer.Close();

                // delete 40 documents
                reader = IndexReader.Open(dir);
                for (i = 0; i < 40; i++)
                {
                    reader.Delete(i);
                }
                reader.Close();

                // test doc count before segments are merged/index is optimized
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
                Assert.AreEqual(100, writer.DocCount());
                writer.Close();

                reader = IndexReader.Open(dir);
                Assert.AreEqual(100, reader.MaxDoc());
                Assert.AreEqual(60, reader.NumDocs());
                reader.Close();

                // optimize the index and check that the new doc count is correct
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
                writer.Optimize();
                Assert.AreEqual(60, writer.DocCount());
                writer.Close();

                // check that the index reader gives the same numbers.
                reader = IndexReader.Open(dir);
                Assert.AreEqual(60, reader.MaxDoc());
                Assert.AreEqual(60, reader.NumDocs());
                reader.Close();
            }
            catch (System.IO.IOException e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
            }
        }
        public virtual void  TestDeleteAll()
        {
            for (int pass = 0; pass < 2; pass++)
            {
                bool        autoCommit = (0 == pass);
                Directory   dir        = new MockRAMDirectory();
                IndexWriter modifier   = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
                modifier.SetMaxBufferedDocs(2);
                modifier.SetMaxBufferedDeleteTerms(2);

                int id            = 0;
                int value_Renamed = 100;

                for (int i = 0; i < 7; i++)
                {
                    AddDoc(modifier, ++id, value_Renamed);
                }
                modifier.Commit();

                IndexReader reader = IndexReader.Open(dir);
                Assert.AreEqual(7, reader.NumDocs());
                reader.Close();

                // Add 1 doc (so we will have something buffered)
                AddDoc(modifier, 99, value_Renamed);

                // Delete all
                modifier.DeleteAll();

                // Delete all shouldn't be on disk yet
                reader = IndexReader.Open(dir);
                Assert.AreEqual(7, reader.NumDocs());
                reader.Close();

                // Add a doc and update a doc (after the deleteAll, before the commit)
                AddDoc(modifier, 101, value_Renamed);
                UpdateDoc(modifier, 102, value_Renamed);

                // commit the delete all
                modifier.Commit();

                // Validate there are no docs left
                reader = IndexReader.Open(dir);
                Assert.AreEqual(2, reader.NumDocs());
                reader.Close();

                modifier.Close();
                dir.Close();
            }
        }
        public virtual void  TestMoreMerges()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux  = new RAMDirectory();
            Directory aux2 = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexWriter writer = NewWriter(aux2, true);

            writer.SetMaxBufferedDocs(100);
            writer.SetMergeFactor(10);
            writer.AddIndexesNoOptimize(new Directory[] { aux });
            Assert.AreEqual(30, writer.DocCount());
            Assert.AreEqual(3, writer.GetSegmentCount());
            writer.Close();

            IndexReader reader = IndexReader.Open(aux);

            for (int i = 0; i < 27; i++)
            {
                reader.DeleteDocument(i);
            }
            Assert.AreEqual(3, reader.NumDocs());
            reader.Close();

            reader = IndexReader.Open(aux2);
            for (int i = 0; i < 8; i++)
            {
                reader.DeleteDocument(i);
            }
            Assert.AreEqual(22, reader.NumDocs());
            reader.Close();

            writer = NewWriter(dir, false);
            writer.SetMaxBufferedDocs(6);
            writer.SetMergeFactor(4);

            writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 });
            Assert.AreEqual(1025, writer.DocCount());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1025);
        }
        public virtual void  TestBatchDeletes()
        {
            for (int pass = 0; pass < 2; pass++)
            {
                bool        autoCommit = (0 == pass);
                Directory   dir        = new MockRAMDirectory();
                IndexWriter modifier   = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
                modifier.SetMaxBufferedDocs(2);
                modifier.SetMaxBufferedDeleteTerms(2);

                int id            = 0;
                int value_Renamed = 100;

                for (int i = 0; i < 7; i++)
                {
                    AddDoc(modifier, ++id, value_Renamed);
                }
                modifier.Commit();

                IndexReader reader = IndexReader.Open(dir);
                Assert.AreEqual(7, reader.NumDocs());
                reader.Close();

                id = 0;
                modifier.DeleteDocuments(new Term("id", System.Convert.ToString(++id)));
                modifier.DeleteDocuments(new Term("id", System.Convert.ToString(++id)));

                modifier.Commit();

                reader = IndexReader.Open(dir);
                Assert.AreEqual(5, reader.NumDocs());
                reader.Close();

                Term[] terms = new Term[3];
                for (int i = 0; i < terms.Length; i++)
                {
                    terms[i] = new Term("id", System.Convert.ToString(++id));
                }
                modifier.DeleteDocuments(terms);
                modifier.Commit();
                reader = IndexReader.Open(dir);
                Assert.AreEqual(2, reader.NumDocs());
                reader.Close();

                modifier.Close();
                dir.Close();
            }
        }
        /// <summary>Add an IndexReader whose stored fields will not be returned.  This can
        /// accelerate search when stored fields are only needed from a subset of
        /// the IndexReaders.
        /// </summary>
        /// <throws>  IllegalArgumentException if not all indexes contain the same number of documents </throws>
        /// <throws>  IllegalArgumentException if not all indexes have the same value of {@link IndexReader#MaxDoc()} </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
        {
            EnsureOpen();
            if (readers.Count == 0)
            {
                this.maxDoc       = reader.MaxDoc();
                this.numDocs      = reader.NumDocs();
                this.hasDeletions = reader.HasDeletions();
            }

            if (reader.MaxDoc() != maxDoc)
            {
                // check compatibility
                throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
            }
            if (reader.NumDocs() != numDocs)
            {
                throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
            }

            ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);

            readerToFields[reader] = fields;
            foreach (string field in fields)
            {
                // update fieldToReader map (ContainsKey mimics the Java null check)
                if (!fieldToReader.ContainsKey(field))
                {
                    fieldToReader[field] = reader;
                }
            }

            if (!ignoreStoredFields)
            {
                storedFieldReaders.Add(reader);                 // add to storedFieldReaders
            }
            readers.Add(reader);

            if (incRefReaders)
            {
                reader.IncRef();
            }
            decrefOnClose.Add(incRefReaders);
        }
        public virtual void  TestMergeAfterCopy()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexReader reader = IndexReader.Open(aux);

            for (int i = 0; i < 20; i++)
            {
                reader.DeleteDocument(i);
            }
            Assert.AreEqual(10, reader.NumDocs());
            reader.Close();

            IndexWriter writer = NewWriter(dir, false);

            writer.SetMaxBufferedDocs(4);
            writer.SetMergeFactor(4);

            writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) });
            Assert.AreEqual(1020, writer.DocCount());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1020);
        }
            public override void  DoWork()
            {
                IndexReader r = IndexReader.Open(directory, true);

                Assert.AreEqual(100, r.NumDocs());
                r.Close();
            }
        public virtual void  TestFieldCacheRangeFilterDoubles()
        {
            IndexReader   reader = IndexReader.Open((Directory)signedIndex.index, true, null);
            IndexSearcher Search = new IndexSearcher(reader);

            int numDocs = reader.NumDocs();

            System.Double minIdO = (double)(minId + .5);
            System.Double medIdO = (double)((float)minIdO + ((double)(maxId - minId)) / 2.0);

            ScoreDoc[] result;
            Query      q = new TermQuery(new Term("body", "body"));

            result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", minIdO, medIdO, T, T), numDocs, null).ScoreDocs;
            Assert.AreEqual(numDocs / 2, result.Length, "find all");
            int count = 0;

            result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, medIdO, F, T), numDocs, null).ScoreDocs;
            count += result.Length;
            result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", medIdO, null, F, F), numDocs, null).ScoreDocs;
            count += result.Length;
            Assert.AreEqual(numDocs, count, "sum of two concatenated ranges");
            result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, null, T, T), numDocs, null).ScoreDocs;
            Assert.AreEqual(numDocs, result.Length, "find all");
            System.Double tempAux = (double)System.Double.PositiveInfinity;
            result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", tempAux, null, F, F), numDocs, null).ScoreDocs;
            Assert.AreEqual(0, result.Length, "infinity special case");
            System.Double tempAux2 = (double)System.Double.NegativeInfinity;
            result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, tempAux2, F, F), numDocs, null).ScoreDocs;
            Assert.AreEqual(0, result.Length, "infinity special case");
        }
Example #11
        public virtual void  TestVerifyIndex()
        {
            IndexReader reader = IndexReader.Open(mDirectory, true, null);

            Assert.AreEqual(8, reader.NumDocs());
            reader.Close();
        }
Example #12
        public virtual void  TestAddIndexesAndDoDeletesThreads()
        {
            int numIter = 5;
            int numDirs = 3;

            Directory   mainDir    = new MockRAMDirectory();
            IndexWriter mainWriter = new IndexWriter(mainDir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            mainWriter.SetInfoStream(infoStream);
            AddDirectoriesThreads addDirThreads = new AddDirectoriesThreads(this, numIter, mainWriter);

            addDirThreads.LaunchThreads(numDirs);
            addDirThreads.JoinThreads();

            //Assert.AreEqual(100 + numDirs * (3 * numIter / 4) * addDirThreads.NUM_THREADS
            //    * addDirThreads.NUM_INIT_DOCS, addDirThreads.mainWriter.numDocs());
            Assert.AreEqual(addDirThreads.count.IntValue(), addDirThreads.mainWriter.NumDocs());

            addDirThreads.Close(true);

            Assert.IsTrue(addDirThreads.failures.Count == 0);

            _TestUtil.CheckIndex(mainDir);

            IndexReader reader = IndexReader.Open(mainDir);

            Assert.AreEqual(addDirThreads.count.IntValue(), reader.NumDocs());
            //Assert.AreEqual(100 + numDirs * (3 * numIter / 4) * addDirThreads.NUM_THREADS
            //    * addDirThreads.NUM_INIT_DOCS, reader.numDocs());
            reader.Close();

            addDirThreads.CloseDir();
            mainDir.Close();
        }
 /// <summary> Extracts all terms texts of a given Query into an array of WeightedTerms
 /// 
 /// </summary>
 /// <param name="query">Query to extract term texts from</param>
 /// <param name="reader">used to compute IDF which can be used to a) score selected fragments better 
 /// b) use graded highlights, e.g. changing intensity of font color</param>
 /// <param name="fieldName">the field on which Inverse Document Frequency (IDF) calculations are based</param>
 /// <returns> an array of the terms used in a query, plus their weights.</returns>
 public static WeightedTerm[] GetIdfWeightedTerms(Query query, IndexReader reader, string fieldName)
 {
     WeightedTerm[] terms = GetTerms(query, false, fieldName);
     int totalNumDocs = reader.NumDocs();
     foreach (WeightedTerm t in terms)
     {
         try
         {
             int docFreq = reader.DocFreq(new Term(fieldName, t.Term));
             // docFreq counts deletes
             if (totalNumDocs < docFreq)
             {
                 docFreq = totalNumDocs;
             }
             //IDF algorithm taken from DefaultSimilarity class
             var idf = (float)(Math.Log((float)totalNumDocs / (double)(docFreq + 1)) + 1.0);
             t.Weight *= idf;
         }
         catch (IOException)
         {
             // ignore: a failed DocFreq lookup leaves this term's weight unchanged
         }
     }
     return terms;
 }
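For reference, the weight applied above follows the DefaultSimilarity IDF formula, idf = log(numDocs / (docFreq + 1)) + 1, with NumDocs() as the collection size. A hypothetical call site (the field name and query are assumptions, and this assumes the enclosing class is the highlighter's QueryTermExtractor):

     Query q = new TermQuery(new Term("contents", "lucene"));
     WeightedTerm[] weighted = QueryTermExtractor.GetIdfWeightedTerms(q, reader, "contents");
     foreach (WeightedTerm wt in weighted)
         System.Console.WriteLine(wt.Term + " -> " + wt.Weight);   // rarer terms weigh more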
        public virtual void  TestFieldCacheRangeFilterRand()
        {
            IndexReader   reader = IndexReader.Open((Directory)signedIndex.index, true, null);
            IndexSearcher Search = new IndexSearcher(reader);

            System.String minRP = Pad(signedIndex.minR);
            System.String maxRP = Pad(signedIndex.maxR);

            int numDocs = reader.NumDocs();

            Assert.AreEqual(numDocs, 1 + maxId - minId, "num of docs");

            ScoreDoc[] result;
            Query      q = new TermQuery(new Term("body", "body"));

            // test extremes, bounded on both ends

            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, T, T), numDocs, null).ScoreDocs;
            Assert.AreEqual(numDocs, result.Length, "find all");

            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, T, F), numDocs, null).ScoreDocs;
            Assert.AreEqual(numDocs - 1, result.Length, "all but biggest");

            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, F, T), numDocs, null).ScoreDocs;
            Assert.AreEqual(numDocs - 1, result.Length, "all but smallest");

            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, F, F), numDocs, null).ScoreDocs;
            Assert.AreEqual(numDocs - 2, result.Length, "all but extremes");

            // unbounded

            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, null, T, F), numDocs, null).ScoreDocs;
            Assert.AreEqual(numDocs, result.Length, "smallest and up");

            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", null, maxRP, F, T), numDocs, null).ScoreDocs;
            Assert.AreEqual(numDocs, result.Length, "biggest and down");

            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, null, F, F), numDocs, null).ScoreDocs;
            Assert.AreEqual(numDocs - 1, result.Length, "not smallest, but up");

            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", null, maxRP, F, F), numDocs, null).ScoreDocs;
            Assert.AreEqual(numDocs - 1, result.Length, "not biggest, but down");

            // very small sets

            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, minRP, F, F), numDocs, null).ScoreDocs;
            Assert.AreEqual(0, result.Length, "min,min,F,F");
            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", maxRP, maxRP, F, F), numDocs, null).ScoreDocs;
            Assert.AreEqual(0, result.Length, "max,max,F,F");

            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, minRP, T, T), numDocs, null).ScoreDocs;
            Assert.AreEqual(1, result.Length, "min,min,T,T");
            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", null, minRP, F, T), numDocs, null).ScoreDocs;
            Assert.AreEqual(1, result.Length, "nul,min,F,T");

            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", maxRP, maxRP, T, T), numDocs, null).ScoreDocs;
            Assert.AreEqual(1, result.Length, "max,max,T,T");
            result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", maxRP, null, T, F), numDocs, null).ScoreDocs;
            Assert.AreEqual(1, result.Length, "max,nul,T,T");
        }
Example #15
        public virtual void  TestAfterClose()
        {
            Directory   dir1   = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer.SetInfoStream(infoStream);

            // create the index
            CreateIndexNoClose(false, "test", writer);

            IndexReader r = writer.GetReader();

            writer.Close();

            _TestUtil.CheckIndex(dir1);

            // reader should remain usable even after IndexWriter is closed:
            Assert.AreEqual(100, r.NumDocs());
            Query q = new TermQuery(new Term("indexname", "test"));

            Assert.AreEqual(100, new IndexSearcher(r).Search(q, 10).totalHits);

            try
            {
                r.Reopen();
                Assert.Fail("failed to hit AlreadyClosedException");
            }
            catch (AlreadyClosedException ace)
            {
                // expected
            }
            r.Close();
            dir1.Close();
        }
Example #16
        public virtual void  TestRAMDirectory_Renamed()
        {
            Directory        dir    = FSDirectory.Open(indexDir);
            MockRAMDirectory ramDir = new MockRAMDirectory(dir);

            // close the underlying directory
            dir.Close();

            // Check size
            Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());

            // open reader to test document count
            IndexReader reader = IndexReader.Open((Directory)ramDir, true, null);

            Assert.AreEqual(docsToAdd, reader.NumDocs());

            // open a searcher to check that all docs are there
            IndexSearcher searcher = new IndexSearcher(reader);

            // search for all documents
            for (int i = 0; i < docsToAdd; i++)
            {
                Document doc = searcher.Doc(i, null);
                Assert.IsTrue(doc.GetField("content") != null);
            }

            // cleanup
            reader.Close();
            searcher.Close();
        }
        private void  TestTermVectors()
        {
            // check:
            int  numDocs = reader.NumDocs();
            long start   = 0L;

            for (int docId = 0; docId < numDocs; docId++)
            {
                start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                ITermFreqVector[] vectors = reader.GetTermFreqVectors(docId, null);
                timeElapsed += (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) - start;

                // verify vectors result
                VerifyVectors(vectors, docId);

                start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                ITermFreqVector vector = reader.GetTermFreqVector(docId, "field", null);
                timeElapsed += (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) - start;

                vectors    = new ITermFreqVector[1];
                vectors[0] = vector;

                VerifyVectors(vectors, docId);
            }
        }
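As an aside, the DateTime tick arithmetic above works, but System.Diagnostics.Stopwatch is the idiomatic .NET timer; a sketch of one timed step rewritten under that assumption:

            var sw = System.Diagnostics.Stopwatch.StartNew();
            ITermFreqVector[] vectors = reader.GetTermFreqVectors(docId, null);
            timeElapsed += sw.ElapsedMilliseconds;   // same units as the original bookkeeping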
Example #18
        public virtual void  TestExpungeDeletes()
        {
            Directory   dir = new MockRAMDirectory();
            IndexWriter w   = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);
            Document    doc = new Document();

            doc.Add(new Field("field", "a b c", Field.Store.NO, Field.Index.ANALYZED));
            Field id = new Field("id", "", Field.Store.NO, Field.Index.NOT_ANALYZED);

            doc.Add(id);
            id.SetValue("0");
            w.AddDocument(doc, null);
            id.SetValue("1");
            w.AddDocument(doc, null);
            w.DeleteDocuments(null, new Term("id", "0"));

            IndexReader r = w.GetReader(null);

            w.ExpungeDeletes(null);
            w.Close();
            r.Close();
            r = IndexReader.Open(dir, true, null);
            Assert.AreEqual(1, r.NumDocs());
            Assert.IsFalse(r.HasDeletions);
            r.Close();
            dir.Close();
        }
Example #19
        public void TestRollbackIntegrityWithBufferFlush()
        {
            Directory   dir = new MockRAMDirectory();
            IndexWriter w   = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);

            for (int i = 0; i < 5; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("pk", i.ToString(), Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                w.AddDocument(doc);
            }
            w.Close();

            // If buffer size is small enough to cause a flush, errors ensue...
            w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
            w.SetMaxBufferedDocs(2);

            Term pkTerm = new Term("pk", "");

            for (int i = 0; i < 3; i++)
            {
                Document doc   = new Document();
                String   value = i.ToString();
                doc.Add(new Field("pk", value, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                doc.Add(new Field("text", "foo", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                w.UpdateDocument(pkTerm.CreateTerm(value), doc);
            }
            w.Rollback();

            IndexReader r = IndexReader.Open(dir, true);

            Assert.AreEqual(5, r.NumDocs(), "index should contain same number of docs post rollback");
            r.Close();
            dir.Close();
        }
Example #20
        public virtual void TestBatchDeletes()
        {
            Directory   dir      = new MockRAMDirectory();
            IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                                   IndexWriter.MaxFieldLength.UNLIMITED, null);

            modifier.SetMaxBufferedDocs(2);
            modifier.SetMaxBufferedDeleteTerms(2);

            int id            = 0;
            int value_Renamed = 100;

            for (int i = 0; i < 7; i++)
            {
                AddDoc(modifier, ++id, value_Renamed);
            }
            modifier.Commit(null);

            IndexReader reader = IndexReader.Open(dir, true, null);

            Assert.AreEqual(7, reader.NumDocs());
            reader.Close();

            id = 0;
            modifier.DeleteDocuments(null, new Term("id", System.Convert.ToString(++id)));
            modifier.DeleteDocuments(null, new Term("id", System.Convert.ToString(++id)));

            modifier.Commit(null);

            reader = IndexReader.Open(dir, true, null);
            Assert.AreEqual(5, reader.NumDocs());
            reader.Close();

            Term[] terms = new Term[3];
            for (int i = 0; i < terms.Length; i++)
            {
                terms[i] = new Term("id", System.Convert.ToString(++id));
            }
            modifier.DeleteDocuments(null, terms);
            modifier.Commit(null);
            reader = IndexReader.Open(dir, true, null);
            Assert.AreEqual(2, reader.NumDocs());
            reader.Close();

            modifier.Close();
            dir.Close();
        }
Example #21
        /*
         * Run one indexer and 2 searchers against single index as
         * stress test.
         */
        public virtual void  RunTest(Directory directory)
        {
            TimedThread[] threads = new TimedThread[4];

            IndexWriter writer = new MockIndexWriter(this, directory, true, ANALYZER, true);

            writer.SetMaxBufferedDocs(7);
            writer.SetMergeFactor(3);

            // Establish a base index of 100 docs:
            for (int i = 0; i < 100; i++)
            {
                Document d = new Document();
                d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
                d.Add(new Field("contents", English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(d);
            }
            writer.Commit();

            IndexReader r = IndexReader.Open(directory);

            Assert.AreEqual(100, r.NumDocs());
            r.Close();

            IndexerThread indexerThread = new IndexerThread(writer, threads);

            threads[0] = indexerThread;
            indexerThread.Start();

            IndexerThread indexerThread2 = new IndexerThread(writer, threads);

            threads[1] = indexerThread2;
            indexerThread2.Start();

            SearcherThread searcherThread1 = new SearcherThread(directory, threads);

            threads[2] = searcherThread1;
            searcherThread1.Start();

            SearcherThread searcherThread2 = new SearcherThread(directory, threads);

            threads[3] = searcherThread2;
            searcherThread2.Start();

            indexerThread.Join();
            indexerThread2.Join();
            searcherThread1.Join();
            searcherThread2.Join();

            writer.Close();

            Assert.IsTrue(!indexerThread.failed, "hit unexpected exception in indexer");
            Assert.IsTrue(!indexerThread2.failed, "hit unexpected exception in indexer2");
            Assert.IsTrue(!searcherThread1.failed, "hit unexpected exception in search1");
            Assert.IsTrue(!searcherThread2.failed, "hit unexpected exception in search2");
            //System.out.println("    Writer: " + indexerThread.count + " iterations");
            //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created");
            //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created");
        }
        private void  VerifyNumDocs(Directory dir, int numDocs)
        {
            IndexReader reader = IndexReader.Open(dir);

            Assert.AreEqual(numDocs, reader.MaxDoc());
            Assert.AreEqual(numDocs, reader.NumDocs());
            reader.Close();
        }
Example #23
        public virtual void TestDeleteOldIndex()
        {
            foreach (string name in OldNames)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: oldName=" + name);
                }

                // Try one delete:
                Directory dir = NewDirectory(OldIndexDirs[name]);

                IndexReader ir = DirectoryReader.Open(dir);
                Assert.AreEqual(35, ir.NumDocs());
                ir.Dispose();

                IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
                iw.DeleteDocuments(new Term("id", "3"));
                iw.Dispose();

                ir = DirectoryReader.Open(dir);
                Assert.AreEqual(34, ir.NumDocs());
                ir.Dispose();

                // Delete all but 1 document:
                iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
                for (int i = 0; i < 35; i++)
                {
                    iw.DeleteDocuments(new Term("id", "" + i));
                }

                // Verify NRT reader takes:
                ir = DirectoryReader.Open(iw, true);
                iw.Dispose();

                Assert.AreEqual(1, ir.NumDocs(), "index " + name);
                ir.Dispose();

                // Verify non-NRT reader takes:
                ir = DirectoryReader.Open(dir);
                Assert.AreEqual(1, ir.NumDocs(), "index " + name);
                ir.Dispose();

                dir.Dispose();
            }
        }
Example #24
        private void  MergeTermInfos()
        {
            int base_Renamed = 0;

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader      reader   = (IndexReader)readers[i];
                TermEnum         termEnum = reader.Terms();
                SegmentMergeInfo smi      = new SegmentMergeInfo(base_Renamed, termEnum, reader);
                base_Renamed += reader.NumDocs();
                if (smi.Next())
                {
                    queue.Put(smi);
                }
                // initialize queue
                else
                {
                    smi.Close();
                }
            }

            SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];

            while (queue.Size() > 0)
            {
                int matchSize = 0;                 // pop matching terms
                match[matchSize++] = (SegmentMergeInfo)queue.Pop();
                Term             term = match[0].term;
                SegmentMergeInfo top  = (SegmentMergeInfo)queue.Top();

                while (top != null && term.CompareTo(top.term) == 0)
                {
                    match[matchSize++] = (SegmentMergeInfo)queue.Pop();
                    top = (SegmentMergeInfo)queue.Top();
                }

                int df = MergeTermInfo(match, matchSize);                 // add new TermInfo

                if (checkAbort != null)
                {
                    checkAbort.Work(df / 3.0);
                }

                while (matchSize > 0)
                {
                    SegmentMergeInfo smi = match[--matchSize];
                    if (smi.Next())
                    {
                        queue.Put(smi);
                    }
                    // restore queue
                    else
                    {
                        smi.Close();                         // done with a segment
                    }
                }
            }
        }
Example #25
        /// <summary>Add an IndexReader whose stored fields will not be returned.  This can
        /// accelerate search when stored fields are only needed from a subset of
        /// the IndexReaders.
        /// </summary>
        /// <throws>  IllegalArgumentException if not all indexes contain the same number of documents </throws>
        /// <throws>  IllegalArgumentException if not all indexes have the same value of <see cref="IndexReader.MaxDoc" /> </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
        {
            EnsureOpen();
            if (readers.Count == 0)
            {
                this.maxDoc       = reader.MaxDoc;
                this.numDocs      = reader.NumDocs();
                this.hasDeletions = reader.HasDeletions;
            }

            if (reader.MaxDoc != maxDoc)
            {
                // check compatibility
                throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc);
            }
            if (reader.NumDocs() != numDocs)
            {
                throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
            }

            ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);

            readerToFields[reader] = fields;
            foreach (var field in fields)
            {
                // update fieldToReader map
                // Do a ContainsKey check first to mimic the Java null-check behavior
                if (!fieldToReader.ContainsKey(field) || fieldToReader[field] == null)
                {
                    fieldToReader[field] = reader;
                }
            }

            if (!ignoreStoredFields)
            {
                storedFieldReaders.Add(reader);                 // add to storedFieldReaders
            }
            readers.Add(reader);

            if (incRefReaders)
            {
                reader.IncRef();
            }
            decrefOnClose.Add(incRefReaders);
        }
Example #26
        public virtual void TestFlushExceptions()
        {
            MockRAMDirectory directory = new MockRAMDirectory();
            FailOnlyOnFlush  failure   = new FailOnlyOnFlush();

            directory.FailOn(failure);

            IndexWriter writer           = new IndexWriter(directory, ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);
            ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();

            writer.SetMergeScheduler(cms);
            writer.SetMaxBufferedDocs(2);
            Document doc     = new Document();
            Field    idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);

            doc.Add(idField);
            int extraCount = 0;

            for (int i = 0; i < 10; i++)
            {
                for (int j = 0; j < 20; j++)
                {
                    idField.SetValue(System.Convert.ToString(i * 20 + j));
                    writer.AddDocument(doc);
                }

                while (true)
                {
                    // must cycle here because sometimes the merge flushes
                    // the doc we just added and so there's nothing to
                    // flush, and we don't hit the exception
                    writer.AddDocument(doc);
                    failure.SetDoFail();
                    try
                    {
                        writer.Flush(true, false, true);
                        if (failure.hitExc)
                        {
                            Assert.Fail("failed to hit IOException");
                        }
                        extraCount++;
                    }
                    catch (System.IO.IOException ioe)
                    {
                        failure.ClearDoFail();
                        break;
                    }
                }
            }

            writer.Close();
            IndexReader reader = IndexReader.Open(directory, true);

            Assert.AreEqual(200 + extraCount, reader.NumDocs());
            reader.Close();
            directory.Close();
        }
Example #27
        public virtual void  TestDeleteAllNRT()
        {
            Directory   dir      = new MockRAMDirectory();
            IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED, null);

            modifier.SetMaxBufferedDocs(2);
            modifier.SetMaxBufferedDeleteTerms(2);

            int id            = 0;
            int value_Renamed = 100;

            for (int i = 0; i < 7; i++)
            {
                AddDoc(modifier, ++id, value_Renamed);
            }
            modifier.Commit(null);

            IndexReader reader = modifier.GetReader(null);

            Assert.AreEqual(7, reader.NumDocs());
            reader.Close();

            AddDoc(modifier, ++id, value_Renamed);
            AddDoc(modifier, ++id, value_Renamed);

            // Delete all
            modifier.DeleteAll(null);

            reader = modifier.GetReader(null);
            Assert.AreEqual(0, reader.NumDocs());
            reader.Close();


            // Roll it back
            modifier.Rollback(null);
            modifier.Close();

            // Validate that the docs are still there
            reader = IndexReader.Open(dir, true, null);
            Assert.AreEqual(7, reader.NumDocs());
            reader.Close();

            dir.Close();
        }
Example #28
        public virtual void  TestCrashWhileIndexing()
        {
            IndexWriter      writer = InitIndex();
            MockRAMDirectory dir    = (MockRAMDirectory)writer.Directory;

            Crash(writer);
            IndexReader reader = IndexReader.Open((Directory)dir, true, null);

            Assert.IsTrue(reader.NumDocs() < 157);
        }
Example #29
        public virtual void  TestNoWaitClose()
        {
            RAMDirectory directory = new MockRAMDirectory();

            Document doc     = new Document();
            Field    idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);

            doc.Add(idField);

            for (int pass = 0; pass < 2; pass++)
            {
                bool        autoCommit = pass == 0;
                IndexWriter writer     = new IndexWriter(directory, autoCommit, ANALYZER, true);

                for (int iter = 0; iter < 10; iter++)
                {
                    ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
                    writer.SetMergeScheduler(cms);
                    writer.SetMaxBufferedDocs(2);
                    writer.SetMergeFactor(100);

                    for (int j = 0; j < 201; j++)
                    {
                        idField.SetValue(System.Convert.ToString(iter * 201 + j));
                        writer.AddDocument(doc);
                    }

                    int delID = iter * 201;
                    for (int j = 0; j < 20; j++)
                    {
                        writer.DeleteDocuments(new Term("id", System.Convert.ToString(delID)));
                        delID += 5;
                    }

                    // Force a bunch of merge threads to kick off so we
                    // stress out aborting them on close:
                    writer.SetMergeFactor(3);
                    writer.AddDocument(doc);
                    writer.Flush();

                    writer.Close(false);

                    IndexReader reader = IndexReader.Open(directory);
                    Assert.AreEqual((1 + iter) * 182, reader.NumDocs());
                    reader.Close();

                    // Reopen
                    writer = new IndexWriter(directory, autoCommit, ANALYZER, false);
                }
                writer.Close();
            }

            directory.Close();
        }
        public virtual void  TestRAMDeletes()
        {
            for (int pass = 0; pass < 2; pass++)
            {
                for (int t = 0; t < 2; t++)
                {
                    bool        autoCommit = (0 == pass);
                    Directory   dir        = new MockRAMDirectory();
                    IndexWriter modifier   = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
                    modifier.SetMaxBufferedDocs(4);
                    modifier.SetMaxBufferedDeleteTerms(4);

                    int id            = 0;
                    int value_Renamed = 100;

                    AddDoc(modifier, ++id, value_Renamed);
                    if (0 == t)
                    {
                        modifier.DeleteDocuments(new Term("value", System.Convert.ToString(value_Renamed)));
                    }
                    else
                    {
                        modifier.DeleteDocuments(new TermQuery(new Term("value", System.Convert.ToString(value_Renamed))));
                    }
                    AddDoc(modifier, ++id, value_Renamed);
                    if (0 == t)
                    {
                        modifier.DeleteDocuments(new Term("value", System.Convert.ToString(value_Renamed)));
                        Assert.AreEqual(2, modifier.GetNumBufferedDeleteTerms());
                        Assert.AreEqual(1, modifier.GetBufferedDeleteTermsSize());
                    }
                    else
                    {
                        modifier.DeleteDocuments(new TermQuery(new Term("value", System.Convert.ToString(value_Renamed))));
                    }

                    AddDoc(modifier, ++id, value_Renamed);
                    Assert.AreEqual(0, modifier.GetSegmentCount());
                    modifier.Flush();

                    modifier.Commit();

                    IndexReader reader = IndexReader.Open(dir);
                    Assert.AreEqual(1, reader.NumDocs());

                    int hitCount = GetHitCount(dir, new Term("id", System.Convert.ToString(id)));
                    Assert.AreEqual(1, hitCount);
                    reader.Close();
                    modifier.Close();
                    dir.Close();
                }
            }
        }
Example #31
 public virtual void  TestRollbackDeletionPolicy()
 {
     for (int i = 0; i < 2; i++)
     {
         // Unless you specify a prior commit point, rollback
         // should not work:
         new IndexWriter(dir, new WhitespaceAnalyzer(), new DeleteLastCommitPolicy(this), MaxFieldLength.UNLIMITED).Close();
         IndexReader r = IndexReader.Open(dir);
         Assert.AreEqual(100, r.NumDocs());
         r.Close();
     }
 }
Example #32
        public virtual void  TestReopenWriteableToReadOnly()
        {
            Directory dir1 = new MockRAMDirectory();

            TestIndexReaderReopen.CreateIndex(dir1, true);
            IndexReader reader   = IndexReader.Open(dir1, false);
            int         docCount = reader.NumDocs();

            Assert.IsTrue(DeleteWorked(1, reader));
            Assert.AreEqual(docCount - 1, reader.NumDocs());

            IndexReader readOnlyReader = reader.Reopen(true);

            Assert.IsTrue(IsReadOnly(readOnlyReader), "reader isn't read only");

            Assert.IsFalse(DeleteWorked(1, readOnlyReader));
            Assert.AreEqual(docCount - 1, readOnlyReader.NumDocs());
            reader.Close();
            readOnlyReader.Close();
            dir1.Close();
        }
 public override DocIdSet GetDocIdSet(IndexReader reader)
 {
     OpenBitSet bitSet = new OpenBitSet(reader.NumDocs());
     TermDocs termDocs = reader.TermDocs(new Term("TenantId", _tenantId));
     while (termDocs.Next())
     {
         if (termDocs.Freq > 0)
         {
             bitSet.Set(termDocs.Doc);
         }
     }
     return bitSet;
 }
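A hypothetical call site for a Filter built around this override (TenantFilter is an assumed wrapper class name). Note that doc IDs range up to MaxDoc() - 1, so MaxDoc() is the more conventional bitset size when deletions exist; OpenBitSet.Set grows the set on demand, so the code above still works:

     var searcher = new IndexSearcher(reader);
     var filter = new TenantFilter("tenant-42");   // hypothetical Filter subclass using GetDocIdSet above
     TopDocs hits = searcher.Search(new MatchAllDocsQuery(), filter, 10);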
Example #34
        public override IEnumerable<Row> Execute(IEnumerable<Row> rows)
        {
            if (_indexDirectory == null)
                yield break;

            try {
                _reader = IndexReader.Open(_indexDirectory, true);
            }
            catch (Exception) {
                Warn("Failed to open lucene index in {0}.", _indexDirectory.Directory.FullName);
                yield break;
            }

            var docCount = _reader.NumDocs();
            Info("Found {0} documents in lucene index.", docCount);

            for (var i = 0; i < docCount; i++) {

                if (_reader.IsDeleted(i))
                    continue;

                var doc = _reader.Document(i);
                var row = new Row();
                foreach (var field in doc.GetFields().Where(field => field.IsStored)) {
                    switch (field.Name) {
                        case "dropped":
                            row[field.Name] = Convert.ToBoolean(field.StringValue);
                            break;
                        default:
                            row[field.Name] = field.StringValue;
                            break;
                    }

                }
                yield return row;

            }
        }
		// Returns true if there are docs to search and creates the readers and searchers
		// in that case. Otherwise, returns false.
		private bool BuildSearchers (out IndexReader primary_reader,
					    out LNS.IndexSearcher primary_searcher,
					    out IndexReader secondary_reader,
					    out LNS.IndexSearcher secondary_searcher)
		{
			primary_searcher = null;
			secondary_reader = null;
			secondary_searcher = null;

			primary_reader = LuceneCommon.GetReader (PrimaryStore);
			if (primary_reader.NumDocs() == 0) {
				ReleaseReader (primary_reader);
				primary_reader = null;
				return false;
			}

			primary_searcher = new LNS.IndexSearcher (primary_reader);

			if (SecondaryStore != null) {
				secondary_reader = LuceneCommon.GetReader (SecondaryStore);
				if (secondary_reader.NumDocs () == 0) {
					ReleaseReader (secondary_reader);
					secondary_reader = null;
				}
			}

			if (secondary_reader != null)
				secondary_searcher = new LNS.IndexSearcher (secondary_reader);

			return true;
		}
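A hypothetical caller for the helper above, showing the out-parameter contract; a false return means the primary index is empty:

			IndexReader primary_reader, secondary_reader;
			LNS.IndexSearcher primary_searcher, secondary_searcher;
			if (!BuildSearchers (out primary_reader, out primary_searcher,
					     out secondary_reader, out secondary_searcher))
				return;   // nothing indexed yet: primary NumDocs() == 0
			// ... query primary_searcher, and secondary_searcher when it is non-null ...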
 /*
    * Automatically adds stop words for the given field with terms exceeding the maxPercentDocs
    *
    * @param reader         The {@link IndexReader} which will be consulted to identify potential stop words that
    *                       exceed the required document frequency
    * @param fieldName      The field for which stopwords will be added
    * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
    *                       contain a term, after which the word is considered to be a stop word.
    * @return The number of stop words identified.
    * @throws IOException
    */
 public int AddStopWords(IndexReader reader, String fieldName, float maxPercentDocs)
 {
     return AddStopWords(reader, fieldName, (int) (reader.NumDocs() * maxPercentDocs));
 }
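A hedged usage sketch, assuming this method lives on the contrib QueryAutoStopWordAnalyzer (the constructor arguments here are assumptions): every term appearing in more than 40% of the documents in "body" becomes a stop word.

     var analyzer = new QueryAutoStopWordAnalyzer(Version.LUCENE_29, new WhitespaceAnalyzer());
     int added = analyzer.AddStopWords(reader, "body", 0.4f);   // maxPercentDocs = 0.4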
Example #37
		/// <summary>Add an IndexReader whose stored fields will not be returned.  This can
		/// accelerate search when stored fields are only needed from a subset of
		/// the IndexReaders.
		/// </summary>
		/// <throws>  IllegalArgumentException if not all indexes contain the same number of documents </throws>
		/// <throws>  IllegalArgumentException if not all indexes have the same value of {@link IndexReader#MaxDoc()} </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
		{
			
			EnsureOpen();
			if (readers.Count == 0)
			{
				this.maxDoc = reader.MaxDoc();
				this.numDocs = reader.NumDocs();
				this.hasDeletions = reader.HasDeletions();
			}
			
			if (reader.MaxDoc() != maxDoc) // check compatibility
				throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
			if (reader.NumDocs() != numDocs)
				throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
			
			System.Collections.Generic.ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
			readerToFields[reader] = fields;
			System.Collections.IEnumerator i = fields.GetEnumerator();
			while (i.MoveNext())
			{
				// update fieldToReader map
				System.String field = (System.String) i.Current;
				if (fieldToReader[field] == null)
					fieldToReader[field] = reader;
			}
			
			if (!ignoreStoredFields)
				storedFieldReaders.Add(reader); // add to storedFieldReaders
			readers.Add(reader);
			
			if (incRefReaders)
			{
				reader.IncRef();
			}
			decrefOnClose.Add(incRefReaders);
		}
Example #38
 protected void SetItemCount (IndexReader reader)
 {
         last_item_count = reader.NumDocs ();
 }
Example #39
        protected override IEnumerable<LucObject> DoExecute(Query query, bool allVersions, IndexReader idxReader, out int totalCount)
        {
            Searcher searcher = null;
            IEnumerable<LucObject> result;
            totalCount = 0;

            BeginFullExecutingTime();
            searcher = new IndexSearcher(idxReader);

            var numDocs = idxReader.NumDocs();

            var start = this.LucQuery.Skip;

            var maxtop = numDocs - start;
            if (maxtop < 1)
                return EmptyResult;

            int top = this.LucQuery.Top != 0 ? this.LucQuery.Top : this.LucQuery.PageSize;

            if (top == 0)
                top = 100000;
            var howMany = (top < int.MaxValue / 2) ? top * 2 : int.MaxValue;

            bool noMorePage = false;
            if ((long)howMany > maxtop)
            {
                howMany = maxtop - start;
                noMorePage = true;
            }

            var numHits = howMany + start;
            if (numHits > numDocs)
                numHits = numDocs;

            try
            {
                //====================================================
                var collector = CreateCollector(numHits);
                BeginKernelTime();
                searcher.Search(query, collector);
                FinishKernelTime();
                BeginCollectingTime();
                //var topDocs = collector.TopDocs(start, howMany);
                var topDocs = (this.LucQuery.SortFields.Length > 0) ?
                    ((TopFieldCollector)collector).TopDocs(start, howMany) :
                    ((TopScoreDocCollector)collector).TopDocs(start, howMany);

                totalCount = topDocs.TotalHits;
                var hits = topDocs.ScoreDocs;
                FinishCollectingTime();
                //====================================================

                BeginPagingTime();
                bool noMoreHits;
                result = GetResultPage(hits, searcher, top, allVersions, out noMoreHits);
                FinishPagingTime();
                if (result.Count() < top && !noMorePage /*&& !noMoreHits*/)
                {
                    //re-search
                    numHits = numDocs - start;
                    collector = CreateCollector(numHits);
                    searcher.Search(query, collector);
                    //topDocs = collector.TopDocs(start);
                    topDocs = (this.LucQuery.SortFields.Length > 0) ?
                        ((TopFieldCollector)collector).TopDocs(start, howMany) :
                        ((TopScoreDocCollector)collector).TopDocs(start, howMany);

                    hits = topDocs.ScoreDocs;
                    result = GetResultPage(hits, searcher, top, allVersions, out noMoreHits);
                }

                return result;
            }
            catch
            {
                FinishKernelTime();
                FinishCollectingTime();
                FinishPagingTime();

                throw;
            }
            finally
            {
                if (searcher != null)
                    searcher.Close();
                searcher = null;
                FinishFullExecutingTime();
            }
        }
		public static void  VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
		{
			Assert.AreEqual(r1.NumDocs(), r2.NumDocs());
			bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());
			
			int[] r2r1 = new int[r2.MaxDoc()]; // r2 id to r1 id mapping
			
			TermDocs termDocs1 = r1.TermDocs();
			TermDocs termDocs2 = r2.TermDocs();
			
			// create mapping from id2 space to id1 space based on idField
			idField = StringHelper.Intern(idField);
			TermEnum termEnum = r1.Terms(new Term(idField, ""));
			do 
			{
				Term term = termEnum.Term();
				if (term == null || (System.Object) term.Field() != (System.Object) idField)
					break;
				
				termDocs1.Seek(termEnum);
				if (!termDocs1.Next())
				{
					// This doc is deleted and wasn't replaced
					termDocs2.Seek(termEnum);
					Assert.IsFalse(termDocs2.Next());
					continue;
				}
				
				int id1 = termDocs1.Doc();
				Assert.IsFalse(termDocs1.Next());
				
				termDocs2.Seek(termEnum);
				Assert.IsTrue(termDocs2.Next());
				int id2 = termDocs2.Doc();
				Assert.IsFalse(termDocs2.Next());
				
				r2r1[id2] = id1;
				
				// verify stored fields are equivalent
				try
				{
					VerifyEquals(r1.Document(id1), r2.Document(id2));
				}
				catch (System.Exception)
				{
					System.Console.Out.WriteLine("FAILED term=" + term + " id1=" + id1 + " id2=" + id2);
					System.Console.Out.WriteLine("  d1=" + r1.Document(id1));
					System.Console.Out.WriteLine("  d2=" + r2.Document(id2));
					throw; // rethrow, preserving the original stack trace
				}
				
				try
				{
					// verify term vectors are equivalent        
					VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
				}
				catch (System.Exception)
				{
					System.Console.Out.WriteLine("FAILED term=" + term + " id1=" + id1 + " id2=" + id2);
					TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
					System.Console.Out.WriteLine("  d1=" + tv1);
					if (tv1 != null)
						for (int i = 0; i < tv1.Length; i++)
						{
							System.Console.Out.WriteLine("    " + i + ": " + tv1[i]);
						}
					
					TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
					System.Console.Out.WriteLine("  d2=" + tv2);
					if (tv2 != null)
						for (int i = 0; i < tv2.Length; i++)
						{
							System.Console.Out.WriteLine("    " + i + ": " + tv2[i]);
						}
					
					throw;
				}
			}
			while (termEnum.Next());
			
			termEnum.Close();
			
			// Verify postings
			TermEnum termEnum1 = r1.Terms(new Term("", ""));
			TermEnum termEnum2 = r2.Terms(new Term("", ""));
			
			// pack both doc and freq into single element for easy sorting
			long[] info1 = new long[r1.NumDocs()];
			long[] info2 = new long[r2.NumDocs()];
			
			for (; ; )
			{
				Term term1, term2;
				
				// iterate until we get some docs
				int len1;
				for (; ; )
				{
					len1 = 0;
					term1 = termEnum1.Term();
					if (term1 == null)
						break;
					termDocs1.Seek(termEnum1);
					while (termDocs1.Next())
					{
						int d1 = termDocs1.Doc();
						int f1 = termDocs1.Freq();
						info1[len1] = (((long) d1) << 32) | f1;
						len1++;
					}
					if (len1 > 0)
						break;
					if (!termEnum1.Next())
						break;
				}
				
				// iterate until we get some docs
				int len2;
				for (; ; )
				{
					len2 = 0;
					term2 = termEnum2.Term();
					if (term2 == null)
						break;
					termDocs2.Seek(termEnum2);
					while (termDocs2.Next())
					{
						int d2 = termDocs2.Doc();
						int f2 = termDocs2.Freq();
						info2[len2] = (((long) r2r1[d2]) << 32) | f2;
						len2++;
					}
					if (len2 > 0)
						break;
					if (!termEnum2.Next())
						break;
				}
				
				if (!hasDeletes)
					Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq());
				
				Assert.AreEqual(len1, len2);
				if (len1 == 0)
					break; // no more terms
				
				Assert.AreEqual(term1, term2);
				
				// sort info2 to get it into ascending docid
				System.Array.Sort(info2, 0, len2);
				
				// now compare
				for (int i = 0; i < len1; i++)
				{
					Assert.AreEqual(info1[i], info2[i]);
				}
				
				termEnum1.Next();
				termEnum2.Next();
			}
		}
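The postings check above packs each (doc, freq) pair into a single long so that one Array.Sort call puts the remapped r2 postings into r1 doc-id order. A standalone illustration of that packing (the helper names are mine, not the test's):

		// doc id in the high 32 bits, frequency in the low 32 bits, so sorting
		// the longs orders entries by doc id while keeping the freq attached
		static long Pack(int doc, int freq)
		{
			return (((long) doc) << 32) | (long) freq; // freq > 0, so no sign-extension issue
		}
		static int UnpackDoc(long packed)
		{
			return (int) (packed >> 32);
		}
		static int UnpackFreq(long packed)
		{
			return (int) (packed & 0xFFFFFFFFL);
		}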
Exemple #41
0
        protected override SearchResult DoExecute(Query query, bool allVersions, IndexReader idxReader, Stopwatch timer)
        {
            var numDocs = idxReader.NumDocs();

            var start = this.LucQuery.Skip;

            var maxtop = numDocs - start;
            if (maxtop < 1)
                return SearchResult.Empty;

            var user = this.LucQuery.User;
            var currentUser = AccessProvider.Current.GetCurrentUser();
            if (user == null)
                user = currentUser;
            var isCurrentUser = user.Id == currentUser.Id;

            int top = this.LucQuery.Top != 0 ? this.LucQuery.Top : this.LucQuery.PageSize;
            if (top == 0)
                top = int.MaxValue;

            var searcher = new IndexSearcher(idxReader);

            var p = new SearchParams
            {
                query = query,
                allVersions = allVersions,
                searcher = searcher,
                user = user,
                isCurrentUser = isCurrentUser,
                skip = start,
                timer = timer,
                top = top
            };

            SearchResult r = null;
            SearchResult r1 = null;
            try
            {
                var defaultTops = SenseNet.ContentRepository.Storage.StorageContext.Search.DefaultTopAndGrowth;
                var howManyList = new List<int>(defaultTops);
                if (howManyList[howManyList.Count - 1] == 0)
                    howManyList[howManyList.Count - 1] = int.MaxValue;

                if (top < int.MaxValue)
                {
                    var howMany = (top < int.MaxValue / 2) ? top * 2 : int.MaxValue; // double the requested page size, guarding against overflow
                    if ((long)howMany > maxtop)
                        howMany = maxtop - start;
                    while (howManyList.Count > 0)
                    {
                        if (howMany < howManyList[0])
                            break;
                        howManyList.RemoveAt(0);
                    }
                    howManyList.Insert(0, howMany);
                }

                for (var i = 0; i < howManyList.Count; i++)
                {
                    var defaultTop = howManyList[i];
                    if (defaultTop == 0)
                        defaultTop = numDocs;

                    p.howMany = defaultTop;
                    p.useHowMany = i < howManyList.Count - 1;
                    var maxSize = i == 0 ? numDocs : r.totalCount;
                    p.collectorSize = Math.Min(defaultTop, maxSize - p.skip) + p.skip;

                    r1 = Search(p);

                    if (i == 0)
                        r = r1;
                    else
                        r.Add(r1);
                    p.skip += r.nextIndex;
                    p.top = top - r.result.Count;

                    if (r.result.Count >= top || r.result.Count >= r.totalCount)
                        break;
                }
                p.timer.Stop();
                return r;
            }
            finally
            {
                if (searcher != null)
                {
                    searcher.Close();
                    searcher = null;
                }
            }
        }
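The loop above grows the collector through the configured size steps until the requested page is filled. Below is a standalone sketch of the growth-list computation; BuildGrowthList is a hypothetical name, defaultTops stands in for StorageContext.Search.DefaultTopAndGrowth, and the example values { 100, 1000, 10000, 0 } are an assumption (requires System.Collections.Generic):

        static List<int> BuildGrowthList(IEnumerable<int> defaultTops, int top)
        {
            var howManyList = new List<int>(defaultTops);
            if (howManyList[howManyList.Count - 1] == 0)
                howManyList[howManyList.Count - 1] = int.MaxValue; // trailing 0 means "unbounded"

            if (top < int.MaxValue)
            {
                // first attempt: twice the requested page size (overflow-guarded),
                // then fall back to the configured growth steps
                var howMany = (top < int.MaxValue / 2) ? top * 2 : int.MaxValue;
                while (howManyList.Count > 0 && howMany >= howManyList[0])
                    howManyList.RemoveAt(0);
                howManyList.Insert(0, howMany);
            }
            return howManyList;
        }
        // BuildGrowthList(new[] { 100, 1000, 10000, 0 }, 50) yields
        // { 100, 1000, 10000, int.MaxValue }: the collector sizes tried in order.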
		/// <summary>Add an IndexReader whose stored fields will not be returned. This can
		/// accelerate search when stored fields are only needed from a subset of
		/// the IndexReaders.
		/// </summary>
		/// <throws>  IllegalArgumentException if not all indexes contain the same number of documents </throws>
		/// <throws>  IllegalArgumentException if not all indexes have the same value of <see cref="IndexReader.MaxDoc" /> </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
		{
			
			EnsureOpen();
			if (readers.Count == 0)
			{
				this.maxDoc = reader.MaxDoc;
				this.numDocs = reader.NumDocs();
				this.hasDeletions = reader.HasDeletions;
			}
			
			if (reader.MaxDoc != maxDoc)
			// check compatibility
				throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc);
			if (reader.NumDocs() != numDocs)
				throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
			
			ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
			readerToFields[reader] = fields;
			foreach(var field in fields)
			{
				// update fieldToReader map
				// do a ContainsKey check first to mimic Java's HashMap.get() behavior
				if (!fieldToReader.ContainsKey(field) || fieldToReader[field] == null)
					fieldToReader[field] = reader;
			}
			
			if (!ignoreStoredFields)
				storedFieldReaders.Add(reader); // add to storedFieldReaders
			readers.Add(reader);
			
			if (incRefReaders)
			{
				reader.IncRef();
			}
			decrefOnClose.Add(incRefReaders);
		}
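A hedged usage sketch for the Add overload above: dir1 and dir2 are hypothetical Directory instances holding indexes written in lockstep so that MaxDoc and NumDocs() line up; a mismatch would make Add throw ArgumentException.

			var pr = new ParallelReader();
			pr.Add(IndexReader.Open(dir1, true));        // stored fields are served from this reader
			pr.Add(IndexReader.Open(dir2, true), true);  // ignoreStoredFields: indexed fields only
			var searcher = new IndexSearcher(pr);        // searches the union of all fields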
Exemple #43
0
		public static void  AssertIndexEquals(IndexReader index1, IndexReader index2)
		{
			Assert.AreEqual(index1.NumDocs(), index2.NumDocs(), "IndexReaders have different values for numDocs.");
			Assert.AreEqual(index1.MaxDoc, index2.MaxDoc, "IndexReaders have different values for maxDoc.");
			Assert.AreEqual(index1.HasDeletions, index2.HasDeletions, "Only one IndexReader has deletions.");
			Assert.AreEqual(index1.IsOptimized(), index2.IsOptimized(), "Only one index is optimized.");
			
			// check field names
			System.Collections.Generic.ICollection<string> fieldsNames1 = index1.GetFieldNames(FieldOption.ALL);
			System.Collections.Generic.ICollection<string> fieldsNames2 = index2.GetFieldNames(FieldOption.ALL);

            System.Collections.Generic.ICollection<IFieldable> fields1 = null;
            System.Collections.Generic.ICollection<IFieldable> fields2 = null;

            Assert.AreEqual(fieldsNames1.Count, fieldsNames2.Count, "IndexReaders have different numbers of fields.");
            System.Collections.IEnumerator it1 = fieldsNames1.GetEnumerator();
            System.Collections.IEnumerator it2 = fieldsNames2.GetEnumerator();
			while (it1.MoveNext() && it2.MoveNext())
			{
				Assert.AreEqual((System.String) it1.Current, (System.String) it2.Current, "Different field names.");
			}
			
			// check norms
            it1 = fieldsNames1.GetEnumerator();
			while (it1.MoveNext())
			{
				System.String curField = (System.String) it1.Current;
				byte[] norms1 = index1.Norms(curField);
				byte[] norms2 = index2.Norms(curField);
				if (norms1 != null && norms2 != null)
				{
					Assert.AreEqual(norms1.Length, norms2.Length);
					for (int i = 0; i < norms1.Length; i++)
					{
						Assert.AreEqual(norms1[i], norms2[i], "Norm different for doc " + i + " and field '" + curField + "'.");
					}
				}
				else
				{
					Assert.AreSame(norms1, norms2);
				}
			}
			
			// check deletions
			for (int i = 0; i < index1.MaxDoc; i++)
			{
				Assert.AreEqual(index1.IsDeleted(i), index2.IsDeleted(i), "Doc " + i + " only deleted in one index.");
			}
			
			// check stored fields
			for (int i = 0; i < index1.MaxDoc; i++)
			{
				if (!index1.IsDeleted(i))
				{
					Document doc1 = index1.Document(i);
					Document doc2 = index2.Document(i);
					fields1 = doc1.GetFields();
					fields2 = doc2.GetFields();
					Assert.AreEqual(fields1.Count, fields2.Count, "Different numbers of fields for doc " + i + ".");
					it1 = fields1.GetEnumerator();
					it2 = fields2.GetEnumerator();
					while (it1.MoveNext() && it2.MoveNext())
					{
						Field curField1 = (Field) it1.Current;
						Field curField2 = (Field) it2.Current;
						Assert.AreEqual(curField1.Name, curField2.Name, "Different fields names for doc " + i + ".");
						Assert.AreEqual(curField1.StringValue, curField2.StringValue, "Different field values for doc " + i + ".");
					}
				}
			}
			
			// check dictionary and posting lists
			TermEnum enum1 = index1.Terms();
			TermEnum enum2 = index2.Terms();
			TermPositions tp1 = index1.TermPositions();
			TermPositions tp2 = index2.TermPositions();
			while (enum1.Next())
			{
				Assert.IsTrue(enum2.Next());
				Assert.AreEqual(enum1.Term, enum2.Term, "Different term in dictionary.");
				tp1.Seek(enum1.Term);
				tp2.Seek(enum1.Term);
				while (tp1.Next())
				{
					Assert.IsTrue(tp2.Next());
					Assert.AreEqual(tp1.Doc, tp2.Doc, "Different doc id in postinglist of term " + enum1.Term + ".");
					Assert.AreEqual(tp1.Freq, tp2.Freq, "Different term frequency in postinglist of term " + enum1.Term + ".");
					for (int i = 0; i < tp1.Freq; i++)
					{
						Assert.AreEqual(tp1.NextPosition(), tp2.NextPosition(), "Different positions in postinglist of term " + enum1.Term + ".");
					}
				}
			}
		}
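A hedged usage sketch for AssertIndexEquals above: dirA and dirB are hypothetical Directory instances expected to hold equivalent indexes (for example an original and a copy produced via AddIndexes).

			IndexReader index1 = IndexReader.Open(dirA, true);
			IndexReader index2 = IndexReader.Open(dirB, true);
			try
			{
				AssertIndexEquals(index1, index2); // numDocs, maxDoc, norms, stored fields, postings
			}
			finally
			{
				index1.Close();
				index2.Close();
			}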