Example #1
		/// <summary>Add an IndexReader whose stored fields will not be returned.  This can
		/// accelerate search when stored fields are only needed from a subset of
		/// the IndexReaders.
		/// </summary>
		/// <throws>  IllegalArgumentException if not all indexes contain the same number of documents </throws>
		/// <throws>  IllegalArgumentException if not all indexes have the same value of {@link IndexReader#MaxDoc()} </throws>
		public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
		{
			
			if (readers.Count == 0)
			{
				this.maxDoc = reader.MaxDoc();
				this.numDocs = reader.NumDocs();
				this.hasDeletions = reader.HasDeletions();
			}
			
			if (reader.MaxDoc() != maxDoc)
			// check compatibility
				throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
			if (reader.NumDocs() != numDocs)
				throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
			
			System.Collections.IEnumerator i = reader.GetFieldNames(IndexReader.FieldOption.ALL).GetEnumerator();
			while (i.MoveNext())
			{
                System.Collections.DictionaryEntry fi = (System.Collections.DictionaryEntry) i.Current;

				// update fieldToReader map
				System.String field = fi.Key.ToString();
				if (fieldToReader[field] == null)
					fieldToReader[field] = reader;
			}
			
			if (!ignoreStoredFields)
				storedFieldReaders.Add(reader); // add to storedFieldReaders
			readers.Add(reader);
		}
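
A minimal usage sketch (illustrative, not from the source; primaryDir and secondaryDir are hypothetical Directory instances whose indexes have identical maxDoc and numDocs):

		ParallelReader pr = new ParallelReader();
		pr.Add(IndexReader.Open(primaryDir));          // stored fields are served from this reader
		pr.Add(IndexReader.Open(secondaryDir), true);  // ignoreStoredFields == true
		Document doc = pr.Document(0);                 // stored fields come from primaryDir only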
Example #2
        public virtual void  TestDocCount()
        {
            Directory dir = new RAMDirectory();

            IndexWriter writer = null;
            IndexReader reader = null;
            int         i;

            try
            {
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

                // add 100 documents
                for (i = 0; i < 100; i++)
                {
                    AddDoc(writer);
                }
                Assert.AreEqual(100, writer.DocCount());
                writer.Close();

                // delete 40 documents
                reader = IndexReader.Open(dir);
                for (i = 0; i < 40; i++)
                {
                    reader.Delete(i);
                }
                reader.Close();

                // test doc count before segments are merged/index is optimized
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
                Assert.AreEqual(100, writer.DocCount());
                writer.Close();

                reader = IndexReader.Open(dir);
                Assert.AreEqual(100, reader.MaxDoc());
                Assert.AreEqual(60, reader.NumDocs());
                reader.Close();

                // optimize the index and check that the new doc count is correct
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
                writer.Optimize();
                Assert.AreEqual(60, writer.DocCount());
                writer.Close();

                // check that the index reader gives the same numbers.
                reader = IndexReader.Open(dir);
                Assert.AreEqual(60, reader.MaxDoc());
                Assert.AreEqual(60, reader.NumDocs());
                reader.Close();
            }
            catch (System.IO.IOException e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
            }
        }
        /// <summary>Add an IndexReader whose stored fields will not be returned.  This can
        /// accelerate search when stored fields are only needed from a subset of
        /// the IndexReaders.
        /// </summary>
        /// <throws>  IllegalArgumentException if not all indexes contain the same number of documents </throws>
        /// <throws>  IllegalArgumentException if not all indexes have the same value of {@link IndexReader#MaxDoc()} </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
        {
            EnsureOpen();
            if (readers.Count == 0)
            {
                this.maxDoc       = reader.MaxDoc();
                this.numDocs      = reader.NumDocs();
                this.hasDeletions = reader.HasDeletions();
            }

            if (reader.MaxDoc() != maxDoc)
            {
                // check compatibility
                throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
            }
            if (reader.NumDocs() != numDocs)
            {
                throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
            }

            ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);

            readerToFields[reader] = fields;
            IEnumerator<string> i = fields.GetEnumerator();

            while (i.MoveNext())
            {
                // update fieldToReader map
                string field = i.Current;
                if (!fieldToReader.ContainsKey(field))
                {
                    fieldToReader[field] = reader;
                }
            }

            if (!ignoreStoredFields)
            {
                storedFieldReaders.Add(reader);                 // add to storedFieldReaders
            }
            readers.Add(reader);

            if (incRefReaders)
            {
                reader.IncRef();
            }
            decrefOnClose.Add(incRefReaders);
        }
        private void  CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
        {
            int maxDoc = reader.MaxDoc();

            if (matchingVectorsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                int docCount = 0;
                while (docCount < maxDoc)
                {
                    int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
                    matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
                    termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
                    docCount += len;
                    checkAbort.Work(300 * len);
                }
            }
            else
            {
                for (int docNum = 0; docNum < maxDoc; docNum++)
                {
                    // NOTE: it's very important to first assign to vectors then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
                    termVectorsWriter.AddAllDocVectors(vectors);
                    checkAbort.Work(300);
                }
            }
        }
Example #5
 // maps around deleted docs
 internal int[] GetDocMap()
 {
     if (docMap == null)
     {
         // build array which maps document numbers around deletions
         if (reader.HasDeletions())
         {
             int maxDoc = reader.MaxDoc();
             docMap = new int[maxDoc];
             int j = 0;
             for (int i = 0; i < maxDoc; i++)
             {
                 if (reader.IsDeleted(i))
                 {
                     docMap[i] = -1;
                 }
                 else
                 {
                     docMap[i] = j++;
                 }
             }
         }
     }
     return(docMap);
 }
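
A self-contained sketch of the same renumbering (illustrative; a bool[] stands in for reader.IsDeleted):

 internal static int[] BuildDocMap(bool[] deleted)
 {
     // For a 5-doc segment with deletions at 1 and 3 this yields
     // { 0, -1, 1, -1, 2 }: live docs are renumbered contiguously,
     // deleted slots map to -1.
     int[] map = new int[deleted.Length];
     int j = 0;
     for (int i = 0; i < deleted.Length; i++)
     {
         map[i] = deleted[i] ? -1 : j++;
     }
     return map;
 }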
Example #6
 private void  MergeNorms()
 {
     for (int i = 0; i < fieldInfos.Size(); i++)
     {
         FieldInfo fi = fieldInfos.FieldInfo(i);
         if (fi.isIndexed && !fi.omitNorms)
         {
             IndexOutput output = directory.CreateOutput(segment + ".f" + i);
             try
             {
                 for (int j = 0; j < readers.Count; j++)
                 {
                     IndexReader reader = (IndexReader)readers[j];
                     int         maxDoc = reader.MaxDoc();
                     byte[]      input  = new byte[maxDoc];
                     reader.Norms(fi.name, input, 0);
                     for (int k = 0; k < maxDoc; k++)
                     {
                         if (!reader.IsDeleted(k))
                         {
                             output.WriteByte(input[k]);
                         }
                     }
                 }
             }
             finally
             {
                 output.Close();
             }
         }
     }
 }
Example #7
        public virtual void  TestAddIndexes2()
        {
            bool optimize = false;

            Directory   dir1   = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer.SetInfoStream(infoStream);

            // create a 2nd index
            Directory   dir2    = new MockRAMDirectory();
            IndexWriter writer2 = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer2.SetInfoStream(infoStream);
            CreateIndexNoClose(!optimize, "index2", writer2);
            writer2.Close();

            writer.AddIndexesNoOptimize(new Directory[] { dir2 });
            writer.AddIndexesNoOptimize(new Directory[] { dir2 });
            writer.AddIndexesNoOptimize(new Directory[] { dir2 });
            writer.AddIndexesNoOptimize(new Directory[] { dir2 });
            writer.AddIndexesNoOptimize(new Directory[] { dir2 });

            IndexReader r1 = writer.GetReader();

            Assert.AreEqual(500, r1.MaxDoc());

            r1.Close();
            writer.Close();
            dir1.Close();
        }
Example #8
        /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
        /// <throws>  IOException </throws>
        private void  MergeVectors()
        {
            TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);

            try
            {
                for (int r = 0; r < readers.Count; r++)
                {
                    IndexReader reader = (IndexReader)readers[r];
                    int         maxDoc = reader.MaxDoc();
                    for (int docNum = 0; docNum < maxDoc; docNum++)
                    {
                        // skip deleted docs
                        if (reader.IsDeleted(docNum))
                        {
                            continue;
                        }
                        termVectorsWriter.AddAllDocVectors(reader.GetTermFreqVectors(docNum));
                        if (checkAbort != null)
                        {
                            checkAbort.Work(300);
                        }
                    }
                }
            }
            finally
            {
                termVectorsWriter.Close();
            }
        }
        private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
        {
            int maxDoc   = reader.MaxDoc();
            int docCount = 0;

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                while (docCount < maxDoc)
                {
                    int        len    = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
                    docCount += len;
                    checkAbort.Work(300 * len);
                }
            }
            else
            {
                for (; docCount < maxDoc; docCount++)
                {
                    // NOTE: it's very important to first assign to doc then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Document doc = reader.Document(docCount, fieldSelectorMerge);
                    fieldsWriter.AddDocument(doc);
                    checkAbort.Work(300);
                }
            }
            return(docCount);
        }
Example #10
        private void  MergeNorms()
        {
            byte[]      normBuffer = null;
            IndexOutput output     = null;

            try
            {
                for (int i = 0; i < fieldInfos.Size(); i++)
                {
                    FieldInfo fi = fieldInfos.FieldInfo(i);
                    if (fi.isIndexed && !fi.omitNorms)
                    {
                        if (output == null)
                        {
                            output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
                            output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
                        }
                        for (int j = 0; j < readers.Count; j++)
                        {
                            IndexReader reader = (IndexReader)readers[j];
                            int         maxDoc = reader.MaxDoc();
                            if (normBuffer == null || normBuffer.Length < maxDoc)
                            {
                                // the buffer is too small for the current segment
                                normBuffer = new byte[maxDoc];
                            }
                            reader.Norms(fi.name, normBuffer, 0);
                            if (!reader.HasDeletions())
                            {
                                //optimized case for segments without deleted docs
                                output.WriteBytes(normBuffer, maxDoc);
                            }
                            else
                            {
                                // this segment has deleted docs, so we have to
                                // check for every doc if it is deleted or not
                                for (int k = 0; k < maxDoc; k++)
                                {
                                    if (!reader.IsDeleted(k))
                                    {
                                        output.WriteByte(normBuffer[k]);
                                    }
                                }
                            }
                            if (checkAbort != null)
                            {
                                checkAbort.Work(maxDoc);
                            }
                        }
                    }
                }
            }
            finally
            {
                if (output != null)
                {
                    output.Close();
                }
            }
        }
Example #11
        private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
        {
            int docCount = 0;
            int maxDoc   = reader.MaxDoc();

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int j = 0; j < maxDoc;)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        ++j;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = j, numDocs = 0;
                    do
                    {
                        j++;
                        numDocs++;
                        if (j >= maxDoc)
                        {
                            break;
                        }
                        if (reader.IsDeleted(j))
                        {
                            j++;
                            break;
                        }
                    }while (numDocs < MAX_RAW_MERGE_DOCS);

                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                    docCount += numDocs;
                    checkAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int j = 0; j < maxDoc; j++)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        continue;
                    }
                    // NOTE: it's very important to first assign to doc then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Document doc = reader.Document(j, fieldSelectorMerge);
                    fieldsWriter.AddDocument(doc);
                    docCount++;
                    checkAbort.Work(300);
                }
            }
            return(docCount);
        }
Example #12
		public override void SetNextReader(IndexReader reader, int docBase)
		{
			currentReaderValues = new int[reader.MaxDoc()];
			for (int i = 0; i < currentReaderValues.Length; i++)
			{
				currentReaderValues[i] = random.Next();
			}
		}
        private void  VerifyNumDocs(Directory dir, int numDocs)
        {
            IndexReader reader = IndexReader.Open(dir);

            Assert.AreEqual(numDocs, reader.MaxDoc());
            Assert.AreEqual(numDocs, reader.NumDocs());
            reader.Close();
        }
Example #14
        public OpenBitSetDISI TermToBitSet(string term, IndexReader indexReader)
        {
            var facetQuery = new TermQuery(new Term(this.Field, term));
            var facetQueryFilter = new CachingWrapperFilter(new QueryWrapperFilter(facetQuery));
            var bitSet = new OpenBitSetDISI(facetQueryFilter.GetDocIdSet(indexReader).Iterator(), indexReader.MaxDoc());

            return bitSet;
        }
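
A hedged usage sketch (facets is an assumed instance of the class above; reader is an open IndexReader):

        // Illustrative: count the documents whose faceted field matches "books".
        OpenBitSetDISI bits = facets.TermToBitSet("books", reader);
        long matchCount = bits.Cardinality();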
Example #15
        /// <summary> Tests creating a segment, then checks to ensure the segment can be seen via
        /// IW.GetReader.
        /// </summary>
        public virtual void  DoTestIndexWriterReopenSegment(bool optimize)
        {
            Directory   dir1   = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer.SetInfoStream(infoStream);
            IndexReader r1 = writer.GetReader();

            Assert.AreEqual(0, r1.MaxDoc());
            CreateIndexNoClose(false, "index1", writer);
            writer.Flush(!optimize, true, true);

            IndexReader iwr1 = writer.GetReader();

            Assert.AreEqual(100, iwr1.MaxDoc());

            IndexReader r2 = writer.GetReader();

            Assert.AreEqual(r2.MaxDoc(), 100);
            // add 100 documents
            for (int x = 10000; x < 10000 + 100; x++)
            {
                Document d = CreateDocument(x, "index1", 5);
                writer.AddDocument(d);
            }
            writer.Flush(false, true, true);
            // verify the reader was reopened internally
            IndexReader iwr2 = writer.GetReader();

            Assert.IsTrue(iwr2 != r1);
            Assert.AreEqual(200, iwr2.MaxDoc());
            // should have flushed out a segment
            IndexReader r3 = writer.GetReader();

            Assert.IsTrue(r2 != r3);
            Assert.AreEqual(200, r3.MaxDoc());

            // dec ref the readers rather than close them because
            // closing flushes changes to the writer
            r1.Close();
            iwr1.Close();
            r2.Close();
            r3.Close();
            iwr2.Close();
            writer.Close();

            // test whether the changes made it to the directory
            writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            IndexReader w2r1 = writer.GetReader();

            // ensure the deletes were actually flushed to the directory
            Assert.AreEqual(200, w2r1.MaxDoc());
            w2r1.Close();
            writer.Close();

            dir1.Close();
        }
Example #16
        private void  CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
        {
            int maxDoc = reader.MaxDoc();

            if (matchingVectorsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int docNum = 0; docNum < maxDoc;)
                {
                    if (reader.IsDeleted(docNum))
                    {
                        // skip deleted docs
                        ++docNum;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = docNum, numDocs = 0;
                    do
                    {
                        docNum++;
                        numDocs++;
                        if (docNum >= maxDoc)
                        {
                            break;
                        }
                        if (reader.IsDeleted(docNum))
                        {
                            docNum++;
                            break;
                        }
                    }while (numDocs < MAX_RAW_MERGE_DOCS);

                    matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
                    termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
                    checkAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int docNum = 0; docNum < maxDoc; docNum++)
                {
                    if (reader.IsDeleted(docNum))
                    {
                        // skip deleted docs
                        continue;
                    }

                    // NOTE: it's very important to first assign to vectors then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
                    termVectorsWriter.AddAllDocVectors(vectors);
                    checkAbort.Work(300);
                }
            }
        }
Example #17
		public override DocIdSet GetDocIdSet(IndexReader reader)
		{
			double[] latIndex = FieldCache_Fields.DEFAULT.GetDoubles(reader, _latField);
			double[] lngIndex = FieldCache_Fields.DEFAULT.GetDoubles(reader, _lngField);

			int docBase = NextDocBase;
			NextDocBase += reader.MaxDoc();

			return new LatLongFilteredDocIdSet(StartingFilter.GetDocIdSet(reader), latIndex, lngIndex, DistanceLookupCache, _lat, _lng, Distance, docBase, Distances);
		}
        public override BitArray Bits(IndexReader reader)
        {
            BitArray bitArray = new BitArray(reader.MaxDoc());
            TermDocs termDocs = reader.TermDocs(new Term("score", "5"));
            while (termDocs.Next())
            {
                bitArray.Set(termDocs.Doc(), true);
            }

            return bitArray;
        }
        public override BitArray Bits(IndexReader reader)
        {
            if (done)
            {
                throw new NotSupportedException("Called twice");
            }

            BitArray bitArray = new BitArray(reader.MaxDoc());
            done = true;

            return bitArray;
        }
Example #20
        /// <summary>Add an IndexReader whose stored fields will not be returned.  This can
        /// accelerate search when stored fields are only needed from a subset of
        /// the IndexReaders.
        /// </summary>
        /// <throws>  IllegalArgumentException if not all indexes contain the same number of documents </throws>
        /// <throws>  IllegalArgumentException if not all indexes have the same value of {@link IndexReader#MaxDoc()} </throws>
        public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
        {
            if (readers.Count == 0)
            {
                this.maxDoc       = reader.MaxDoc();
                this.numDocs      = reader.NumDocs();
                this.hasDeletions = reader.HasDeletions();
            }

            if (reader.MaxDoc() != maxDoc)
            {
                // check compatibility
                throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
            }
            if (reader.NumDocs() != numDocs)
            {
                throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
            }

            System.Collections.ICollection fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
            readerToFields[reader] = fields;
            System.Collections.IEnumerator i = fields.GetEnumerator();
            while (i.MoveNext())
            {
                System.Collections.DictionaryEntry fi = (System.Collections.DictionaryEntry)i.Current;

                // update fieldToReader map
                System.String field = fi.Key.ToString();
                if (fieldToReader[field] == null)
                {
                    fieldToReader[field] = reader;
                }
            }

            if (!ignoreStoredFields)
            {
                storedFieldReaders.Add(reader);                 // add to storedFieldReaders
            }
            readers.Add(reader);
        }
Example #21
        public virtual void  TestAddIndexes()
        {
            bool optimize = false;

            Directory   dir1   = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer.SetInfoStream(infoStream);
            // create the index
            CreateIndexNoClose(!optimize, "index1", writer);
            writer.Flush(false, true, true);

            // create a 2nd index
            Directory   dir2    = new MockRAMDirectory();
            IndexWriter writer2 = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer2.SetInfoStream(infoStream);
            CreateIndexNoClose(!optimize, "index2", writer2);
            writer2.Close();

            IndexReader r0 = writer.GetReader();

            Assert.IsTrue(r0.IsCurrent());
            writer.AddIndexesNoOptimize(new Directory[] { dir2 });
            Assert.IsFalse(r0.IsCurrent());
            r0.Close();

            IndexReader r1 = writer.GetReader();

            Assert.IsTrue(r1.IsCurrent());

            writer.Commit();
            Assert.IsTrue(r1.IsCurrent());

            Assert.AreEqual(200, r1.MaxDoc());

            int index2df = r1.DocFreq(new Term("indexname", "index2"));

            Assert.AreEqual(100, index2df);

            // verify the docs are from different indexes
            Document doc5 = r1.Document(5);

            Assert.AreEqual("index1", doc5.Get("indexname"));
            Document doc150 = r1.Document(150);

            Assert.AreEqual("index2", doc150.Get("indexname"));
            r1.Close();
            writer.Close();
            dir1.Close();
        }
Example #22
 public static void  CheckNorms(IndexReader reader)
 {
     // test omit norms
     for (int i = 0; i < DocHelper.fields.Length; i++)
     {
         Fieldable f = DocHelper.fields[i];
         if (f.IsIndexed())
         {
             Assert.AreEqual(reader.HasNorms(f.Name()), !f.GetOmitNorms());
             Assert.AreEqual(reader.HasNorms(f.Name()), !DocHelper.noNorms.Contains(f.Name()));
             if (!reader.HasNorms(f.Name()))
             {
                 // test for fake norms of 1.0 or null depending on the flag
                 byte[] norms = reader.Norms(f.Name());
                 byte   norm1 = DefaultSimilarity.EncodeNorm(1.0f);
                 if (reader.GetDisableFakeNorms())
                 {
                     Assert.IsNull(norms);
                 }
                 else
                 {
                     Assert.AreEqual(norms.Length, reader.MaxDoc());
                     for (int j = 0; j < reader.MaxDoc(); j++)
                     {
                         Assert.AreEqual(norms[j], norm1);
                     }
                 }
                 norms = new byte[reader.MaxDoc()];
                 reader.Norms(f.Name(), norms, 0);
                 for (int j = 0; j < reader.MaxDoc(); j++)
                 {
                     Assert.AreEqual(norms[j], norm1);
                 }
             }
         }
     }
 }
Example #23
            public DeleteThreads(TestIndexWriterReader enclosingInstance, IndexWriter mainWriter)
            {
                InitBlock(enclosingInstance);
                this.mainWriter = mainWriter;
                IndexReader reader = mainWriter.GetReader();
                int         maxDoc = reader.MaxDoc();

                random = Enclosing_Instance.NewRandom();
                int iter = random.Next(maxDoc);

                for (int x = 0; x < iter; x++)
                {
                    int           doc = random.Next(iter);
                    System.String id  = reader.Document(doc).Get("id");
                    toDeleteTerms.Add(new Term("id", id));
                }
            }
Example #24
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  IOException </throws>
        private int MergeFields()
        {
            fieldInfos = new FieldInfos();             // merge field names
            int docCount = 0;

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
                fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
            }
            fieldInfos.Write(directory, segment + ".fnm");

            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
            // in  merge mode, we use this FieldSelector
            FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

            try
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    int         maxDoc = reader.MaxDoc();
                    for (int j = 0; j < maxDoc; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            // skip deleted docs
                            fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
                            docCount++;
                        }
                    }
                }
            }
            finally
            {
                fieldsWriter.Close();
            }
            return(docCount);
        }
Example #25
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  IOException </throws>
        private int MergeFields()
        {
            fieldInfos = new FieldInfos();             // merge field names
            int docCount = 0;

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
                fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
            }
            fieldInfos.Write(directory, segment + ".fnm");

            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            try
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    int         maxDoc = reader.MaxDoc();
                    for (int j = 0; j < maxDoc; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            // skip deleted docs
                            fieldsWriter.AddDocument(reader.Document(j));
                            docCount++;
                        }
                    }
                }
            }
            finally
            {
                fieldsWriter.Close();
            }
            return(docCount);
        }
Example #26
        private void  ModifyNormsForF1(Directory dir)
        {
            IndexReader ir = IndexReader.Open(dir);
            int         n  = ir.MaxDoc();

            for (int i = 0; i < n; i += 3)
            {
                // modify for every third doc
                int   k        = (i * 3) % modifiedNorms.Count;
                float origNorm = (float)((System.Single)modifiedNorms[i]);
                float newNorm  = (float)((System.Single)modifiedNorms[k]);
                //System.out.println("Modifying: for "+i+" from "+origNorm+" to "+newNorm);
                //System.out.println("      and: for "+k+" from "+newNorm+" to "+origNorm);
                modifiedNorms[i] = (float)newNorm;
                modifiedNorms[k] = (float)origNorm;
                ir.SetNorm(i, "f" + 1, newNorm);
                ir.SetNorm(k, "f" + 1, origNorm);
            }
            ir.Close();
        }
        private bool VerifyIndex(Directory directory, int startAt)
        {
            bool        fail   = false;
            IndexReader reader = IndexReader.Open(directory);

            int max = reader.MaxDoc();

            for (int i = 0; i < max; i++)
            {
                Document temp = reader.Document(i);
                //System.out.println("doc "+i+"="+temp.getField("count").stringValue());
                //compare the index doc number to the value that it should be
                if (!temp.GetField("count").StringValue().Equals((i + startAt) + ""))
                {
                    fail = true;
                    System.Console.Out.WriteLine("Document " + (i + startAt) + " is returning document " + temp.GetField("count").StringValue());
                }
            }
            reader.Close();
            return(fail);
        }
Example #28
        private void  CheckExpecteds(System.Collections.BitArray expecteds)
        {
            IndexReader r = IndexReader.Open(dir);

            //Perhaps not the most efficient approach but meets our needs here.
            for (int i = 0; i < r.MaxDoc(); i++)
            {
                if (!r.IsDeleted(i))
                {
                    System.String sval = r.Document(i).Get(FIELD_RECORD_ID);
                    if (sval != null)
                    {
                        int val = System.Int32.Parse(sval);
                        Assert.IsTrue(expecteds.Get(val), "Did not expect document #" + val);
                        expecteds.Set(val, false);
                    }
                }
            }
            r.Close();
            Assert.AreEqual(0, SupportClass.BitSetSupport.Cardinality(expecteds), "Should have 0 docs remaining ");
        }
Example #29
        private List<FacetMatch> FindMatchesInQuery(Facet facet, Filter query, Filter filter, IndexReader indexReader)
        {
            var matches = facet.Values.Select(value =>
            {
                var bitsQuery = new OpenBitSetDISI(query.GetDocIdSet(indexReader).Iterator(), indexReader.MaxDoc());
                bitsQuery.And(value.Item2);

                if (filter != null)
                {
                    //TODO: Remove this hard coded value (1000)
                    var bitsFilter = new OpenBitSetDISI(filter.GetDocIdSet(indexReader).Iterator(), 1000);
                    bitsQuery.And(bitsFilter);
                }

                var count = bitsQuery.Cardinality();

                return new FacetMatch() { Count = count, Value = value.Item1, Id = facet.Id };
            }).ToList();

            return matches;
        }
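
Regarding the TODO above: sizing the filter bitset by the reader, exactly as the query bitset already is, would remove the hard-coded cap (a sketch, not the author's fix):

        var bitsFilter = new OpenBitSetDISI(filter.GetDocIdSet(indexReader).Iterator(), indexReader.MaxDoc());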
Example #30
        /// <summary>
        /// Get the DocIdSet.
        /// </summary>
        /// <param name="reader">Applcible reader.</param>
        /// <returns>The set.</returns>
        public override DocIdSet GetDocIdSet(IndexReader reader)
        {
            OpenBitSet result = new OpenBitSet(reader.MaxDoc());
            TermDocs td = reader.TermDocs();
            try
            {
                foreach (Term t in this.terms)
                {
                    td.Seek(t);
                    while (td.Next())
                    {
                        result.Set(td.Doc());
                    }
                }
            }
            finally
            {
                td.Close();
            }

            return result;
        }
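
A usage sketch (illustrative; termsFilter stands for an instance of the filter class above):

        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs hits = searcher.Search(new MatchAllDocsQuery(), termsFilter, 10);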
Example #31
        private OpenBitSet CorrectBits(IndexReader reader)
        {

            OpenBitSet bits = new OpenBitSet(reader.MaxDoc()); //assume all are INvalid
            Term startTerm = new Term(fieldName);
            TermEnum te = reader.Terms(startTerm);
            if (te != null)
            {
                Term currTerm = te.Term();
                while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned
                {
                    int lastDoc = -1;
                    //set non duplicates
                    TermDocs td = reader.TermDocs(currTerm);
                    if (td.Next())
                    {
                        if (keepMode == KM_USE_FIRST_OCCURRENCE)
                        {
                            bits.Set(td.Doc());
                        }
                        else
                        {
                            do
                            {
                                lastDoc = td.Doc();
                            } while (td.Next());
                            bits.Set(lastDoc);
                        }
                    }
                    if (!te.Next())
                    {
                        break;
                    }
                    currTerm = te.Term();
                }
            }
            return bits;
        }
        private void  ModifyNormsForF1(IndexReader ir)
        {
            int n = ir.MaxDoc();

            // System.out.println("modifyNormsForF1 maxDoc: "+n);
            for (int i = 0; i < n; i += 3)
            {
                // modify for every third doc
                int   k        = (i * 3) % modifiedNorms.Count;
                float origNorm = (float)((System.Single)modifiedNorms[i]);
                float newNorm  = (float)((System.Single)modifiedNorms[k]);
                // System.out.println("Modifying: for "+i+" from "+origNorm+" to
                // "+newNorm);
                // System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
                modifiedNorms[i] = (float)newNorm;
                modifiedNorms[k] = (float)origNorm;
                ir.SetNorm(i, "f" + 1, newNorm);
                ir.SetNorm(k, "f" + 1, origNorm);
                // System.out.println("setNorm i: "+i);
                // break;
            }
            // ir.close();
        }
Example #33
		public override DocIdSet GetDocIdSet(IndexReader reader)
		{
			var bits = new OpenBitSet(reader.MaxDoc());

			TermDocs termDocs = reader.TermDocs();
			List<double> area = _shape.Area;
			int sz = area.Count;
			
			// iterate through each boxid
			for (int i = 0; i < sz; i++)
			{
				double boxId = area[i];
				termDocs.Seek(new Term(_fieldName, NumericUtils.DoubleToPrefixCoded(boxId)));

				// iterate through all documents
				// which have this boxId
				while (termDocs.Next())
				{
					bits.FastSet(termDocs.Doc());
				}
			}

			return bits;
		}
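
For the Seek above to match anything, the boxId terms must have been indexed in the same prefix-coded form; an index-time counterpart might look like this (a sketch, not from the source):

			doc.Add(new Field(_fieldName, NumericUtils.DoubleToPrefixCoded(boxId),
			                  Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));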
Example #34
		public static void  VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
		{
			Assert.AreEqual(r1.NumDocs(), r2.NumDocs());
			bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());
			
			int[] r2r1 = new int[r2.MaxDoc()]; // r2 id to r1 id mapping
			
			TermDocs termDocs1 = r1.TermDocs();
			TermDocs termDocs2 = r2.TermDocs();
			
			// create mapping from id2 space to id1 space based on idField
			idField = StringHelper.Intern(idField);
			TermEnum termEnum = r1.Terms(new Term(idField, ""));
			do 
			{
				Term term = termEnum.Term();
				if (term == null || (System.Object) term.Field() != (System.Object) idField)
					break;
				
				termDocs1.Seek(termEnum);
				if (!termDocs1.Next())
				{
					// This doc is deleted and wasn't replaced
					termDocs2.Seek(termEnum);
					Assert.IsFalse(termDocs2.Next());
					continue;
				}
				
				int id1 = termDocs1.Doc();
				Assert.IsFalse(termDocs1.Next());
				
				termDocs2.Seek(termEnum);
				Assert.IsTrue(termDocs2.Next());
				int id2 = termDocs2.Doc();
				Assert.IsFalse(termDocs2.Next());
				
				r2r1[id2] = id1;
				
				// verify stored fields are equivalent
				try
				{
					VerifyEquals(r1.Document(id1), r2.Document(id2));
				}
				catch (System.Exception t)
				{
					System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
					System.Console.Out.WriteLine("  d1=" + r1.Document(id1));
					System.Console.Out.WriteLine("  d2=" + r2.Document(id2));
					throw t;
				}
				
				try
				{
					// verify term vectors are equivalent        
					VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
				}
				catch (System.Exception e)
				{
					System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
					TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
					System.Console.Out.WriteLine("  d1=" + tv1);
					if (tv1 != null)
						for (int i = 0; i < tv1.Length; i++)
						{
							System.Console.Out.WriteLine("    " + i + ": " + tv1[i]);
						}
					
					TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
					System.Console.Out.WriteLine("  d2=" + tv2);
					if (tv2 != null)
						for (int i = 0; i < tv2.Length; i++)
						{
							System.Console.Out.WriteLine("    " + i + ": " + tv2[i]);
						}
					
					throw e;
				}
			}
			while (termEnum.Next());
			
			termEnum.Close();
			
			// Verify postings
			TermEnum termEnum1 = r1.Terms(new Term("", ""));
			TermEnum termEnum2 = r2.Terms(new Term("", ""));
			
			// pack both doc and freq into single element for easy sorting
			long[] info1 = new long[r1.NumDocs()];
			long[] info2 = new long[r2.NumDocs()];
			
			for (; ; )
			{
				Term term1, term2;
				
				// iterate until we get some docs
				int len1;
				for (; ; )
				{
					len1 = 0;
					term1 = termEnum1.Term();
					if (term1 == null)
						break;
					termDocs1.Seek(termEnum1);
					while (termDocs1.Next())
					{
						int d1 = termDocs1.Doc();
						int f1 = termDocs1.Freq();
						info1[len1] = (((long) d1) << 32) | f1;
						len1++;
					}
					if (len1 > 0)
						break;
					if (!termEnum1.Next())
						break;
				}
				
				// iterate until we get some docs
				int len2;
				for (; ; )
				{
					len2 = 0;
					term2 = termEnum2.Term();
					if (term2 == null)
						break;
					termDocs2.Seek(termEnum2);
					while (termDocs2.Next())
					{
						int d2 = termDocs2.Doc();
						int f2 = termDocs2.Freq();
						info2[len2] = (((long) r2r1[d2]) << 32) | f2;
						len2++;
					}
					if (len2 > 0)
						break;
					if (!termEnum2.Next())
						break;
				}
				
				if (!hasDeletes)
					Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq());
				
				Assert.AreEqual(len1, len2);
				if (len1 == 0)
					break; // no more terms
				
				Assert.AreEqual(term1, term2);
				
				// sort info2 to get it into ascending docid
				System.Array.Sort(info2, 0, len2 - 0);
				
				// now compare
				for (int i = 0; i < len1; i++)
				{
					Assert.AreEqual(info1[i], info2[i]);
				}
				
				termEnum1.Next();
				termEnum2.Next();
			}
		}
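
The (((long) doc) << 32) | freq packing above keeps entries sortable by doc id first; a minimal sketch of the idea (illustrative only):

		// Doc id occupies the high 32 bits, freq the low 32 (freq is
		// non-negative), so sorting the packed longs orders by doc id first.
		static long PackDocFreq(int doc, int freq)
		{
			return (((long) doc) << 32) | (uint) freq;
		}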
Example #35
		private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
		{
			int docCount = 0;
			int maxDoc = reader.MaxDoc();
			if (matchingFieldsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				for (int j = 0; j < maxDoc; )
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						++j;
						continue;
					}
					// We can optimize this case (doing a bulk byte copy) since the field 
					// numbers are identical
					int start = j, numDocs = 0;
					do 
					{
						j++;
						numDocs++;
						if (j >= maxDoc)
							break;
						if (reader.IsDeleted(j))
						{
							j++;
							break;
						}
					}
					while (numDocs < MAX_RAW_MERGE_DOCS);
					
					IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
					fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
					docCount += numDocs;
					checkAbort.Work(300 * numDocs);
				}
			}
			else
			{
				for (int j = 0; j < maxDoc; j++)
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						continue;
					}
					// NOTE: it's very important to first assign to doc then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					Document doc = reader.Document(j, fieldSelectorMerge);
					fieldsWriter.AddDocument(doc);
					docCount++;
					checkAbort.Work(300);
				}
			}
			return docCount;
		}
        public SimpleFacetedSearch(IndexReader reader, string[] groupByFields)
        {
            this._Reader = reader;

            List<FieldValuesBitSets> fieldValuesBitSets = new List<FieldValuesBitSets>();

            //STEP 1
            //f1 = A, B
            //f2 = I, J
            //f3 = 1, 2, 3
            int maxFacets = 1;
            List<List<string>> inputToCP = new List<List<string>>();
            foreach (string field in groupByFields)
            {
                FieldValuesBitSets f = new FieldValuesBitSets(reader, field);
                maxFacets *= f.FieldValueBitSetPair.Count;
                if (maxFacets > MAX_FACETS) throw new Exception("Facet count exceeded " + MAX_FACETS);
                fieldValuesBitSets.Add(f);
                inputToCP.Add(f.FieldValueBitSetPair.Keys.ToList());
            }

            //STEP 2
            // comb1: A I 1
            // comb2: A I 2 etc.
            var cp = inputToCP.CartesianProduct();

            //STEP 3
            //create a single BitSet for each combination
            //BitSet1: A AND I AND 1
            //BitSet2: A AND I AND 2 etc.
            //and remove impossible combinations (e.g., B J 3) from the list.
            Parallel.ForEach(cp, combinations =>
            {
                OpenBitSetDISI bitSet = new OpenBitSetDISI(_Reader.MaxDoc());
                bitSet.Set(0, bitSet.Size());

                List<string> comb = combinations.ToList();

                for (int j = 0; j < comb.Count; j++)
                {
                    bitSet.And(fieldValuesBitSets[j].FieldValueBitSetPair[comb[j]]);
                }

                //STEP 3
                if (bitSet.Cardinality() > 0)
                {
                    lock(_Groups)
                        _Groups.Add(new KeyValuePair<List<string>, OpenBitSetDISI>(comb, bitSet));
                }
            });

            //Now _Groups has 7 rows (as <List<string>, BitSet> pairs) 
        }
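
CartesianProduct is an extension method defined elsewhere in this codebase; a standard LINQ formulation of the same operation might look like this (a sketch, assuming the usual Enumerable-based signature):

        static class EnumerableExtensions
        {
            public static IEnumerable<IEnumerable<T>> CartesianProduct<T>(this IEnumerable<IEnumerable<T>> sequences)
            {
                IEnumerable<IEnumerable<T>> result = new[] { Enumerable.Empty<T>() };
                foreach (IEnumerable<T> sequence in sequences)
                {
                    IEnumerable<T> s = sequence;  // local copy for safe capture by the lambda
                    result = result.SelectMany(acc => s, (acc, item) => acc.Concat(new[] { item }));
                }
                return result;
            }
        }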
		// Apply buffered delete terms, queries and docIDs to the
		// provided reader
		private bool ApplyDeletes(IndexReader reader, int docIDStart)
		{
			lock (this)
			{
				
				int docEnd = docIDStart + reader.MaxDoc();
				bool any = false;
				
                System.Diagnostics.Debug.Assert(CheckDeleteTerm(null));

				// Delete by term
                //System.Collections.IEnumerator iter = new System.Collections.Hashtable(deletesFlushed.terms).GetEnumerator();
				System.Collections.IEnumerator iter = deletesFlushed.terms.GetEnumerator();
				TermDocs docs = reader.TermDocs();
				try
				{
					while (iter.MoveNext())
					{
						System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
						Term term = (Term) entry.Key;
						// LUCENE-2086: we should be iterating a TreeMap,
                        // here, so terms better be in order:
                        System.Diagnostics.Debug.Assert(CheckDeleteTerm(term));
						docs.Seek(term);
						int limit = ((BufferedDeletes.Num) entry.Value).GetNum();
						while (docs.Next())
						{
							int docID = docs.Doc();
							if (docIDStart + docID >= limit)
								break;
							reader.DeleteDocument(docID);
							any = true;
						}
					}
				}
				finally
				{
					docs.Close();
				}
				
				// Delete by docID
				iter = deletesFlushed.docIDs.GetEnumerator();
				while (iter.MoveNext())
				{
					int docID = ((System.Int32) iter.Current);
					if (docID >= docIDStart && docID < docEnd)
					{
						reader.DeleteDocument(docID - docIDStart);
						any = true;
					}
				}
				
				// Delete by query
				IndexSearcher searcher = new IndexSearcher(reader);
				iter = new System.Collections.Hashtable(deletesFlushed.queries).GetEnumerator();
				while (iter.MoveNext())
				{
					System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
					Query query = (Query) entry.Key;
					int limit = ((System.Int32) entry.Value);
					Weight weight = query.Weight(searcher);
					Scorer scorer = weight.Scorer(reader, true, false);
					if (scorer != null)
					{
						while (true)
						{
							int doc = scorer.NextDoc();
							if (((long) docIDStart) + doc >= limit)
								break;
							reader.DeleteDocument(doc);
							any = true;
						}
					}
				}
				searcher.Close();
				return any;
			}
		}
Example #38
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, that means
                // all segments were written as part of a single
                // autoCommit=false IndexWriter session, so their field
                // name -> number mapping are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos();                 // merge field names
            }

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader = (SegmentReader)reader;
                    for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++)
                    {
                        FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            if (mergeDocStores)
            {
                // If the i'th reader is a SegmentReader and has
                // identical fieldName -> number mapping, then this
                // array will be non-null at position i:
                SegmentReader[] matchingSegmentReaders = new SegmentReader[readers.Count];

                // If this reader is a SegmentReader, and all of its
                // field name -> number mappings match the "merged"
                // FieldInfos, then we can do a bulk copy of the
                // stored fields:
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    if (reader is SegmentReader)
                    {
                        SegmentReader segmentReader     = (SegmentReader)reader;
                        bool          same              = true;
                        FieldInfos    segmentFieldInfos = segmentReader.GetFieldInfos();
                        for (int j = 0; same && j < segmentFieldInfos.Size(); j++)
                        {
                            same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j));
                        }
                        if (same)
                        {
                            matchingSegmentReaders[i] = segmentReader;
                        }
                    }
                }

                // Used for bulk-reading raw bytes for stored fields
                int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];

                // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
                // in  merge mode, we use this FieldSelector
                FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    for (int i = 0; i < readers.Count; i++)
                    {
                        IndexReader   reader = (IndexReader)readers[i];
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
                        FieldsReader  matchingFieldsReader;
                        if (matchingSegmentReader != null)
                        {
                            matchingFieldsReader = matchingSegmentReader.GetFieldsReader();
                        }
                        else
                        {
                            matchingFieldsReader = null;
                        }
                        int maxDoc = reader.MaxDoc();
                        for (int j = 0; j < maxDoc;)
                        {
                            if (!reader.IsDeleted(j)) // skip deleted docs
                            {
                                if (matchingSegmentReader != null)
                                {
                                    // We can optimize this case (doing a bulk
                                    // byte copy) since the field numbers are
                                    // identical
                                    int start   = j;
                                    int numDocs = 0;
                                    do
                                    {
                                        j++;
                                        numDocs++;
                                    } while (j < maxDoc && !matchingSegmentReader.IsDeleted(j) && numDocs < MAX_RAW_MERGE_DOCS);

                                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                                    docCount += numDocs;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300 * numDocs);
                                    }
                                }
                                else
                                {
                                    fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
                                    j++;
                                    docCount++;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300);
                                    }
                                }
                            }
                            else
                            {
                                j++;
                            }
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Close();
                }
            }
            // If we are skipping the doc stores, that means there
            // are no deletions in any of these segments, so we
            // just sum numDocs() of each segment to get total docCount
            else
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    docCount += ((IndexReader)readers[i]).NumDocs();
                }
            }

            return docCount;
        }
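The AnonymousClassFieldSelector above only has to signal "merge mode". A minimal sketch of such a selector, assuming the Lucene 2.x FieldSelector API (where FieldSelectorResult.LOAD_FOR_MERGE tells the FieldsReader to hand back the stored bytes without decompressing them):

            class MergeFieldSelector : FieldSelector
            {
                // Accept every field, but flag it as "load for merge" so the
                // FieldsReader passes the stored bytes through untouched.
                public FieldSelectorResult Accept(System.String fieldName)
                {
                    return FieldSelectorResult.LOAD_FOR_MERGE;
                }
            }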
Example #39
0
		public static void  CheckNorms(IndexReader reader)
		{
			// test omit norms
			for (int i = 0; i < DocHelper.fields.Length; i++)
			{
				Fieldable f = DocHelper.fields[i];
				if (f.IsIndexed())
				{
					Assert.AreEqual(reader.HasNorms(f.Name()), !f.GetOmitNorms());
					Assert.AreEqual(reader.HasNorms(f.Name()), !DocHelper.noNorms.Contains(f.Name()));
					if (!reader.HasNorms(f.Name()))
					{
						// test for fake norms of 1.0 or null depending on the flag
						byte[] norms = reader.Norms(f.Name());
						byte norm1 = DefaultSimilarity.EncodeNorm(1.0f);
						if (reader.GetDisableFakeNorms())
							Assert.IsNull(norms);
						else
						{
							Assert.AreEqual(norms.Length, reader.MaxDoc());
							for (int j = 0; j < reader.MaxDoc(); j++)
							{
								Assert.AreEqual(norms[j], norm1);
							}
						}
						norms = new byte[reader.MaxDoc()];
						reader.Norms(f.Name(), norms, 0);
						for (int j = 0; j < reader.MaxDoc(); j++)
						{
							Assert.AreEqual(norms[j], norm1);
						}
					}
				}
			}
		}
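For context, a hedged sketch of how a field lands in the no-norms branch above (Lucene 2.x Field API; the field name is illustrative):

		Document doc = new Document();
		Field body = new Field("body", "some text", Field.Store.NO, Field.Index.ANALYZED);
		body.SetOmitNorms(true); // no norms entry is written for this field
		doc.Add(body);
		// After indexing, reader.HasNorms("body") returns false; unless fake norms
		// are disabled, reader.Norms("body") is filled with EncodeNorm(1.0f).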
Example #40
0
        public virtual void TestRandom()
        {
            int                    numThreads          = 1 + Random().Next(8);
            int                    numDocumentsToIndex = 50 + AtLeast(70);
            AtomicInteger          numDocs             = new AtomicInteger(numDocumentsToIndex);
            Directory              dir         = NewDirectory();
            IndexWriterConfig      iwc         = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();

            iwc.SetFlushPolicy(flushPolicy);

            int numDWPT = 1 + Random().Next(8);
            DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);

            iwc.SetIndexerThreadPool(threadPool);

            IndexWriter writer = new IndexWriter(dir, iwc);

            flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
            DocumentsWriter docsWriter = writer.DocsWriter;

            Assert.IsNotNull(docsWriter);
            DocumentsWriterFlushControl flushControl = docsWriter.FlushControl;

            Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init");

            IndexThread[] threads = new IndexThread[numThreads];
            for (int x = 0; x < threads.Length; x++)
            {
                threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, true);
                threads[x].Start();
            }

            for (int x = 0; x < threads.Length; x++)
            {
                threads[x].Join();
            }
            Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due");
            Assert.AreEqual(numDocumentsToIndex, writer.NumDocs());
            Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc());
            if (flushPolicy.FlushOnRAM() && !flushPolicy.FlushOnDocCount() && !flushPolicy.FlushOnDeleteTerms())
            {
                long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);
                Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
                if (flushPolicy.HasMarkedPending)
                {
                    Assert.IsTrue("max: " + maxRAMBytes + " " + flushControl.PeakActiveBytes, maxRAMBytes <= flushControl.PeakActiveBytes);
                }
            }
            AssertActiveBytesAfter(flushControl);
            writer.Commit();
            Assert.AreEqual(0, flushControl.ActiveBytes());
            IndexReader r = DirectoryReader.Open(dir);

            Assert.AreEqual(numDocumentsToIndex, r.NumDocs());
            Assert.AreEqual(numDocumentsToIndex, r.MaxDoc());
            if (!flushPolicy.FlushOnRAM())
            {
                Assert.IsFalse("never stall if we don't flush on RAM", docsWriter.FlushControl.StallControl.WasStalled());
                Assert.IsFalse("never block if we don't flush on RAM", docsWriter.FlushControl.StallControl.HasBlocked());
            }
            r.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
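A hedged sketch of the flush knobs this test exercises (assuming the Lucene.NET 4.x IndexWriterConfig API; the values are illustrative):

            IndexWriterConfig cfg = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            cfg.SetRAMBufferSizeMB(16.0);                                  // flush when buffered RAM crosses 16 MB...
            cfg.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);  // ...rather than on a document count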
Example #41
0
		private void  CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
		{
			int maxDoc = reader.MaxDoc();
			if (matchingVectorsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				int docCount = 0;
				while (docCount < maxDoc)
				{
					int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
					matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
					termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
					docCount += len;
					checkAbort.Work(300 * len);
				}
			}
			else
			{
				for (int docNum = 0; docNum < maxDoc; docNum++)
				{
					// NOTE: it's very important to first assign to vectors then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
					termVectorsWriter.AddAllDocVectors(vectors);
					checkAbort.Work(300);
				}
			}
		}
Example #42
0
        private OpenBitSet FastBits(IndexReader reader)
        {

            OpenBitSet bits = new OpenBitSet(reader.MaxDoc());
            bits.Set(0, reader.MaxDoc()); //assume all are valid
            Term startTerm = new Term(fieldName);
            TermEnum te = reader.Terms(startTerm);
            if (te != null)
            {
                Term currTerm = te.Term();

                while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned
                {
                    if (te.DocFreq() > 1)
                    {
                        int lastDoc = -1;
                        //unset potential duplicates
                        TermDocs td = reader.TermDocs(currTerm);
                        td.Next();
                        if (keepMode == KM_USE_FIRST_OCCURRENCE)
                        {
                            td.Next();
                        }
                        do
                        {
                            lastDoc = td.Doc();
                            bits.Clear(lastDoc);
                        } while (td.Next());
                        if (keepMode == KM_USE_LAST_OCCURRENCE)
                        {
                            //restore the last bit
                            bits.Set(lastDoc);
                        }
                    }
                    if (!te.Next())
                    {
                        break;
                    }
                    currTerm = te.Term();
                }
            }
            return bits;
        }
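A hedged usage sketch, assuming the snippet above is the contrib DuplicateFilter (the key field name is illustrative):

            DuplicateFilter df = new DuplicateFilter("primaryKey");
            df.SetKeepMode(DuplicateFilter.KM_USE_LAST_OCCURRENCE); // keep only the newest copy of each key
            TopDocs hits = searcher.Search(query, df, 10);          // duplicates are filtered out of the results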
Example #43
0
 public override int MaxDoc()
 {
     return in_Renamed.MaxDoc();
 }
Example #44
0
        public static void  VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
        {
            Assert.AreEqual(r1.NumDocs(), r2.NumDocs());
            bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());

            int[] r2r1 = new int[r2.MaxDoc()];             // r2 id to r1 id mapping

            TermDocs termDocs1 = r1.TermDocs();
            TermDocs termDocs2 = r2.TermDocs();

            // create mapping from id2 space to id1 space based on idField
            idField = StringHelper.Intern(idField);
            TermEnum termEnum = r1.Terms(new Term(idField, ""));

            do
            {
                Term term = termEnum.Term();
                if (term == null || (System.Object)term.Field() != (System.Object)idField)
                {
                    break;
                }

                termDocs1.Seek(termEnum);
                if (!termDocs1.Next())
                {
                    // This doc is deleted and wasn't replaced
                    termDocs2.Seek(termEnum);
                    Assert.IsFalse(termDocs2.Next());
                    continue;
                }

                int id1 = termDocs1.Doc();
                Assert.IsFalse(termDocs1.Next());

                termDocs2.Seek(termEnum);
                Assert.IsTrue(termDocs2.Next());
                int id2 = termDocs2.Doc();
                Assert.IsFalse(termDocs2.Next());

                r2r1[id2] = id1;

                // verify stored fields are equivalent
                try
                {
                    VerifyEquals(r1.Document(id1), r2.Document(id2));
                }
                catch (System.Exception t)
                {
                    System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                    System.Console.Out.WriteLine("  d1=" + r1.Document(id1));
                    System.Console.Out.WriteLine("  d2=" + r2.Document(id2));
                    throw t;
                }

                try
                {
                    // verify term vectors are equivalent
                    VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
                }
                catch (System.Exception e)
                {
                    System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                    TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
                    System.Console.Out.WriteLine("  d1=" + tv1);
                    if (tv1 != null)
                    {
                        for (int i = 0; i < tv1.Length; i++)
                        {
                            System.Console.Out.WriteLine("    " + i + ": " + tv1[i]);
                        }
                    }

                    TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
                    System.Console.Out.WriteLine("  d2=" + tv2);
                    if (tv2 != null)
                    {
                        for (int i = 0; i < tv2.Length; i++)
                        {
                            System.Console.Out.WriteLine("    " + i + ": " + tv2[i]);
                        }
                    }

                    throw e;
                }
            }while (termEnum.Next());

            termEnum.Close();

            // Verify postings
            TermEnum termEnum1 = r1.Terms(new Term("", ""));
            TermEnum termEnum2 = r2.Terms(new Term("", ""));

            // pack both doc and freq into single element for easy sorting
            long[] info1 = new long[r1.NumDocs()];
            long[] info2 = new long[r2.NumDocs()];

            for (; ;)
            {
                Term term1, term2;

                // iterate until we get some docs
                int len1;
                for (; ;)
                {
                    len1  = 0;
                    term1 = termEnum1.Term();
                    if (term1 == null)
                    {
                        break;
                    }
                    termDocs1.Seek(termEnum1);
                    while (termDocs1.Next())
                    {
                        int d1 = termDocs1.Doc();
                        int f1 = termDocs1.Freq();
                        info1[len1] = (((long)d1) << 32) | f1;
                        len1++;
                    }
                    if (len1 > 0)
                    {
                        break;
                    }
                    if (!termEnum1.Next())
                    {
                        break;
                    }
                }

                // iterate until we get some docs
                int len2;
                for (; ;)
                {
                    len2  = 0;
                    term2 = termEnum2.Term();
                    if (term2 == null)
                    {
                        break;
                    }
                    termDocs2.Seek(termEnum2);
                    while (termDocs2.Next())
                    {
                        int d2 = termDocs2.Doc();
                        int f2 = termDocs2.Freq();
                        info2[len2] = (((long)r2r1[d2]) << 32) | f2;
                        len2++;
                    }
                    if (len2 > 0)
                    {
                        break;
                    }
                    if (!termEnum2.Next())
                    {
                        break;
                    }
                }

                if (!hasDeletes)
                {
                    Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq());
                }

                Assert.AreEqual(len1, len2);
                if (len1 == 0)
                {
                    break;                     // no more terms
                }
                Assert.AreEqual(term1, term2);

                // sort info2 to get it into ascending docid
                System.Array.Sort(info2, 0, len2);

                // now compare
                for (int i = 0; i < len1; i++)
                {
                    Assert.AreEqual(info1[i], info2[i]);
                }

                termEnum1.Next();
                termEnum2.Next();
            }
        }
Example #45
0
		// Apply buffered delete terms, queries and docIDs to the
		// provided reader
		private bool ApplyDeletes(IndexReader reader, int docIDStart)
		{
			lock (this)
			{
				
				int docEnd = docIDStart + reader.MaxDoc();
				bool any = false;
				
                System.Diagnostics.Debug.Assert(CheckDeleteTerm(null));

				// Delete by term
				TermDocs docs = reader.TermDocs();
				try
				{
                    foreach(KeyValuePair<Term,BufferedDeletes.Num> entry in deletesFlushed.terms)
					{
						Term term = entry.Key;
						// LUCENE-2086: we should be iterating a TreeMap here,
						// so terms had better be in order:
                        System.Diagnostics.Debug.Assert(CheckDeleteTerm(term));
						docs.Seek(term);
						int limit = entry.Value.GetNum();
						while (docs.Next())
						{
							int docID = docs.Doc();
							if (docIDStart + docID >= limit)
								break;
							reader.DeleteDocument(docID);
							any = true;
						}
					}
				}
				finally
				{
					docs.Close();
				}
				
				// Delete by docID
                foreach(int docID in deletesFlushed.docIDs)
				{
					if (docID >= docIDStart && docID < docEnd)
					{
						reader.DeleteDocument(docID - docIDStart);
						any = true;
					}
				}
				
				// Delete by query
				IndexSearcher searcher = new IndexSearcher(reader);
                foreach(KeyValuePair<Query,int> entry in new Support.Dictionary<Query,int>(deletesFlushed.queries))
				{
					Query query = entry.Key;
					int limit = entry.Value;
					Weight weight = query.Weight(searcher);
					Scorer scorer = weight.Scorer(reader, true, false);
					if (scorer != null)
					{
						while (true)
						{
							int doc = scorer.NextDoc();
							if (((long) docIDStart) + doc >= limit)
								break;
							reader.DeleteDocument(doc);
							any = true;
						}
					}
				}
				searcher.Close();
				return any;
			}
		}
		////////////////////////////////////////////////////////////////

		static private void ScoreHits (Dictionary<int, Hit>   hits_by_id,
					       IndexReader reader,
					       ICollection term_list)
		{
			LNS.Similarity similarity;
			similarity = LNS.Similarity.GetDefault ();

			TermDocs term_docs = reader.TermDocs ();
			Hit hit;

			foreach (Term term in term_list) {

				double idf;
				idf = similarity.Idf (reader.DocFreq (term), reader.MaxDoc ());

				int hit_count;
				hit_count = hits_by_id.Count;

				term_docs.Seek (term);
				while (term_docs.Next () && hit_count > 0) {
					
					int id;
					id = term_docs.Doc ();

					if (hits_by_id.TryGetValue (id, out hit)) {
						double tf;
						tf = similarity.Tf (term_docs.Freq ());
						hit.Score += tf * idf;
						--hit_count;
					}
				}
			}

			term_docs.Close ();
		}
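For reference, a sketch of the classic DefaultSimilarity formulas the scoring loop above depends on (the real implementations live in Lucene.Net.Search.DefaultSimilarity; treat these as approximations, not a drop-in replacement):

		static double Idf(int docFreq, int numDocs)
		{
			// rarer terms (small DocFreq relative to MaxDoc) score higher
			return System.Math.Log(numDocs / (double) (docFreq + 1)) + 1.0;
		}

		static double Tf(int freq)
		{
			// repeated occurrences help, but only sub-linearly
			return System.Math.Sqrt(freq);
		}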
Example #47
0
		private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
		{
			int maxDoc = reader.MaxDoc();
			int docCount = 0;
			if (matchingFieldsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				while (docCount < maxDoc)
				{
					int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
					IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
					fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
					docCount += len;
					checkAbort.Work(300 * len);
				}
			}
			else
			{
				for (; docCount < maxDoc; docCount++)
				{
					// NOTE: it's very important to first assign to doc then pass it to
					// fieldsWriter.AddDocument; see LUCENE-1282
					Document doc = reader.Document(docCount, fieldSelectorMerge);
					fieldsWriter.AddDocument(doc);
					checkAbort.Work(300);
				}
			}
			return docCount;
		}
        // Apply buffered delete terms, queries and docIDs to the
        // provided reader
        private bool ApplyDeletes(IndexReader reader, int docIDStart)
        {
            lock (this)
            {
                int docEnd = docIDStart + reader.MaxDoc();
                bool any = false;

                // Delete by term
                IEnumerator<KeyValuePair<object, object>> iter = deletesFlushed.terms.GetEnumerator();
                while (iter.MoveNext())
                {
                    KeyValuePair<object, object> entry = (KeyValuePair<object, object>)iter.Current;
                    Term term = (Term)entry.Key;

                    TermDocs docs = reader.TermDocs(term);
                    if (docs != null)
                    {
                        int limit = ((BufferedDeletes.Num)entry.Value).GetNum();
                        try
                        {
                            while (docs.Next())
                            {
                                int docID = docs.Doc();
                                if (docIDStart + docID >= limit)
                                    break;
                                reader.DeleteDocument(docID);
                                any = true;
                            }
                        }
                        finally
                        {
                            docs.Close();
                        }
                    }
                }

                // Delete by docID
                IEnumerator<object> iter2 = deletesFlushed.docIDs.GetEnumerator();
                while (iter2.MoveNext())
                {
                    int docID = (int)iter2.Current;
                    if (docID >= docIDStart && docID < docEnd)
                    {
                        reader.DeleteDocument(docID - docIDStart);
                        any = true;
                    }
                }

                // Delete by query
                IndexSearcher searcher = new IndexSearcher(reader);
                iter = deletesFlushed.queries.GetEnumerator();
                while (iter.MoveNext())
                {
                    KeyValuePair<object, object> entry = (KeyValuePair<object, object>)iter.Current;
                    Query query = (Query)entry.Key;
                    int limit = (int)entry.Value;
                    Weight weight = query.Weight(searcher);
                    Scorer scorer = weight.Scorer(reader);
                    while (scorer.Next())
                    {
                        int docID = scorer.Doc();
                        if (docIDStart + docID >= limit)
                            break;
                        reader.DeleteDocument(docID);
                        any = true;
                    }
                }
                searcher.Close();
                return any;
            }
        }
Example #49
0
		private void  CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
		{
			int maxDoc = reader.MaxDoc();
			if (matchingVectorsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				for (int docNum = 0; docNum < maxDoc; )
				{
					if (reader.IsDeleted(docNum))
					{
						// skip deleted docs
						++docNum;
						continue;
					}
					// We can optimize this case (doing a bulk byte copy) since the field 
					// numbers are identical
					int start = docNum, numDocs = 0;
					do 
					{
						docNum++;
						numDocs++;
						if (docNum >= maxDoc)
							break;
						if (reader.IsDeleted(docNum))
						{
							docNum++;
							break;
						}
					}
					while (numDocs < MAX_RAW_MERGE_DOCS);
					
					matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
					termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
					checkAbort.Work(300 * numDocs);
				}
			}
			else
			{
				for (int docNum = 0; docNum < maxDoc; docNum++)
				{
					if (reader.IsDeleted(docNum))
					{
						// skip deleted docs
						continue;
					}
					
					// NOTE: it's very important to first assign to vectors then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
					termVectorsWriter.AddAllDocVectors(vectors);
					checkAbort.Work(300);
				}
			}
		}
		private void  ModifyNormsForF1(IndexReader ir)
		{
			int n = ir.MaxDoc();
			// System.out.println("modifyNormsForF1 maxDoc: "+n);
			for (int i = 0; i < n; i += 3)
			{
				// modify for every third doc
				int k = (i * 3) % modifiedNorms.Count;
				float origNorm = (float) ((System.Single) modifiedNorms[i]);
				float newNorm = (float) ((System.Single) modifiedNorms[k]);
				// System.out.println("Modifying: for "+i+" from "+origNorm+" to
				// "+newNorm);
				// System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
				modifiedNorms[i] = (float) newNorm;
				modifiedNorms[k] = (float) origNorm;
				ir.SetNorm(i, "f" + 1, newNorm);
				ir.SetNorm(k, "f" + 1, origNorm);
				// System.out.println("setNorm i: "+i);
				// break;
			}
			// ir.close();
		}
Example #51
0
 public override int MaxDoc()
 {
     // Don't call ensureOpen() here (it could affect performance)
     return in_Renamed.MaxDoc();
 }
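Both delegating overrides above follow the FilterIndexReader pattern; a minimal sketch (in_Renamed is this port's name for the wrapped reader, and the class name here is illustrative):

 public class PassThroughReader : FilterIndexReader
 {
     public PassThroughReader(IndexReader in_Renamed) : base(in_Renamed)
     {
     }

     public override int MaxDoc()
     {
         // MaxDoc() sits on hot paths, so the wrapper skips EnsureOpen()
         // and delegates straight to the wrapped reader
         return in_Renamed.MaxDoc();
     }
 }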
Example #52
0
 private OpenBitSetDISI InitialResult(IndexReader reader, Logic logic, int[] index)
 {
     OpenBitSetDISI result;
     /**
      * First AND operation takes place against a completely false
      * bitset and will always return zero results.
      */
     if (logic == Logic.AND)
     {
         result = new OpenBitSetDISI(GetDISI(chain[index[0]], reader), reader.MaxDoc());
         ++index[0];
     }
     else if (logic == Logic.ANDNOT)
     {
         result = new OpenBitSetDISI(GetDISI(chain[index[0]], reader), reader.MaxDoc());
         result.Flip(0, reader.MaxDoc()); // NOTE: may set bits for deleted docs.
         ++index[0];
     }
     else
     {
         result = new OpenBitSetDISI(reader.MaxDoc());
     }
     return result;
 }
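A hedged usage sketch for the seeding logic above, assuming this is the contrib ChainedFilter (the constructor shape and filter instances are assumptions, not confirmed API):

     Filter[] chain = new Filter[] { dateFilter, categoryFilter }; // illustrative filters
     ChainedFilter cf = new ChainedFilter(chain, Logic.AND);       // ctor shape assumed
     // With AND, the result is seeded from the first filter's DISI instead of
     // being AND-ed into an all-false bitset, which would always come out empty.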
Example #53
0
		/// <summary>Add an IndexReader whose stored fields will not be returned.  This can
		/// accelerate search when stored fields are only needed from a subset of
		/// the IndexReaders.
		/// </summary>
		/// <throws>  IllegalArgumentException if not all indexes contain the same number of documents </throws>
		/// <throws>  IllegalArgumentException if not all indexes have the same value of {@link IndexReader#MaxDoc()} </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
		{
			
			EnsureOpen();
			if (readers.Count == 0)
			{
				this.maxDoc = reader.MaxDoc();
				this.numDocs = reader.NumDocs();
				this.hasDeletions = reader.HasDeletions();
			}
			
			if (reader.MaxDoc() != maxDoc)
			// check compatibility
				throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
			if (reader.NumDocs() != numDocs)
				throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
			
			System.Collections.Generic.ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
			readerToFields[reader] = fields;
			System.Collections.IEnumerator i = fields.GetEnumerator();
			while (i.MoveNext())
			{
				// update fieldToReader map
				System.String field = (System.String) i.Current;
				if (fieldToReader[field] == null)
					fieldToReader[field] = reader;
			}
			
			if (!ignoreStoredFields)
				storedFieldReaders.Add(reader); // add to storedFieldReaders
			readers.Add(reader);
			
			if (incRefReaders)
			{
				reader.IncRef();
			}
			decrefOnClose.Add(incRefReaders);
		}
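A hedged usage sketch for Add above, assuming this is ParallelReader (the directory variables are illustrative):

		ParallelReader pr = new ParallelReader();
		pr.Add(IndexReader.Open(mainDir));       // stored fields come from this reader
		pr.Add(IndexReader.Open(auxDir), true);  // ignoreStoredFields: searchable, but not returned
		// Every reader added must agree on MaxDoc() and NumDocs(), or Add throws.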
		public static void  AssertIndexEquals(IndexReader index1, IndexReader index2)
		{
			Assert.AreEqual(index1.NumDocs(), index2.NumDocs(), "IndexReaders have different values for numDocs.");
			Assert.AreEqual(index1.MaxDoc(), index2.MaxDoc(), "IndexReaders have different values for maxDoc.");
			Assert.AreEqual(index1.HasDeletions(), index2.HasDeletions(), "Only one IndexReader has deletions.");
			Assert.AreEqual(index1.IsOptimized(), index2.IsOptimized(), "Only one index is optimized.");
			
			// check field names
			System.Collections.ICollection fields1 = index1.GetFieldNames(FieldOption.ALL);
			System.Collections.ICollection fields2 = index2.GetFieldNames(FieldOption.ALL);
			Assert.AreEqual(fields1.Count, fields2.Count, "IndexReaders have different numbers of fields.");
            System.Collections.IEnumerator it1 = fields1.GetEnumerator();
            System.Collections.IEnumerator it2 = fields2.GetEnumerator();
            while (it1.MoveNext())
			{
				Assert.IsTrue(it2.MoveNext());
				Assert.AreEqual((System.String) it1.Current, (System.String) it2.Current, "Different field names.");
			}
			
			// check norms
            it1 = fields1.GetEnumerator();
            while (it1.MoveNext())
			{
				System.String curField = (System.String) it1.Current;
				byte[] norms1 = index1.Norms(curField);
				byte[] norms2 = index2.Norms(curField);
				Assert.AreEqual(norms1.Length, norms2.Length);
				for (int i = 0; i < norms1.Length; i++)
				{
					Assert.AreEqual(norms1[i], norms2[i], "Norm different for doc " + i + " and field '" + curField + "'.");
				}
			}
			
			// check deletions
			for (int i = 0; i < index1.MaxDoc(); i++)
			{
				Assert.AreEqual(index1.IsDeleted(i), index2.IsDeleted(i), "Doc " + i + " only deleted in one index.");
			}
			
			// check stored fields
			for (int i = 0; i < index1.MaxDoc(); i++)
			{
				if (!index1.IsDeleted(i))
				{
					Document doc1 = index1.Document(i);
					Document doc2 = index2.Document(i);
					fields1 = doc1.GetFields();
					fields2 = doc2.GetFields();
					Assert.AreEqual(fields1.Count, fields2.Count, "Different numbers of fields for doc " + i + ".");
					it1 = fields1.GetEnumerator();
					it2 = fields2.GetEnumerator();
					while (it1.MoveNext())
					{
						Assert.IsTrue(it2.MoveNext());
						Field curField1 = (Field) it1.Current;
						Field curField2 = (Field) it2.Current;
						Assert.AreEqual(curField1.Name(), curField2.Name(), "Different fields names for doc " + i + ".");
						Assert.AreEqual(curField1.StringValue(), curField2.StringValue(), "Different field values for doc " + i + ".");
					}
				}
			}
			
			// check dictionary and posting lists
			TermEnum enum1 = index1.Terms();
			TermEnum enum2 = index2.Terms();
			TermPositions tp1 = index1.TermPositions();
			TermPositions tp2 = index2.TermPositions();
			while (enum1.Next())
			{
				Assert.IsTrue(enum2.Next());
				Assert.AreEqual(enum1.Term(), enum2.Term(), "Different term in dictionary.");
				tp1.Seek(enum1.Term());
				tp2.Seek(enum1.Term());
				while (tp1.Next())
				{
					Assert.IsTrue(tp2.Next());
					Assert.AreEqual(tp1.Doc(), tp2.Doc(), "Different doc id in postinglist of term " + enum1.Term() + ".");
					Assert.AreEqual(tp1.Freq(), tp2.Freq(), "Different term frequence in postinglist of term " + enum1.Term() + ".");
					for (int i = 0; i < tp1.Freq(); i++)
					{
						Assert.AreEqual(tp1.NextPosition(), tp2.NextPosition(), "Different positions in postinglist of term " + enum1.Term() + ".");
					}
				}
			}
		}
		public static void  CheckNorms(IndexReader reader)
		{
			// test omit norms
			for (int i = 0; i < DocHelper.fields.Length; i++)
			{
				Lucene.Net.Documents.Fieldable f = DocHelper.fields[i];
				if (f.IsIndexed())
				{
					Assert.AreEqual(reader.HasNorms(f.Name()), !f.GetOmitNorms());
					Assert.AreEqual(reader.HasNorms(f.Name()), !DocHelper.noNorms.Contains(f.Name()));
					if (!reader.HasNorms(f.Name()))
					{
						// test for fake norms of 1.0
						byte[] norms = reader.Norms(f.Name());
						Assert.AreEqual(norms.Length, reader.MaxDoc());
						for (int j = 0; j < reader.MaxDoc(); j++)
						{
							Assert.AreEqual(norms[j], DefaultSimilarity.EncodeNorm(1.0f));
						}
						norms = new byte[reader.MaxDoc()];
						reader.Norms(f.Name(), norms, 0);
						for (int j = 0; j < reader.MaxDoc(); j++)
						{
							Assert.AreEqual(norms[j], DefaultSimilarity.EncodeNorm(1.0f));
						}
					}
				}
			}
		}
Example #56
0
 /// <summary>
 /// Returns a single <seealso cref="Bits"/> instance for this
 ///  reader, merging live Documents on the
 ///  fly.  this method will return null if the reader
 ///  has no deletions.
 ///
 ///  <p><b>NOTE</b>: this is a very slow way to access live docs.
 ///  For example, each Bits access will require a binary search.
 ///  It's better to get the sub-readers and iterate through them
 ///  yourself.
 /// </summary>
 public static Bits GetLiveDocs(IndexReader reader)
 {
     if (reader.HasDeletions())
     {
         IList<AtomicReaderContext> leaves = reader.Leaves();
         int size = leaves.Count;
         Debug.Assert(size > 0, "A reader with deletions must have at least one leaf");
         if (size == 1)
         {
             return leaves[0].AtomicReader.LiveDocs;
         }
         Bits[] liveDocs = new Bits[size];
         int[] starts = new int[size + 1];
         for (int i = 0; i < size; i++)
         {
             // record all liveDocs, even if they are null
             AtomicReaderContext ctx = leaves[i];
             liveDocs[i] = ctx.AtomicReader.LiveDocs;
             starts[i] = ctx.DocBase;
         }
         starts[size] = reader.MaxDoc();
         return new MultiBits(liveDocs, starts, true);
     }
     else
     {
         return null;
     }
 }
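A hedged usage sketch, assuming the method above is MultiFields.GetLiveDocs from the Lucene.NET 4.x port:

 Bits liveDocs = MultiFields.GetLiveDocs(reader); // null when the reader has no deletions
 for (int docID = 0; docID < reader.MaxDoc(); docID++)
 {
     if (liveDocs == null || liveDocs.Get(docID))
     {
         // docID is live; its stored fields can be loaded safely
     }
 }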
Example #57
0
        /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
        /// <throws>  IOException </throws>
        private void MergeVectors()
        {
            TermVectorsWriter termVectorsWriter =
                new TermVectorsWriter(directory, segment, fieldInfos);

            try
            {
                for (int r = 0; r < readers.Count; r++)
                {
                    SegmentReader     matchingSegmentReader = matchingSegmentReaders[r];
                    TermVectorsReader matchingVectorsReader;
                    bool hasMatchingReader;
                    if (matchingSegmentReader != null)
                    {
                        matchingVectorsReader = matchingSegmentReader.termVectorsReaderOrig;

                        // If the TV* files are an older format then they
                        // cannot read raw docs:
                        if (matchingVectorsReader != null && !matchingVectorsReader.CanReadRawDocs())
                        {
                            matchingVectorsReader = null;
                            hasMatchingReader     = false;
                        }
                        else
                        {
                            hasMatchingReader = matchingVectorsReader != null;
                        }
                    }
                    else
                    {
                        hasMatchingReader     = false;
                        matchingVectorsReader = null;
                    }
                    IndexReader reader       = (IndexReader)readers[r];
                    bool        hasDeletions = reader.HasDeletions();
                    int         maxDoc       = reader.MaxDoc();
                    for (int docNum = 0; docNum < maxDoc;)
                    {
                        if (!hasDeletions || !reader.IsDeleted(docNum)) // skip deleted docs
                        {
                            if (hasMatchingReader)
                            {
                                // We can optimize this case (doing a bulk
                                // byte copy) since the field numbers are
                                // identical
                                int start   = docNum;
                                int numDocs = 0;
                                do
                                {
                                    docNum++;
                                    numDocs++;
                                    if (docNum >= maxDoc)
                                    {
                                        break;
                                    }
                                    if (hasDeletions && matchingSegmentReader.IsDeleted(docNum))
                                    {
                                        docNum++;
                                        break;
                                    }
                                } while (numDocs < MAX_RAW_MERGE_DOCS);

                                matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
                                termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
                                if (checkAbort != null)
                                {
                                    checkAbort.Work(300 * numDocs);
                                }
                            }
                            else
                            {
                                // NOTE: it's very important to first assign
                                // to vectors then pass it to
                                // termVectorsWriter.addAllDocVectors; see
                                // LUCENE-1282
                                TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
                                termVectorsWriter.AddAllDocVectors(vectors);
                                docNum++;
                                if (checkAbort != null)
                                {
                                    checkAbort.Work(300);
                                }
                            }
                        }
                        else
                        {
                            docNum++;
                        }
                    }
                }
            }
            finally
            {
                termVectorsWriter.Close();
            }

            long tvxSize = directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);

            // {{dougsale-2.4.0}
            // this shouldn't be a problem for us - if it is,
            // then it's not a JRE bug
            //if (4 + mergedDocs * 16 != tvxSize)
            //  // This is most likely a bug in Sun JRE 1.6.0_04/_05;
            //  // we detect that the bug has struck, here, and
            //  // throw an exception to prevent the corruption from
            //  // entering the index.  See LUCENE-1282 for
            //  // details.
            //  throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + "; now aborting this merge to prevent index corruption");
        }
Example #58
0
			protected internal double[] ComputeDistances(IndexReader reader)
			{
				double[] retArray = null;
				var termDocs = reader.TermDocs();
				var termEnum = reader.Terms(new Term(Constants.SpatialShapeFieldName));
				try
				{
					do
					{
						Term term = termEnum.Term();
						if (term == null)
							break;

						Debug.Assert(Constants.SpatialShapeFieldName.Equals(term.Field()));

						Shape termval;
						try
						{
							termval = SpatialIndex.RavenSpatialContext.ReadShape(term.Text()); // read shape
						}
						catch (InvalidShapeException)
						{
							continue;
						}

						var pt = termval as Point;
						if (pt == null)
							continue;

						var distance = SpatialIndex.RavenSpatialContext.GetDistCalc().Distance(pt, originPt);

						if (retArray == null)
							// late init
							retArray = new double[reader.MaxDoc()];
						termDocs.Seek(termEnum);
						while (termDocs.Next())
						{
							retArray[termDocs.Doc()] = distance;
						}
					} while (termEnum.Next());
				}
				finally
				{
					termDocs.Close();
					termEnum.Close();
				}
				return retArray ?? new double[reader.MaxDoc()];
			}
Example #59
0
        /// <summary>
        /// Get the id set for the filter.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <returns>The filter set to use.</returns>
        public override DocIdSet GetDocIdSet(IndexReader reader)
        {
            OpenBitSetDISI res = null;

            if (shouldFilters != null)
            {
                for (int i = 0; i < shouldFilters.Count; i++)
                {
                    if (res == null)
                    {
                        res = new OpenBitSetDISI(GetDISI(shouldFilters, i, reader), reader.MaxDoc());
                    }
                    else
                    {
                        DocIdSet dis = ((Filter)shouldFilters[i]).GetDocIdSet(reader);
                        if (dis is OpenBitSet)
                        {
                            // optimized case for OpenBitSets
                            res.Or((OpenBitSet)dis);
                        }
                        else
                        {
                            res.InPlaceOr(GetDISI(shouldFilters, i, reader));
                        }
                    }
                }
            }

            if (notFilters != null)
            {
                for (int i = 0; i < notFilters.Count; i++)
                {
                    if (res == null)
                    {
                        res = new OpenBitSetDISI(GetDISI(notFilters, i, reader), reader.MaxDoc());
                        res.Flip(0, reader.MaxDoc()); // NOTE: may set bits on deleted docs
                    }
                    else
                    {
                        DocIdSet dis = ((Filter)notFilters[i]).GetDocIdSet(reader);
                        if (dis is OpenBitSet)
                        {
                            // optimized case for OpenBitSets
                            res.AndNot((OpenBitSet)dis);
                        }
                        else
                        {
                            res.InPlaceNot(GetDISI(notFilters, i, reader));
                        }
                    }
                }
            }

            if (mustFilters != null)
            {
                for (int i = 0; i < mustFilters.Count; i++)
                {
                    if (res == null)
                    {
                        res = new OpenBitSetDISI(GetDISI(mustFilters, i, reader), reader.MaxDoc());
                    }
                    else
                    {
                        DocIdSet dis = ((Filter)mustFilters[i]).GetDocIdSet(reader);
                        if (dis is OpenBitSet)
                        {
                            // optimized case for OpenBitSets
                            res.And((OpenBitSet)dis);
                        }
                        else
                        {
                            res.InPlaceAnd(GetDISI(mustFilters, i, reader));
                        }
                    }
                }
            }

            if (res != null)
            {
                return FinalResult(res, reader.MaxDoc());
            }
            else
            {
                // TODO: return DocIdSet.EMPTY_DOCIDSET instead of null
                return null;
            }
        }
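A hedged usage sketch, assuming the method above belongs to the contrib BooleanFilter (the terms and field names are illustrative):

            TermsFilter active = new TermsFilter();
            active.AddTerm(new Term("status", "active"));
            TermsFilter spam = new TermsFilter();
            spam.AddTerm(new Term("flag", "spam"));

            BooleanFilter bf = new BooleanFilter();
            bf.Add(new FilterClause(active, BooleanClause.Occur.MUST));   // feeds mustFilters
            bf.Add(new FilterClause(spam, BooleanClause.Occur.MUST_NOT)); // feeds notFilters
            DocIdSet ids = bf.GetDocIdSet(reader);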
Example #60
0
        private void  MergeTermInfos(FormatPostingsFieldsConsumer consumer)
        {
            int base_Renamed = 0;
            int readerCount  = readers.Count;

            for (int i = 0; i < readerCount; i++)
            {
                IndexReader      reader   = (IndexReader)readers[i];
                TermEnum         termEnum = reader.Terms();
                SegmentMergeInfo smi      = new SegmentMergeInfo(base_Renamed, termEnum, reader);
                int[]            docMap   = smi.GetDocMap();
                if (docMap != null)
                {
                    if (docMaps == null)
                    {
                        docMaps   = new int[readerCount][];
                        delCounts = new int[readerCount];
                    }
                    docMaps[i]   = docMap;
                    delCounts[i] = smi.reader.MaxDoc() - smi.reader.NumDocs();
                }

                base_Renamed += reader.NumDocs();

                System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc() - smi.delCount);

                if (smi.Next())
                {
                    queue.Add(smi); // initialize queue
                }
                else
                {
                    smi.Close();
                }
            }

            SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];

            System.String currentField = null;
            FormatPostingsTermsConsumer termsConsumer = null;

            while (queue.Size() > 0)
            {
                int matchSize = 0;                 // pop matching terms
                match[matchSize++] = (SegmentMergeInfo)queue.Pop();
                Term             term = match[0].term;
                SegmentMergeInfo top  = (SegmentMergeInfo)queue.Top();

                while (top != null && term.CompareTo(top.term) == 0)
                {
                    match[matchSize++] = (SegmentMergeInfo)queue.Pop();
                    top = (SegmentMergeInfo)queue.Top();
                }

                if ((System.Object)currentField != (System.Object)term.field)
                {
                    currentField = term.field;
                    if (termsConsumer != null)
                    {
                        termsConsumer.Finish();
                    }
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField);
                    termsConsumer            = consumer.AddField(fieldInfo);
                    omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
                }

                int df = AppendPostings(termsConsumer, match, matchSize);                 // add new TermInfo

                checkAbort.Work(df / 3.0);

                while (matchSize > 0)
                {
                    SegmentMergeInfo smi = match[--matchSize];
                    if (smi.Next())
                    {
                        queue.Add(smi); // restore queue
                    }
                    else
                    {
                        smi.Close(); // done with a segment
                    }
                }
            }
        }