Example #1
        public virtual void  TestExpungeDeletes()
        {
            Directory   dir = new MockRAMDirectory();
            IndexWriter w   = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            Document    doc = new Document();

            doc.Add(new Field("field", "a b c", Field.Store.NO, Field.Index.ANALYZED));
            Field id = new Field("id", "", Field.Store.NO, Field.Index.NOT_ANALYZED);

            doc.Add(id);
            id.SetValue("0");
            w.AddDocument(doc);
            id.SetValue("1");
            w.AddDocument(doc);
            w.DeleteDocuments(new Term("id", "0"));

            IndexReader r = w.GetReader();

            w.ExpungeDeletes();
            w.Close();
            r.Close();
            r = IndexReader.Open(dir);
            Assert.AreEqual(1, r.NumDocs());
            Assert.IsFalse(r.HasDeletions());
            r.Close();
            dir.Close();
        }
		/// <summary>Add an IndexReader whose stored fields will not be returned.  This can
		/// accelerate search when stored fields are only needed from a subset of
		/// the IndexReaders.
		/// </summary>
		/// <throws>  ArgumentException if not all indexes contain the same number of documents </throws>
		/// <throws>  ArgumentException if not all indexes have the same value of <see cref="IndexReader.MaxDoc()"/> </throws>
		public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
		{
			
			if (readers.Count == 0)
			{
				this.maxDoc = reader.MaxDoc();
				this.numDocs = reader.NumDocs();
				this.hasDeletions = reader.HasDeletions();
			}
			
			if (reader.MaxDoc() != maxDoc) // check compatibility
				throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
			if (reader.NumDocs() != numDocs)
				throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
			
			System.Collections.IEnumerator i = reader.GetFieldNames(IndexReader.FieldOption.ALL).GetEnumerator();
			while (i.MoveNext())
			{
                System.Collections.DictionaryEntry fi = (System.Collections.DictionaryEntry) i.Current;

				// update fieldToReader map
				System.String field = fi.Key.ToString();
				if (fieldToReader[field] == null)
					fieldToReader[field] = reader;
			}
			
			if (!ignoreStoredFields)
				storedFieldReaders.Add(reader); // add to storedFieldReaders
			readers.Add(reader);
		}
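
A minimal usage sketch for the Add overload above, assuming the enclosing class is Lucene.Net's ParallelReader and 2.9-era APIs; the directory paths and the readOnly flag are illustrative, not taken from the source:

    // using Lucene.Net.Index; using Lucene.Net.Store;  (assumed namespaces)
    Directory dirMain  = FSDirectory.Open(new System.IO.DirectoryInfo("index-main"));   // hypothetical path
    Directory dirExtra = FSDirectory.Open(new System.IO.DirectoryInfo("index-extra"));  // hypothetical path

    ParallelReader pr = new ParallelReader();
    pr.Add(IndexReader.Open(dirMain, true));          // stored fields are served from this reader
    pr.Add(IndexReader.Open(dirExtra, true), true);   // ignoreStoredFields: only its indexed terms are used

    // Both readers must report the same MaxDoc()/NumDocs(); otherwise Add throws ArgumentException.
    int docs = pr.NumDocs();
    pr.Close();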
Example #3
        private void  MergeNorms()
        {
            byte[]      normBuffer = null;
            IndexOutput output     = null;

            try
            {
                for (int i = 0; i < fieldInfos.Size(); i++)
                {
                    FieldInfo fi = fieldInfos.FieldInfo(i);
                    if (fi.isIndexed && !fi.omitNorms)
                    {
                        if (output == null)
                        {
                            output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
                            output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
                        }
                        for (int j = 0; j < readers.Count; j++)
                        {
                            IndexReader reader = (IndexReader)readers[j];
                            int         maxDoc = reader.MaxDoc();
                            if (normBuffer == null || normBuffer.Length < maxDoc)
                            {
                                // the buffer is too small for the current segment
                                normBuffer = new byte[maxDoc];
                            }
                            reader.Norms(fi.name, normBuffer, 0);
                            if (!reader.HasDeletions())
                            {
                                //optimized case for segments without deleted docs
                                output.WriteBytes(normBuffer, maxDoc);
                            }
                            else
                            {
                                // this segment has deleted docs, so we have to
                                // check for every doc if it is deleted or not
                                for (int k = 0; k < maxDoc; k++)
                                {
                                    if (!reader.IsDeleted(k))
                                    {
                                        output.WriteByte(normBuffer[k]);
                                    }
                                }
                            }
                            if (checkAbort != null)
                            {
                                checkAbort.Work(maxDoc);
                            }
                        }
                    }
                }
            }
            finally
            {
                if (output != null)
                {
                    output.Close();
                }
            }
        }
 // maps around deleted docs
 internal int[] GetDocMap()
 {
     if (docMap == null)
     {
         // build array which maps document numbers around deletions
         if (reader.HasDeletions())
         {
             int maxDoc = reader.MaxDoc();
             docMap = new int[maxDoc];
             int j = 0;
             for (int i = 0; i < maxDoc; i++)
             {
                 if (reader.IsDeleted(i))
                 {
                     docMap[i] = -1;
                 }
                 else
                 {
                     docMap[i] = j++;
                 }
             }
         }
     }
     return(docMap);
 }
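
The array built above is read as "new doc number for each old doc number, or -1 if the doc was deleted". A standalone sketch of the same renumbering, independent of SegmentMerger (the deletion pattern below is made up):

    // Illustrative only: a 6-doc segment where docs 1 and 4 are deleted.
    bool[] deleted = { false, true, false, false, true, false };
    int maxDoc = deleted.Length;
    int[] docMap = new int[maxDoc];
    int next = 0;
    for (int i = 0; i < maxDoc; i++)
    {
        docMap[i] = deleted[i] ? -1 : next++;
    }
    // docMap is now { 0, -1, 1, 2, -1, 3 }: surviving docs get dense new numbers,
    // which is how the merger remaps postings around deletions.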
        /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
        /// <throws>  IOException </throws>
        private void  MergeVectors()
        {
            TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);

            try
            {
                int idx = 0;
                for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
                {
                    SegmentReader     matchingSegmentReader = matchingSegmentReaders[idx++];
                    TermVectorsReader matchingVectorsReader = null;
                    if (matchingSegmentReader != null)
                    {
                        TermVectorsReader vectorsReader = matchingSegmentReader.GetTermVectorsReaderOrig();

                        // If the TV* files are an older format then they cannot read raw docs:
                        if (vectorsReader != null && vectorsReader.CanReadRawDocs())
                        {
                            matchingVectorsReader = vectorsReader;
                        }
                    }
                    IndexReader reader = (IndexReader)iter.Current;
                    if (reader.HasDeletions())
                    {
                        CopyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
                    }
                    else
                    {
                        CopyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
                    }
                }
            }
            finally
            {
                termVectorsWriter.Close();
            }

            System.String fileName = segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
            long          tvxSize  = directory.FileLength(fileName);

            if (4 + ((long)mergedDocs) * 16 != tvxSize)
            {
                // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                // we detect that the bug has struck, here, and
                // throw an exception to prevent the corruption from
                // entering the index.  See LUCENE-1282 for
                // details.
                throw new System.SystemException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
            }
        }
        /// <summary>Add an IndexReader whose stored fields will not be returned.  This can
        /// accelerate search when stored fields are only needed from a subset of
        /// the IndexReaders.
        /// </summary>
        /// <throws>  ArgumentException if not all indexes contain the same number of documents </throws>
        /// <throws>  ArgumentException if not all indexes have the same value of <see cref="IndexReader.MaxDoc()"/> </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
        {
            EnsureOpen();
            if (readers.Count == 0)
            {
                this.maxDoc       = reader.MaxDoc();
                this.numDocs      = reader.NumDocs();
                this.hasDeletions = reader.HasDeletions();
            }

            if (reader.MaxDoc() != maxDoc)
            {
                // check compatibility
                throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
            }
            if (reader.NumDocs() != numDocs)
            {
                throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
            }

            ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);

            readerToFields[reader] = fields;
            IEnumerator<string> i = fields.GetEnumerator();

            while (i.MoveNext())
            {
                //// update fieldToReader map
                string field = i.Current;
                //if (fieldToReader[field] == null)
                if (!fieldToReader.ContainsKey(field))
                {
                    fieldToReader[field] = reader;
                }
            }

            if (!ignoreStoredFields)
            {
                storedFieldReaders.Add(reader);                 // add to storedFieldReaders
            }
            readers.Add(reader);

            if (incRefReaders)
            {
                reader.IncRef();
            }
            decrefOnClose.Add(incRefReaders);
        }
Example #7

        private static void removeAllDuplicateAndDeletedFiles(IndexableFileInfo[] fileInfos, string LuceneIndexDir, IndexCreationMode indexCreationMode)
        {
            if (indexCreationMode != IndexCreationMode.AppendToExistingIndex)
            {
                return;
            }

            IndexReader reader = IndexReader.Open(LuceneIndexDir);

            try
            {
                int numDocs = reader.NumDocs();
                for (int i = 0; i < numDocs; i++)
                {
                    Document docToCheck         = reader.Document(i);
                    bool     removeDocFromIndex = true;
                    string   filenameField      = docToCheck.GetField("filename").StringValue();
                    string   lastModified       = (docToCheck.GetField("LastModified").StringValue());

                    foreach (IndexableFileInfo fi in fileInfos)
                    {
                        if (String.Compare(fi.Filename, filenameField, true) == 0 && DateTools.DateToString(fi.LastModified, DateTools.Resolution.SECOND) == lastModified)
                        {
                            removeDocFromIndex = false;
                            break;
                        }
                    } // foreach

                    if (removeDocFromIndex)
                    {
                        reader.DeleteDocument(i);
                        if (!reader.HasDeletions())
                        {
                            throw new Exception("error: deletion failed!!");
                        }
                    }
                } // for each lucene doc
            }
            finally
            {
                reader.Close();
            }
            LuceneIndexer indexer = new LuceneIndexer(LuceneIndexDir, indexCreationMode); // open up the index again

            indexer.CloseIndexWriter(OptimizeMode.DoOptimization);                        // just to optimize the index (which removes deleted items).
        }
        /// <summary>Add an IndexReader whose stored fields will not be returned.  This can
        /// accelerate search when stored fields are only needed from a subset of
        /// the IndexReaders.
        /// </summary>
        /// <throws>  ArgumentException if not all indexes contain the same number of documents </throws>
        /// <throws>  ArgumentException if not all indexes have the same value of <see cref="IndexReader.MaxDoc()"/> </throws>
        public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
        {
            if (readers.Count == 0)
            {
                this.maxDoc       = reader.MaxDoc();
                this.numDocs      = reader.NumDocs();
                this.hasDeletions = reader.HasDeletions();
            }

            if (reader.MaxDoc() != maxDoc)
            {
                // check compatibility
                throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
            }
            if (reader.NumDocs() != numDocs)
            {
                throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
            }

            System.Collections.ICollection fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
            readerToFields[reader] = fields;
            System.Collections.IEnumerator i = fields.GetEnumerator();
            while (i.MoveNext())
            {
                System.Collections.DictionaryEntry fi = (System.Collections.DictionaryEntry)i.Current;

                // update fieldToReader map
                System.String field = fi.Key.ToString();
                if (fieldToReader[field] == null)
                {
                    fieldToReader[field] = reader;
                }
            }

            if (!ignoreStoredFields)
            {
                storedFieldReaders.Add(reader);                 // add to storedFieldReaders
            }
            readers.Add(reader);
        }
Example #9
		/// <summary>Add an IndexReader whose stored fields will not be returned.  This can
		/// accelerate search when stored fields are only needed from a subset of
		/// the IndexReaders.
		/// </summary>
		/// <throws>  ArgumentException if not all indexes contain the same number of documents </throws>
		/// <throws>  ArgumentException if not all indexes have the same value of <see cref="IndexReader.MaxDoc()"/> </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public virtual void  Add(IndexReader reader, bool ignoreStoredFields)
		{
			
			EnsureOpen();
			if (readers.Count == 0)
			{
				this.maxDoc = reader.MaxDoc();
				this.numDocs = reader.NumDocs();
				this.hasDeletions = reader.HasDeletions();
			}
			
			if (reader.MaxDoc() != maxDoc) // check compatibility
				throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
			if (reader.NumDocs() != numDocs)
				throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
			
			System.Collections.Generic.ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
			readerToFields[reader] = fields;
			System.Collections.IEnumerator i = fields.GetEnumerator();
			while (i.MoveNext())
			{
				// update fieldToReader map
				System.String field = (System.String) i.Current;
				if (fieldToReader[field] == null)
					fieldToReader[field] = reader;
			}
			
			if (!ignoreStoredFields)
				storedFieldReaders.Add(reader); // add to storedFieldReaders
			readers.Add(reader);
			
			if (incRefReaders)
			{
				reader.IncRef();
			}
			decrefOnClose.Add(incRefReaders);
		}
Example #10
 public override bool HasDeletions()
 {
     // Don't call ensureOpen() here (it could affect performance)
     return(in_Renamed.HasDeletions());
 }
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, that means
                // all segments were written as part of a single
                // autoCommit=false IndexWriter session, so their field
                // name -> number mapping is the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.core.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos();                 // merge field names
            }

            for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
            {
                IndexReader reader = (IndexReader)iter.Current;
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader       = (SegmentReader)reader;
                    FieldInfos    readerFieldInfos    = segmentReader.FieldInfos();
                    int           numReaderFieldInfos = readerFieldInfos.Size();
                    for (int j = 0; j < numReaderFieldInfos; j++)
                    {
                        FieldInfo fi = readerFieldInfos.FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            SetMatchingSegmentReaders();

            if (mergeDocStores)
            {
                // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
                // in  merge mode, we use this FieldSelector
                FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    int idx = 0;
                    for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
                    {
                        IndexReader   reader = (IndexReader)iter.Current;
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
                        FieldsReader  matchingFieldsReader  = null;
                        if (matchingSegmentReader != null)
                        {
                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                            if (fieldsReader != null && fieldsReader.CanReadRawDocs())
                            {
                                matchingFieldsReader = fieldsReader;
                            }
                        }
                        if (reader.HasDeletions())
                        {
                            docCount += CopyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
                        }
                        else
                        {
                            docCount += CopyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Close();
                }

                System.String fileName      = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
                long          fdxFileLength = directory.FileLength(fileName);

                if (4 + ((long)docCount) * 8 != fdxFileLength)
                {
                    // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                    // we detect that the bug has struck, here, and
                    // throw an exception to prevent the corruption from
                    // entering the index.  See LUCENE-1282 for
                    // details.
                    throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
                }
            }
            // If we are skipping the doc stores, that means there
            // are no deletions in any of these segments, so we
            // just sum numDocs() of each segment to get total docCount
            else
            {
                for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
                {
                    docCount += ((IndexReader)iter.Current).NumDocs();
                }
            }

            return(docCount);
        }
		public static void  AssertIndexEquals(IndexReader index1, IndexReader index2)
		{
			Assert.AreEqual(index1.NumDocs(), index2.NumDocs(), "IndexReaders have different values for numDocs.");
			Assert.AreEqual(index1.MaxDoc(), index2.MaxDoc(), "IndexReaders have different values for maxDoc.");
			Assert.AreEqual(index1.HasDeletions(), index2.HasDeletions(), "Only one IndexReader has deletions.");
			Assert.AreEqual(index1.IsOptimized(), index2.IsOptimized(), "Only one index is optimized.");
			
			// check field names
			System.Collections.ICollection fields1 = index1.GetFieldNames(FieldOption.ALL);
			System.Collections.ICollection fields2 = index2.GetFieldNames(FieldOption.ALL);
			Assert.AreEqual(fields1.Count, fields2.Count, "IndexReaders have different numbers of fields.");
            System.Collections.IEnumerator it1 = fields1.GetEnumerator();
            System.Collections.IEnumerator it2 = fields2.GetEnumerator();
            while (it1.MoveNext())
			{
				Assert.IsTrue(it2.MoveNext());
				Assert.AreEqual((System.String) it1.Current, (System.String) it2.Current, "Different field names.");
			}
			
			// check norms
            it1 = fields1.GetEnumerator();
            while (it1.MoveNext())
			{
				System.String curField = (System.String) it1.Current;
				byte[] norms1 = index1.Norms(curField);
				byte[] norms2 = index2.Norms(curField);
				Assert.AreEqual(norms1.Length, norms2.Length);
				for (int i = 0; i < norms1.Length; i++)
				{
					Assert.AreEqual(norms1[i], norms2[i], "Norm different for doc " + i + " and field '" + curField + "'.");
				}
			}
			
			// check deletions
			for (int i = 0; i < index1.MaxDoc(); i++)
			{
				Assert.AreEqual(index1.IsDeleted(i), index2.IsDeleted(i), "Doc " + i + " only deleted in one index.");
			}
			
			// check stored fields
			for (int i = 0; i < index1.MaxDoc(); i++)
			{
				if (!index1.IsDeleted(i))
				{
					Document doc1 = index1.Document(i);
					Document doc2 = index2.Document(i);
					fields1 = doc1.GetFields();
					fields2 = doc2.GetFields();
					Assert.AreEqual(fields1.Count, fields2.Count, "Different numbers of fields for doc " + i + ".");
					it1 = fields1.GetEnumerator();
					it2 = fields2.GetEnumerator();
					while (it1.MoveNext())
					{
						Assert.IsTrue(it2.MoveNext());
						Field curField1 = (Field) it1.Current;
						Field curField2 = (Field) it2.Current;
						Assert.AreEqual(curField1.Name(), curField2.Name(), "Different fields names for doc " + i + ".");
						Assert.AreEqual(curField1.StringValue(), curField2.StringValue(), "Different field values for doc " + i + ".");
					}
				}
			}
			
			// check dictionary and posting lists
			TermEnum enum1 = index1.Terms();
			TermEnum enum2 = index2.Terms();
			TermPositions tp1 = index1.TermPositions();
			TermPositions tp2 = index2.TermPositions();
			while (enum1.Next())
			{
				Assert.IsTrue(enum2.Next());
				Assert.AreEqual(enum1.Term(), enum2.Term(), "Different term in dictionary.");
				tp1.Seek(enum1.Term());
				tp2.Seek(enum1.Term());
				while (tp1.Next())
				{
					Assert.IsTrue(tp2.Next());
					Assert.AreEqual(tp1.Doc(), tp2.Doc(), "Different doc id in postinglist of term " + enum1.Term() + ".");
					Assert.AreEqual(tp1.Freq(), tp2.Freq(), "Different term frequency in postinglist of term " + enum1.Term() + ".");
					for (int i = 0; i < tp1.Freq(); i++)
					{
						Assert.AreEqual(tp1.NextPosition(), tp2.NextPosition(), "Different positions in postinglist of term " + enum1.Term() + ".");
					}
				}
			}
		}
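
A short sketch of how AssertIndexEquals might be called from a test; the RAMDirectory setup and readOnly flag are assumptions (NUnit plus Lucene.Net 2.9-style APIs), and the indexing step is elided:

    Directory dirA = new RAMDirectory();
    Directory dirB = new RAMDirectory();
    // ... index the same documents into dirA and dirB ...

    IndexReader r1 = IndexReader.Open(dirA, true);
    IndexReader r2 = IndexReader.Open(dirB, true);
    try
    {
        AssertIndexEquals(r1, r2);  // raises an NUnit assertion failure if the two indexes differ
    }
    finally
    {
        r1.Close();
        r2.Close();
    }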
Example #13
 public override bool HasDeletions()
 {
     return(in_Renamed.HasDeletions());
 }
Example #14
 /// <summary>
 /// Returns a single <seealso cref="Bits"/> instance for this
 ///  reader, merging live Documents on the
 ///  fly.  This method will return null if the reader
 ///  has no deletions.
 ///
 ///  <p><b>NOTE</b>: this is a very slow way to access live docs.
 ///  For example, each Bits access will require a binary search.
 ///  It's better to get the sub-readers and iterate through them
 ///  yourself.
 /// </summary>
 public static Bits GetLiveDocs(IndexReader reader)
 {
     if (reader.HasDeletions())
     {
         IList<AtomicReaderContext> leaves = reader.Leaves();
         int size = leaves.Count;
         Debug.Assert(size > 0, "A reader with deletions must have at least one leaf");
         if (size == 1)
         {
             return leaves[0].AtomicReader.LiveDocs;
         }
         Bits[] liveDocs = new Bits[size];
         int[] starts = new int[size + 1];
         for (int i = 0; i < size; i++)
         {
             // record all liveDocs, even if they are null
             AtomicReaderContext ctx = leaves[i];
             liveDocs[i] = ctx.AtomicReader.LiveDocs;
             starts[i] = ctx.DocBase;
         }
         starts[size] = reader.MaxDoc();
         return new MultiBits(liveDocs, starts, true);
     }
     else
     {
         return null;
     }
 }
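
A hedged usage sketch for GetLiveDocs; the snippet above looks like a 4.x-style MultiFields helper, and in some Lucene.Net 4.x builds the Bits type is named IBits and MaxDoc is a property, so adjust to the version at hand:

    Bits liveDocs = GetLiveDocs(reader);   // null when the reader has no deletions
    for (int docId = 0; docId < reader.MaxDoc(); docId++)
    {
        if (liveDocs == null || liveDocs.Get(docId))
        {
            // docId is live: safe to load stored fields, norms, etc.
        }
    }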
Example #15
        /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
        /// <throws>  IOException </throws>
        private void MergeVectors()
        {
            TermVectorsWriter termVectorsWriter =
                new TermVectorsWriter(directory, segment, fieldInfos);

            try
            {
                for (int r = 0; r < readers.Count; r++)
                {
                    SegmentReader     matchingSegmentReader = matchingSegmentReaders[r];
                    TermVectorsReader matchingVectorsReader;
                    bool hasMatchingReader;
                    if (matchingSegmentReader != null)
                    {
                        matchingVectorsReader = matchingSegmentReader.termVectorsReaderOrig;

                        // If the TV* files are an older format then they
                        // cannot read raw docs:
                        if (matchingVectorsReader != null && !matchingVectorsReader.CanReadRawDocs())
                        {
                            matchingVectorsReader = null;
                            hasMatchingReader     = false;
                        }
                        else
                        {
                            hasMatchingReader = matchingVectorsReader != null;
                        }
                    }
                    else
                    {
                        hasMatchingReader     = false;
                        matchingVectorsReader = null;
                    }
                    IndexReader reader       = (IndexReader)readers[r];
                    bool        hasDeletions = reader.HasDeletions();
                    int         maxDoc       = reader.MaxDoc();
                    for (int docNum = 0; docNum < maxDoc;)
                    {
                        // skip deleted docs
                        if (!hasDeletions || !reader.IsDeleted(docNum))
                        {
                            if (hasMatchingReader)
                            {
                                // We can optimize this case (doing a bulk
                                // byte copy) since the field numbers are
                                // identical
                                int start   = docNum;
                                int numDocs = 0;
                                do
                                {
                                    docNum++;
                                    numDocs++;
                                    if (docNum >= maxDoc)
                                    {
                                        break;
                                    }
                                    if (hasDeletions && matchingSegmentReader.IsDeleted(docNum))
                                    {
                                        docNum++;
                                        break;
                                    }
                                } while (numDocs < MAX_RAW_MERGE_DOCS);

                                matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
                                termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
                                if (checkAbort != null)
                                {
                                    checkAbort.Work(300 * numDocs);
                                }
                            }
                            else
                            {
                                // NOTE: it's very important to first assign
                                // to vectors then pass it to
                                // termVectorsWriter.addAllDocVectors; see
                                // LUCENE-1282
                                TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
                                termVectorsWriter.AddAllDocVectors(vectors);
                                docNum++;
                                if (checkAbort != null)
                                {
                                    checkAbort.Work(300);
                                }
                            }
                        }
                        else
                        {
                            docNum++;
                        }
                    }
                }
            }
            finally
            {
                termVectorsWriter.Close();
            }

            long tvxSize = directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);

            // {{dougsale-2.4.0}
            // this shouldn't be a problem for us - if it is,
            // then it's not a JRE bug
            //if (4 + mergedDocs * 16 != tvxSize)
            //  // This is most likely a bug in Sun JRE 1.6.0_04/_05;
            //  // we detect that the bug has struck, here, and
            //  // throw an exception to prevent the corruption from
            //  // entering the index.  See LUCENE-1282 for
            //  // details.
            //  throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + "; now aborting this merge to prevent index corruption");
        }
Example #16
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, that means
                // all segments were written as part of a single
                // autoCommit=false IndexWriter session, so their field
                // name -> number mapping is the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos();                 // merge field names
            }

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader = (SegmentReader)reader;
                    for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++)
                    {
                        FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTf);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            SetMatchingSegmentReaders();

            if (mergeDocStores)
            {
                // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
                // in  merge mode, we use this FieldSelector
                FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    for (int i = 0; i < readers.Count; i++)
                    {
                        IndexReader   reader = (IndexReader)readers[i];
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
                        FieldsReader  matchingFieldsReader;
                        bool          hasMatchingReader;
                        if (matchingSegmentReader != null)
                        {
                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                            if (fieldsReader != null && !fieldsReader.CanReadRawDocs())
                            {
                                matchingFieldsReader = null;
                                hasMatchingReader    = false;
                            }
                            else
                            {
                                matchingFieldsReader = fieldsReader;
                                hasMatchingReader    = true;
                            }
                        }
                        else
                        {
                            hasMatchingReader    = false;
                            matchingFieldsReader = null;
                        }
                        int  maxDoc       = reader.MaxDoc();
                        bool hasDeletions = reader.HasDeletions();
                        for (int j = 0; j < maxDoc;)
                        {
                            if (!hasDeletions || !reader.IsDeleted(j))
                            { // skip deleted docs
                                if (hasMatchingReader)
                                {
                                    // We can optimize this case (doing a bulk
                                    // byte copy) since the field numbers are
                                    // identical
                                    int start   = j;
                                    int numDocs = 0;
                                    do
                                    {
                                        j++;
                                        numDocs++;
                                        if (j >= maxDoc)
                                        {
                                            break;
                                        }
                                        if (hasDeletions && matchingSegmentReader.IsDeleted(j))
                                        {
                                            j++;
                                            break;
                                        }
                                    } while (numDocs < MAX_RAW_MERGE_DOCS);

                                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                                    docCount += numDocs;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300 * numDocs);
                                    }
                                }
                                else
                                {
                                    // NOTE: it's very important to first assign
                                    // to doc then pass it to
                                    // termVectorsWriter.addAllDocVectors; see
                                    // LUCENE-1282
                                    Document doc = reader.Document(j, fieldSelectorMerge);
                                    fieldsWriter.AddDocument(doc);
                                    j++;
                                    docCount++;
                                    if (checkAbort != null)
                                    {
                                        checkAbort.Work(300);
                                    }
                                }
                            }
                            else
                            {
                                j++;
                            }
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Close();
                }

                long fdxFileLength = directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

                // {{dougsale-2.4.0}
                // this shouldn't be a problem for us - if it is,
                // then it's not a JRE bug...
                //if (4+docCount*8 != fdxFileLength)
                //  // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                //  // we detect that the bug has struck, here, and
                //  // throw an exception to prevent the corruption from
                //  // entering the index.  See LUCENE-1282 for
                //  // details.
                //  throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + "; now aborting this merge to prevent index corruption");
            }
            else
            {
                // If we are skipping the doc stores, that means there
                // are no deletions in any of these segments, so we
                // just sum numDocs() of each segment to get total docCount
                for (int i = 0; i < readers.Count; i++)
                {
                    docCount += ((IndexReader)readers[i]).NumDocs();
                }
            }

            return(docCount);
        }