Example #1
        private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
        {
            int maxDoc   = reader.MaxDoc();
            int docCount = 0;

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                while (docCount < maxDoc)
                {
                    int        len    = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
                    docCount += len;
                    checkAbort.Work(300 * len);
                }
            }
            else
            {
                for (; docCount < maxDoc; docCount++)
                {
                    // NOTE: it's very important to first assign to doc and then pass it to
                    // fieldsWriter.AddDocument; see LUCENE-1282
                    Document doc = reader.Document(docCount, fieldSelectorMerge);
                    fieldsWriter.AddDocument(doc);
                    checkAbort.Work(300);
                }
            }
            return docCount;
        }
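
The bulk path in Example #1 copies stored fields in batches of at most MAX_RAW_MERGE_DOCS raw documents per RawDocs/AddRawDocuments round trip, avoiding any per-document decode. Below is a minimal standalone sketch of that batching arithmetic; the cap of 4096 and the 10,000-doc segment are assumptions for illustration, not values from the source:

    // Sketch only: shows how the Math.Min batching splits a segment copy.
    const int MAX_RAW_MERGE_DOCS = 4096; // assumed cap, for illustration
    int maxDoc = 10000, docCount = 0;
    while (docCount < maxDoc)
    {
        int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
        System.Console.WriteLine("copy " + len + " raw docs starting at " + docCount);
        docCount += len;
    }
    // prints batches of 4096, 4096, and 1808 documents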
Example #2
        public void  CloseDocStore(SegmentWriteState state)
        {
            lock (this)
            {
                int inc = state.numDocsInStore - lastDocID;
                if (inc > 0)
                {
                    InitFieldsWriter();
                    Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
                }

                if (fieldsWriter != null)
                {
                    fieldsWriter.Close();
                    fieldsWriter = null;
                    lastDocID    = 0;
                    System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
                    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
                    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

                    state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
                    state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

                    System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;

                    if (4 + ((long)state.numDocsInStore) * 8 != state.directory.FileLength(fileName))
                    {
                        throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));
                    }
                }
            }
        }
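
The final size check encodes the fields-index (.fdx) layout this code relies on: a 4-byte header followed by one 8-byte file pointer per stored document. The same invariant as a tiny helper, for illustration only:

    // Expected length of a fields-index (.fdx) file under the layout assumed above.
    static long ExpectedFdxLength(int numDocsInStore)
    {
        return 4 + ((long)numDocsInStore) * 8; // 4-byte header + 8 bytes per doc
    }
    // e.g. ExpectedFdxLength(1000) == 8004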
Example #3
        private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
        {
            int docCount = 0;
            int maxDoc   = reader.MaxDoc();

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int j = 0; j < maxDoc;)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        ++j;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = j, numDocs = 0;
                    do
                    {
                        j++;
                        numDocs++;
                        if (j >= maxDoc)
                        {
                            break;
                        }
                        if (reader.IsDeleted(j))
                        {
                            j++;
                            break;
                        }
                    } while (numDocs < MAX_RAW_MERGE_DOCS);

                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                    docCount += numDocs;
                    checkAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int j = 0; j < maxDoc; j++)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        continue;
                    }
                    // NOTE: it's very important to first assign to doc and then pass it to
                    // fieldsWriter.AddDocument; see LUCENE-1282
                    Document doc = reader.Document(j, fieldSelectorMerge);
                    fieldsWriter.AddDocument(doc);
                    docCount++;
                    checkAbort.Work(300);
                }
            }
            return docCount;
        }
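
The do/while in Example #3 collects a maximal run of consecutive live documents (capped at MAX_RAW_MERGE_DOCS) so each run can be handed to AddRawDocuments as one bulk copy. Here is the same run detection isolated into a self-contained sketch, with a hypothetical bool[] deleted bitmap standing in for reader.IsDeleted:

    // Sketch: emit each maximal run of consecutive live docs, capped at maxRun.
    static void EmitLiveRuns(bool[] deleted, int maxRun)
    {
        int maxDoc = deleted.Length;
        for (int j = 0; j < maxDoc;)
        {
            if (deleted[j]) { j++; continue; } // skip deleted docs
            int start = j, numDocs = 0;
            do
            {
                j++;
                numDocs++;
                if (j >= maxDoc)
                    break;
                if (deleted[j]) { j++; break; } // run ends at the next deletion
            } while (numDocs < maxRun);
            System.Console.WriteLine("bulk-copy run: start=" + start + " len=" + numDocs);
        }
    }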
Example #4
		private void  InitFieldsWriter()
		{
			if (fieldsWriter == null)
			{
				System.String docStoreSegment = docWriter.GetDocStoreSegment();
				if (docStoreSegment != null)
				{
					System.Diagnostics.Debug.Assert(docStoreSegment != null);
					fieldsWriter = new FieldsWriter(docWriter.directory, docStoreSegment, fieldInfos);
					docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_EXTENSION);
					docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
					lastDocID = 0;
				}
			}
		}
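
InitFieldsWriter is a lazy, idempotent initializer: it constructs the FieldsWriter at most once, and only after the DocumentsWriter has assigned a doc-store segment name. Until then fieldsWriter stays null, and the null check in CloseDocStore (Example #2) simply skips the flush bookkeeping.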
Example #5
 internal void  Abort()
 {
     lock (this)
     {
         if (fieldsWriter != null)
         {
             try
             {
                 fieldsWriter.Close();
             }
             catch (System.Exception)
             {
                 // intentionally ignored: we are aborting and discarding this segment
             }
             fieldsWriter = null;
             lastDocID    = 0;
         }
     }
 }
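
Note that Abort deliberately swallows any exception from Close: the segment being aborted is about to be discarded, so a secondary close failure should not mask the original error. The same pattern in isolation (a generic sketch, not code from this source):

    // Sketch: close a resource without letting a close failure escape.
    static void CloseQuietly(System.IDisposable resource)
    {
        try
        {
            resource.Dispose();
        }
        catch (System.Exception)
        {
            // intentionally ignored: caller is already on an abort path
        }
    }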
Example #6
		public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter)
		{
			this.storedFieldsWriter = storedFieldsWriter;
			this.docState = docState;
			localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null, storedFieldsWriter.fieldInfos);
		}
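
Both IndexOutput arguments are null here, so this per-thread FieldsWriter owns no files of its own; presumably it serializes each document into an in-memory buffer that the shared StoredFieldsWriter later flushes to the real .fdt/.fdx outputs. That detail lives in FieldsWriter itself and is not shown in these snippets.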
Example #7
		/// <summary>Merges the field infos and stored fields of all readers into the new segment.</summary>
		/// <returns> The number of documents in all of the readers
		/// </returns>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		private int MergeFields()
		{
			
			if (!mergeDocStores)
			{
				// When we are not merging by doc stores, that means
				// all segments were written as part of a single
				// autoCommit=false IndexWriter session, so their field
				// name -> number mapping are the same.  So, we start
				// with the fieldInfos of the last segment in this
				// case, to keep that numbering.
				SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
				fieldInfos = (FieldInfos) sr.core.fieldInfos.Clone();
			}
			else
			{
				fieldInfos = new FieldInfos(); // merge field names
			}
			
			for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
			{
				IndexReader reader = (IndexReader) iter.Current;
				if (reader is SegmentReader)
				{
					SegmentReader segmentReader = (SegmentReader) reader;
					FieldInfos readerFieldInfos = segmentReader.FieldInfos();
					int numReaderFieldInfos = readerFieldInfos.Size();
					for (int j = 0; j < numReaderFieldInfos; j++)
					{
						FieldInfo fi = readerFieldInfos.FieldInfo(j);
						fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
					}
				}
				else
				{
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
					fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
				}
			}
			fieldInfos.Write(directory, segment + ".fnm");
			
			int docCount = 0;
			
			SetMatchingSegmentReaders();
			
			if (mergeDocStores)
			{
				
				// For merging we don't want to compress/uncompress the data, so to tell the
				// FieldsReader that we're in merge mode, we use this FieldSelector
				FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
				
				// merge field values
				FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
				
				try
				{
					int idx = 0;
					for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
					{
						IndexReader reader = (IndexReader) iter.Current;
						SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
						FieldsReader matchingFieldsReader = null;
						if (matchingSegmentReader != null)
						{
							FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
							if (fieldsReader != null && fieldsReader.CanReadRawDocs())
							{
								matchingFieldsReader = fieldsReader;
							}
						}
						if (reader.HasDeletions())
						{
							docCount += CopyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
						}
						else
						{
							docCount += CopyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
						}
					}
				}
				finally
				{
					fieldsWriter.Close();
				}
				
				System.String fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
				long fdxFileLength = directory.FileLength(fileName);
				
				if (4 + ((long) docCount) * 8 != fdxFileLength)
				{
					// This is most likely a bug in Sun JRE 1.6.0_04/_05;
					// we detect that the bug has struck, here, and
					// throw an exception to prevent the corruption from
					// entering the index.  See LUCENE-1282 for details.
					throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
				}
			}
			// If we are skipping the doc stores, that means there
			// are no deletions in any of these segments, so we
			// just sum numDocs() of each segment to get total docCount
			else
			{
				for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext(); )
				{
					docCount += ((IndexReader) iter.Current).NumDocs();
				}
			}
			
			return docCount;
		}
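
The comment in Example #7 says the FieldSelector exists so FieldsReader skips compressing/uncompressing stored fields during a merge. A hedged reconstruction of what such a selector likely looks like; the class name here is hypothetical, since only AnonymousClassFieldSelector appears in the source:

    // Assumed behavior: LOAD_FOR_MERGE tells FieldsReader to hand back the
    // stored bytes verbatim, skipping the compress/uncompress round trip.
    class MergeFieldSelector : FieldSelector
    {
        public FieldSelectorResult Accept(System.String fieldName)
        {
            return FieldSelectorResult.LOAD_FOR_MERGE;
        }
    }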