Example #1
0
 public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, bool isBinary) : base(name, store, index, termVector)
 {
     InitBlock(enclosingInstance);
     this.toRead   = toRead;
     this.pointer  = pointer;
     this.isBinary = isBinary;
     if (isBinary)
     {
         binaryLength = toRead;
     }
     lazy = true;
 }
Example #2
0
			public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, bool isBinary):base(name, store, index, termVector)
			{
				InitBlock(enclosingInstance);
				this.toRead = toRead;
				this.pointer = pointer;
				this.isBinary = isBinary;
				if (isBinary)
					binaryLength = toRead;
				lazy = true;
			}
Example #3
0
			private void  InitBlock(FieldsReader enclosingInstance)
			{
				this.enclosingInstance = enclosingInstance;
			}
Example #4
0
			internal void  OpenDocStores(SegmentInfo si)
			{
				lock (this)
				{
					
					System.Diagnostics.Debug.Assert(si.name.Equals(segment));
					
					if (fieldsReaderOrig == null)
					{
						Directory storeDir;
						if (si.GetDocStoreOffset() != - 1)
						{
							if (si.GetDocStoreIsCompoundFile())
							{
								System.Diagnostics.Debug.Assert(storeCFSReader == null);
								storeCFSReader = new CompoundFileReader(dir, si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
								storeDir = storeCFSReader;
								System.Diagnostics.Debug.Assert(storeDir != null);
							}
							else
							{
								storeDir = dir;
								System.Diagnostics.Debug.Assert(storeDir != null);
							}
						}
						else if (si.GetUseCompoundFile())
						{
							// In some cases, we were originally opened when CFS
							// was not used, but then we are asked to open doc
							// stores after the segment has switched to CFS
							if (cfsReader == null)
							{
								cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
							}
							storeDir = cfsReader;
							System.Diagnostics.Debug.Assert(storeDir != null);
						}
						else
						{
							storeDir = dir;
							System.Diagnostics.Debug.Assert(storeDir != null);
						}
						
						System.String storesSegment;
						if (si.GetDocStoreOffset() != - 1)
						{
							storesSegment = si.GetDocStoreSegment();
						}
						else
						{
							storesSegment = segment;
						}
						
						fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
						
						// Verify two sources of "maxDoc" agree:
						if (si.GetDocStoreOffset() == - 1 && fieldsReaderOrig.Size() != si.docCount)
						{
							throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.Size() + " but segmentInfo shows " + si.docCount);
						}
						
						if (fieldInfos.HasVectors())
						{
							// open term vector files only as needed
							termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
						}
					}
				}
			}
Example #5
0
 private void  InitBlock(FieldsReader enclosingInstance)
 {
     this.enclosingInstance = enclosingInstance;
 }
Example #6
0
		private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
		{
			int maxDoc = reader.MaxDoc();
			int docCount = 0;
			if (matchingFieldsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				while (docCount < maxDoc)
				{
					int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
					IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
					fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
					docCount += len;
					checkAbort.Work(300 * len);
				}
			}
			else
			{
				for (; docCount < maxDoc; docCount++)
				{
					// NOTE: it's very important to first assign to doc then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					Document doc = reader.Document(docCount, fieldSelectorMerge);
					fieldsWriter.AddDocument(doc);
					checkAbort.Work(300);
				}
			}
			return docCount;
		}
Example #7
0
		private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
		{
			int docCount = 0;
			int maxDoc = reader.MaxDoc();
			if (matchingFieldsReader != null)
			{
				// We can bulk-copy because the fieldInfos are "congruent"
				for (int j = 0; j < maxDoc; )
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						++j;
						continue;
					}
					// We can optimize this case (doing a bulk byte copy) since the field 
					// numbers are identical
					int start = j, numDocs = 0;
					do 
					{
						j++;
						numDocs++;
						if (j >= maxDoc)
							break;
						if (reader.IsDeleted(j))
						{
							j++;
							break;
						}
					}
					while (numDocs < MAX_RAW_MERGE_DOCS);
					
					IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
					fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
					docCount += numDocs;
					checkAbort.Work(300 * numDocs);
				}
			}
			else
			{
				for (int j = 0; j < maxDoc; j++)
				{
					if (reader.IsDeleted(j))
					{
						// skip deleted docs
						continue;
					}
					// NOTE: it's very important to first assign to doc then pass it to
					// termVectorsWriter.addAllDocVectors; see LUCENE-1282
					Document doc = reader.Document(j, fieldSelectorMerge);
					fieldsWriter.AddDocument(doc);
					docCount++;
					checkAbort.Work(300);
				}
			}
			return docCount;
		}
Example #8
0
        private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
        {
            int maxDoc   = reader.MaxDoc();
            int docCount = 0;

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                while (docCount < maxDoc)
                {
                    int        len    = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
                    docCount += len;
                    checkAbort.Work(300 * len);
                }
            }
            else
            {
                for (; docCount < maxDoc; docCount++)
                {
                    // NOTE: it's very important to first assign to doc then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Document doc = reader.Document(docCount, fieldSelectorMerge);
                    fieldsWriter.AddDocument(doc);
                    checkAbort.Work(300);
                }
            }
            return(docCount);
        }
Example #9
0
        private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
        {
            int docCount = 0;
            int maxDoc   = reader.MaxDoc();

            if (matchingFieldsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int j = 0; j < maxDoc;)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        ++j;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = j, numDocs = 0;
                    do
                    {
                        j++;
                        numDocs++;
                        if (j >= maxDoc)
                        {
                            break;
                        }
                        if (reader.IsDeleted(j))
                        {
                            j++;
                            break;
                        }
                    }while (numDocs < MAX_RAW_MERGE_DOCS);

                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
                    docCount += numDocs;
                    checkAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int j = 0; j < maxDoc; j++)
                {
                    if (reader.IsDeleted(j))
                    {
                        // skip deleted docs
                        continue;
                    }
                    // NOTE: it's very important to first assign to doc then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Document doc = reader.Document(j, fieldSelectorMerge);
                    fieldsWriter.AddDocument(doc);
                    docCount++;
                    checkAbort.Work(300);
                }
            }
            return(docCount);
        }
Example #10
0
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int MergeFields()
        {
            if (!mergeDocStores)
            {
                // When we are not merging by doc stores, that means
                // all segments were written as part of a single
                // autoCommit=false IndexWriter session, so their field
                // name -> number mapping are the same.  So, we start
                // with the fieldInfos of the last segment in this
                // case, to keep that numbering.
                SegmentReader sr = (SegmentReader)readers[readers.Count - 1];
                fieldInfos = (FieldInfos)sr.core.fieldInfos.Clone();
            }
            else
            {
                fieldInfos = new FieldInfos();                 // merge field names
            }

            for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
            {
                IndexReader reader = (IndexReader)iter.Current;
                if (reader is SegmentReader)
                {
                    SegmentReader segmentReader       = (SegmentReader)reader;
                    FieldInfos    readerFieldInfos    = segmentReader.FieldInfos();
                    int           numReaderFieldInfos = readerFieldInfos.Size();
                    for (int j = 0; j < numReaderFieldInfos; j++)
                    {
                        FieldInfo fi = readerFieldInfos.FieldInfo(j);
                        fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
                    }
                }
                else
                {
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false);
                    fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false);
                }
            }
            fieldInfos.Write(directory, segment + ".fnm");

            int docCount = 0;

            SetMatchingSegmentReaders();

            if (mergeDocStores)
            {
                // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
                // in  merge mode, we use this FieldSelector
                FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

                // merge field values
                FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

                try
                {
                    int idx = 0;
                    for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
                    {
                        IndexReader   reader = (IndexReader)iter.Current;
                        SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
                        FieldsReader  matchingFieldsReader  = null;
                        if (matchingSegmentReader != null)
                        {
                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
                            if (fieldsReader != null && fieldsReader.CanReadRawDocs())
                            {
                                matchingFieldsReader = fieldsReader;
                            }
                        }
                        if (reader.HasDeletions())
                        {
                            docCount += CopyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
                        }
                        else
                        {
                            docCount += CopyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, reader, matchingFieldsReader);
                        }
                    }
                }
                finally
                {
                    fieldsWriter.Close();
                }

                System.String fileName      = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
                long          fdxFileLength = directory.FileLength(fileName);

                if (4 + ((long)docCount) * 8 != fdxFileLength)
                {
                    // This is most likely a bug in Sun JRE 1.6.0_04/_05;
                    // we detect that the bug has struck, here, and
                    // throw an exception to prevent the corruption from
                    // entering the index.  See LUCENE-1282 for
                    // details.
                    throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
                }
            }
            // If we are skipping the doc stores, that means there
            // are no deletions in any of these segments, so we
            // just sum numDocs() of each segment to get total docCount
            else
            {
                for (System.Collections.IEnumerator iter = readers.GetEnumerator(); iter.MoveNext();)
                {
                    docCount += ((IndexReader)iter.Current).NumDocs();
                }
            }

            return(docCount);
        }