Example #1
		public void Read(IndexInput input, FieldInfos fieldInfos)
		{
            this.term = null; // invalidate cache
			int start = input.ReadVInt();
			int length = input.ReadVInt();
			int totalLength = start + length;
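			// Term texts are prefix-coded: 'start' is the length of the
			// prefix shared with the previous term, and 'length' is the
			// size of the new suffix read below.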
			if (preUTF8Strings)
			{
				text.SetLength(totalLength);
				input.ReadChars(text.result, start, length);
			}
			else
			{
				
				if (dirty)
				{
					// Fully convert all bytes since bytes is dirty
					UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
					bytes.SetLength(totalLength);
					input.ReadBytes(bytes.result, start, length);
					UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
					dirty = false;
				}
				else
				{
					// Incrementally convert only the UTF8 bytes that are new:
					bytes.SetLength(totalLength);
					input.ReadBytes(bytes.result, start, length);
					UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
				}
			}
			this.field = fieldInfos.FieldName(input.ReadVInt());
		}
		internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
		{
			input = i;
			fieldInfos = fis;
			isIndex = isi;
			maxSkipLevels = 1; // use single-level skip lists for formats > -3 
			
			int firstInt = input.ReadInt();
			if (firstInt >= 0)
			{
				// original-format file, without explicit format version number
				format = 0;
				size = firstInt;
				
				// back-compatible settings
				indexInterval = 128;
				skipInterval = System.Int32.MaxValue; // switch off skipTo optimization
			}
			else
			{
				// we have a format version number
				format = firstInt;
				
				// check that it is a format we can understand
				if (format < TermInfosWriter.FORMAT_CURRENT)
					throw new CorruptIndexException("Unknown format version: " + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");
				
				size = input.ReadLong(); // read the size
				
				if (format == -1)
				{
					if (!isIndex)
					{
						indexInterval = input.ReadInt();
						formatM1SkipInterval = input.ReadInt();
					}
					// switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in 
					// skipTo implementation of these versions
					skipInterval = System.Int32.MaxValue;
				}
				else
				{
					indexInterval = input.ReadInt();
					skipInterval = input.ReadInt();
					if (format <= TermInfosWriter.FORMAT)
					{
						// this new format introduces multi-level skipping
						maxSkipLevels = input.ReadInt();
					}
				}
				System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0");
				System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0");
			}
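			// Format constants are negative and decrease as the format
			// evolves; a format greater than (i.e. older than)
			// FORMAT_VERSION_UTF8_LENGTH_IN_BYTES wrote term text as
			// chars, so the buffers must read it in pre-UTF8 mode.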
			if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
			{
				termBuffer.SetPreUTF8Strings();
				scanBuffer.SetPreUTF8Strings();
				prevBuffer.SetPreUTF8Strings();
			}
		}
		public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor)
		{
			InitBlock();
			this.docState = threadState.docState;
			this.docFieldProcessor = docFieldProcessor;
			this.fieldInfos = docFieldProcessor.fieldInfos;
			this.consumer = docFieldProcessor.consumer.AddThread(this);
			fieldsWriter = docFieldProcessor.fieldsWriter.AddThread(docState);
		}
		public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
		{
			// Open files for TermVector storage
			tvx = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
			tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
			tvd = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
			tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
			tvf = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
			tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);
			
			this.fieldInfos = fieldInfos;
		}
Example #5
		// Used only by clone
		private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int formatSize, int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream)
		{
			this.fieldInfos = fieldInfos;
			this.numTotalDocs = numTotalDocs;
			this.size = size;
			this.format = format;
			this.formatSize = formatSize;
			this.docStoreOffset = docStoreOffset;
			this.cloneableFieldsStream = cloneableFieldsStream;
			this.cloneableIndexStream = cloneableIndexStream;
			fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
			indexStream = (IndexInput) cloneableIndexStream.Clone();
		}
		public FormatPostingsFieldsWriter(SegmentWriteState state, FieldInfos fieldInfos) : base()
		{
			
			dir = state.directory;
			segment = state.segmentName;
			totalNumDocs = state.numDocs;
			this.fieldInfos = fieldInfos;
			termsOut = new TermInfosWriter(dir, segment, fieldInfos, state.termIndexInterval);
			
			// TODO: this is a nasty abstraction violation (that we
			// peek down to find freqOut/proxOut) -- we need a
			// better abstraction here whereby these child consumers
			// can provide skip data or not
			skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, totalNumDocs, null, null);
			
			state.flushedFiles.Add(state.SegmentFileName(IndexFileNames.TERMS_EXTENSION));
			state.flushedFiles.Add(state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION));
			
			termsWriter = new FormatPostingsTermsWriter(state, this);
		}
 internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize) : this(d, segment, fieldInfos, readBufferSize, -1, 0)
 {
 }
Example #8
		internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize) : this(d, segment, fn, readBufferSize, -1, 0)
		{
		}
Example #9
		internal override void SetFieldInfos(FieldInfos fieldInfos)
		{
			this.fieldInfos = fieldInfos;
		}
		internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
		{
			bool success = false;
			
			if (indexDivisor < 1 && indexDivisor != -1)
			{
				throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
			}
			
			try
			{
				directory = dir;
				segment = seg;
				fieldInfos = fis;
				
				origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false);
				size = origEnum.size;
				
				if (indexDivisor != -1)
				{
					// Load terms index
					totalIndexInterval = origEnum.indexInterval * indexDivisor;
					var indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true);
					
					try
					{
						int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; // number of index terms kept after subsampling
						
						indexTerms = new Term[indexSize];
						indexInfos = new TermInfo[indexSize];
						indexPointers = new long[indexSize];
						
						for (int i = 0; indexEnum.Next(); i++)
						{
							indexTerms[i] = indexEnum.Term;
							indexInfos[i] = indexEnum.TermInfo();
							indexPointers[i] = indexEnum.indexPointer;
							
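							// With indexDivisor > 1, skip ahead so that only
							// every divisor'th index term is actually loaded: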
							for (int j = 1; j < indexDivisor; j++)
								if (!indexEnum.Next())
									break;
						}
					}
					finally
					{
						indexEnum.Close();
					}
				}
				else
				{
					// Do not load terms index:
					totalIndexInterval = -1;
					indexTerms = null;
					indexInfos = null;
					indexPointers = null;
				}
				success = true;
			}
			finally
			{
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above. In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!success)
				{
					Dispose();
				}
			}
		}
Example #11
		/// <summary> Merges the stored fields of all readers into the new segment.</summary>
		/// <returns> The number of documents in all of the readers
		/// </returns>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		private int MergeFields()
		{
			
			if (!mergeDocStores)
			{
				// When we are not merging by doc stores, the readers'
				// field name -> number mappings are the same.  So we
				// start with the fieldInfos of the last segment in this
				// case, to keep that numbering.
				SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
				fieldInfos = (FieldInfos) sr.core.fieldInfos.Clone();
			}
			else
			{
				fieldInfos = new FieldInfos(); // merge field names
			}
			
			foreach(IndexReader reader in readers)
			{
				if (reader is SegmentReader)
				{
					SegmentReader segmentReader = (SegmentReader) reader;
					FieldInfos readerFieldInfos = segmentReader.FieldInfos();
					int numReaderFieldInfos = readerFieldInfos.Size();
					for (int j = 0; j < numReaderFieldInfos; j++)
					{
						FieldInfo fi = readerFieldInfos.FieldInfo(j);
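						// The sixth argument is omitNorms: norms are omitted
						// for a field when this reader has none for it.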
						fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions);
					}
				}
				else
				{
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false);
					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false);
					fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
				}
			}
			fieldInfos.Write(directory, segment + ".fnm");
			
			int docCount = 0;
			
			SetMatchingSegmentReaders();
			
			if (mergeDocStores)
			{
				// merge field values
				FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
				
				try
				{
					int idx = 0;
					foreach(IndexReader reader in readers)
					{
						SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
						FieldsReader matchingFieldsReader = null;
						if (matchingSegmentReader != null)
						{
							FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
							if (fieldsReader != null && fieldsReader.CanReadRawDocs())
							{
								matchingFieldsReader = fieldsReader;
							}
						}
						if (reader.HasDeletions)
						{
							docCount += CopyFieldsWithDeletions(fieldsWriter, reader, matchingFieldsReader);
						}
						else
						{
							docCount += CopyFieldsNoDeletions(fieldsWriter, reader, matchingFieldsReader);
						}
					}
				}
				finally
				{
					fieldsWriter.Dispose();
				}
				
				System.String fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
				long fdxFileLength = directory.FileLength(fileName);
				
				// This is most likely a bug in Sun JRE 1.6.0_04/_05;
				// we detect that the bug has struck, here, and
				// throw an exception to prevent the corruption from
				// entering the index.  See LUCENE-1282 for details.
				if (4 + ((long) docCount) * 8 != fdxFileLength)
					throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption");
			}
			else
			{
				// If we are skipping the doc stores, that means there
				// are no deletions in any of these segments, so we
				// just sum numDocs() of each segment to get total docCount
				foreach(IndexReader reader in readers)
				{
					docCount += reader.NumDocs();
				}
			}
			
			return docCount;
		}
 internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos) : this(d, segment, fieldInfos, BufferedIndexInput.BUFFER_SIZE)
 {
 }
Example #13
		internal FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn)
		{
			fieldInfos = fn;
			fieldsStream = fdt;
			indexStream = fdx;
			doClose = false;
		}
		internal override void SetFieldInfos(FieldInfos fieldInfos)
		{
			base.SetFieldInfos(fieldInfos);
			one.SetFieldInfos(fieldInfos);
			two.SetFieldInfos(fieldInfos);
		}
Example #15
		/// <summary> Returns a deep clone of this FieldInfos instance.</summary>
		public Object Clone()
		{
            lock (this)
            {
                var fis = new FieldInfos();
                int numField = byNumber.Count;
                for (int i = 0; i < numField; i++)
                {
                    var fi = (FieldInfo)byNumber[i].Clone();
                    fis.byNumber.Add(fi);
                    fis.byName[fi.name] = fi;
                }
                return fis;
            }
		}
		internal virtual void SetFieldInfos(FieldInfos fieldInfos)
		{
			this.fieldInfos = fieldInfos;
		}
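A minimal usage sketch for the deep Clone() above, assuming only the FieldInfos API visible in these examples (the two-argument Add(name, isIndexed) overload is inferred from the calls in MergeFields and may differ; field names are illustrative):

		// Hypothetical sketch: clone a FieldInfos, then mutate the copy
		// without affecting the original instance.
		var original = new FieldInfos();
		original.Add("title", true); // assumed Add(name, isIndexed) overload
		var copy = (FieldInfos) original.Clone(); // every FieldInfo is itself cloned
		copy.Add("body", true); // 'original' still contains only "title"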
Example #17
		public /*internal*/ FieldsReader(Directory d, String segment, FieldInfos fn) : this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0)
		{
		}
        internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;

            try
            {
                if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
                {
                    tvx    = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
                    format = CheckValidFormat(tvx);
                    tvd    = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
                    int tvdFormat = CheckValidFormat(tvd);
                    tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
                    int tvfFormat = CheckValidFormat(tvf);

                    System.Diagnostics.Debug.Assert(format == tvdFormat);
                    System.Diagnostics.Debug.Assert(format == tvfFormat);

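                    // FORMAT_VERSION2 stores two 8-byte pointers per doc in
                    // tvx (one into tvd, one into tvf), so each entry is 16
                    // bytes; older formats store a single 8-byte pointer.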
                    if (format >= FORMAT_VERSION2)
                    {
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 4);
                    }
                    else
                    {
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 3);
                    }

                    if (-1 == docStoreOffset)
                    {
                        this.docStoreOffset = 0;
                        this.size           = numTotalDocs;
                        System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size);
                    }
                    else
                    {
                        this.docStoreOffset = docStoreOffset;
                        this.size           = size;
                        // Verify the file is long enough to hold all of our
                        // docs
                        System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
                    }
                }
                else
                {
                    // If all documents flushed in a segment had hit
                    // non-aborting exceptions, it's possible that
                    // FieldInfos.hasVectors returns true yet the term
                    // vector files don't exist.
                    format = 0;
                }


                this.fieldInfos = fieldInfos;
                success         = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Dispose();
                }
            }
        }
Example #19
		internal override void SetFieldInfos(FieldInfos fieldInfos)
		{
			base.SetFieldInfos(fieldInfos);
			consumer.SetFieldInfos(fieldInfos);
			endConsumer.SetFieldInfos(fieldInfos);
		}
Example #20
		internal override void SetFieldInfos(FieldInfos fieldInfos)
		{
			this.fieldInfos = fieldInfos;
			consumer.SetFieldInfos(fieldInfos);
		}
Example #21
		internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
		{
			bool success = false;
			isOriginal = true;
			try
			{
				fieldInfos = fn;
				
				cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
				cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);
				
				// The first version of fdx did not include a format
				// header; in that case the first int will always be 0.
				int firstInt = cloneableIndexStream.ReadInt();
				format = firstInt;
				
				if (format > FieldsWriter.FORMAT_CURRENT)
					throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
				
				formatSize = format > FieldsWriter.FORMAT ? 4 : 0;
				
				if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
					cloneableFieldsStream.SetModifiedUTF8StringsMode();
				
				fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
				
				long indexSize = cloneableIndexStream.Length() - formatSize;
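				// Each fdx entry is a single 8-byte pointer into the fdt
				// file, hence the divisions by 8 (>> 3) below.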
				
				if (docStoreOffset != -1)
				{
					// We read only a slice out of this shared fields file
					this.docStoreOffset = docStoreOffset;
					this.size = size;
					
					// Verify the file is long enough to hold all of our
					// docs
					System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
				}
				else
				{
					this.docStoreOffset = 0;
					this.size = (int) (indexSize >> 3);
				}
				
				indexStream = (IndexInput) cloneableIndexStream.Clone();
				numTotalDocs = (int) (indexSize >> 3);
				success = true;
			}
			finally
			{
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above. In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!success)
				{
					Dispose();
				}
			}
		}
		internal abstract void SetFieldInfos(FieldInfos fieldInfos);
		public StoredFieldsWriter(DocumentsWriter docWriter, FieldInfos fieldInfos)
		{
			InitBlock();
			this.docWriter = docWriter;
			this.fieldInfos = fieldInfos;
		}
Example #24
		internal FieldsWriter(Directory d, System.String segment, FieldInfos fn)
		{
			fieldInfos = fn;
			
			bool success = false;
			String fieldsName = segment + "." + IndexFileNames.FIELDS_EXTENSION;
			try
			{
				fieldsStream = d.CreateOutput(fieldsName);
				fieldsStream.WriteInt(FORMAT_CURRENT);
				success = true;
			}
			finally
			{
				if (!success)
				{
					try
					{
						Dispose();
					}
					catch (System.Exception)
					{
						// Suppress so we keep throwing the original exception
					}
					try
					{
						d.DeleteFile(fieldsName);
					}
					catch (System.Exception)
					{
						// Suppress so we keep throwing the original exception
					}
				}
			}
			
			success = false;
			String indexName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
			try
			{
				indexStream = d.CreateOutput(indexName);
				indexStream.WriteInt(FORMAT_CURRENT);
				success = true;
			}
			finally
			{
				if (!success)
				{
					try
					{
						Dispose();
					}
					catch (System.IO.IOException)
					{
						// Suppress so we keep throwing the original exception
					}
					try
					{
						d.DeleteFile(fieldsName);
					}
					catch (System.Exception)
					{
						// Suppress so we keep throwing the original exception
					}
					try
					{
						d.DeleteFile(indexName);
					}
					catch (System.Exception)
					{
						// Suppress so we keep throwing the original exception
					}
				}
			}
			
			doClose = true;
		}
		internal TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval)
		{
			Initialize(directory, segment, fis, interval, false);
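			// The paired writer emits the .tii term index while this one
			// emits the .tis dictionary; back-references let either writer
			// reach its counterpart.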
			other = new TermInfosWriter(directory, segment, fis, interval, true);
			other.other = this;
		}
Example #26
 private void AddIndexed(IndexReader reader, FieldInfos fInfos, ICollection<string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions)
 {
     foreach (var field in names)
     {
         fInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector,
                    !reader.HasNorms(field), storePayloads, omitTFAndPositions);
     }
 }
		private TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval, bool isIndex)
		{
			Initialize(directory, segment, fis, interval, isIndex);
		}
		private void  Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
		{
			indexInterval = interval;
			fieldInfos = fis;
			isIndex = isi;
			output = directory.CreateOutput(segment + (isIndex ? ".tii" : ".tis"));
			output.WriteInt(FORMAT_CURRENT); // write format
			output.WriteLong(0); // leave space for size
			output.WriteInt(indexInterval); // write indexInterval
			output.WriteInt(skipInterval); // write skipInterval
			output.WriteInt(maxSkipLevels); // write maxSkipLevels
			System.Diagnostics.Debug.Assert(InitUTF16Results());
		}