Example #1
		public void Read(IndexInput input, FieldInfos fieldInfos)
		{
            this.term = null; // invalidate cache
			int start = input.ReadVInt();
			int length = input.ReadVInt();
			int totalLength = start + length;
			if (preUTF8Strings)
			{
				text.SetLength(totalLength);
				input.ReadChars(text.result, start, length);
			}
			else
			{
				
				if (dirty)
				{
					// Fully convert all bytes since bytes is dirty
					UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
					bytes.SetLength(totalLength);
					input.ReadBytes(bytes.result, start, length);
					UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
					dirty = false;
				}
				else
				{
					// Incrementally convert only the UTF8 bytes that are new:
					bytes.SetLength(totalLength);
					input.ReadBytes(bytes.result, start, length);
					UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
				}
			}
			this.field = fieldInfos.FieldName(input.ReadVInt());
		}
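
The two VInts read above are the length of the prefix shared with the previous term and the length of the new suffix. Below is a minimal, self-contained sketch of that prefix+suffix decoding idea using plain .NET strings instead of Lucene's UnicodeUtil buffers; PrefixDeltaDemo and its types are hypothetical, not library API.

using System.Collections.Generic;

static class PrefixDeltaDemo
{
    // Each entry is (shared prefix length, new suffix) -- the same two values
    // that Read() pulls from the input as "start" and the suffix characters/bytes.
    public static IEnumerable<string> Decode(IEnumerable<(int Start, string Suffix)> entries)
    {
        string previous = string.Empty;
        foreach (var (start, suffix) in entries)
        {
            // Keep the first `start` chars of the previous term, append the new suffix.
            previous = previous.Substring(0, start) + suffix;
            yield return previous;
        }
    }
}
// Decode(new[] { (0, "apple"), (3, "ly"), (5, "ing") }) yields "apple", "apply", "applying".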
Example #2
		internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
		{
			input = i;
			fieldInfos = fis;
			isIndex = isi;
			maxSkipLevels = 1; // use single-level skip lists for formats > -3 
			
			int firstInt = input.ReadInt();
			if (firstInt >= 0)
			{
				// original-format file, without explicit format version number
				format = 0;
				size = firstInt;
				
				// back-compatible settings
				indexInterval = 128;
				skipInterval = System.Int32.MaxValue; // switch off skipTo optimization
			}
			else
			{
				// we have a format version number
				format = firstInt;
				
				// check that it is a format we can understand
				if (format < TermInfosWriter.FORMAT_CURRENT)
					throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");
				
				size = input.ReadLong(); // read the size
				
				if (format == - 1)
				{
					if (!isIndex)
					{
						indexInterval = input.ReadInt();
						formatM1SkipInterval = input.ReadInt();
					}
					// switch off the skipTo optimization for file formats prior to 1.4rc2,
					// in order to avoid a bug in the skipTo implementation of those versions
					skipInterval = System.Int32.MaxValue;
				}
				else
				{
					indexInterval = input.ReadInt();
					skipInterval = input.ReadInt();
					if (format <= TermInfosWriter.FORMAT)
					{
						// this new format introduces multi-level skipping
						maxSkipLevels = input.ReadInt();
					}
				}
				System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0");
				System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0");
			}
			if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
			{
				termBuffer.SetPreUTF8Strings();
				scanBuffer.SetPreUTF8Strings();
				prevBuffer.SetPreUTF8Strings();
			}
		}
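
The constructor tells pre-versioned .tis files apart from versioned ones by the sign of the first int. A rough sketch of the same convention with a plain BinaryReader follows; FormatProbe is a hypothetical helper, and the byte order and on-disk layout of the real files are not reproduced here.

using System.IO;

static class FormatProbe
{
    // A non-negative first int means a legacy file (format 0) where that int is
    // already the size; a negative first int is the format version itself and the
    // size follows separately.
    public static (int Format, long Size) ReadHeader(BinaryReader reader)
    {
        int firstInt = reader.ReadInt32();
        if (firstInt >= 0)
            return (0, firstInt);               // legacy layout
        return (firstInt, reader.ReadInt64());  // versioned layout
    }
}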
Example #3
		public /*protected internal*/ SegmentTermDocs(SegmentReader parent)
		{
			this.parent = parent;
			this.freqStream = (IndexInput) parent.core.freqStream.Clone();
			lock (parent)
			{
				this.deletedDocs = parent.deletedDocs;
			}
			this.skipInterval = parent.core.GetTermsReader().SkipInterval;
			this.maxSkipLevels = parent.core.GetTermsReader().MaxSkipLevels;
		}
Example #4
		public CompoundFileReader(Directory dir, System.String name, int readBufferSize)
		{
			directory = dir;
			fileName = name;
			this.readBufferSize = readBufferSize;
			
			bool success = false;
			
			try
			{
				stream = dir.OpenInput(name, readBufferSize);
				
				// read the directory and init files
				int count = stream.ReadVInt();
				FileEntry entry = null;
				for (int i = 0; i < count; i++)
				{
					long offset = stream.ReadLong();
					System.String id = stream.ReadString();
					
					if (entry != null)
					{
						// set length of the previous entry
						entry.length = offset - entry.offset;
					}

					entry = new FileEntry {offset = offset};
					entries[id] = entry;
				}
				
				// set the length of the final entry
				if (entry != null)
				{
					entry.length = stream.Length() - entry.offset;
				}
				
				success = true;
			}
			finally
			{
				if (!success && (stream != null))
				{
					try
					{
						stream.Close();
					}
					catch (System.IO.IOException)
					{
						// ignored: we are already on a failure path and want to surface the original exception
					}
				}
			}
		}
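
Each compound-file entry's length is derived from the distance to the next entry's offset, with the last entry running to the end of the file. A small illustrative helper built on that assumption; EntryLengthDemo is hypothetical, not the CompoundFileReader API.

using System.Collections.Generic;

static class EntryLengthDemo
{
    public static List<(long Offset, long Length)> Build(long[] offsets, long fileLength)
    {
        var entries = new List<(long, long)>();
        for (int i = 0; i < offsets.Length; i++)
        {
            // Length of entry i is the gap to the next offset, or to end-of-file for the last one.
            long end = (i + 1 < offsets.Length) ? offsets[i + 1] : fileLength;
            entries.Add((offsets[i], end - offsets[i]));
        }
        return entries;
    }
}
// Build(new long[] { 32, 500, 1200 }, 2048) -> (32, 468), (500, 700), (1200, 848)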
Example #5
		// Used only by clone
		private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int formatSize, int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream)
		{
			this.fieldInfos = fieldInfos;
			this.numTotalDocs = numTotalDocs;
			this.size = size;
			this.format = format;
			this.formatSize = formatSize;
			this.docStoreOffset = docStoreOffset;
			this.cloneableFieldsStream = cloneableFieldsStream;
			this.cloneableIndexStream = cloneableIndexStream;
			fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
			indexStream = (IndexInput) cloneableIndexStream.Clone();
		}
Example #6
	    protected override void Dispose(bool disposing)
        {
            if (isDisposed) return;

            if (disposing)
            {
                if (main != null)
                {
                    main.Dispose();
                }
            }

            main = null;
            isDisposed = true;
        }
Example #7
		protected MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval)
		{
			this.skipStream = new IndexInput[maxSkipLevels];
			this.skipPointer = new long[maxSkipLevels];
			this.childPointer = new long[maxSkipLevels];
			this.numSkipped = new int[maxSkipLevels];
			this.maxNumberOfSkipLevels = maxSkipLevels;
			this.skipInterval = new int[maxSkipLevels];
			this.skipStream[0] = skipStream;
			this.inputIsBuffered = (skipStream is BufferedIndexInput);
			this.skipInterval[0] = skipInterval;
			for (int i = 1; i < maxSkipLevels; i++)
			{
				// cache skip intervals
				this.skipInterval[i] = this.skipInterval[i - 1] * skipInterval;
			}
			skipDoc = new int[maxSkipLevels];
		}
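
The loop above caches one skip interval per level, each a factor of skipInterval larger than the level below it. A quick stand-alone illustration of the resulting values (demo code, not library code):

using System;

class SkipIntervalDemo
{
    static void Main()
    {
        const int skipInterval = 16, maxSkipLevels = 4;
        var intervals = new int[maxSkipLevels];
        intervals[0] = skipInterval;
        for (int i = 1; i < maxSkipLevels; i++)
            intervals[i] = intervals[i - 1] * skipInterval;

        // Prints: 16, 256, 4096, 65536 -- level i skips skipInterval^(i+1) docs per entry.
        Console.WriteLine(string.Join(", ", intervals));
    }
}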
Example #8
		/// <summary>Read as a d-gaps list </summary>
		private void ReadDgaps(IndexInput input)
		{
			size = input.ReadInt(); // (re)read size
			count = input.ReadInt(); // read count
			bits = new byte[(size >> 3) + 1]; // allocate bits
			int last = 0;
			int n = Count();
			while (n > 0)
			{
				last += input.ReadVInt();
				bits[last] = input.ReadByte();
				n -= BYTE_COUNTS[bits[last] & 0xFF];
			}
		}
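
ReadDgaps reconstructs a sparse bit vector from d-gaps: only the non-zero bytes are stored, each as the distance from the previous non-zero byte plus the byte value. A minimal sketch of that encode/decode pair on plain arrays; DgapDemo is hypothetical and ignores the VInt wire format and the count-based termination used above.

using System.Collections.Generic;

static class DgapDemo
{
    public static List<(int Gap, byte Value)> Encode(byte[] bits)
    {
        var gaps = new List<(int, byte)>();
        int last = 0;
        for (int i = 0; i < bits.Length; i++)
        {
            if (bits[i] == 0) continue;
            gaps.Add((i - last, bits[i])); // distance from the previous non-zero byte
            last = i;
        }
        return gaps;
    }

    public static byte[] Decode(List<(int Gap, byte Value)> gaps, int size)
    {
        var bits = new byte[size];
        int last = 0;
        foreach (var (gap, value) in gaps)
        {
            last += gap;        // same accumulation as `last += input.ReadVInt()` above
            bits[last] = value;
        }
        return bits;
    }
}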
Example #9
		/// <summary> Subclasses must implement the actual skip data decoding in this method.
		/// 
		/// </summary>
		/// <param name="level">the level the skip data shall be read from
		/// </param>
		/// <param name="skipStream">the skip stream to read from
		/// </param>
		protected internal abstract int ReadSkipData(int level, IndexInput skipStream);
Example #10
			internal CSIndexInput(IndexInput @base, long fileOffset, long length):this(@base, fileOffset, length, BufferedIndexInput.BUFFER_SIZE)
			{
			}
Example #11
			internal CSIndexInput(IndexInput @base, long fileOffset, long length, int readBufferSize):base(readBufferSize)
			{
				this.base_Renamed = (IndexInput) @base.Clone();
				this.fileOffset = fileOffset;
				this.length = length;
			}
Example #12
		/// <summary>Copy numBytes bytes from input to ourself. </summary>
		public virtual void CopyBytes(IndexInput input, long numBytes)
		{
			System.Diagnostics.Debug.Assert(numBytes >= 0, "numBytes=" + numBytes);
			long left = numBytes;
			if (copyBuffer == null)
				copyBuffer = new byte[COPY_BUFFER_SIZE];
			while (left > 0)
			{
				int toCopy;
				if (left > COPY_BUFFER_SIZE)
					toCopy = COPY_BUFFER_SIZE;
				else
					toCopy = (int) left;
				input.ReadBytes(copyBuffer, 0, toCopy);
				WriteBytes(copyBuffer, 0, toCopy);
				left -= toCopy;
			}
		}
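
The same fixed-buffer chunked-copy pattern, sketched with plain System.IO streams rather than IndexInput/IndexOutput; ChunkedCopy is a hypothetical helper, not library API.

using System;
using System.IO;

static class ChunkedCopy
{
    // Copy exactly `numBytes` from `src` to `dst` through a fixed-size buffer.
    public static void CopyBytes(Stream src, Stream dst, long numBytes, int bufferSize = 16384)
    {
        var buffer = new byte[bufferSize];
        long left = numBytes;
        while (left > 0)
        {
            int toCopy = (int)Math.Min(left, buffer.Length);
            int read = src.Read(buffer, 0, toCopy);
            if (read <= 0) throw new EndOfStreamException("ran out of bytes to copy");
            dst.Write(buffer, 0, read);
            left -= read;
        }
    }
}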
Example #13
	    protected override void Dispose(bool disposing)
        {
            lock (this)
            {
                if (isDisposed) return;
                if (disposing)
                {
                    if (entries != null)
                    {
                        entries.Clear();
                    }
                    if (stream != null)
                    {
                        stream.Close();
                    }
                }

                entries = null;
                stream = null;
                isDisposed = true;
            }
        }
Example #14
		protected internal override int ReadSkipData(int level, IndexInput skipStream)
		{
			int delta;
			if (currentFieldStoresPayloads)
			{
				// the current field stores payloads.
				// if the doc delta is odd then we have
				// to read the current payload length
				// because it differs from the length of the
				// previous payload
				delta = skipStream.ReadVInt();
				if ((delta & 1) != 0)
				{
					payloadLength[level] = skipStream.ReadVInt();
				}
				delta = Number.URShift(delta, 1);
			}
			else
			{
				delta = skipStream.ReadVInt();
			}
			freqPointer[level] += skipStream.ReadVInt();
			proxPointer[level] += skipStream.ReadVInt();
			
			return delta;
		}
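
The reader above implies a write-side convention: when the field stores payloads, the doc delta is left-shifted one bit and the low bit flags a changed payload length. A hedged sketch of that encoding; SkipEntryDemo is hypothetical, and the real writer emits VInts to an IndexOutput rather than filling a list.

using System.Collections.Generic;

static class SkipEntryDemo
{
    public static List<int> EncodeDocDelta(int docDelta, bool storesPayloads,
                                           bool payloadLengthChanged, int payloadLength)
    {
        var output = new List<int>(); // stand-in for a VInt stream
        if (!storesPayloads)
        {
            output.Add(docDelta);
            return output;
        }
        int encoded = (docDelta << 1) | (payloadLengthChanged ? 1 : 0);
        output.Add(encoded);
        if (payloadLengthChanged)
            output.Add(payloadLength); // the reader picks this up when (delta & 1) != 0
        return output;
    }
}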
Example #15
		public ChecksumIndexInput(IndexInput main)
		{
			this.main = main;
            digest = new CRC32();
		}
Example #16
		/// <summary>Read as a bit set </summary>
		private void ReadBits(IndexInput input)
		{
			count = input.ReadInt(); // read count
			bits = new byte[(size >> 3) + 1]; // allocate bits
			input.ReadBytes(bits, 0, bits.Length);
		}
Example #17
	    /// <summary> Construct a new SegmentInfo instance by reading a
		/// previously saved SegmentInfo from input.
		/// 
		/// </summary>
		/// <param name="dir">directory to load from
		/// </param>
		/// <param name="format">format of the segments info file
		/// </param>
		/// <param name="input">input handle to read segment info from
		/// </param>
		internal SegmentInfo(Directory dir, int format, IndexInput input)
		{
			this.dir = dir;
			name = input.ReadString();
			docCount = input.ReadInt();
			if (format <= SegmentInfos.FORMAT_LOCKLESS)
			{
				delGen = input.ReadLong();
				if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
				{
					docStoreOffset = input.ReadInt();
					if (docStoreOffset != - 1)
					{
						docStoreSegment = input.ReadString();
						docStoreIsCompoundFile = (1 == input.ReadByte());
					}
					else
					{
						docStoreSegment = name;
						docStoreIsCompoundFile = false;
					}
				}
				else
				{
					docStoreOffset = - 1;
					docStoreSegment = name;
					docStoreIsCompoundFile = false;
				}
				if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
				{
					hasSingleNormFile = (1 == input.ReadByte());
				}
				else
				{
					hasSingleNormFile = false;
				}
				int numNormGen = input.ReadInt();
				if (numNormGen == NO)
				{
					normGen = null;
				}
				else
				{
					normGen = new long[numNormGen];
					for (int j = 0; j < numNormGen; j++)
					{
						normGen[j] = input.ReadLong();
					}
				}
				isCompoundFile = (sbyte) input.ReadByte();
				preLockless = (isCompoundFile == CHECK_DIR);
				if (format <= SegmentInfos.FORMAT_DEL_COUNT)
				{
					delCount = input.ReadInt();
					System.Diagnostics.Debug.Assert(delCount <= docCount);
				}
				else
					delCount = - 1;
				if (format <= SegmentInfos.FORMAT_HAS_PROX)
					hasProx = input.ReadByte() == 1;
				else
					hasProx = true;
				
				if (format <= SegmentInfos.FORMAT_DIAGNOSTICS)
				{
					diagnostics = input.ReadStringStringMap();
				}
				else
				{
					diagnostics = new Dictionary<string,string>();
				}
			}
			else
			{
				delGen = CHECK_DIR;
				normGen = null;
				isCompoundFile = (sbyte) (CHECK_DIR);
				preLockless = true;
				hasSingleNormFile = false;
				docStoreOffset = - 1;
				docStoreIsCompoundFile = false;
				docStoreSegment = null;
				delCount = - 1;
				hasProx = true;
				diagnostics = new Dictionary<string,string>();
			}
		}
Example #18
		private void Read(IndexInput input, String fileName)
		{
			int firstInt = input.ReadVInt();
			
			if (firstInt < 0)
			{
				// This is a real format
				format = firstInt;
			}
			else
			{
				format = FORMAT_PRE;
			}
			
			if (format != FORMAT_PRE && format != FORMAT_START)
			{
				throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
			}
			
			int size;
			if (format == FORMAT_PRE)
			{
				size = firstInt;
			}
			else
			{
				size = input.ReadVInt(); //read in the size
			}
			
			for (int i = 0; i < size; i++)
			{
				String name = StringHelper.Intern(input.ReadString());
				byte bits = input.ReadByte();
				bool isIndexed = (bits & IS_INDEXED) != 0;
				bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
				bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
				bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
				bool omitNorms = (bits & OMIT_NORMS) != 0;
				bool storePayloads = (bits & STORE_PAYLOADS) != 0;
				bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
				
				AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
			}
			
			if (input.FilePointer != input.Length())
			{
				throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length());
			}
		}
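
Each per-field byte read above is a bit mask of the decoded flags. A small stand-alone illustration of the masking; the constant values shown here are assumptions chosen to mirror the names used above, not necessarily the exact values in FieldInfos.

using System;

static class FieldFlagsDemo
{
    // Assumed mask values for illustration only.
    const byte IS_INDEXED = 0x1;
    const byte STORE_TERMVECTOR = 0x2;
    const byte OMIT_NORMS = 0x10;

    static void Main()
    {
        byte bits = 0x11; // indexed + norms omitted, under the assumed masks
        Console.WriteLine($"indexed={(bits & IS_INDEXED) != 0}, " +
                          $"termVector={(bits & STORE_TERMVECTOR) != 0}, " +
                          $"omitNorms={(bits & OMIT_NORMS) != 0}");
    }
}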
Example #19
		// It is not always necessary to move the prox pointer
		// to a new document after the freq pointer has been moved.
		// Consider, for example, a phrase query with two terms:
		// the freq pointer for term 1 has to move to document x
		// to answer the question of whether the term occurs in that document.
		// But the positions only have to be read if term 2 also matches
		// document x, to figure out whether term 1 and term 2 appear next
		// to each other in document x and thus satisfy the query.
		// So we move the prox pointer lazily to the document
		// as soon as positions are requested.
		private void LazySkip()
		{
			if (proxStream == null)
			{
				// clone lazily
				proxStream = (IndexInput) parent.core.proxStream.Clone();
			}
			
			// we might have to skip the current payload
			// if it was not read yet
			SkipPayload();
			
			if (lazySkipPointer != - 1)
			{
				proxStream.Seek(lazySkipPointer);
				lazySkipPointer = - 1;
			}
			
			if (lazySkipProxCount != 0)
			{
				SkipPositions(lazySkipProxCount);
				lazySkipProxCount = 0;
			}
		}
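
Stripped of the Lucene specifics, the pattern described in the comment is: remember the target of a pending seek and only perform it when position data is actually requested. A generic sketch follows; LazySeeker and its members are hypothetical.

class LazySeeker
{
    private readonly System.IO.Stream stream;
    private long pendingSeek = -1;

    public LazySeeker(System.IO.Stream stream) { this.stream = stream; }

    // Called whenever the doc pointer moves; cheap, no I/O happens here.
    public void ScheduleSeek(long target) => pendingSeek = target;

    // Called only when position data is actually needed.
    public void EnsurePositioned()
    {
        if (pendingSeek != -1)
        {
            stream.Seek(pendingSeek, System.IO.SeekOrigin.Begin);
            pendingSeek = -1;
        }
    }
}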
Example #20
		internal SegmentTermPositions(SegmentReader p):base(p)
		{
			this.proxStream = null; // the proxStream will be cloned lazily when nextPosition() is called for the first time
		}
Example #21
			internal SkipBuffer(IndexInput input, int length)
			{
				data = new byte[length];
				pointer = input.FilePointer;
				input.ReadBytes(data, 0, length);
			}
Example #22
		internal DefaultSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval):base(skipStream, maxSkipLevels, skipInterval)
		{
			freqPointer = new long[maxSkipLevels];
			proxPointer = new long[maxSkipLevels];
			payloadLength = new int[maxSkipLevels];
		}
Example #23
		/// <summary>Bulk write a contiguous series of documents.  The
		/// lengths array is the length (in bytes) of each raw
		/// document.  The stream IndexInput is the
		/// fieldsStream from which we should bulk-copy all
		/// bytes. 
		/// </summary>
		internal void  AddRawDocuments(IndexInput stream, int[] lengths, int numDocs)
		{
			long position = fieldsStream.FilePointer;
			long start = position;
			for (int i = 0; i < numDocs; i++)
			{
				indexStream.WriteLong(position);
				position += lengths[i];
			}
			fieldsStream.CopyBytes(stream, position - start);
			System.Diagnostics.Debug.Assert(fieldsStream.FilePointer == position);
		}
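
The loop above converts per-document byte lengths into absolute index pointers before the raw bytes are bulk-copied in a single CopyBytes call. A quick stand-alone illustration of that arithmetic, using demo values only:

using System;

class RawDocPointerDemo
{
    static void Main()
    {
        long position = 1_024;                 // pretend this is fieldsStream.FilePointer
        int[] lengths = { 300, 120, 512 };     // raw byte length of each document
        foreach (int len in lengths)
        {
            Console.WriteLine($"index entry -> {position}");
            position += len;                   // the next doc starts right after this one
        }
        // Prints 1024, 1324, 1444; the copied region is position - start = 932 bytes.
    }
}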
Example #24
		internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
		{
			bool success = false;
			isOriginal = true;
			try
			{
				fieldInfos = fn;
				
				cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
				cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);
				
				// The first version of the fdx file did not include a format
				// header, but the first int will always be 0 in that case
				int firstInt = cloneableIndexStream.ReadInt();
				format = firstInt == 0 ? 0 : firstInt;
				
				if (format > FieldsWriter.FORMAT_CURRENT)
					throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
				
				formatSize = format > FieldsWriter.FORMAT ? 4 : 0;
				
				if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
					cloneableFieldsStream.SetModifiedUTF8StringsMode();
				
				fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
				
				long indexSize = cloneableIndexStream.Length() - formatSize;
				
				if (docStoreOffset != - 1)
				{
					// We read only a slice out of this shared fields file
					this.docStoreOffset = docStoreOffset;
					this.size = size;
					
					// Verify the file is long enough to hold all of our
					// docs
					System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
				}
				else
				{
					this.docStoreOffset = 0;
					this.size = (int) (indexSize >> 3);
				}
				
				indexStream = (IndexInput) cloneableIndexStream.Clone();
				numTotalDocs = (int) (indexSize >> 3);
				success = true;
			}
			finally
			{
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above. In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!success)
				{
					Dispose();
				}
			}
		}
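
Because each index entry is a single 8-byte pointer, the document count falls out of the index file length as indexSize >> 3. A tiny arithmetic check, with illustrative values only:

using System;

class FdxSizeDemo
{
    static void Main()
    {
        long fileLength = 4 + 8 * 1_000;       // assumed: 4-byte format header + 1000 pointers
        int formatSize = 4;
        long indexSize = fileLength - formatSize;
        Console.WriteLine(indexSize >> 3);     // prints 1000 documents
    }
}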