/// <summary>
/// Reads the next term from <paramref name="input"/> into this buffer.
/// The stream supplies an offset and a count: <c>count</c> new units are
/// written at <c>offset</c>, and everything before <c>offset</c> is kept
/// from the previous contents. The field name is resolved last, from the
/// field number that follows the term text.
/// </summary>
/// <param name="input">stream positioned at the start of a term entry</param>
/// <param name="fieldInfos">used to map the stored field number to a name</param>
public void Read(IndexInput input, FieldInfos fieldInfos)
{
    // Whatever term object was cached no longer matches the buffer.
    this.term = null;

    int offset = input.ReadVInt();
    int count = input.ReadVInt();
    int newLength = offset + count;

    if (preUTF8Strings)
    {
        // Old format: the stream holds chars, read them straight into the text.
        text.SetLength(newLength);
        input.ReadChars(text.result, offset, count);
    }
    else
    {
        bool wasDirty = dirty;
        if (wasDirty)
        {
            // The byte buffer is stale: rebuild it from the current text first.
            UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
        }

        bytes.SetLength(newLength);
        input.ReadBytes(bytes.result, offset, count);

        if (wasDirty)
        {
            // Convert everything, since the whole byte buffer was regenerated.
            UnicodeUtil.UTF8toUTF16(bytes.result, 0, newLength, text);
            dirty = false;
        }
        else
        {
            // Only the freshly read bytes need converting.
            UnicodeUtil.UTF8toUTF16(bytes.result, offset, count, text);
        }
    }

    this.field = fieldInfos.FieldName(input.ReadVInt());
}
/// <summary>
/// Creates an enumerator over the term data in <paramref name="i"/> and
/// reads the file header: the format version (if any), the term count,
/// and the index/skip settings that the detected format stores.
/// </summary>
/// <param name="i">the input to read the header and terms from</param>
/// <param name="fis">field infos kept for later term reads</param>
/// <param name="isi">stored as <c>isIndex</c>; affects how format -1 headers are read</param>
internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
{
    input = i;
    fieldInfos = fis;
    isIndex = isi;
    maxSkipLevels = 1; // use single-level skip lists for formats > -3

    int header = input.ReadInt();
    if (header < 0)
    {
        // A negative first int is an explicit format version number.
        format = header;

        // check that it is a format we can understand
        if (format < TermInfosWriter.FORMAT_CURRENT)
        {
            throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");
        }

        size = input.ReadLong();

        if (format != -1)
        {
            indexInterval = input.ReadInt();
            skipInterval = input.ReadInt();
            if (format <= TermInfosWriter.FORMAT)
            {
                // this new format introduces multi-level skipping
                maxSkipLevels = input.ReadInt();
            }
        }
        else
        {
            if (!isIndex)
            {
                indexInterval = input.ReadInt();
                formatM1SkipInterval = input.ReadInt();
            }
            // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
            // skipTo implementation of these versions
            skipInterval = System.Int32.MaxValue;
        }

        System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0");
        System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0");
    }
    else
    {
        // Original-format file without a version number: the first int is the size.
        format = 0;
        size = header;

        // back-compatible settings
        indexInterval = 128;
        skipInterval = System.Int32.MaxValue; // switch off skipTo optimization
    }

    if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
    {
        termBuffer.SetPreUTF8Strings();
        scanBuffer.SetPreUTF8Strings();
        prevBuffer.SetPreUTF8Strings();
    }
}
/// <summary>
/// Creates a term-docs enumerator bound to <paramref name="parent"/>.
/// It works on its own clone of the parent's frequency stream and takes
/// the skip settings from the parent's terms reader.
/// </summary>
/// <param name="parent">the segment reader this enumerator reads from</param>
public /*protected internal*/ SegmentTermDocs(SegmentReader parent)
{
    this.parent = parent;

    // Private clone of the frequency stream for this enumerator.
    freqStream = (IndexInput) parent.core.freqStream.Clone();

    // Read the deleted-docs reference while holding the parent's monitor.
    lock (parent)
    {
        deletedDocs = parent.deletedDocs;
    }

    skipInterval = parent.core.GetTermsReader().SkipInterval;
    maxSkipLevels = parent.core.GetTermsReader().MaxSkipLevels;
}
/// <summary>
/// Opens the compound file <paramref name="name"/> in <paramref name="dir"/>
/// and reads its table of contents into <c>entries</c>. Each entry stores
/// only its offset on disk; its length is derived from the offset of the
/// following entry, or from the stream length for the last one.
/// If anything fails before the table is fully read, the stream is closed.
/// </summary>
/// <param name="dir">directory containing the compound file</param>
/// <param name="name">name of the compound file</param>
/// <param name="readBufferSize">buffer size passed to <c>OpenInput</c></param>
public CompoundFileReader(Directory dir, System.String name, int readBufferSize)
{
    directory = dir;
    fileName = name;
    this.readBufferSize = readBufferSize;

    bool initialized = false;
    try
    {
        stream = dir.OpenInput(name, readBufferSize);

        // read the directory and init files
        int entryCount = stream.ReadVInt();
        FileEntry previous = null;
        for (int n = 0; n < entryCount; n++)
        {
            long offset = stream.ReadLong();
            System.String id = stream.ReadString();

            if (previous != null)
            {
                // The previous entry ends where this one begins.
                previous.length = offset - previous.offset;
            }

            previous = new FileEntry { offset = offset };
            entries[id] = previous;
        }

        // The final entry runs to the end of the stream.
        if (previous != null)
        {
            previous.length = stream.Length() - previous.offset;
        }

        initialized = true;
    }
    finally
    {
        if (!initialized && stream != null)
        {
            try
            {
                stream.Close();
            }
            catch (System.IO.IOException)
            {
                // Best-effort close; the original failure is the one that propagates.
            }
        }
    }
}
/// <summary>
/// Used only by clone. Copies the supplied state into the new instance and
/// gives it its own clones of the two cloneable streams.
/// </summary>
private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int formatSize, int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream)
{
    // Plain state, copied as-is.
    this.fieldInfos = fieldInfos;
    this.numTotalDocs = numTotalDocs;
    this.size = size;
    this.format = format;
    this.formatSize = formatSize;
    this.docStoreOffset = docStoreOffset;

    // Keep references to the cloneable streams.
    this.cloneableFieldsStream = cloneableFieldsStream;
    this.cloneableIndexStream = cloneableIndexStream;

    // Working streams are fresh clones owned by this instance.
    this.fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
    this.indexStream = (IndexInput) cloneableIndexStream.Clone();
}
/// <summary>
/// Releases the wrapped input. Does nothing if already disposed.
/// </summary>
/// <param name="disposing">when true, the wrapped <c>main</c> input is disposed as well</param>
protected override void Dispose(bool disposing)
{
    if (!isDisposed)
    {
        if (disposing && main != null)
        {
            main.Dispose();
        }

        main = null;
        isDisposed = true;
    }
}
/// <summary>
/// Allocates the per-level state arrays (one slot per skip level) and
/// precomputes the skip interval of every level: level 0 uses
/// <paramref name="skipInterval"/>, and each higher level multiplies the
/// previous level's interval by <paramref name="skipInterval"/> again.
/// </summary>
/// <param name="skipStream">stream used for level 0</param>
/// <param name="maxSkipLevels">number of levels to allocate state for</param>
/// <param name="skipInterval">base skip interval</param>
protected MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval)
{
    this.skipStream = new IndexInput[maxSkipLevels];
    skipPointer = new long[maxSkipLevels];
    childPointer = new long[maxSkipLevels];
    numSkipped = new int[maxSkipLevels];
    maxNumberOfSkipLevels = maxSkipLevels;
    this.skipInterval = new int[maxSkipLevels];

    this.skipStream[0] = skipStream;
    inputIsBuffered = skipStream is BufferedIndexInput;

    // cache skip intervals
    int interval = skipInterval;
    this.skipInterval[0] = interval;
    for (int level = 1; level < maxSkipLevels; level++)
    {
        interval *= skipInterval;
        this.skipInterval[level] = interval;
    }

    skipDoc = new int[maxSkipLevels];
}
/// <summary>
/// Read as a d-gaps list: the stream holds (gap, byte) pairs, where each gap
/// is added to the running byte index and the byte is stored at that index.
/// Reading stops once the bits accounted for via <c>BYTE_COUNTS</c> reach
/// the starting value returned by <c>Count()</c>.
/// </summary>
/// <param name="input">stream positioned at the size field</param>
private void ReadDgaps(IndexInput input)
{
    size = input.ReadInt();            // (re)read size
    count = input.ReadInt();           // read count
    bits = new byte[(size >> 3) + 1];  // allocate bits

    int index = 0;
    int remaining = Count();
    while (remaining > 0)
    {
        index += input.ReadVInt();
        byte value = input.ReadByte();
        bits[index] = value;
        remaining -= BYTE_COUNTS[value & 0xFF];
    }
}
/// <summary> Subclasses must implement the actual skip data decoding in this method.
///
/// </summary>
/// <param name="level">the level skip data shall be read from
/// </param>
/// <param name="skipStream">the skip stream to read from
/// </param>
/// <returns>an int decoded from the skip entry; presumably the doc delta of
/// the entry just read (the override visible in this file returns that
/// value) - confirm against callers
/// </returns>
protected internal abstract int ReadSkipData(int level, IndexInput skipStream);
/// <summary>
/// Creates an input over a slice of <paramref name="base"/> using the
/// default read buffer size, <c>BufferedIndexInput.BUFFER_SIZE</c>.
/// </summary>
/// <param name="base">the underlying input</param>
/// <param name="fileOffset">offset of the slice within the underlying input</param>
/// <param name="length">length of the slice</param>
internal CSIndexInput(IndexInput @base, long fileOffset, long length)
    : this(@base, fileOffset, length, BufferedIndexInput.BUFFER_SIZE)
{
}
/// <summary>
/// Creates an input over a slice of <paramref name="base"/>. The instance
/// keeps its own clone of the underlying input rather than the one passed in.
/// </summary>
/// <param name="base">the underlying input; cloned, not stored directly</param>
/// <param name="fileOffset">offset of the slice within the underlying input</param>
/// <param name="length">length of the slice</param>
/// <param name="readBufferSize">buffer size handed to the base constructor</param>
internal CSIndexInput(IndexInput @base, long fileOffset, long length, int readBufferSize)
    : base(readBufferSize)
{
    base_Renamed = (IndexInput) @base.Clone();
    this.fileOffset = fileOffset;
    this.length = length;
}
/// <summary>
/// Copy <paramref name="numBytes"/> bytes from <paramref name="input"/> to
/// ourself, in chunks of at most <c>COPY_BUFFER_SIZE</c> bytes. The
/// intermediate buffer is allocated on first use and then reused.
/// </summary>
/// <param name="input">source to read from</param>
/// <param name="numBytes">number of bytes to copy; asserted to be non-negative</param>
public virtual void CopyBytes(IndexInput input, long numBytes)
{
    System.Diagnostics.Debug.Assert(numBytes >= 0, "numBytes=" + numBytes);

    if (copyBuffer == null)
    {
        copyBuffer = new byte[COPY_BUFFER_SIZE];
    }

    long remaining = numBytes;
    while (remaining > 0)
    {
        // Full buffer while more than a buffer's worth remains, otherwise the tail.
        int chunk = COPY_BUFFER_SIZE;
        if (remaining <= COPY_BUFFER_SIZE)
        {
            chunk = (int) remaining;
        }

        input.ReadBytes(copyBuffer, 0, chunk);
        WriteBytes(copyBuffer, 0, chunk);
        remaining -= chunk;
    }
}
/// <summary>
/// Releases the entry table and the underlying stream. Runs under this
/// instance's monitor and does nothing if already disposed.
/// </summary>
/// <param name="disposing">when true, the entry table is cleared and the stream is closed</param>
protected override void Dispose(bool disposing)
{
    lock (this)
    {
        if (!isDisposed)
        {
            if (disposing)
            {
                if (entries != null)
                {
                    entries.Clear();
                }

                if (stream != null)
                {
                    stream.Close();
                }
            }

            // Drop the references whether or not we were asked to dispose them.
            entries = null;
            stream = null;
            isDisposed = true;
        }
    }
}
/// <summary>
/// Reads one skip entry for <paramref name="level"/>: the doc delta
/// (optionally carrying a payload-length flag in its low bit), then the
/// deltas that advance this level's freq and prox pointers.
/// </summary>
/// <param name="level">the level whose state is updated</param>
/// <param name="skipStream">the skip stream to read from</param>
/// <returns>the doc delta read from the stream</returns>
protected internal override int ReadSkipData(int level, IndexInput skipStream)
{
    int docDelta = skipStream.ReadVInt();

    if (currentFieldStoresPayloads)
    {
        // The current field stores payloads. An odd value means the payload
        // length differs from the previous one and follows in the stream.
        bool payloadLengthFollows = (docDelta & 1) != 0;
        if (payloadLengthFollows)
        {
            payloadLength[level] = skipStream.ReadVInt();
        }

        // Strip the flag bit to get the actual delta.
        docDelta = Number.URShift(docDelta, 1);
    }

    freqPointer[level] += skipStream.ReadVInt();
    proxPointer[level] += skipStream.ReadVInt();

    return docDelta;
}
/// <summary>
/// Wraps <paramref name="main"/> and creates a new <c>CRC32</c> digest
/// for this instance.
/// </summary>
/// <param name="main">the input being wrapped</param>
public ChecksumIndexInput(IndexInput main)
{
    this.main = main;
    this.digest = new CRC32();
}
/// <summary>
/// Read as a bit set: the count, followed by the raw bytes. The byte array
/// is sized from the current value of <c>size</c>, which this method does
/// not read itself.
/// </summary>
/// <param name="input">stream positioned at the count field</param>
private void ReadBits(IndexInput input)
{
    count = input.ReadInt();          // read count

    int byteLength = (size >> 3) + 1;
    bits = new byte[byteLength];      // allocate bits
    input.ReadBytes(bits, 0, byteLength);
}
/// <summary> Construct a new SegmentInfo instance by reading a
/// previously saved SegmentInfo from input. Which fields are present in
/// the stream depends on <paramref name="format"/>; fields missing from
/// the given format get fixed default values.
/// </summary>
/// <param name="dir">directory to load from
/// </param>
/// <param name="format">format of the segments info file
/// </param>
/// <param name="input">input handle to read segment info from
/// </param>
internal SegmentInfo(Directory dir, int format, IndexInput input)
{
    this.dir = dir;
    name = input.ReadString();
    docCount = input.ReadInt();

    if (format > SegmentInfos.FORMAT_LOCKLESS)
    {
        // Nothing else is stored for this format; fill in the defaults.
        delGen = CHECK_DIR;
        normGen = null;
        isCompoundFile = (sbyte) (CHECK_DIR);
        preLockless = true;
        hasSingleNormFile = false;
        docStoreOffset = -1;
        docStoreIsCompoundFile = false;
        docStoreSegment = null;
        delCount = -1;
        hasProx = true;
        diagnostics = new Dictionary<string,string>();
        return;
    }

    delGen = input.ReadLong();

    // Doc store: only read when the format stores it; the segment name and
    // compound flag follow only if the offset is not -1.
    bool docStoreDetailsFollow = false;
    if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
    {
        docStoreOffset = input.ReadInt();
        docStoreDetailsFollow = docStoreOffset != -1;
    }
    else
    {
        docStoreOffset = -1;
    }

    if (docStoreDetailsFollow)
    {
        docStoreSegment = input.ReadString();
        docStoreIsCompoundFile = (1 == input.ReadByte());
    }
    else
    {
        docStoreSegment = name;
        docStoreIsCompoundFile = false;
    }

    // The byte is consumed only when the format stores it (short-circuit).
    hasSingleNormFile = (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) && (1 == input.ReadByte());

    int normGenCount = input.ReadInt();
    if (normGenCount == NO)
    {
        normGen = null;
    }
    else
    {
        normGen = new long[normGenCount];
        for (int k = 0; k < normGenCount; k++)
        {
            normGen[k] = input.ReadLong();
        }
    }

    isCompoundFile = (sbyte) input.ReadByte();
    preLockless = (isCompoundFile == CHECK_DIR);

    if (format <= SegmentInfos.FORMAT_DEL_COUNT)
    {
        delCount = input.ReadInt();
        System.Diagnostics.Debug.Assert(delCount <= docCount);
    }
    else
    {
        delCount = -1;
    }

    // Defaults to true when the format does not store the flag; otherwise
    // the byte is read and compared (short-circuit keeps the read conditional).
    hasProx = (format > SegmentInfos.FORMAT_HAS_PROX) || (input.ReadByte() == 1);

    if (format <= SegmentInfos.FORMAT_DIAGNOSTICS)
    {
        diagnostics = input.ReadStringStringMap();
    }
    else
    {
        diagnostics = new Dictionary<string,string>();
    }
}
/// <summary>
/// Reads field definitions from <paramref name="input"/> and registers each
/// one through <c>AddInternal</c>. A negative first value is taken as an
/// explicit format number; otherwise the file is treated as
/// <c>FORMAT_PRE</c> and the first value is the number of fields.
/// </summary>
/// <param name="input">stream positioned at the start of the file</param>
/// <param name="fileName">file name, used only in error messages</param>
/// <exception cref="CorruptIndexException">
/// if the format is neither <c>FORMAT_PRE</c> nor <c>FORMAT_START</c>, or
/// if the stream is not fully consumed after the last field
/// </exception>
private void Read(IndexInput input, String fileName)
{
    int firstInt = input.ReadVInt();

    if (firstInt < 0)
    {
        // This is a real format
        format = firstInt;
    }
    else
    {
        format = FORMAT_PRE;
    }

    // Short-circuit && is the idiomatic operator for boolean conditions
    // (the previous non-short-circuit & evaluated both sides needlessly).
    if (format != FORMAT_PRE && format != FORMAT_START)
    {
        throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
    }

    int size;
    if (format == FORMAT_PRE)
    {
        // No header in this format: the first value already was the size.
        size = firstInt;
    }
    else
    {
        size = input.ReadVInt(); //read in the size
    }

    for (int i = 0; i < size; i++)
    {
        String name = StringHelper.Intern(input.ReadString());

        // One byte of flags per field; each option is a single bit mask.
        byte bits = input.ReadByte();
        bool isIndexed = (bits & IS_INDEXED) != 0;
        bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
        bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
        bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
        bool omitNorms = (bits & OMIT_NORMS) != 0;
        bool storePayloads = (bits & STORE_PAYLOADS) != 0;
        bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;

        AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
    }

    // Trailing bytes mean the file does not match what we parsed.
    if (input.FilePointer != input.Length())
    {
        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length());
    }
}
// It is not always necessary to move the prox pointer to a new document
// right after the freq pointer has been moved. Take a phrase query with
// two terms: the freq pointer for term 1 must advance to document x to
// find out whether the term occurs there, but positions only need to be
// read if term 2 matches document x as well, to check whether both terms
// are adjacent. The prox pointer is therefore moved lazily, as soon as
// positions are actually requested.
private void LazySkip()
{
    // clone lazily
    if (proxStream == null)
    {
        proxStream = (IndexInput) parent.core.proxStream.Clone();
    }

    // The current payload may not have been read yet; skip it if so.
    SkipPayload();

    // Apply a pending seek, then clear it.
    if (lazySkipPointer != -1)
    {
        proxStream.Seek(lazySkipPointer);
        lazySkipPointer = -1;
    }

    // Apply pending position skips, then clear them.
    if (lazySkipProxCount != 0)
    {
        SkipPositions(lazySkipProxCount);
        lazySkipProxCount = 0;
    }
}
/// <summary>
/// Creates a positions enumerator over segment reader <paramref name="p"/>.
/// </summary>
/// <param name="p">the segment reader passed on to the base constructor</param>
internal SegmentTermPositions(SegmentReader p)
    : base(p)
{
    // Deliberately left unset: the proxStream will be cloned lazily when
    // nextPosition() is called for the first time.
    proxStream = null;
}
/// <summary>
/// Reads <paramref name="length"/> bytes from <paramref name="input"/> into
/// a new buffer and records the file pointer the input had before the read.
/// </summary>
/// <param name="input">source to read from</param>
/// <param name="length">number of bytes to buffer</param>
internal SkipBuffer(IndexInput input, int length)
{
    data = new byte[length];

    // Capture the position first; the read below moves the input forward.
    pointer = input.FilePointer;
    input.ReadBytes(data, 0, data.Length);
}
/// <summary>
/// Creates the reader and allocates one freq pointer, prox pointer and
/// payload length slot per skip level.
/// </summary>
/// <param name="skipStream">passed through to the base constructor</param>
/// <param name="maxSkipLevels">number of levels; also the size of each per-level array</param>
/// <param name="skipInterval">passed through to the base constructor</param>
internal DefaultSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval)
    : base(skipStream, maxSkipLevels, skipInterval)
{
    this.freqPointer = new long[maxSkipLevels];
    this.proxPointer = new long[maxSkipLevels];
    this.payloadLength = new int[maxSkipLevels];
}
/// <summary>Bulk write a contiguous series of documents. For every
/// document the position it will occupy in the fields stream is written
/// to the index stream; afterwards the bytes of all documents are copied
/// from <paramref name="stream"/> in one go.
/// </summary>
/// <param name="stream">the fieldsStream from which all bytes are bulk-copied</param>
/// <param name="lengths">length (in bytes) of each raw document</param>
/// <param name="numDocs">number of documents to add; only this many entries of <paramref name="lengths"/> are used</param>
internal void AddRawDocuments(IndexInput stream, int[] lengths, int numDocs)
{
    long firstDocPointer = fieldsStream.FilePointer;
    long nextDocPointer = firstDocPointer;

    for (int doc = 0; doc < numDocs; doc++)
    {
        indexStream.WriteLong(nextDocPointer);
        nextDocPointer += lengths[doc];
    }

    long totalBytes = nextDocPointer - firstDocPointer;
    fieldsStream.CopyBytes(stream, totalBytes);

    // After the copy the fields stream must sit exactly past the last document.
    System.Diagnostics.Debug.Assert(fieldsStream.FilePointer == nextDocPointer);
}
/// <summary>
/// Opens the fields data and fields index files of <paramref name="segment"/>
/// in <paramref name="d"/>, validates the format stored at the start of the
/// index file, and works out how many documents this reader covers. If
/// construction fails at any point, <c>Dispose()</c> is called to release
/// whatever was opened.
/// </summary>
/// <param name="d">directory holding the segment files</param>
/// <param name="segment">segment name; the file extensions are appended to it</param>
/// <param name="fn">field infos kept by this reader</param>
/// <param name="readBufferSize">buffer size passed to <c>OpenInput</c></param>
/// <param name="docStoreOffset">-1 if the whole file belongs to this reader, otherwise the offset of its slice</param>
/// <param name="size">number of docs in the slice; used only when <paramref name="docStoreOffset"/> is not -1</param>
/// <exception cref="CorruptIndexException">if the stored format is greater than <c>FieldsWriter.FORMAT_CURRENT</c></exception>
internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
{
    bool success = false;
    isOriginal = true;
    try
    {
        fieldInfos = fn;

        cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
        cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);

        // First version of fdx did not include a format header, but the
        // first int will always be 0 in that case, so it can be used as
        // the format value directly.
        format = cloneableIndexStream.ReadInt();

        if (format > FieldsWriter.FORMAT_CURRENT)
        {
            throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
        }

        if (format > FieldsWriter.FORMAT)
        {
            formatSize = 4;
        }
        else
        {
            formatSize = 0;
        }

        if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
        {
            cloneableFieldsStream.SetModifiedUTF8StringsMode();
        }

        fieldsStream = (IndexInput) cloneableFieldsStream.Clone();

        // Index size without the format header.
        long indexSize = cloneableIndexStream.Length() - formatSize;

        if (docStoreOffset == -1)
        {
            // The whole file is ours.
            this.docStoreOffset = 0;
            this.size = (int) (indexSize >> 3);
        }
        else
        {
            // We read only a slice out of this shared fields file
            this.docStoreOffset = docStoreOffset;
            this.size = size;

            // Verify the file is long enough to hold all of our docs
            System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
        }

        indexStream = (IndexInput) cloneableIndexStream.Clone();
        numTotalDocs = (int) (indexSize >> 3);
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and fine) to hit
        // a FileNotFound exception above. In that case, explicitly close
        // any subset of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            Dispose();
        }
    }
}