public void Read(IndexInput input, FieldInfos fieldInfos)
{
    this.term = null; // invalidate cache
    int start = input.ReadVInt();
    int length = input.ReadVInt();
    int totalLength = start + length;
    if (preUTF8Strings)
    {
        text.SetLength(totalLength);
        input.ReadChars(text.result, start, length);
    }
    else
    {
        if (dirty)
        {
            // Fully convert all bytes since bytes is dirty
            UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
            bytes.SetLength(totalLength);
            input.ReadBytes(bytes.result, start, length);
            UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
            dirty = false;
        }
        else
        {
            // Incrementally convert only the UTF8 bytes that are new:
            bytes.SetLength(totalLength);
            input.ReadBytes(bytes.result, start, length);
            UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
        }
    }
    this.field = fieldInfos.FieldName(input.ReadVInt());
}
private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
{
    int maxDoc = reader.MaxDoc();
    int docCount = 0;
    if (matchingFieldsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        while (docCount < maxDoc)
        {
            int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len);
            fieldsWriter.AddRawDocuments(stream, rawDocLengths, len);
            docCount += len;
            checkAbort.Work(300 * len);
        }
    }
    else
    {
        for (; docCount < maxDoc; docCount++)
        {
            // NOTE: it's very important to first assign to doc then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Document doc = reader.Document(docCount, fieldSelectorMerge);
            fieldsWriter.AddDocument(doc);
            checkAbort.Work(300);
        }
    }
    return docCount;
}
protected internal override int ReadSkipData(int level, IndexInput skipStream)
{
    int delta;
    if (currentFieldStoresPayloads)
    {
        // the current field stores payloads.
        // if the doc delta is odd then we have
        // to read the current payload length
        // because it differs from the length of the
        // previous payload
        delta = skipStream.ReadVInt();
        if ((delta & 1) != 0)
        {
            payloadLength[level] = skipStream.ReadVInt();
        }
        delta = SupportClass.Number.URShift(delta, 1);
    }
    else
    {
        delta = skipStream.ReadVInt();
    }
    freqPointer[level] += skipStream.ReadVInt();
    proxPointer[level] += skipStream.ReadVInt();
    return delta;
}
private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader)
{
    int docCount = 0;
    int maxDoc = reader.MaxDoc();
    if (matchingFieldsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        for (int j = 0; j < maxDoc; )
        {
            if (reader.IsDeleted(j))
            {
                // skip deleted docs
                ++j;
                continue;
            }
            // We can optimize this case (doing a bulk byte copy) since the field
            // numbers are identical
            int start = j, numDocs = 0;
            do
            {
                j++;
                numDocs++;
                if (j >= maxDoc)
                {
                    break;
                }
                if (reader.IsDeleted(j))
                {
                    j++;
                    break;
                }
            }
            while (numDocs < MAX_RAW_MERGE_DOCS);

            IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
            fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
            docCount += numDocs;
            checkAbort.Work(300 * numDocs);
        }
    }
    else
    {
        for (int j = 0; j < maxDoc; j++)
        {
            if (reader.IsDeleted(j))
            {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to doc then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Document doc = reader.Document(j, fieldSelectorMerge);
            fieldsWriter.AddDocument(doc);
            docCount++;
            checkAbort.Work(300);
        }
    }
    return docCount;
}
private int CheckValidFormat(IndexInput in_Renamed)
{
    int format = in_Renamed.ReadInt();
    if (format > FORMAT_CURRENT)
    {
        throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FORMAT_CURRENT + " or less");
    }
    return format;
}
public /*protected internal*/ SegmentTermDocs(SegmentReader parent)
{
    this.parent = parent;
    this.freqStream = (IndexInput) parent.core.freqStream.Clone();
    lock (parent)
    {
        this.deletedDocs = parent.deletedDocs;
    }
    this.skipInterval = parent.core.GetTermsReader().GetSkipInterval();
    this.maxSkipLevels = parent.core.GetTermsReader().GetMaxSkipLevels();
}
private IndexInput GetFieldStream()
{
    IndexInput localFieldsStream = (IndexInput) Enclosing_Instance.fieldsStreamTL.Get();
    if (localFieldsStream == null)
    {
        localFieldsStream = (IndexInput) Enclosing_Instance.cloneableFieldsStream.Clone();
        Enclosing_Instance.fieldsStreamTL.Set(localFieldsStream);
    }
    return localFieldsStream;
}
public CompoundFileReader(Directory dir, System.String name, int readBufferSize)
{
    directory = dir;
    fileName = name;
    this.readBufferSize = readBufferSize;

    bool success = false;

    try
    {
        stream = dir.OpenInput(name, readBufferSize);

        // read the directory and init files
        int count = stream.ReadVInt();
        FileEntry entry = null;
        for (int i = 0; i < count; i++)
        {
            long offset = stream.ReadLong();
            System.String id = stream.ReadString();

            if (entry != null)
            {
                // set length of the previous entry
                entry.length = offset - entry.offset;
            }

            entry = new FileEntry();
            entry.offset = offset;
            entries[id] = entry;
        }

        // set the length of the final entry
        if (entry != null)
        {
            entry.length = stream.Length() - entry.offset;
        }

        success = true;
    }
    finally
    {
        if (!success && (stream != null))
        {
            try
            {
                stream.Close();
            }
            catch (System.IO.IOException e)
            {
            }
        }
    }
}
public override byte[] GetBinaryValue(byte[] result)
{
    Enclosing_Instance.EnsureOpen();

    if (isBinary)
    {
        if (fieldsData == null)
        {
            // Allocate new buffer if result is null or too small
            byte[] b;
            if (result == null || result.Length < toRead)
            {
                b = new byte[toRead];
            }
            else
            {
                b = result;
            }

            IndexInput localFieldsStream = GetFieldStream();

            // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
            // since they are already handling this exception when getting the document
            try
            {
                localFieldsStream.Seek(pointer);
                localFieldsStream.ReadBytes(b, 0, toRead);
                if (isCompressed)
                {
                    fieldsData = Enclosing_Instance.Uncompress(b);
                }
                else
                {
                    fieldsData = b;
                }
            }
            catch (System.IO.IOException e)
            {
                throw new FieldReaderException(e);
            }

            binaryOffset = 0;
            binaryLength = toRead;
        }

        return (byte[]) fieldsData;
    }
    else
    {
        return null;
    }
}
/// <summary>Bulk write a contiguous series of documents. The
/// lengths array is the length (in bytes) of each raw
/// document. The stream IndexInput is the
/// fieldsStream from which we should bulk-copy all
/// bytes.
/// </summary>
internal void AddRawDocuments(IndexInput stream, int[] lengths, int numDocs)
{
    long position = fieldsStream.GetFilePointer();
    long start = position;
    for (int i = 0; i < numDocs; i++)
    {
        indexStream.WriteLong(position);
        position += lengths[i];
    }
    fieldsStream.CopyBytes(stream, position - start);
    System.Diagnostics.Debug.Assert(fieldsStream.GetFilePointer() == position);
}
// Used only by clone
private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int formatSize, int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream)
{
    this.fieldInfos = fieldInfos;
    this.numTotalDocs = numTotalDocs;
    this.size = size;
    this.format = format;
    this.formatSize = formatSize;
    this.docStoreOffset = docStoreOffset;
    this.cloneableFieldsStream = cloneableFieldsStream;
    this.cloneableIndexStream = cloneableIndexStream;
    fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
    indexStream = (IndexInput) cloneableIndexStream.Clone();
}
private void Read(IndexInput input, System.String fileName)
{
    int firstInt = input.ReadVInt();

    if (firstInt < 0)
    {
        // This is a real format
        format = firstInt;
    }
    else
    {
        format = FORMAT_PRE;
    }

    if (format != FORMAT_PRE & format != FORMAT_START)
    {
        throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
    }

    int size;
    if (format == FORMAT_PRE)
    {
        size = firstInt;
    }
    else
    {
        size = input.ReadVInt(); // read in the size
    }

    for (int i = 0; i < size; i++)
    {
        System.String name = StringHelper.Intern(input.ReadString());
        byte bits = input.ReadByte();
        bool isIndexed = (bits & IS_INDEXED) != 0;
        bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
        bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
        bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
        bool omitNorms = (bits & OMIT_NORMS) != 0;
        bool storePayloads = (bits & STORE_PAYLOADS) != 0;
        bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;

        AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
    }

    if (input.GetFilePointer() != input.Length())
    {
        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.GetFilePointer() + " vs size " + input.Length());
    }
}
public override void Close()
{
    lock (this)
    {
        if (stream == null)
        {
            throw new System.IO.IOException("Already closed");
        }

        entries.Clear();
        stream.Close();
        stream = null;
    }
}
/// <summary>Copy the contents of the file with specified extension into the
/// provided output stream. Use the provided buffer for moving data
/// to reduce memory allocation.
/// </summary>
private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
{
    IndexInput is_Renamed = null;
    try
    {
        long startPtr = os.GetFilePointer();

        is_Renamed = directory.OpenInput(source.file);
        long length = is_Renamed.Length();
        long remainder = length;
        int chunk = buffer.Length;

        while (remainder > 0)
        {
            int len = (int) System.Math.Min(chunk, remainder);
            is_Renamed.ReadBytes(buffer, 0, len, false);
            os.WriteBytes(buffer, len);
            remainder -= len;
            if (checkAbort != null)
            {
                // Roughly every 2 MB we will check if
                // it's time to abort
                checkAbort.Work(80);
            }
        }

        // Verify that remainder is 0
        if (remainder != 0)
        {
            throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
        }

        // Verify that the output length diff is equal to original file
        long endPtr = os.GetFilePointer();
        long diff = endPtr - startPtr;
        if (diff != length)
        {
            throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
        }
    }
    finally
    {
        if (is_Renamed != null)
        {
            is_Renamed.Close();
        }
    }
}
/// <summary>read as a d-gaps list </summary>
private void ReadDgaps(IndexInput input)
{
    size = input.ReadInt();           // (re)read size
    count = input.ReadInt();          // read count
    bits = new byte[(size >> 3) + 1]; // allocate bits
    int last = 0;
    int n = Count();
    while (n > 0)
    {
        last += input.ReadVInt();
        bits[last] = input.ReadByte();
        n -= BYTE_COUNTS[bits[last] & 0xFF];
    }
}
/// <summary>The value of the field as a String, or null. If null, the Reader value,
/// binary value, or TokenStream value is used. Exactly one of stringValue(),
/// readerValue(), binaryValue(), and tokenStreamValue() must be set.
/// </summary>
public override System.String StringValue()
{
    Enclosing_Instance.EnsureOpen();
    if (isBinary)
    {
        return null;
    }
    else
    {
        if (fieldsData == null)
        {
            IndexInput localFieldsStream = GetFieldStream();
            try
            {
                localFieldsStream.Seek(pointer);
                if (isCompressed)
                {
                    byte[] b = new byte[toRead];
                    localFieldsStream.ReadBytes(b, 0, b.Length);
                    fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
                }
                else
                {
                    if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                    {
                        byte[] bytes = new byte[toRead];
                        localFieldsStream.ReadBytes(bytes, 0, toRead);
                        fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(bytes);
                    }
                    else
                    {
                        // read in chars b/c we already know the length we need to read
                        char[] chars = new char[toRead];
                        localFieldsStream.ReadChars(chars, 0, toRead);
                        fieldsData = new System.String(chars);
                    }
                }
            }
            catch (System.IO.IOException e)
            {
                throw new FieldReaderException(e);
            }
        }
        return (System.String) fieldsData;
    }
}
public MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval)
{
    this.skipStream = new IndexInput[maxSkipLevels];
    this.skipPointer = new long[maxSkipLevels];
    this.childPointer = new long[maxSkipLevels];
    this.numSkipped = new int[maxSkipLevels];
    this.maxNumberOfSkipLevels = maxSkipLevels;
    this.skipInterval = new int[maxSkipLevels];
    this.skipStream[0] = skipStream;
    this.inputIsBuffered = (skipStream is BufferedIndexInput);
    this.skipInterval[0] = skipInterval;
    for (int i = 1; i < maxSkipLevels; i++)
    {
        // cache skip intervals
        this.skipInterval[i] = this.skipInterval[i - 1] * skipInterval;
    }
    skipDoc = new int[maxSkipLevels];
}
/// <summary> Construct a FieldInfos object using the directory and the name of the file
/// IndexInput
/// </summary>
/// <param name="d">The directory to open the IndexInput from
/// </param>
/// <param name="name">The name of the file to open the IndexInput from in the Directory
/// </param>
/// <throws> IOException </throws>
public /*internal*/ FieldInfos(Directory d, System.String name)
{
    IndexInput input = d.OpenInput(name);
    try
    {
        try
        {
            Read(input, name);
        }
        catch (System.IO.IOException ioe)
        {
            if (format == FORMAT_PRE)
            {
                // LUCENE-1623: FORMAT_PRE (before there was a
                // format) may be 2.3.2 (pre-utf8) or 2.4.x (utf8)
                // encoding; retry with input set to pre-utf8
                input.Seek(0);
                input.SetModifiedUTF8StringsMode();
                byNumber.Clear();
                byName.Clear();
                try
                {
                    Read(input, name);
                }
                catch (System.Exception t)
                {
                    // Ignore any new exception & throw original IOE
                    throw ioe;
                }
            }
            else
            {
                // The IOException cannot be caused by
                // LUCENE-1623, so re-throw it
                throw ioe;
            }
        }
    }
    finally
    {
        input.Close();
    }
}
/// <summary>Constructs a bit vector from the file <code>name</code> in Directory
/// <code>d</code>, as written by the {@link #write} method.
/// </summary>
public BitVector(Directory d, System.String name)
{
    IndexInput input = d.OpenInput(name);
    try
    {
        size = input.ReadInt(); // read size
        if (size == -1)
        {
            ReadDgaps(input);
        }
        else
        {
            ReadBits(input);
        }
    }
    finally
    {
        input.Close();
    }
}
// It is not always necessary to move the prox pointer
// to a new document after the freq pointer has been moved.
// Consider for example a phrase query with two terms:
// the freq pointer for term 1 has to move to document x
// to answer the question of whether the term occurs in that document.
// Only if term 2 also matches document x do the positions have to be
// read to figure out if term 1 and term 2 appear next
// to each other in document x and thus satisfy the query.
// So we move the prox pointer lazily to the document
// as soon as positions are requested.
private void LazySkip()
{
    if (proxStream == null)
    {
        // clone lazily
        proxStream = (IndexInput) parent.core.proxStream.Clone();
    }

    // we might have to skip the current payload
    // if it was not read yet
    SkipPayload();

    if (lazySkipPointer != -1)
    {
        proxStream.Seek(lazySkipPointer);
        lazySkipPointer = -1;
    }

    if (lazySkipProxCount != 0)
    {
        SkipPositions(lazySkipProxCount);
        lazySkipProxCount = 0;
    }
}
internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
{
    bool success = false;
    isOriginal = true;
    try
    {
        fieldInfos = fn;

        cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
        cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);

        // First version of fdx did not include a format
        // header, but, the first int will always be 0 in that
        // case
        int firstInt = cloneableIndexStream.ReadInt();
        if (firstInt == 0)
        {
            format = 0;
        }
        else
        {
            format = firstInt;
        }

        if (format > FieldsWriter.FORMAT_CURRENT
            /* extra support for Lucene 3.0 indexes: */
            && format != FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS)
        {
            throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
        }

        if (format > FieldsWriter.FORMAT)
        {
            formatSize = 4;
        }
        else
        {
            formatSize = 0;
        }

        if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
        {
            cloneableFieldsStream.SetModifiedUTF8StringsMode();
        }

        fieldsStream = (IndexInput) cloneableFieldsStream.Clone();

        long indexSize = cloneableIndexStream.Length() - formatSize;

        if (docStoreOffset != -1)
        {
            // We read only a slice out of this shared fields file
            this.docStoreOffset = docStoreOffset;
            this.size = size;

            // Verify the file is long enough to hold all of our
            // docs
            System.Diagnostics.Debug.Assert(((int) (indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
        }
        else
        {
            this.docStoreOffset = 0;
            this.size = (int) (indexSize >> 3);
        }

        indexStream = (IndexInput) cloneableIndexStream.Clone();

        numTotalDocs = (int) (indexSize >> 3);

        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            Close();
        }
    }
}
internal SkipBuffer(IndexInput input, int length)
{
    data = new byte[length];
    pointer = input.GetFilePointer();
    input.ReadBytes(data, 0, length);
}
internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
{
    input = i;
    fieldInfos = fis;
    isIndex = isi;
    maxSkipLevels = 1; // use single-level skip lists for formats > -3

    int firstInt = input.ReadInt();
    if (firstInt >= 0)
    {
        // original-format file, without explicit format version number
        format = 0;
        size = firstInt;

        // back-compatible settings
        indexInterval = 128;
        skipInterval = System.Int32.MaxValue; // switch off skipTo optimization
    }
    else
    {
        // we have a format version number
        format = firstInt;

        // check that it is a format we can understand
        if (format < TermInfosWriter.FORMAT_CURRENT)
        {
            throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");
        }

        size = input.ReadLong(); // read the size

        if (format == -1)
        {
            if (!isIndex)
            {
                indexInterval = input.ReadInt();
                formatM1SkipInterval = input.ReadInt();
            }
            // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
            // skipTo implementation of these versions
            skipInterval = System.Int32.MaxValue;
        }
        else
        {
            indexInterval = input.ReadInt();
            skipInterval = input.ReadInt();
            if (format <= TermInfosWriter.FORMAT)
            {
                // this new format introduces multi-level skipping
                maxSkipLevels = input.ReadInt();
            }
        }
        System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0");
        System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0");
    }
    if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
    {
        termBuffer.SetPreUTF8Strings();
        scanBuffer.SetPreUTF8Strings();
        prevBuffer.SetPreUTF8Strings();
    }
}
internal CSIndexInput(IndexInput base_Renamed, long fileOffset, long length, int readBufferSize)
    : base(readBufferSize)
{
    this.base_Renamed = (IndexInput) base_Renamed.Clone();
    this.fileOffset = fileOffset;
    this.length = length;
}
/// <summary> Construct a new SegmentInfo instance by reading a
/// previously saved SegmentInfo from input.
///
/// </summary>
/// <param name="dir">directory to load from
/// </param>
/// <param name="format">format of the segments info file
/// </param>
/// <param name="input">input handle to read segment info from
/// </param>
internal SegmentInfo(Directory dir, int format, IndexInput input)
{
    this.dir = dir;
    name = input.ReadString();
    docCount = input.ReadInt();
    if (format <= SegmentInfos.FORMAT_LOCKLESS)
    {
        delGen = input.ReadLong();
        if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
        {
            docStoreOffset = input.ReadInt();
            if (docStoreOffset != -1)
            {
                docStoreSegment = input.ReadString();
                docStoreIsCompoundFile = (1 == input.ReadByte());
            }
            else
            {
                docStoreSegment = name;
                docStoreIsCompoundFile = false;
            }
        }
        else
        {
            docStoreOffset = -1;
            docStoreSegment = name;
            docStoreIsCompoundFile = false;
        }
        if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
        {
            hasSingleNormFile = (1 == input.ReadByte());
        }
        else
        {
            hasSingleNormFile = false;
        }
        int numNormGen = input.ReadInt();
        if (numNormGen == NO)
        {
            normGen = null;
        }
        else
        {
            normGen = new long[numNormGen];
            for (int j = 0; j < numNormGen; j++)
            {
                normGen[j] = input.ReadLong();
            }
        }
        isCompoundFile = (sbyte) input.ReadByte();
        preLockless = (isCompoundFile == CHECK_DIR);

        if (format <= SegmentInfos.FORMAT_DEL_COUNT)
        {
            delCount = input.ReadInt();
            System.Diagnostics.Debug.Assert(delCount <= docCount);
        }
        else
        {
            delCount = -1;
        }

        if (format <= SegmentInfos.FORMAT_HAS_PROX)
        {
            hasProx = input.ReadByte() == 1;
        }
        else
        {
            hasProx = true;
        }

        if (format <= SegmentInfos.FORMAT_DIAGNOSTICS)
        {
            diagnostics = input.ReadStringStringMap();
        }
        else
        {
            diagnostics = new System.Collections.Generic.Dictionary<string, string>();
        }
    }
    else
    {
        delGen = CHECK_DIR;
        normGen = null;
        isCompoundFile = (sbyte) CHECK_DIR;
        preLockless = true;
        hasSingleNormFile = false;
        docStoreOffset = -1;
        docStoreIsCompoundFile = false;
        docStoreSegment = null;
        delCount = -1;
        hasProx = true;
        diagnostics = new System.Collections.Generic.Dictionary<string, string>();
    }
}
internal CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor)
{
    segment = si.name;
    this.readBufferSize = readBufferSize;
    this.dir = dir;

    bool success = false;

    try
    {
        Directory dir0 = dir;
        if (si.GetUseCompoundFile())
        {
            cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            dir0 = cfsReader;
        }
        cfsDir = dir0;

        fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);

        this.termsIndexDivisor = termsIndexDivisor;
        TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
        if (termsIndexDivisor == -1)
        {
            tisNoIndex = reader;
        }
        else
        {
            tis = reader;
            tisNoIndex = null;
        }

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);

        if (fieldInfos.HasProx())
        {
            proxStream = cfsDir.OpenInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize);
        }
        else
        {
            proxStream = null;
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            DecRef();
        }
    }

    // Must assign this at the end -- if we hit an
    // exception above core, we don't want to attempt to
    // purge the FieldCache (will hit NPE because core is
    // not assigned yet).
    this.origInstance = origInstance;
}
internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
{
    bool success = false;

    try
    {
        if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
        {
            tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
            format = CheckValidFormat(tvx);
            tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
            int tvdFormat = CheckValidFormat(tvd);
            tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
            int tvfFormat = CheckValidFormat(tvf);

            System.Diagnostics.Debug.Assert(format == tvdFormat);
            System.Diagnostics.Debug.Assert(format == tvfFormat);

            if (format >= FORMAT_VERSION2)
            {
                System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0);
                numTotalDocs = (int) (tvx.Length() >> 4);
            }
            else
            {
                System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0);
                numTotalDocs = (int) (tvx.Length() >> 3);
            }

            if (-1 == docStoreOffset)
            {
                this.docStoreOffset = 0;
                this.size = numTotalDocs;
                System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size);
            }
            else
            {
                this.docStoreOffset = docStoreOffset;
                this.size = size;
                // Verify the file is long enough to hold all of our
                // docs
                System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
            }
        }
        else
        {
            // If all documents flushed in a segment had hit
            // non-aborting exceptions, it's possible that
            // FieldInfos.hasVectors returns true yet the term
            // vector files don't exist.
            format = 0;
        }

        this.fieldInfos = fieldInfos;
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            Close();
        }
    }
}
internal SegmentTermPositions(SegmentReader p)
    : base(p)
{
    this.proxStream = null; // the proxStream will be cloned lazily when nextPosition() is called for the first time
}
/// <summary>Read as a bit set </summary>
private void ReadBits(IndexInput input)
{
    count = input.ReadInt();          // read count
    bits = new byte[(size >> 3) + 1]; // allocate bits
    input.ReadBytes(bits, 0, bits.Length);
}
private void OpenNorms(Directory cfsDir, int readBufferSize)
{
    long nextNormSeek = SegmentMerger.NORMS_HEADER.Length; // skip header (header unused for now)
    int maxDoc = MaxDoc();
    for (int i = 0; i < core.fieldInfos.Size(); i++)
    {
        FieldInfo fi = core.fieldInfos.FieldInfo(i);
        if (norms.Contains(fi.name))
        {
            // in case this SegmentReader is being re-opened, we might be able to
            // reuse some norm instances and skip loading them here
            continue;
        }
        if (fi.isIndexed && !fi.omitNorms)
        {
            Directory d = Directory();
            System.String fileName = si.GetNormFileName(fi.number);
            if (!si.HasSeparateNorms(fi.number))
            {
                d = cfsDir;
            }

            // singleNormFile means multiple norms share this file
            bool singleNormFile = fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION);
            IndexInput normInput = null;
            long normSeek;

            if (singleNormFile)
            {
                normSeek = nextNormSeek;
                if (singleNormStream == null)
                {
                    singleNormStream = d.OpenInput(fileName, readBufferSize);
                    singleNormRef = new Ref();
                }
                else
                {
                    singleNormRef.IncRef();
                }
                // All norms in the .nrm file can share a single IndexInput since
                // they are only used in a synchronized context.
                // If this were to change in the future, a clone could be done here.
                normInput = singleNormStream;
            }
            else
            {
                normSeek = 0;
                normInput = d.OpenInput(fileName);
            }

            norms[fi.name] = new Norm(this, normInput, fi.number, normSeek);
            nextNormSeek += maxDoc; // increment also if some norms are separate
        }
    }
}
public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number, long normSeek)
{
    InitBlock(enclosingInstance);
    this.in_Renamed = in_Renamed;
    this.number = number;
    this.normSeek = normSeek;
}
internal CSIndexInput(IndexInput base_Renamed, long fileOffset, long length)
    : this(base_Renamed, fileOffset, length, BufferedIndexInput.BUFFER_SIZE)
{
}
private void CloseInput()
{
    if (in_Renamed != null)
    {
        if (in_Renamed != Enclosing_Instance.singleNormStream)
        {
            // It's private to us -- just close it
            in_Renamed.Close();
        }
        else
        {
            // We are sharing this with others -- decRef and
            // maybe close the shared norm stream
            if (Enclosing_Instance.singleNormRef.DecRef() == 0)
            {
                Enclosing_Instance.singleNormStream.Close();
                Enclosing_Instance.singleNormStream = null;
            }
        }

        in_Renamed = null;
    }
}
internal DefaultSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval)
    : base(skipStream, maxSkipLevels, skipInterval)
{
    freqPointer = new long[maxSkipLevels];
    proxPointer = new long[maxSkipLevels];
    payloadLength = new int[maxSkipLevels];
}
/// <summary>Returns a {@link Status} instance detailing
/// the state of the index.
///
/// </summary>
/// <param name="onlySegments">list of specific segment names to check
///
/// <p/>As this method checks every byte in the specified
/// segments, on a large index it can take quite a long
/// time to run.
///
/// <p/><b>WARNING</b>: make sure
/// you only call this when the index is not opened by any
/// writer.
/// </param>
public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
{
    System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
    SegmentInfos sis = new SegmentInfos();
    Status result = new Status();
    result.dir = dir;
    try
    {
        sis.Read(dir);
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not read any segments file in directory");
        result.missingSegments = true;
        if (infoStream != null)
        {
            infoStream.WriteLine(t.StackTrace);
        }
        return result;
    }

    int numSegments = sis.Count;
    System.String segmentsFileName = sis.GetCurrentSegmentFileName();
    IndexInput input = null;
    try
    {
        input = dir.OpenInput(segmentsFileName);
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not open segments file in directory");
        if (infoStream != null)
        {
            infoStream.WriteLine(t.StackTrace);
        }
        result.cantOpenSegments = true;
        return result;
    }

    int format = 0;
    try
    {
        format = input.ReadInt();
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not read segment file version in directory");
        if (infoStream != null)
        {
            infoStream.WriteLine(t.StackTrace);
        }
        result.missingSegmentVersion = true;
        return result;
    }
    finally
    {
        if (input != null)
        {
            input.Close();
        }
    }

    System.String sFormat = "";
    bool skip = false;

    if (format == SegmentInfos.FORMAT)
    {
        sFormat = "FORMAT [Lucene Pre-2.1]";
    }
    if (format == SegmentInfos.FORMAT_LOCKLESS)
    {
        sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
    }
    else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
    {
        sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    }
    else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
    {
        sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    }
    else
    {
        if (format == SegmentInfos.FORMAT_CHECKSUM)
        {
            sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
        }
        else if (format == SegmentInfos.FORMAT_DEL_COUNT)
        {
            sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
        }
        else if (format == SegmentInfos.FORMAT_HAS_PROX)
        {
            sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
        }
        else if (format == SegmentInfos.FORMAT_USER_DATA)
        {
            sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
        }
        else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
        {
            sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
        }
        else if (format < SegmentInfos.CURRENT_FORMAT)
        {
            sFormat = "int=" + format + " [newer version of Lucene than this tool]";
            skip = true;
        }
        else
        {
            sFormat = format + " [Lucene 1.3 or prior]";
        }
    }

    result.segmentsFileName = segmentsFileName;
    result.numSegments = numSegments;
    result.segmentFormat = sFormat;
    result.userData = sis.GetUserData();
    System.String userDataString;
    if (sis.GetUserData().Count > 0)
    {
        userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
    }
    else
    {
        userDataString = "";
    }

    Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

    if (onlySegments != null)
    {
        result.partial = true;
        if (infoStream != null)
        {
            infoStream.Write("\nChecking only these segments:");
        }
        System.Collections.IEnumerator it = onlySegments.GetEnumerator();
        while (it.MoveNext())
        {
            if (infoStream != null)
            {
                infoStream.Write(" " + it.Current);
            }
        }
        System.Collections.IEnumerator e = onlySegments.GetEnumerator();
        while (e.MoveNext() == true)
        {
            result.segmentsChecked.Add(e.Current);
        }
        Msg(":");
    }

    if (skip)
    {
        Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
        result.toolOutOfDate = true;
        return result;
    }

    result.newSegments = (SegmentInfos) sis.Clone();
    result.newSegments.Clear();

    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = sis.Info(i);
        if (onlySegments != null && !onlySegments.Contains(info.name))
        {
            continue;
        }
        Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
        result.segmentInfos.Add(segInfoStat);
        Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
        segInfoStat.name = info.name;
        segInfoStat.docCount = info.docCount;

        int toLoseDocCount = info.docCount;

        SegmentReader reader = null;

        try
        {
            Msg(" compound=" + info.GetUseCompoundFile());
            segInfoStat.compound = info.GetUseCompoundFile();
            Msg(" hasProx=" + info.GetHasProx());
            segInfoStat.hasProx = info.GetHasProx();
            Msg(" numFiles=" + info.Files().Count);
            segInfoStat.numFiles = info.Files().Count;
            Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
            segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);

            System.Collections.Generic.IDictionary<string, string> diagnostics = info.GetDiagnostics();
            segInfoStat.diagnostics = diagnostics;
            if (diagnostics.Count > 0)
            {
                Msg(" diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
            }

            int docStoreOffset = info.GetDocStoreOffset();
            if (docStoreOffset != -1)
            {
                Msg(" docStoreOffset=" + docStoreOffset);
                segInfoStat.docStoreOffset = docStoreOffset;
                Msg(" docStoreSegment=" + info.GetDocStoreSegment());
                segInfoStat.docStoreSegment = info.GetDocStoreSegment();
                Msg(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
            }

            System.String delFileName = info.GetDelFileName();
            if (delFileName == null)
            {
                Msg(" no deletions");
                segInfoStat.hasDeletions = false;
            }
            else
            {
                Msg(" has deletions [delFileName=" + delFileName + "]");
                segInfoStat.hasDeletions = true;
                segInfoStat.deletionsFileName = delFileName;
            }

            if (infoStream != null)
            {
                infoStream.Write(" test: open reader.........");
            }
            reader = SegmentReader.Get(info);

            segInfoStat.openReaderPassed = true;

            int numDocs = reader.NumDocs();
            toLoseDocCount = numDocs;
            if (reader.HasDeletions())
            {
                if (reader.deletedDocs.Count() != info.GetDelCount())
                {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                }
                if (reader.deletedDocs.Count() > reader.MaxDoc())
                {
                    throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                }
                if (info.docCount - numDocs != info.GetDelCount())
                {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                }
                segInfoStat.numDeleted = info.docCount - numDocs;
                Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
            }
            else
            {
                if (info.GetDelCount() != 0)
                {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                }
                Msg("OK");
            }
            if (reader.MaxDoc() != info.docCount)
            {
                throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
            }

            // Test getFieldNames()
            if (infoStream != null)
            {
                infoStream.Write(" test: fields..............");
            }
            System.Collections.Generic.ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
            Msg("OK [" + fieldNames.Count + " fields]");
            segInfoStat.numFields = fieldNames.Count;

            // Test Field Norms
            segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

            // Test the Term Index
            segInfoStat.termIndexStatus = TestTermIndex(info, reader);

            // Test Stored Fields
            segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

            // Test Term Vectors
            segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

            // Rethrow the first exception we encountered
            // This will cause stats for failed segments to be incremented properly
            if (segInfoStat.fieldNormStatus.error != null)
            {
                throw new System.SystemException("Field Norm test failed");
            }
            else if (segInfoStat.termIndexStatus.error != null)
            {
                throw new System.SystemException("Term Index test failed");
            }
            else if (segInfoStat.storedFieldStatus.error != null)
            {
                throw new System.SystemException("Stored Field test failed");
            }
            else if (segInfoStat.termVectorStatus.error != null)
            {
                throw new System.SystemException("Term Vector test failed");
            }

            Msg("");
        }
        catch (System.Exception t)
        {
            Msg("FAILED");
            System.String comment;
            comment = "fixIndex() would remove reference to this segment";
            Msg(" WARNING: " + comment + "; full exception:");
            if (infoStream != null)
            {
                infoStream.WriteLine(t.StackTrace);
            }
            Msg("");
            result.totLoseDocCount += toLoseDocCount;
            result.numBadSegments++;
            continue;
        }
        finally
        {
            if (reader != null)
            {
                reader.Close();
            }
        }

        // Keeper
        result.newSegments.Add(info.Clone());
    }

    if (0 == result.numBadSegments)
    {
        result.clean = true;
        Msg("No problems were detected with this index.\n");
    }
    else
    {
        Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
    }

    return result;
}
/// <summary> Subclasses must implement the actual skip data encoding in this method.
///
/// </summary>
/// <param name="level">the level skip data shall be read from
/// </param>
/// <param name="skipStream">the skip stream to read from
/// </param>
protected internal abstract int ReadSkipData(int level, IndexInput skipStream);