/// <summary>
/// Copies the norms of <paramref name="field"/> into a caller-supplied array,
/// starting at <paramref name="offset"/> and writing MaxDoc() bytes.
/// </summary>
/// <param name="field">Name of the field whose norms are wanted.</param>
/// <param name="bytes">Destination array; left untouched when the field has no norms entry.</param>
/// <param name="offset">Index in <paramref name="bytes"/> of the first byte written.</param>
public override void Norms(System.String field, byte[] bytes, int offset)
{
    lock (this)
    {
        Norm entry = (Norm)norms[field];
        if (entry == null)
        {
            // Nothing recorded for this field: the caller's array is used as-is.
            return;
        }

        if (entry.bytes == null)
        {
            // Not cached: read from the start of a private clone of the norm stream.
            InputStream source = (InputStream)entry.in_Renamed.Clone();
            try
            {
                source.Seek(0);
                source.ReadBytes(bytes, offset, MaxDoc());
            }
            finally
            {
                source.Close();
            }
            return;
        }

        // Cached copy available: no stream access needed.
        Array.Copy(entry.bytes, 0, bytes, offset, MaxDoc());
    }
}
/// <summary>
/// Creates a term-docs enumerator bound to <paramref name="parent"/>.
/// </summary>
/// <param name="parent">Segment reader supplying the frequency stream, deleted-docs set and skip interval.</param>
public /*internal*/ SegmentTermDocs(SegmentReader parent)
{
    this.parent = parent;

    // Work on a private clone of the parent's frequency stream.
    InputStream clonedFreq = (InputStream)parent.freqStream.Clone();
    freqStream = clonedFreq;

    deletedDocs = parent.deletedDocs;
    skipInterval = parent.tis.GetSkipInterval();
}
/// <summary>
/// Creates a term-docs enumerator bound to <paramref name="parent"/>.
/// </summary>
/// <param name="parent">Segment reader supplying the frequency stream, deleted-docs set and skip interval.</param>
public /*internal*/ SegmentTermDocs(SegmentReader parent)
{
    this.parent = parent;

    // Work on a private clone of the parent's frequency stream.
    InputStream clonedFreq = (InputStream)parent.freqStream.Clone();
    freqStream = clonedFreq;

    deletedDocs = parent.deletedDocs;
    skipInterval = parent.tis.GetSkipInterval();
}
/// <summary>
/// Reads one int from <paramref name="in_Renamed"/> and rejects it when it is
/// greater than TermVectorsWriter.FORMAT_VERSION.
/// </summary>
/// <param name="in_Renamed">Stream positioned at the format int; one int is consumed.</param>
/// <exception cref="System.IO.IOException">The value read exceeds the supported format version.</exception>
private void CheckValidFormat(InputStream in_Renamed)
{
    int found = in_Renamed.ReadInt();
    if (found <= TermVectorsWriter.FORMAT_VERSION)
    {
        return;
    }

    throw new System.IO.IOException("Incompatible format version: " + found + " expected " + TermVectorsWriter.FORMAT_VERSION + " or less");
}
/// <summary>
/// Opens the ".fdt" and ".fdx" files of a segment and derives the entry count
/// from the length of the ".fdx" file.
/// </summary>
/// <param name="d">Directory the two files are opened from.</param>
/// <param name="segment">Segment name used as the file-name prefix.</param>
/// <param name="fn">Field metadata kept for later use by this reader.</param>
public /*internal*/ FieldsReader(Directory d, System.String segment, FieldInfos fn)
{
    fieldInfos = fn;

    fieldsStream = d.OpenFile(segment + ".fdt");
    indexStream = d.OpenFile(segment + ".fdx");

    // size = index length / 8 (presumably one 8-byte entry per document -- confirm against the writer).
    long indexLength = indexStream.Length();
    size = (int)(indexLength / 8);
}
/// <summary>
/// Opens the ".fdt" and ".fdx" files of a segment and derives the entry count
/// from the length of the ".fdx" file.
/// </summary>
/// <param name="d">Directory the two files are opened from.</param>
/// <param name="segment">Segment name used as the file-name prefix.</param>
/// <param name="fn">Field metadata kept for later use by this reader.</param>
public /*internal*/ FieldsReader(Directory d, System.String segment, FieldInfos fn)
{
    fieldInfos = fn;

    fieldsStream = d.OpenFile(segment + ".fdt");
    indexStream = d.OpenFile(segment + ".fdx");

    // size = index length / 8 (presumably one 8-byte entry per document -- confirm against the writer).
    long indexLength = indexStream.Length();
    size = (int)(indexLength / 8);
}
/// <summary>
/// Opens the compound file <paramref name="name"/> in <paramref name="dir"/> and
/// loads its table of entries (offset and id per entry; lengths are derived).
/// </summary>
/// <param name="dir">Directory containing the compound file.</param>
/// <param name="name">Name of the compound file.</param>
public CompoundFileReader(Directory dir, System.String name)
{
    directory = dir;
    fileName = name;

    bool loaded = false;
    try
    {
        stream = dir.OpenFile(name);

        // The table stores only start offsets, so each entry's length is
        // filled in once the offset of the following entry is known.
        int entryCount = stream.ReadVInt();
        FileEntry previous = null;
        for (int n = 0; n < entryCount; n++)
        {
            long start = stream.ReadLong();
            System.String id = stream.ReadString();

            if (previous != null)
            {
                previous.length = start - previous.offset;
            }

            FileEntry current = new FileEntry();
            current.offset = start;
            entries[id] = current;
            previous = current;
        }

        // The last entry runs to the end of the compound file.
        if (previous != null)
        {
            previous.length = stream.Length() - previous.offset;
        }

        loaded = true;
    }
    finally
    {
        if (!loaded)
        {
            if (stream != null)
            {
                try
                {
                    stream.Close();
                }
                catch (System.IO.IOException)
                {
                    // Best-effort cleanup: the failure that brought us here propagates instead.
                }
            }
        }
    }
}
/// <summary>
/// Constructs a FieldInfos by reading it from a file in a directory. The file
/// is opened, passed to Read, and closed again whether or not reading succeeds.
/// </summary>
/// <param name="d">The directory to open the InputStream from.</param>
/// <param name="name">The name of the file to open the InputStream from in the directory.</param>
public /*internal*/ FieldInfos(Directory d, System.String name)
{
    InputStream source = d.OpenFile(name);
    try
    {
        Read(source);
    }
    finally
    {
        source.Close();
    }
}
/// <summary>
/// Reads field entries from <paramref name="input"/>: a VInt count, then for
/// each entry a name string followed by one flag byte.
/// </summary>
/// <param name="input">Stream positioned at the entry count.</param>
private void Read(InputStream input)
{
    int total = input.ReadVInt();
    for (int read = 0; read < total; read++)
    {
        System.String fieldName = String.Intern(input.ReadString());
        byte flags = input.ReadByte();

        // Bit 0 is the indexed flag, bit 1 is the store-term-vector flag.
        AddInternal(fieldName, (flags & 0x1) != 0, (flags & 0x2) != 0);
    }
}
/// <summary>
/// Loads the "segments" file of <paramref name="directory"/> into this instance.
/// A negative leading int is an explicit format marker; a non-negative one
/// means the old layout, where that int is the counter itself.
/// </summary>
/// <param name="directory">Directory containing the "segments" file.</param>
/// <exception cref="System.IO.IOException">The explicit format marker is lower than FORMAT.</exception>
public void Read(Directory directory)
{
    InputStream input = directory.OpenFile("segments");
    try
    {
        int format = input.ReadInt();
        bool explicitFormat = format < 0;

        if (explicitFormat)
        {
            // Only formats down to FORMAT are understood.
            if (format < FORMAT)
            {
                throw new System.IO.IOException("Unknown format version: " + format);
            }
            version = input.ReadLong();
            counter = input.ReadInt();
        }
        else
        {
            // Old layout: the first int already is the counter.
            counter = format;
        }

        int remaining = input.ReadInt();
        while (remaining > 0)
        {
            SegmentInfo si = new SegmentInfo(input.ReadString(), input.ReadInt(), directory);
            Add(si);
            remaining--;
        }

        if (!explicitFormat)
        {
            // Old layout: a version may trail the segment list; absent means 0.
            if (input.GetFilePointer() >= input.Length())
            {
                version = 0;
            }
            else
            {
                version = input.ReadLong();
            }
        }
    }
    finally
    {
        input.Close();
    }
}
/// <summary>
/// Clears the entry table and closes the underlying stream.
/// </summary>
/// <exception cref="System.IO.IOException">The reader has already been closed (stream is null).</exception>
public override void Close()
{
    lock (this)
    {
        if (stream != null)
        {
            entries.Clear();
            stream.Close();
            // A null stream marks this reader as closed for any later call.
            stream = null;
            return;
        }

        throw new System.IO.IOException("Already closed");
    }
}
/// <summary>
/// Creates a term enumerator over <paramref name="i"/> and reads the file header.
/// A non-negative first int means the original layout, where that int is the size;
/// a negative one is an explicit format version followed by further header fields.
/// </summary>
/// <param name="i">Stream positioned at the start of the header.</param>
/// <param name="fis">Field metadata kept for later use by this enumerator.</param>
/// <param name="isi">Stored in isIndex; for format -1 the interval fields are read only when this is false.</param>
/// <exception cref="System.IO.IOException">The format version is lower than TermInfosWriter.FORMAT.</exception>
internal SegmentTermEnum(InputStream i, FieldInfos fis, bool isi)
{
    input = i;
    fieldInfos = fis;
    isIndex = isi;

    int header = input.ReadInt();
    if (header >= 0)
    {
        // Original layout without a version number: the header int is the size.
        format = 0;
        size = header;

        // Back-compatible defaults; a maximal skip interval switches off the skipTo optimization.
        indexInterval = 128;
        skipInterval = System.Int32.MaxValue;
        return;
    }

    // Explicit format version: make sure it is one we understand.
    format = header;
    if (format < TermInfosWriter.FORMAT)
    {
        throw new System.IO.IOException("Unknown format version:" + format);
    }

    size = input.ReadLong();

    if (format != -1)
    {
        indexInterval = input.ReadInt();
        skipInterval = input.ReadInt();
        return;
    }

    // Format -1: the two interval ints are read only when isIndex is false.
    if (!isIndex)
    {
        indexInterval = input.ReadInt();
        formatM1SkipInterval = input.ReadInt();
    }

    // Switch off the skipTo optimization for file formats prior to 1.4rc2 to
    // avoid a bug in the skipTo implementation of those versions.
    skipInterval = System.Int32.MaxValue;
}
/// <summary>
/// Opens the term-vector files of a segment when they exist. The presence of the
/// TVX file decides whether the three files (TVX, TVD, TVF) are opened; each one
/// is format-checked immediately after being opened.
/// </summary>
/// <param name="d">Directory the term-vector files are opened from.</param>
/// <param name="segment">Segment name used as the file-name prefix.</param>
/// <param name="fieldInfos">Field metadata kept for later use by this reader.</param>
/// <exception cref="System.IO.IOException">Any of the files carries a format version newer than supported.</exception>
public /*internal*/ TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos)
{
    if (d.FileExists(segment + TermVectorsWriter.TVX_EXTENSION))
    {
        tvx = d.OpenFile(segment + TermVectorsWriter.TVX_EXTENSION);
        CheckValidFormat(tvx);

        tvd = d.OpenFile(segment + TermVectorsWriter.TVD_EXTENSION);
        CheckValidFormat(tvd);

        tvf = d.OpenFile(segment + TermVectorsWriter.TVF_EXTENSION);
        CheckValidFormat(tvf);

        // Divide while the length is still a long and narrow afterwards. A cast
        // binds tighter than '/', so "(int) tvx.Length() / 8" would truncate the
        // length to 32 bits first and yield a wrong size for very large files.
        size = (int)(tvx.Length() / 8);
    }

    this.fieldInfos = fieldInfos;
}
/// <summary>
/// Opens the term-vector files of a segment when they exist. The presence of the
/// TVX file decides whether the three files (TVX, TVD, TVF) are opened; each one
/// is format-checked immediately after being opened.
/// </summary>
/// <param name="d">Directory the term-vector files are opened from.</param>
/// <param name="segment">Segment name used as the file-name prefix.</param>
/// <param name="fieldInfos">Field metadata kept for later use by this reader.</param>
/// <exception cref="System.IO.IOException">Any of the files carries a format version newer than supported.</exception>
public /*internal*/ TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos)
{
    if (d.FileExists(segment + TermVectorsWriter.TVX_EXTENSION))
    {
        tvx = d.OpenFile(segment + TermVectorsWriter.TVX_EXTENSION);
        CheckValidFormat(tvx);

        tvd = d.OpenFile(segment + TermVectorsWriter.TVD_EXTENSION);
        CheckValidFormat(tvd);

        tvf = d.OpenFile(segment + TermVectorsWriter.TVF_EXTENSION);
        CheckValidFormat(tvf);

        // Divide while the length is still a long and narrow afterwards. A cast
        // binds tighter than '/', so "(int)tvx.Length() / 8" would truncate the
        // length to 32 bits first and yield a wrong size for very large files.
        size = (int)(tvx.Length() / 8);
    }

    this.fieldInfos = fieldInfos;
}
/// <summary>
/// Constructs a bit vector from the file <paramref name="name"/> in directory
/// <paramref name="d"/>. The file holds two ints (size, then count) followed by
/// the bit bytes.
/// </summary>
/// <param name="d">Directory containing the file.</param>
/// <param name="name">Name of the file to read.</param>
public BitVector(Directory d, System.String name)
{
    InputStream source = d.OpenFile(name);
    try
    {
        size = source.ReadInt();
        count = source.ReadInt();

        // The array holds (size / 8) + 1 bytes, all of which are read from the file.
        int byteLength = (size >> 3) + 1;
        bits = new byte[byteLength];
        source.ReadBytes(bits, 0, byteLength);
    }
    finally
    {
        source.Close();
    }
}
/// <summary>
/// Creates a term enumerator over <paramref name="i"/> and reads the file header.
/// A non-negative first int means the original layout, where that int is the size;
/// a negative one is an explicit format version followed by further header fields.
/// </summary>
/// <param name="i">Stream positioned at the start of the header.</param>
/// <param name="fis">Field metadata kept for later use by this enumerator.</param>
/// <param name="isi">Stored in isIndex; for format -1 the interval fields are read only when this is false.</param>
/// <exception cref="System.IO.IOException">The format version is lower than TermInfosWriter.FORMAT.</exception>
internal SegmentTermEnum(InputStream i, FieldInfos fis, bool isi)
{
    input = i;
    fieldInfos = fis;
    isIndex = isi;

    int header = input.ReadInt();
    if (header >= 0)
    {
        // Original layout without a version number: the header int is the size.
        format = 0;
        size = header;

        // Back-compatible defaults; a maximal skip interval switches off the skipTo optimization.
        indexInterval = 128;
        skipInterval = System.Int32.MaxValue;
        return;
    }

    // Explicit format version: make sure it is one we understand.
    format = header;
    if (format < TermInfosWriter.FORMAT)
    {
        throw new System.IO.IOException("Unknown format version:" + format);
    }

    size = input.ReadLong();

    if (format != -1)
    {
        indexInterval = input.ReadInt();
        skipInterval = input.ReadInt();
        return;
    }

    // Format -1: the two interval ints are read only when isIndex is false.
    if (!isIndex)
    {
        indexInterval = input.ReadInt();
        formatM1SkipInterval = input.ReadInt();
    }

    // Switch off the skipTo optimization for file formats prior to 1.4rc2 to
    // avoid a bug in the skipTo implementation of those versions.
    skipInterval = System.Int32.MaxValue;
}
/// <summary>
/// Copies the whole file named by <paramref name="source"/> into
/// <paramref name="os"/>, moving data through the caller-provided
/// <paramref name="buffer"/> to reduce memory allocation.
/// </summary>
/// <param name="source">Entry whose file field names the file to copy.</param>
/// <param name="os">Destination stream; data is written at its current position.</param>
/// <param name="buffer">Scratch buffer; its length is the maximum chunk size.</param>
/// <exception cref="System.IO.IOException">
/// Bytes remain uncopied after the loop, or the output position did not advance by the source length.
/// </exception>
private void CopyFile(FileEntry source, OutputStream os, byte[] buffer)
{
    InputStream input = null;
    try
    {
        long outStart = os.GetFilePointer();

        input = directory.OpenFile(source.file);
        long length = input.Length();
        long left = length;
        int chunk = buffer.Length;

        while (left > 0)
        {
            int len = (int)System.Math.Min(chunk, left);
            input.ReadBytes(buffer, 0, len);
            os.WriteBytes(buffer, len);
            left -= len;
        }

        // Sanity check: everything must have been consumed.
        if (left != 0)
        {
            throw new System.IO.IOException("Non-zero remainder length after copying: " + left + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
        }

        // Sanity check: the output must have grown by exactly the source length.
        long written = os.GetFilePointer() - outStart;
        if (written != length)
        {
            throw new System.IO.IOException("Difference in the output file offsets " + written + " does not match the original file length " + length);
        }
    }
    finally
    {
        if (input != null)
        {
            input.Close();
        }
    }
}
/// <summary>
/// Opens everything this reader needs for the segment described by
/// <paramref name="si"/>: field infos, stored fields, term infos, the deletions
/// bit vector (if any), the frequency and proximity streams, norms, and term
/// vectors when the field infos report them.
/// </summary>
/// <param name="si">Segment descriptor; its name is used as the file-name prefix.</param>
private void Initialize(SegmentInfo si)
{
    segment = si.name;

    // Files are read through the compound file when "<segment>.cfs" exists,
    // otherwise directly from the plain directory.
    System.String compoundName = segment + ".cfs";
    Directory source = Directory();
    if (Directory().FileExists(compoundName))
    {
        cfsReader = new CompoundFileReader(Directory(), compoundName);
        source = cfsReader;
    }

    fieldInfos = new FieldInfos(source, segment + ".fnm");
    fieldsReader = new FieldsReader(source, segment, fieldInfos);
    tis = new TermInfosReader(source, segment, fieldInfos);

    // NOTE: the deletions bit vector is read from the regular directory, not the compound file.
    if (HasDeletions(si))
    {
        deletedDocs = new BitVector(Directory(), segment + ".del");
    }

    // Keep the remaining index files open so that they stay available even
    // if a later index update removes them.
    freqStream = source.OpenFile(segment + ".frq");
    proxStream = source.OpenFile(segment + ".prx");
    OpenNorms(source);

    // Term vector files are opened only when needed.
    if (fieldInfos.HasVectors())
    {
        termVectorsReader = new TermVectorsReader(source, segment, fieldInfos);
    }
}
/// <summary>
/// Returns the current version number stored in the "segments" file.
/// </summary>
/// <param name="directory">Directory containing the "segments" file.</param>
/// <returns>The version read from the header, or the version obtained from a full read for old-format files.</returns>
/// <exception cref="System.IO.IOException">The explicit format marker is lower than FORMAT.</exception>
public static long ReadCurrentVersion(Directory directory)
{
    InputStream input = directory.OpenFile("segments");
    try
    {
        int format = input.ReadInt();
        if (format < 0)
        {
            if (format < FORMAT)
            {
                throw new System.IO.IOException("Unknown format version: " + format);
            }

            // Explicit format: the version follows the format marker directly.
            return input.ReadLong();
        }
    }
    finally
    {
        input.Close();
    }

    // We cannot be sure about the layout of an old-format file, so the whole
    // file is read instead of seeking to the version entry.
    SegmentInfos infos = new SegmentInfos();
    infos.Read(directory);
    return infos.GetVersion();
}
/// <summary>
/// Reads the file names listed in the "deletable" file of the directory.
/// </summary>
/// <returns>A synchronized list of the names read; empty when no "deletable" file exists.</returns>
private System.Collections.ArrayList ReadDeleteableFiles()
{
    System.Collections.ArrayList names = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));

    if (directory.FileExists("deletable"))
    {
        InputStream input = directory.OpenFile("deletable");
        try
        {
            // The file starts with the number of names that follow.
            int remaining = input.ReadInt();
            while (remaining > 0)
            {
                names.Add(input.ReadString());
                remaining--;
            }
        }
        finally
        {
            input.Close();
        }
    }

    return names;
}
/// <summary>
/// Creates a stream over <paramref name="base_Renamed"/>, recording the given
/// offset and length.
/// </summary>
/// <param name="base_Renamed">Underlying stream.</param>
/// <param name="fileOffset">Offset stored in fileOffset.</param>
/// <param name="length">Length stored in the length member (declared in the superclass).</param>
internal CSInputStream(InputStream base_Renamed, long fileOffset, long length)
{
    this.length = length; // member inherited from the superclass
    this.fileOffset = fileOffset;
    this.base_Renamed = base_Renamed;
}
/// <summary>
/// Opens everything this reader needs for the segment described by
/// <paramref name="si"/>: field infos, stored fields, term infos, the deletions
/// bit vector (if any), the frequency and proximity streams, norms, and term
/// vectors when the field infos report them.
/// </summary>
/// <param name="si">Segment descriptor; its name is used as the file-name prefix.</param>
private void Initialize(SegmentInfo si)
{
    segment = si.name;

    // Files are read through the compound file when "<segment>.cfs" exists,
    // otherwise directly from the plain directory.
    System.String compoundName = segment + ".cfs";
    Directory source = Directory();
    if (Directory().FileExists(compoundName))
    {
        cfsReader = new CompoundFileReader(Directory(), compoundName);
        source = cfsReader;
    }

    fieldInfos = new FieldInfos(source, segment + ".fnm");
    fieldsReader = new FieldsReader(source, segment, fieldInfos);
    tis = new TermInfosReader(source, segment, fieldInfos);

    // NOTE: the deletions bit vector is read from the regular directory, not the compound file.
    if (HasDeletions(si))
    {
        deletedDocs = new BitVector(Directory(), segment + ".del");
    }

    // Keep the remaining index files open so that they stay available even
    // if a later index update removes them.
    freqStream = source.OpenFile(segment + ".frq");
    proxStream = source.OpenFile(segment + ".prx");
    OpenNorms(source);

    // Term vector files are opened only when needed.
    if (fieldInfos.HasVectors())
    {
        termVectorsReader = new TermVectorsReader(source, segment, fieldInfos);
    }
}
/// <summary>
/// Creates a norm entry tied to its enclosing SegmentReader.
/// </summary>
/// <param name="enclosingInstance">Reader passed to InitBlock.</param>
/// <param name="in_Renamed">Stream stored in in_Renamed.</param>
/// <param name="number">Value stored in number.</param>
public Norm(SegmentReader enclosingInstance, InputStream in_Renamed, int number)
{
    // InitBlock runs first, exactly as before, so the enclosing instance is set up front.
    InitBlock(enclosingInstance);

    this.number = number;
    this.in_Renamed = in_Renamed;
}
/// <summary>
/// Creates a term-positions enumerator for <paramref name="p"/>; the base
/// constructor runs first, then a clone of the parent's proximity stream is taken.
/// </summary>
/// <param name="p">Segment reader passed to the base constructor.</param>
internal SegmentTermPositions(SegmentReader p) : base(p)
{
    // Work on a private clone of the parent's proximity stream.
    InputStream clonedProx = (InputStream)parent.proxStream.Clone();
    this.proxStream = clonedProx;
}
/// <summary>
/// Optimized implementation: advances to the first entry whose document number
/// is greater than or equal to <paramref name="target"/>. Skip data is consulted
/// when df is at least skipInterval; the remainder is covered by calling Next().
/// </summary>
/// <param name="target">Document number to advance to.</param>
/// <returns>true when such an entry was reached; false when Next() ran out of entries first.</returns>
public virtual bool SkipTo(int target)
{
    if (df >= skipInterval)
    {
        // The skip stream is cloned lazily from the frequency stream ...
        if (skipStream == null)
        {
            skipStream = (InputStream)freqStream.Clone();
        }

        // ... and positioned lazily on first use.
        if (!haveSkipped)
        {
            skipStream.Seek(skipPointer);
            haveSkipped = true;
        }

        // Scan the skip data, remembering the last skip point seen before the target.
        int prevSkipDoc = skipDoc;
        long prevFreqPointer = freqStream.GetFilePointer();
        long prevProxPointer = -1;
        int skippedEntries = -1 - (count % skipInterval);

        while (target > skipDoc)
        {
            prevSkipDoc = skipDoc;
            prevFreqPointer = freqPointer;
            prevProxPointer = proxPointer;

            if (skipDoc != 0 && skipDoc >= doc)
            {
                skippedEntries += skipInterval;
            }

            if (skipCount >= numSkips)
            {
                break;
            }

            // Skip entries are stored as deltas against the previous entry.
            skipDoc += skipStream.ReadVInt();
            freqPointer += skipStream.ReadVInt();
            proxPointer += skipStream.ReadVInt();
            skipCount++;
        }

        // Jump only when the remembered skip point lies ahead of the current position.
        if (prevFreqPointer > freqStream.GetFilePointer())
        {
            freqStream.Seek(prevFreqPointer);
            SkipProx(prevProxPointer);

            doc = prevSkipDoc;
            count += skippedEntries;
        }
    }

    // Done skipping: scan linearly. Next() is always called at least once.
    while (true)
    {
        if (!Next())
        {
            return false;
        }
        if (target <= doc)
        {
            return true;
        }
    }
}
/// <summary>
/// Optimized implementation: advances to the first entry whose document number
/// is greater than or equal to <paramref name="target"/>. Skip data is consulted
/// when df is at least skipInterval; the remainder is covered by calling Next().
/// </summary>
/// <param name="target">Document number to advance to.</param>
/// <returns>true when such an entry was reached; false when Next() ran out of entries first.</returns>
public virtual bool SkipTo(int target)
{
    if (df >= skipInterval)
    {
        // The skip stream is cloned lazily from the frequency stream ...
        if (skipStream == null)
        {
            skipStream = (InputStream)freqStream.Clone();
        }

        // ... and positioned lazily on first use.
        if (!haveSkipped)
        {
            skipStream.Seek(skipPointer);
            haveSkipped = true;
        }

        // Scan the skip data, remembering the last skip point seen before the target.
        int prevSkipDoc = skipDoc;
        long prevFreqPointer = freqStream.GetFilePointer();
        long prevProxPointer = -1;
        int skippedEntries = -1 - (count % skipInterval);

        while (target > skipDoc)
        {
            prevSkipDoc = skipDoc;
            prevFreqPointer = freqPointer;
            prevProxPointer = proxPointer;

            if (skipDoc != 0 && skipDoc >= doc)
            {
                skippedEntries += skipInterval;
            }

            if (skipCount >= numSkips)
            {
                break;
            }

            // Skip entries are stored as deltas against the previous entry.
            skipDoc += skipStream.ReadVInt();
            freqPointer += skipStream.ReadVInt();
            proxPointer += skipStream.ReadVInt();
            skipCount++;
        }

        // Jump only when the remembered skip point lies ahead of the current position.
        if (prevFreqPointer > freqStream.GetFilePointer())
        {
            freqStream.Seek(prevFreqPointer);
            SkipProx(prevProxPointer);

            doc = prevSkipDoc;
            count += skippedEntries;
        }
    }

    // Done skipping: scan linearly. Next() is always called at least once.
    while (true)
    {
        if (!Next())
        {
            return false;
        }
        if (target <= doc)
        {
            return true;
        }
    }
}
/// <summary>
/// Clears the entry table and closes the underlying stream.
/// </summary>
/// <exception cref="System.IO.IOException">The reader has already been closed (stream is null).</exception>
public override void Close()
{
    lock (this)
    {
        if (stream != null)
        {
            entries.Clear();
            stream.Close();
            // A null stream marks this reader as closed for any later call.
            stream = null;
            return;
        }

        throw new System.IO.IOException("Already closed");
    }
}
/// <summary>
/// Creates a term-positions enumerator for <paramref name="p"/>; the base
/// constructor runs first, then a clone of the parent's proximity stream is taken.
/// </summary>
/// <param name="p">Segment reader passed to the base constructor.</param>
internal SegmentTermPositions(SegmentReader p) : base(p)
{
    // Work on a private clone of the parent's proximity stream.
    InputStream clonedProx = (InputStream)parent.proxStream.Clone();
    this.proxStream = clonedProx;
}