internal override void Seek(TermInfo ti) { base.Seek(ti); if (ti != null) proxStream.Seek(ti.proxPointer); proxCount = 0; }
/// <summary>Retrieve the length (in bytes) of the tvd and tvf /// entries for the next numDocs starting with /// startDocID. This is used for bulk copying when /// merging segments, if the field numbers are /// congruent. Once this returns, the tvf & tvd streams /// are seeked to the startDocID. /// </summary> internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs) { if (tvx == null) { for (int i = 0; i < tvdLengths.Length; i++) { tvdLengths[i] = 0; } for (int i = 0; i < tvfLengths.Length; i++) { tvfLengths[i] = 0; } return; } // SegmentMerger calls canReadRawDocs() first and should // not call us if that returns false. if (format < FORMAT_VERSION2) { throw new System.SystemException("cannot read raw docs with older term vector formats"); } SeekTvx(startDocID); long tvdPosition = tvx.ReadLong(); tvd.Seek(tvdPosition); long tvfPosition = tvx.ReadLong(); tvf.Seek(tvfPosition); long lastTvdPosition = tvdPosition; long lastTvfPosition = tvfPosition; int count = 0; while (count < numDocs) { int docID = docStoreOffset + startDocID + count + 1; System.Diagnostics.Debug.Assert(docID <= numTotalDocs); if (docID < numTotalDocs) { tvdPosition = tvx.ReadLong(); tvfPosition = tvx.ReadLong(); } else { tvdPosition = tvd.Length(); tvfPosition = tvf.Length(); System.Diagnostics.Debug.Assert(count == numDocs - 1); } tvdLengths[count] = (int)(tvdPosition - lastTvdPosition); tvfLengths[count] = (int)(tvfPosition - lastTvfPosition); count++; lastTvdPosition = tvdPosition; lastTvfPosition = tvfPosition; } }
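A minimal sketch of how a segment merger might consume RawDocs for a bulk copy. The reader/output parameter types, the GetTvdStream()/GetTvfStream() accessors, and the CopyBytes call are assumptions based on how such a method is typically used, not the exact merger code.
// Hedged sketch: bulk-copy the term vectors for one chunk of documents during a merge.
// The reader/output types and the GetTvdStream()/GetTvfStream() accessors are assumed names.
static void CopyVectorChunk(TermVectorsReader reader, IndexOutput tvdOutput, IndexOutput tvfOutput, int startDocID, int numDocs)
{
    int[] tvdLengths = new int[numDocs];
    int[] tvfLengths = new int[numDocs];
    reader.RawDocs(tvdLengths, tvfLengths, startDocID, numDocs);
    // RawDocs leaves the reader's tvd/tvf streams positioned at startDocID, so the
    // per-document lengths can be summed and the raw bytes copied verbatim.
    long tvdBytes = 0, tvfBytes = 0;
    for (int i = 0; i < numDocs; i++) { tvdBytes += tvdLengths[i]; tvfBytes += tvfLengths[i]; }
    tvdOutput.CopyBytes(reader.GetTvdStream(), tvdBytes);
    tvfOutput.CopyBytes(reader.GetTvfStream(), tvfBytes);
}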
private void SkipPayload() { if (needToLoadPayload && payloadLength > 0) { proxStream.Seek(proxStream.GetFilePointer() + payloadLength); } needToLoadPayload = false; }
public virtual void TestClonedStreamsClosing() { SetUp_2(); CompoundFileReader cr = new CompoundFileReader(dir, "f.comp"); // basic clone IndexInput expected = dir.OpenInput("f11"); // this test only works for FSIndexInput Assert.IsTrue(_TestHelper.IsSimpleFSIndexInput(expected)); Assert.IsTrue(_TestHelper.IsSimpleFSIndexInputOpen(expected)); IndexInput one = cr.OpenInput("f11"); Assert.IsTrue(IsCSIndexInputOpen(one)); IndexInput two = (IndexInput)one.Clone(); Assert.IsTrue(IsCSIndexInputOpen(two)); AssertSameStreams("basic clone one", expected, one); expected.Seek(0); AssertSameStreams("basic clone two", expected, two); // Now close the first stream one.Close(); Assert.IsTrue(IsCSIndexInputOpen(one), "Only close when cr is closed"); // The following should really fail since we couldn't expect to // access a file once close has been called on it (regardless of // buffering and/or clone magic) expected.Seek(0); two.Seek(0); AssertSameStreams("basic clone two/2", expected, two); // Now close the compound reader cr.Close(); Assert.IsFalse(IsCSIndexInputOpen(one), "Now closed one"); Assert.IsFalse(IsCSIndexInputOpen(two), "Now closed two"); // The following may also fail since the compound stream is closed expected.Seek(0); two.Seek(0); //assertSameStreams("basic clone two/3", expected, two); // Now close the second clone two.Close(); expected.Seek(0); two.Seek(0); //assertSameStreams("basic clone two/4", expected, two); expected.Close(); }
private void SeekTvx(int docNum) { if (format < FORMAT_VERSION2) { tvx.Seek((docNum + docStoreOffset) * 8L + FORMAT_SIZE); } else { tvx.Seek((docNum + docStoreOffset) * 16L + FORMAT_SIZE); } }
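A small self-contained sketch of the stride arithmetic behind SeekTvx; FORMAT_SIZE is assumed here to be the 4-byte format header written at the start of the tvx file.
// Minimal sketch of the tvx offset arithmetic used by SeekTvx above.
static long TvxOffset(int docNum, int docStoreOffset, bool formatVersion2, long formatSize = 4)
{
    // Pre-FORMAT_VERSION2: one 8-byte tvd pointer per document.
    // FORMAT_VERSION2 and later: a 16-byte pair (tvd pointer + tvf pointer) per document.
    long stride = formatVersion2 ? 16L : 8L;
    return (docNum + docStoreOffset) * stride + formatSize;
}
// Example: docNum = 5, docStoreOffset = 0, FORMAT_VERSION2 -> 5 * 16 + 4 = 84.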
/// <summary>The value of the field as a String, or null. If null, the Reader value, /// binary value, or TokenStream value is used. Exactly one of stringValue(), /// readerValue(), binaryValue(), and tokenStreamValue() must be set. /// </summary> public override System.String StringValue() { Enclosing_Instance.EnsureOpen(); if (fieldsData == null) { IndexInput localFieldsStream = GetFieldStream(); try { localFieldsStream.Seek(pointer); if (isCompressed) { byte[] b = new byte[toRead]; localFieldsStream.ReadBytes(b, 0, b.Length); fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b)); } else { //read in chars b/c we already know the length we need to read char[] chars = new char[toRead]; localFieldsStream.ReadChars(chars, 0, toRead); fieldsData = new System.String(chars); } } catch (System.IO.IOException e) { throw new FieldReaderException(e); } } return(fieldsData is System.String ? (System.String)fieldsData : null); }
/// <summary>The value of the field in Binary, or null. If null, the Reader value, /// String value, or TokenStream value is used. Exactly one of stringValue(), /// readerValue(), binaryValue(), and tokenStreamValue() must be set. /// </summary> public override byte[] BinaryValue() { Enclosing_Instance.EnsureOpen(); if (fieldsData == null) { byte[] b = new byte[toRead]; IndexInput localFieldsStream = GetFieldStream(); // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people // since they are already handling this exception when getting the document try { localFieldsStream.Seek(pointer); localFieldsStream.ReadBytes(b, 0, b.Length); if (isCompressed == true) { fieldsData = Enclosing_Instance.Uncompress(b); } else { fieldsData = b; } } catch (System.IO.IOException e) { throw new FieldReaderException(e); } } return(fieldsData is byte[] ? (byte[])fieldsData : null); }
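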
/// <summary>Read norms into a pre-allocated array. </summary> public override void Norms(System.String field, byte[] bytes, int offset) { lock (this) { Norm norm = (Norm)norms[field]; if (norm == null) { Array.Copy(FakeNorms(), 0, bytes, offset, MaxDoc()); return; } if (norm.bytes != null) { // can copy from cache Array.Copy(norm.bytes, 0, bytes, offset, MaxDoc()); return; } IndexInput normStream = (IndexInput)norm.in_Renamed.Clone(); try { // read from disk normStream.Seek(0); normStream.ReadBytes(bytes, offset, MaxDoc()); } finally { normStream.Close(); } } }
private void Demo_FSIndexInputBug(Directory fsdir, System.String file) { // Setup the test file - we need more than 1024 bytes IndexOutput os = fsdir.CreateOutput(file, null); for (int i = 0; i < 2000; i++) { os.WriteByte((byte)i); } os.Close(); IndexInput in_Renamed = fsdir.OpenInput(file, null); // This read primes the buffer in IndexInput byte b = in_Renamed.ReadByte(null); // Close the file in_Renamed.Close(); // ERROR: this call should fail, but succeeds because the buffer // is still filled b = in_Renamed.ReadByte(null); // ERROR: this call should fail, but succeeds for some reason as well in_Renamed.Seek(1099, null); // OK: this call correctly fails. We are now past the 1024 internal // buffer, so an actual IO is attempted, which fails Assert.Throws <NullReferenceException>(() => in_Renamed.ReadByte(null), "expected readByte() to throw exception"); }
internal void Seek(long pointer, int p, Term t, TermInfo ti) { input.Seek(pointer); position = p; termBuffer.Set(t); prevBuffer.Reset(); termInfo.Set(ti); }
private void AssertSameStreams(System.String msg, IndexInput expected, IndexInput actual, long seekTo) { if (seekTo >= 0 && seekTo < expected.Length()) { expected.Seek(seekTo); actual.Seek(seekTo); AssertSameStreams(msg + ", seek(mid)", expected, actual); } }
public virtual void TestReadPastEOF() { SetUp_2(); CompoundFileReader cr = new CompoundFileReader(dir, "f.comp", null); IndexInput is_Renamed = cr.OpenInput("f2", null); is_Renamed.Seek(is_Renamed.Length(null) - 10, null); byte[] b = new byte[100]; is_Renamed.ReadBytes(b, 0, 10, null); Assert.Throws <System.IO.IOException>(() => is_Renamed.ReadByte(null), "Single byte read past end of file"); is_Renamed.Seek(is_Renamed.Length(null) - 10, null); Assert.Throws <System.IO.IOException>(() => is_Renamed.ReadBytes(b, 0, 50, null), "Block read past end of file"); is_Renamed.Close(); cr.Close(); }
/// <summary>Expert: implements buffer refill. Reads bytes from the current /// position in the input. /// </summary> /// <param name="b">the array to read bytes into /// </param> /// <param name="offset">the offset in the array to start storing bytes /// </param> /// <param name="len">the number of bytes to read /// </param> public override void ReadInternal(byte[] b, int offset, int len) { long start = GetFilePointer(); if (start + len > length) { throw new System.IO.IOException("read past EOF"); } base_Renamed.Seek(fileOffset + start); base_Renamed.ReadBytes(b, offset, len, false); }
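A minimal, self-contained sketch of the slice arithmetic in ReadInternal above: the compound-file entry occupies [fileOffset, fileOffset + length) of the underlying file, so a logical position start maps to the physical position fileOffset + start.
// Hedged sketch of the logical-to-physical mapping performed by ReadInternal.
static long PhysicalPosition(long fileOffset, long length, long start, int len)
{
    if (start + len > length)
        throw new System.IO.IOException("read past EOF");   // same bounds check as above
    return fileOffset + start;                               // where the underlying stream is seeked
}
// Example: an entry stored at fileOffset = 1024 with length = 200; reading 16 bytes
// starting at logical position 40 seeks the underlying stream to 1064.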
internal virtual void Seek(TermInfo ti) { count = 0; if (ti == null) { df = 0; } else { df = ti.docFreq; doc = 0; skipDoc = 0; skipCount = 0; numSkips = df / skipInterval; freqPointer = ti.freqPointer; proxPointer = ti.proxPointer; skipPointer = freqPointer + ti.skipOffset; freqStream.Seek(freqPointer); haveSkipped = false; } }
internal virtual void Seek(TermInfo ti, Term term) { count = 0; FieldInfo fi = parent.fieldInfos.FieldInfo(term.field); currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false; if (ti == null) { df = 0; } else { df = ti.docFreq; doc = 0; freqBasePointer = ti.freqPointer; proxBasePointer = ti.proxPointer; skipPointer = freqBasePointer + ti.skipOffset; freqStream.Seek(freqBasePointer); haveSkipped = false; } }
public override byte[] GetBinaryValue(byte[] result) { Enclosing_Instance.EnsureOpen(); if (isBinary) { if (fieldsData == null) { // Allocate new buffer if result is null or too small byte[] b; if (result == null || result.Length < toRead) { b = new byte[toRead]; } else { b = result; } IndexInput localFieldsStream = GetFieldStream(); // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people // since they are already handling this exception when getting the document try { localFieldsStream.Seek(pointer); localFieldsStream.ReadBytes(b, 0, toRead); if (isCompressed == true) { fieldsData = Enclosing_Instance.Uncompress(b); } else { fieldsData = b; } } catch (System.IO.IOException e) { throw new FieldReaderException(e); } binaryOffset = 0; binaryLength = toRead; } return((byte[])fieldsData); } else { return(null); } }
/// <summary> Construct a FieldInfos object using the directory and the name of the file /// IndexInput /// </summary> /// <param name="d">The directory to open the IndexInput from /// </param> /// <param name="name">The name of the file to open the IndexInput from in the Directory /// </param> /// <throws> IOException </throws> public /*internal*/ FieldInfos(Directory d, String name) { IndexInput input = d.OpenInput(name); try { try { Read(input, name); } catch (System.IO.IOException) { if (format == FORMAT_PRE) { // LUCENE-1623: FORMAT_PRE (before there was a // format) may be 2.3.2 (pre-utf8) or 2.4.x (utf8) // encoding; retry with input set to pre-utf8 input.Seek(0); input.SetModifiedUTF8StringsMode(); byNumber.Clear(); byName.Clear(); bool rethrow = false; try { Read(input, name); } catch (Exception) { // Ignore any new exception & set to throw original IOE rethrow = true; } if (rethrow) { // Preserve stack trace throw; } } else { // The IOException cannot be caused by // LUCENE-1623, so re-throw it throw; } } } finally { input.Close(); } }
public /*internal*/ Document Doc(int n, FieldSelector fieldSelector) { SeekIndex(n); long position = indexStream.ReadLong(); fieldsStream.Seek(position); Document doc = new Document(); int numFields = fieldsStream.ReadVInt(); for (int i = 0; i < numFields; i++) { int fieldNumber = fieldsStream.ReadVInt(); FieldInfo fi = fieldInfos.FieldInfo(fieldNumber); FieldSelectorResult acceptField = fieldSelector == null?FieldSelectorResult.LOAD:fieldSelector.Accept(fi.name); byte bits = fieldsStream.ReadByte(); System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY); bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0; bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0; bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0; //TODO: Find an alternative approach here if this list continues to grow beyond the //list of 5 or 6 currently here. See Lucene 762 for discussion if (acceptField.Equals(FieldSelectorResult.LOAD)) { AddField(doc, fi, binary, compressed, tokenize); } else if (acceptField.Equals(FieldSelectorResult.LOAD_FOR_MERGE)) { AddFieldForMerge(doc, fi, binary, compressed, tokenize); } else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK)) { AddField(doc, fi, binary, compressed, tokenize); break; //Get out of this loop } else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD)) { AddFieldLazy(doc, fi, binary, compressed, tokenize); } else if (acceptField.Equals(FieldSelectorResult.SIZE)) { SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed)); } else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK)) { AddFieldSize(doc, fi, binary, compressed); break; } else { SkipField(binary, compressed); } } return(doc); }
// It is not always necessary to move the prox pointer // to a new document after the freq pointer has been moved. // Consider for example a phrase query with two terms: // the freq pointer for term 1 has to move to document x // to answer the question of whether the term occurs in that document. // Only if term 2 also matches document x do the positions have to be // read to figure out whether term 1 and term 2 appear next // to each other in document x and thus satisfy the query. // So we move the prox pointer lazily to the document // as soon as positions are requested. private void LazySkip() { if (lazySkipPointer != 0) { proxStream.Seek(lazySkipPointer); lazySkipPointer = 0; } if (lazySkipDocCount != 0) { SkipPositions(lazySkipDocCount); lazySkipDocCount = 0; } }
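A minimal sketch of the lazy-seek pattern the comment above describes: remember the target prox pointer when the freq pointer moves, and only seek the prox stream once positions are actually requested. Illustrative only; the names here are not the real fields.
// Hedged sketch: defer the prox seek until positions are needed.
class LazyProxSeeker
{
    private readonly System.IO.Stream prox;
    private long pendingPointer;                  // 0 means nothing is deferred
    public LazyProxSeeker(System.IO.Stream prox) { this.prox = prox; }
    public void OnFreqPointerMoved(long proxPointer) { pendingPointer = proxPointer; }
    public void BeforeReadingPositions()          // called from the equivalent of NextPosition()
    {
        if (pendingPointer != 0)
        {
            prox.Seek(pendingPointer, System.IO.SeekOrigin.Begin);
            pendingPointer = 0;
        }
    }
}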
public virtual void TestReadPastEOF() { SetUp_2(); CompoundFileReader cr = new CompoundFileReader(dir, "f.comp"); IndexInput is_Renamed = cr.OpenInput("f2"); is_Renamed.Seek(is_Renamed.Length() - 10); byte[] b = new byte[100]; is_Renamed.ReadBytes(b, 0, 10); try { byte test = is_Renamed.ReadByte(); Assert.Fail("Single byte read past end of file"); } catch (System.IO.IOException e) { /* success */ //System.out.println("SUCCESS: single byte read past end of file: " + e); } is_Renamed.Seek(is_Renamed.Length() - 10); try { is_Renamed.ReadBytes(b, 0, 50); Assert.Fail("Block read past end of file"); } catch (System.IO.IOException e) { /* success */ //System.out.println("SUCCESS: block read past end of file: " + e); } is_Renamed.Close(); cr.Close(); }
/// <summary>The value of the field as a String, or null. If null, the Reader value, /// binary value, or TokenStream value is used. Exactly one of stringValue(), /// readerValue(), binaryValue(), and tokenStreamValue() must be set. /// </summary> public override System.String StringValue() { Enclosing_Instance.EnsureOpen(); if (isBinary) { return(null); } else { if (fieldsData == null) { IndexInput localFieldsStream = GetFieldStream(); try { localFieldsStream.Seek(pointer); if (isCompressed) { byte[] b = new byte[toRead]; localFieldsStream.ReadBytes(b, 0, b.Length); fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b)); } else { if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { byte[] bytes = new byte[toRead]; localFieldsStream.ReadBytes(bytes, 0, toRead); fieldsData = System.Text.Encoding.UTF8.GetString(bytes); } else { //read in chars b/c we already know the length we need to read char[] chars = new char[toRead]; localFieldsStream.ReadChars(chars, 0, toRead); fieldsData = new System.String(chars); } } } catch (System.IO.IOException e) { throw new FieldReaderException(e); } } } return((string)fieldsData); }
public override long Get(int index) { int blockOffset = index / ValuesPerBlock; long skip = ((long)blockOffset) << 3; try { @in.Seek(StartPointer + skip); long block = @in.ReadLong(); int offsetInBlock = index % ValuesPerBlock; return(((long)((ulong)block >> (offsetInBlock * bitsPerValue))) & Mask); } catch (System.IO.IOException e) { throw new InvalidOperationException("failed", e); } }
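A self-contained sketch of the packed-value arithmetic in Get above, assuming bitsPerValue is less than 64; ValuesPerBlock is the integer division 64 / bitsPerValue and values never span block boundaries.
// Hedged sketch: extract the index-th value from an array of 64-bit blocks,
// where each block holds ValuesPerBlock values packed from the low bits upward.
static long Unpack(long[] blocks, int bitsPerValue, int index)
{
    int valuesPerBlock = 64 / bitsPerValue;
    long mask = (1L << bitsPerValue) - 1;
    long block = blocks[index / valuesPerBlock];
    int offsetInBlock = index % valuesPerBlock;
    return (long)((ulong)block >> (offsetInBlock * bitsPerValue)) & mask;
}
// Example: bitsPerValue = 16 gives 4 values per block, so index 10 lives in block 2
// at offsetInBlock 2, i.e. bits 32..47 of that block; the `skip` above is then 2 * 8 bytes.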
/// <summary> /// returns an address instance for prefix-compressed binary values. /// @lucene.internal /// </summary> protected internal virtual MonotonicBlockPackedReader GetIntervalInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) { MonotonicBlockPackedReader addresses; long interval = bytes.AddressInterval; lock (AddressInstances) { MonotonicBlockPackedReader addrInstance; if (!AddressInstances.TryGetValue(field.Number, out addrInstance)) { data.Seek(bytes.AddressesOffset); long size; if (bytes.Count % interval == 0) { size = bytes.Count / interval; } else { size = 1L + bytes.Count / interval; } addrInstance = new MonotonicBlockPackedReader(data, bytes.PackedIntsVersion, bytes.BlockSize, size, false); AddressInstances[field.Number] = addrInstance; RamBytesUsed_Renamed.AddAndGet(addrInstance.RamBytesUsed() + RamUsageEstimator.NUM_BYTES_INT); } addresses = addrInstance; } return addresses; }
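The if/else in GetIntervalInstance simply computes a ceiling division, ceil(Count / interval). An equivalent one-liner (assuming interval > 0) for reference:
// Hedged sketch: number of address entries needed when one address is stored
// every `interval` values.
static long CeilDiv(long count, long interval)
{
    return (count + interval - 1) / interval;
}
// Example: Count = 10, interval = 4 -> 3 entries (addresses of values 0, 4 and 8).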
/// <summary> /// returns an address instance for sortedset ordinal lists /// @lucene.internal /// </summary> protected internal virtual MonotonicBlockPackedReader GetOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry) { MonotonicBlockPackedReader ordIndex; lock (OrdIndexInstances) { MonotonicBlockPackedReader ordIndexInstance; if (!OrdIndexInstances.TryGetValue(field.Number, out ordIndexInstance)) { data.Seek(entry.Offset); ordIndexInstance = new MonotonicBlockPackedReader(data, entry.PackedIntsVersion, entry.BlockSize, entry.Count, false); OrdIndexInstances[field.Number] = ordIndexInstance; RamBytesUsed_Renamed.AddAndGet(ordIndexInstance.RamBytesUsed() + RamUsageEstimator.NUM_BYTES_INT); } ordIndex = ordIndexInstance; } return ordIndex; }
internal virtual TermsEnum GetTermsEnum(IndexInput input) { input.Seek(Bytes.Offset); return new TermsEnumAnonymousInnerClassHelper(this, input); }
private void SeekIndex(int docID) { indexStream.Seek(formatSize + (docID + docStoreOffset) * 8L); }
public /*internal*/ Document Doc(int n) { indexStream.Seek(n * 8L); long position = indexStream.ReadLong(); fieldsStream.Seek(position); Document doc = new Document(); int numFields = fieldsStream.ReadVInt(); for (int i = 0; i < numFields; i++) { int fieldNumber = fieldsStream.ReadVInt(); FieldInfo fi = fieldInfos.FieldInfo(fieldNumber); byte bits = fieldsStream.ReadByte(); bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0; bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0; if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0) { byte[] b = new byte[fieldsStream.ReadVInt()]; fieldsStream.ReadBytes(b, 0, b.Length); if (compressed) { doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS)); } else { doc.Add(new Field(fi.name, b, Field.Store.YES)); } } else { Field.Index index; Field.Store store = Field.Store.YES; if (fi.isIndexed && tokenize) { index = Field.Index.TOKENIZED; } else if (fi.isIndexed && !tokenize) { index = Field.Index.UN_TOKENIZED; } else { index = Field.Index.NO; } Field.TermVector termVector = null; if (fi.storeTermVector) { if (fi.storeOffsetWithTermVector) { if (fi.storePositionWithTermVector) { termVector = Field.TermVector.WITH_POSITIONS_OFFSETS; } else { termVector = Field.TermVector.WITH_OFFSETS; } } else if (fi.storePositionWithTermVector) { termVector = Field.TermVector.WITH_POSITIONS; } else { termVector = Field.TermVector.YES; } } else { termVector = Field.TermVector.NO; } if (compressed) { store = Field.Store.COMPRESS; byte[] b = new byte[fieldsStream.ReadVInt()]; fieldsStream.ReadBytes(b, 0, b.Length); Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector); f.SetOmitNorms(fi.omitNorms); doc.Add(f); } else { Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector); f.SetOmitNorms(fi.omitNorms); doc.Add(f); } } } return(doc); }
/// <summary>Optimized implementation. </summary> public virtual bool SkipTo(int target) { if (df >= skipInterval) { // optimized case if (skipStream == null) { skipStream = (IndexInput)freqStream.Clone(); // lazily clone } if (!haveSkipped) { // lazily seek skip stream skipStream.Seek(skipPointer); haveSkipped = true; } // scan skip data int lastSkipDoc = skipDoc; long lastFreqPointer = freqStream.GetFilePointer(); long lastProxPointer = -1; int numSkipped = -1 - (count % skipInterval); while (target > skipDoc) { lastSkipDoc = skipDoc; lastFreqPointer = freqPointer; lastProxPointer = proxPointer; if (skipDoc != 0 && skipDoc >= doc) { numSkipped += skipInterval; } if (skipCount >= numSkips) { break; } skipDoc += skipStream.ReadVInt(); freqPointer += skipStream.ReadVInt(); proxPointer += skipStream.ReadVInt(); skipCount++; } // if we found something to skip, then skip it if (lastFreqPointer > freqStream.GetFilePointer()) { freqStream.Seek(lastFreqPointer); SkipProx(lastProxPointer); doc = lastSkipDoc; count += numSkipped; } } // done skipping, now just scan do { if (!Next()) { return(false); } }while (target > doc); return(true); }
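A minimal, self-contained sketch of the skip-list scan inside SkipTo above: every skipInterval-th document has a skip entry holding VInt deltas of (doc, freqPointer, proxPointer); the scan keeps the last entry that is still below the target, which is where the freq/prox streams get seeked before the final linear Next() loop. Illustrative only, not the exact SkipTo bookkeeping.
// Hedged sketch: accumulate delta-coded skip entries and return the last state
// strictly below the target document.
struct SkipEntry { public int DocDelta; public long FreqDelta; public long ProxDelta; }
static void ScanSkips(SkipEntry[] skips, int target,
                      out int lastDoc, out long lastFreq, out long lastProx)
{
    int doc = 0; long freq = 0, prox = 0;
    lastDoc = 0; lastFreq = 0; lastProx = 0;
    foreach (SkipEntry s in skips)
    {
        if (doc >= target) break;                // first entry at/after target stops the scan
        lastDoc = doc; lastFreq = freq; lastProx = prox;
        doc += s.DocDelta; freq += s.FreqDelta; prox += s.ProxDelta;
    }
}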
public virtual void TestRandomAccessClones() { SetUp_2(); CompoundFileReader cr = new CompoundFileReader(dir, "f.comp"); // Open two files IndexInput e1 = cr.OpenInput("f11"); IndexInput e2 = cr.OpenInput("f3"); IndexInput a1 = (IndexInput)e1.Clone(); IndexInput a2 = (IndexInput)e2.Clone(); // Seek the first pair e1.Seek(100); a1.Seek(100); Assert.AreEqual(100, e1.GetFilePointer()); Assert.AreEqual(100, a1.GetFilePointer()); byte be1 = e1.ReadByte(); byte ba1 = a1.ReadByte(); Assert.AreEqual(be1, ba1); // Now seek the second pair e2.Seek(1027); a2.Seek(1027); Assert.AreEqual(1027, e2.GetFilePointer()); Assert.AreEqual(1027, a2.GetFilePointer()); byte be2 = e2.ReadByte(); byte ba2 = a2.ReadByte(); Assert.AreEqual(be2, ba2); // Now make sure the first one didn't move Assert.AreEqual(101, e1.GetFilePointer()); Assert.AreEqual(101, a1.GetFilePointer()); be1 = e1.ReadByte(); ba1 = a1.ReadByte(); Assert.AreEqual(be1, ba1); // Now move the first one again, past the buffer length e1.Seek(1910); a1.Seek(1910); Assert.AreEqual(1910, e1.GetFilePointer()); Assert.AreEqual(1910, a1.GetFilePointer()); be1 = e1.ReadByte(); ba1 = a1.ReadByte(); Assert.AreEqual(be1, ba1); // Now make sure the second set didn't move Assert.AreEqual(1028, e2.GetFilePointer()); Assert.AreEqual(1028, a2.GetFilePointer()); be2 = e2.ReadByte(); ba2 = a2.ReadByte(); Assert.AreEqual(be2, ba2); // Move the second set back, again crossing the buffer size e2.Seek(17); a2.Seek(17); Assert.AreEqual(17, e2.GetFilePointer()); Assert.AreEqual(17, a2.GetFilePointer()); be2 = e2.ReadByte(); ba2 = a2.ReadByte(); Assert.AreEqual(be2, ba2); // Finally, make sure the first set didn't move Assert.AreEqual(1911, e1.GetFilePointer()); Assert.AreEqual(1911, a1.GetFilePointer()); be1 = e1.ReadByte(); ba1 = a1.ReadByte(); Assert.AreEqual(be1, ba1); e1.Close(); e2.Close(); a1.Close(); a2.Close(); cr.Close(); }
/// <summary> Retrieve the term vector for the given document and field</summary> /// <param name="docNum">The document number to retrieve the vector for /// </param> /// <param name="field">The field within the document to retrieve /// </param> /// <returns> The TermFreqVector for the document and field or null if there is no termVector for this field. /// </returns> /// <throws> IOException if there is an error reading the term vector files </throws> public /*internal*/ virtual TermFreqVector Get(int docNum, System.String field) { // Check if no term vectors are available for this segment at all int fieldNumber = fieldInfos.FieldNumber(field); TermFreqVector result = null; if (tvx != null) { //We need to account for the FORMAT_SIZE when seeking in the tvx //We don't need to do this in other seeks because we already have the // file pointer //that was written in another file tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE); //System.out.println("TVX Pointer: " + tvx.getFilePointer()); long position = tvx.ReadLong(); tvd.Seek(position); int fieldCount = tvd.ReadVInt(); //System.out.println("Num Fields: " + fieldCount); // There are only a few fields per document. We opt for a full scan // rather than requiring that they be ordered. We need to read through // all of the fields anyway to get to the tvf pointers. int number = 0; int found = -1; for (int i = 0; i < fieldCount; i++) { if (tvdFormat == TermVectorsWriter.FORMAT_VERSION) { number = tvd.ReadVInt(); } else { number += tvd.ReadVInt(); } if (number == fieldNumber) { found = i; } } // This field, although valid in the segment, was not found in this // document if (found != -1) { // Compute position in the tvf file position = 0; for (int i = 0; i <= found; i++) { position += tvd.ReadVLong(); } result = ReadTermVector(field, position); } else { //System.out.println("Field not found"); } } else { //System.out.println("No tvx file"); } return(result); }
public override void SeekInternal(long pos) { //simOutage(); delegate_Renamed.Seek(pos); }
/// <summary> </summary> /// <param name="field">The field to read in /// </param> /// <param name="tvfPointer">The pointer within the tvf file where we should start reading /// </param> /// <returns> The TermVector located at that position /// </returns> /// <throws> IOException </throws> private SegmentTermVector ReadTermVector(System.String field, long tvfPointer) { // Now read the data from specified position //We don't need to offset by the FORMAT here since the pointer already includes the offset tvf.Seek(tvfPointer); int numTerms = tvf.ReadVInt(); //System.out.println("Num Terms: " + numTerms); // If no terms - return a constant empty termvector. However, this should never occur! if (numTerms == 0) { return(new SegmentTermVector(field, null, null)); } bool storePositions; bool storeOffsets; if (tvfFormat == TermVectorsWriter.FORMAT_VERSION) { byte bits = tvf.ReadByte(); storePositions = (bits & TermVectorsWriter.STORE_POSITIONS_WITH_TERMVECTOR) != 0; storeOffsets = (bits & TermVectorsWriter.STORE_OFFSET_WITH_TERMVECTOR) != 0; } else { tvf.ReadVInt(); storePositions = false; storeOffsets = false; } System.String[] terms = new System.String[numTerms]; int[] termFreqs = new int[numTerms]; // we may not need these, but declare them int[][] positions = null; TermVectorOffsetInfo[][] offsets = null; if (storePositions) { positions = new int[numTerms][]; } if (storeOffsets) { offsets = new TermVectorOffsetInfo[numTerms][]; } int start = 0; int deltaLength = 0; int totalLength = 0; char[] buffer = new char[10]; // init the buffer with a length of 10 character char[] previousBuffer = new char[] {}; for (int i = 0; i < numTerms; i++) { start = tvf.ReadVInt(); deltaLength = tvf.ReadVInt(); totalLength = start + deltaLength; if (buffer.Length < totalLength) { // increase buffer buffer = null; // give a hint to garbage collector buffer = new char[totalLength]; if (start > 0) { // just copy if necessary Array.Copy(previousBuffer, 0, buffer, 0, start); } } tvf.ReadChars(buffer, start, deltaLength); terms[i] = new System.String(buffer, 0, totalLength); previousBuffer = buffer; int freq = tvf.ReadVInt(); termFreqs[i] = freq; if (storePositions) { //read in the positions int[] pos = new int[freq]; positions[i] = pos; int prevPosition = 0; for (int j = 0; j < freq; j++) { pos[j] = prevPosition + tvf.ReadVInt(); prevPosition = pos[j]; } } if (storeOffsets) { TermVectorOffsetInfo[] offs = new TermVectorOffsetInfo[freq]; offsets[i] = offs; int prevOffset = 0; for (int j = 0; j < freq; j++) { int startOffset = prevOffset + tvf.ReadVInt(); int endOffset = startOffset + tvf.ReadVInt(); offs[j] = new TermVectorOffsetInfo(startOffset, endOffset); prevOffset = endOffset; } } } SegmentTermVector tv; if (storePositions || storeOffsets) { tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets); } else { tv = new SegmentTermVector(field, terms, termFreqs); } return(tv); }
/// <summary> </summary> /// <param name="field">The field to read in /// </param> /// <param name="tvfPointer">The pointer within the tvf file where we should start reading /// </param> /// <param name="mapper">The mapper used to map the TermVector /// </param> /// <returns> The TermVector located at that position /// </returns> /// <throws> IOException </throws> private void ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper) { // Now read the data from specified position //We don't need to offset by the FORMAT here since the pointer already includes the offset tvf.Seek(tvfPointer); int numTerms = tvf.ReadVInt(); //System.out.println("Num Terms: " + numTerms); // If no terms - return a constant empty termvector. However, this should never occur! if (numTerms == 0) { return; } bool storePositions; bool storeOffsets; if (tvfFormat == FORMAT_VERSION) { byte bits = tvf.ReadByte(); storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0; storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; } else { tvf.ReadVInt(); storePositions = false; storeOffsets = false; } mapper.SetExpectations(field, numTerms, storeOffsets, storePositions); int start = 0; int deltaLength = 0; int totalLength = 0; char[] buffer = new char[10]; // init the buffer with a length of 10 character char[] previousBuffer = new char[] {}; for (int i = 0; i < numTerms; i++) { start = tvf.ReadVInt(); deltaLength = tvf.ReadVInt(); totalLength = start + deltaLength; if (buffer.Length < totalLength) { // increase buffer buffer = null; // give a hint to garbage collector buffer = new char[totalLength]; if (start > 0) { // just copy if necessary Array.Copy(previousBuffer, 0, buffer, 0, start); } } tvf.ReadChars(buffer, start, deltaLength); System.String term = new System.String(buffer, 0, totalLength); previousBuffer = buffer; int freq = tvf.ReadVInt(); int[] positions = null; if (storePositions) { //read in the positions //does the mapper even care about positions? if (mapper.IsIgnoringPositions() == false) { positions = new int[freq]; int prevPosition = 0; for (int j = 0; j < freq; j++) { positions[j] = prevPosition + tvf.ReadVInt(); prevPosition = positions[j]; } } else { //we need to skip over the positions. Since these are VInts, I don't believe there is anyway to know for sure how far to skip // for (int j = 0; j < freq; j++) { tvf.ReadVInt(); } } } TermVectorOffsetInfo[] offsets = null; if (storeOffsets) { //does the mapper even care about offsets? if (mapper.IsIgnoringOffsets() == false) { offsets = new TermVectorOffsetInfo[freq]; int prevOffset = 0; for (int j = 0; j < freq; j++) { int startOffset = prevOffset + tvf.ReadVInt(); int endOffset = startOffset + tvf.ReadVInt(); offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset); prevOffset = endOffset; } } else { for (int j = 0; j < freq; j++) { tvf.ReadVInt(); tvf.ReadVInt(); } } } mapper.Map(term, freq, offsets, positions); } }