/// <summary>Read a log file from start to end positions.</summary>
/// <remarks>
/// Read a log file from start to end positions. The offsets may be negative,
/// in which case they are relative to the end of the file. For example,
/// Reader(taskid, kind, 0, -1) is the entire file and
/// Reader(taskid, kind, -4197, -1) is the last 4196 bytes.
/// </remarks>
/// <param name="taskid">the id of the task to read the log file for</param>
/// <param name="kind">the kind of log to read</param>
/// <param name="start">the offset to read from (negative is relative to tail)</param>
/// <param name="end">the offset to read upto (negative is relative to tail)</param>
/// <param name="isCleanup">whether the attempt is cleanup attempt or not</param>
/// <exception cref="System.IO.IOException"/>
public Reader(TaskAttemptID taskid, TaskLog.LogName kind, long start, long end, bool
     isCleanup)
{
    // find the right log file
    TaskLog.LogFileDetail fileDetail = GetLogFileDetail(taskid, kind, isCleanup);
    // calculate the start and stop
    long size = fileDetail.length;
    // Negative offsets are tail-relative; size + 1 makes -1 mean "end of file"
    // (so [0, -1] covers the whole log, matching the <remarks> example above).
    if (start < 0)
    {
        start += size + 1;
    }
    if (end < 0)
    {
        end += size + 1;
    }
    // Clamp both offsets into [0, size] BEFORE rebasing: out-of-range requests
    // degrade to an empty or truncated read instead of throwing.
    start = Math.Max(0, Math.Min(start, size));
    end = Math.Max(0, Math.Min(end, size));
    // Rebase the logical log offsets onto the physical position of this task's
    // section within the (possibly shared) log file.
    start += fileDetail.start;
    end += fileDetail.start;
    bytesRemaining = end - start;
    // Open via SecureIOUtils so the file is read as the owner of the log dir
    // rather than the current process user.
    string owner = ObtainLogDirOwner(taskid);
    file = SecureIOUtils.OpenForRead(new FilePath(fileDetail.location, kind.ToString(
        )), owner, null);
    // skip upto start
    long pos = 0;
    while (pos < start)
    {
        long result = file.Skip(start - pos);
        // NOTE(review): many Skip/skip implementations signal "no progress" by
        // returning 0, not a negative value — if this stream's Skip returns 0 at
        // EOF, this loop could spin forever. Confirm the contract of file.Skip.
        if (result < 0)
        {
            // Treat a failed skip as "nothing left to read".
            bytesRemaining = 0;
            break;
        }
        pos += result;
    }
}
/// <summary>
/// Parses the gzip header of the prebuilt concat.gz fixture by hand
/// (magic bytes, compression method, FLG bits per the gzip format) and then
/// inflates the first member with a raw Inflater in nowrap mode, printing the
/// recovered plaintext.
/// </summary>
/// <exception cref="System.IO.IOException">if inflation of the deflated payload fails</exception>
public virtual void TestPrototypeInflaterGzip()
{
    CompressionCodec gzip = new GzipCodec();
    // used only for file extension
    localFs.Delete(workDir, true);
    // localFs = FileSystem instance
    System.Console.Out.WriteLine(ColorBrBlue + "testPrototypeInflaterGzip() using " +
        "non-native/Java Inflater and manual gzip header/trailer parsing" + ColorNormal);
    // copy prebuilt (correct!) version of concat.gz to HDFS
    string fn = "concat" + gzip.GetDefaultExtension();
    Path fnLocal = new Path(Runtime.GetProperty("test.concat.data", "/tmp"), fn);
    Path fnHDFS = new Path(workDir, fn);
    localFs.CopyFromLocalFile(fnLocal, fnHDFS);
    FileInputStream @in = new FileInputStream(fnLocal.ToString());
    // FIX: close the stream on every exit path (a failing assertion or I/O error
    // previously leaked the file handle, since Close() was only reached on success).
    try
    {
        NUnit.Framework.Assert.AreEqual("concat bytes available", 148, @in.Available());
        // should wrap all of this header-reading stuff in a running-CRC wrapper
        // (did so in BuiltInGzipDecompressor; see below)
        byte[] compressedBuf = new byte[256];
        int numBytesRead = @in.Read(compressedBuf, 0, 10);
        NUnit.Framework.Assert.AreEqual("header bytes read", 10, numBytesRead);
        // gzip magic (0x1f 0x8b) and compression method 8 (= deflate)
        NUnit.Framework.Assert.AreEqual("1st byte", unchecked((int)(0x1f)), compressedBuf[0] & unchecked((int)(0xff)));
        NUnit.Framework.Assert.AreEqual("2nd byte", unchecked((int)(0x8b)), compressedBuf[1] & unchecked((int)(0xff)));
        NUnit.Framework.Assert.AreEqual("3rd byte (compression method)", 8, compressedBuf[2] & unchecked((int)(0xff)));
        byte flags = unchecked((byte)(compressedBuf[3] & unchecked((int)(0xff))));
        if ((flags & unchecked((int)(0x04))) != 0)
        {
            // FEXTRA: two-byte little-endian XLEN, then XLEN bytes to skip
            numBytesRead = @in.Read(compressedBuf, 0, 2);
            NUnit.Framework.Assert.AreEqual("XLEN bytes read", 2, numBytesRead);
            int xlen = ((compressedBuf[1] << 8) | compressedBuf[0]) & unchecked((int)(0xffff));
            @in.Skip(xlen);
        }
        if ((flags & unchecked((int)(0x08))) != 0)
        {
            // FNAME: zero-terminated original filename
            while ((numBytesRead = @in.Read()) != 0)
            {
                NUnit.Framework.Assert.IsFalse("unexpected end-of-file while reading filename",
                    numBytesRead == -1);
            }
        }
        if ((flags & unchecked((int)(0x10))) != 0)
        {
            // FCOMMENT: zero-terminated comment
            while ((numBytesRead = @in.Read()) != 0)
            {
                NUnit.Framework.Assert.IsFalse("unexpected end-of-file while reading comment",
                    numBytesRead == -1);
            }
        }
        if ((flags & unchecked((int)(0xe0))) != 0)
        {
            // reserved FLG bits must be zero in a valid gzip header
            NUnit.Framework.Assert.IsTrue("reserved bits are set??", (flags & unchecked((int)(0xe0))) == 0);
        }
        if ((flags & unchecked((int)(0x02))) != 0)
        {
            // FHCRC: two-byte little-endian header CRC16 (read but not verified here)
            numBytesRead = @in.Read(compressedBuf, 0, 2);
            NUnit.Framework.Assert.AreEqual("CRC16 bytes read", 2, numBytesRead);
            int crc16 = ((compressedBuf[1] << 8) | compressedBuf[0]) & unchecked((int)(0xffff));
        }
        // ready to go! next bytes should be start of deflated stream, suitable
        // for Inflater
        numBytesRead = @in.Read(compressedBuf);
        // Inflater docs refer to a "dummy byte": no clue what that's about;
        // appears to work fine without one
        byte[] uncompressedBuf = new byte[256];
        // nowrap = true: raw deflate data, no zlib header/trailer expected
        Inflater inflater = new Inflater(true);
        inflater.SetInput(compressedBuf, 0, numBytesRead);
        try
        {
            int numBytesUncompressed = inflater.Inflate(uncompressedBuf);
            string outString = Sharpen.Runtime.GetStringForBytes(uncompressedBuf, 0, numBytesUncompressed, "UTF-8");
            System.Console.Out.WriteLine("uncompressed data of first gzip member = [" + outString + "]");
        }
        catch (SharpZipBaseException ex)
        {
            // FIX: preserve the original exception as the inner exception instead of
            // discarding its type and stack trace.
            throw new IOException(ex.Message, ex);
        }
    }
    finally
    {
        @in.Close();
    }
}
/// <summary>
/// Skips over <paramref name="n"/> bytes of the stream while keeping the
/// data stream and its checksum stream aligned on checksum-chunk boundaries.
/// </summary>
/// <param name="n">number of bytes to skip; values &lt;= 0 are a no-op</param>
/// <returns>the number of bytes actually skipped (may be less than n at EOS)</returns>
/// <exception cref="System.IO.IOException"/>
public virtual long Skip(long n)
{
    // NOTE(review): lock (this) exposes the monitor to external callers — a
    // private lock object would be safer, but other code may already rely on
    // synchronizing on this instance; confirm before changing.
    lock (this)
    {
        if (Log.IsDebugEnabled())
        {
            Log.Debug("skip " + n);
        }
        if (n <= 0)
        {
            return(0);
        }
        // Without checksum verification there is no chunk alignment to
        // maintain; delegate directly to the underlying data stream.
        if (!verifyChecksum)
        {
            return(dataIn.Skip(n));
        }
        // caller made sure newPosition is not beyond EOF.
        int remaining = slowReadBuff.Remaining();
        int position = slowReadBuff.Position();
        int newPosition = position + (int)n;
        // if the new offset is already read into dataBuff, just reposition
        if (n <= remaining)
        {
            System.Diagnostics.Debug.Assert(offsetFromChunkBoundary == 0);
            slowReadBuff.Position(newPosition);
            return(n);
        }
        // for small gap, read through to keep the data/checksum in sync
        if (n - remaining <= bytesPerChecksum)
        {
            // Consume what is buffered, then read (and verify) the short tail.
            slowReadBuff.Position(position + remaining);
            if (skipBuf == null)
            {
                skipBuf = new byte[bytesPerChecksum];
            }
            int ret = Read(skipBuf, 0, (int)(n - remaining));
            // NOTE(review): if Read returns -1 (EOS) or a short count, the sum
            // below under-reports or even decrements the skip — confirm whether
            // callers tolerate a return value smaller than n here.
            return(remaining + ret);
        }
        // optimize for big gap: discard the current buffer, skip to
        // the beginning of the appropriate checksum chunk and then
        // read to the middle of that chunk to be in sync with checksums.
        // We can't use this.offsetFromChunkBoundary because we need to know how
        // many bytes of the offset were really read. Calling read(..) with a
        // positive this.offsetFromChunkBoundary causes that many bytes to get
        // silently skipped.
        int myOffsetFromChunkBoundary = newPosition % bytesPerChecksum;
        long toskip = n - remaining - myOffsetFromChunkBoundary;
        // Invalidate both buffers: position == limit marks them fully consumed.
        slowReadBuff.Position(slowReadBuff.Limit());
        checksumBuff.Position(checksumBuff.Limit());
        // Advance the raw data stream to the target chunk boundary, and the
        // checksum stream by the matching number of whole-chunk checksums.
        IOUtils.SkipFully(dataIn, toskip);
        long checkSumOffset = (toskip / bytesPerChecksum) * checksumSize;
        IOUtils.SkipFully(checksumIn, checkSumOffset);
        // read into the middle of the chunk
        if (skipBuf == null)
        {
            skipBuf = new byte[bytesPerChecksum];
        }
        System.Diagnostics.Debug.Assert(skipBuf.Length == bytesPerChecksum);
        System.Diagnostics.Debug.Assert(myOffsetFromChunkBoundary < bytesPerChecksum);
        int ret_1 = Read(skipBuf, 0, myOffsetFromChunkBoundary);
        if (ret_1 == -1)
        {
            // EOS
            return(toskip + remaining);
        }
        else
        {
            return(toskip + remaining + ret_1);
        }
    }
}