/// <summary>
/// Writes one segment (a header followed by generated data) to the given stream
/// such that header plus data together account for the requested byte amount.
/// </summary>
/// <param name="byteAm">total number of bytes the segment may occupy, header included</param>
/// <param name="out">the output stream to write to</param>
/// <returns>
/// a GenerateOutput holding the summed bytes written and the summed time taken
/// for the header and the data pieces; (0, 0) when <c>byteAm</c> is smaller than
/// the header length, in which case nothing is written
/// </returns>
/// <exception cref="System.IO.IOException"/>
internal virtual DataWriter.GenerateOutput WriteSegment(long byteAm, OutputStream @out)
{
    long headerSize = GetHeaderLength();
    if (byteAm < headerSize)
    {
        // the budget cannot even hold a header, so skip the segment entirely
        return new DataWriter.GenerateOutput(0, 0);
    }

    // whatever remains after the header is the payload budget
    long payloadSize = byteAm - headerSize;
    if (payloadSize < 0)
    {
        // defensive clamp carried over from the original implementation
        payloadSize = 0;
    }

    DataWriter.WriteInfo headerInfo = WriteHeader(@out, payloadSize);

    // the hash value recorded in the header seeds the data generation
    DataHasher hasher = new DataHasher(headerInfo.GetHashValue());
    DataWriter.GenerateOutput pieces = WritePieces(payloadSize, 0, hasher, @out);

    long totalBytes = pieces.GetBytesWritten() + headerInfo.GetBytesWritten();
    long totalTime = headerInfo.GetTimeTaken() + pieces.GetTimeTaken();
    return new DataWriter.GenerateOutput(totalBytes, totalTime);
}
/// <summary>
/// Builds a buffer of exactly <c>byteAm</c> bytes made of consecutive long
/// values, each obtained from <c>hasher.Generate</c> at a hash offset that
/// advances by <c>Constants.BytesPerLong</c> per value.
/// </summary>
/// <param name="byteAm">size in bytes; must be positive and a multiple of <c>Constants.BytesPerLong</c></param>
/// <param name="startOffset">hash offset used for the first generated long</param>
/// <param name="hasher">hasher used to produce the long for a given offset</param>
/// <returns>
/// a GenerateResult holding the hash offset following the last generated long
/// and the rewound buffer with the generated bytes
/// </returns>
/// <exception cref="System.ArgumentException">
/// if <c>byteAm</c> is not positive or is not a multiple of <c>Constants.BytesPerLong</c>
/// </exception>
private DataWriter.GenerateResult GenerateFullSegment(int byteAm, long startOffset, DataHasher hasher)
{
    if (byteAm <= 0)
    {
        throw new ArgumentException("Byte amount must be greater than zero and not " + byteAm);
    }
    if ((byteAm % Constants.BytesPerLong) != 0)
    {
        throw new ArgumentException("Byte amount " + byteAm + " must be a multiple of " + Constants.BytesPerLong);
    }

    ByteBuffer segment = ByteBuffer.Wrap(new byte[byteAm]);
    // scratch buffer reused to serialize one long at a time
    ByteBuffer oneLong = ByteBuffer.Wrap(new byte[Constants.BytesPerLong]);

    long hashOffset = startOffset;
    long generated = 0;
    while (generated < byteAm)
    {
        oneLong.Rewind();
        oneLong.PutLong(hasher.Generate(hashOffset));
        // rewind again so the whole scratch buffer is copied into the segment
        oneLong.Rewind();
        segment.Put(oneLong);

        hashOffset += Constants.BytesPerLong;
        generated += Constants.BytesPerLong;
    }

    segment.Rewind();
    return new DataWriter.GenerateResult(hashOffset, segment);
}
/// <summary>
/// Builds a buffer holding only the first <c>byteAm</c> bytes of the long that
/// <c>hasher.Generate</c> produces for the given offset.
/// </summary>
/// <param name="byteAm">
/// number of bytes to emit; must be positive and at most <c>Constants.BytesPerLong</c>
/// </param>
/// <param name="offset">the hash offset to generate the long for</param>
/// <param name="hasher">hasher used to produce the long for a given offset</param>
/// <returns>
/// a GenerateResult holding the same offset that was passed in (it is not
/// advanced) and the rewound buffer with the partial bytes
/// </returns>
/// <exception cref="System.ArgumentException">
/// if <c>byteAm</c> is not positive or exceeds <c>Constants.BytesPerLong</c>
/// </exception>
private DataWriter.GenerateResult GeneratePartialSegment(int byteAm, long offset, DataHasher hasher)
{
    if (byteAm > Constants.BytesPerLong)
    {
        throw new ArgumentException("Partial bytes must be less or equal to " + Constants.BytesPerLong);
    }
    if (byteAm <= 0)
    {
        throw new ArgumentException("Partial bytes must be greater than zero and not " + byteAm);
    }

    // serialize the complete long first, then keep just its leading bytes
    ByteBuffer wholeLong = ByteBuffer.Wrap(new byte[Constants.BytesPerLong]);
    wholeLong.PutLong(hasher.Generate(offset));
    wholeLong.Rewind();

    ByteBuffer partial = ByteBuffer.Wrap(new byte[byteAm]);
    while (partial.HasRemaining())
    {
        partial.Put(wholeLong.Get());
    }
    partial.Rewind();

    return new DataWriter.GenerateResult(offset, partial);
}
/// <summary>
/// Checks <c>size</c> bytes taken from the buffer against the values the hasher
/// generates, one <c>BytesPerLong</c>-sized chunk at a time, starting at the
/// given hash offset.
/// </summary>
/// <param name="buf">the buffer to verify; bytes are consumed from its current position</param>
/// <param name="size">the number of bytes of that buffer to examine</param>
/// <param name="startOffset">the hash offset matching the first chunk</param>
/// <param name="hasher">the hasher used to compute the expected value of each chunk</param>
/// <returns>a VerifyInfo holding the number of matching and of differing chunks</returns>
private DataVerifier.VerifyInfo VerifyBuffer(ByteBuffer buf, int size, long startOffset, DataHasher hasher)
{
    ByteBuffer chunk = ByteBuffer.Wrap(new byte[BytesPerLong]);
    long nextHashOffset = startOffset;
    long matched = 0;
    long mismatched = 0;

    for (long consumed = 0; consumed < size; ++consumed)
    {
        chunk.Put(buf.Get());
        if (chunk.HasRemaining())
        {
            // chunk not complete yet, keep collecting bytes
            continue;
        }

        // a complete chunk was collected: read it back as a long and compare
        chunk.Rewind();
        long actual = chunk.GetLong();
        chunk.Rewind();
        long wanted = hasher.Generate(nextHashOffset);
        nextHashOffset += BytesPerLong;
        if (actual == wanted)
        {
            ++matched;
        }
        else
        {
            ++mismatched;
        }
    }

    // a non-zero position here means a trailing chunk shorter than a long
    if (chunk.HasRemaining() && chunk.Position() != 0)
    {
        int partialLen = chunk.Position();

        // zero the tail of the received chunk; the scratch buffer is reused
        // and may still carry bytes of the previous chunk there
        while (chunk.HasRemaining())
        {
            chunk.Put(unchecked((byte)0));
        }

        // build the expected chunk: leading bytes of the expected long,
        // followed by the same zero fill
        long wantedTail = hasher.Generate(nextHashOffset);
        ByteBuffer expectedChunk = ByteBuffer.Wrap(new byte[BytesPerLong]);
        expectedChunk.PutLong(wantedTail);
        expectedChunk.Position(partialLen);
        while (expectedChunk.HasRemaining())
        {
            expectedChunk.Put(unchecked((byte)0));
        }

        chunk.Rewind();
        expectedChunk.Rewind();
        if (chunk.Equals(expectedChunk))
        {
            ++matched;
        }
        else
        {
            ++mismatched;
        }
    }

    return new DataVerifier.VerifyInfo(matched, mismatched);
}
/// <summary>
/// Verifies up to the given number of bytes from the input stream. The stream
/// is processed as a sequence of headers, each followed by the data it
/// describes; fewer bytes than requested are consumed when the remaining byte
/// budget is too small to hold another header or when the stream ends while a
/// header is being read.
/// </summary>
/// <param name="byteAm">
/// the byte amount to limit to (should be less than or equal to file size)
/// </param>
/// <param name="bytesRead">
/// the starting byte count; header and data bytes read here are added to it
/// </param>
/// <param name="in">the input stream to read from</param>
/// <returns>
/// a VerifyOutput holding the matching chunk count, the differing chunk count,
/// the updated byte count and the accumulated read time
/// </returns>
/// <exception cref="System.IO.IOException">if a read failure occurs</exception>
/// <exception cref="Org.Apache.Hadoop.FS.Slive.BadFileException">
/// if the end of file is reached while data bytes are still expected
/// </exception>
private DataVerifier.VerifyOutput VerifyBytes(long byteAm, long bytesRead, DataInputStream @in)
{
    if (byteAm <= 0)
    {
        return new DataVerifier.VerifyOutput(0, 0, 0, 0);
    }

    long sameTotal = 0;
    long differentTotal = 0;
    long timeSpent = 0;
    long budgetLeft = byteAm;      // bytes still allowed to be consumed overall
    long segmentLeft = 0;          // data bytes still pending for the current header
    long segmentConsumed = 0;      // data bytes already consumed for the current header
    long sequence = 0;             // number of headers read so far
    DataHasher hasher = null;
    ByteBuffer scratch = ByteBuffer.Wrap(new byte[bufferSize]);

    while (budgetLeft > 0)
    {
        if (segmentLeft <= 0)
        {
            // current segment exhausted: a new header is needed
            if (budgetLeft < DataWriter.GetHeaderLength())
            {
                // the remaining budget cannot hold a header
                break;
            }

            DataVerifier.ReadInfo header;
            try
            {
                header = ReadHeader(@in);
            }
            catch (EOFException)
            {
                // end of file is acceptable at a header boundary,
                // unlike in the middle of the data (handled below)
                break;
            }

            ++sequence;
            hasher = new DataHasher(header.GetHashValue());
            segmentLeft = header.GetByteAm();
            timeSpent += header.GetTimeTaken();
            bytesRead += header.GetBytesRead();
            budgetLeft -= header.GetBytesRead();
            segmentConsumed = 0;

            // never plan to read more data than the overall budget permits
            if (segmentLeft > budgetLeft)
            {
                segmentLeft = budgetLeft;
            }
            if (segmentLeft <= 0)
            {
                // nothing to read for this header; re-evaluate the loop condition
                continue;
            }
        }

        // next read size: limited by the scratch buffer, the budget and the segment
        int readLen = bufferSize;
        if (budgetLeft < readLen)
        {
            readLen = (int)budgetLeft;
        }
        if (segmentLeft < readLen)
        {
            readLen = (int)segmentLeft;
        }

        try
        {
            scratch.Rewind();
            long readStart = Timer.Now();
            @in.ReadFully(((byte[])scratch.Array()), 0, readLen);
            timeSpent += Timer.Elapsed(readStart);
        }
        catch (EOFException e)
        {
            throw new BadFileException("Could not read the number of expected data bytes " + readLen + " due to unexpected end of file during sequence " + sequence, e);
        }

        bytesRead += readLen;
        budgetLeft -= readLen;
        segmentLeft -= readLen;

        scratch.Rewind();

        // the hash offset must be derived from the segment position BEFORE
        // that position is advanced by the bytes just read
        long hashOffset = DetermineOffset(segmentConsumed);
        segmentConsumed += readLen;

        DataVerifier.VerifyInfo outcome = VerifyBuffer(scratch, readLen, hashOffset, hasher);
        sameTotal += outcome.GetSame();
        differentTotal += outcome.GetDifferent();
    }

    return new DataVerifier.VerifyOutput(sameTotal, differentTotal, bytesRead, timeSpent);
}
/// <summary>
/// Generates the requested number of bytes and writes them to the output
/// stream. The data goes out in buffer-sized pieces first; whatever remains is
/// assembled from a long-aligned part plus, when the amount is not a multiple
/// of <c>Constants.BytesPerLong</c>, the leading bytes of one further long, and
/// is written with a single call.
/// </summary>
/// <param name="byteAm">the amount of bytes to write</param>
/// <param name="startPos">
/// a BYTES_PER_LONG aligned start position; negative values are treated as zero
/// </param>
/// <param name="hasher">hasher to use for generating data given an offset</param>
/// <param name="out">the output stream to write to</param>
/// <returns>
/// a GenerateOutput holding the number of bytes written and the time spent in
/// the write calls; (0, 0) when <c>byteAm</c> is not positive
/// </returns>
/// <exception cref="System.IO.IOException"/>
private DataWriter.GenerateOutput WritePieces(long byteAm, long startPos, DataHasher hasher, OutputStream @out)
{
    if (byteAm <= 0)
    {
        return new DataWriter.GenerateOutput(0, 0);
    }
    if (startPos < 0)
    {
        startPos = 0;
    }

    int tailLen = (int)(byteAm % bufferSize);
    long wholeBuffers = byteAm / bufferSize;
    long hashOffset = startPos;
    long totalWritten = 0;
    long totalTime = 0;

    // pieces that fill the buffer size completely
    long piece = 0;
    while (piece < wholeBuffers)
    {
        DataWriter.GenerateResult full = GenerateFullSegment(bufferSize, hashOffset, hasher);
        hashOffset = full.GetOffset();
        ByteBuffer fullBuf = full.GetBuffer();
        byte[] fullBytes = ((byte[])fullBuf.Array());

        // only the write call itself is timed, not the data generation
        long fullStart = Timer.Now();
        @out.Write(fullBytes);
        totalTime += Timer.Elapsed(fullStart);
        totalWritten += fullBytes.Length;

        ++piece;
    }

    if (tailLen > 0)
    {
        ByteBuffer tailBuf = ByteBuffer.Wrap(new byte[tailLen]);
        int partialLen = tailLen % Constants.BytesPerLong;
        int alignedLen = tailLen - partialLen;

        // the part of the tail that is a multiple of BYTES_PER_LONG
        if (alignedLen > 0)
        {
            DataWriter.GenerateResult aligned = GenerateFullSegment(alignedLen, hashOffset, hasher);
            hashOffset = aligned.GetOffset();
            tailBuf.Put(aligned.GetBuffer());
        }

        // the final bytes that do not fill up a complete long
        if (partialLen > 0)
        {
            DataWriter.GenerateResult partial = GeneratePartialSegment(partialLen, hashOffset, hasher);
            hashOffset = partial.GetOffset();
            tailBuf.Put(partial.GetBuffer());
        }

        // both tail parts go out in one write
        tailBuf.Rewind();
        byte[] tailBytes = ((byte[])tailBuf.Array());
        long tailStart = Timer.Now();
        @out.Write(tailBytes);
        totalTime += Timer.Elapsed(tailStart);
        totalWritten += tailBytes.Length;
    }

    return new DataWriter.GenerateOutput(totalWritten, totalTime);
}