/// <summary>
/// Builds a reader from the settings collected in <paramref name="builder"/>.
/// </summary>
/// <remarks>
/// Checksum verification is enabled only when the builder asks for it and the
/// replica's checksum type is not <c>DataChecksum.ChecksumNull</c>.
/// The readahead length is always at least one checksum chunk; whether zero
/// readahead was effectively requested is recorded separately in
/// <c>zeroReadaheadRequested</c>.
/// </remarks>
/// <param name="builder">Source of the replica, positions and tuning values.</param>
private BlockReaderLocal(BlockReaderLocal.Builder builder)
{
    // Values copied straight from the builder.
    replica = builder.replica;
    dataPos = builder.dataPos;
    filename = builder.filename;
    block = builder.block;
    storageType = builder.storageType;

    // Channels over the replica's data and metadata streams.
    dataIn = replica.GetDataStream().GetChannel();
    checksumIn = replica.GetMetaStream().GetChannel();

    // Checksum parameters come from the replica's metadata header.
    checksum = replica.GetMetaHeader().GetChecksum();
    verifyChecksum = builder.verifyChecksum
        && checksum.GetChecksumType().id != DataChecksum.ChecksumNull;
    bytesPerChecksum = checksum.GetBytesPerChecksum();
    checksumSize = checksum.GetChecksumSize();

    // Both chunk counts are ceiling divisions by bytesPerChecksum and fall
    // back to 0 when bytesPerChecksum is 0.
    int readaheadChunks;
    if (bytesPerChecksum == 0)
    {
        maxAllocatedChunks = 0;
        readaheadChunks = 0;
    }
    else
    {
        maxAllocatedChunks = (builder.bufferSize + bytesPerChecksum - 1) / bytesPerChecksum;

        // The effective readahead can never exceed the space in the buffer.
        int readaheadBytes = Math.Min(builder.bufferSize, builder.maxReadahead);
        readaheadChunks = (readaheadBytes + bytesPerChecksum - 1) / bytesPerChecksum;
    }

    bool noReadahead = (readaheadChunks == 0);
    zeroReadaheadRequested = noReadahead;
    if (noReadahead)
    {
        // Still keep one chunk of readahead length.
        readaheadChunks = 1;
    }
    maxReadaheadLength = readaheadChunks * bytesPerChecksum;
}
/// <summary>
/// Creates two files, "file0" (data) and "file1" (metadata), under
/// <c>dir</c> and opens an input stream on each, stored in <c>fis</c>.
/// </summary>
/// <remarks>
/// The data file holds a single byte written with <c>fos.Write(1)</c>. The
/// metadata file holds a version-1 <c>BlockMetadataHeader</c> built with a
/// <c>DataChecksum.Type.Null</c> checksum.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
public TestFileDescriptorPair()
{
    fis = new FileInputStream[2];
    for (int i = 0; i < 2; i++)
    {
        string name = dir.GetDir() + "/file" + i;
        FileOutputStream fos = new FileOutputStream(name);
        try
        {
            if (i == 0)
            {
                // write 'data' file
                fos.Write(1);
            }
            else
            {
                // write 'metadata' file
                BlockMetadataHeader header = new BlockMetadataHeader((short)1,
                    DataChecksum.NewDataChecksum(DataChecksum.Type.Null, 4));
                DataOutputStream dos = new DataOutputStream(fos);
                BlockMetadataHeader.WriteHeader(dos, header);
                dos.Close();
            }
        }
        finally
        {
            // Close the output stream even when a write fails, so the file
            // handle is not leaked on the error path.
            fos.Close();
        }
        fis[i] = new FileInputStream(name);
    }
}
/// <summary>Verifies the block's checksum.</summary>
/// <remarks>
/// Verifies the block's checksum. This is an I/O intensive operation.
/// The block is read in batches sized from an 8 MiB budget divided into
/// whole checksum chunks, and each batch is checked against the matching
/// checksums read from the meta file.
/// </remarks>
/// <param name="length">Number of block bytes to verify.</param>
/// <param name="metaIn">Stream over the block's meta file, positioned at its header.</param>
/// <param name="blockChannel">Channel the block data is read from.</param>
/// <param name="blockFileName">Name passed to the checksum verifier for error reporting.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Apache.Hadoop.FS.ChecksumException"/>
private static void VerifyChecksum(long length, FileInputStream metaIn, FileChannel blockChannel, string blockFileName)
{
    // Get the DataChecksum from the meta file header. The buffered wrapper is
    // sized to the header length (presumably so it does not read past the
    // header before the channel is used below - confirm).
    BlockMetadataHeader header = BlockMetadataHeader.ReadHeader(
        new DataInputStream(new BufferedInputStream(metaIn, BlockMetadataHeader.GetHeaderSize())));
    FileChannel metaChannel = null;
    try
    {
        metaChannel = metaIn.GetChannel();
        if (metaChannel == null)
        {
            throw new IOException("Block InputStream meta file has no FileChannel.");
        }
        DataChecksum checksum = header.GetChecksum();
        int bytesPerChecksum = checksum.GetBytesPerChecksum();
        int checksumSize = checksum.GetChecksumSize();
        int numChunks = (8 * 1024 * 1024) / bytesPerChecksum;
        ByteBuffer blockBuf = ByteBuffer.Allocate(numChunks * bytesPerChecksum);
        ByteBuffer checksumBuf = ByteBuffer.Allocate(numChunks * checksumSize);

        // Verify the checksum. The running total is a long: 'length' is a
        // long, and an int counter would overflow for blocks of 2 GiB or more.
        long bytesVerified = 0;
        while (bytesVerified < length)
        {
            Preconditions.CheckState(bytesVerified % bytesPerChecksum == 0,
                "Unexpected partial chunk before EOF");
            int bytesRead = FillBuffer(blockChannel, blockBuf);
            if (bytesRead == -1)
            {
                throw new IOException("checksum verification failed: premature EOF");
            }
            blockBuf.Flip();

            // Number of read chunks, including partial chunk at end
            int chunks = (bytesRead + bytesPerChecksum - 1) / bytesPerChecksum;
            checksumBuf.Limit(chunks * checksumSize);
            // NOTE(review): the result of this fill is not checked; a short
            // meta file is left for VerifyChunkedSums to reject - confirm.
            FillBuffer(metaChannel, checksumBuf);
            checksumBuf.Flip();
            checksum.VerifyChunkedSums(blockBuf, checksumBuf, blockFileName, bytesVerified);

            // Success
            bytesVerified += bytesRead;
            blockBuf.Clear();
            checksumBuf.Clear();
        }
    }
    finally
    {
        IOUtils.CloseQuietly(metaChannel);
    }
}
/// <summary>
/// Creates a replica over the given data and metadata streams and reads the
/// metadata header from the metadata stream's channel.
/// </summary>
/// <param name="key">Identifier stored as this replica's key.</param>
/// <param name="dataStream">Stream over the block data.</param>
/// <param name="metaStream">Stream over the block metadata; its header is read here.</param>
/// <param name="cache">Cache stored on this replica.</param>
/// <param name="creationTimeMs">Creation time stored on this replica.</param>
/// <param name="slot">Shared-memory slot stored on this replica.</param>
/// <exception cref="System.IO.IOException">
/// If the header cannot be read or its version is not 1.
/// </exception>
public ShortCircuitReplica(ExtendedBlockId key, FileInputStream dataStream, FileInputStream metaStream, ShortCircuitCache cache, long creationTimeMs, ShortCircuitShm.Slot slot)
{
    // Plain field copies.
    this.key = key;
    this.dataStream = dataStream;
    this.metaStream = metaStream;
    this.cache = cache;
    this.creationTimeMs = creationTimeMs;
    this.slot = slot;

    // Read the header and accept only the one version this class handles.
    this.metaHeader = BlockMetadataHeader.PreadHeader(metaStream.GetChannel());
    bool supportedVersion = (metaHeader.GetVersion() == 1);
    if (!supportedVersion)
    {
        throw new IOException("invalid metadata header version " + metaHeader.GetVersion()
            + ". Can only handle version 1.");
    }
}
/// <summary>
/// Checks that short-circuit reads of a file keep working while its replica
/// is on RAM_DISK and after a second file has pushed it to DEFAULT storage.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private void DoShortCircuitReadAfterEvictionTest()
{
    string testName = GenericTestUtils.GetMethodName();
    Path firstFile = new Path("/" + testName + ".01.dat");
    Path secondFile = new Path("/" + testName + ".02.dat");
    int seed = 0xFADED;
    MakeRandomTestFile(firstFile, BlockSize, true, seed);

    // Phase 1: replica is on RAM_DISK; the meta file is no longer than the header.
    EnsureFileReplicasOnStorageType(firstFile, StorageType.RamDisk);
    FilePath metaOnRamDisk = cluster.GetBlockMetadataFile(0, DFSTestUtil.GetFirstBlock(fs, firstFile));
    NUnit.Framework.Assert.IsTrue(metaOnRamDisk.Length() <= BlockMetadataHeader.GetHeaderSize());
    NUnit.Framework.Assert.IsTrue(VerifyReadRandomFile(firstFile, BlockSize, seed));

    // Give the lazy writer thread time to do its job.
    Sharpen.Thread.Sleep(3 * LazyWriterIntervalSec * 1000);

    // Phase 2: same checks again, still on RAM_DISK.
    EnsureFileReplicasOnStorageType(firstFile, StorageType.RamDisk);
    FilePath metaAfterLazyWrite = cluster.GetBlockMetadataFile(0, DFSTestUtil.GetFirstBlock(fs, firstFile));
    NUnit.Framework.Assert.IsTrue(metaAfterLazyWrite.Length() <= BlockMetadataHeader.GetHeaderSize());
    NUnit.Framework.Assert.IsTrue(VerifyReadRandomFile(firstFile, BlockSize, seed));

    // Phase 3: a second file with a RAM_DISK replica evicts the first.
    MakeRandomTestFile(secondFile, BlockSize, true, seed);
    Sharpen.Thread.Sleep(3 * LazyWriterIntervalSec * 1000);
    TriggerBlockReport();

    // The read must still work from DEFAULT storage. This time a checksum was
    // written during lazy persistence, so the meta file is longer than the header.
    EnsureFileReplicasOnStorageType(firstFile, StorageType.Default);
    FilePath metaOnDisk = cluster.GetBlockMetadataFile(0, DFSTestUtil.GetFirstBlock(fs, firstFile));
    NUnit.Framework.Assert.IsTrue(metaOnDisk.Length() > BlockMetadataHeader.GetHeaderSize());
    NUnit.Framework.Assert.IsTrue(VerifyReadRandomFile(firstFile, BlockSize, seed));

    // Legacy short-circuit reads trap any failure silently, fall back to a
    // remote read, and disable all later legacy short-circuit reads in the
    // ClientContext. When the test runs in legacy mode, make sure that did
    // not happen.
    ClientContext context = client.GetClientContext();
    bool usesLegacyReader = context.GetUseLegacyBlockReaderLocal();
    if (usesLegacyReader)
    {
        NUnit.Framework.Assert.IsFalse(context.GetDisableLegacyBlockReaderLocal());
    }
}
/// <summary>Read from the block file into a buffer.</summary>
/// <remarks>
/// Fills <paramref name="buf"/> from the data channel starting at
/// <c>dataPos</c>, advancing <c>dataPos</c> by the number of bytes read.
/// Unless checksumming is skipped, the matching checksums are loaded into
/// <c>checksumBuf</c> (overwriting its contents) and the freshly read bytes
/// are verified against them.
/// </remarks>
/// <param name="buf">
/// The buffer to read into. May be dataBuf.
/// The position and limit of this buffer should be set to
/// multiples of the checksum size.
/// </param>
/// <param name="canSkipChecksum">True if we can skip checksumming.</param>
/// <returns>Total bytes read. 0 on EOF.</returns>
/// <exception cref="System.IO.IOException"/>
private int FillBuffer(ByteBuffer buf, bool canSkipChecksum)
{
    lock (this)
    {
        TraceScope scope = Trace.StartSpan("BlockReaderLocal#fillBuffer(" + block.GetBlockId() + ")", Sampler.Never);
        try
        {
            // Remember where this read started, in the file and in the buffer.
            long firstDataPos = dataPos;
            int firstBufPos = buf.Position();

            // Read until the buffer is full or the data channel reports EOF.
            int bytesRead = 0;
            while (buf.HasRemaining())
            {
                int got = dataIn.Read(buf, dataPos);
                if (got < 0)
                {
                    break;
                }
                dataPos += got;
                bytesRead += got;
            }

            if (canSkipChecksum)
            {
                FreeChecksumBufIfExists();
                return(bytesRead);
            }
            if (bytesRead <= 0)
            {
                // Nothing was read, so there is nothing to verify.
                return(bytesRead);
            }

            try
            {
                // Narrow the buffer to exactly the bytes read by this call.
                buf.Limit(buf.Position());
                buf.Position(firstBufPos);

                CreateChecksumBufIfNeeded();
                int chunkCount = (bytesRead + bytesPerChecksum - 1) / bytesPerChecksum;
                checksumBuf.Clear();
                checksumBuf.Limit(chunkCount * checksumSize);

                // Checksums follow the meta header, one entry per data chunk.
                long metaPos = BlockMetadataHeader.GetHeaderSize()
                    + ((firstDataPos / bytesPerChecksum) * checksumSize);
                while (checksumBuf.HasRemaining())
                {
                    int got = checksumIn.Read(checksumBuf, metaPos);
                    if (got < 0)
                    {
                        throw new IOException("Got unexpected checksum file EOF at " + metaPos
                            + ", block file position " + firstDataPos + " for " + "block " + block
                            + " of file " + filename);
                    }
                    metaPos += got;
                }
                checksumBuf.Flip();
                checksum.VerifyChunkedSums(buf, checksumBuf, filename, firstDataPos);
            }
            finally
            {
                // Leave the position at the end of the data, pass or fail.
                buf.Position(buf.Limit());
            }
            return(bytesRead);
        }
        finally
        {
            scope.Close();
        }
    }
}
/// <summary>Find out the number of bytes in the block that match its crc.</summary>
/// <remarks>
/// Only the last crc chunk is checked, on the assumption that corruption
/// from an unexpected datanode shutdown is confined to that chunk. If the
/// block file is longer than the validated length it is truncated to it.
/// Any <c>IOException</c> is logged as a warning and reported as length 0.
/// </remarks>
/// <param name="blockFile">the block file</param>
/// <param name="genStamp">generation stamp of the block</param>
/// <returns>the number of valid bytes</returns>
private long ValidateIntegrityAndSetLength(FilePath blockFile, long genStamp)
{
    DataInputStream checksumIn = null;
    InputStream blockIn = null;
    try
    {
        FilePath metaFile = FsDatasetUtil.GetMetaFile(blockFile, genStamp);
        long blockFileLen = blockFile.Length();
        long metaFileLen = metaFile.Length();
        int crcHeaderLen = DataChecksum.GetChecksumHeaderSize();

        // Both files must exist; the block must be non-empty and the meta
        // file must be at least as long as the checksum header.
        bool filesUsable = blockFile.Exists() && blockFileLen != 0
            && metaFile.Exists() && metaFileLen >= crcHeaderLen;
        if (!filesUsable)
        {
            return(0);
        }

        checksumIn = new DataInputStream(new BufferedInputStream(
            new FileInputStream(metaFile), HdfsConstants.IoFileBufferSize));

        // read and handle the common header here. For now just a version
        DataChecksum checksum = BlockMetadataHeader.ReadDataChecksum(checksumIn, metaFile);
        int bytesPerChecksum = checksum.GetBytesPerChecksum();
        int checksumSize = checksum.GetChecksumSize();

        // Usable chunks: the smaller of what the block needs and what the
        // meta file actually holds.
        long chunksInBlock = (blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum;
        long chunksInMeta = (metaFileLen - crcHeaderLen) / checksumSize;
        long numChunks = Math.Min(chunksInBlock, chunksInMeta);
        if (numChunks == 0)
        {
            return(0);
        }

        // Position both streams at the start of the last chunk.
        IOUtils.SkipFully(checksumIn, (numChunks - 1) * checksumSize);
        blockIn = new FileInputStream(blockFile);
        long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum;
        IOUtils.SkipFully(blockIn, lastChunkStartPos);
        int lastChunkSize = (int)Math.Min(bytesPerChecksum, blockFileLen - lastChunkStartPos);

        // One array holds the chunk data followed by its stored checksum.
        byte[] chunkAndSum = new byte[lastChunkSize + checksumSize];
        checksumIn.ReadFully(chunkAndSum, lastChunkSize, checksumSize);
        IOUtils.ReadFully(blockIn, chunkAndSum, 0, lastChunkSize);
        checksum.Update(chunkAndSum, 0, lastChunkSize);

        // A matching last chunk counts in full; a corrupt one is dropped.
        long validFileLength = checksum.Compare(chunkAndSum, lastChunkSize)
            ? lastChunkStartPos + lastChunkSize
            : lastChunkStartPos;

        // truncate if extra bytes are present without CRC
        if (blockFile.Length() > validFileLength)
        {
            RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
            try
            {
                blockRAF.SetLength(validFileLength);
            }
            finally
            {
                blockRAF.Close();
            }
        }
        return(validFileLength);
    }
    catch (IOException e)
    {
        FsDatasetImpl.Log.Warn(e);
        return(0);
    }
    finally
    {
        IOUtils.CloseStream(checksumIn);
        IOUtils.CloseStream(blockIn);
    }
}
/// <summary>
/// Prints the checksum type recorded in an HDFS metadata file and, when a
/// block file is also given, verifies the block data against the checksums.
/// </summary>
/// <param name="args">
/// Command arguments. <c>-meta</c> is required and <c>-block</c> is optional;
/// both are extracted with <c>StringUtils.PopOptionWithArgument</c>.
/// </param>
/// <returns>
/// 0 when the header was printed (and the block, if given, verified);
/// 1 on missing arguments, a read failure or a checksum mismatch.
/// </returns>
/// <exception cref="System.IO.IOException"/>
internal override int Run(IList<string> args)
{
    if (args.Count == 0)
    {
        System.Console.Out.WriteLine(this.usageText);
        System.Console.Out.WriteLine(this.helpText + "\n");
        return(1);
    }
    string blockFile = StringUtils.PopOptionWithArgument("-block", args);
    string metaFile = StringUtils.PopOptionWithArgument("-meta", args);
    if (metaFile == null)
    {
        System.Console.Error.WriteLine("You must specify a meta file with -meta");
        return(1);
    }
    FileInputStream metaStream = null;
    FileInputStream dataStream = null;
    FileChannel metaChannel = null;
    FileChannel dataChannel = null;
    DataInputStream checksumStream = null;
    try
    {
        // Read the metadata header, then move the channel to DebugAdmin.HeaderLen
        // so that later channel reads start at that offset.
        BlockMetadataHeader header;
        try
        {
            metaStream = new FileInputStream(metaFile);
            checksumStream = new DataInputStream(metaStream);
            header = BlockMetadataHeader.ReadHeader(checksumStream);
            metaChannel = metaStream.GetChannel();
            metaChannel.Position(DebugAdmin.HeaderLen);
        }
        catch (RuntimeException e)
        {
            System.Console.Error.WriteLine("Failed to read HDFS metadata file header for "
                + metaFile + ": " + StringUtils.StringifyException(e));
            return(1);
        }
        catch (IOException e)
        {
            System.Console.Error.WriteLine("Failed to read HDFS metadata file header for "
                + metaFile + ": " + StringUtils.StringifyException(e));
            return(1);
        }
        DataChecksum checksum = header.GetChecksum();
        System.Console.Out.WriteLine("Checksum type: " + checksum.ToString());
        if (blockFile == null)
        {
            // Only the header was requested.
            return(0);
        }

        // These do not change while the block is scanned.
        int bytesPerChecksum = checksum.GetBytesPerChecksum();
        int checksumSize = checksum.GetChecksumSize();

        ByteBuffer metaBuf;
        ByteBuffer dataBuf;
        try
        {
            dataStream = new FileInputStream(blockFile);
            dataChannel = dataStream.GetChannel();
            int ChecksumsPerBuf = 1024 * 32;
            metaBuf = ByteBuffer.Allocate(checksumSize * ChecksumsPerBuf);
            dataBuf = ByteBuffer.Allocate(bytesPerChecksum * ChecksumsPerBuf);
        }
        catch (IOException e)
        {
            System.Console.Error.WriteLine("Failed to open HDFS block file for " + blockFile
                + ": " + StringUtils.StringifyException(e));
            return(1);
        }

        long offset = 0;
        while (true)
        {
            dataBuf.Clear();
            int dataRead = -1;
            try
            {
                // NOTE(review): a single Read is assumed to return whole
                // chunks until EOF; a short read mid-file would misalign the
                // checksums - confirm.
                dataRead = dataChannel.Read(dataBuf);
                if (dataRead < 0)
                {
                    break;
                }
            }
            catch (IOException e)
            {
                System.Console.Error.WriteLine("Got I/O error reading block file " + blockFile
                    + " from disk at offset " + dataChannel.Position() + ": "
                    + StringUtils.StringifyException(e));
                return(1);
            }
            try
            {
                // One checksum per chunk read, counting a trailing partial chunk.
                int csumToRead = ((bytesPerChecksum - 1 + dataRead) / bytesPerChecksum) * checksumSize;
                metaBuf.Clear();
                metaBuf.Limit(csumToRead);
                metaChannel.Read(metaBuf);
                dataBuf.Flip();
                metaBuf.Flip();
            }
            catch (IOException e)
            {
                System.Console.Error.WriteLine("Got I/O error reading metadata file " + metaFile
                    + " from disk at offset " + metaChannel.Position() + ": "
                    + StringUtils.StringifyException(e));
                return(1);
            }
            try
            {
                checksum.VerifyChunkedSums(dataBuf, metaBuf, blockFile, offset);
            }
            catch (IOException e)
            {
                System.Console.Out.WriteLine("verifyChunkedSums error: "
                    + StringUtils.StringifyException(e));
                return(1);
            }
            offset += dataRead;
        }
        System.Console.Out.WriteLine("Checksum verification succeeded on block file " + blockFile);
        return(0);
    }
    finally
    {
        IOUtils.Cleanup(null, metaStream, dataStream, checksumStream);
    }
}
// Multiple datanodes could be running on the local machine. Store proxies in
// a map keyed by the ipc port of the datanode.
// reader for the data file
// reader for the checksum file
/// <summary>The only way this object can be instantiated.</summary>
/// <remarks>
/// Looks up the block's local paths (from the per-datanode cache, or from the
/// datanode when not cached), opens the block file and, unless checksums are
/// skipped, the meta file, and wraps them in a reader. Checksums are skipped
/// when <c>conf.skipShortCircuitChecksums</c> is set or the storage type is
/// transient. On an <c>IOException</c> the cached path entry for the block is
/// removed and the exception is rethrown. Streams opened here are closed if
/// no reader was created.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
internal static BlockReaderLocalLegacy NewBlockReader(DFSClient.Conf conf, UserGroupInformation userGroupInformation, Configuration configuration, string file, ExtendedBlock blk, Org.Apache.Hadoop.Security.Token.Token<BlockTokenIdentifier> token, DatanodeInfo node, long startOffset, long length, StorageType storageType)
{
    BlockReaderLocalLegacy.LocalDatanodeInfo localDatanodeInfo = GetLocalDatanodeInfo(node.GetIpcPort());

    // check the cache first
    BlockLocalPathInfo pathinfo = localDatanodeInfo.GetBlockLocalPathInfo(blk);
    if (pathinfo == null)
    {
        if (userGroupInformation == null)
        {
            userGroupInformation = UserGroupInformation.GetCurrentUser();
        }
        pathinfo = GetBlockPathInfo(userGroupInformation, blk, node, configuration,
            conf.socketTimeout, token, conf.connectToDnViaHostname, storageType);
    }

    // check to see if the file exists. It may so happen that the
    // HDFS file has been deleted and this block-lookup is occurring
    // on behalf of a new HDFS file. This time, the block file could
    // be residing in a different portion of the fs.data.dir directory.
    // In this case, we remove this entry from the cache. The next
    // call to this method will re-populate the cache.
    FileInputStream dataIn = null;
    FileInputStream checksumIn = null;
    BlockReaderLocalLegacy localBlockReader = null;
    bool skipChecksumCheck = conf.skipShortCircuitChecksums || storageType.IsTransient();
    try
    {
        // get a local file system
        FilePath blkfile = new FilePath(pathinfo.GetBlockPath());
        dataIn = new FileInputStream(blkfile);
        if (Log.IsDebugEnabled())
        {
            Log.Debug("New BlockReaderLocalLegacy for file " + blkfile + " of size " + blkfile.Length()
                + " startOffset " + startOffset + " length " + length
                + " short circuit checksum " + !skipChecksumCheck);
        }
        if (!skipChecksumCheck)
        {
            // get the metadata file
            FilePath metafile = new FilePath(pathinfo.GetMetaPath());
            checksumIn = new FileInputStream(metafile);
            DataChecksum checksum = BlockMetadataHeader.ReadDataChecksum(new DataInputStream(checksumIn), blk);

            // Round the start offset down to a checksum chunk boundary.
            long firstChunkOffset = startOffset - (startOffset % checksum.GetBytesPerChecksum());
            localBlockReader = new BlockReaderLocalLegacy(conf, file, blk, token, startOffset,
                length, pathinfo, checksum, true, dataIn, firstChunkOffset, checksumIn);
        }
        else
        {
            localBlockReader = new BlockReaderLocalLegacy(conf, file, blk, token, startOffset,
                length, pathinfo, dataIn);
        }
    }
    catch (IOException)
    {
        // remove from cache
        localDatanodeInfo.RemoveBlockLocalPathInfo(blk);
        DFSClient.Log.Warn("BlockReaderLocalLegacy: Removing " + blk + " from cache because local file "
            + pathinfo.GetBlockPath() + " could not be opened.");
        throw;
    }
    finally
    {
        if (localBlockReader == null)
        {
            // No reader took ownership of the streams. Close both, and make
            // sure a failure closing the data stream cannot leak the
            // checksum stream.
            try
            {
                if (dataIn != null)
                {
                    dataIn.Close();
                }
            }
            finally
            {
                if (checksumIn != null)
                {
                    checksumIn.Close();
                }
            }
        }
    }
    return(localBlockReader);
}