/// <exception cref="System.IO.IOException"/>
protected internal override int ReadChunk(long pos, byte[] buf, int offset, int len,
    byte[] checksum)
{
    bool eof = false;
    if (NeedChecksum())
    {
        System.Diagnostics.Debug.Assert(checksum != null);                    // we have a checksum buffer
        System.Diagnostics.Debug.Assert(checksum.Length % ChecksumSize == 0); // it is sane length
        System.Diagnostics.Debug.Assert(len >= bytesPerSum);                  // we must read at least one chunk
        int checksumsToRead = Math.Min(
            len / bytesPerSum,                // number of checksums based on len to read
            checksum.Length / ChecksumSize);  // size of checksum buffer
        long checksumPos = GetChecksumFilePos(pos);
        if (checksumPos != sums.GetPos())
        {
            sums.Seek(checksumPos);
        }
        int sumLenRead = sums.Read(checksum, 0, ChecksumSize * checksumsToRead);
        if (sumLenRead >= 0 && sumLenRead % ChecksumSize != 0)
        {
            throw new EOFException("Checksum file not a length multiple of checksum size " +
                "in " + file + " at " + pos + " checksumpos: " + checksumPos +
                " sumLenread: " + sumLenRead);
        }
        if (sumLenRead <= 0)
        {
            // we're at the end of the file
            eof = true;
        }
        else
        {
            // Adjust amount of data to read based on how many checksum chunks we read
            len = Math.Min(len, bytesPerSum * (sumLenRead / ChecksumSize));
        }
    }
    if (pos != datas.GetPos())
    {
        datas.Seek(pos);
    }
    int nread = ReadFully(datas, buf, offset, len);
    if (eof && nread > 0)
    {
        throw new ChecksumException("Checksum error: " + file + " at " + pos, pos);
    }
    return nread;
}
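// A minimal sketch (not the project's actual implementation) of the position
// mapping that ReadChunk relies on via GetChecksumFilePos. It assumes the
// standard Hadoop ".crc" layout: an 8-byte header followed by one 4-byte CRC32
// per bytesPerSum bytes of data. The constants and the method name
// ToChecksumFilePos are illustrative.
internal static long ToChecksumFilePos(long dataPos, int bytesPerSum)
{
    const int HeaderLength = 8;   // magic bytes plus version/bytesPerSum fields
    const int ChecksumSize = 4;   // one stored CRC32 value
    // Integer division maps a data offset to its chunk index; each full chunk
    // of bytesPerSum data bytes is covered by exactly one stored checksum.
    return HeaderLength + (dataPos / bytesPerSum) * ChecksumSize;
}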
/// <exception cref="System.IO.IOException"/>
internal static string ReadFile(FileSystem fs, Path name, int buflen)
{
    byte[] b = new byte[buflen];
    int offset = 0;
    FSDataInputStream @in = fs.Open(name);
    // Read until the buffer is full or the stream hits EOF.
    // Note: n must be declared in the initializer; it was missing before.
    for (int remaining, n;
        (remaining = b.Length - offset) > 0 &&
        (n = @in.Read(b, offset, remaining)) != -1;
        offset += n)
    {
    }
    Assert.Equal(offset, Math.Min(b.Length, @in.GetPos()));
    @in.Close();
    string s = Runtime.GetStringForBytes(b, 0, offset);
    return s;
}
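// Hypothetical usage of the helper above: read at most 1024 bytes of a file
// into a string. The fs instance and the path are illustrative; the helper's
// assertion checks that it consumed either the whole buffer or the whole file.
string contents = ReadFile(fs, new Path("/tmp/sample.txt"), 1024);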
/// <exception cref="System.IO.IOException"/>
internal override object DoIO(Reporter reporter, string name, long offset)
{
    // open file
    FSDataInputStream @in = null;
    Path p = new Path(name);
    try
    {
        @in = fs.Open(p);
    }
    catch (IOException)
    {
        return name + "@(missing)";
    }
    @in.Seek(offset);
    long actualSize = 0;
    try
    {
        long blockSize = fs.GetDefaultBlockSize(p);
        reporter.SetStatus("reading " + name + "@" + offset + "/" + blockSize);
        for (int curSize = bufferSize;
            curSize == bufferSize && actualSize < blockSize;
            actualSize += curSize)
        {
            curSize = @in.Read(buffer, 0, bufferSize);
        }
    }
    catch (IOException)
    {
        Log.Info("Corrupted block detected in \"" + name + "\" at " + offset);
        return name + "@" + offset;
    }
    finally
    {
        @in.Close();
    }
    return actualSize;
}
public virtual void TestTruncatedChecksum()
{
    Path testPath = new Path(TestRootDir, "testtruncatedcrc");
    FSDataOutputStream fout = localFs.Create(testPath);
    fout.Write(Runtime.GetBytesForString("testing truncation"));
    fout.Close();
    // Read in the checksum
    Path checksumFile = localFs.GetChecksumFile(testPath);
    FileSystem rawFs = localFs.GetRawFileSystem();
    FSDataInputStream checksumStream = rawFs.Open(checksumFile);
    byte[] buf = new byte[8192];
    int read = checksumStream.Read(buf, 0, buf.Length);
    checksumStream.Close();
    // Now rewrite the checksum file with the last byte missing
    FSDataOutputStream replaceStream = rawFs.Create(checksumFile);
    replaceStream.Write(buf, 0, read - 1);
    replaceStream.Close();
    // Now reading the file should fail with a ChecksumException
    try
    {
        FileSystemTestHelper.ReadFile(localFs, testPath, 1024);
        NUnit.Framework.Assert.Fail("Did not throw a ChecksumException when reading truncated " +
            "crc file");
    }
    catch (ChecksumException)
    {
    }
    // Telling it not to verify checksums should avoid the issue.
    localFs.SetVerifyChecksum(false);
    string str = FileSystemTestHelper.ReadFile(localFs, testPath, 1024);
    Assert.True("read", "testing truncation".Equals(str));
}
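// A hedged sketch of the escape hatch the test exercises: callers that can
// tolerate a corrupt or truncated .crc file may disable verification before
// reading, and should restore it afterwards. localFs and testPath are the
// same names used in the test above.
localFs.SetVerifyChecksum(false);
try
{
    string data = FileSystemTestHelper.ReadFile(localFs, testPath, 1024);
    // ... use data; no ChecksumException is possible on this path ...
}
finally
{
    localFs.SetVerifyChecksum(true);   // re-enable verification for later reads
}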
/// <summary>
/// When mark() is used on BufferedInputStream, the request
/// size on the checksum file system can be small.
/// </summary>
/// <remarks>
/// When mark() is used on BufferedInputStream, the request
/// size on the checksum file system can be small. However,
/// checksum file system currently depends on the request size
/// >= bytesPerSum to work properly.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
public virtual void TestTruncatedInputBug()
{
    int ioBufSize = 512;
    int fileSize = ioBufSize * 4;
    int filePos = 0;
    Configuration conf = new Configuration();
    conf.SetInt("io.file.buffer.size", ioBufSize);
    FileSystem fileSys = FileSystem.GetLocal(conf);
    try
    {
        // First create a test input file.
        Path testFile = new Path(TestRootDir, "HADOOP-1489");
        WriteFile(fileSys, testFile, fileSize);
        Assert.True(fileSys.Exists(testFile));
        Assert.True(fileSys.GetFileStatus(testFile).GetLen() == fileSize);
        // Now read the file for ioBufSize bytes
        FSDataInputStream @in = fileSys.Open(testFile, ioBufSize);
        // seek beyond data buffered by open
        filePos += ioBufSize * 2 + (ioBufSize - 10);
        @in.Seek(filePos);
        // read 4 more bytes before marking
        for (int i = 0; i < 4; ++i)
        {
            if (@in.Read() == -1)
            {
                break;
            }
            ++filePos;
        }
        // Now set mark() to trigger the bug
        // NOTE: in the fixed code, mark() does nothing (not supported) and
        // hence won't trigger this bug.
        @in.Mark(1);
        System.Console.Out.WriteLine("MARKED");
        // Try to read the rest
        while (filePos < fileSize)
        {
            if (@in.Read() == -1)
            {
                break;
            }
            ++filePos;
        }
        @in.Close();
        System.Console.Out.WriteLine("Read " + filePos + " bytes." +
            " file size=" + fileSize);
        Assert.True(filePos == fileSize);
    }
    finally
    {
        try
        {
            fileSys.Close();
        }
        catch (Exception)
        {
        }
    }
}
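// An illustrative (hypothetical) restatement of the constraint from the
// remarks above: ReadChunk asserts len >= bytesPerSum, but a refill issued
// after Mark() can be capped by the mark read-limit and fall below one
// checksum chunk. The numbers here are made up to show the failing shape.
int bytesPerSum = 512;        // data bytes covered by one stored checksum
int postMarkRequest = 1;      // a Mark(1)-limited refill can be this small
bool chunkReaderPreconditionHolds = postMarkRequest >= bytesPerSum;  // false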
/// <exception cref="System.IO.IOException"/>
public virtual int Read(byte[] b, int off, int len)
{
    return stream.Read(b, off, len);
}
public virtual void Test2GBMmapLimit()
{
    Assume.AssumeTrue(BlockReaderTestUtil.ShouldTestLargeFiles());
    HdfsConfiguration conf = InitZeroCopyTest();
    long TestFileLength = 2469605888L;
    conf.Set(DFSConfigKeys.DfsChecksumTypeKey, "NULL");
    conf.SetLong(DFSConfigKeys.DfsBlockSizeKey, TestFileLength);
    MiniDFSCluster cluster = null;
    Path TestPath = new Path("/a");
    string Context = "test2GBMmapLimit";
    conf.Set(DFSConfigKeys.DfsClientContext, Context);
    FSDataInputStream fsIn = null;
    FSDataInputStream fsIn2 = null;
    ByteBuffer buf1 = null;
    ByteBuffer buf2 = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
        cluster.WaitActive();
        DistributedFileSystem fs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(fs, TestPath, TestFileLength, (short)1, unchecked((int)(0xB)));
        DFSTestUtil.WaitReplication(fs, TestPath, (short)1);
        fsIn = fs.Open(TestPath);
        buf1 = fsIn.Read(null, 1, EnumSet.Of(ReadOption.SkipChecksums));
        NUnit.Framework.Assert.AreEqual(1, buf1.Remaining());
        fsIn.ReleaseBuffer(buf1);
        buf1 = null;
        fsIn.Seek(2147483640L);
        buf1 = fsIn.Read(null, 1024, EnumSet.Of(ReadOption.SkipChecksums));
        NUnit.Framework.Assert.AreEqual(7, buf1.Remaining());
        NUnit.Framework.Assert.AreEqual(int.MaxValue, buf1.Limit());
        fsIn.ReleaseBuffer(buf1);
        buf1 = null;
        NUnit.Framework.Assert.AreEqual(2147483647L, fsIn.GetPos());
        try
        {
            buf1 = fsIn.Read(null, 1024, EnumSet.Of(ReadOption.SkipChecksums));
            NUnit.Framework.Assert.Fail("expected UnsupportedOperationException");
        }
        catch (NotSupportedException)
        {
            // expected; can't read past 2GB boundary.
        }
        fsIn.Close();
        fsIn = null;
        // Now create another file with normal-sized blocks, and verify we
        // can read past 2GB
        Path TestPath2 = new Path("/b");
        conf.SetLong(DFSConfigKeys.DfsBlockSizeKey, 268435456L);
        DFSTestUtil.CreateFile(fs, TestPath2, 1024 * 1024, TestFileLength, 268435456L,
            (short)1, unchecked((int)(0xA)));
        fsIn2 = fs.Open(TestPath2);
        fsIn2.Seek(2147483640L);
        buf2 = fsIn2.Read(null, 1024, EnumSet.Of(ReadOption.SkipChecksums));
        NUnit.Framework.Assert.AreEqual(8, buf2.Remaining());
        NUnit.Framework.Assert.AreEqual(2147483648L, fsIn2.GetPos());
        fsIn2.ReleaseBuffer(buf2);
        buf2 = null;
        buf2 = fsIn2.Read(null, 1024, EnumSet.Of(ReadOption.SkipChecksums));
        NUnit.Framework.Assert.AreEqual(1024, buf2.Remaining());
        NUnit.Framework.Assert.AreEqual(2147484672L, fsIn2.GetPos());
        fsIn2.ReleaseBuffer(buf2);
        buf2 = null;
    }
    finally
    {
        if (buf1 != null)
        {
            fsIn.ReleaseBuffer(buf1);
        }
        if (buf2 != null)
        {
            fsIn2.ReleaseBuffer(buf2);
        }
        IOUtils.Cleanup(null, fsIn, fsIn2);
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
public virtual void TestZeroCopyReads()
{
    HdfsConfiguration conf = InitZeroCopyTest();
    MiniDFSCluster cluster = null;
    Path TestPath = new Path("/a");
    FSDataInputStream fsIn = null;
    int TestFileLength = 3 * BlockSize;
    FileSystem fs = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
        cluster.WaitActive();
        fs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(fs, TestPath, TestFileLength, (short)1, 7567L);
        try
        {
            DFSTestUtil.WaitReplication(fs, TestPath, (short)1);
        }
        catch (TimeoutException e)
        {
            // The specific exception must be caught before the general
            // Exception clause below, or this handler is unreachable in C#.
            NUnit.Framework.Assert.Fail("unexpected TimeoutException during " +
                "waitReplication: " + e);
        }
        catch (Exception e)
        {
            // Stands in for Java's InterruptedException.
            NUnit.Framework.Assert.Fail("unexpected InterruptedException during " +
                "waitReplication: " + e);
        }
        fsIn = fs.Open(TestPath);
        byte[] original = new byte[TestFileLength];
        IOUtils.ReadFully(fsIn, original, 0, TestFileLength);
        fsIn.Close();
        fsIn = fs.Open(TestPath);
        ByteBuffer result = fsIn.Read(null, BlockSize, EnumSet.Of(ReadOption.SkipChecksums));
        NUnit.Framework.Assert.AreEqual(BlockSize, result.Remaining());
        HdfsDataInputStream dfsIn = (HdfsDataInputStream)fsIn;
        NUnit.Framework.Assert.AreEqual(BlockSize, dfsIn.GetReadStatistics().GetTotalBytesRead());
        NUnit.Framework.Assert.AreEqual(BlockSize, dfsIn.GetReadStatistics().GetTotalZeroCopyBytesRead());
        Assert.AssertArrayEquals(Arrays.CopyOfRange(original, 0, BlockSize),
            ByteBufferToArray(result));
        fsIn.ReleaseBuffer(result);
    }
    finally
    {
        if (fsIn != null)
        {
            fsIn.Close();
        }
        if (fs != null)
        {
            fs.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
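// The zero-copy read protocol exercised above, as a minimal sketch: ask for a
// buffer, consume it, and always hand it back with ReleaseBuffer before
// closing the stream. fs and path stand in for a DistributedFileSystem and an
// existing file; a null return signals EOF, and SkipChecksums is required
// unless the data is cached on the datanode (see the cached-data test below).
FSDataInputStream zin = fs.Open(path);
try
{
    ByteBuffer zbuf = zin.Read(null, 4096, EnumSet.Of(ReadOption.SkipChecksums));
    if (zbuf != null)
    {
        try
        {
            // ... consume zbuf.Remaining() bytes ...
        }
        finally
        {
            zin.ReleaseBuffer(zbuf);   // release before Close()
        }
    }
}
finally
{
    zin.Close();
}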
public virtual void TestClientMmapDisable()
{
    HdfsConfiguration conf = InitZeroCopyTest();
    conf.SetBoolean(DFSConfigKeys.DfsClientMmapEnabled, false);
    MiniDFSCluster cluster = null;
    Path TestPath = new Path("/a");
    int TestFileLength = 16385;
    int RandomSeed = 23453;
    string Context = "testClientMmapDisable";
    FSDataInputStream fsIn = null;
    DistributedFileSystem fs = null;
    conf.Set(DFSConfigKeys.DfsClientContext, Context);
    try
    {
        // With DFS_CLIENT_MMAP_ENABLED set to false, we should not do memory
        // mapped reads.
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
        cluster.WaitActive();
        fs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(fs, TestPath, TestFileLength, (short)1, RandomSeed);
        DFSTestUtil.WaitReplication(fs, TestPath, (short)1);
        fsIn = fs.Open(TestPath);
        try
        {
            fsIn.Read(null, 1, EnumSet.Of(ReadOption.SkipChecksums));
            NUnit.Framework.Assert.Fail("expected zero-copy read to fail when client mmaps " +
                "were disabled.");
        }
        catch (NotSupportedException)
        {
        }
    }
    finally
    {
        if (fsIn != null)
        {
            fsIn.Close();
        }
        if (fs != null)
        {
            fs.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
    fsIn = null;
    fs = null;
    cluster = null;
    try
    {
        // Now try again with DFS_CLIENT_MMAP_CACHE_SIZE == 0. It should work.
        conf.SetBoolean(DFSConfigKeys.DfsClientMmapEnabled, true);
        conf.SetInt(DFSConfigKeys.DfsClientMmapCacheSize, 0);
        conf.Set(DFSConfigKeys.DfsClientContext, Context + ".1");
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
        cluster.WaitActive();
        fs = cluster.GetFileSystem();
        DFSTestUtil.CreateFile(fs, TestPath, TestFileLength, (short)1, RandomSeed);
        DFSTestUtil.WaitReplication(fs, TestPath, (short)1);
        fsIn = fs.Open(TestPath);
        ByteBuffer buf = fsIn.Read(null, 1, EnumSet.Of(ReadOption.SkipChecksums));
        fsIn.ReleaseBuffer(buf);
        // Test EOF behavior
        IOUtils.SkipFully(fsIn, TestFileLength - 1);
        buf = fsIn.Read(null, 1, EnumSet.Of(ReadOption.SkipChecksums));
        NUnit.Framework.Assert.AreEqual(null, buf);
    }
    finally
    {
        if (fsIn != null)
        {
            fsIn.Close();
        }
        if (fs != null)
        {
            fs.Close();
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Test that we can zero-copy read cached data even without disabling
/// checksums.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestZeroCopyReadOfCachedData()
{
    BlockReaderTestUtil.EnableShortCircuitShmTracing();
    BlockReaderTestUtil.EnableBlockReaderFactoryTracing();
    BlockReaderTestUtil.EnableHdfsCachingTracing();
    int TestFileLength = BlockSize;
    Path TestPath = new Path("/a");
    int RandomSeed = 23453;
    HdfsConfiguration conf = InitZeroCopyTest();
    conf.SetBoolean(DFSConfigKeys.DfsClientReadShortcircuitSkipChecksumKey, false);
    string Context = "testZeroCopyReadOfCachedData";
    conf.Set(DFSConfigKeys.DfsClientContext, Context);
    conf.SetLong(DFSConfigKeys.DfsDatanodeMaxLockedMemoryKey,
        DFSTestUtil.RoundUpToMultiple(TestFileLength,
            (int)NativeIO.POSIX.GetCacheManipulator().GetOperatingSystemPageSize()));
    MiniDFSCluster cluster = null;
    ByteBuffer result = null;
    ByteBuffer result2 = null;
    cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
    cluster.WaitActive();
    FsDatasetSpi<object> fsd = cluster.GetDataNodes()[0].GetFSDataset();
    DistributedFileSystem fs = cluster.GetFileSystem();
    DFSTestUtil.CreateFile(fs, TestPath, TestFileLength, (short)1, RandomSeed);
    DFSTestUtil.WaitReplication(fs, TestPath, (short)1);
    byte[] original = DFSTestUtil.CalculateFileContentsFromSeed(RandomSeed, TestFileLength);
    // Prior to caching, the file can't be read via zero-copy
    FSDataInputStream fsIn = fs.Open(TestPath);
    try
    {
        result = fsIn.Read(null, TestFileLength / 2, EnumSet.NoneOf<ReadOption>());
        NUnit.Framework.Assert.Fail("expected UnsupportedOperationException");
    }
    catch (NotSupportedException)
    {
        // expected
    }
    // Cache the file
    fs.AddCachePool(new CachePoolInfo("pool1"));
    long directiveId = fs.AddCacheDirective(new CacheDirectiveInfo.Builder()
        .SetPath(TestPath).SetReplication((short)1).SetPool("pool1").Build());
    // Math.Ceiling is the correct System.Math API (Math.Ceil does not exist in C#).
    int numBlocks = (int)Math.Ceiling((double)TestFileLength / BlockSize);
    DFSTestUtil.VerifyExpectedCacheUsage(DFSTestUtil.RoundUpToMultiple(TestFileLength, BlockSize),
        numBlocks, cluster.GetDataNodes()[0].GetFSDataset());
    try
    {
        result = fsIn.Read(null, TestFileLength, EnumSet.NoneOf<ReadOption>());
    }
    catch (NotSupportedException)
    {
        NUnit.Framework.Assert.Fail("expected to be able to read cached file via zero-copy");
    }
    Assert.AssertArrayEquals(Arrays.CopyOfRange(original, 0, BlockSize),
        ByteBufferToArray(result));
    // Test that files opened after the cache operation has finished
    // still get the benefits of zero-copy (regression test for HDFS-6086)
    FSDataInputStream fsIn2 = fs.Open(TestPath);
    try
    {
        result2 = fsIn2.Read(null, TestFileLength, EnumSet.NoneOf<ReadOption>());
    }
    catch (NotSupportedException)
    {
        NUnit.Framework.Assert.Fail("expected to be able to read cached file via zero-copy");
    }
    Assert.AssertArrayEquals(Arrays.CopyOfRange(original, 0, BlockSize),
        ByteBufferToArray(result2));
    fsIn2.ReleaseBuffer(result2);
    fsIn2.Close();
    // check that the replica is anchored
    ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(fs, TestPath);
    ShortCircuitCache cache = ClientContext.Get(Context, new DFSClient.Conf(conf)).GetShortCircuitCache();
    WaitForReplicaAnchorStatus(cache, firstBlock, true, true, 1);
    // Uncache the replica
    fs.RemoveCacheDirective(directiveId);
    WaitForReplicaAnchorStatus(cache, firstBlock, false, true, 1);
    fsIn.ReleaseBuffer(result);
    WaitForReplicaAnchorStatus(cache, firstBlock, false, false, 1);
    DFSTestUtil.VerifyExpectedCacheUsage(0, 0, fsd);
    fsIn.Close();
    fs.Close();
    cluster.Shutdown();
}
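// A condensed sketch of the caching setup TestZeroCopyReadOfCachedData relies
// on: once a path is cached through a pool and directive, zero-copy reads
// succeed with an empty ReadOption set (no SkipChecksums needed). The pool
// name and replication mirror the test; fs and path are assumed.
fs.AddCachePool(new CachePoolInfo("pool1"));
long id = fs.AddCacheDirective(new CacheDirectiveInfo.Builder()
    .SetPath(path).SetReplication((short)1).SetPool("pool1").Build());
// ... zero-copy reads with EnumSet.NoneOf<ReadOption>() now succeed ...
fs.RemoveCacheDirective(id);   // uncache when done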
public virtual void TestZeroCopyMmapCache()
{
    HdfsConfiguration conf = InitZeroCopyTest();
    MiniDFSCluster cluster = null;
    Path TestPath = new Path("/a");
    int TestFileLength = 5 * BlockSize;
    int RandomSeed = 23453;
    string Context = "testZeroCopyMmapCacheContext";
    FSDataInputStream fsIn = null;
    ByteBuffer[] results = new ByteBuffer[] { null, null, null, null };
    DistributedFileSystem fs = null;
    conf.Set(DFSConfigKeys.DfsClientContext, Context);
    cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
    cluster.WaitActive();
    fs = cluster.GetFileSystem();
    DFSTestUtil.CreateFile(fs, TestPath, TestFileLength, (short)1, RandomSeed);
    try
    {
        DFSTestUtil.WaitReplication(fs, TestPath, (short)1);
    }
    catch (TimeoutException e)
    {
        // The specific exception must precede the general Exception clause
        // below, or this handler is unreachable in C#.
        NUnit.Framework.Assert.Fail("unexpected TimeoutException during " +
            "waitReplication: " + e);
    }
    catch (Exception e)
    {
        // Stands in for Java's InterruptedException.
        NUnit.Framework.Assert.Fail("unexpected InterruptedException during " +
            "waitReplication: " + e);
    }
    fsIn = fs.Open(TestPath);
    byte[] original = new byte[TestFileLength];
    IOUtils.ReadFully(fsIn, original, 0, TestFileLength);
    fsIn.Close();
    fsIn = fs.Open(TestPath);
    ShortCircuitCache cache = ClientContext.Get(Context, new DFSClient.Conf(conf)).GetShortCircuitCache();
    cache.Accept(new TestEnhancedByteBufferAccess.CountingVisitor(0, 5, 5, 0));
    results[0] = fsIn.Read(null, BlockSize, EnumSet.Of(ReadOption.SkipChecksums));
    fsIn.Seek(0);
    results[1] = fsIn.Read(null, BlockSize, EnumSet.Of(ReadOption.SkipChecksums));
    // The mmap should be of the first block of the file.
    ExtendedBlock firstBlock = DFSTestUtil.GetFirstBlock(fs, TestPath);
    // The replica should not yet be evictable, since we have it open.
    cache.Accept(new _CacheVisitor_373(firstBlock));
    // Read more blocks.
    results[2] = fsIn.Read(null, BlockSize, EnumSet.Of(ReadOption.SkipChecksums));
    results[3] = fsIn.Read(null, BlockSize, EnumSet.Of(ReadOption.SkipChecksums));
    // we should have 3 mmaps, 1 evictable
    cache.Accept(new TestEnhancedByteBufferAccess.CountingVisitor(3, 5, 2, 0));
    // After we close the cursors, the mmaps should be evictable for
    // a brief period of time. Then, they should be closed (we're
    // using a very quick timeout).
    foreach (ByteBuffer buffer in results)
    {
        if (buffer != null)
        {
            fsIn.ReleaseBuffer(buffer);
        }
    }
    fsIn.Close();
    GenericTestUtils.WaitFor(new _Supplier_407(cache), 10, 60000);
    cache.Accept(new TestEnhancedByteBufferAccess.CountingVisitor(0, -1, -1, -1));
    fs.Close();
    cluster.Shutdown();
}