/// <summary>
/// Opens <paramref name="file"/> on the raw file system and tries to open its
/// companion checksum file alongside it.
/// </summary>
/// <remarks>
/// The checksum file must begin with the bytes of <c>ChecksumVersion</c>; the
/// int that follows is stored as <c>bytesPerSum</c> and the checker is set up
/// with a CRC32 checksum. If the checksum file is missing, or any other
/// IOException occurs while opening or reading its header, the checker is
/// configured with a null checksum instead and construction still succeeds.
/// </remarks>
/// <param name="fs">checksum file system that owns the file</param>
/// <param name="file">path of the data file to open</param>
/// <param name="bufferSize">buffer size passed when opening the data file</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Apache.Hadoop.FS.UnresolvedLinkException"/>
public ChecksumFSInputChecker(ChecksumFs fs, Path file, int bufferSize)
    : base(file, fs.GetFileStatus(file).GetReplication())
{
    this.datas = fs.GetRawFs().Open(file, bufferSize);
    this.fs = fs;
    Path checksumPath = fs.GetChecksumFile(file);
    try
    {
        int checksumBufferSize = fs.GetSumBufferSize(fs.GetBytesPerSum(), bufferSize);
        sums = fs.GetRawFs().Open(checksumPath, checksumBufferSize);

        // The header is a fixed-length version marker followed by bytesPerSum.
        byte[] header = new byte[ChecksumVersion.Length];
        sums.ReadFully(header);
        bool headerMatches = Arrays.Equals(header, ChecksumVersion);
        if (!headerMatches)
        {
            throw new IOException("Not a checksum file: " + checksumPath);
        }

        this.bytesPerSum = sums.ReadInt();
        Set(fs.verifyChecksum, DataChecksum.NewCrc32(), bytesPerSum, 4);
    }
    catch (FileNotFoundException)
    {
        // No checksum file: silently continue without a checksum.
        Set(fs.verifyChecksum, null, 1, 0);
    }
    catch (IOException ex)
    {
        // Any other failure: warn, then continue without a checksum.
        Log.Warn("Problem opening checksum file: " + file + ". Ignoring exception: ", ex);
        Set(fs.verifyChecksum, null, 1, 0);
    }
}
/// <summary>
/// Reads the entire file at <paramref name="out"/> through <c>lfs</c>, deletes
/// the file, and returns its contents as a string.
/// </summary>
/// <param name="out">path of the file to read and then delete</param>
/// <returns>the file bytes converted by <c>Runtime.GetStringForBytes</c></returns>
/// <exception cref="System.IO.IOException"/>
private string ReadFile(string @out)
{
    Path path = new Path(@out);
    FileStatus stat = lfs.GetFileStatus(path);
    // Size the buffer from the reported file length before opening the stream,
    // so a failed allocation cannot leave an open stream behind.
    byte[] buffer = new byte[(int)stat.GetLen()];
    FSDataInputStream @in = lfs.Open(path);
    try
    {
        @in.ReadFully(buffer);
    }
    finally
    {
        // Always release the stream, even when the read fails part-way.
        @in.Close();
    }
    // Only reached after a successful read, matching the original ordering.
    lfs.Delete(path, false);
    return Runtime.GetStringForBytes(buffer);
}
/// <summary>
/// Seeks <paramref name="stm"/> to <paramref name="seekOff"/>, reads
/// <paramref name="toRead"/> bytes, and fails the test if they differ from the
/// same range of <paramref name="fileContents"/>.
/// </summary>
/// <param name="stm">stream to seek and read from</param>
/// <param name="fileContents">expected contents of the whole file</param>
/// <param name="seekOff">offset to seek to before reading</param>
/// <param name="toRead">number of bytes to read and compare</param>
/// <exception cref="System.IO.IOException"/>
private void VerifyRead(FSDataInputStream stm, byte[] fileContents, int seekOff, int toRead)
{
    byte[] actual = new byte[toRead];
    stm.Seek(seekOff);
    stm.ReadFully(actual);

    int endOff = seekOff + toRead;
    byte[] wanted = Arrays.CopyOfRange(fileContents, seekOff, endOff);
    if (Arrays.Equals(actual, wanted))
    {
        return;
    }

    // Mismatch: report both byte ranges as hex, plus the offset and length.
    string report = "\nExpected: " + StringUtils.ByteToHexString(wanted)
        + "\ngot: " + StringUtils.ByteToHexString(actual)
        + "\noff=" + seekOff + " len=" + toRead;
    NUnit.Framework.Assert.Fail(report);
}
/// <summary>
/// Opens <paramref name="file"/> and asserts that its first
/// <paramref name="bytesToVerify"/> bytes equal the corresponding bytes of
/// <paramref name="expectedBytes"/>.
/// </summary>
/// <param name="fs">file system used to open the file</param>
/// <param name="file">path of the file to check</param>
/// <param name="bytesToVerify">number of leading bytes to read and compare</param>
/// <param name="expectedBytes">expected byte values, compared index by index</param>
/// <exception cref="System.IO.IOException"/>
private void VerifyFile(FileSystem fs, Path file, int bytesToVerify, byte[] expectedBytes)
{
    FSDataInputStream input = fs.Open(file);
    try
    {
        byte[] actual = new byte[bytesToVerify];
        input.ReadFully(actual, 0, bytesToVerify);

        int index = 0;
        while (index < bytesToVerify)
        {
            Assert.Equal(expectedBytes[index], actual[index]);
            index++;
        }
    }
    finally
    {
        // The stream is closed whether or not the comparison succeeded.
        input.Close();
    }
}
/// <summary>Write a file and read it in, validating the result.</summary>
/// <remarks>
/// Write a file and read it in, validating the result. Optional flags control
/// whether file overwrite operations should be enabled, and whether the
/// file should be deleted afterwards.
/// If there is a mismatch between what was written and what was expected,
/// a small range of bytes either side of the first error is logged to aid
/// diagnosing what problem occurred - whether it was a previous file
/// or a corruption of the current file. This assumes that two
/// sequential runs to the same path use datasets with different character
/// moduli.
/// </remarks>
/// <param name="path">path to write to</param>
/// <param name="src">source data; must hold at least <paramref name="len"/> bytes</param>
/// <param name="len">length of data</param>
/// <param name="overwrite">should the create option allow overwrites?</param>
/// <param name="delete">
/// should the file be deleted afterwards? - with a verification
/// that it worked. Deletion is not attempted if an assertion has failed
/// earlier - it is not in a <code>finally{}</code> block.
/// </param>
/// <exception cref="System.IO.IOException">IO problems</exception>
protected internal virtual void WriteAndRead(Org.Apache.Hadoop.FS.Path path, byte[] src, int len, bool overwrite, bool delete)
{
    Assert.True("Not enough data in source array to write " + len + " bytes", src.Length >= len);
    fs.Mkdirs(path.GetParent());

    FSDataOutputStream @out = fs.Create(path, overwrite, fs.GetConf().GetInt("io.file.buffer.size", 4096), (short)1, GetBlockSize());
    try
    {
        @out.Write(src, 0, len);
    }
    finally
    {
        // Close even if the write fails so the output stream is not leaked.
        @out.Close();
    }

    Assert.True("Exists", fs.Exists(path));
    Assert.Equal("Length", len, fs.GetFileStatus(path).GetLen());

    byte[] buf = new byte[len];
    FSDataInputStream @in = fs.Open(path);
    try
    {
        @in.ReadFully(0, buf);
    }
    finally
    {
        // Close even if the read fails so the input stream is not leaked.
        @in.Close();
    }
    Assert.Equal(len, buf.Length);

    // Count mismatching bytes and remember where the first one is.
    int errors = 0;
    int first_error_byte = -1;
    for (int i = 0; i < len; i++)
    {
        if (src[i] != buf[i])
        {
            if (errors == 0)
            {
                first_error_byte = i;
            }
            errors++;
        }
    }

    if (errors > 0)
    {
        // .NET composite format items ({0}, {1}) are required here; printf-style
        // specifiers such as %d are emitted literally by string.Format.
        string message = string.Format(" {0} errors in file of length {1}", errors, len);
        Log.Warn(message);
        // the range either side of the first error to print
        // this is a purely arbitrary number, to aid user debugging
        int overlap = 10;
        int from = Math.Max(0, first_error_byte - overlap);
        int to = Math.Min(first_error_byte + overlap, len);
        for (int i_1 = from; i_1 < to; i_1++)
        {
            byte actual = buf[i_1];
            byte expected = src[i_1];
            string letter = ToChar(actual);
            // {0:D4} = zero-padded 4-digit index; {n,2:x} = hex, right-aligned to width 2.
            string line = string.Format("[{0:D4}] {1,2:x} {2}\n", i_1, actual, letter);
            if (expected != actual)
            {
                line = string.Format("[{0:D4}] {1,2:x} {2} -expected {3,2:x} {4}\n", i_1, actual, letter, expected, ToChar(expected));
            }
            Log.Warn(line);
        }
        Fail(message);
    }

    if (delete)
    {
        bool deleted = fs.Delete(path, false);
        Assert.True("Deleted", deleted);
        NUnit.Framework.Assert.IsFalse("No longer exists", fs.Exists(path));
    }
}