/// <summary>
/// Builds a data output stream whose bytes pass through a
/// <c>CryptoOutputStream</c> layered on top of <paramref name="out"/>.
/// </summary>
/// <param name="out">
/// Underlying stream. Its current position is read twice: once as the last
/// argument of the <c>CryptoOutputStream</c> constructor and once as the
/// last argument of the base constructor.
/// </param>
/// <param name="codec">Codec forwarded to the <c>CryptoOutputStream</c>.</param>
/// <param name="bufferSize">Buffer size forwarded to the <c>CryptoOutputStream</c>.</param>
/// <param name="key">Key forwarded to the <c>CryptoOutputStream</c>.</param>
/// <param name="iv">Initialization vector forwarded to the <c>CryptoOutputStream</c>.</param>
/// <exception cref="System.IO.IOException"/>
public CryptoFSDataOutputStream(
    FSDataOutputStream @out,
    CryptoCodec codec,
    int bufferSize,
    byte[] key,
    byte[] iv)
    : base(
        new CryptoOutputStream(@out, codec, bufferSize, key, iv, @out.GetPos()),
        null,
        @out.GetPos())
{
    // Keep the undecorated stream so position queries can be answered from it.
    this.fsOut = @out;
}
/// <summary>
/// Writes every split to <paramref name="out"/> (type name first, then the
/// split's own serialized form) and returns one meta-info entry per split.
/// </summary>
/// <param name="splits">Splits to serialize, in output order.</param>
/// <param name="out">Destination stream; its position is sampled around each split.</param>
/// <param name="conf">Source of the maximum-block-locations limit.</param>
/// <returns>
/// An array with the same length as <paramref name="splits"/>; empty when no
/// splits were given.
/// </returns>
/// <exception cref="System.IO.IOException"/>
private static JobSplit.SplitMetaInfo[] WriteOldSplits(InputSplit[] splits, FSDataOutputStream @out, Configuration conf)
{
    int splitCount = splits.Length;
    JobSplit.SplitMetaInfo[] metaInfos = new JobSplit.SplitMetaInfo[splitCount];
    if (splitCount == 0)
    {
        // Nothing to write; the stream and the configuration are left untouched.
        return metaInfos;
    }

    long runningOffset = @out.GetPos();
    int locationLimit = conf.GetInt(MRConfig.MaxBlockLocationsKey, MRConfig.MaxBlockLocationsDefault);

    for (int index = 0; index < splitCount; index++)
    {
        InputSplit split = splits[index];

        // Measure how many bytes this split occupies in the stream.
        long posBefore = @out.GetPos();
        Text.WriteString(@out, split.GetType().FullName);
        split.Write(@out);
        long posAfter = @out.GetPos();

        string[] hosts = split.GetLocations();
        if (hosts.Length > locationLimit)
        {
            Log.Warn("Max block location exceeded for split: " + split + " splitsize: " + hosts.Length + " maxsize: " + locationLimit);
            // Keep only the first locationLimit entries.
            hosts = Arrays.CopyOf(hosts, locationLimit);
        }

        metaInfos[index] = new JobSplit.SplitMetaInfo(hosts, runningOffset, split.GetLength());
        runningOffset += posAfter - posBefore;
    }

    return metaInfos;
}
/// <summary>
/// Concatenates the in-memory output of every partition into a single file
/// at <paramref name="mapOutputPath"/> and writes the matching spill index
/// to <paramref name="indexPath"/>.
/// </summary>
/// <remarks>
/// The raw output stream is now closed on every exit path. Previously an
/// exception thrown while writing a partition left the stream open.
/// </remarks>
/// <param name="mapOutputPath">Destination of the concatenated partition data (overwritten).</param>
/// <param name="indexPath">Destination of the spill index.</param>
/// <exception cref="System.IO.IOException"/>
private void CopyPartitions(Path mapOutputPath, Path indexPath)
{
    FileSystem localFs = FileSystem.GetLocal(jobConf);
    FileSystem rfs = ((LocalFileSystem)localFs).GetRaw();
    FSDataOutputStream rawOutput = rfs.Create(mapOutputPath, true, BufSize);
    SpillRecord spillRecord;
    bool closedCleanly = false;
    try
    {
        spillRecord = new SpillRecord(numberOfPartitions);
        IndexRecord indexRecord = new IndexRecord();
        for (int i = 0; i < numberOfPartitions; i++)
        {
            indexRecord.startOffset = rawOutput.GetPos();
            byte[] buffer = outStreams[i].ToByteArray();
            IFileOutputStream checksumOutput = new IFileOutputStream(rawOutput);
            checksumOutput.Write(buffer);
            // Write checksum.
            checksumOutput.Finish();
            // Write index record. partLength is measured from the stream
            // position, so it covers whatever Finish() appended as well.
            indexRecord.rawLength = (long)buffer.Length;
            indexRecord.partLength = rawOutput.GetPos() - indexRecord.startOffset;
            spillRecord.PutIndex(indexRecord, i);
            reporter.Progress();
        }
        rawOutput.Close();
        closedCleanly = true;
    }
    finally
    {
        if (!closedCleanly)
        {
            try
            {
                rawOutput.Close();
            }
            catch (System.IO.IOException)
            {
                // Best-effort cleanup only: the exception already in flight
                // is the one the caller needs to see.
            }
        }
    }
    spillRecord.WriteToFile(indexPath, jobConf);
}
/// <summary>
/// Serializes every split in <paramref name="array"/> to
/// <paramref name="out"/> (type name first, then the serializer output) and
/// returns one meta-info entry per split.
/// </summary>
/// <typeparam name="T">Concrete split type.</typeparam>
/// <param name="conf">Source of the serialization factory settings and the maximum-block-locations limit.</param>
/// <param name="array">Splits to serialize, in output order.</param>
/// <param name="out">Destination stream; its position is sampled around each split.</param>
/// <returns>
/// An array with the same length as <paramref name="array"/>; empty when no
/// splits were given.
/// </returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private static JobSplit.SplitMetaInfo[] WriteNewSplits<T>(Configuration conf, T[] array, FSDataOutputStream @out)
    where T : InputSplit
{
    int splitCount = array.Length;
    JobSplit.SplitMetaInfo[] metaInfos = new JobSplit.SplitMetaInfo[splitCount];
    if (splitCount == 0)
    {
        // Nothing to write; no factory is created and the stream is untouched.
        return metaInfos;
    }

    SerializationFactory factory = new SerializationFactory(conf);
    int locationLimit = conf.GetInt(MRConfig.MaxBlockLocationsKey, MRConfig.MaxBlockLocationsDefault);
    long runningOffset = @out.GetPos();

    for (int index = 0; index < splitCount; index++)
    {
        T split = array[index];

        // Measure how many bytes this split occupies in the stream.
        long posBefore = @out.GetPos();
        Text.WriteString(@out, split.GetType().FullName);
        Org.Apache.Hadoop.IO.Serializer.Serializer<T> serializer = factory.GetSerializer((Type)split.GetType());
        serializer.Open(@out);
        serializer.Serialize(split);
        long posAfter = @out.GetPos();

        string[] hosts = split.GetLocations();
        if (hosts.Length > locationLimit)
        {
            Log.Warn("Max block location exceeded for split: " + split + " splitsize: " + hosts.Length + " maxsize: " + locationLimit);
            // Keep only the first locationLimit entries.
            hosts = Arrays.CopyOf(hosts, locationLimit);
        }

        metaInfos[index] = new JobSplit.SplitMetaInfo(hosts, runningOffset, split.GetLength());
        runningOffset += posAfter - posBefore;
    }

    return metaInfos;
}
/// <summary>
/// Checks that, when a compression algorithm other than "none" is in use,
/// the output position after writing is smaller than the raw data size.
/// </summary>
/// <remarks>Does nothing when the <c>skip</c> flag is set.</remarks>
/// <exception cref="System.IO.IOException"/>
public virtual void TestFailureCompressionNotWorking()
{
    if (!skip)
    {
        long rawDataSize = WriteRecords(10000, false, false, false);

        // The size comparison is meaningless when nothing is compressed.
        bool isUncompressed = Runtime.EqualsIgnoreCase(compression, Compression.Algorithm.None.GetName());
        if (!isUncompressed)
        {
            Assert.True(@out.GetPos() < rawDataSize);
        }

        CloseOutput();
    }
}
/// <summary>
/// Creates a writer on a stream positioned at offset zero and immediately
/// writes the BCFile magic to it.
/// </summary>
/// <param name="fout">FS output stream; must report position 0.</param>
/// <param name="compressionName">
/// Name of the compression algorithm, which will be used for all data
/// blocks.
/// </param>
/// <param name="conf">Configuration stored on the writer for later use.</param>
/// <exception cref="System.IO.IOException">
/// Thrown when <paramref name="fout"/> is not at offset zero.
/// </exception>
/// <seealso cref="Compression.GetSupportedAlgorithms()"/>
public Writer(FSDataOutputStream fout, string compressionName, Configuration conf)
{
    long initialPosition = fout.GetPos();
    if (initialPosition != 0)
    {
        throw new IOException("Output file not at zero offset.");
    }

    this.@out = fout;
    this.conf = conf;

    dataIndex = new BCFile.DataIndex(compressionName);
    metaIndex = new BCFile.MetaIndex();
    fsOutputBuffer = new BytesWritable();

    // The magic goes first in the file, ahead of any block.
    BCFile.Magic.Write(fout);
}
/// <summary>
/// Appends to a partial CRC chunk several times, where the first writes do
/// not fill up the partial CRC chunk, then verifies the whole file.
/// </summary>
/// <param name="appendToNewBlock">
/// When true, each append is opened with the <c>Append</c> and
/// <c>NewBlock</c> create flags; otherwise the plain append overload is used.
/// </param>
/// <exception cref="System.IO.IOException"/>
private void TestAppendToPartialChunk(bool appendToNewBlock)
{
    string suffix = appendToNewBlock ? "0" : "1";
    Path p = new Path("/partialChunk/foo" + suffix);
    int fileLen = 513;
    System.Console.Out.WriteLine("p=" + p);
    byte[] expected = AppendTestUtil.InitBuffer(fileLen);

    // Step 1: create the file with a single byte in it.
    FSDataOutputStream stm = AppendTestUtil.CreateFile(fs, p, 1);
    stm.Write(expected, 0, 1);
    stm.Close();
    System.Console.Out.WriteLine("Wrote 1 byte and closed the file " + p);

    // Step 2: reopen for append and add one byte to the partial CRC chunk.
    if (appendToNewBlock)
    {
        stm = fs.Append(p, EnumSet.Of(CreateFlag.Append, CreateFlag.NewBlock), 4096, null);
    }
    else
    {
        stm = fs.Append(p);
    }
    stm.Write(expected, 1, 1);
    stm.Hflush();
    // The partial CRC chunk is still not full when the file is closed.
    stm.Close();
    System.Console.Out.WriteLine("Append 1 byte and closed the file " + p);

    // Step 3: reopen once more to write the remainder of the file.
    if (appendToNewBlock)
    {
        stm = fs.Append(p, EnumSet.Of(CreateFlag.Append, CreateFlag.NewBlock), 4096, null);
    }
    else
    {
        stm = fs.Append(p);
    }
    // The reported position must reflect the two bytes already in the file.
    NUnit.Framework.Assert.AreEqual(2, stm.GetPos());

    // One more byte into the partial chunk; the flush forces a packet to the DN.
    stm.Write(expected, 2, 1);
    stm.Hflush();
    System.Console.Out.WriteLine("Append and flush 1 byte");

    // Still not full; flush again to force another packet to the DN.
    stm.Write(expected, 3, 2);
    stm.Hflush();
    System.Console.Out.WriteLine("Append and flush 2 byte");

    // Fill up the partial chunk with everything that is left and close.
    int remaining = fileLen - 5;
    stm.Write(expected, 5, remaining);
    stm.Close();
    System.Console.Out.WriteLine("Flush 508 byte and closed the file " + p);

    // Verify that the entire file is good.
    AppendTestUtil.CheckFullFile(fs, p, fileLen, expected, "Failed to append to a partial chunk");
}
/// <summary>
/// Finishes the writer: closes the serializers (if any), writes the two
/// end-of-file markers, flushes, finalizes compression and checksum state,
/// records the number of bytes written and updates the record counter.
/// </summary>
/// <remarks>
/// The statement order is significant: the byte count is taken from
/// <c>rawOut</c> only after the stream has been closed or the checksum has
/// been finished. The <c>out</c> field is set to null at the end.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
public virtual void Close()
{
    // A writer created by BackupStore has no key/value classes set, so the
    // serializers are only closed when a key class is present.
    if (keyClass != null)
    {
        keySerializer.Close();
        valueSerializer.Close();
    }

    // One EOF marker for the key length, one for the value length.
    WritableUtils.WriteVInt(@out, EofMarker);
    WritableUtils.WriteVInt(@out, EofMarker);
    var markerSize = WritableUtils.GetVIntSize(EofMarker);
    decompressedBytesWritten += 2 * markerSize;

    // Push everything buffered so far down the stream.
    @out.Flush();

    if (compressOutput)
    {
        // Complete the compressed stream and reset it.
        compressedOut.Finish();
        compressedOut.ResetState();
    }

    if (!ownOutputStream)
    {
        // The stream belongs to someone else: only write the checksum.
        checksumOut.Finish();
    }
    else
    {
        // We own the underlying stream, so close it.
        @out.Close();
    }

    long endPosition = rawOut.GetPos();
    compressedBytesWritten = endPosition - start;

    if (compressOutput)
    {
        // Hand the compressor back to the pool.
        CodecPool.ReturnCompressor(compressor);
        compressor = null;
    }

    @out = null;

    if (writtenRecordsCounter != null)
    {
        writtenRecordsCounter.Increment(numRecordsWritten);
    }
}
/// <summary>
/// Writes <c>BlockSize</c> bytes to <paramref name="stm"/>, flushes, and then
/// polls the block locations of <paramref name="file"/> until more blocks are
/// reported than the stream position implied before the write.
/// </summary>
/// <param name="file">Path whose block locations are polled.</param>
/// <param name="stm">Open stream for <paramref name="file"/>.</param>
/// <param name="size">
/// NOTE(review): not read by this method; the amount written is always
/// <c>BlockSize</c>. Confirm with callers whether that is intended.
/// </param>
/// <exception cref="System.IO.IOException"/>
internal virtual void WriteFile(Path file, FSDataOutputStream stm, int size)
{
    long blocksBefore = stm.GetPos() / BlockSize;
    TestFileCreation.WriteFile(stm, BlockSize);

    // Need to make sure the full block is completely flushed to the
    // DataNodes (see FSOutputSummer#flush).
    stm.Flush();

    // Wait until the block is allocated by DataStreamer. The loop polls
    // with no delay and no upper bound.
    for (int blocksAfter = 0; blocksAfter <= blocksBefore; )
    {
        BlockLocation[] located = DFSClientAdapter.GetDFSClient(hdfs).GetBlockLocations(file.ToString(), 0L, BlockSize * NumBlocks);
        if (located == null)
        {
            blocksAfter = 0;
        }
        else
        {
            blocksAfter = located.Length;
        }
    }
}
/// <summary>Wraps a given FSDataOutputStream with a CryptoOutputStream.</summary>
/// <remarks>
/// When encrypted spill is enabled, the current stream position (as 8
/// bytes) and a freshly created IV are written to the stream before it is
/// wrapped. The size of the data buffer required for the stream is specified
/// by the "mapreduce.job.encrypted-intermediate-data.buffer.kb" Job
/// configuration variable. When encrypted spill is disabled, the stream is
/// returned unchanged and nothing is written.
/// </remarks>
/// <param name="conf">Job configuration consulted for the encryption settings.</param>
/// <param name="out">Stream to wrap.</param>
/// <returns>
/// A <c>CryptoFSDataOutputStream</c> over <paramref name="out"/>, or
/// <paramref name="out"/> itself when encryption is not enabled.
/// </returns>
/// <exception cref="System.IO.IOException"/>
public static FSDataOutputStream WrapIfNecessary(Configuration conf, FSDataOutputStream @out)
{
    if (!IsEncryptedSpillEnabled(conf))
    {
        return @out;
    }

    // Header part 1: the stream position at the time of wrapping.
    byte[] offsetBytes = (byte[])ByteBuffer.Allocate(8).PutLong(@out.GetPos()).Array();
    @out.Write(offsetBytes);

    // Header part 2: the IV, stored in the stream itself.
    byte[] iv = CreateIV(conf);
    @out.Write(iv);
    if (Log.IsDebugEnabled())
    {
        Log.Debug("IV written to Stream [" + Base64.EncodeBase64URLSafeString(iv) + "]");
    }

    FSDataOutputStream wrapped = new CryptoFSDataOutputStream(@out, CryptoCodec.GetInstance(conf), GetBufferSize(conf), GetEncryptionKey(), iv);
    return wrapped;
}
/// <summary>
/// Sets up the state for writing one block: records the start position,
/// layers a buffered stream over <paramref name="fsOut"/>, and opens a
/// compression stream on top of it.
/// </summary>
/// <param name="compressionAlgo">The compression algorithm to be used for compression.</param>
/// <param name="fsOut">Underlying file-system output stream.</param>
/// <param name="fsOutputBuffer">
/// Buffer whose capacity is reset here and whose bytes back the buffered
/// output stream.
/// </param>
/// <param name="conf">Configuration used to look up the output buffer size.</param>
/// <exception cref="System.IO.IOException"/>
public WBlockState(Compression.Algorithm compressionAlgo, FSDataOutputStream fsOut, BytesWritable fsOutputBuffer, Configuration conf)
{
    this.compressAlgo = compressionAlgo;
    this.fsOut = fsOut;
    this.posStart = fsOut.GetPos();

    var bufferCapacity = TFile.GetFSOutputBufferSize(conf);
    fsOutputBuffer.Capacity = bufferCapacity;
    this.fsBufferedOutput = new SimpleBufferedOutputStream(this.fsOut, fsOutputBuffer.Bytes);

    // NOTE(review): an earlier comment suggests the compressor is non-null
    // only when native Hadoop compression is used; confirm.
    this.compressor = compressAlgo.GetCompressor();
    try
    {
        this.@out = compressionAlgo.CreateCompressionStream(fsBufferedOutput, compressor, 0);
    }
    catch (IOException)
    {
        // Give the compressor back before propagating the failure.
        compressAlgo.ReturnCompressor(compressor);
        throw;
    }
}
/// <summary>Reports the position of the wrapped <c>fsOut</c> stream.</summary>
/// <returns>Whatever <c>fsOut.GetPos()</c> returns.</returns>
/// <exception cref="System.IO.IOException"/>
public override long GetPos()
{
    long underlyingPosition = fsOut.GetPos();
    return underlyingPosition;
}
/// <summary>
/// Exercises append against a mini DFS cluster in three scenarios: appending
/// to an existing file in several steps, appending to a file that does not
/// exist, and appending as a different user under changing permissions.
/// </summary>
/// <remarks>
/// Any non-IO exception (including assertion failures) is reported as an
/// <see cref="System.IO.IOException"/> that carries the original exception
/// as its inner exception. The cluster is shut down even when closing the
/// file system fails.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
public virtual void TestSimpleAppend()
{
    Configuration conf = new HdfsConfiguration();
    conf.SetInt(DFSConfigKeys.DfsDatanodeHandlerCountKey, 50);
    fileContents = AppendTestUtil.InitBuffer(AppendTestUtil.FileSize);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).Build();
    FileSystem fs = null;
    try
    {
        // Acquired inside the try so a failure here still shuts the cluster down.
        fs = cluster.GetFileSystem();
        {
            // test appending to a file.
            // create a new file.
            Path file1 = new Path("/simpleAppend.dat");
            FSDataOutputStream stm = AppendTestUtil.CreateFile(fs, file1, 1);
            System.Console.Out.WriteLine("Created file simpleAppend.dat");

            // write the first part of the file
            int mid = 186;
            System.Console.Out.WriteLine("Writing " + mid + " bytes to file " + file1);
            stm.Write(fileContents, 0, mid);
            stm.Close();
            System.Console.Out.WriteLine("Wrote and Closed first part of file.");

            // append the second part of the file
            int mid2 = 607;
            // Report the number of bytes actually written by this step.
            System.Console.Out.WriteLine("Writing " + (mid2 - mid) + " bytes to file " + file1);
            stm = fs.Append(file1);
            stm.Write(fileContents, mid, mid2 - mid);
            stm.Close();
            System.Console.Out.WriteLine("Wrote and Closed second part of file.");

            // write the remainder of the file
            stm = fs.Append(file1);
            // ensure getPos is set to reflect existing size of the file
            NUnit.Framework.Assert.IsTrue(stm.GetPos() > 0);
            System.Console.Out.WriteLine("Writing " + (AppendTestUtil.FileSize - mid2) + " bytes to file " + file1);
            stm.Write(fileContents, mid2, AppendTestUtil.FileSize - mid2);
            System.Console.Out.WriteLine("Written second part of file");
            stm.Close();
            System.Console.Out.WriteLine("Wrote and Closed second part of file.");

            // verify that entire file is good
            AppendTestUtil.CheckFullFile(fs, file1, AppendTestUtil.FileSize, fileContents, "Read 2");
        }
        {
            // test appending to an non-existing file.
            FSDataOutputStream @out = null;
            try
            {
                @out = fs.Append(new Path("/non-existing.dat"));
                NUnit.Framework.Assert.Fail("Expected to have FileNotFoundException");
            }
            catch (FileNotFoundException fnfe)
            {
                System.Console.Out.WriteLine("Good: got " + fnfe);
                Sharpen.Runtime.PrintStackTrace(fnfe, System.Console.Out);
            }
            finally
            {
                IOUtils.CloseStream(@out);
            }
        }
        {
            // test append permission.
            // set root to all writable (0x1ff == octal 0777)
            Path root = new Path("/");
            fs.SetPermission(root, new FsPermission((short)0x1ff));
            fs.Close();

            // login as a different user
            UserGroupInformation superuser = UserGroupInformation.GetCurrentUser();
            string username = "******";
            string group = "testappendgroup";
            NUnit.Framework.Assert.IsFalse(superuser.GetShortUserName().Equals(username));
            NUnit.Framework.Assert.IsFalse(Arrays.AsList(superuser.GetGroupNames()).Contains(group));
            UserGroupInformation appenduser = UserGroupInformation.CreateUserForTesting(username, new string[] { group });
            fs = DFSTestUtil.GetFileSystemAs(appenduser, conf);

            // create a file
            Path dir = new Path(root, GetType().Name);
            Path foo = new Path(dir, "foo.dat");
            FSDataOutputStream @out = null;
            int offset = 0;
            try
            {
                @out = fs.Create(foo);
                int len = 10 + AppendTestUtil.NextInt(100);
                @out.Write(fileContents, offset, len);
                offset += len;
            }
            finally
            {
                IOUtils.CloseStream(@out);
            }

            // change dir and foo to minimal permissions
            // (0x40 == octal 0100, 0x80 == octal 0200).
            fs.SetPermission(dir, new FsPermission((short)0x40));
            fs.SetPermission(foo, new FsPermission((short)0x80));

            // try append, should succeed
            @out = null;
            try
            {
                @out = fs.Append(foo);
                int len = 10 + AppendTestUtil.NextInt(100);
                @out.Write(fileContents, offset, len);
                offset += len;
            }
            finally
            {
                IOUtils.CloseStream(@out);
            }

            // change dir and foo to all but no write on foo
            // (0x17f == octal 0577, 0x1ff == octal 0777).
            fs.SetPermission(foo, new FsPermission((short)0x17f));
            fs.SetPermission(dir, new FsPermission((short)0x1ff));

            // try append, should fail
            @out = null;
            try
            {
                @out = fs.Append(foo);
                NUnit.Framework.Assert.Fail("Expected to have AccessControlException");
            }
            catch (AccessControlException ace)
            {
                System.Console.Out.WriteLine("Good: got " + ace);
                Sharpen.Runtime.PrintStackTrace(ace, System.Console.Out);
            }
            finally
            {
                IOUtils.CloseStream(@out);
            }
        }
    }
    catch (IOException e)
    {
        System.Console.Out.WriteLine("Exception :" + e);
        throw;
    }
    catch (Exception e)
    {
        System.Console.Out.WriteLine("Throwable :" + e);
        Sharpen.Runtime.PrintStackTrace(e);
        // Keep the original exception attached so its stack trace survives.
        throw new IOException("Throwable : " + e, e);
    }
    finally
    {
        try
        {
            if (fs != null)
            {
                fs.Close();
            }
        }
        finally
        {
            // Runs even if closing the file system throws.
            cluster.Shutdown();
        }
    }
}
/// <summary>Get the current position in file.</summary>
/// <returns>
/// The position reported by <c>fsOut</c> plus the size reported by
/// <c>fsBufferedOutput</c>.
/// </returns>
/// <exception cref="System.IO.IOException"/>
internal long GetCurrentPos()
{
    var flushedPosition = fsOut.GetPos();
    var bufferedSize = fsBufferedOutput.Size();
    return flushedPosition + bufferedSize;
}