protected internal override bool IsSplitable(JobContext context, Path file)
{
    CompressionCodec codec = new CompressionCodecFactory(context.GetConfiguration()).GetCodec(file);
    // No codec means plain data, which can be split; any compressed file is
    // treated as unsplittable by this override.
    return null == codec;
}
protected internal virtual bool IsSplitable(FileSystem fs, Path file)
{
    CompressionCodec codec = new CompressionCodecFactory(fs.GetConf()).GetCodec(file);
    if (null == codec)
    {
        return true;
    }
    return codec is SplittableCompressionCodec;
}
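The second override's rule can be factored into a free-standing helper. A minimal sketch, not from the source; the helper name CanSplit and its call site are hypothetical:

internal static bool CanSplit(Configuration conf, Path file)
{
    CompressionCodec codec = new CompressionCodecFactory(conf).GetCodec(file);
    // No codec registered for this path means plain data, which is always
    // splittable; compressed input is splittable only when the codec
    // implements SplittableCompressionCodec (e.g. bzip2).
    return codec == null || codec is SplittableCompressionCodec;
}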
/// <summary>
/// Create a compression instance using the codec specified by
/// <code>codecClassName</code>
/// </summary>
/// <exception cref="System.IO.IOException"/>
internal static Org.Apache.Hadoop.Hdfs.Server.Namenode.FSImageCompression CreateCompression(Configuration conf, string codecClassName)
{
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.GetCodecByClassName(codecClassName);
    if (codec == null)
    {
        throw new IOException("Not a supported codec: " + codecClassName);
    }
    return new Org.Apache.Hadoop.Hdfs.Server.Namenode.FSImageCompression(codec);
}
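A hedged usage sketch for the method above; the DefaultCodec class name is illustrative, and whether it resolves depends on which codecs the configuration registers:

Configuration conf = new Configuration();
try
{
    Org.Apache.Hadoop.Hdfs.Server.Namenode.FSImageCompression compression =
        Org.Apache.Hadoop.Hdfs.Server.Namenode.FSImageCompression.CreateCompression(
            conf, "Org.Apache.Hadoop.IO.Compress.DefaultCodec");
    // hand the compression instance to the fsimage writer ...
}
catch (IOException)
{
    // the class name did not resolve to a codec known to the factory
}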
/// <exception cref="System.IO.IOException"/>
public override void Initialize(InputSplit genericSplit, TaskAttemptContext context)
{
    FileSplit split = (FileSplit)genericSplit;
    Configuration job = context.GetConfiguration();
    this.maxLineLength = job.GetInt(MaxLineLength, int.MaxValue);
    start = split.GetStart();
    end = start + split.GetLength();
    Path file = split.GetPath();
    // open the file and seek to the start of the split
    FileSystem fs = file.GetFileSystem(job);
    fileIn = fs.Open(file);
    CompressionCodec codec = new CompressionCodecFactory(job).GetCodec(file);
    if (null != codec)
    {
        isCompressedInput = true;
        decompressor = CodecPool.GetDecompressor(codec);
        if (codec is SplittableCompressionCodec)
        {
            SplitCompressionInputStream cIn = ((SplittableCompressionCodec)codec).CreateInputStream(
                fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.Byblock);
            @in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.GetAdjustedStart();
            end = cIn.GetAdjustedEnd();
            filePosition = cIn;
        }
        else
        {
            @in = new SplitLineReader(codec.CreateInputStream(fileIn, decompressor), job, this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    }
    else
    {
        fileIn.Seek(start);
        @in = new UncompressedSplitLineReader(fileIn, job, this.recordDelimiterBytes, split.GetLength());
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0)
    {
        start += @in.ReadLine(new Text(), 0, MaxBytesToConsume(start));
    }
    this.pos = start;
}
/// <exception cref="System.IO.IOException"/>
public LineRecordReader(Configuration job, FileSplit split, byte[] recordDelimiter)
{
    this.maxLineLength = job.GetInt(LineRecordReader.MaxLineLength, int.MaxValue);
    start = split.GetStart();
    end = start + split.GetLength();
    Path file = split.GetPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.GetCodec(file);
    // open the file and seek to the start of the split
    FileSystem fs = file.GetFileSystem(job);
    fileIn = fs.Open(file);
    if (IsCompressedInput())
    {
        decompressor = CodecPool.GetDecompressor(codec);
        if (codec is SplittableCompressionCodec)
        {
            SplitCompressionInputStream cIn = ((SplittableCompressionCodec)codec).CreateInputStream(
                fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.Byblock);
            @in = new CompressedSplitLineReader(cIn, job, recordDelimiter);
            start = cIn.GetAdjustedStart();
            end = cIn.GetAdjustedEnd();
            filePosition = cIn;
        }
        else
        {
            // take pos from compressed stream
            @in = new SplitLineReader(codec.CreateInputStream(fileIn, decompressor), job, recordDelimiter);
            filePosition = fileIn;
        }
    }
    else
    {
        fileIn.Seek(start);
        @in = new UncompressedSplitLineReader(fileIn, job, recordDelimiter, split.GetLength());
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0)
    {
        start += @in.ReadLine(new Text(), 0, MaxBytesToConsume(start));
    }
    this.pos = start;
}
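A hedged construction sketch for the constructor above; the path, split bounds, and newline delimiter are illustrative:

Configuration job = new Configuration();
// a 1 MB split at the front of a bzip2 file; bzip2 is splittable, so the
// reader will adjust start/end to compression block boundaries
FileSplit split = new FileSplit(new Path("/data/input.txt.bz2"), 0, 1048576, new string[0]);
byte[] delimiter = System.Text.Encoding.UTF8.GetBytes("\n");
LineRecordReader reader = new LineRecordReader(job, split, delimiter);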
// This is also called from the old FixedLengthRecordReader API implementation
/// <exception cref="System.IO.IOException"/>
public virtual void Initialize(Configuration job, long splitStart, long splitLength, Path file)
{
    start = splitStart;
    end = start + splitLength;
    long partialRecordLength = start % recordLength;
    long numBytesToSkip = 0;
    if (partialRecordLength != 0)
    {
        numBytesToSkip = recordLength - partialRecordLength;
    }
    // open the file and seek to the start of the split
    FileSystem fs = file.GetFileSystem(job);
    fileIn = fs.Open(file);
    CompressionCodec codec = new CompressionCodecFactory(job).GetCodec(file);
    if (null != codec)
    {
        isCompressedInput = true;
        decompressor = CodecPool.GetDecompressor(codec);
        CompressionInputStream cIn = codec.CreateInputStream(fileIn, decompressor);
        filePosition = cIn;
        inputStream = cIn;
        numRecordsRemainingInSplit = long.MaxValue;
        Log.Info("Compressed input; cannot compute number of records in the split");
    }
    else
    {
        fileIn.Seek(start);
        filePosition = fileIn;
        inputStream = fileIn;
        long splitSize = end - start - numBytesToSkip;
        numRecordsRemainingInSplit = (splitSize + recordLength - 1) / recordLength;
        if (numRecordsRemainingInSplit < 0)
        {
            numRecordsRemainingInSplit = 0;
        }
        Log.Info("Expecting " + numRecordsRemainingInSplit + " records each with a length of "
            + recordLength + " bytes in the split with an effective size of " + splitSize + " bytes");
    }
    if (numBytesToSkip != 0)
    {
        start += inputStream.Skip(numBytesToSkip);
    }
    this.pos = start;
}
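For example, with recordLength = 100 and splitStart = 250, partialRecordLength is 50, so numBytesToSkip = 50 and reading begins at offset 300, the first record boundary at or past the start of the split.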
protected internal virtual Configuration SetupCommonConfig()
{
    tmpOivImgDir = Files.CreateTempDir();
    Configuration conf = new Configuration();
    conf.SetInt(DFSConfigKeys.DfsNamenodeCheckpointCheckPeriodKey, 1);
    conf.SetInt(DFSConfigKeys.DfsNamenodeCheckpointTxnsKey, 5);
    conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
    conf.Set(DFSConfigKeys.DfsNamenodeLegacyOivImageDirKey, tmpOivImgDir.GetAbsolutePath());
    conf.SetBoolean(DFSConfigKeys.DfsImageCompressKey, true);
    conf.Set(DFSConfigKeys.DfsImageCompressionCodecKey, typeof(TestStandbyCheckpoints.SlowCodec).GetCanonicalName());
    CompressionCodecFactory.SetCodecClasses(conf, ImmutableList.Of<Type>(typeof(TestStandbyCheckpoints.SlowCodec)));
    return conf;
}
public virtual void SetUp()
{
    Configuration conf = new Configuration();
    conf.SetInt(DFSConfigKeys.DfsNamenodeCheckpointCheckPeriodKey, 1);
    conf.SetInt(DFSConfigKeys.DfsNamenodeCheckpointTxnsKey, 5);
    conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
    conf.Set(DFSConfigKeys.DfsNamenodeSharedEditsDirKey, BKJMUtil.CreateJournalURI("/bootstrapStandby").ToString());
    BKJMUtil.AddJournalManagerDefinition(conf);
    conf.SetBoolean(DFSConfigKeys.DfsImageCompressKey, true);
    conf.Set(DFSConfigKeys.DfsImageCompressionCodecKey, typeof(TestStandbyCheckpoints.SlowCodec).GetCanonicalName());
    CompressionCodecFactory.SetCodecClasses(conf, ImmutableList.Of<Type>(typeof(TestStandbyCheckpoints.SlowCodec)));
    MiniDFSNNTopology topology = new MiniDFSNNTopology().AddNameservice(
        new MiniDFSNNTopology.NSConf("ns1")
            .AddNN(new MiniDFSNNTopology.NNConf("nn1").SetHttpPort(10001))
            .AddNN(new MiniDFSNNTopology.NNConf("nn2").SetHttpPort(10002)));
    cluster = new MiniDFSCluster.Builder(conf).NnTopology(topology).NumDataNodes(1)
        .ManageNameDfsSharedDirs(false).Build();
    cluster.WaitActive();
}
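Both setups register SlowCodec through CompressionCodecFactory.SetCodecClasses, which writes the codec list back into the configuration that later factories read. A hedged sketch of verifying that registration:

CompressionCodecFactory factory = new CompressionCodecFactory(conf);
CompressionCodec codec = factory.GetCodecByClassName(typeof(TestStandbyCheckpoints.SlowCodec).GetCanonicalName());
// codec should be non-null once SetCodecClasses has installed SlowCodec in conf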
public virtual void Configure(JobConf conf)
{
    // Cache the factory; GetCodec is called later, once per input file.
    compressionCodecs = new CompressionCodecFactory(conf);
}
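Configure only caches the factory; the lookup happens later, per file. A hedged sketch of a hypothetical helper on the same class:

// Hypothetical helper: the cached factory decides, per file, whether the
// input needs a decompressing stream.
private bool IsCompressed(Path file)
{
    return compressionCodecs.GetCodec(file) != null;
}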
/* (non-Javadoc)
 * @see ImageLoader#processImage(java.io.DataInputStream, ImageVisitor, boolean)
 */
/// <exception cref="System.IO.IOException"/>
public override void LoadImage(DataInputStream @in, ImageVisitor v, bool skipBlocks)
{
    bool done = false;
    try
    {
        v.Start();
        v.VisitEnclosingElement(ImageVisitor.ImageElement.FsImage);
        imageVersion = @in.ReadInt();
        if (!CanLoadVersion(imageVersion))
        {
            throw new IOException("Cannot process fslayout version " + imageVersion);
        }
        if (NameNodeLayoutVersion.Supports(LayoutVersion.Feature.AddLayoutFlags, imageVersion))
        {
            LayoutFlags.Read(@in);
        }
        v.Visit(ImageVisitor.ImageElement.ImageVersion, imageVersion);
        v.Visit(ImageVisitor.ImageElement.NamespaceId, @in.ReadInt());
        long numInodes = @in.ReadLong();
        v.Visit(ImageVisitor.ImageElement.GenerationStamp, @in.ReadLong());
        if (NameNodeLayoutVersion.Supports(LayoutVersion.Feature.SequentialBlockId, imageVersion))
        {
            v.Visit(ImageVisitor.ImageElement.GenerationStampV2, @in.ReadLong());
            v.Visit(ImageVisitor.ImageElement.GenerationStampV1Limit, @in.ReadLong());
            v.Visit(ImageVisitor.ImageElement.LastAllocatedBlockId, @in.ReadLong());
        }
        if (NameNodeLayoutVersion.Supports(LayoutVersion.Feature.StoredTxids, imageVersion))
        {
            v.Visit(ImageVisitor.ImageElement.TransactionId, @in.ReadLong());
        }
        if (NameNodeLayoutVersion.Supports(LayoutVersion.Feature.AddInodeId, imageVersion))
        {
            v.Visit(ImageVisitor.ImageElement.LastInodeId, @in.ReadLong());
        }
        bool supportSnapshot = NameNodeLayoutVersion.Supports(LayoutVersion.Feature.Snapshot, imageVersion);
        if (supportSnapshot)
        {
            v.Visit(ImageVisitor.ImageElement.SnapshotCounter, @in.ReadInt());
            int numSnapshots = @in.ReadInt();
            v.Visit(ImageVisitor.ImageElement.NumSnapshotsTotal, numSnapshots);
            for (int i = 0; i < numSnapshots; i++)
            {
                ProcessSnapshot(@in, v);
            }
        }
        if (NameNodeLayoutVersion.Supports(LayoutVersion.Feature.FsimageCompression, imageVersion))
        {
            bool isCompressed = @in.ReadBoolean();
            v.Visit(ImageVisitor.ImageElement.IsCompressed, isCompressed.ToString());
            if (isCompressed)
            {
                string codecClassName = Text.ReadString(@in);
                v.Visit(ImageVisitor.ImageElement.CompressCodec, codecClassName);
                CompressionCodecFactory codecFac = new CompressionCodecFactory(new Configuration());
                CompressionCodec codec = codecFac.GetCodecByClassName(codecClassName);
                if (codec == null)
                {
                    throw new IOException("Image compression codec not supported: " + codecClassName);
                }
                // The remainder of the image is read through the decompressing stream.
                @in = new DataInputStream(codec.CreateInputStream(@in));
            }
        }
        ProcessINodes(@in, v, numInodes, skipBlocks, supportSnapshot);
        subtreeMap.Clear();
        dirNodeMap.Clear();
        ProcessINodesUC(@in, v, skipBlocks);
        if (NameNodeLayoutVersion.Supports(LayoutVersion.Feature.DelegationToken, imageVersion))
        {
            ProcessDelegationTokens(@in, v);
        }
        if (NameNodeLayoutVersion.Supports(LayoutVersion.Feature.Caching, imageVersion))
        {
            ProcessCacheManagerState(@in, v);
        }
        v.LeaveEnclosingElement(); // FSImage
        done = true;
    }
    finally
    {
        if (done)
        {
            v.Finish();
        }
        else
        {
            v.FinishAbnormally();
        }
    }
}
/// <exception cref="System.IO.IOException"/>
protected internal override InputStream GetInputStream(PathData item)
{
    FSDataInputStream i = (FSDataInputStream)base.GetInputStream(item);
    // Handle 0 and 1-byte files
    short leadBytes;
    try
    {
        leadBytes = i.ReadShort();
    }
    catch (EOFException)
    {
        i.Seek(0);
        return i;
    }
    // Check type of stream first
    switch (leadBytes)
    {
        case 0x1f8b:
        {
            // RFC 1952: must be gzip
            i.Seek(0);
            return new GZIPInputStream(i);
        }
        case 0x5345:
        {
            // 'S' 'E': might be a SequenceFile
            if (i.ReadByte() == 'Q')
            {
                i.Close();
                return new Display.TextRecordInputStream(this, item.stat);
            }
            goto default;
        }
        case 0x4f62:
        {
            // 'O' 'b': might be an Avro container file
            if (i.ReadByte() == 'j')
            {
                i.Close();
                return new Display.AvroFileInputStream(item.stat);
            }
            break;
        }
        default:
        {
            // Check the type of compression instead, depending on Codec class's
            // own detection methods, based on the provided path.
            CompressionCodecFactory cf = new CompressionCodecFactory(GetConf());
            CompressionCodec codec = cf.GetCodec(item.path);
            if (codec != null)
            {
                i.Seek(0);
                return codec.CreateInputStream(i);
            }
            break;
        }
    }
    // File is non-compressed, or not a file container we know.
    i.Seek(0);
    return i;
}
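For example, a gzip file is caught by its 0x1f8b magic number before the factory is ever consulted, while a compressed file whose first two bytes match none of the cases falls through to the suffix-driven GetCodec lookup on its path.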