Example #1
            /// <exception cref="System.IO.IOException"/>
            public Compressor GetCompressor()
            {
                CompressionCodec codec = GetCodec();

                if (codec != null)
                {
                    Compressor compressor = CodecPool.GetCompressor(codec);
                    if (compressor != null)
                    {
                        if (compressor.Finished())
                        {
                            // Somebody returns the compressor to CodecPool but is still using
                            // it.
                            Log.Warn("Compressor obtained from CodecPool already finished()");
                        }
                        else
                        {
                            if (Log.IsDebugEnabled())
                            {
                                Log.Debug("Got a compressor: " + compressor.GetHashCode());
                            }
                        }
                        compressor.Reset();
                    }
                    return(compressor);
                }
                return(null);
            }
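A caller of GetCompressor above would normally pair the pool lookup with the matching return once writing is done. The following is a minimal sketch of that pattern; CodecPool.ReturnCompressor and the two-argument CreateOutputStream overload are assumptions that mirror the Hadoop API rather than calls shown on this page.

    // Sketch only: borrow a compressor, wrap an output stream with it, and
    // hand it back to the pool afterwards. ReturnCompressor and the
    // CreateOutputStream(stream, compressor) overload are assumed.
    public static void WriteCompressed(CompressionCodec codec, OutputStream rawOut, byte[] data)
    {
        Compressor compressor = CodecPool.GetCompressor(codec);
        try
        {
            CompressionOutputStream compressedOut = codec.CreateOutputStream(rawOut, compressor);
            compressedOut.Write(data, 0, data.Length);
            compressedOut.Finish();   // write the codec trailer before closing
            compressedOut.Close();
        }
        finally
        {
            if (compressor != null)
            {
                CodecPool.ReturnCompressor(compressor);
            }
        }
    }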
        /// <exception cref="System.IO.IOException"/>
        public override RecordWriter <K, V> GetRecordWriter(FileSystem ignored, JobConf job
                                                            , string name, Progressable progress)
        {
            bool   isCompressed      = GetCompressOutput(job);
            string keyValueSeparator = job.Get("mapreduce.output.textoutputformat.separator",
                                               "\t");

            if (!isCompressed)
            {
                Path               file    = FileOutputFormat.GetTaskOutputPath(job, name);
                FileSystem         fs      = file.GetFileSystem(job);
                FSDataOutputStream fileOut = fs.Create(file, progress);
                return(new TextOutputFormat.LineRecordWriter <K, V>(fileOut, keyValueSeparator));
            }
            else
            {
                Type codecClass = GetOutputCompressorClass(job, typeof(GzipCodec));
                // create the named codec
                CompressionCodec codec = ReflectionUtils.NewInstance(codecClass, job);
                // build the filename including the extension
                Path file = FileOutputFormat.GetTaskOutputPath(job, name + codec.GetDefaultExtension
                                                                   ());
                FileSystem         fs      = file.GetFileSystem(job);
                FSDataOutputStream fileOut = fs.Create(file, progress);
                return(new TextOutputFormat.LineRecordWriter <K, V>(new DataOutputStream(codec.CreateOutputStream
                                                                                             (fileOut)), keyValueSeparator));
            }
        }
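For the compressed branch above to run, the submitting code has to turn output compression on. A hedged sketch of the driver-side JobConf setup, assuming the port keeps Hadoop's static FileOutputFormat setters alongside the getters used above:

    // Hypothetical driver-side configuration; SetCompressOutput and
    // SetOutputCompressorClass are assumed counterparts of the getters above.
    JobConf job = new JobConf();
    FileOutputFormat.SetCompressOutput(job, true);                     // GetCompressOutput(job) then returns true
    FileOutputFormat.SetOutputCompressorClass(job, typeof(GzipCodec)); // read back via GetOutputCompressorClass
    job.Set("mapreduce.output.textoutputformat.separator", ",");       // separator consumed by GetRecordWriter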
        public void TestDeserializeMessageSet(CompressionCodec codec)
        {
            using (var serialized = Pool.Reserve())
            {
                var set = new PartitionData
                {
                    Partition        = 42,
                    CompressionCodec = codec,
                    Messages         = new[]
                    {
                        new Message {
                            Key = Key, Value = Value
                        },
                        new Message {
                            Key = Key, Value = Value
                        }
                    }
                };
                set.Serialize(serialized, SerializationConfig.ByteArraySerializers);
                serialized.Position = 4;

                var deserialized = FetchPartitionResponse.DeserializeMessageSet(serialized, SerializationConfig.ByteArrayDeserializers);
                Assert.AreEqual(2, deserialized.Count);
                foreach (var msg in deserialized)
                {
                    Assert.AreEqual(0, msg.Offset);
                    CollectionAssert.AreEqual(Key, msg.Message.Key as byte[]);
                    CollectionAssert.AreEqual(Value, msg.Message.Value as byte[]);
                }
            }
        }
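Since the test above takes the codec as a parameter, it is typically driven once per codec the client supports. A short, hypothetical NUnit parameterization (assuming the enum defines None and Gzip and that this sits in the same fixture):

        // Hypothetical test cases driving the method above.
        [TestCase(CompressionCodec.None)]
        [TestCase(CompressionCodec.Gzip)]
        public void TestDeserializeMessageSet_PerCodec(CompressionCodec codec)
        {
            TestDeserializeMessageSet(codec);
        }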
Example #4
 public MergeQueue(Configuration conf, FileSystem fs, IList <Merger.Segment <K, V> >
                   segments, RawComparator <K> comparator, Progressable reporter, bool sortSegments,
                   CompressionCodec codec, TaskType taskType)
     : this(conf, fs, segments, comparator, reporter, sortSegments, taskType)
 {
     this.codec = codec;
 }
Example #5
 /// <summary>Construct an IFile Reader.</summary>
 /// <param name="conf">Configuration File</param>
 /// <param name="in">The input stream</param>
 /// <param name="length">
 /// Length of the data in the stream, including the checksum
 /// bytes.
 /// </param>
 /// <param name="codec">codec</param>
 /// <param name="readsCounter">Counter for records read from disk</param>
 /// <exception cref="System.IO.IOException"/>
 public Reader(Configuration conf, FSDataInputStream @in, long length, CompressionCodec
               codec, Counters.Counter readsCounter)
 {
     // Count records read from disk
     // Possibly decompressed stream that we read
     readRecordsCounter = readsCounter;
     checksumIn         = new IFileInputStream(@in, length, conf);
     if (codec != null)
     {
         decompressor = CodecPool.GetDecompressor(codec);
         if (decompressor != null)
         {
             this.@in = codec.CreateInputStream(checksumIn, decompressor);
         }
         else
         {
             Log.Warn("Could not obtain decompressor from CodecPool");
             this.@in = checksumIn;
         }
     }
     else
     {
         this.@in = checksumIn;
     }
     this.dataIn     = new DataInputStream(this.@in);
     this.fileLength = length;
     if (conf != null)
     {
         bufferSize = conf.GetInt("io.file.buffer.size", DefaultBufferSize);
     }
 }
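The reader keeps the borrowed decompressor for its lifetime; the matching cleanup happens when the reader is closed. A minimal sketch of that close path, assuming CodecPool exposes a ReturnDecompressor counterpart to the GetDecompressor call above:

 // Sketch of the corresponding Close() path; ReturnDecompressor is an
 // assumed counterpart of GetDecompressor.
 public void Close()
 {
     dataIn.Close();
     if (decompressor != null)
     {
         CodecPool.ReturnDecompressor(decompressor);
         decompressor = null;
     }
 }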
 public Context(TaskAttemptID reduceId, JobConf jobConf, FileSystem localFS, TaskUmbilicalProtocol
                umbilical, LocalDirAllocator localDirAllocator, Reporter reporter, CompressionCodec
                codec, Type combinerClass, Task.CombineOutputCollector <K, V> combineCollector,
                Counters.Counter spilledRecordsCounter, Counters.Counter reduceCombineInputCounter
                , Counters.Counter shuffledMapsCounter, Counters.Counter reduceShuffleBytes, Counters.Counter
                failedShuffleCounter, Counters.Counter mergedMapOutputsCounter, TaskStatus status
                , Progress copyPhase, Progress mergePhase, Task reduceTask, MapOutputFile mapOutputFile
                , IDictionary <TaskAttemptID, MapOutputFile> localMapFiles)
 {
     this.reduceId                  = reduceId;
     this.jobConf                   = jobConf;
     this.localFS                   = localFS;
     this.umbilical                 = umbilical;
     this.localDirAllocator         = localDirAllocator;
     this.reporter                  = reporter;
     this.codec                     = codec;
     this.combinerClass             = combinerClass;
     this.combineCollector          = combineCollector;
     this.spilledRecordsCounter     = spilledRecordsCounter;
     this.reduceCombineInputCounter = reduceCombineInputCounter;
     this.shuffledMapsCounter       = shuffledMapsCounter;
     this.reduceShuffleBytes        = reduceShuffleBytes;
     this.failedShuffleCounter      = failedShuffleCounter;
     this.mergedMapOutputsCounter   = mergedMapOutputsCounter;
     this.status                    = status;
     this.copyPhase                 = copyPhase;
     this.mergePhase                = mergePhase;
     this.reduceTask                = reduceTask;
     this.mapOutputFile             = mapOutputFile;
     this.localMapFiles             = localMapFiles;
 }
Example #7
        // Create a file containing fixed length records with random data
        /// <exception cref="System.IO.IOException"/>
        private AList <string> CreateFile(Path targetFile, CompressionCodec codec, int recordLen
                                          , int numRecords)
        {
            AList <string> recordList = new AList <string>(numRecords);
            OutputStream   ostream    = localFs.Create(targetFile);

            if (codec != null)
            {
                ostream = codec.CreateOutputStream(ostream);
            }
            TextWriter writer = new OutputStreamWriter(ostream);

            try
            {
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < numRecords; i++)
                {
                    for (int j = 0; j < recordLen; j++)
                    {
                        sb.Append(chars[charRand.Next(chars.Length)]);
                    }
                    string recordData = sb.ToString();
                    recordList.AddItem(recordData);
                    writer.Write(recordData);
                    sb.Length = 0;
                }
            }
            finally
            {
                writer.Close();
            }
            return(recordList);
        }
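Reading such a file back mirrors the write path: open the raw stream, and only wrap it in codec.CreateInputStream when a codec was supplied. A small sketch; FileSystem.Open is assumed to be the read counterpart of the Create call above.

            // Sketch of the read-back path for CreateFile above.
            InputStream istream = localFs.Open(targetFile);    // assumed counterpart of localFs.Create
            if (codec != null)
            {
                istream = codec.CreateInputStream(istream);    // same codec that wrapped the writer
            }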
 public DelimitedTextDataset(
     string folderName,
     string fileName,
     string folderPath = default,
     ColumnDelimiter columnDelimiter   = default,
     RowDelimiter rowDelimiter         = default,
     EncodingName encodingName         = default,
     CompressionCodec compressionCodec = default,
     CompressionLevel compressionLevel = default,
     QuoteChar quoteChar         = default,
     EscapeChar escapeChar       = default,
     bool firstRowAsHeader       = false,
     string nullValue            = default,
     IList <SchemaColumn> schema = default)
 {
     FolderName       = folderName;
     FileName         = fileName;
     FolderPath       = folderPath;
     ColumnDelimiter  = columnDelimiter;
     RowDelimiter     = rowDelimiter;
     EncodingName     = encodingName;
     CompressionCodec = compressionCodec;
     CompressionLevel = compressionLevel;
     QuoteChar        = quoteChar;
     EscapeChar       = escapeChar;
     FirstRowAsHeader = firstRowAsHeader;
     NullValue        = nullValue;
     Schema           = schema;
 }
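In practice a caller names only the optional parameters it needs and leaves the rest at default. A hypothetical usage of the constructor above; CompressionCodec.Gzip is an assumed member of the wrapper type:

     // Hypothetical construction for gzip-compressed, header-bearing CSV files.
     var dataset = new DelimitedTextDataset(
         folderName: "raw",
         fileName: "events.csv.gz",
         compressionCodec: CompressionCodec.Gzip,   // assumed enum-style value
         firstRowAsHeader: true);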
Example #9
 public Writer(Configuration conf, FileSystem fs, string dirName, Type keyClass, Type
               valClass, SequenceFile.CompressionType compress, CompressionCodec codec, Progressable
               progress)
     : this(conf, new Path(dirName), KeyClass(keyClass), ValueClass(valClass), Compression
                (compress, codec), Progressable(progress))
 {
 }
Example #10
        public void Serialize(ReusableMemoryStream stream, CompressionCodec compressionCodec, Tuple <ISerializer, ISerializer> serializers)
        {
            var crcPos = stream.Position;

            stream.Write(Basics.MinusOne32, 0, 4); // crc placeholder
            var bodyPos = stream.Position;

            stream.WriteByte(0);                      // magic byte
            stream.WriteByte((byte)compressionCodec); // attributes

            if (SerializedKeyValue != null)
            {
                stream.Write(SerializedKeyValue.GetBuffer(), 0, (int)SerializedKeyValue.Length);
            }
            else
            {
                DoSerializeKeyValue(stream, serializers);
            }

            // update crc
            var crc    = Crc32.Compute(stream, bodyPos, stream.Position - bodyPos);
            var curPos = stream.Position;

            stream.Position = crcPos;
            BigEndianConverter.Write(stream, (int)crc);
            stream.Position = curPos;
        }
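The serializer uses a common framing trick: reserve four bytes, write the body, then seek back and patch the placeholder with a value computed over what was written. The same pattern is shown below in a self-contained form over a plain MemoryStream, back-patching a big-endian length instead of the library's Crc32 helper:

        using System.IO;

        static class PlaceholderFraming
        {
            // Write a 4-byte big-endian length placeholder, write the payload,
            // then seek back and patch in the real body length.
            public static void WriteFramed(MemoryStream stream, byte[] payload)
            {
                long lenPos = stream.Position;
                stream.Write(new byte[4], 0, 4);          // placeholder, patched below
                long bodyPos = stream.Position;

                stream.Write(payload, 0, payload.Length); // body

                long endPos = stream.Position;
                int bodyLen = (int)(endPos - bodyPos);

                stream.Position = lenPos;                 // back-patch the length
                stream.WriteByte((byte)(bodyLen >> 24));
                stream.WriteByte((byte)(bodyLen >> 16));
                stream.WriteByte((byte)(bodyLen >> 8));
                stream.WriteByte((byte)bodyLen);
                stream.Position = endPos;
            }
        }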
Example #11
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public override RecordWriter <K, V> GetRecordWriter(TaskAttemptContext job)
        {
            Configuration    conf              = job.GetConfiguration();
            bool             isCompressed      = GetCompressOutput(job);
            string           keyValueSeparator = conf.Get(Seperator, "\t");
            CompressionCodec codec             = null;
            string           extension         = string.Empty;

            if (isCompressed)
            {
                Type codecClass = GetOutputCompressorClass(job, typeof(GzipCodec));
                codec     = (CompressionCodec)ReflectionUtils.NewInstance(codecClass, conf);
                extension = codec.GetDefaultExtension();
            }
            Path       file = GetDefaultWorkFile(job, extension);
            FileSystem fs   = file.GetFileSystem(conf);

            if (!isCompressed)
            {
                FSDataOutputStream fileOut = fs.Create(file, false);
                return(new TextOutputFormat.LineRecordWriter <K, V>(fileOut, keyValueSeparator));
            }
            else
            {
                FSDataOutputStream fileOut = fs.Create(file, false);
                return(new TextOutputFormat.LineRecordWriter <K, V>(new DataOutputStream(codec.CreateOutputStream
                                                                                             (fileOut)), keyValueSeparator));
            }
        }
Example #12
 /// <exception cref="System.IO.IOException"/>
 public Segment(Configuration conf, FileSystem fs, Path file, CompressionCodec codec
                , bool preserve, Counters.Counter mergedMapOutputsCounter, long rawDataLength)
     : this(conf, fs, file, 0, fs.GetFileStatus(file).GetLen(), codec, preserve, mergedMapOutputsCounter
            )
 {
     this.rawDataLength = rawDataLength;
 }
        public void runSeekTest(CompressionCodec codec)
        {
            TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
            const int      COUNT = 16384;
            BitFieldWriter @out  = new BitFieldWriter(
                new OutStream("test", 500, codec, collect), 1);

            TestInStream.PositionCollector[] positions =
                new TestInStream.PositionCollector[COUNT];
            for (int i = 0; i < COUNT; ++i)
            {
                positions[i] = new TestInStream.PositionCollector();
                @out.getPosition(positions[i]);
                // test runs, non-runs
                if (i < COUNT / 2)
                {
                    @out.write(i & 1);
                }
                else
                {
                    @out.write((i / 3) & 1);
                }
            }
            @out.flush();
            ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());

            collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
            inBuf.flip();
#pragma warning disable 612
            BitFieldReader @in = new BitFieldReader(InStream.create(null, "test",
                                                                    new ByteBuffer[] { inBuf }, new long[] { 0 }, inBuf.remaining(),
                                                                    codec, 500), 1);
#pragma warning restore 612
            for (int i = 0; i < COUNT; ++i)
            {
                int x = @in.next();
                if (i < COUNT / 2)
                {
                    Assert.Equal(i & 1, x);
                }
                else
                {
                    Assert.Equal((i / 3) & 1, x);
                }
            }
            for (int i = COUNT - 1; i >= 0; --i)
            {
                @in.seek(positions[i]);
                int x = @in.next();
                if (i < COUNT / 2)
                {
                    Assert.Equal(i & 1, x);
                }
                else
                {
                    Assert.Equal((i / 3) & 1, x);
                }
            }
        }
 public MetadataReaderImpl(Func <Stream> streamCreator,
                           CompressionCodec codec, int bufferSize, int typeCount)
 {
     this.file       = streamCreator();
     this.codec      = codec;
     this.bufferSize = bufferSize;
     this.typeCount  = typeCount;
 }
Example #15
 public DefaultDataReader(
     Func <Stream> streamCreator, string path, bool useZeroCopy, CompressionCodec codec)
 {
     this.streamCreator = streamCreator;
     this.path          = path;
     this.useZeroCopy   = useZeroCopy;
     this.codec         = codec;
 }
 public MetadataReaderImpl(Func<Stream> streamCreator,
     CompressionCodec codec, int bufferSize, int typeCount)
 {
     this.file = streamCreator();
     this.codec = codec;
     this.bufferSize = bufferSize;
     this.typeCount = typeCount;
 }
 public void runSeekTest(CompressionCodec codec)
 {
     TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
     const int COUNT = 16384;
     BitFieldWriter @out = new BitFieldWriter(
         new OutStream("test", 500, codec, collect), 1);
     TestInStream.PositionCollector[] positions =
         new TestInStream.PositionCollector[COUNT];
     for (int i = 0; i < COUNT; ++i)
     {
         positions[i] = new TestInStream.PositionCollector();
         @out.getPosition(positions[i]);
         // test runs, non-runs
         if (i < COUNT / 2)
         {
             @out.write(i & 1);
         }
         else
         {
             @out.write((i / 3) & 1);
         }
     }
     @out.flush();
     ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
     collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
     inBuf.flip();
     #pragma warning disable 612
     BitFieldReader @in = new BitFieldReader(InStream.create(null, "test",
         new ByteBuffer[] { inBuf }, new long[] { 0 }, inBuf.remaining(),
         codec, 500), 1);
     #pragma warning restore 612
     for (int i = 0; i < COUNT; ++i)
     {
         int x = @in.next();
         if (i < COUNT / 2)
         {
             Assert.Equal(i & 1, x);
         }
         else
         {
             Assert.Equal((i / 3) & 1, x);
         }
     }
     for (int i = COUNT - 1; i >= 0; --i)
     {
         @in.seek(positions[i]);
         int x = @in.next();
         if (i < COUNT / 2)
         {
             Assert.Equal(i & 1, x);
         }
         else
         {
             Assert.Equal((i / 3) & 1, x);
         }
     }
 }
        public virtual void TestSucceedAndFailedCopyMap <K, V>()
        {
            JobConf job = new JobConf();

            job.SetNumMapTasks(2);
            //mock creation
            TaskUmbilicalProtocol mockUmbilical = Org.Mockito.Mockito.Mock <TaskUmbilicalProtocol
                                                                            >();
            Reporter   mockReporter   = Org.Mockito.Mockito.Mock <Reporter>();
            FileSystem mockFileSystem = Org.Mockito.Mockito.Mock <FileSystem>();
            Type       combinerClass  = job.GetCombinerClass();

            Task.CombineOutputCollector <K, V> mockCombineOutputCollector = (Task.CombineOutputCollector
                                                                             <K, V>)Org.Mockito.Mockito.Mock <Task.CombineOutputCollector>();
            // needed for mock with generic
            TaskAttemptID     mockTaskAttemptID     = Org.Mockito.Mockito.Mock <TaskAttemptID>();
            LocalDirAllocator mockLocalDirAllocator = Org.Mockito.Mockito.Mock <LocalDirAllocator
                                                                                >();
            CompressionCodec mockCompressionCodec = Org.Mockito.Mockito.Mock <CompressionCodec
                                                                              >();

            Counters.Counter mockCounter       = Org.Mockito.Mockito.Mock <Counters.Counter>();
            TaskStatus       mockTaskStatus    = Org.Mockito.Mockito.Mock <TaskStatus>();
            Progress         mockProgress      = Org.Mockito.Mockito.Mock <Progress>();
            MapOutputFile    mockMapOutputFile = Org.Mockito.Mockito.Mock <MapOutputFile>();

            Org.Apache.Hadoop.Mapred.Task mockTask = Org.Mockito.Mockito.Mock <Org.Apache.Hadoop.Mapred.Task
                                                                               >();
            MapOutput <K, V> output = Org.Mockito.Mockito.Mock <MapOutput>();

            ShuffleConsumerPlugin.Context <K, V> context = new ShuffleConsumerPlugin.Context <K
                                                                                              , V>(mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator
                                                                                                   , mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector,
                                                                                                   mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus
                                                                                                   , mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
            TaskStatus status   = new _TaskStatus_251();
            Progress   progress = new Progress();
            ShuffleSchedulerImpl <K, V> scheduler = new ShuffleSchedulerImpl <K, V>(job, status
                                                                                    , null, null, progress, context.GetShuffledMapsCounter(), context.GetReduceShuffleBytes
                                                                                        (), context.GetFailedShuffleCounter());
            MapHost       host1           = new MapHost("host1", null);
            TaskAttemptID failedAttemptID = new TaskAttemptID(new TaskID(new JobID("test", 0)
                                                                         , TaskType.Map, 0), 0);
            TaskAttemptID succeedAttemptID = new TaskAttemptID(new TaskID(new JobID("test", 0
                                                                                    ), TaskType.Map, 1), 1);

            // handle output fetch failure for failedAttemptID, part I
            scheduler.HostFailed(host1.GetHostName());
            // handle output fetch succeed for succeedAttemptID
            long bytes = (long)500 * 1024 * 1024;

            scheduler.CopySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);
            // handle output fetch failure for failedAttemptID, part II
            // for MAPREDUCE-6361: verify no NPE exception get thrown out
            scheduler.CopyFailed(failedAttemptID, host1, true, false);
        }
Example #19
 public CompressedStream(long? fileId, string name, List <DiskRange> input, long length,
                         CompressionCodec codec, int bufferSize)
     : base(fileId, name, length)
 {
     this.bytes      = input;
     this.codec      = codec;
     this.bufferSize = bufferSize;
     currentOffset   = 0;
     currentRange    = 0;
 }
Example #20
 private static OrcProto.Footer extractFooter(ByteBuffer bb, int footerAbsPos,
                                              int footerSize, CompressionCodec codec, int bufferSize)
 {
     bb.position(footerAbsPos);
     bb.limit(footerAbsPos + footerSize);
     return(OrcProto.Footer.ParseFrom(InStream.createCodedInputStream(null, "footer",
                                                                      new List <DiskRange> {
         new RecordReaderImpl.BufferChunk(bb, 0)
     }, footerSize, codec, bufferSize)));
 }
Example #21
        protected internal override bool IsSplitable(FileSystem fs, Path file)
        {
            CompressionCodec codec = compressionCodecs.GetCodec(file);

            if (null == codec)
            {
                return(true);
            }
            return(codec is SplittableCompressionCodec);
        }
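The compressionCodecs field above is a CompressionCodecFactory (the same type used in a later example on this page): files with no registered codec are plain text and always splittable, while compressed files are only splittable when the codec implements SplittableCompressionCodec (bzip2, for instance). The lookup in isolation, as a sketch with a hard-coded path and an existing Configuration named conf:

            // Sketch: resolve the codec from the file name and decide splittability,
            // mirroring IsSplitable above.
            CompressionCodecFactory factory = new CompressionCodecFactory(conf);
            CompressionCodec codec = factory.GetCodec(new Path("/data/input.bz2"));
            bool splitable = codec == null || codec is SplittableCompressionCodec;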
Example #22
 private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
                                                  int metadataSize, CompressionCodec codec, int bufferSize)
 {
     bb.position(metadataAbsPos);
     bb.limit(metadataAbsPos + metadataSize);
     return(OrcProto.Metadata.ParseFrom(InStream.createCodedInputStream(null, "metadata",
                                                                        new List <DiskRange> {
         new RecordReaderImpl.BufferChunk(bb, 0)
     }, metadataSize, codec, bufferSize)));
 }
Example #23
 // Local directories
 /// <exception cref="System.IO.IOException"/>
 public static RawKeyValueIterator Merge <K, V>(Configuration conf, FileSystem fs,
                                                CompressionCodec codec, Path[] inputs, bool deleteInputs, int mergeFactor, Path
                                                tmpDir, RawComparator <K> comparator, Progressable reporter, Counters.Counter readsCounter
                                                , Counters.Counter writesCounter, Progress mergePhase)
 {
     System.Type keyClass   = typeof(K);
     System.Type valueClass = typeof(V);
     return(new Merger.MergeQueue <K, V>(conf, fs, inputs, deleteInputs, codec, comparator
                                         , reporter, null, TaskType.Reduce).Merge(keyClass, valueClass, mergeFactor, tmpDir
                                                                                  , readsCounter, writesCounter, mergePhase));
 }
        public virtual void TestConsumerApi()
        {
            JobConf jobConf = new JobConf();
            ShuffleConsumerPlugin <K, V> shuffleConsumerPlugin = new TestShufflePlugin.TestShuffleConsumerPlugin
                                                                 <K, V>();
            //mock creation
            ReduceTask            mockReduceTask = Org.Mockito.Mockito.Mock <ReduceTask>();
            TaskUmbilicalProtocol mockUmbilical  = Org.Mockito.Mockito.Mock <TaskUmbilicalProtocol
                                                                             >();
            Reporter   mockReporter   = Org.Mockito.Mockito.Mock <Reporter>();
            FileSystem mockFileSystem = Org.Mockito.Mockito.Mock <FileSystem>();
            Type       combinerClass  = jobConf.GetCombinerClass();

            Task.CombineOutputCollector <K, V> mockCombineOutputCollector = (Task.CombineOutputCollector
                                                                             <K, V>)Org.Mockito.Mockito.Mock <Task.CombineOutputCollector>();
            // needed for mock with generic
            TaskAttemptID     mockTaskAttemptID     = Org.Mockito.Mockito.Mock <TaskAttemptID>();
            LocalDirAllocator mockLocalDirAllocator = Org.Mockito.Mockito.Mock <LocalDirAllocator
                                                                                >();
            CompressionCodec mockCompressionCodec = Org.Mockito.Mockito.Mock <CompressionCodec
                                                                              >();

            Counters.Counter mockCounter       = Org.Mockito.Mockito.Mock <Counters.Counter>();
            TaskStatus       mockTaskStatus    = Org.Mockito.Mockito.Mock <TaskStatus>();
            Progress         mockProgress      = Org.Mockito.Mockito.Mock <Progress>();
            MapOutputFile    mockMapOutputFile = Org.Mockito.Mockito.Mock <MapOutputFile>();

            Org.Apache.Hadoop.Mapred.Task mockTask = Org.Mockito.Mockito.Mock <Org.Apache.Hadoop.Mapred.Task
                                                                               >();
            try
            {
                string[] dirs = jobConf.GetLocalDirs();
                // verify that these APIs are available through super class handler
                ShuffleConsumerPlugin.Context <K, V> context = new ShuffleConsumerPlugin.Context <K
                                                                                                  , V>(mockTaskAttemptID, jobConf, mockFileSystem, mockUmbilical, mockLocalDirAllocator
                                                                                                       , mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector,
                                                                                                       mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus
                                                                                                       , mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
                shuffleConsumerPlugin.Init(context);
                shuffleConsumerPlugin.Run();
                shuffleConsumerPlugin.Close();
            }
            catch (Exception e)
            {
                NUnit.Framework.Assert.Fail("Threw exception:" + e);
            }
            // verify that these APIs are available for 3rd party plugins
            mockReduceTask.GetTaskID();
            mockReduceTask.GetJobID();
            mockReduceTask.GetNumMaps();
            mockReduceTask.GetPartition();
            mockReporter.Progress();
        }
Example #25
 /// <exception cref="System.IO.IOException"/>
 public static RawKeyValueIterator Merge <K, V>(Configuration conf, FileSystem fs,
                                                CompressionCodec codec, IList <Merger.Segment <K, V> > segments, int mergeFactor, Path
                                                tmpDir, RawComparator <K> comparator, Progressable reporter, bool sortSegments,
                                                Counters.Counter readsCounter, Counters.Counter writesCounter, Progress mergePhase
                                                , TaskType taskType)
 {
     System.Type keyClass   = typeof(K);
     System.Type valueClass = typeof(V);
     return(new Merger.MergeQueue <K, V>(conf, fs, segments, comparator, reporter, sortSegments
                                         , codec, taskType).Merge(keyClass, valueClass, mergeFactor, tmpDir, readsCounter
                                                                  , writesCounter, mergePhase));
 }
        /// <summary>
        /// Create a compression instance using the codec specified by
        /// <code>codecClassName</code>
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        internal static Org.Apache.Hadoop.Hdfs.Server.Namenode.FSImageCompression CreateCompression
            (Configuration conf, string codecClassName)
        {
            CompressionCodecFactory factory = new CompressionCodecFactory(conf);
            CompressionCodec        codec   = factory.GetCodecByClassName(codecClassName);

            if (codec == null)
            {
                throw new IOException("Not a supported codec: " + codecClassName);
            }
            return(new Org.Apache.Hadoop.Hdfs.Server.Namenode.FSImageCompression(codec));
        }
Example #27
 public OutStream(
     string name,
     int bufferSize,
     CompressionCodec codec,
     OutputReceiver receiver)
 {
     this.name = name;
     this._bufferSize = bufferSize;
     this.codec = codec;
     this.receiver = receiver;
     this._suppress = false;
 }
Example #28
 public OutStream(
     string name,
     int bufferSize,
     CompressionCodec codec,
     OutputReceiver receiver)
 {
     this.name        = name;
     this._bufferSize = bufferSize;
     this.codec       = codec;
     this.receiver    = receiver;
     this._suppress   = false;
 }
Example #29
        // Remark: it might be the case that brokers are authorized to send us a partial record batch.
        // So if the protocol exceptions in this method are triggered, you might want to investigate and remove
        // them altogether.
        public void Deserialize(ReusableMemoryStream input, Deserializers deserializers, long endOfAllBatches)
        {
            if (input.Position + BytesNecessaryToGetLength > endOfAllBatches)
            {
                throw new ProtocolException(
                          $"Trying to read a batch record at {input.Position} and the end of all batches is {endOfAllBatches}."
                          + $" There is not enough bytes remaining to even read the first fields...");
            }
            BaseOffset = BigEndianConverter.ReadInt64(input);
            var batchLength = BigEndianConverter.ReadInt32(input);
            var endOfBatch  = input.Position + batchLength;

            if (endOfAllBatches < endOfBatch)
            {
                throw new ProtocolException(
                          $"The record batch says it has length that stops at {endOfBatch} but the list of all batches stop at {endOfAllBatches}.");
            }
            PartitionLeaderEpoch = BigEndianConverter.ReadInt32(input);
            var magic = input.ReadByte();

            // Current magic value is 2
            if ((uint)magic != 2)
            {
                throw new UnsupportedMagicByteVersion((byte)magic, "2");
            }

            var crc = (uint)BigEndianConverter.ReadInt32(input);
            var afterCrcPosition = input.Position; // The crc is calculated starting from this position

            Crc32.CheckCrcCastagnoli((int)crc, input, afterCrcPosition, endOfBatch - afterCrcPosition);

            var attributes = BigEndianConverter.ReadInt16(input);

            CompressionCodec = (CompressionCodec)(attributes & CompressionCodecMask);
            IsTransactional  = (attributes & TransactionalFlagMask) != 0;
            IsControl        = (attributes & ControlFlagMask) != 0;
            TimestampType    = (attributes & TimestampTypeMask) > 0
                ? TimestampType.LogAppendTime
                : TimestampType.CreateTime;

            var lastOffsetDelta = BigEndianConverter.ReadInt32(input);

            var firstTimestamp = BigEndianConverter.ReadInt64(input);
            var maxTimestamp   = BigEndianConverter.ReadInt64(input);

            ProducerId    = BigEndianConverter.ReadInt64(input);
            ProducerEpoch = BigEndianConverter.ReadInt16(input);
            BaseSequence  = BigEndianConverter.ReadInt32(input);

            var numberOfRecords = BigEndianConverter.ReadInt32(input);

            Records = DeserializeRecords(input, numberOfRecords, endOfBatch, firstTimestamp, deserializers);
        }
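The attribute decoding above follows the Kafka record-batch layout: the compression codec occupies the three lowest bits, with the timestamp type, transactional, and control flags in the bits just above. The mask constants referenced in the code would therefore look roughly like this sketch (names taken from the code above; the values come from the Kafka protocol, not from this library's source):

        // Kafka record-batch "attributes" bit layout (protocol definition):
        //   bits 0-2  compression codec
        //   bit  3    timestamp type (0 = CreateTime, 1 = LogAppendTime)
        //   bit  4    transactional
        //   bit  5    control batch
        private const short CompressionCodecMask  = 0x07;
        private const short TimestampTypeMask     = 0x08;
        private const short TransactionalFlagMask = 0x10;
        private const short ControlFlagMask       = 0x20;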
Example #30
        /**
         * Creates coded input stream (used for protobuf message parsing) with higher message size limit.
         *
         * @param name       the name of the stream
         * @param input      the list of ranges of bytes for the stream; from disk or cache
         * @param length     the length in bytes of the stream
         * @param codec      the compression codec
         * @param bufferSize the compression buffer size
         * @return coded input stream
         * @
         */
        public static CodedInputStream createCodedInputStream(long? fileId,
                                                              string name,
                                                              List <DiskRange> input,
                                                              long length,
                                                              CompressionCodec codec,
                                                              int bufferSize)
        {
            InStream         inStream         = create(fileId, name, input, length, codec, bufferSize);
            CodedInputStream codedInputStream = CodedInputStream.CreateInstance(inStream);

            codedInputStream.SetSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
            return(codedInputStream);
        }
Example #31
 /// <exception cref="System.IO.IOException"/>
 public Segment(Configuration conf, FileSystem fs, Path file, long segmentOffset,
                long segmentLength, CompressionCodec codec, bool preserve, Counters.Counter mergedMapOutputsCounter
                )
 {
     this.conf              = conf;
     this.fs                = fs;
     this.file              = file;
     this.codec             = codec;
     this.preserve          = preserve;
     this.segmentOffset     = segmentOffset;
     this.segmentLength     = segmentLength;
     this.mapOutputsCounter = mergedMapOutputsCounter;
 }
Example #32
        /// <exception cref="System.IO.IOException"/>
        public static InputStream WrapInputStreamForCompression(Configuration conf, string
                                                                codec, InputStream @in)
        {
            if (codec.IsEmpty())
            {
                return(@in);
            }
            FSImageCompression compression = FSImageCompression.CreateCompression(conf, codec
                                                                                  );
            CompressionCodec imageCodec = compression.GetImageCodec();

            return(imageCodec.CreateInputStream(@in));
        }
            /// <exception cref="System.IO.IOException"/>
            private void SaveInternal(FileOutputStream fout, FSImageCompression compression,
                                      string filePath)
            {
                StartupProgress prog     = NameNode.GetStartupProgress();
                MessageDigest   digester = MD5Hash.GetDigester();

                underlyingOutputStream = new DigestOutputStream(new BufferedOutputStream(fout), digester
                                                                );
                underlyingOutputStream.Write(FSImageUtil.MagicHeader);
                fileChannel = fout.GetChannel();
                FsImageProto.FileSummary.Builder b = FsImageProto.FileSummary.NewBuilder().SetOndiskVersion
                                                         (FSImageUtil.FileVersion).SetLayoutVersion(NameNodeLayoutVersion.CurrentLayoutVersion
                                                                                                    );
                codec = compression.GetImageCodec();
                if (codec != null)
                {
                    b.SetCodec(codec.GetType().GetCanonicalName());
                    sectionOutputStream = codec.CreateOutputStream(underlyingOutputStream);
                }
                else
                {
                    sectionOutputStream = underlyingOutputStream;
                }
                SaveNameSystemSection(b);
                // Check for cancellation right after serializing the name system section.
                // Some unit tests, such as TestSaveNamespace#testCancelSaveNameSpace
                // depends on this behavior.
                context.CheckCancelled();
                Step step = new Step(StepType.Inodes, filePath);

                prog.BeginStep(Phase.SavingCheckpoint, step);
                SaveInodes(b);
                SaveSnapshots(b);
                prog.EndStep(Phase.SavingCheckpoint, step);
                step = new Step(StepType.DelegationTokens, filePath);
                prog.BeginStep(Phase.SavingCheckpoint, step);
                SaveSecretManagerSection(b);
                prog.EndStep(Phase.SavingCheckpoint, step);
                step = new Step(StepType.CachePools, filePath);
                prog.BeginStep(Phase.SavingCheckpoint, step);
                SaveCacheManagerSection(b);
                prog.EndStep(Phase.SavingCheckpoint, step);
                SaveStringTableSection(b);
                // We use the underlyingOutputStream to write the header. Therefore flush
                // the buffered stream (which is potentially compressed) first.
                FlushSectionOutputStream();
                FsImageProto.FileSummary summary = ((FsImageProto.FileSummary)b.Build());
                SaveFileSummary(underlyingOutputStream, summary);
                underlyingOutputStream.Close();
                savedDigest = new MD5Hash(digester.Digest());
            }
Example #34
 /**
  * Create an input stream from a list of disk ranges with data.
  * @param name the name of the stream
  * @param input the list of ranges of bytes for the stream; from disk or cache
  * @param length the length in bytes of the stream
  * @param codec the compression codec
  * @param bufferSize the compression buffer size
  * @param cache Low-level cache to use to put data, if any. Only works with compressed streams.
  * @return an input stream
  * @
  */
 public static InStream create(long? fileId,
                               string name,
                               List<DiskRange> input,
                               long length,
                               CompressionCodec codec,
                               int bufferSize)
 {
     if (codec == null)
     {
         return new UncompressedStream(fileId, name, input, length);
     }
     else
     {
         return new CompressedStream(fileId, name, input, length, codec, bufferSize);
     }
 }
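As the factory above shows, the codec argument alone decides which InStream subclass the caller gets back: null yields an UncompressedStream, anything else a CompressedStream configured with the buffer size. A small sketch reusing only calls that appear elsewhere on this page:

     // Sketch: same ranges, two different stream types depending on the codec.
     static InStream OpenStream(CompressionCodec codecOrNull)
     {
         ByteBuffer bb = ByteBuffer.allocate(1024);
         List<DiskRange> ranges = new List<DiskRange> { new RecordReaderImpl.BufferChunk(bb, 0) };
         // null codec -> UncompressedStream; non-null -> CompressedStream
         return InStream.create(null, "sketch", ranges, bb.remaining(), codecOrNull, 256 * 1024);
     }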
Example #35
        public WriterImpl(
            Stream stream,
            string path,
            OrcFile.WriterOptions options,
            ObjectInspector inspector,
            TypeDescription schema,
            long stripeSize,
            CompressionKind compress,
            int bufferSize,
            int rowIndexStride,
            MemoryManager memoryManager,
            bool addBlockPadding,
            OrcFile.Version version,
            OrcFile.WriterCallback callback,
            OrcFile.EncodingStrategy encodingStrategy,
            OrcFile.CompressionStrategy compressionStrategy,
            double paddingTolerance,
            long blockSizeValue,
            string bloomFilterColumnNames,
            double bloomFilterFpp)
        {
            this.baseStream = stream;
            this.streamFactory = new StreamFactory(this);
            this.path = path;
            this.options = options;
            this.callback = callback;
            this.schema = schema;
            this.adjustedStripeSize = stripeSize;
            this.defaultStripeSize = stripeSize;
            this.version = version;
            this.encodingStrategy = encodingStrategy;
            this.compressionStrategy = compressionStrategy;
            this.addBlockPadding = addBlockPadding;
            this.blockSize = blockSizeValue;
            this.paddingTolerance = paddingTolerance;
            this.compress = compress;
            this.rowIndexStride = rowIndexStride;
            this.memoryManager = memoryManager;
            buildIndex = rowIndexStride > 0;
            codec = createCodec(compress);
            int numColumns = schema.getMaximumId() + 1;
            this.bufferSize = getEstimatedBufferSize(defaultStripeSize, numColumns, bufferSize);
            if (version == OrcFile.Version.V_0_11)
            {
                /* do not write bloom filters for ORC v11 */
                this.bloomFilterColumns = new bool[schema.getMaximumId() + 1];
            }
            else
            {
                this.bloomFilterColumns =
                    OrcUtils.includeColumns(bloomFilterColumnNames, schema);
            }
            this.bloomFilterFpp = bloomFilterFpp;
            treeWriter = createTreeWriter(inspector, schema, streamFactory, false);
            if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE)
            {
                throw new ArgumentException("Row stride must be at least " +
                    MIN_ROW_INDEX_STRIDE);
            }

            // ensure that we are able to handle callbacks before we register ourselves
            memoryManager.addWriter(path, stripeSize, this);
        }
 public DefaultDataReader(
     Func<Stream> streamCreator, string path, bool useZeroCopy, CompressionCodec codec)
 {
     this.streamCreator = streamCreator;
     this.path = path;
     this.useZeroCopy = useZeroCopy;
     this.codec = codec;
 }
 public static DataReader createDefaultDataReader(
     Func<Stream> streamCreator, string path, bool useZeroCopy, CompressionCodec codec)
 {
     return new DefaultDataReader(streamCreator, path, useZeroCopy, codec);
 }
 public void runSeekTest(CompressionCodec codec)
 {
     TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
     RunLengthIntegerWriter @out = new RunLengthIntegerWriter(
         new OutStream("test", 1000, codec, collect), true);
     TestInStream.PositionCollector[] positions =
         new TestInStream.PositionCollector[4096];
     Random random = new Random(99);
     int[] junk = new int[2048];
     for (int i = 0; i < junk.Length; ++i)
     {
         junk[i] = random.Next();
     }
     for (int i = 0; i < 4096; ++i)
     {
         positions[i] = new TestInStream.PositionCollector();
         @out.getPosition(positions[i]);
         // test runs, incrementing runs, non-runs
         if (i < 1024)
         {
             @out.write(i / 4);
         }
         else if (i < 2048)
         {
             @out.write(2 * i);
         }
         else
         {
             @out.write(junk[i - 2048]);
         }
     }
     @out.flush();
     ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
     collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
     inBuf.flip();
     #pragma warning disable 612
     RunLengthIntegerReader @in = new RunLengthIntegerReader(InStream.create
         (null, "test", new ByteBuffer[] { inBuf }, new long[] { 0 }, inBuf.remaining(),
             codec, 1000), true);
     #pragma warning restore 612
     for (int i = 0; i < 2048; ++i)
     {
         int x = (int)@in.next();
         if (i < 1024)
         {
             Assert.Equal(i / 4, x);
         }
         else if (i < 2048)
         {
             Assert.Equal(2 * i, x);
         }
         else
         {
             Assert.Equal(junk[i - 2048], x);
         }
     }
     for (int i = 2047; i >= 0; --i)
     {
         @in.seek(positions[i]);
         int x = (int)@in.next();
         if (i < 1024)
         {
             Assert.Equal(i / 4, x);
         }
         else if (i < 2048)
         {
             Assert.Equal(2 * i, x);
         }
         else
         {
             Assert.Equal(junk[i - 2048], x);
         }
     }
 }
 public StreamReaderBuilder setCompressionCodec(CompressionCodec compressionCodec)
 {
     this.compressionCodec = compressionCodec;
     return this;
 }
Example #40
 public BufferedStream(string name, int bufferSize, CompressionCodec codec)
 {
     outStream = new OutStream(name, bufferSize, codec, this);
 }
        public static TreeReader[] createEncodedTreeReader(int numCols,
            List<OrcProto.Type> types,
            List<OrcProto.ColumnEncoding> encodings,
            EncodedColumnBatch<OrcBatchKey> batch,
            CompressionCodec codec, bool skipCorrupt)
        {
            long file = batch.getBatchKey().file;
            TreeReader[] treeReaders = new TreeReader[numCols];
            for (int i = 0; i < numCols; i++)
            {
                int columnIndex = batch.getColumnIxs()[i];
                ColumnStreamData[] streamBuffers = batch.getColumnData()[i];
                OrcProto.Type columnType = types[columnIndex];

                // EncodedColumnBatch is already decompressed, we don't really need to pass codec.
                // But we need to know if the original data is compressed or not. This is used to skip
                // positions in row index properly. If the file is originally compressed,
                // then 1st position (compressed offset) in row index should be skipped to get
                // uncompressed offset, else 1st position should not be skipped.
                // TODO: there should be a better way to do this, code just needs to be modified
                OrcProto.ColumnEncoding columnEncoding = encodings[columnIndex];

                // stream buffers are arranged in enum order of stream kind
                ColumnStreamData present = streamBuffers[Kind.PRESENT_VALUE],
                  data = streamBuffers[Kind.DATA_VALUE],
                  dictionary = streamBuffers[Kind.DICTIONARY_DATA_VALUE],
                  lengths = streamBuffers[Kind.LENGTH_VALUE],
                  secondary = streamBuffers[Kind.SECONDARY_VALUE];

                switch (columnType.Kind)
                {
                    case OrcProto.Type.Types.Kind.BINARY:
                        treeReaders[i] = BinaryStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setPresentStream(present)
                            .setDataStream(data)
                            .setLengthStream(lengths)
                            .setCompressionCodec(codec)
                            .setColumnEncoding(columnEncoding)
                            .build();
                        break;
                    case OrcProto.Type.Types.Kind.BOOLEAN:
                        treeReaders[i] = BooleanStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setPresentStream(present)
                            .setDataStream(data)
                            .setCompressionCodec(codec)
                            .build();
                        break;
                    case OrcProto.Type.Types.Kind.BYTE:
                        treeReaders[i] = ByteStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setPresentStream(present)
                            .setDataStream(data)
                            .setCompressionCodec(codec)
                            .build();
                        break;
                    case OrcProto.Type.Types.Kind.SHORT:
                        treeReaders[i] = ShortStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setPresentStream(present)
                            .setDataStream(data)
                            .setCompressionCodec(codec)
                            .setColumnEncoding(columnEncoding)
                            .build();
                        break;
                    case OrcProto.Type.Types.Kind.INT:
                        treeReaders[i] = IntStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setPresentStream(present)
                            .setDataStream(data)
                            .setCompressionCodec(codec)
                            .setColumnEncoding(columnEncoding)
                            .build();
                        break;
                    case OrcProto.Type.Types.Kind.LONG:
                        treeReaders[i] = LongStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setPresentStream(present)
                            .setDataStream(data)
                            .setCompressionCodec(codec)
                            .setColumnEncoding(columnEncoding)
                            .skipCorrupt(skipCorrupt)
                            .build();
                        break;
                    case OrcProto.Type.Types.Kind.FLOAT:
                        treeReaders[i] = FloatStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setPresentStream(present)
                            .setDataStream(data)
                            .setCompressionCodec(codec)
                            .build();
                        break;
                    case OrcProto.Type.Types.Kind.DOUBLE:
                        treeReaders[i] = DoubleStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setPresentStream(present)
                            .setDataStream(data)
                            .setCompressionCodec(codec)
                            .build();
                        break;
                    case OrcProto.Type.Types.Kind.CHAR:
                        treeReaders[i] = CharStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setMaxLength((int)columnType.MaximumLength)
                            .setPresentStream(present)
                            .setDataStream(data)
                            .setLengthStream(lengths)
                            .setDictionaryStream(dictionary)
                            .setCompressionCodec(codec)
                            .setColumnEncoding(columnEncoding)
                            .build();
                        break;
                    case OrcProto.Type.Types.Kind.VARCHAR:
                        treeReaders[i] = VarcharStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setMaxLength((int)columnType.MaximumLength)
                            .setPresentStream(present)
                            .setDataStream(data)
                            .setLengthStream(lengths)
                            .setDictionaryStream(dictionary)
                            .setCompressionCodec(codec)
                            .setColumnEncoding(columnEncoding)
                            .build();
                        break;
                    case OrcProto.Type.Types.Kind.STRING:
                        treeReaders[i] = StringStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setPresentStream(present)
                            .setDataStream(data)
                            .setLengthStream(lengths)
                            .setDictionaryStream(dictionary)
                            .setCompressionCodec(codec)
                            .setColumnEncoding(columnEncoding)
                            .build();
                        break;
                    case OrcProto.Type.Types.Kind.DECIMAL:
                        treeReaders[i] = DecimalStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setPrecision((int)columnType.Precision)
                            .setScale((int)columnType.Scale)
                            .setPresentStream(present)
                            .setValueStream(data)
                            .setScaleStream(secondary)
                            .setCompressionCodec(codec)
                            .setColumnEncoding(columnEncoding)
                            .build();
                        break;
                    case OrcProto.Type.Types.Kind.TIMESTAMP:
                        treeReaders[i] = TimestampStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setPresentStream(present)
                            .setSecondsStream(data)
                            .setNanosStream(secondary)
                            .setCompressionCodec(codec)
                            .setColumnEncoding(columnEncoding)
                            .skipCorrupt(skipCorrupt)
                            .build();
                        break;
                    case OrcProto.Type.Types.Kind.DATE:
                        treeReaders[i] = DateStreamReader.builder()
                            .setFileId(file)
                            .setColumnIndex(columnIndex)
                            .setPresentStream(present)
                            .setDataStream(data)
                            .setCompressionCodec(codec)
                            .setColumnEncoding(columnEncoding)
                            .build();
                        break;
                    default:
                        throw new NotSupportedException("Data type not supported yet! " + columnType);
                }
            }

            return treeReaders;
        }
Example #42
 internal static InStream create(long? fileId,
                         string streamName,
                         ByteBuffer[] buffers,
                         long[] offsets,
                         long length,
                         CompressionCodec codec,
                         int bufferSize)
 {
     List<DiskRange> input = new List<DiskRange>(buffers.Length);
     for (int i = 0; i < buffers.Length; ++i)
     {
         input.Add(new RecordReaderImpl.BufferChunk(buffers[i], offsets[i]));
     }
     return create(fileId, streamName, input, length, codec, bufferSize);
 }
Example #43
 /**
  * Creates coded input stream (used for protobuf message parsing) with higher message size limit.
  *
  * @param name       the name of the stream
  * @param input      the list of ranges of bytes for the stream; from disk or cache
  * @param length     the length in bytes of the stream
  * @param codec      the compression codec
  * @param bufferSize the compression buffer size
  * @return coded input stream
  * @
  */
 public static CodedInputStream createCodedInputStream(long? fileId,
     string name,
     List<DiskRange> input,
     long length,
     CompressionCodec codec,
     int bufferSize)
 {
     InStream inStream = create(fileId, name, input, length, codec, bufferSize);
     CodedInputStream codedInputStream = CodedInputStream.CreateInstance(inStream);
     codedInputStream.SetSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
     return codedInputStream;
 }
Example #44
 public CompressedStream(long? fileId, string name, List<DiskRange> input, long length,
                         CompressionCodec codec, int bufferSize)
     : base(fileId, name, length)
 {
     this.bytes = input;
     this.codec = codec;
     this.bufferSize = bufferSize;
     currentOffset = 0;
     currentRange = 0;
 }