Exemplo n.º 1
0
        /// <summary>
        /// Reads a long value by deserializing it into the
        /// <c>LongWritable</c> obtained from <c>TlData.Get().ULong</c>.
        /// </summary>
        /// <param name="in">input the serialized value is read from</param>
        /// <returns>the value held by the writable after <c>ReadFields</c></returns>
        /// <exception cref="System.IO.IOException"/>
        internal static long ReadLong(DataInput @in)
        {
            LongWritable scratch = TlData.Get().ULong;
            scratch.ReadFields(@in);

            long decoded = scratch.Get();
            return decoded;
        }
Exemplo n.º 2
0
        /// <summary>Map file name and offset into statistical data.</summary>
        /// <remarks>
        /// The <tt>key</tt> contains the file name and the <tt>value</tt> is the
        /// offset within the file. Both are handed to the abstract method
        /// <see cref="IOMapperBase{T}.DoIO(Org.Apache.Hadoop.Mapred.Reporter, string, long)"/>,
        /// which performs the io operation (usually reading or writing data).
        /// Afterwards
        /// <see cref="IOMapperBase{T}.CollectStats(Org.Apache.Hadoop.Mapred.OutputCollector{K, V}, string, long, object)"/>
        /// is called with the elapsed time and the io result to prepare stat data
        /// for a subsequent reducer. The stream obtained from <c>GetIOStream</c>
        /// is closed even when the io operation throws.
        /// </remarks>
        /// <param name="key">file name</param>
        /// <param name="value">offset passed on to <c>DoIO</c></param>
        /// <param name="output">collector forwarded to <c>CollectStats</c></param>
        /// <param name="reporter">receives the "starting"/"finished" status lines</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Map(Text key, LongWritable value, OutputCollector <Text, Text>
                                output, Reporter reporter)
        {
            string fileName = key.ToString();
            long   offset   = value.Get();

            reporter.SetStatus("starting " + fileName + " ::host = " + hostName);
            this.stream = GetIOStream(fileName);

            T    ioResult  = null;
            long startedAt = Runtime.CurrentTimeMillis();
            try
            {
                ioResult = DoIO(reporter, fileName, offset);
            }
            finally
            {
                // Release the stream whether or not the io operation succeeded.
                if (stream != null)
                {
                    stream.Close();
                }
            }
            long elapsedMillis = Runtime.CurrentTimeMillis() - startedAt;

            CollectStats(output, fileName, elapsedMillis, ioResult);
            reporter.SetStatus("finished " + fileName + " ::host = " + hostName);
        }
Exemplo n.º 3
0
            /// <summary>
            /// Seeks to random positions in one data file and asserts that the byte
            /// read there equals the byte regenerated from the file's seed.
            /// </summary>
            /// <remarks>
            /// The file name is parsed as the seed of the random generator and the
            /// value bounds the seek positions. A size of zero returns without
            /// opening the file. The input stream is always closed.
            /// </remarks>
            /// <param name="key">file name, which must parse as a long seed</param>
            /// <param name="value">size used as the modulus for seek positions</param>
            /// <param name="collector">not used by this method</param>
            /// <param name="reporter">receives progress status lines</param>
            /// <exception cref="System.IO.IOException"/>
            public virtual void Map(Text key, LongWritable value, OutputCollector <K, LongWritable
                                                                                   > collector, Reporter reporter)
            {
                string name = key.ToString();
                long   size = value.Get();
                long   seed = long.Parse(name);

                if (size == 0)
                {
                    return;
                }
                reporter.SetStatus("opening " + name);
                FSDataInputStream @in = fs.Open(new Path(DataDir, name));

                try
                {
                    for (int i = 0; i < SeeksPerFile; i++)
                    {
                        // generate a random position; Math.Abs(long.MinValue) throws
                        // OverflowException, so that single value is mapped to 0
                        long rawPosition = random.NextLong();
                        long position    = (rawPosition == long.MinValue ? 0L : Math.Abs(rawPosition)) % size;
                        // seek file to that position
                        reporter.SetStatus("seeking " + name);
                        @in.Seek(position);
                        byte b = @in.ReadByte();
                        // check that byte matches
                        byte checkByte = 0;
                        // advance random state to that position
                        random.SetSeed(seed);
                        // long counter: an int counter would wrap around and never
                        // reach positions beyond int.MaxValue
                        for (long p = 0; p <= position; p += check.Length)
                        {
                            reporter.SetStatus("generating data for " + name);
                            if (fastCheck)
                            {
                                checkByte = unchecked ((byte)random.Next(byte.MaxValue));
                            }
                            else
                            {
                                random.NextBytes(check);
                                checkByte = check[(int)(position % check.Length)];
                            }
                        }
                        NUnit.Framework.Assert.AreEqual(b, checkByte);
                    }
                }
                finally
                {
                    @in.Close();
                }
            }
Exemplo n.º 4
0
        /// <summary>Tests <c>DBInputFormat.DBRecordReader</c>.</summary>
        /// <remarks>
        /// Builds a reader over mocked <c>JobConf</c> and <c>DBConfiguration</c>
        /// objects and checks that the created key holds 0, the created value has
        /// the expected type name, the position is 0 and <c>Next</c> returns false.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestDBRecordReader()
        {
            JobConf         mockedJob    = Org.Mockito.Mockito.Mock <JobConf>();
            DBConfiguration mockedDbConf = Org.Mockito.Mockito.Mock <DBConfiguration>();
            string[]        fieldNames   = { "field1", "filed2" };

            DBInputFormat.DBInputSplit   emptySplit   = new DBInputFormat.DBInputSplit();
            DBInputFormat.DBRecordReader recordReader = new DBInputFormat.DBRecordReader(
                this, emptySplit, typeof(DBInputFormat.NullDBWritable), mockedJob,
                DriverForTest.GetConnection(), mockedDbConf, "condition", fieldNames, "table");

            // A freshly created key starts at zero.
            LongWritable createdKey = recordReader.CreateKey();
            NUnit.Framework.Assert.AreEqual(0, createdKey.Get());

            // The created value must be of the writable type handed to the reader.
            DBWritable createdValue = ((DBWritable)recordReader.CreateValue());
            NUnit.Framework.Assert.AreEqual(
                "org.apache.hadoop.mapred.lib.db.DBInputFormat$NullDBWritable",
                createdValue.GetType().FullName);

            NUnit.Framework.Assert.AreEqual(0, recordReader.GetPos());
            NUnit.Framework.Assert.IsFalse(recordReader.Next(createdKey, createdValue));
        }
Exemplo n.º 5
0
        /// <summary>
        /// Asserts that a joined tuple has the expected size, that its key is a
        /// multiple of 2, 3 and 4 within the <c>Items</c> bound, and that its three
        /// leaf values are 0, 1 and 2.
        /// </summary>
        /// <param name="k">key of the record</param>
        /// <param name="v">tuple value of the record</param>
        /// <param name="tupleSize">expected <c>v.Size()</c></param>
        /// <param name="firstTuple">true when element 0 of <c>v</c> is a nested tuple holding the first two values</param>
        /// <param name="secondTuple">true when element 1 of <c>v</c> is a nested tuple holding the last two values (only consulted when <c>firstTuple</c> is false)</param>
        private void ValidateInnerKeyValue(IntWritable k, TupleWritable v, int tupleSize,
                                           bool firstTuple, bool secondTuple)
        {
            string failureText = "Unexpected tuple: " + Stringify(k, v);

            NUnit.Framework.Assert.IsTrue(failureText, v.Size() == tupleSize);

            int keyValue = k.Get();
            NUnit.Framework.Assert.IsTrue(failureText, keyValue % 2 == 0 && keyValue / 2 <= Items);
            NUnit.Framework.Assert.IsTrue(failureText, keyValue % 3 == 0 && keyValue / 3 <= Items);
            NUnit.Framework.Assert.IsTrue(failureText, keyValue % 4 == 0 && keyValue / 4 <= Items);

            IntWritable  first;
            IntWritable  second;
            LongWritable third;
            if (firstTuple)
            {
                // Layout: ((first, second), third)
                TupleWritable nested = ((TupleWritable)v.Get(0));
                first  = (IntWritable)nested.Get(0);
                second = (IntWritable)nested.Get(1);
                third  = (LongWritable)v.Get(1);
            }
            else if (secondTuple)
            {
                // Layout: (first, (second, third))
                first = (IntWritable)v.Get(0);
                TupleWritable nested = ((TupleWritable)v.Get(1));
                second = (IntWritable)nested.Get(0);
                third  = (LongWritable)nested.Get(1);
            }
            else
            {
                // Layout: (first, second, third)
                first  = (IntWritable)v.Get(0);
                second = (IntWritable)v.Get(1);
                third  = (LongWritable)v.Get(2);
            }

            NUnit.Framework.Assert.IsTrue(failureText, first.Get() == 0);
            NUnit.Framework.Assert.IsTrue(failureText, second.Get() == 1);
            NUnit.Framework.Assert.IsTrue(failureText, third.Get() == 2);
        }
Exemplo n.º 6
0
        /// <summary>
        /// test on
        /// <see cref="Reader"/>
        /// iteration methods
        /// <c>next(), seek()</c>
        /// in and out of range.
        /// </summary>
        /// <remarks>
        /// Writes the values 0..9 to an <c>ArrayFile</c>, reads them back in order,
        /// seeks to entry 6 and checks that the following entry is 7, and finally
        /// checks that seeking past the end reports false. Any exception is turned
        /// into a test failure whose message carries the original cause.
        /// </remarks>
        public virtual void TestArrayFileIteration()
        {
            int           Size = 10;
            Configuration conf = new Configuration();

            try
            {
                FileSystem       fs     = FileSystem.Get(conf);
                ArrayFile.Writer writer = new ArrayFile.Writer(conf, fs, TestFile, typeof(LongWritable
                                                                                          ), SequenceFile.CompressionType.Record, defaultProgressable);
                NUnit.Framework.Assert.IsNotNull("testArrayFileIteration error !!!", writer);
                for (int i = 0; i < Size; i++)
                {
                    writer.Append(new LongWritable(i));
                }
                writer.Close();
                ArrayFile.Reader reader       = new ArrayFile.Reader(fs, TestFile, conf);
                LongWritable     nextWritable = new LongWritable(0);
                for (int i_1 = 0; i_1 < Size; i_1++)
                {
                    nextWritable = (LongWritable)reader.Next(nextWritable);
                    Assert.Equal(nextWritable.Get(), i_1);
                }
                Assert.True("testArrayFileIteration seek error !!!", reader.Seek
                                (new LongWritable(6)));
                nextWritable = (LongWritable)reader.Next(nextWritable);
                Assert.True("testArrayFileIteration error !!!", reader.Key() ==
                            7);
                Assert.True("testArrayFileIteration error !!!", nextWritable.Equals
                                (new LongWritable(7)));
                NUnit.Framework.Assert.IsFalse("testArrayFileIteration error !!!", reader.Seek(new
                                                                                               LongWritable(Size + 5)));
                reader.Close();
            }
            catch (Exception e)
            {
                // Keep the cause (including assertion failures raised inside the
                // try block) in the failure text instead of discarding it, and
                // name this test rather than the writer-construction test.
                Fail("testArrayFileIteration error !!! " + e);
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Runs 20 randomized round-trips through <c>FixedLengthInputFormat</c>:
        /// each iteration writes a file of fixed-length records, splits it, reads
        /// every split back and compares keys, lengths and contents with what was
        /// written.
        /// </summary>
        /// <remarks>
        /// Iteration 8 forces an empty file, iteration 10 forces a record length of
        /// 1 and the last iteration requests splits smaller than one record. The
        /// random seed is logged so a failing run can be reproduced.
        /// </remarks>
        /// <param name="codec">
        /// codec passed to <c>CreateFile</c>; when non-null the file name gets a
        /// ".gz" suffix and the codec is configured with the job
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        private void RunRandomTests(CompressionCodec codec)
        {
            StringBuilder fileName = new StringBuilder("testFormat.txt");

            if (codec != null)
            {
                fileName.Append(".gz");
            }
            localFs.Delete(workDir, true);
            Path file = new Path(workDir, fileName.ToString());
            int  seed = new Random().Next();

            Log.Info("Seed = " + seed);
            Random        random   = new Random(seed);
            int           MaxTests = 20;
            LongWritable  key      = new LongWritable();
            BytesWritable value    = new BytesWritable();

            for (int i = 0; i < MaxTests; i++)
            {
                Log.Info("----------------------------------------------------------");
                // Maximum total records of 999
                int totalRecords = random.Next(999) + 1;
                // Test an empty file
                if (i == 8)
                {
                    totalRecords = 0;
                }
                // Maximum bytes in a record of 100K
                int recordLength = random.Next(1024 * 100) + 1;
                // For the 11th test, force a record length of 1
                if (i == 10)
                {
                    recordLength = 1;
                }
                // The total bytes in the test file
                int fileSize = (totalRecords * recordLength);
                Log.Info("totalRecords=" + totalRecords + " recordLength=" + recordLength);
                // Create the job
                JobConf job = new JobConf(defaultConf);
                if (codec != null)
                {
                    ReflectionUtils.SetConf(codec, job);
                }
                // Create the test file
                AList <string> recordList = CreateFile(file, codec, recordLength, totalRecords);
                NUnit.Framework.Assert.IsTrue(localFs.Exists(file));
                //set the fixed length record length config property for the job
                FixedLengthInputFormat.SetRecordLength(job, recordLength);
                int numSplits = 1;
                // Arbitrarily set number of splits.
                if (i > 0)
                {
                    if (i == (MaxTests - 1))
                    {
                        // Test a split size that is less than record len.
                        // Integer division already floors; calling Math.Floor on an
                        // int is ambiguous between the double and decimal overloads.
                        // The divisor is clamped to 1 because recordLength / 2 is 0
                        // for one-byte records.
                        numSplits = fileSize / Math.Max(1, recordLength / 2);
                    }
                    else
                    {
                        if (MaxTests % i == 0)
                        {
                            // Let us create a split size that is forced to be
                            // smaller than the end file itself, (ensures 1+ splits)
                            numSplits = fileSize / (fileSize - random.Next(fileSize));
                        }
                        else
                        {
                            // Just pick a random split size with no upper bound;
                            // Next may return 0, so clamp the divisor to 1
                            numSplits = Math.Max(1, fileSize / Math.Max(1, random.Next(int.MaxValue)));
                        }
                    }
                    Log.Info("Number of splits set to: " + numSplits);
                }
                // Setup the input path
                FileInputFormat.SetInputPaths(job, workDir);
                // Try splitting the file in a variety of sizes
                FixedLengthInputFormat format = new FixedLengthInputFormat();
                format.Configure(job);
                InputSplit[] splits = format.GetSplits(job, numSplits);
                Log.Info("Actual number of splits = " + splits.Length);
                // Test combined split lengths = total file size
                long recordOffset = 0;
                int  recordNumber = 0;
                foreach (InputSplit split in splits)
                {
                    RecordReader <LongWritable, BytesWritable> reader = format.GetRecordReader(split,
                                                                                               job, voidReporter);
                    try
                    {
                        Type clazz = reader.GetType();
                        NUnit.Framework.Assert.AreEqual("RecordReader class should be FixedLengthRecordReader:"
                                                        , typeof(FixedLengthRecordReader), clazz);
                        // Plow through the records in this split
                        while (reader.Next(key, value))
                        {
                            NUnit.Framework.Assert.AreEqual("Checking key", (long)(recordNumber * recordLength
                                                                                   ), key.Get());
                            string valueString = Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value
                                                                                   .GetLength());
                            NUnit.Framework.Assert.AreEqual("Checking record length:", recordLength, value.GetLength
                                                                ());
                            NUnit.Framework.Assert.IsTrue("Checking for more records than expected:", recordNumber
                                                          < totalRecords);
                            string origRecord = recordList[recordNumber];
                            NUnit.Framework.Assert.AreEqual("Checking record content:", origRecord, valueString
                                                            );
                            recordNumber++;
                        }
                    }
                    finally
                    {
                        // Close the reader even when one of the assertions above fails.
                        reader.Close();
                    }
                }
                NUnit.Framework.Assert.AreEqual("Total original records should be total read records:"
                                                , recordList.Count, recordNumber);
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Checks the keys and value lengths that <c>LineRecordReader</c> reports
        /// for uncompressed input when a custom multi-byte record delimiter is used.
        /// </summary>
        /// <remarks>
        /// Four scenarios are exercised in sequence: a "++" delimiter with the
        /// input cut into two splits at byte 15; the same delimiter with a lone
        /// "+" inside the data and a split at byte 5; and a "|+|" delimiter read
        /// repeatedly inside two nested loops. After every exhausted reader the
        /// test expects <c>GetCurrentKey()</c> to return null.
        /// </remarks>
        public virtual void TestUncompressedInputCustomDelimiterPosValue()
        {
            Configuration conf = new Configuration();

            conf.SetInt("io.file.buffer.size", 10);
            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            string inputData = "abcdefghij++kl++mno";
            Path   inputFile = CreateInputFile(conf, inputData);
            string delimiter = "++";

            byte[] recordDelimiterBytes = Sharpen.Runtime.GetBytesForString(delimiter, Charsets
                                                                            .Utf8);
            int                splitLength = 15;
            FileSplit          split       = new FileSplit(inputFile, 0, splitLength, (string[])null);
            TaskAttemptContext context     = new TaskAttemptContextImpl(conf, new TaskAttemptID()
                                                                        );
            LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);

            reader.Initialize(split, context);
            // Get first record: "abcdefghij"
            NUnit.Framework.Assert.IsTrue("Expected record got nothing", reader.NextKeyValue(
                                              ));
            // key and value are fetched once and re-read after later NextKeyValue
            // calls, so the test relies on the reader updating these same objects
            LongWritable key   = reader.GetCurrentKey();
            Text         value = reader.GetCurrentValue();

            NUnit.Framework.Assert.AreEqual("Wrong length for record value", 10, value.GetLength
                                                ());
            NUnit.Framework.Assert.AreEqual("Wrong position after record read", 0, key.Get());
            // Get second record: "kl"
            NUnit.Framework.Assert.IsTrue("Expected record got nothing", reader.NextKeyValue(
                                              ));
            NUnit.Framework.Assert.AreEqual("Wrong length for record value", 2, value.GetLength
                                                ());
            // Key should be 12 right after "abcdefghij++"
            NUnit.Framework.Assert.AreEqual("Wrong position after record read", 12, key.Get()
                                            );
            // Get third record: "mno"
            NUnit.Framework.Assert.IsTrue("Expected record got nothing", reader.NextKeyValue(
                                              ));
            NUnit.Framework.Assert.AreEqual("Wrong length for record value", 3, value.GetLength
                                                ());
            // Key should be 16 right after "abcdefghij++kl++"
            NUnit.Framework.Assert.AreEqual("Wrong position after record read", 16, key.Get()
                                            );
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            // Key should be 19 right after "abcdefghij++kl++mno"
            NUnit.Framework.Assert.AreEqual("Wrong position after record read", 19, key.Get()
                                            );
            // after refresh should be empty
            key = reader.GetCurrentKey();
            NUnit.Framework.Assert.IsNull("Unexpected key returned", key);
            reader.Close();
            // second split: the remaining bytes from splitLength to the end of the input
            split = new FileSplit(inputFile, splitLength, inputData.Length - splitLength, (string
                                                                                           [])null);
            reader = new LineRecordReader(recordDelimiterBytes);
            reader.Initialize(split, context);
            // No record is in the second split because the second split dropped
            // the first record, which was already reported by the first split.
            NUnit.Framework.Assert.IsFalse("Unexpected record returned", reader.NextKeyValue(
                                               ));
            key = reader.GetCurrentKey();
            NUnit.Framework.Assert.IsNull("Unexpected key returned", key);
            reader.Close();
            // multi char delimiter with starting part of the delimiter in the data
            inputData   = "abcd+efgh++ijk++mno";
            inputFile   = CreateInputFile(conf, inputData);
            splitLength = 5;
            split       = new FileSplit(inputFile, 0, splitLength, (string[])null);
            reader      = new LineRecordReader(recordDelimiterBytes);
            reader.Initialize(split, context);
            // Get first record: "abcd+efgh"
            NUnit.Framework.Assert.IsTrue("Expected record got nothing", reader.NextKeyValue(
                                              ));
            key   = reader.GetCurrentKey();
            value = reader.GetCurrentValue();
            NUnit.Framework.Assert.AreEqual("Wrong position after record read", 0, key.Get());
            NUnit.Framework.Assert.AreEqual("Wrong length for record value", 9, value.GetLength
                                                ());
            // should have jumped over the delimiter, no record
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            NUnit.Framework.Assert.AreEqual("Wrong position after record read", 11, key.Get()
                                            );
            // after refresh should be empty
            key = reader.GetCurrentKey();
            NUnit.Framework.Assert.IsNull("Unexpected key returned", key);
            reader.Close();
            // next split: check for duplicate or dropped records
            split = new FileSplit(inputFile, splitLength, inputData.Length - splitLength, (string
                                                                                           [])null);
            reader = new LineRecordReader(recordDelimiterBytes);
            reader.Initialize(split, context);
            NUnit.Framework.Assert.IsTrue("Expected record got nothing", reader.NextKeyValue(
                                              ));
            key   = reader.GetCurrentKey();
            value = reader.GetCurrentValue();
            // Get second record: "ijk" first in this split
            NUnit.Framework.Assert.AreEqual("Wrong position after record read", 11, key.Get()
                                            );
            NUnit.Framework.Assert.AreEqual("Wrong length for record value", 3, value.GetLength
                                                ());
            // Get third record: "mno" second in this split
            NUnit.Framework.Assert.IsTrue("Expected record got nothing", reader.NextKeyValue(
                                              ));
            NUnit.Framework.Assert.AreEqual("Wrong position after record read", 16, key.Get()
                                            );
            NUnit.Framework.Assert.AreEqual("Wrong length for record value", 3, value.GetLength
                                                ());
            // should be at the end of the input
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            NUnit.Framework.Assert.AreEqual("Wrong position after record read", 19, key.Get()
                                            );
            reader.Close();
            // switch to a three-byte delimiter whose first and last bytes also occur in the data
            inputData            = "abcd|efgh|+|ij|kl|+|mno|pqr";
            inputFile            = CreateInputFile(conf, inputData);
            delimiter            = "|+|";
            recordDelimiterBytes = Sharpen.Runtime.GetBytesForString(delimiter, Charsets.Utf8
                                                                     );
            // walking over the buffer and split sizes checks for proper processing
            // of the ambiguous bytes of the delimiter
            for (int bufferSize = 1; bufferSize <= inputData.Length; bufferSize++)
            {
                // NOTE(review): splitSize is never read inside this loop; the split
                // below is built with bufferSize as its length - confirm whether
                // splitSize was intended there.
                for (int splitSize = 1; splitSize < inputData.Length; splitSize++)
                {
                    // track where we are in the inputdata
                    int keyPosition = 0;
                    // NOTE(review): context was created from conf before this loop;
                    // whether this setting reaches the reader depends on
                    // TaskAttemptContextImpl sharing or copying conf - confirm.
                    conf.SetInt("io.file.buffer.size", bufferSize);
                    split  = new FileSplit(inputFile, 0, bufferSize, (string[])null);
                    reader = new LineRecordReader(recordDelimiterBytes);
                    reader.Initialize(split, context);
                    // Get the first record: "abcd|efgh" always possible
                    NUnit.Framework.Assert.IsTrue("Expected record got nothing", reader.NextKeyValue(
                                                      ));
                    key   = reader.GetCurrentKey();
                    value = reader.GetCurrentValue();
                    NUnit.Framework.Assert.IsTrue("abcd|efgh".Equals(value.ToString()));
                    // Position should be 0 right at the start
                    NUnit.Framework.Assert.AreEqual("Wrong position after record read", keyPosition,
                                                    key.Get());
                    // Position should be 12 right after the first "|+|"
                    keyPosition = 12;
                    // get the next record: "ij|kl" if the split/buffer allows it
                    if (reader.NextKeyValue())
                    {
                        // check the record info: "ij|kl"
                        NUnit.Framework.Assert.IsTrue("ij|kl".Equals(value.ToString()));
                        NUnit.Framework.Assert.AreEqual("Wrong position after record read", keyPosition,
                                                        key.Get());
                        // Position should be 20 after the second "|+|"
                        keyPosition = 20;
                    }
                    // get the third record: "mno|pqr" if the split/buffer allows it
                    if (reader.NextKeyValue())
                    {
                        // check the record info: "mno|pqr"
                        NUnit.Framework.Assert.IsTrue("mno|pqr".Equals(value.ToString()));
                        NUnit.Framework.Assert.AreEqual("Wrong position after record read", keyPosition,
                                                        key.Get());
                        // Position should be the end of the input
                        keyPosition = inputData.Length;
                    }
                    NUnit.Framework.Assert.IsFalse("Unexpected record returned", reader.NextKeyValue(
                                                       ));
                    // no more records can be read we should be at the last position
                    NUnit.Framework.Assert.AreEqual("Wrong position after record read", keyPosition,
                                                    key.Get());
                    // after refresh should be empty
                    key = reader.GetCurrentKey();
                    NUnit.Framework.Assert.IsNull("Unexpected key returned", key);
                    reader.Close();
                }
            }
        }
Exemplo n.º 9
0
        /// <summary>Run a map/reduce job for estimating Pi.</summary>
        /// <remarks>
        /// Writes one single-record input file per map task (offset and number of
        /// points), runs the job, reads the first record of "reduce-out" and
        /// returns 4 * numInside / (numMaps * numPoints). The temporary directory
        /// must not exist beforehand and is deleted again once the input
        /// directory has been created, whether or not the job succeeds.
        /// </remarks>
        /// <param name="numMaps">number of map tasks, one input file is written for each</param>
        /// <param name="numPoints">number of points handled by each map task</param>
        /// <param name="tmpDir">working directory; "in" and "out" are created below it</param>
        /// <param name="conf">configuration used for the job and the file system</param>
        /// <returns>the estimated value of Pi</returns>
        /// <exception cref="System.IO.IOException">
        /// if the temporary directory already exists, the input directory cannot be
        /// created, the job does not complete successfully, or the reducer output
        /// contains no record
        /// </exception>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="System.Exception"/>
        public static BigDecimal EstimatePi(int numMaps, long numPoints, Path tmpDir, Configuration
                                            conf)
        {
            Job job = Job.GetInstance(conf);

            //setup job conf
            job.SetJobName(typeof(QuasiMonteCarlo).Name);
            job.SetJarByClass(typeof(QuasiMonteCarlo));
            job.SetInputFormatClass(typeof(SequenceFileInputFormat));
            job.SetOutputKeyClass(typeof(BooleanWritable));
            job.SetOutputValueClass(typeof(LongWritable));
            job.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
            job.SetMapperClass(typeof(QuasiMonteCarlo.QmcMapper));
            job.SetReducerClass(typeof(QuasiMonteCarlo.QmcReducer));
            job.SetNumReduceTasks(1);
            // turn off speculative execution, because DFS doesn't handle
            // multiple writers to the same file.
            job.SetSpeculativeExecution(false);
            //setup input/output directories
            Path inDir  = new Path(tmpDir, "in");
            Path outDir = new Path(tmpDir, "out");

            FileInputFormat.SetInputPaths(job, inDir);
            FileOutputFormat.SetOutputPath(job, outDir);
            FileSystem fs = FileSystem.Get(conf);

            if (fs.Exists(tmpDir))
            {
                throw new IOException("Tmp directory " + fs.MakeQualified(tmpDir) + " already exists.  Please remove it first."
                                      );
            }
            if (!fs.Mkdirs(inDir))
            {
                throw new IOException("Cannot create input directory " + inDir);
            }
            try
            {
                //generate an input file for each map task
                for (int i = 0; i < numMaps; ++i)
                {
                    Path                file   = new Path(inDir, "part" + i);
                    LongWritable        offset = new LongWritable(i * numPoints);
                    LongWritable        size   = new LongWritable(numPoints);
                    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(LongWritable
                                                                                                  ), typeof(LongWritable), SequenceFile.CompressionType.None);
                    try
                    {
                        writer.Append(offset, size);
                    }
                    finally
                    {
                        writer.Close();
                    }
                    System.Console.Out.WriteLine("Wrote input for Map #" + i);
                }
                //start a map/reduce job
                System.Console.Out.WriteLine("Starting Job");
                long   startTime = Runtime.CurrentTimeMillis();
                bool   succeeded = job.WaitForCompletion(true);
                double duration  = (Runtime.CurrentTimeMillis() - startTime) / 1000.0;
                System.Console.Out.WriteLine("Job Finished in " + duration + " seconds");
                // Fail with a clear message instead of a confusing missing-file
                // error when the job itself did not succeed.
                if (!succeeded)
                {
                    throw new IOException("Job " + typeof(QuasiMonteCarlo).Name + " failed");
                }
                //read outputs
                Path                inFile     = new Path(outDir, "reduce-out");
                LongWritable        numInside  = new LongWritable();
                LongWritable        numOutside = new LongWritable();
                SequenceFile.Reader reader     = new SequenceFile.Reader(fs, inFile, conf);
                try
                {
                    // Without a record numInside would stay at its default and the
                    // estimate would silently be 0.
                    if (!reader.Next(numInside, numOutside))
                    {
                        throw new IOException("No result record found in " + inFile);
                    }
                }
                finally
                {
                    reader.Close();
                }
                //compute estimated value
                BigDecimal numTotal = BigDecimal.ValueOf(numMaps).Multiply(BigDecimal.ValueOf(numPoints
                                                                                              ));
                return(BigDecimal.ValueOf(4).SetScale(20).Multiply(BigDecimal.ValueOf(numInside.Get
                                                                                          ())).Divide(numTotal, RoundingMode.HalfUp));
            }
            finally
            {
                fs.Delete(tmpDir, true);
            }
        }
Exemplo n.º 10
0
            /// <summary>
            /// Builds one <c>IndirectSplit</c> per record of the sequence file named by
            /// the <c>GenericMRLoadGenerator.IndirectInputFile</c> job property.
            /// </summary>
            /// <remarks>
            /// Each record's text value becomes the split's path and its long key is
            /// passed as the second constructor argument. The sequence file reader
            /// is closed once all records are consumed or reading fails.
            /// </remarks>
            /// <param name="job">job configuration naming the indirect input file</param>
            /// <param name="numSplits">used only as the initial capacity of the result list</param>
            /// <returns>the splits in the order their records appear in the file</returns>
            /// <exception cref="System.IO.IOException"/>
            public virtual InputSplit[] GetSplits(JobConf job, int numSplits)
            {
                Path       src = new Path(job.Get(GenericMRLoadGenerator.IndirectInputFile, null));
                FileSystem fs  = src.GetFileSystem(job);
                AList <GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit> splits = new AList
                                                                                          <GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit>(numSplits);
                LongWritable key = new LongWritable();

                Org.Apache.Hadoop.IO.Text value = new Org.Apache.Hadoop.IO.Text();
                SequenceFile.Reader       sl    = new SequenceFile.Reader(fs, src, job);
                try
                {
                    while (sl.Next(key, value))
                    {
                        splits.AddItem(new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit(new Path
                                                                                                        (value.ToString()), key.Get()));
                    }
                }
                finally
                {
                    // The reader was previously never closed, leaking the file handle.
                    sl.Close();
                }
                return(Sharpen.Collections.ToArray(splits, new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit
                                                   [splits.Count]));
            }
Exemplo n.º 11
0
        /// <summary>Sends a packet with up to maxChunks chunks of data.</summary>
        /// <remarks>
        /// Writes the packet header into <paramref name="pkt"/>, fills in the checksums
        /// when checksums are enabled and a checksum input is present, and then sends
        /// the packet in one of two ways: with <paramref name="transferTo"/> false the
        /// data is read into the buffer and header, checksums and data are written with
        /// a single call; with it true only header and checksums are written from the
        /// buffer and the data is handed to <c>TransferToFully</c>.
        /// Any <c>IOException</c> raised while writing is rethrown through
        /// <c>IoeToSocketException</c>.
        /// </remarks>
        /// <param name="pkt">buffer used for writing packet data</param>
        /// <param name="maxChunks">maximum number of chunks to send</param>
        /// <param name="out">stream to send data to</param>
        /// <param name="transferTo">use transferTo to send data</param>
        /// <param name="throttler">used for throttling data transfer bandwidth</param>
        /// <returns>the number of data bytes carried by this packet</returns>
        /// <exception cref="System.IO.IOException"/>
        private int SendPacket(ByteBuffer pkt, int maxChunks, OutputStream @out, bool transferTo
                               , DataTransferThrottler throttler)
        {
            // Data bytes for this packet: whatever is left up to endOffset, capped at
            // maxChunks full chunks.
            int dataLen   = (int)Math.Min(endOffset - offset, (chunkSize * (long)maxChunks));
            int numChunks = NumberOfChunks(dataLen);
            // Number of chunks to be sent in the packet
            int  checksumDataLen = numChunks * checksumSize;
            // NOTE(review): the extra 4 is presumably the size of a length field that is
            // counted as part of the packet -- confirm against WritePacketHeader.
            int  packetLen       = dataLen + checksumDataLen + 4;
            // True when this packet carries data and ends exactly at endOffset.
            bool lastDataPacket  = offset + dataLen == endOffset && dataLen > 0;
            // The packet buffer is organized as follows:
            // _______HHHHCCCCD?D?D?D?
            //        ^   ^
            //        |   \ checksumOff
            //        \ headerOff
            // _ padding, since the header is variable-length
            // H = header and length prefixes
            // C = checksums
            // D? = data, if transferTo is false.
            int headerLen = WritePacketHeader(pkt, dataLen, packetLen);
            // Per above, the header doesn't start at the beginning of the
            // buffer
            int headerOff   = pkt.Position() - headerLen;
            int checksumOff = pkt.Position();

            // All further writes go straight into the array backing pkt.
            byte[] buf = ((byte[])pkt.Array());
            if (checksumSize > 0 && checksumIn != null)
            {
                ReadChecksum(buf, checksumOff, checksumDataLen);
                // write in progress that we need to use to get last checksum:
                // on the last data packet, overwrite the final checksum slot with the
                // value supplied by lastChunkChecksum, when one is available.
                if (lastDataPacket && lastChunkChecksum != null)
                {
                    int    start           = checksumOff + checksumDataLen - checksumSize;
                    byte[] updatedChecksum = lastChunkChecksum.GetChecksum();
                    if (updatedChecksum != null)
                    {
                        System.Array.Copy(updatedChecksum, 0, buf, start, checksumSize);
                    }
                }
            }
            // Data (if copied into the buffer at all) follows the checksums.
            int dataOff = checksumOff + checksumDataLen;

            if (!transferTo)
            {
                // normal transfer
                IOUtils.ReadFully(blockIn, buf, dataOff, dataLen);
                if (verifyChecksum)
                {
                    VerifyChecksum(buf, dataOff, dataLen, numChunks, checksumOff);
                }
            }
            try
            {
                if (transferTo)
                {
                    // This path requires @out to be a SocketOutputStream and blockIn to be
                    // a FileInputStream; both casts below throw otherwise.
                    SocketOutputStream sockOut = (SocketOutputStream)@out;
                    // First write header and checksums
                    sockOut.Write(buf, headerOff, dataOff - headerOff);
                    // no need to flush since we know out is not a buffered stream
                    FileChannel  fileCh       = ((FileInputStream)blockIn).GetChannel();
                    // Holders passed to TransferToFully and then reported to the metrics;
                    // presumably filled in by that call -- confirm.
                    LongWritable waitTime     = new LongWritable();
                    LongWritable transferTime = new LongWritable();
                    sockOut.TransferToFully(fileCh, blockInPosition, dataLen, waitTime, transferTime);
                    datanode.metrics.AddSendDataPacketBlockedOnNetworkNanos(waitTime.Get());
                    datanode.metrics.AddSendDataPacketTransferNanos(transferTime.Get());
                    blockInPosition += dataLen;
                }
                else
                {
                    // normal transfer
                    @out.Write(buf, headerOff, dataOff + dataLen - headerOff);
                }
            }
            catch (IOException e)
            {
                if (e is SocketTimeoutException)
                {
                    /*
                     * writing to client timed out.  This happens if the client reads
                     * part of a block and then decides not to read the rest (but leaves
                     * the socket open).
                     *
                     * Reporting of this case is done in DataXceiver#run
                     */
                }
                else
                {
                    /* Exception while writing to the client. Connection closure from
                     * the other end is mostly the case and we do not care much about
                     * it. But other things can go wrong, especially in transferTo(),
                     * which we do not want to ignore.
                     *
                     * The message parsing below should not be considered as a good
                     * coding example. NEVER do it to drive a program logic. NEVER.
                     * It was done here because the NIO throws an IOException for EPIPE.
                     */
                    string ioem = e.Message;
                    if (!ioem.StartsWith("Broken pipe") && !ioem.StartsWith("Connection reset"))
                    {
                        Log.Error("BlockSender.sendChunks() exception: ", e);
                    }
                    // Runs for every non-timeout failure, including the broken-pipe and
                    // connection-reset cases that are not logged above.
                    datanode.GetBlockScanner().MarkSuspectBlock(volumeRef.GetVolume().GetStorageID(),
                                                                block);
                }
                // Timeouts and all other failures alike are rethrown after conversion.
                throw IoeToSocketException(e);
            }
            if (throttler != null)
            {
                // rebalancing so throttle
                throttler.Throttle(packetLen);
            }
            return(dataLen);
        }
Exemplo n.º 12
0
            /// <summary>
            /// Builds one <c>IndirectSplit</c> per record of the sequence file whose path is
            /// stored in the job configuration under <c>IndirectInputFile</c>.
            /// </summary>
            /// <remarks>
            /// Each record's value is turned into the split's path and its key into the
            /// accompanying long. The reader is always closed before returning, including
            /// when reading fails.
            /// </remarks>
            /// <param name="job">job context; its configuration supplies the input path and the file system</param>
            /// <returns>one split per record read, in the order the records were read</returns>
            /// <exception cref="System.IO.IOException"/>
            public override IList <InputSplit> GetSplits(JobContext job)
            {
                Configuration      conf   = job.GetConfiguration();
                Path               src    = new Path(conf.Get(IndirectInputFile, null));
                FileSystem         fs     = src.GetFileSystem(conf);
                IList <InputSplit> splits = new AList <InputSplit>();
                LongWritable       key    = new LongWritable();

                Org.Apache.Hadoop.IO.Text value = new Org.Apache.Hadoop.IO.Text();
                SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, conf);
                try
                {
                    // key and value are reused; Next() overwrites them on every record.
                    while (sl.Next(key, value))
                    {
                        splits.AddItem(new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit(new Path
                                                                                                        (value.ToString()), key.Get()));
                    }
                }
                finally
                {
                    // Previously the reader was created in a for-initializer and never
                    // closed; release it on both the normal and the exceptional path.
                    sl.Close();
                }
                return(splits);
            }