Esempio n. 1
0
 /// <summary>
 /// Initializes a TaggedInputSplit that wraps the given split and stores the
 /// configuration, InputFormat type and Mapper type supplied alongside it.
 /// </summary>
 /// <param name="inputSplit">The InputSplit to be tagged; its runtime type is recorded as well</param>
 /// <param name="conf">The configuration to use</param>
 /// <param name="inputFormatClass">The InputFormat class to use for this job</param>
 /// <param name="mapperClass">The Mapper class to use for this job</param>
 public TaggedInputSplit(InputSplit inputSplit,
                         Configuration conf,
                         Type inputFormatClass,
                         Type mapperClass)
 {
     // Capture the concrete runtime type of the split before anything else is stored,
     // so a null split fails here without touching any other field.
     inputSplitClass = inputSplit.GetType();

     // The remaining arguments are stored exactly as given.
     this.inputSplit = inputSplit;
     this.mapperClass = mapperClass;
     this.inputFormatClass = inputFormatClass;
     this.conf = conf;
 }
Esempio n. 2
0
            /// <summary>
            /// Creates the composite record reader whose constructor is registered in
            /// <c>rrCstrMap</c> under <c>ident</c>, then adds one record reader per entry of
            /// <c>kids</c>, each created from the child split at the same index.
            /// </summary>
            /// <param name="split">The split to read; must be a CompositeInputSplit</param>
            /// <param name="taskContext">
            /// Task context; its configuration is passed to the new composite reader and the
            /// context itself is forwarded to every child.
            /// </param>
            /// <returns>The composite reader with all child readers added</returns>
            /// <exception cref="System.IO.IOException">
            /// If the split is not a CompositeInputSplit, if nothing is registered for
            /// <c>ident</c>, or if instantiating the composite reader fails.
            /// </exception>
            /// <exception cref="System.Exception"/>
            public override RecordReader CreateRecordReader(InputSplit split, TaskAttemptContext
                                                            taskContext)
            {
                // child types unknowable
                if (!(split is CompositeInputSplit))
                {
                    throw new IOException("Invalid split type:" + split.GetType().FullName);
                }
                CompositeInputSplit spl   = (CompositeInputSplit)split;
                int capacity              = kids.Count;
                CompositeRecordReader ret = null;

                try
                {
                    if (!rrCstrMap.Contains(ident))
                    {
                        throw new IOException("No RecordReader for " + ident);
                    }
                    ret = (CompositeRecordReader)rrCstrMap[ident].NewInstance(id, taskContext.GetConfiguration
                                                                                  (), capacity, cmpcl);
                }
                // System.IO.IOException has no constructor taking only an Exception, so each
                // reflection failure is wrapped with the (message, innerException) overload;
                // the original failure stays reachable through InnerException.
                catch (MemberAccessException e)
                {
                    throw new IOException(e.Message, e);
                }
                catch (InstantiationException e)
                {
                    throw new IOException(e.Message, e);
                }
                catch (TargetInvocationException e)
                {
                    throw new IOException(e.Message, e);
                }
                // One child reader per child, paired with the child split at the same index.
                for (int i = 0; i < capacity; ++i)
                {
                    ret.Add(((ComposableRecordReader)kids[i].CreateRecordReader(spl.Get(i), taskContext
                                                                                )));
                }
                return((ComposableRecordReader)ret);
            }
Esempio n. 3
0
            /// <summary>
            /// Instantiates the composite record reader registered in <c>rrCstrMap</c> under
            /// <c>ident</c> and adds to it the record reader of every entry in <c>kids</c>,
            /// each obtained for the child split at the same index.
            /// </summary>
            /// <param name="split">The split to read; must be a CompositeInputSplit</param>
            /// <param name="job">Job configuration handed to the composite reader and to every child</param>
            /// <param name="reporter">Reporter forwarded to every child</param>
            /// <returns>The composite reader with all child readers added</returns>
            /// <exception cref="System.IO.IOException">
            /// If the split is not a CompositeInputSplit, if nothing is registered for
            /// <c>ident</c>, or if instantiating the composite reader fails.
            /// </exception>
            public override ComposableRecordReader GetRecordReader(InputSplit split,
                                                                   JobConf job,
                                                                   Reporter reporter)
            {
                // The concrete types of the children cannot be known here; only the
                // composite wrapper type is verified.
                CompositeInputSplit compositeSplit = split as CompositeInputSplit;
                if (compositeSplit == null)
                {
                    throw new IOException("Invalid split type:" + split.GetType().FullName);
                }

                int childCount = kids.Count;
                CompositeRecordReader composite = null;

                try
                {
                    bool registered = rrCstrMap.Contains(ident);
                    if (!registered)
                    {
                        throw new IOException("No RecordReader for " + ident);
                    }
                    object created = rrCstrMap[ident].NewInstance(id, job, childCount, cmpcl);
                    composite = (CompositeRecordReader)created;
                }
                catch (MemberAccessException accessFailure)
                {
                    IOException wrapped = new IOException();
                    throw (IOException)Sharpen.Extensions.InitCause(wrapped, accessFailure);
                }
                catch (InstantiationException instantiationFailure)
                {
                    IOException wrapped = new IOException();
                    throw (IOException)Sharpen.Extensions.InitCause(wrapped, instantiationFailure);
                }
                catch (TargetInvocationException invocationFailure)
                {
                    IOException wrapped = new IOException();
                    throw (IOException)Sharpen.Extensions.InitCause(wrapped, invocationFailure);
                }

                // Pair every child with the child split at the same index.
                int childIndex = 0;
                while (childIndex < childCount)
                {
                    composite.Add(kids[childIndex].GetRecordReader(compositeSplit.Get(childIndex), job, reporter));
                    ++childIndex;
                }

                return (ComposableRecordReader)composite;
            }
Esempio n. 4
0
        /// <summary>
        /// Creates the input files through <c>CreateFiles</c>, then three times asks a
        /// CombineTextInputFormat for its splits, expects exactly one CombineFileSplit read by
        /// a CombineFileRecordReader, and checks that every integer value is read exactly once
        /// and that <c>length</c> distinct values are seen in total.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            Job    job    = Job.GetInstance(new Configuration(defaultConf));
            Random random = new Random();
            long   seed   = random.NextLong();

            // The seed is logged so that a failing run can be reproduced.
            Log.Info("seed = " + seed);
            random.SetSeed(seed);
            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            int length   = 10000;
            int numFiles = 10;

            // create files with various lengths
            CreateFiles(length, numFiles, random);
            // create a combined split for the files
            CombineTextInputFormat format = new CombineTextInputFormat();

            for (int i = 0; i < 3; i++)
            {
                // numSplits is only logged: GetSplits(job) takes no split-count argument.
                int numSplits = random.Next(length / 20) + 1;
                Log.Info("splitting: requesting = " + numSplits);
                IList <InputSplit> splits = format.GetSplits(job);
                Log.Info("splitting: got =        " + splits.Count);
                // we should have a single split as the length is comfortably smaller than
                // the block size
                // NUnit takes (expected, actual, message); the message goes last.
                NUnit.Framework.Assert.AreEqual(1, splits.Count, "We got more than one splits!");
                InputSplit split = splits[0];
                NUnit.Framework.Assert.AreEqual(typeof(CombineFileSplit), split.GetType(),
                                                "It should be CombineFileSplit");
                // check the split
                BitSet bits = new BitSet(length);
                Log.Debug("split= " + split);
                TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                                .GetConfiguration());
                RecordReader <LongWritable, Text> reader = format.CreateRecordReader(split, context
                                                                                     );
                NUnit.Framework.Assert.AreEqual(typeof(CombineFileRecordReader), reader.GetType(),
                                                "reader class is CombineFileRecordReader.");
                MapContext <LongWritable, Text, LongWritable, Text> mcontext = new MapContextImpl <
                    LongWritable, Text, LongWritable, Text>(job.GetConfiguration(), context.GetTaskAttemptID
                                                                (), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), split);
                reader.Initialize(split, mcontext);
                try
                {
                    int count = 0;
                    while (reader.NextKeyValue())
                    {
                        LongWritable key = reader.GetCurrentKey();
                        NUnit.Framework.Assert.IsNotNull(key, "Key should not be null.");
                        Text value = reader.GetCurrentValue();
                        int  v     = System.Convert.ToInt32(value.ToString());
                        Log.Debug("read " + v);
                        // A bit that is already set means the same value was read twice.
                        NUnit.Framework.Assert.IsFalse(bits.Get(v), "Key in multiple partitions.");
                        bits.Set(v);
                        count++;
                    }
                    Log.Debug("split=" + split + " count=" + count);
                }
                finally
                {
                    reader.Close();
                }
                // Every one of the `length` values must have been seen.
                NUnit.Framework.Assert.AreEqual(length, bits.Cardinality(), "Some keys in no partition.");
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Creates the input files through <c>CreateFiles</c>, then three times asks a
        /// CombineSequenceFileInputFormat for splits, expects exactly one CombineFileSplit,
        /// reads every record from it and checks that each integer key is read exactly once
        /// and that <c>length</c> distinct keys are seen in total.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            JobConf  job      = new JobConf(conf);
            Reporter reporter = Reporter.Null;
            Random   random   = new Random();
            long     seed     = random.NextLong();

            // The seed is logged so that a failing run can be reproduced.
            Log.Info("seed = " + seed);
            random.SetSeed(seed);
            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            int length   = 10000;
            int numFiles = 10;

            // create a file with various lengths
            CreateFiles(length, numFiles, random);
            // create a combine split for the files
            InputFormat <IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat
                                                              <IntWritable, BytesWritable>();
            IntWritable   key   = new IntWritable();
            BytesWritable value = new BytesWritable();

            for (int i = 0; i < 3; i++)
            {
                int numSplits = random.Next(length / (SequenceFile.SyncInterval / 20)) + 1;
                Log.Info("splitting: requesting = " + numSplits);
                InputSplit[] splits = format.GetSplits(job, numSplits);
                Log.Info("splitting: got =        " + splits.Length);
                // we should have a single split as the length is comfortably smaller than
                // the block size
                // NUnit takes (expected, actual, message); the message goes last.
                NUnit.Framework.Assert.AreEqual(1, splits.Length, "We got more than one splits!");
                InputSplit split = splits[0];
                NUnit.Framework.Assert.AreEqual(typeof(CombineFileSplit), split.GetType(),
                                                "It should be CombineFileSplit");
                // check each split
                BitSet bits = new BitSet(length);
                RecordReader <IntWritable, BytesWritable> reader = format.GetRecordReader(split, job
                                                                                          , reporter);
                try
                {
                    while (reader.Next(key, value))
                    {
                        // A bit that is already set means the same key was read twice.
                        NUnit.Framework.Assert.IsFalse(bits.Get(key.Get()), "Key in multiple partitions.");
                        bits.Set(key.Get());
                    }
                }
                finally
                {
                    reader.Close();
                }
                // Every one of the `length` keys must have been seen.
                NUnit.Framework.Assert.AreEqual(length, bits.Cardinality(), "Some keys in no partition.");
            }
        }
Esempio n. 6
0
        // A reporter that does nothing
        /// <summary>
        /// Creates the input files through <c>CreateFiles</c>, then three times asks a
        /// CombineTextInputFormat for splits, expects exactly one CombineFileSplit, reads every
        /// line from it and checks that each integer value is read exactly once and that
        /// <c>length</c> distinct values are seen in total.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            JobConf job    = new JobConf(defaultConf);
            Random  random = new Random();
            long    seed   = random.NextLong();

            // The seed is logged so that a failing run can be reproduced.
            Log.Info("seed = " + seed);
            random.SetSeed(seed);
            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            int length   = 10000;
            int numFiles = 10;

            CreateFiles(length, numFiles, random);
            // create a combined split for the files
            CombineTextInputFormat format = new CombineTextInputFormat();
            LongWritable           key    = new LongWritable();
            Text value = new Text();

            for (int i = 0; i < 3; i++)
            {
                int numSplits = random.Next(length / 20) + 1;
                Log.Info("splitting: requesting = " + numSplits);
                InputSplit[] splits = format.GetSplits(job, numSplits);
                Log.Info("splitting: got =        " + splits.Length);
                // we should have a single split as the length is comfortably smaller than
                // the block size
                // NUnit takes (expected, actual, message); the message goes last.
                NUnit.Framework.Assert.AreEqual(1, splits.Length, "We got more than one splits!");
                InputSplit split = splits[0];
                NUnit.Framework.Assert.AreEqual(typeof(CombineFileSplit), split.GetType(),
                                                "It should be CombineFileSplit");
                // check the split
                BitSet bits = new BitSet(length);
                Log.Debug("split= " + split);
                RecordReader <LongWritable, Text> reader = format.GetRecordReader(split, job, voidReporter
                                                                                  );
                try
                {
                    int count = 0;
                    while (reader.Next(key, value))
                    {
                        int v = System.Convert.ToInt32(value.ToString());
                        Log.Debug("read " + v);
                        // Log the reader position of a duplicate before the assertion fails,
                        // so the failure can be located in the input.
                        if (bits.Get(v))
                        {
                            Log.Warn("conflict with " + v + " at position " + reader.GetPos());
                        }
                        NUnit.Framework.Assert.IsFalse(bits.Get(v), "Key in multiple partitions.");
                        bits.Set(v);
                        count++;
                    }
                    Log.Info("splits=" + split + " count=" + count);
                }
                finally
                {
                    reader.Close();
                }
                // Every one of the `length` values must have been seen.
                NUnit.Framework.Assert.AreEqual(length, bits.Cardinality(), "Some keys in no partition.");
            }
        }