public virtual void Configure(JobConf job)
{
    keyclass = (Type)job.GetClass<WritableComparable>("test.fakeif.keyclass", typeof(NullWritable));
    valclass = (Type)job.GetClass<WritableComparable>("test.fakeif.valclass", typeof(NullWritable));
}

public RR_ClassLoaderChecker(JobConf job)
{
    NUnit.Framework.Assert.IsTrue(
        "The class loader has not been inherited from " + typeof(CompositeRecordReader).Name,
        job.GetClassLoader() is TestWrappedRecordReaderClassloader.Fake_ClassLoader);
    keyclass = (Type)job.GetClass<WritableComparable>("test.fakeif.keyclass", typeof(NullWritable));
    valclass = (Type)job.GetClass<WritableComparable>("test.fakeif.valclass", typeof(NullWritable));
}
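
For orientation, a minimal sketch of the JobConf these readers expect. Only SetClass and the "test.fakeif.*" keys come from the snippet above; SetClassLoader is assumed as the counterpart of the GetClassLoader call, and LongWritable/Text are placeholder key/value types.

JobConf job = new JobConf();
// Assumed counterpart of GetClassLoader(); mirrors the class-loader assertion in RR_ClassLoaderChecker.
job.SetClassLoader(new TestWrappedRecordReaderClassloader.Fake_ClassLoader());
// Register the key/value classes that Configure() will read back (placeholder types).
job.SetClass("test.fakeif.keyclass", typeof(LongWritable), typeof(WritableComparable));
job.SetClass("test.fakeif.valclass", typeof(Text), typeof(WritableComparable));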
Example 3
        /// <summary>Configures all the chain elements for the task.</summary>
        /// <param name="jobConf">chain job's JobConf.</param>
        public virtual void Configure(JobConf jobConf)
        {
            string prefix = GetPrefix(isMap);

            chainJobConf = jobConf;
            SerializationFactory serializationFactory = new SerializationFactory(chainJobConf);
            int index = jobConf.GetInt(prefix + ChainMapperSize, 0);

            for (int i = 0; i < index; i++)
            {
                Type klass = jobConf.GetClass<Mapper>(prefix + ChainMapperClass + i, null);
                JobConf mConf = new JobConf(GetChainElementConf(jobConf, prefix + ChainMapperConfig + i));
                Mapper mapper = ReflectionUtils.NewInstance(klass, mConf);
                mappers.AddItem(mapper);
                if (mConf.GetBoolean(MapperByValue, true))
                {
                    mappersKeySerialization.AddItem(
                        serializationFactory.GetSerialization(mConf.GetClass(MapperOutputKeyClass, null)));
                    mappersValueSerialization.AddItem(
                        serializationFactory.GetSerialization(mConf.GetClass(MapperOutputValueClass, null)));
                }
                else
                {
                    mappersKeySerialization.AddItem(null);
                    mappersValueSerialization.AddItem(null);
                }
            }
            Type klass_1 = jobConf.GetClass<Reducer>(prefix + ChainReducerClass, null);

            if (klass_1 != null)
            {
                JobConf rConf = new JobConf(GetChainElementConf(jobConf, prefix + ChainReducerConfig));
                reducer = ReflectionUtils.NewInstance(klass_1, rConf);
                if (rConf.GetBoolean(ReducerByValue, true))
                {
                    reducerKeySerialization = serializationFactory.GetSerialization(
                        rConf.GetClass(ReducerOutputKeyClass, null));
                    reducerValueSerialization = serializationFactory.GetSerialization(
                        rConf.GetClass(ReducerOutputValueClass, null));
                }
                else
                {
                    reducerKeySerialization   = null;
                    reducerValueSerialization = null;
                }
            }
        }
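
The prefix-indexed keys read here (ChainMapperSize, ChainMapperClass + i, ChainMapperConfig + i and their reducer counterparts) are normally written by the chain builder API rather than by hand. A minimal sketch of that setup, assuming this port keeps the Java-style ChainMapper.AddMapper / ChainReducer.SetReducer helpers; AMap, BMap and CReduce are hypothetical Mapper/Reducer implementations.

JobConf job = new JobConf();
// Each call appends one chain element and records its byValue flag and per-element JobConf.
ChainMapper.AddMapper(job, typeof(AMap), typeof(LongWritable), typeof(Text),
                      typeof(Text), typeof(Text), true, new JobConf(false));
ChainMapper.AddMapper(job, typeof(BMap), typeof(Text), typeof(Text),
                      typeof(LongWritable), typeof(Text), false, new JobConf(false));
ChainReducer.SetReducer(job, typeof(CReduce), typeof(LongWritable), typeof(Text),
                        typeof(Text), typeof(Text), true, new JobConf(false));
// Configure(job) above then instantiates each element and, where byValue is true,
// resolves the matching key/value Serialization from the SerializationFactory.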
Example 4
        /// <summary>
        /// Given an expression and an optional comparator, build a tree of
        /// InputFormats using the comparator to sort keys.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        internal static Parser.Node Parse(string expr, JobConf job)
        {
            if (null == expr)
            {
                throw new IOException("Expression is null");
            }
            Type cmpcl = job.GetClass<WritableComparator>("mapred.join.keycomparator", null);

            Parser.Lexer lex = new Parser.Lexer(expr);
            Stack<Parser.Token> st = new Stack<Parser.Token>();

            Parser.Token tok;
            while ((tok = lex.Next()) != null)
            {
                if (Parser.TType.Rparen.Equals(tok.GetType()))
                {
                    st.Push(Reduce(st, job));
                }
                else
                {
                    st.Push(tok);
                }
            }
            if (st.Count == 1 && Parser.TType.Cif.Equals(st.Peek().GetType()))
            {
                Parser.Node ret = st.Pop().GetNode();
                if (cmpcl != null)
                {
                    ret.SetKeyComparator(cmpcl);
                }
                return(ret);
            }
            throw new IOException("Missing ')'");
        }
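
Parse only consults the JobConf for the optional "mapred.join.keycomparator" class, so callers register a comparator before parsing. A rough sketch; Text.Comparator is an illustrative WritableComparator and GetJoinExpression is a hypothetical helper standing in for however the join expression string is built.

JobConf job = new JobConf();
// Optional: comparator picked up by Parse via "mapred.join.keycomparator".
job.SetClass("mapred.join.keycomparator", typeof(Text.Comparator), typeof(WritableComparator));
string joinExpr = GetJoinExpression();          // hypothetical helper; e.g. an outer(tbl(...), tbl(...)) expression
Parser.Node root = Parser.Parse(joinExpr, job); // throws IOException on a null or unbalanced expression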
Example 5
        /// <summary>Get a PathFilter instance of the filter set for the input paths.</summary>
        /// <returns>the PathFilter instance set for the job, NULL if none has been set.</returns>
        public static PathFilter GetInputPathFilter(JobConf conf)
        {
            Type filterClass = conf.GetClass<PathFilter>(FileInputFormat.PathfilterClass, null);

            return((filterClass != null) ? ReflectionUtils.NewInstance(filterClass, conf) : null);
        }
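
Setting the filter is the mirror image: the class is stored under FileInputFormat.PathfilterClass and instantiated reflectively on the way out. A minimal sketch, assuming the port keeps the Java-style SetInputPathFilter setter; MyDotFileFilter is a hypothetical PathFilter implementation.

JobConf conf = new JobConf();
// Assumed setter; it records the filter class under FileInputFormat.PathfilterClass.
FileInputFormat.SetInputPathFilter(conf, typeof(MyDotFileFilter));
PathFilter filter = FileInputFormat.GetInputPathFilter(conf);   // new MyDotFileFilter instance, or null if unset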
Example 6
 /// <exception cref="System.IO.IOException"/>
 private void GetBaseOutputFormat(JobConf job)
 {
     baseOut = ReflectionUtils.NewInstance(
         job.GetClass<OutputFormat>("mapreduce.output.lazyoutputformat.outputformat", null), job);
     if (baseOut == null)
     {
         throw new IOException("Ouput format not set for LazyOutputFormat");
     }
 }
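
The wrapped format is looked up under the literal key "mapreduce.output.lazyoutputformat.outputformat", so a job using LazyOutputFormat must register a concrete OutputFormat there or GetBaseOutputFormat throws. A minimal sketch using the SetClass/SetOutputFormat calls seen elsewhere in these examples; TextOutputFormat stands in for any real OutputFormat.

JobConf job = new JobConf();
job.SetOutputFormat(typeof(LazyOutputFormat));
// Register the underlying format under the key GetBaseOutputFormat reads;
// the Java-style LazyOutputFormat.SetOutputFormatClass helper, if present in this port, does the same.
job.SetClass("mapreduce.output.lazyoutputformat.outputformat", typeof(TextOutputFormat), typeof(OutputFormat));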
Example 7
            /// <exception cref="System.IO.IOException"/>
            public virtual RecordReader GetRecordReader(InputSplit split, JobConf job, Reporter
                                                        reporter)
            {
                InputFormat indirIF = (InputFormat)ReflectionUtils.NewInstance(
                    job.GetClass(GenericMRLoadGenerator.IndirectInputFormat, typeof(SequenceFileInputFormat)), job);

                GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit @is =
                    (GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit)split;
                return(indirIF.GetRecordReader(
                           new FileSplit(@is.GetPath(), 0, @is.GetLength(), (string[])null), job, reporter));
            }
Example 8
        /// <summary>test configuration for db.</summary>
        /// <remarks>test configuration for db; it should work with the DBConfiguration.* parameters.</remarks>
        public virtual void TestSetInput()
        {
            JobConf configuration = new JobConf();

            string[] fieldNames = new string[] { "field1", "field2" };
            DBInputFormat.SetInput(configuration, typeof(DBInputFormat.NullDBWritable),
                                   "table", "conditions", "orderBy", fieldNames);
            NUnit.Framework.Assert.AreEqual("org.apache.hadoop.mapred.lib.db.DBInputFormat$NullDBWritable",
                                            configuration.GetClass(DBConfiguration.InputClassProperty, null).FullName);
            NUnit.Framework.Assert.AreEqual("table", configuration.Get(DBConfiguration.InputTableNameProperty, null));
            string[] fields = configuration.GetStrings(DBConfiguration.InputFieldNamesProperty);
            NUnit.Framework.Assert.AreEqual("field1", fields[0]);
            NUnit.Framework.Assert.AreEqual("field2", fields[1]);
            NUnit.Framework.Assert.AreEqual("conditions", configuration.Get(DBConfiguration.InputConditionsProperty, null));
            NUnit.Framework.Assert.AreEqual("orderBy", configuration.Get(DBConfiguration.InputOrderByProperty, null));
            configuration = new JobConf();
            DBInputFormat.SetInput(configuration, typeof(DBInputFormat.NullDBWritable), "query", "countQuery");
            NUnit.Framework.Assert.AreEqual("query", configuration.Get(DBConfiguration.InputQuery, null));
            NUnit.Framework.Assert.AreEqual("countQuery", configuration.Get(DBConfiguration.InputCountQuery, null));
            JobConf jConfiguration = new JobConf();

            DBConfiguration.ConfigureDB(jConfiguration, "driverClass", "dbUrl", "user", "password");
            NUnit.Framework.Assert.AreEqual("driverClass", jConfiguration.Get(DBConfiguration.DriverClassProperty));
            NUnit.Framework.Assert.AreEqual("dbUrl", jConfiguration.Get(DBConfiguration.UrlProperty));
            NUnit.Framework.Assert.AreEqual("user", jConfiguration.Get(DBConfiguration.UsernameProperty));
            NUnit.Framework.Assert.AreEqual("password", jConfiguration.Get(DBConfiguration.PasswordProperty));
            jConfiguration = new JobConf();
            DBConfiguration.ConfigureDB(jConfiguration, "driverClass", "dbUrl");
            NUnit.Framework.Assert.AreEqual("driverClass", jConfiguration.Get(DBConfiguration.DriverClassProperty));
            NUnit.Framework.Assert.AreEqual("dbUrl", jConfiguration.Get(DBConfiguration.UrlProperty));
            NUnit.Framework.Assert.IsNull(jConfiguration.Get(DBConfiguration.UsernameProperty));
            NUnit.Framework.Assert.IsNull(jConfiguration.Get(DBConfiguration.PasswordProperty));
        }
 /// <summary>Inform the parser of user-defined types.</summary>
 /// <exception cref="System.IO.IOException"/>
 private void AddUserIdentifiers(JobConf job)
 {
     Sharpen.Pattern x = Sharpen.Pattern.Compile("^mapred\\.join\\.define\\.(\\w+)$");
     foreach (KeyValuePair <string, string> kv in job)
     {
         Matcher m = x.Matcher(kv.Key);
         if (m.Matches())
         {
             try
             {
                 Parser.CNode.AddIdentifier(m.Group(1),
                                            job.GetClass<ComposableRecordReader>(m.Group(0), null));
             }
             catch (MissingMethodException e)
             {
                 throw (IOException)Sharpen.Extensions.InitCause(
                     new IOException("Invalid define for " + m.Group(1)), e);
             }
         }
     }
 }
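
The pattern means any property named mapred.join.define.<ident> registers <ident> as a user-defined reader for the join expression parser. A minimal sketch; MyRecordReader is a hypothetical ComposableRecordReader implementation.

JobConf job = new JobConf();
// Matches ^mapred\.join\.define\.(\w+)$, so "custom" becomes usable as an identifier in join expressions.
job.SetClass("mapred.join.define.custom", typeof(MyRecordReader), typeof(ComposableRecordReader));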
Example 10
 public virtual void TestPluginAbility()
 {
     try
     {
         // create JobConf with mapreduce.job.shuffle.consumer.plugin=TestShuffleConsumerPlugin
         JobConf jobConf = new JobConf();
          jobConf.SetClass(MRConfig.ShuffleConsumerPlugin,
                           typeof(TestShufflePlugin.TestShuffleConsumerPlugin), typeof(ShuffleConsumerPlugin));
          ShuffleConsumerPlugin shuffleConsumerPlugin = null;
          Type clazz = jobConf.GetClass<ShuffleConsumerPlugin>(MRConfig.ShuffleConsumerPlugin, typeof(Shuffle));
          NUnit.Framework.Assert.IsNotNull("Unable to get " + MRConfig.ShuffleConsumerPlugin, clazz);
          // load 3rd party plugin through core's factory method
          shuffleConsumerPlugin = ReflectionUtils.NewInstance(clazz, jobConf);
          NUnit.Framework.Assert.IsNotNull("Unable to load " + MRConfig.ShuffleConsumerPlugin, shuffleConsumerPlugin);
     }
     catch (Exception e)
     {
         NUnit.Framework.Assert.IsTrue("Threw exception:" + e, false);
     }
 }
Example 11
 /// <summary>Get the user's original partitioner.</summary>
 /// <param name="conf">the configuration to look in</param>
 /// <returns>the class that the user submitted</returns>
 internal static Type GetJavaPartitioner(JobConf conf)
 {
     return(conf.GetClass<Partitioner>(Org.Apache.Hadoop.Mapred.Pipes.Submitter.Partitioner,
                                       typeof(HashPartitioner)));
 }
Example 12
 /// <summary>
 /// Get the
 /// <see cref="Org.Apache.Hadoop.Mapreduce.InputFormat{K, V}"/>
 /// class for the job.
 /// </summary>
 /// <returns>
 /// the
 /// <see cref="Org.Apache.Hadoop.Mapreduce.InputFormat{K, V}"/>
 /// class for the job.
 /// </returns>
 /// <exception cref="System.TypeLoadException"/>
 public virtual Type GetInputFormatClass()
 {
     return((Type)conf.GetClass(InputFormatClassAttr, typeof(TextInputFormat)));
 }
Example 13
 /// <summary>Returns the value class for a named output.</summary>
 /// <param name="conf">job conf</param>
 /// <param name="namedOutput">named output</param>
 /// <returns>class of named output value</returns>
 public static Type GetNamedOutputValueClass(JobConf conf, string namedOutput)
 {
     CheckNamedOutput(conf, namedOutput, false);
     return(conf.GetClass<object>(MoPrefix + namedOutput + Value, null));
 }
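
The MoPrefix + namedOutput + Value key is normally written when a named output is registered rather than set by hand. A minimal sketch, assuming this port keeps the Java-style MultipleOutputs.AddNamedOutput helper and that these getters live on MultipleOutputs as in the original; the "text" name and the key/value types are illustrative.

JobConf conf = new JobConf();
// Records the OutputFormat, key class and value class under the MoPrefix-based keys read back above.
MultipleOutputs.AddNamedOutput(conf, "text", typeof(TextOutputFormat), typeof(LongWritable), typeof(Text));
Type valueClass = MultipleOutputs.GetNamedOutputValueClass(conf, "text");   // typeof(Text)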
Example 14
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.TypeLoadException"/>
        public override void Run(JobConf job, TaskUmbilicalProtocol umbilical)
        {
            job.SetBoolean(JobContext.SkipRecords, IsSkipping());
            if (IsMapOrReduce())
            {
                copyPhase   = GetProgress().AddPhase("copy");
                sortPhase   = GetProgress().AddPhase("sort");
                reducePhase = GetProgress().AddPhase("reduce");
            }
            // start thread that will handle communication with parent
            Task.TaskReporter reporter = StartReporter(umbilical);
            bool useNewApi             = job.GetUseNewReducer();

            Initialize(job, GetJobID(), reporter, useNewApi);
            // check if it is a cleanupJobTask
            if (jobCleanup)
            {
                RunJobCleanupTask(umbilical, reporter);
                return;
            }
            if (jobSetup)
            {
                RunJobSetupTask(umbilical, reporter);
                return;
            }
            if (taskCleanup)
            {
                RunTaskCleanupTask(umbilical, reporter);
                return;
            }
            // Initialize the codec
            codec = InitCodec();
            RawKeyValueIterator   rIter = null;
            ShuffleConsumerPlugin shuffleConsumerPlugin = null;
            Type combinerClass = conf.GetCombinerClass();

            Task.CombineOutputCollector combineCollector = (null != combinerClass)
                ? new Task.CombineOutputCollector(reduceCombineOutputCounter, reporter, conf) : null;
            Type clazz = job.GetClass<ShuffleConsumerPlugin>(MRConfig.ShuffleConsumerPlugin, typeof(Shuffle));

            shuffleConsumerPlugin = ReflectionUtils.NewInstance(clazz, job);
            Log.Info("Using ShuffleConsumerPlugin: " + shuffleConsumerPlugin);
            ShuffleConsumerPlugin.Context shuffleContext = new ShuffleConsumerPlugin.Context(
                GetTaskID(), job, FileSystem.GetLocal(job), umbilical, base.lDirAlloc, reporter,
                codec, combinerClass, combineCollector, spilledRecordsCounter, reduceCombineInputCounter
                , shuffledMapsCounter, reduceShuffleBytes, failedShuffleCounter, mergedMapOutputsCounter
                , taskStatus, copyPhase, sortPhase, this, mapOutputFile, localMapFiles);
            shuffleConsumerPlugin.Init(shuffleContext);
            rIter = shuffleConsumerPlugin.Run();
            // free up the data structures
            mapOutputFilesOnDisk.Clear();
            sortPhase.Complete();
            // sort is complete
            SetPhase(TaskStatus.Phase.Reduce);
            StatusUpdate(umbilical);
            Type          keyClass   = job.GetMapOutputKeyClass();
            Type          valueClass = job.GetMapOutputValueClass();
            RawComparator comparator = job.GetOutputValueGroupingComparator();

            if (useNewApi)
            {
                RunNewReducer(job, umbilical, reporter, rIter, comparator, keyClass, valueClass);
            }
            else
            {
                RunOldReducer(job, umbilical, reporter, rIter, comparator, keyClass, valueClass);
            }
            shuffleConsumerPlugin.Close();
            Done(umbilical, reporter);
        }
 /// <summary>
 /// Get the value class for the
 /// <see cref="Org.Apache.Hadoop.IO.SequenceFile"/>
 /// </summary>
 /// <returns>
 /// the value class of the
 /// <see cref="Org.Apache.Hadoop.IO.SequenceFile"/>
 /// </returns>
 public static Type GetSequenceFileOutputValueClass(JobConf conf)
 {
     return(conf.GetClass<Writable>(SequenceFileAsBinaryOutputFormat.ValueClass,
                                    conf.GetOutputValueClass().AsSubclass<Writable>()));
 }
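
Because the lookup falls back to the job's declared output value class, overriding the stored value type only needs the ValueClass key to be set. A minimal sketch, assuming the Java-style SetSequenceFileOutputValueClass setter exists in this port; Text as the stored value type is illustrative.

JobConf conf = new JobConf();
// Assumed setter; it writes the SequenceFileAsBinaryOutputFormat.ValueClass key read back above.
SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(conf, typeof(Text));
Type valClass = SequenceFileAsBinaryOutputFormat.GetSequenceFileOutputValueClass(conf);   // typeof(Text)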
Example 16
 /// <exception cref="System.IO.IOException"/>
 public virtual InputSplit[] GetSplits(JobConf job, int numSplits)
 {
     // Delegate the generation of input splits to the 'original' InputFormat
     return(ReflectionUtils.NewInstance(
                job.GetClass<InputFormat>(Submitter.InputFormat, typeof(TextInputFormat)), job)
            .GetSplits(job, numSplits));
 }
Example 17
 /// <summary>Returns the named output OutputFormat.</summary>
 /// <param name="conf">job conf</param>
 /// <param name="namedOutput">named output</param>
 /// <returns>namedOutput OutputFormat</returns>
 public static Type GetNamedOutputFormatClass(JobConf conf, string namedOutput)
 {
     CheckNamedOutput(conf, namedOutput, false);
     return(conf.GetClass<OutputFormat>(MoPrefix + namedOutput + Format, null));
 }
Example 18
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] argv)
        {
            JobConf job = new JobConf(GetConf());

            job.SetJarByClass(typeof(GenericMRLoadGenerator));
            job.SetMapperClass(typeof(GenericMRLoadGenerator.SampleMapper));
            job.SetReducerClass(typeof(GenericMRLoadGenerator.SampleReducer));
            if (!ParseArgs(argv, job))
            {
                return(-1);
            }
            if (null == FileOutputFormat.GetOutputPath(job))
            {
                // No output dir? No writes
                job.SetOutputFormat(typeof(NullOutputFormat));
            }
            if (0 == FileInputFormat.GetInputPaths(job).Length)
            {
                // No input dir? Generate random data
                System.Console.Error.WriteLine("No input path; ignoring InputFormat");
                ConfRandom(job);
            }
            else
            {
                if (null != job.GetClass(GenericMRLoadGenerator.IndirectInputFormat, null))
                {
                    // specified IndirectInputFormat? Build src list
                    JobClient jClient = new JobClient(job);
                    Path tmpDir = new Path(jClient.GetFs().GetHomeDirectory(), ".staging");
                    Random r = new Random();
                    Path indirInputFile = new Path(tmpDir,
                                                   Sharpen.Extensions.ToString(r.Next(int.MaxValue), 36) + "_files");
                    job.Set(GenericMRLoadGenerator.IndirectInputFile, indirInputFile.ToString());
                    SequenceFile.Writer writer = SequenceFile.CreateWriter(
                        tmpDir.GetFileSystem(job), job, indirInputFile,
                        typeof(LongWritable), typeof(Text), SequenceFile.CompressionType.None);
                    try
                    {
                        foreach (Path p in FileInputFormat.GetInputPaths(job))
                        {
                            FileSystem   fs        = p.GetFileSystem(job);
                            Stack <Path> pathstack = new Stack <Path>();
                            pathstack.Push(p);
                            while (!pathstack.Empty())
                            {
                                foreach (FileStatus stat in fs.ListStatus(pathstack.Pop()))
                                {
                                    if (stat.IsDirectory())
                                    {
                                        if (!stat.GetPath().GetName().StartsWith("_"))
                                        {
                                            pathstack.Push(stat.GetPath());
                                        }
                                    }
                                    else
                                    {
                                        writer.Sync();
                                        writer.Append(new LongWritable(stat.GetLen()),
                                                      new Text(stat.GetPath().ToUri().ToString()));
                                    }
                                }
                            }
                        }
                    }
                    finally
                    {
                        writer.Close();
                    }
                }
            }
            DateTime startTime = new DateTime();

            System.Console.Out.WriteLine("Job started: " + startTime);
            JobClient.RunJob(job);
            DateTime endTime = new DateTime();

            System.Console.Out.WriteLine("Job ended: " + endTime);
            System.Console.Out.WriteLine("The job took " + (endTime.GetTime() - startTime.GetTime
                                                                ()) / 1000 + " seconds.");
            return(0);
        }