Пример #1
0
        /// <summary>
        /// Builds the mapping from each input
        /// <see cref="Org.Apache.Hadoop.FS.Path"/>
        /// to the
        /// <see cref="Org.Apache.Hadoop.Mapred.Mapper{K1, V1, K2, V2}"/>
        /// type that should be used for it.
        /// </summary>
        /// <remarks>
        /// The mapping is read from the <c>mapreduce.input.multipleinputs.dir.mappers</c>
        /// property, which is split on <c>,</c> into entries and on <c>;</c> into
        /// a path and a mapper class name.
        /// </remarks>
        /// <param name="conf">The configuration of the job</param>
        /// <returns>
        /// A map of paths to mappers for the job; an empty map when the property
        /// is not set
        /// </returns>
        internal static IDictionary <Path, Type> GetMapperTypeMap(JobConf conf)
        {
            string configuredMappers = conf.Get("mapreduce.input.multipleinputs.dir.mappers");

            if (configuredMappers == null)
            {
                return Sharpen.Collections.EmptyMap();
            }

            IDictionary <Path, Type> mappersByPath = new Dictionary <Path, Type>();
            foreach (string entry in configuredMappers.Split(","))
            {
                // parts[0] is the input path, parts[1] the mapper class name.
                string[] parts = entry.Split(";");
                Type     mapperType;
                try
                {
                    mapperType = (Type)conf.GetClassByName(parts[1]);
                }
                catch (TypeLoadException e)
                {
                    throw new RuntimeException(e);
                }
                mappersByPath[new Path(parts[0])] = mapperType;
            }
            return mappersByPath;
        }
Пример #2
0
        /// <summary>Returns the qualified system directory as a string.</summary>
        /// <remarks>
        /// The directory is read from <c>JTConfig.JtSystemDir</c> with
        /// <c>/tmp/hadoop/mapred/system</c> passed as the default, then qualified
        /// through <c>fs</c>.
        /// </remarks>
        /// <seealso cref="Org.Apache.Hadoop.Mapreduce.Protocol.ClientProtocol.GetSystemDir()"/>
        public override string GetSystemDir()
        {
            string configuredDir = conf.Get(JTConfig.JtSystemDir, "/tmp/hadoop/mapred/system");

            return fs.MakeQualified(new Path(configuredDir)).ToString();
        }
Пример #3
0
 /// <summary>
 /// Sets up the slive configuration, the target file system and the task id
 /// from the job configuration.
 /// </summary>
 /// <remarks>
 /// Overrides org.apache.hadoop.mapred.MapReduceBase#configure(JobConf).
 /// </remarks>
 /// <param name="conf">job configuration to read the settings from</param>
 public override void Configure(JobConf conf)
 {
     // MapReduceBase
     try
     {
         config = new ConfigExtractor(conf);
         ConfigExtractor.DumpOptions(config);
         filesystem = config.GetBaseDirectory().GetFileSystem(conf);
     }
     catch (Exception e)
     {
         Log.Error("Unable to setup slive " + StringUtils.StringifyException(e));
         throw new RuntimeException("Unable to setup slive configuration", e);
     }

     string attemptName = conf.Get(MRJobConfig.TaskAttemptId);
     if (attemptName == null)
     {
         // So that branch-1/0.20 can run this same code as well
         attemptName = conf.Get("mapred.task.id");
     }
     this.taskId = TaskAttemptID.ForName(attemptName).GetTaskID().GetId();
 }
Пример #4
0
            /// <summary>
            /// Checks that the expected JVM options and user environment variables
            /// are present in the task.
            /// </summary>
            /// <remarks>
            /// The assertions follow NUnit's argument order: the checked value(s)
            /// come first and the failure message last.
            /// </remarks>
            /// <param name="job">configuration of the running job</param>
            public override void Configure(JobConf job)
            {
                bool oldConfigs = job.GetBoolean(OldConfigs, false);

                if (oldConfigs)
                {
                    string javaOpts = job.Get(JobConf.MapredTaskJavaOpts);
                    NUnit.Framework.Assert.IsNotNull(javaOpts, JobConf.MapredTaskJavaOpts + " is null!");
                    NUnit.Framework.Assert.AreEqual(TaskOptsVal, javaOpts, JobConf.MapredTaskJavaOpts
                                                    + " has value of: " + javaOpts);
                }
                else
                {
                    string mapJavaOpts = job.Get(JobConf.MapredMapTaskJavaOpts);
                    NUnit.Framework.Assert.IsNotNull(mapJavaOpts, JobConf.MapredMapTaskJavaOpts + " is null!");
                    NUnit.Framework.Assert.AreEqual(MapOptsVal, mapJavaOpts, JobConf.MapredMapTaskJavaOpts
                                                    + " has value of: " + mapJavaOpts);
                }
                string path = job.Get("path");
                // check if the pwd is there in LD_LIBRARY_PATH
                string pwd = Runtime.Getenv("PWD");

                NUnit.Framework.Assert.IsTrue(Runtime.Getenv("LD_LIBRARY_PATH").Contains(pwd),
                                              "LD doesnt contain pwd");
                // check if X=$X:/abc works for LD_LIBRARY_PATH
                CheckEnv("LD_LIBRARY_PATH", "/tmp", "append");
                // check if X=y works for an already existing parameter
                CheckEnv("LANG", "en_us_8859_1", "noappend");
                // check if X=/tmp for a new env variable
                CheckEnv("MY_PATH", "/tmp", "noappend");
                // check if X=$X:/tmp works for a new env var and results into :/tmp
                CheckEnv("NEW_PATH", FilePath.pathSeparator + "/tmp", "noappend");
                // check if X=$(tt's X var):/tmp for an old env variable inherited from
                // the tt
                if (Shell.Windows)
                {
                    // On Windows, PATH is replaced one more time as part of default config
                    // of "mapreduce.admin.user.env", i.e. on Windows,
                    // "mapreduce.admin.user.env" is set to
                    // "PATH=%PATH%;%HADOOP_COMMON_HOME%\\bin"
                    string hadoopHome = Runtime.Getenv("HADOOP_COMMON_HOME");
                    if (hadoopHome == null)
                    {
                        hadoopHome = string.Empty;
                    }
                    string hadoopLibLocation = hadoopHome + "\\bin";
                    path += FilePath.pathSeparator + hadoopLibLocation;
                    // NOTE(review): this appends the accumulated path to itself;
                    // kept unchanged - confirm it matches the expected PATH layout.
                    path += FilePath.pathSeparator + path;
                }
                CheckEnv("PATH", path + FilePath.pathSeparator + "/tmp", "noappend");
                string jobLocalDir = job.Get(MRJobConfig.JobLocalDir);

                NUnit.Framework.Assert.IsNotNull(jobLocalDir, MRJobConfig.JobLocalDir + " is null");
            }
Пример #5
0
 /// <summary>Looks up the child-task environment setting for a map or reduce task.</summary>
 /// <param name="jobConf">job configuration to read from</param>
 /// <param name="isMap">true to read the map-task key, false to read the reduce-task key</param>
 /// <returns>
 /// the value stored under the task-specific key, with the value of
 /// <c>JobConf.MapredTaskEnv</c> passed as the default
 /// </returns>
 private static string GetChildEnv(JobConf jobConf, bool isMap)
 {
     string taskSpecificKey = isMap ? JobConf.MapredMapTaskEnv : JobConf.MapredReduceTaskEnv;
     string sharedValue     = jobConf.Get(JobConf.MapredTaskEnv);

     return jobConf.Get(taskSpecificKey, sharedValue);
 }
Пример #6
0
 /// <summary>
 /// Reads the field-selection settings from the job, parses the key/value
 /// specs and logs the resulting spec.
 /// </summary>
 /// <param name="job">job configuration supplying the separator and the specs</param>
 public virtual void Configure(JobConf job)
 {
     this.fieldSeparator        = job.Get(FieldSelectionHelper.DataFieldSeperator, "\t");
     this.mapOutputKeyValueSpec = job.Get(FieldSelectionHelper.MapOutputKeyValueSpec, "0-:");

     // The input key is ignored when the job's input format has the same
     // canonical name as TextInputFormat.
     string textInputName = typeof(TextInputFormat).GetCanonicalName();
     string jobInputName  = job.GetInputFormat().GetType().GetCanonicalName();
     this.ignoreInputKey = textInputName.Equals(jobInputName);

     this.reduceOutputKeyValueSpec = job.Get(FieldSelectionHelper.ReduceOutputKeyValueSpec, "0-:");

     ParseOutputKeyValueSpec();
     Log.Info(SpecToString());
 }
Пример #7
0
        /// <summary>
        /// Checks that a property passed when the cluster is restarted shows up
        /// in job configurations created afterwards.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestMRConfig()
        {
            // The property must be absent before the restart.
            JobConf initialConf = CreateJobConf();
            NUnit.Framework.Assert.IsNull(initialConf.Get("xyz"));

            Properties clusterProps = new Properties();
            clusterProps.SetProperty("xyz", "XYZ");

            StopCluster();
            StartCluster(false, clusterProps);

            JobConf restartedConf = CreateJobConf();
            NUnit.Framework.Assert.AreEqual("XYZ", restartedConf.Get("xyz"));
        }
Пример #8
0
        /// <summary>Tests the configuration used for database input.</summary>
        /// <remarks>
        /// Checks that <c>DBInputFormat.SetInput</c> and
        /// <c>DBConfiguration.ConfigureDB</c> store their arguments under the
        /// corresponding <c>DBConfiguration.*</c> properties.
        /// </remarks>
        public virtual void TestSetInput()
        {
            // Table-based input: table, conditions, ordering and field names.
            JobConf  tableConf      = new JobConf();
            string[] expectedFields = new string[] { "field1", "field2" };

            DBInputFormat.SetInput(tableConf, typeof(DBInputFormat.NullDBWritable), "table",
                                   "conditions", "orderBy", expectedFields);
            NUnit.Framework.Assert.AreEqual(
                "org.apache.hadoop.mapred.lib.db.DBInputFormat$NullDBWritable",
                tableConf.GetClass(DBConfiguration.InputClassProperty, null).FullName);
            NUnit.Framework.Assert.AreEqual(
                "table", tableConf.Get(DBConfiguration.InputTableNameProperty, null));
            string[] storedFields = tableConf.GetStrings(DBConfiguration.InputFieldNamesProperty);
            NUnit.Framework.Assert.AreEqual("field1", storedFields[0]);
            NUnit.Framework.Assert.AreEqual("field2", storedFields[1]);
            NUnit.Framework.Assert.AreEqual(
                "conditions", tableConf.Get(DBConfiguration.InputConditionsProperty, null));
            NUnit.Framework.Assert.AreEqual(
                "orderBy", tableConf.Get(DBConfiguration.InputOrderByProperty, null));

            // Query-based input: query and count query.
            JobConf queryConf = new JobConf();
            DBInputFormat.SetInput(queryConf, typeof(DBInputFormat.NullDBWritable), "query",
                                   "countQuery");
            NUnit.Framework.Assert.AreEqual(
                "query", queryConf.Get(DBConfiguration.InputQuery, null));
            NUnit.Framework.Assert.AreEqual(
                "countQuery", queryConf.Get(DBConfiguration.InputCountQuery, null));

            // Connection settings with credentials.
            JobConf credentialConf = new JobConf();
            DBConfiguration.ConfigureDB(credentialConf, "driverClass", "dbUrl", "user", "password");
            NUnit.Framework.Assert.AreEqual(
                "driverClass", credentialConf.Get(DBConfiguration.DriverClassProperty));
            NUnit.Framework.Assert.AreEqual(
                "dbUrl", credentialConf.Get(DBConfiguration.UrlProperty));
            NUnit.Framework.Assert.AreEqual(
                "user", credentialConf.Get(DBConfiguration.UsernameProperty));
            NUnit.Framework.Assert.AreEqual(
                "password", credentialConf.Get(DBConfiguration.PasswordProperty));

            // Connection settings without credentials: user and password stay unset.
            JobConf anonymousConf = new JobConf();
            DBConfiguration.ConfigureDB(anonymousConf, "driverClass", "dbUrl");
            NUnit.Framework.Assert.AreEqual(
                "driverClass", anonymousConf.Get(DBConfiguration.DriverClassProperty));
            NUnit.Framework.Assert.AreEqual(
                "dbUrl", anonymousConf.Get(DBConfiguration.UrlProperty));
            NUnit.Framework.Assert.IsNull(anonymousConf.Get(DBConfiguration.UsernameProperty));
            NUnit.Framework.Assert.IsNull(anonymousConf.Get(DBConfiguration.PasswordProperty));
        }
Пример #9
0
 /// <summary>
 /// Reads the task id, the number of maps, the file system and the signal
 /// file path from the job configuration.
 /// </summary>
 /// <param name="conf">job configuration to read from</param>
 public override void Configure(JobConf conf)
 {
     try
     {
         // The task id is taken from the fifth "_"-separated part of the attempt id.
         string[] attemptIdParts = conf.Get(JobContext.TaskAttemptId).Split("_");
         id = System.Convert.ToInt32(attemptIdParts[4]);

         string mapCount = conf.Get(JobContext.NumMaps);
         totalMaps = System.Convert.ToInt32(mapCount);

         fs = FileSystem.Get(conf);

         string signalFile = conf.Get(GetTaskSignalParameter(true));
         signal = new Path(signalFile);
     }
     catch (IOException)
     {
         // The failure is only reported on the console; configuration continues.
         System.Console.Out.WriteLine("Got an exception while obtaining the filesystem");
     }
 }
        /// <summary>
        /// Reads the
        /// <see cref="Org.Apache.Hadoop.IO.SequenceFile.CompressionType"/>
        /// configured for the output
        /// <see cref="Org.Apache.Hadoop.IO.SequenceFile"/>
        /// .
        /// </summary>
        /// <param name="conf">
        /// the
        /// <see cref="JobConf"/>
        /// to read from
        /// </param>
        /// <returns>
        /// the compression type named by <c>FileOutputFormat.CompressType</c>,
        /// with
        /// <see cref="Org.Apache.Hadoop.IO.SequenceFile.CompressionType.Record"/>
        /// passed as the default
        /// </returns>
        public static SequenceFile.CompressionType GetOutputCompressionType(JobConf conf)
        {
            string fallbackName   = SequenceFile.CompressionType.Record.ToString();
            string configuredName = conf.Get(FileOutputFormat.CompressType, fallbackName);

            return SequenceFile.CompressionType.ValueOf(configuredName);
        }
Пример #11
0
        /// <summary>
        /// Reads every record of the benchmark file through the job's input format
        /// and measures how long the read loop takes.
        /// </summary>
        /// <remarks>
        /// The file name comes from <c>test.filebench.name</c> and is resolved
        /// against the first input path of the job.
        /// </remarks>
        /// <param name="conf">job configuration describing the input format and input path</param>
        /// <returns>
        /// the difference between the end and start timestamps as reported by
        /// <c>GetTime()</c> (presumably milliseconds - confirm against the helper)
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        internal static long ReadBench(JobConf conf)
        {
            // InputFormat instantiation
            InputFormat  inf = conf.GetInputFormat();
            string       fn  = conf.Get("test.filebench.name", string.Empty);
            Path         pin = new Path(FileInputFormat.GetInputPaths(conf)[0], fn);
            FileStatus   @in = pin.GetFileSystem(conf).GetFileStatus(pin);
            RecordReader rr  = inf.GetRecordReader(new FileSplit(pin, 0, @in.GetLen(), (string[])null),
                                                   conf, Reporter.Null);

            try
            {
                object key = rr.CreateKey();
                object val = rr.CreateValue();
                // "new DateTime()" yields DateTime.MinValue rather than the current
                // time, which made the measured duration always zero.
                DateTime start = DateTime.UtcNow;
                while (rr.Next(key, val))
                {
                }
                DateTime end = DateTime.UtcNow;
                return(end.GetTime() - start.GetTime());
            }
            finally
            {
                rr.Close();
            }
        }
Пример #12
0
        /// <summary>
        /// Creates a line record writer for the task's output file, wrapping the
        /// stream in a compression codec when output compression is enabled.
        /// </summary>
        /// <param name="ignored">unused</param>
        /// <param name="job">job configuration</param>
        /// <param name="name">name of the task output file</param>
        /// <param name="progress">progress callback passed to the file creation</param>
        /// <returns>a writer that separates keys and values with the configured separator</returns>
        /// <exception cref="System.IO.IOException"/>
        public override RecordWriter <K, V> GetRecordWriter(FileSystem ignored, JobConf job
                                                            , string name, Progressable progress)
        {
            bool   compress  = GetCompressOutput(job);
            string separator = job.Get("mapreduce.output.textoutputformat.separator", "\t");

            if (compress)
            {
                // create the named codec
                Type             codecType = GetOutputCompressorClass(job, typeof(GzipCodec));
                CompressionCodec codec     = ReflectionUtils.NewInstance(codecType, job);

                // build the filename including the extension
                string             compressedName = name + codec.GetDefaultExtension();
                Path               compressedFile = FileOutputFormat.GetTaskOutputPath(job, compressedName);
                FileSystem         compressedFs   = compressedFile.GetFileSystem(job);
                FSDataOutputStream rawOut         = compressedFs.Create(compressedFile, progress);

                return new TextOutputFormat.LineRecordWriter <K, V>(
                    new DataOutputStream(codec.CreateOutputStream(rawOut)), separator);
            }

            Path               plainFile = FileOutputFormat.GetTaskOutputPath(job, name);
            FileSystem         plainFs   = plainFile.GetFileSystem(job);
            FSDataOutputStream plainOut  = plainFs.Create(plainFile, progress);

            return new TextOutputFormat.LineRecordWriter <K, V>(plainOut, separator);
        }
Пример #13
0
 /// <summary>
 /// Stores a value under the given key unless the configuration already
 /// returns a non-null value for it.
 /// </summary>
 /// <param name="conf">the configuration to modify</param>
 /// <param name="key">the key to set</param>
 /// <param name="value">the new "default" value to set</param>
 private static void SetIfUnset(JobConf conf, string key, string value)
 {
     bool alreadySet = conf.Get(key) != null;

     if (alreadySet)
     {
         return;
     }
     conf.Set(key, value);
 }
Пример #14
0
 /// <summary>
 /// Builds a new <c>JobConf</c> from the given configuration and sets the
 /// security service user name key to the history server principal.
 /// </summary>
 /// <param name="conf">configuration to build the new one from</param>
 /// <returns>the newly created configuration</returns>
 private Configuration AddSecurityConfiguration(Configuration conf)
 {
     Configuration secured = new JobConf(conf);

     // An empty string is passed as the default for the principal.
     string principal = secured.Get(JHAdminConfig.MrHistoryPrincipal, string.Empty);
     secured.Set(CommonConfigurationKeys.HadoopSecurityServiceUserNameKey, principal);

     return secured;
 }
Пример #15
0
        /// <summary>
        /// Runs the word count job on a mini MR cluster backed by a mini DFS
        /// cluster that was configured with a custom system directory.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestWithDFS()
        {
            MiniDFSCluster dfsCluster = null;
            MiniMRCluster  mrCluster  = null;

            try
            {
                const int trackerCount = 4;

                JobConf clusterConf = new JobConf();
                clusterConf.Set(JTConfig.JtSystemDir, "/tmp/custom/mapred/system");

                dfsCluster = new MiniDFSCluster.Builder(clusterConf).NumDataNodes(4).Build();
                FileSystem dfsFileSystem = dfsCluster.GetFileSystem();

                mrCluster = new MiniMRCluster(trackerCount, dfsFileSystem.GetUri().ToString(), 1,
                                              null, null, clusterConf);

                RunWordCount(mrCluster, mrCluster.CreateJobConf(), clusterConf.Get("mapred.system.dir"));
            }
            finally
            {
                // Shut down whichever clusters were started, DFS first.
                if (dfsCluster != null)
                {
                    dfsCluster.Shutdown();
                }
                if (mrCluster != null)
                {
                    mrCluster.Shutdown();
                }
            }
        }
Пример #16
0
        /// <summary>
        /// Reads the stdout task log of the task attempt named in the configuration.
        /// </summary>
        /// <param name="conf">job configuration holding the task attempt id</param>
        /// <returns>whatever <c>ReadFile</c> returns for the stdout log file</returns>
        /// <exception cref="System.Exception"/>
        private string ReadStdOut(JobConf conf)
        {
            string        attemptName = conf.Get(MRJobConfig.TaskAttemptId);
            TaskAttemptID attemptId   = (TaskAttemptID)TaskAttemptID.ForName(attemptName);
            FilePath      stdOutLog   = TaskLog.GetTaskLogFile(attemptId, false, TaskLog.LogName.Stdout);

            return ReadFile(stdOutLog);
        }
Пример #17
0
        /// <summary>
        /// Checks that <c>SetProfileParams</c> stores its argument under
        /// <c>MRJobConfig.TaskProfileParams</c>.
        /// </summary>
        public virtual void TestProfileParamsSetter()
        {
            JobConf jobConf = new JobConf();
            jobConf.SetProfileParams("test");

            string storedParams = jobConf.Get(MRJobConfig.TaskProfileParams);
            NUnit.Framework.Assert.AreEqual("test", storedParams);
        }
Пример #18
0
        /// <summary>
        /// Decides whether the current map input file belongs to the sort input
        /// or to the sort output.
        /// </summary>
        /// <param name="job">job configuration holding the input paths and the current input file</param>
        /// <returns>
        /// <c>sortInput</c> when the file's parent equals the first input path,
        /// otherwise <c>sortOutput</c>
        /// </returns>
        private static IntWritable DeduceInputFile(JobConf job)
        {
            Path[] configuredInputs = FileInputFormat.GetInputPaths(job);
            Path   currentFile      = new Path(job.Get(JobContext.MapInputFile));

            // value == one for sort-input; value == two for sort-output
            if (currentFile.GetParent().Equals(configuredInputs[0]))
            {
                return sortInput;
            }
            return sortOutput;
        }
Пример #19
0
        /// <summary>
        /// Appends a
        /// <see cref="Org.Apache.Hadoop.FS.Path"/>
        /// to the list of inputs for the map-reduce job.
        /// </summary>
        /// <remarks>
        /// The path is resolved against the job's working directory and escaped
        /// before it is appended to <c>FileInputFormat.InputDir</c>.
        /// </remarks>
        /// <param name="conf">The configuration of the job</param>
        /// <param name="path">
        ///
        /// <see cref="Org.Apache.Hadoop.FS.Path"/>
        /// to be added to the list of inputs for
        /// the map-reduce job.
        /// </param>
        public static void AddInputPath(JobConf conf, Path path)
        {
            Path   resolved   = new Path(conf.GetWorkingDirectory(), path);
            string escapedDir = StringUtils.EscapeString(resolved.ToString());
            string existing   = conf.Get(FileInputFormat.InputDir);

            string updated;
            if (existing == null)
            {
                updated = escapedDir;
            }
            else
            {
                updated = existing + StringUtils.CommaStr + escapedDir;
            }
            conf.Set(FileInputFormat.InputDir, updated);
        }
Пример #20
0
        /// <summary>
        /// Registers a
        /// <see cref="Org.Apache.Hadoop.FS.Path"/>
        /// together with a custom
        /// <see cref="Org.Apache.Hadoop.Mapred.InputFormat{K, V}"/>
        /// in the list of inputs for the map-reduce job.
        /// </summary>
        /// <remarks>
        /// The <c>path;formatClassName</c> entry is appended, comma-separated, to
        /// <c>mapreduce.input.multipleinputs.dir.formats</c>, and the job's input
        /// format is set to <c>DelegatingInputFormat</c>.
        /// </remarks>
        /// <param name="conf">The configuration of the job</param>
        /// <param name="path">
        ///
        /// <see cref="Org.Apache.Hadoop.FS.Path"/>
        /// to be added to the list of inputs for the job
        /// </param>
        /// <param name="inputFormatClass">
        ///
        /// <see cref="Org.Apache.Hadoop.Mapred.InputFormat{K, V}"/>
        /// class to use for this path
        /// </param>
        public static void AddInputPath(JobConf conf, Path path, Type inputFormatClass)
        {
            string newEntry = path.ToString() + ";" + inputFormatClass.FullName;
            string existing = conf.Get("mapreduce.input.multipleinputs.dir.formats");

            string combined;
            if (existing == null)
            {
                combined = newEntry;
            }
            else
            {
                combined = existing + "," + newEntry;
            }
            conf.Set("mapreduce.input.multipleinputs.dir.formats", combined);

            conf.SetInputFormat(typeof(DelegatingInputFormat));
        }
Пример #21
0
            /// <summary>
            /// Configures and runs the record-checker job over the sort input and
            /// the sort output, printing progress and timing to the console.
            /// </summary>
            /// <param name="defaults">base configuration for the job and the file system</param>
            /// <param name="noMaps">number of map tasks, or -1 to derive it from the cluster</param>
            /// <param name="noReduces">number of reduce tasks, or -1 to derive it from the cluster</param>
            /// <param name="sortInput">input path of the sort being validated</param>
            /// <param name="sortOutput">output path of the sort being validated</param>
            /// <exception cref="System.IO.IOException"/>
            internal static void CheckRecords(Configuration defaults, int noMaps, int noReduces
                                              , Path sortInput, Path sortOutput)
            {
                JobConf jobConf = new JobConf(defaults, typeof(SortValidator.RecordChecker));

                jobConf.SetJobName("sortvalidate-record-checker");
                jobConf.SetInputFormat(typeof(SequenceFileInputFormat));
                jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
                jobConf.SetOutputKeyClass(typeof(BytesWritable));
                jobConf.SetOutputValueClass(typeof(IntWritable));
                jobConf.SetMapperClass(typeof(SortValidator.RecordChecker.Map));
                jobConf.SetReducerClass(typeof(SortValidator.RecordChecker.Reduce));
                JobClient     client  = new JobClient(jobConf);
                ClusterStatus cluster = client.GetClusterStatus();

                if (noMaps == -1)
                {
                    noMaps = cluster.GetTaskTrackers() * jobConf.GetInt(MapsPerHost, 10);
                }
                if (noReduces == -1)
                {
                    // Default to 90% of the cluster's reduce capacity unless a
                    // per-host reduce count is configured.
                    noReduces = (int)(cluster.GetMaxReduceTasks() * 0.9);
                    string sortReduces = jobConf.Get(ReducesPerHost);
                    if (sortReduces != null)
                    {
                        noReduces = cluster.GetTaskTrackers() * System.Convert.ToInt32(sortReduces);
                    }
                }
                jobConf.SetNumMapTasks(noMaps);
                jobConf.SetNumReduceTasks(noReduces);
                FileInputFormat.SetInputPaths(jobConf, sortInput);
                FileInputFormat.AddInputPath(jobConf, sortOutput);
                Path       outputPath = new Path("/tmp/sortvalidate/recordchecker");
                FileSystem fs         = FileSystem.Get(defaults);

                // Remove the output of a previous run.
                if (fs.Exists(outputPath))
                {
                    fs.Delete(outputPath, true);
                }
                FileOutputFormat.SetOutputPath(jobConf, outputPath);
                // Uncomment to run locally in a single process
                //job_conf.set(JTConfig.JT, "local");
                Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
                System.Console.Out.WriteLine("\nSortValidator.RecordChecker: Running on " + cluster
                                             .GetTaskTrackers() + " nodes to validate sort from " + inputPaths[0] + ", " + inputPaths
                                             [1] + " into " + FileOutputFormat.GetOutputPath(jobConf) + " with " + noReduces
                                             + " reduces.");
                // "new DateTime()" yields DateTime.MinValue rather than the current
                // time, so the timestamps have to be taken from the clock explicitly.
                DateTime startTime = DateTime.UtcNow;

                System.Console.Out.WriteLine("Job started: " + startTime);
                JobClient.RunJob(jobConf);
                DateTime endTime = DateTime.UtcNow;

                System.Console.Out.WriteLine("Job ended: " + endTime);
                System.Console.Out.WriteLine("The job took " + (endTime.GetTime() - startTime.GetTime
                                                                    ()) / 1000 + " seconds.");
            }
Пример #22
0
        /// <summary>
        /// Exercises file system and job client calls that go through custom
        /// socket configurations against a mini DFS and a mini MR YARN cluster.
        /// </summary>
        public virtual void TestSocketFactory()
        {
            // Create a standard mini-cluster
            Configuration  standardConf = new Configuration();
            MiniDFSCluster dfsCluster   = new MiniDFSCluster.Builder(standardConf).NumDataNodes(1).Build();
            int            nameNodePort = dfsCluster.GetNameNodePort();

            // Get a reference to its DFS directly
            FileSystem clusterFs = dfsCluster.GetFileSystem();
            NUnit.Framework.Assert.IsTrue(clusterFs is DistributedFileSystem);
            DistributedFileSystem directDfs = (DistributedFileSystem)clusterFs;

            // Get a second reference through the custom socket configuration
            Configuration customConf = GetCustomSocketConfigs(nameNodePort);
            FileSystem    customFs   = FileSystem.Get(customConf);
            NUnit.Framework.Assert.IsTrue(customFs is DistributedFileSystem);
            DistributedFileSystem dfs = (DistributedFileSystem)customFs;

            JobClient         client      = null;
            MiniMRYarnCluster yarnCluster = null;

            try
            {
                // This will test RPC to the NameNode only.
                // could we test Client-DataNode connections?
                Path dir = new Path("/dir");
                NUnit.Framework.Assert.IsFalse(directDfs.Exists(dir));
                NUnit.Framework.Assert.IsFalse(dfs.Exists(dir));
                directDfs.Mkdirs(dir);
                NUnit.Framework.Assert.IsTrue(directDfs.Exists(dir));
                NUnit.Framework.Assert.IsTrue(dfs.Exists(dir));

                // This will test RPC to a Resource Manager
                FileSystem standardFs = FileSystem.Get(standardConf);
                JobConf    baseConf   = new JobConf();
                FileSystem.SetDefaultUri(baseConf, standardFs.GetUri().ToString());
                yarnCluster = InitAndStartMiniMRYarnCluster(baseConf);

                JobConf clientConf = new JobConf(yarnCluster.GetConfig());
                clientConf.Set("hadoop.rpc.socket.factory.class.default",
                               "org.apache.hadoop.ipc.DummySocketFactory");
                clientConf.Set(MRConfig.FrameworkName, MRConfig.YarnFrameworkName);

                // Rewrite the resource manager address with its port shifted by 10.
                string[] hostAndPort = clientConf.Get("yarn.resourcemanager.address").Split(":");
                int      shiftedPort = System.Convert.ToInt32(hostAndPort[1]) + 10;
                clientConf.Set("yarn.resourcemanager.address", hostAndPort[0] + ':' + shiftedPort);

                client = new JobClient(clientConf);
                JobStatus[] pendingJobs = client.JobsToComplete();
                NUnit.Framework.Assert.IsTrue(pendingJobs.Length == 0);
            }
            finally
            {
                CloseClient(client);
                CloseDfs(dfs);
                CloseDfs(directDfs);
                StopMiniMRYarnCluster(yarnCluster);
                ShutdownDFSCluster(dfsCluster);
            }
        }
Пример #23
0
        /// <summary>
        /// Registers a
        /// <see cref="Org.Apache.Hadoop.FS.Path"/>
        /// together with a custom
        /// <see cref="Org.Apache.Hadoop.Mapred.InputFormat{K, V}"/>
        /// and
        /// <see cref="Org.Apache.Hadoop.Mapred.Mapper{K1, V1, K2, V2}"/>
        /// in the list of inputs for the map-reduce job.
        /// </summary>
        /// <remarks>
        /// The input format is registered first through the three-argument
        /// overload. The <c>path;mapperClassName</c> entry is then appended,
        /// comma-separated, to <c>mapreduce.input.multipleinputs.dir.mappers</c>,
        /// and the job's mapper is set to <c>DelegatingMapper</c>.
        /// </remarks>
        /// <param name="conf">The configuration of the job</param>
        /// <param name="path">
        ///
        /// <see cref="Org.Apache.Hadoop.FS.Path"/>
        /// to be added to the list of inputs for the job
        /// </param>
        /// <param name="inputFormatClass">
        ///
        /// <see cref="Org.Apache.Hadoop.Mapred.InputFormat{K, V}"/>
        /// class to use for this path
        /// </param>
        /// <param name="mapperClass">
        ///
        /// <see cref="Org.Apache.Hadoop.Mapred.Mapper{K1, V1, K2, V2}"/>
        /// class to use for this path
        /// </param>
        public static void AddInputPath(JobConf conf, Path path, Type inputFormatClass, Type
                                        mapperClass)
        {
            AddInputPath(conf, path, inputFormatClass);

            string newEntry = path.ToString() + ";" + mapperClass.FullName;
            string existing = conf.Get("mapreduce.input.multipleinputs.dir.mappers");

            string combined;
            if (existing == null)
            {
                combined = newEntry;
            }
            else
            {
                combined = existing + "," + newEntry;
            }
            conf.Set("mapreduce.input.multipleinputs.dir.mappers", combined);

            conf.SetMapperClass(typeof(DelegatingMapper));
        }
Пример #24
0
        /// <summary>
        /// Reads the input
        /// <see cref="Org.Apache.Hadoop.FS.Path"/>
        /// s configured for the map-reduce job.
        /// </summary>
        /// <remarks>
        /// The value of <c>FileInputFormat.InputDir</c> (empty string as the
        /// default) is split with <c>StringUtils.Split</c> and every element is
        /// unescaped before being turned into a path.
        /// </remarks>
        /// <param name="conf">The configuration of the job</param>
        /// <returns>
        /// the list of input
        /// <see cref="Org.Apache.Hadoop.FS.Path"/>
        /// s for the map-reduce job.
        /// </returns>
        public static Path[] GetInputPaths(JobConf conf)
        {
            string[] escapedDirs = StringUtils.Split(conf.Get(FileInputFormat.InputDir, string.Empty));
            Path[]   paths       = new Path[escapedDirs.Length];
            int      next        = 0;

            foreach (string escapedDir in escapedDirs)
            {
                paths[next] = new Path(StringUtils.UnEscapeString(escapedDir));
                next++;
            }
            return paths;
        }
Пример #25
0
 /// <summary>
 /// Obtains the file system and the signal file path from the job configuration.
 /// </summary>
 /// <param name="conf">job configuration to read from</param>
 public override void Configure(JobConf conf)
 {
     try
     {
         fs = FileSystem.Get(conf);

         string signalFile = conf.Get(GetTaskSignalParameter(false));
         signal = new Path(signalFile);
     }
     catch (IOException)
     {
         // The failure is only reported on the console; configuration continues.
         System.Console.Out.WriteLine("Got an exception while obtaining the filesystem");
     }
 }
Пример #26
0
 /// <summary>Registers a named output in the job configuration.</summary>
 /// <remarks>
 /// The name is validated, appended (space-separated) to the list of named
 /// outputs, and its format, key class, value class and multi flag are stored
 /// under keys built from <c>MoPrefix</c> and the name.
 /// </remarks>
 /// <param name="conf">job conf to add the named output</param>
 /// <param name="namedOutput">
 /// named output name, it has to be a word, letters
 /// and numbers only, cannot be the word 'part' as
 /// that is reserved for the
 /// default output.
 /// </param>
 /// <param name="multi">indicates if the named output is multi</param>
 /// <param name="outputFormatClass">OutputFormat class.</param>
 /// <param name="keyClass">key class</param>
 /// <param name="valueClass">value class</param>
 private static void AddNamedOutput(JobConf conf, string namedOutput, bool multi,
                                    Type outputFormatClass, Type keyClass, Type valueClass)
 {
     CheckNamedOutputName(namedOutput);
     CheckNamedOutput(conf, namedOutput, true);

     string registered = conf.Get(NamedOutputs, string.Empty);
     conf.Set(NamedOutputs, registered + " " + namedOutput);

     // Every per-output setting shares the same key prefix.
     string keyPrefix = MoPrefix + namedOutput;
     conf.SetClass(keyPrefix + Format, outputFormatClass, typeof(OutputFormat));
     conf.SetClass(keyPrefix + Key, keyClass, typeof(object));
     conf.SetClass(keyPrefix + Value, valueClass, typeof(object));
     conf.SetBoolean(keyPrefix + Multi, multi);
 }
Пример #27
0
        /// <summary>Returns the names registered under the named-outputs key.</summary>
        /// <remarks>
        /// The configured value (empty string as the default) is tokenized on spaces.
        /// </remarks>
        /// <param name="conf">job conf</param>
        /// <returns>List of channel Names</returns>
        public static IList <string> GetNamedOutputsList(JobConf conf)
        {
            IList <string> channelNames = new AList <string>();
            string         registered   = conf.Get(NamedOutputs, string.Empty);

            for (StringTokenizer tokens = new StringTokenizer(registered, " "); tokens.HasMoreTokens();)
            {
                channelNames.AddItem(tokens.NextToken());
            }
            return channelNames;
        }
Пример #28
0
 /// <summary>
 /// Stores the job configuration, asserts that its "a" property equals
 /// <c>prop</c>, and then writes the flag named "configure." followed by
 /// <c>name</c>.
 /// </summary>
 /// <remarks>
 /// An <see cref="System.IO.IOException"/> raised while writing the flag is
 /// rethrown wrapped in a <c>RuntimeException</c>.
 /// </remarks>
 /// <param name="conf">job configuration to remember and check</param>
 public virtual void Configure(JobConf conf)
 {
     this.conf = conf;

     string configuredValue = conf.Get("a");
     NUnit.Framework.Assert.AreEqual(prop, configuredValue);

     try
     {
         string flagName = "configure." + name;
         WriteFlag(conf, flagName);
     }
     catch (IOException ioFailure)
     {
         throw new RuntimeException(ioFailure);
     }
 }
Пример #29
0
        /// <summary>
        /// Creates a <c>LineRecordReader</c> for the given split, using the
        /// record delimiter configured under
        /// "textinputformat.record.delimiter" when one is set.
        /// </summary>
        /// <param name="genericSplit">
        /// split to read; it is cast to <c>FileSplit</c> and its string form is
        /// reported as the status
        /// </param>
        /// <param name="job">job configuration holding the optional delimiter</param>
        /// <param name="reporter">receives the split description as status</param>
        /// <returns>a line record reader over the split</returns>
        /// <exception cref="System.IO.IOException"/>
        public override RecordReader <LongWritable, Text> GetRecordReader(InputSplit genericSplit
                                                                          , JobConf job, Reporter reporter)
        {
            reporter.SetStatus(genericSplit.ToString());

            string configuredDelimiter = job.Get("textinputformat.record.delimiter");

            // A missing delimiter is passed on as null bytes.
            byte[] delimiterBytes = (configuredDelimiter == null)
                ? null
                : Sharpen.Runtime.GetBytesForString(configuredDelimiter, Charsets.Utf8);

            FileSplit fileSplit = (FileSplit)genericSplit;
            return(new LineRecordReader(job, fileSplit, delimiterBytes));
        }
Пример #30
0
        /// <summary>
        /// Checks that a <c>JobConf</c> built with the default constructor has a
        /// "hadoop.tmp.dir" value while one built with <c>false</c> does not, and
        /// then runs an identity map/reduce job configured on a
        /// <c>JobConf(false)</c>.
        /// </summary>
        /// <remarks>
        /// The job input is a file with two "hello" lines. When the job produced
        /// output files, the first one must contain exactly two lines, each
        /// containing "hello". The writer and reader are closed in
        /// <c>finally</c> blocks so that a failed write, read or assertion does
        /// not leave the underlying streams open.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestNoDefaults()
        {
            JobConf configuration = new JobConf();

            NUnit.Framework.Assert.IsTrue(configuration.Get("hadoop.tmp.dir", null) != null);
            configuration = new JobConf(false);
            NUnit.Framework.Assert.IsTrue(configuration.Get("hadoop.tmp.dir", null) == null);
            Path         inDir  = new Path("testing/jobconf/input");
            Path         outDir = new Path("testing/jobconf/output");
            OutputStream os     = GetFileSystem().Create(new Path(inDir, "text.txt"));
            TextWriter   wr     = new OutputStreamWriter(os);

            try
            {
                wr.Write("hello\n");
                wr.Write("hello\n");
            }
            finally
            {
                // Close even if a write fails so the stream is not leaked.
                wr.Close();
            }
            JobConf conf = new JobConf(false);

            // Only the file system URI is copied over from the regular job conf.
            conf.Set("fs.defaultFS", CreateJobConf().Get("fs.defaultFS"));
            conf.SetJobName("mr");
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetMapOutputKeyClass(typeof(LongWritable));
            conf.SetMapOutputValueClass(typeof(Text));
            conf.SetOutputFormat(typeof(TextOutputFormat));
            conf.SetOutputKeyClass(typeof(LongWritable));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetMapperClass(typeof(IdentityMapper));
            conf.SetReducerClass(typeof(IdentityReducer));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            JobClient.RunJob(conf);
            Path[] outputFiles = FileUtil.Stat2Paths(GetFileSystem().ListStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter
                                                                                    ()));
            if (outputFiles.Length > 0)
            {
                InputStream    @is     = GetFileSystem().Open(outputFiles[0]);
                BufferedReader reader  = new BufferedReader(new InputStreamReader(@is));
                int            counter = 0;

                try
                {
                    string line = reader.ReadLine();
                    while (line != null)
                    {
                        counter++;
                        NUnit.Framework.Assert.IsTrue(line.Contains("hello"));
                        line = reader.ReadLine();
                    }
                }
                finally
                {
                    // A failed assertion or read must not leave the reader open.
                    reader.Close();
                }
                NUnit.Framework.Assert.AreEqual(2, counter);
            }
        }