/// <summary>
/// HADOOP-4466:
/// This test verifies that the JavaSerialization impl can write to
/// SequenceFiles.
/// </summary>
/// <remarks>
/// HADOOP-4466:
/// This test verifies that the JavaSerialization impl can write to
/// SequenceFiles, by virtue of SequenceFileOutputFormat not being
/// coupled to Writable types; if it were, the job would fail.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestWriteToSequencefile()
{
    JobConf conf = new JobConf(typeof(TestJavaSerialization));
    conf.SetJobName("JavaSerialization");
    FileSystem fs = FileSystem.Get(conf);
    CleanAndCreateInput(fs);
    conf.Set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," +
        "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.SetInputFormat(typeof(TextInputFormat));
    // test we can write to sequence files
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    conf.SetOutputKeyClass(typeof(string));
    conf.SetOutputValueClass(typeof(long));
    conf.SetOutputKeyComparatorClass(typeof(JavaSerializationComparator));
    conf.SetMapperClass(typeof(TestJavaSerialization.WordCountMapper));
    conf.SetReducerClass(typeof(TestJavaSerialization.SumReducer));
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    FileInputFormat.SetInputPaths(conf, InputDir);
    FileOutputFormat.SetOutputPath(conf, OutputDir);
    JobClient.RunJob(conf);
    Path[] outputFiles = FileUtil.Stat2Paths(
        fs.ListStatus(OutputDir, new Utils.OutputFileUtils.OutputFilesFilter()));
    NUnit.Framework.Assert.AreEqual(1, outputFiles.Length);
}
public virtual void TestOldCounterC()
{
    JobConf conf = CreateConfiguration();
    CreateWordsFile(inFiles[3], conf);
    CreateWordsFile(inFiles[4], conf);
    long inputSize = 0;
    inputSize += GetFileSize(inFiles[0]);
    inputSize += GetFileSize(inFiles[1]);
    inputSize += GetFileSize(inFiles[2]);
    inputSize += GetFileSize(inFiles[3]);
    inputSize += GetFileSize(inFiles[4]);
    conf.SetNumMapTasks(4);
    conf.SetInt(JobContext.IoSortFactor, 3);
    FileInputFormat.SetInputPaths(conf, InDir);
    FileOutputFormat.SetOutputPath(conf, new Path(OutDir, "outputO2"));
    RunningJob myJob = JobClient.RunJob(conf);
    Counters c1 = myJob.GetCounters();
    // As above, each map spills 2^14 records, so 5 maps spill 81920.
    // 1st merge: read + write = 6 * 8192 (only the written records count as spills)
    // final merge: unmerged = 2 * 8192
    // Total reduce: 3 * 8192 + 2 * 8192 = 40960
    // 5 files, 5120 (= 5 * 1024) rec/file = 25600 input records
    // 4 records/line = 102400 output records
    ValidateCounters(c1, 122880, 25600, 102400);
    ValidateFileCounters(c1, inputSize, 0, 0, 0);
}
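// Back-of-envelope check of the counter expectations above (arithmetic only, not
// part of the original test; the spill accounting follows the comments in the body):
//   map spills:     5 maps * 2^14 records/map               = 81920
//   reduce spills:  3 * 8192 (1st-merge writes) + 2 * 8192  = 40960
//   total spilled:  81920 + 40960                           = 122880
//   input records:  5 files * 5120 records/file             = 25600
//   output records: 25600 lines * 4 records/line            = 102400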
/// <exception cref="System.Exception"/>
public virtual void TestComplexNameWithRegex()
{
    OutputStream os = GetFileSystem().Create(new Path(GetInputDir(), "text.txt"));
    TextWriter wr = new OutputStreamWriter(os);
    wr.Write("b a\n");
    wr.Close();
    JobConf conf = CreateJobConf();
    conf.SetJobName("name \\Evalue]");
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetMapperClass(typeof(IdentityMapper));
    FileInputFormat.SetInputPaths(conf, GetInputDir());
    FileOutputFormat.SetOutputPath(conf, GetOutputDir());
    JobClient.RunJob(conf);
    Path[] outputFiles = FileUtil.Stat2Paths(
        GetFileSystem().ListStatus(GetOutputDir(), new Utils.OutputFileUtils.OutputFilesFilter()));
    NUnit.Framework.Assert.AreEqual(1, outputFiles.Length);
    InputStream @is = GetFileSystem().Open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(@is));
    NUnit.Framework.Assert.AreEqual("0\tb a", reader.ReadLine());
    NUnit.Framework.Assert.IsNull(reader.ReadLine());
    reader.Close();
}
/// <exception cref="System.IO.IOException"/>
internal static void ConfigureWordCount(FileSystem fs, JobConf conf, string input,
    int numMaps, int numReduces, Path inDir, Path outDir)
{
    fs.Delete(outDir, true);
    if (!fs.Mkdirs(inDir))
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }
    DataOutputStream file = fs.Create(new Path(inDir, "part-0"));
    file.WriteBytes(input);
    file.Close();
    FileSystem.SetDefaultUri(conf, fs.GetUri());
    conf.Set(JTConfig.FrameworkName, JTConfig.YarnFrameworkName);
    conf.SetJobName("wordcount");
    conf.SetInputFormat(typeof(TextInputFormat));
    // the keys are words (strings)
    conf.SetOutputKeyClass(typeof(Text));
    // the values are counts (ints)
    conf.SetOutputValueClass(typeof(IntWritable));
    conf.Set("mapred.mapper.class", "testjar.ClassWordCount$MapClass");
    conf.Set("mapred.combine.class", "testjar.ClassWordCount$Reduce");
    conf.Set("mapred.reducer.class", "testjar.ClassWordCount$Reduce");
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReduces);
    // set the tests jar file
    conf.SetJarByClass(typeof(TestMiniMRClasspath));
}
public virtual void Configure()
{
    Path testdir = new Path(TestDir.GetAbsolutePath());
    Path inDir = new Path(testdir, "in");
    Path outDir = new Path(testdir, "out");
    FileSystem fs = FileSystem.Get(conf);
    fs.Delete(testdir, true);
    conf.SetInt(JobContext.IoSortMb, 1);
    conf.SetInputFormat(typeof(SequenceFileInputFormat));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetMapperClass(typeof(TestMapOutputType.TextGen));
    conf.SetReducerClass(typeof(TestMapOutputType.TextReduce));
    conf.SetOutputKeyClass(typeof(Text));
    conf.SetOutputValueClass(typeof(Text));
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    if (!fs.Mkdirs(testdir))
    {
        throw new IOException("Mkdirs failed to create " + testdir.ToString());
    }
    if (!fs.Mkdirs(inDir))
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }
    Path inFile = new Path(inDir, "part0");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile,
        typeof(Text), typeof(Text));
    writer.Append(new Text("rec: 1"), new Text("Hello"));
    writer.Close();
    jc = new JobClient(conf);
}
/// <exception cref="System.Exception"/>
private static void RunTestLazyOutput(JobConf job, Path output, int numReducers,
    bool createLazily)
{
    job.SetJobName("test-lazy-output");
    FileInputFormat.SetInputPaths(job, Input);
    FileOutputFormat.SetOutputPath(job, output);
    job.SetInputFormat(typeof(TextInputFormat));
    job.SetMapOutputKeyClass(typeof(LongWritable));
    job.SetMapOutputValueClass(typeof(Text));
    job.SetOutputKeyClass(typeof(LongWritable));
    job.SetOutputValueClass(typeof(Text));
    job.SetMapperClass(typeof(TestLazyOutput.TestMapper));
    job.SetReducerClass(typeof(TestLazyOutput.TestReducer));
    JobClient client = new JobClient(job);
    job.SetNumReduceTasks(numReducers);
    if (createLazily)
    {
        LazyOutputFormat.SetOutputFormatClass(job, typeof(TextOutputFormat));
    }
    else
    {
        job.SetOutputFormat(typeof(TextOutputFormat));
    }
    JobClient.RunJob(job);
}
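// Note on the branch above: LazyOutputFormat wraps the real output format and defers
// creating the part file until the first record is actually written. So with
// createLazily == true, a task that emits nothing produces no output file at all,
// whereas the plain TextOutputFormat still creates an empty part file.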
public virtual void TestStatusLimit()
{
    Path test = new Path(testRootTempDir, "testStatusLimit");
    Configuration conf = new Configuration();
    Path inDir = new Path(test, "in");
    Path outDir = new Path(test, "out");
    FileSystem fs = FileSystem.Get(conf);
    if (fs.Exists(inDir))
    {
        fs.Delete(inDir, true);
    }
    fs.Mkdirs(inDir);
    DataOutputStream file = fs.Create(new Path(inDir, "part-" + 0));
    file.WriteBytes("testStatusLimit");
    file.Close();
    if (fs.Exists(outDir))
    {
        fs.Delete(outDir, true);
    }
    Job job = Job.GetInstance(conf, "testStatusLimit");
    job.SetMapperClass(typeof(TestReporter.StatusLimitMapper));
    job.SetNumReduceTasks(0);
    FileInputFormat.AddInputPath(job, inDir);
    FileOutputFormat.SetOutputPath(job, outDir);
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue("Job failed", job.IsSuccessful());
}
/// <summary>Test using the gzip codec with two input files.</summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestGzipWithTwoInputs()
{
    CompressionCodec gzip = new GzipCodec();
    localFs.Delete(workDir, true);
    FixedLengthInputFormat format = new FixedLengthInputFormat();
    JobConf job = new JobConf(defaultConf);
    FixedLengthInputFormat.SetRecordLength(job, 5);
    FileInputFormat.SetInputPaths(job, workDir);
    ReflectionUtils.SetConf(gzip, job);
    format.Configure(job);
    // Create files containing fixed-length records, 5 bytes per record.
    WriteFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
        "one  two  threefour five six  seveneightnine ten  ");
    WriteFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
        "ten  nine eightsevensix  five four threetwo  one  ");
    InputSplit[] splits = format.GetSplits(job, 100);
    NUnit.Framework.Assert.AreEqual("compressed splits == 2", 2, splits.Length);
    FileSplit tmp = (FileSplit)splits[0];
    if (tmp.GetPath().GetName().Equals("part2.txt.gz"))
    {
        splits[0] = splits[1];
        splits[1] = tmp;
    }
    IList<string> results = ReadSplit(format, splits[0], job);
    NUnit.Framework.Assert.AreEqual("splits[0] length", 10, results.Count);
    NUnit.Framework.Assert.AreEqual("splits[0][5]", "six  ", results[5]);
    results = ReadSplit(format, splits[1], job);
    NUnit.Framework.Assert.AreEqual("splits[1] length", 10, results.Count);
    NUnit.Framework.Assert.AreEqual("splits[1][0]", "ten  ", results[0]);
    NUnit.Framework.Assert.AreEqual("splits[1][1]", "nine ", results[1]);
}
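// Layout note (not in the original test): every word in the two inputs above is
// padded to exactly 5 bytes ("one  ", "two  ", "three", "four ", ...), so each
// 50-byte file decodes into exactly 10 fixed-length records; that is what the
// split-length and record-content assertions check.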
/// <summary>Test with no record length set.</summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestNoRecordLength()
{
    localFs.Delete(workDir, true);
    Path file = new Path(workDir, "testFormat.txt");
    CreateFile(file, null, 10, 10);
    // Intentionally do NOT set the fixed-length record length config property.
    JobConf job = new JobConf(defaultConf);
    FileInputFormat.SetInputPaths(job, workDir);
    FixedLengthInputFormat format = new FixedLengthInputFormat();
    format.Configure(job);
    InputSplit[] splits = format.GetSplits(job, 1);
    bool exceptionThrown = false;
    foreach (InputSplit split in splits)
    {
        try
        {
            RecordReader<LongWritable, BytesWritable> reader =
                format.GetRecordReader(split, job, voidReporter);
        }
        catch (IOException ioe)
        {
            exceptionThrown = true;
            Log.Info("Exception message: " + ioe.Message);
        }
    }
    NUnit.Framework.Assert.IsTrue("Exception for not setting record length:",
        exceptionThrown);
}
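// For contrast, a minimal sketch of the one-line fix that would let GetRecordReader
// succeed here, assuming CreateFile(file, null, 10, 10) writes 10-byte records
// (the record length is the only property this test leaves unset):
//
//   FixedLengthInputFormat.SetRecordLength(job, 10);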
public virtual void TestOldCounterB()
{
    JobConf conf = CreateConfiguration();
    CreateWordsFile(inFiles[3], conf);
    RemoveWordsFile(inFiles[4], conf);
    long inputSize = 0;
    inputSize += GetFileSize(inFiles[0]);
    inputSize += GetFileSize(inFiles[1]);
    inputSize += GetFileSize(inFiles[2]);
    inputSize += GetFileSize(inFiles[3]);
    conf.SetNumMapTasks(4);
    conf.SetInt(JobContext.IoSortFactor, 2);
    FileInputFormat.SetInputPaths(conf, InDir);
    FileOutputFormat.SetOutputPath(conf, new Path(OutDir, "outputO1"));
    RunningJob myJob = JobClient.RunJob(conf);
    Counters c1 = myJob.GetCounters();
    // As above, each map spills 2^14 records, so 4 maps spill 2^16 records.
    // In the reduce, there are two intermediate merges before the reduce.
    // 1st merge: read + write = 8192 * 4
    // 2nd merge: read + write = 8192 * 4
    // final merge: 0
    // Total reduce: 32768
    // Total: map + reduce = 2^16 + 2^15 = 98304
    // 4 files, 5120 (= 5 * 1024) rec/file = 20480 input records
    // 4 records/line = 81920 output records
    ValidateCounters(c1, 98304, 20480, 81920);
    ValidateFileCounters(c1, inputSize, 0, 0, 0);
}
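// Back-of-envelope check of these expectations (arithmetic only, not part of the
// original test; only records written during a merge count as reduce-side spills):
//   map spills:     4 maps * 2^14            = 65536
//   reduce spills:  2 merges * 16384 writes  = 32768
//   total spilled:  65536 + 32768            = 98304
//   input records:  4 files * 5120           = 20480
//   output records: 20480 * 4                = 81920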
/// <exception cref="System.Exception"/>
private void RunMapReduce(JobConf conf, IList<string> mapperBadRecords,
    IList<string> redBadRecords)
{
    CreateInput();
    conf.SetJobName("mr");
    conf.SetNumMapTasks(1);
    conf.SetNumReduceTasks(1);
    conf.SetInt(JobContext.TaskTimeout, 30 * 1000);
    SkipBadRecords.SetMapperMaxSkipRecords(conf, long.MaxValue);
    SkipBadRecords.SetReducerMaxSkipGroups(conf, long.MaxValue);
    SkipBadRecords.SetAttemptsToStartSkipping(conf, 0);
    // the number of attempts needed to successfully complete the task
    // depends on the number of bad records
    conf.SetMaxMapAttempts(SkipBadRecords.GetAttemptsToStartSkipping(conf) + 1 +
        mapperBadRecords.Count);
    conf.SetMaxReduceAttempts(SkipBadRecords.GetAttemptsToStartSkipping(conf) + 1 +
        redBadRecords.Count);
    FileInputFormat.SetInputPaths(conf, GetInputDir());
    FileOutputFormat.SetOutputPath(conf, GetOutputDir());
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetMapOutputKeyClass(typeof(LongWritable));
    conf.SetMapOutputValueClass(typeof(Text));
    conf.SetOutputFormat(typeof(TextOutputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    RunningJob runningJob = JobClient.RunJob(conf);
    ValidateOutput(conf, runningJob, mapperBadRecords, redBadRecords);
}
/// <exception cref="System.IO.IOException"/>
private Path InitFiles(FileSystem fs, int numFiles, int numBytes)
{
    Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
    Path multiFileDir = new Path(dir, "test.multifile");
    fs.Delete(multiFileDir, true);
    fs.Mkdirs(multiFileDir);
    Log.Info("Creating " + numFiles + " file(s) in " + multiFileDir);
    for (int i = 0; i < numFiles; i++)
    {
        Path path = new Path(multiFileDir, "file_" + i);
        FSDataOutputStream @out = fs.Create(path);
        if (numBytes == -1)
        {
            numBytes = rand.Next(MaxBytes);
        }
        for (int j = 0; j < numBytes; j++)
        {
            @out.Write(rand.Next());
        }
        @out.Close();
        if (Log.IsDebugEnabled())
        {
            Log.Debug("Created file " + path + " with length " + numBytes);
        }
        lengths[path.GetName()] = System.Convert.ToInt64(numBytes);
    }
    FileInputFormat.SetInputPaths(job, multiFileDir);
    return multiFileDir;
}
/// <exception cref="System.Exception"/>
private void _testInputFiles(bool withFilter, bool withGlob)
{
    ICollection<Path> createdFiles = CreateFiles();
    JobConf conf = new JobConf();
    Path inputDir = (withGlob) ? new Path(workDir, "a*") : workDir;
    FileInputFormat.SetInputPaths(conf, inputDir);
    conf.SetInputFormat(typeof(TestFileInputFormatPathFilter.DummyFileInputFormat));
    if (withFilter)
    {
        FileInputFormat.SetInputPathFilter(conf,
            typeof(TestFileInputFormatPathFilter.TestPathFilter));
    }
    TestFileInputFormatPathFilter.DummyFileInputFormat inputFormat =
        (TestFileInputFormatPathFilter.DummyFileInputFormat)conf.GetInputFormat();
    ICollection<Path> computedFiles = new HashSet<Path>();
    foreach (FileStatus file in inputFormat.ListStatus(conf))
    {
        computedFiles.AddItem(file.GetPath());
    }
    createdFiles.Remove(localFs.MakeQualified(new Path(workDir, "_hello")));
    createdFiles.Remove(localFs.MakeQualified(new Path(workDir, ".hello")));
    if (withFilter)
    {
        createdFiles.Remove(localFs.MakeQualified(new Path(workDir, "aa")));
        createdFiles.Remove(localFs.MakeQualified(new Path(workDir, "bb")));
    }
    if (withGlob)
    {
        createdFiles.Remove(localFs.MakeQualified(new Path(workDir, "b")));
        createdFiles.Remove(localFs.MakeQualified(new Path(workDir, "bb")));
    }
    NUnit.Framework.Assert.AreEqual(createdFiles, computedFiles);
}
// configure a job
/// <exception cref="System.IO.IOException"/>
private void Configure(JobConf conf, Path inDir, Path outDir, string input,
    Type map, Type reduce)
{
    // set up the input file system and write input text
    FileSystem inFs = inDir.GetFileSystem(conf);
    FileSystem outFs = outDir.GetFileSystem(conf);
    outFs.Delete(outDir, true);
    if (!inFs.Mkdirs(inDir))
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }
    {
        // write input into input file
        DataOutputStream file = inFs.Create(new Path(inDir, "part-0"));
        file.WriteBytes(input);
        file.Close();
    }
    // configure the mapred Job which creates a tempfile in map
    conf.SetJobName("testmap");
    conf.SetMapperClass(map);
    conf.SetReducerClass(reduce);
    conf.SetNumMapTasks(1);
    conf.SetNumReduceTasks(0);
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    string TestRootDir = new Path(Runtime.GetProperty("test.build.data", "/tmp"))
        .ToString().Replace(' ', '+');
    conf.Set("test.build.data", TestRootDir);
}
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="Sharpen.URISyntaxException"/>
private void TestWithConf(Configuration conf)
{
    // Create a temporary file of length 1.
    Path first = CreateTempFile("distributed.first", "x");
    // Create three jars with a single file inside them.
    Path second = MakeJar(new Path(TestRootDir, "distributed.second.jar"), 2);
    Path third = MakeJar(new Path(TestRootDir, "distributed.third.jar"), 3);
    Path fourth = MakeJar(new Path(TestRootDir, "distributed.fourth.jar"), 4);
    Job job = Job.GetInstance(conf);
    job.SetMapperClass(typeof(TestMRWithDistributedCache.DistributedCacheCheckerMapper));
    job.SetReducerClass(typeof(TestMRWithDistributedCache.DistributedCacheCheckerReducer));
    job.SetOutputFormatClass(typeof(NullOutputFormat));
    FileInputFormat.SetInputPaths(job, first);
    // Create the job configuration entries for the cached files.
    job.AddCacheFile(new URI(first.ToUri().ToString() + "#distributed.first.symlink"));
    job.AddFileToClassPath(second);
    job.AddArchiveToClassPath(third);
    job.AddCacheArchive(fourth.ToUri());
    job.SetMaxMapAttempts(1);  // speed up failures
    job.Submit();
    NUnit.Framework.Assert.IsTrue(job.WaitForCompletion(false));
}
/// <summary>Test using the gzip codec for reading.</summary>
/// <exception cref="System.IO.IOException"/>
public static void TestGzip()
{
    JobConf job = new JobConf();
    CompressionCodec gzip = new GzipCodec();
    ReflectionUtils.SetConf(gzip, job);
    localFs.Delete(workDir, true);
    WriteFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
        "line-1\tthe quick\nline-2\tbrown\nline-3\tfox jumped\nline-4\tover\nline-5\t the lazy\nline-6\t dog\n");
    WriteFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
        "line-1\tthis is a test\nline-1\tof gzip\n");
    FileInputFormat.SetInputPaths(job, workDir);
    KeyValueTextInputFormat format = new KeyValueTextInputFormat();
    format.Configure(job);
    InputSplit[] splits = format.GetSplits(job, 100);
    NUnit.Framework.Assert.AreEqual("compressed splits == 2", 2, splits.Length);
    FileSplit tmp = (FileSplit)splits[0];
    if (tmp.GetPath().GetName().Equals("part2.txt.gz"))
    {
        splits[0] = splits[1];
        splits[1] = tmp;
    }
    IList<Text> results = ReadSplit(format, splits[0], job);
    NUnit.Framework.Assert.AreEqual("splits[0] length", 6, results.Count);
    NUnit.Framework.Assert.AreEqual("splits[0][5]", " dog", results[5].ToString());
    results = ReadSplit(format, splits[1], job);
    NUnit.Framework.Assert.AreEqual("splits[1] length", 2, results.Count);
    NUnit.Framework.Assert.AreEqual("splits[1][0]", "this is a test", results[0].ToString());
    NUnit.Framework.Assert.AreEqual("splits[1][1]", "of gzip", results[1].ToString());
}
/// <exception cref="System.Exception"/>
private string RunJob()
{
    OutputStream os = GetFileSystem().Create(new Path(GetInputDir(), "text.txt"));
    TextWriter wr = new OutputStreamWriter(os);
    wr.Write("hello1\n");
    wr.Write("hello2\n");
    wr.Write("hello3\n");
    wr.Close();
    JobConf conf = CreateJobConf();
    conf.SetJobName("mr");
    conf.SetJobPriority(JobPriority.High);
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetMapOutputKeyClass(typeof(LongWritable));
    conf.SetMapOutputValueClass(typeof(Text));
    conf.SetOutputFormat(typeof(TextOutputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetMapperClass(typeof(IdentityMapper));
    conf.SetReducerClass(typeof(IdentityReducer));
    FileInputFormat.SetInputPaths(conf, GetInputDir());
    FileOutputFormat.SetOutputPath(conf, GetOutputDir());
    return JobClient.RunJob(conf).GetID().ToString();
}
/// <exception cref="System.Exception"/>
public static Counters RunJob(JobConf conf)
{
    conf.SetMapperClass(typeof(TestReduceFetchFromPartialMem.MapMB));
    conf.SetReducerClass(typeof(TestReduceFetchFromPartialMem.MBValidate));
    conf.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    conf.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    conf.SetNumReduceTasks(1);
    conf.SetInputFormat(typeof(TestReduceFetchFromPartialMem.FakeIF));
    conf.SetNumTasksToExecutePerJvm(1);
    conf.SetInt(JobContext.MapMaxAttempts, 0);
    conf.SetInt(JobContext.ReduceMaxAttempts, 0);
    FileInputFormat.SetInputPaths(conf, new Path("/in"));
    Path outp = new Path("/out");
    FileOutputFormat.SetOutputPath(conf, outp);
    RunningJob job = null;
    try
    {
        job = JobClient.RunJob(conf);
        NUnit.Framework.Assert.IsTrue(job.IsSuccessful());
    }
    finally
    {
        FileSystem fs = dfsCluster.GetFileSystem();
        if (fs.Exists(outp))
        {
            fs.Delete(outp, true);
        }
    }
    return job.GetCounters();
}
// Start a job with the specified input and return its RunningJob object
/// <exception cref="System.IO.IOException"/>
internal static RunningJob RunJob(JobConf conf, Path inDir, Path outDir,
    int numMaps, int numReds, string input)
{
    FileSystem fs = FileSystem.Get(conf);
    if (fs.Exists(outDir))
    {
        fs.Delete(outDir, true);
    }
    if (!fs.Exists(inDir))
    {
        fs.Mkdirs(inDir);
    }
    for (int i = 0; i < numMaps; ++i)
    {
        DataOutputStream file = fs.Create(new Path(inDir, "part-" + i));
        file.WriteBytes(input);
        file.Close();
    }
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReds);
    JobClient jobClient = new JobClient(conf);
    RunningJob job = jobClient.SubmitJob(conf);
    return job;
}
private static IntWritable DeduceInputFile(JobConf job)
{
    Path[] inputPaths = FileInputFormat.GetInputPaths(job);
    Path inputFile = new Path(job.Get(JobContext.MapInputFile));
    // value == one for sort-input; value == two for sort-output
    return inputFile.GetParent().Equals(inputPaths[0]) ? sortInput : sortOutput;
}
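// Illustration (paths are hypothetical, for exposition only): with inputPaths[0] ==
// "/sort/in" and map.input.file == "/sort/in/part-0", GetParent() yields "/sort/in",
// which equals inputPaths[0], so the record is classified as sort-input; a file such
// as "/sort/out/part-0" would be classified as sort-output.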
/// <summary>The main driver for the word count map/reduce program.</summary>
/// <remarks>
/// The main driver for the word count map/reduce program.
/// Invoke this method to submit the map/reduce job.
/// </remarks>
/// <exception cref="System.IO.IOException">
/// When there are communication problems with the job tracker.
/// </exception>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    JobConf conf = new JobConf(GetConf(), typeof(WordCount));
    conf.SetJobName("wordcount");
    // the keys are words (strings)
    conf.SetOutputKeyClass(typeof(Text));
    // the values are counts (ints)
    conf.SetOutputValueClass(typeof(IntWritable));
    conf.SetMapperClass(typeof(WordCount.MapClass));
    conf.SetCombinerClass(typeof(WordCount.Reduce));
    conf.SetReducerClass(typeof(WordCount.Reduce));
    IList<string> other_args = new AList<string>();
    for (int i = 0; i < args.Length; ++i)
    {
        try
        {
            if ("-m".Equals(args[i]))
            {
                conf.SetNumMapTasks(System.Convert.ToInt32(args[++i]));
            }
            else if ("-r".Equals(args[i]))
            {
                conf.SetNumReduceTasks(System.Convert.ToInt32(args[++i]));
            }
            else
            {
                other_args.AddItem(args[i]);
            }
        }
        catch (FormatException)
        {
            System.Console.Out.WriteLine("ERROR: Integer expected instead of " + args[i]);
            return PrintUsage();
        }
        catch (IndexOutOfRangeException)
        {
            System.Console.Out.WriteLine("ERROR: Required parameter missing from " + args[i - 1]);
            return PrintUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.Count != 2)
    {
        System.Console.Out.WriteLine("ERROR: Wrong number of parameters: " +
            other_args.Count + " instead of 2.");
        return PrintUsage();
    }
    FileInputFormat.SetInputPaths(conf, other_args[0]);
    FileOutputFormat.SetOutputPath(conf, new Path(other_args[1]));
    JobClient.RunJob(conf);
    return 0;
}
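// Example invocation (illustrative only; the jar and driver names depend on how
// WordCount is packaged):
//
//   hadoop jar wordcount.jar WordCount -m 4 -r 2 /user/me/in /user/me/out
//
// "-m" and "-r" set the map and reduce task counts; the two remaining arguments
// become the input and output paths.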
/// <exception cref="System.IO.IOException"/>
internal static void CheckRecords(Configuration defaults, int noMaps, int noReduces,
    Path sortInput, Path sortOutput)
{
    JobConf jobConf = new JobConf(defaults, typeof(SortValidator.RecordChecker));
    jobConf.SetJobName("sortvalidate-record-checker");
    jobConf.SetInputFormat(typeof(SequenceFileInputFormat));
    jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    jobConf.SetOutputKeyClass(typeof(BytesWritable));
    jobConf.SetOutputValueClass(typeof(IntWritable));
    jobConf.SetMapperClass(typeof(SortValidator.RecordChecker.Map));
    jobConf.SetReducerClass(typeof(SortValidator.RecordChecker.Reduce));
    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.GetClusterStatus();
    if (noMaps == -1)
    {
        noMaps = cluster.GetTaskTrackers() * jobConf.GetInt(MapsPerHost, 10);
    }
    if (noReduces == -1)
    {
        noReduces = (int)(cluster.GetMaxReduceTasks() * 0.9);
        string sortReduces = jobConf.Get(ReducesPerHost);
        if (sortReduces != null)
        {
            noReduces = cluster.GetTaskTrackers() * System.Convert.ToInt32(sortReduces);
        }
    }
    jobConf.SetNumMapTasks(noMaps);
    jobConf.SetNumReduceTasks(noReduces);
    FileInputFormat.SetInputPaths(jobConf, sortInput);
    FileInputFormat.AddInputPath(jobConf, sortOutput);
    Path outputPath = new Path("/tmp/sortvalidate/recordchecker");
    FileSystem fs = FileSystem.Get(defaults);
    if (fs.Exists(outputPath))
    {
        fs.Delete(outputPath, true);
    }
    FileOutputFormat.SetOutputPath(jobConf, outputPath);
    // Uncomment to run locally in a single process
    //jobConf.Set(JTConfig.JT, "local");
    Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
    System.Console.Out.WriteLine("\nSortValidator.RecordChecker: Running on " +
        cluster.GetTaskTrackers() + " nodes to validate sort from " + inputPaths[0] +
        ", " + inputPaths[1] + " into " + FileOutputFormat.GetOutputPath(jobConf) +
        " with " + noReduces + " reduces.");
    DateTime startTime = new DateTime();
    System.Console.Out.WriteLine("Job started: " + startTime);
    JobClient.RunJob(jobConf);
    DateTime endTime = new DateTime();
    System.Console.Out.WriteLine("Job ended: " + endTime);
    System.Console.Out.WriteLine("The job took " +
        (endTime.GetTime() - startTime.GetTime()) / 1000 + " seconds.");
}
public virtual void TestJob()
{
    Job job = CreateJob();
    FileInputFormat.SetInputPaths(job, inDir);
    FileOutputFormat.SetOutputPath(job, new Path(outDir, "testJob"));
    NUnit.Framework.Assert.IsTrue(job.WaitForCompletion(true));
    ValidateCounters(job.GetCounters(), 5, 25, 5, 5);
}
/// <exception cref="System.Exception"/>
private void CheckCompression(bool compressMapOutputs,
    SequenceFile.CompressionType redCompression, bool includeCombine)
{
    JobConf conf = new JobConf(typeof(TestMapRed));
    Path testdir = new Path(TestDir.GetAbsolutePath());
    Path inDir = new Path(testdir, "in");
    Path outDir = new Path(testdir, "out");
    FileSystem fs = FileSystem.Get(conf);
    fs.Delete(testdir, true);
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetMapperClass(typeof(TestMapRed.MyMap));
    conf.SetReducerClass(typeof(TestMapRed.MyReduce));
    conf.SetOutputKeyClass(typeof(Text));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    if (includeCombine)
    {
        conf.SetCombinerClass(typeof(IdentityReducer));
    }
    conf.SetCompressMapOutput(compressMapOutputs);
    SequenceFileOutputFormat.SetOutputCompressionType(conf, redCompression);
    try
    {
        if (!fs.Mkdirs(testdir))
        {
            throw new IOException("Mkdirs failed to create " + testdir.ToString());
        }
        if (!fs.Mkdirs(inDir))
        {
            throw new IOException("Mkdirs failed to create " + inDir.ToString());
        }
        Path inFile = new Path(inDir, "part0");
        DataOutputStream f = fs.Create(inFile);
        f.WriteBytes("Owen was here\n");
        f.WriteBytes("Hadoop is fun\n");
        f.WriteBytes("Is this done, yet?\n");
        f.Close();
        RunningJob rj = JobClient.RunJob(conf);
        NUnit.Framework.Assert.IsTrue("job was complete", rj.IsComplete());
        NUnit.Framework.Assert.IsTrue("job was successful", rj.IsSuccessful());
        Path output = new Path(outDir, Task.GetOutputName(0));
        NUnit.Framework.Assert.IsTrue("reduce output exists " + output, fs.Exists(output));
        SequenceFile.Reader rdr = new SequenceFile.Reader(fs, output, conf);
        NUnit.Framework.Assert.AreEqual("is reduce output compressed " + output,
            redCompression != SequenceFile.CompressionType.None, rdr.IsCompressed());
        rdr.Close();
    }
    finally
    {
        fs.Delete(testdir, true);
    }
}
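// Note (not in the original test): SetCompressMapOutput controls compression of the
// intermediate map output only, while SetOutputCompressionType controls the final
// SequenceFile output; the IsCompressed() assertion at the end checks only the latter.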
/// <exception cref="System.Exception"/>
public virtual void TestFormat()
{
    JobConf job = new JobConf(conf);
    Reporter reporter = Reporter.Null;
    Random random = new Random();
    long seed = random.NextLong();
    Log.Info("seed = " + seed);
    random.SetSeed(seed);
    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(job, workDir);
    int length = 10000;
    int numFiles = 10;
    // create files of various lengths
    CreateFiles(length, numFiles, random);
    // create a combine split for the files
    InputFormat<IntWritable, BytesWritable> format =
        new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
    IntWritable key = new IntWritable();
    BytesWritable value = new BytesWritable();
    for (int i = 0; i < 3; i++)
    {
        int numSplits = random.Next(length / (SequenceFile.SyncInterval / 20)) + 1;
        Log.Info("splitting: requesting = " + numSplits);
        InputSplit[] splits = format.GetSplits(job, numSplits);
        Log.Info("splitting: got = " + splits.Length);
        // we should have a single split, as the length is comfortably smaller
        // than the block size
        NUnit.Framework.Assert.AreEqual("We got more than one split!", 1, splits.Length);
        InputSplit split = splits[0];
        NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit",
            typeof(CombineFileSplit), split.GetType());
        // check each split
        BitSet bits = new BitSet(length);
        RecordReader<IntWritable, BytesWritable> reader =
            format.GetRecordReader(split, job, reporter);
        try
        {
            while (reader.Next(key, value))
            {
                NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(key.Get()));
                bits.Set(key.Get());
            }
        }
        finally
        {
            reader.Close();
        }
        NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality());
    }
}
/// <summary>Creates and runs an MR job.</summary>
/// <param name="conf"/>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.TypeLoadException"/>
public virtual void CreateAndRunJob(Configuration conf)
{
    JobConf job = new JobConf(conf);
    job.SetJarByClass(typeof(TestLineRecordReaderJobs));
    job.SetMapperClass(typeof(IdentityMapper));
    job.SetReducerClass(typeof(IdentityReducer));
    FileInputFormat.AddInputPath(job, inputDir);
    FileOutputFormat.SetOutputPath(job, outputDir);
    JobClient.RunJob(job);
}
/// <exception cref="System.IO.IOException"/>
internal static string LaunchExternal(URI uri, JobConf conf, string input,
    int numMaps, int numReduces)
{
    Path inDir = new Path("/testing/ext/input");
    Path outDir = new Path("/testing/ext/output");
    FileSystem fs = FileSystem.Get(uri, conf);
    fs.Delete(outDir, true);
    if (!fs.Mkdirs(inDir))
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }
    {
        DataOutputStream file = fs.Create(new Path(inDir, "part-0"));
        file.WriteBytes(input);
        file.Close();
    }
    FileSystem.SetDefaultUri(conf, uri);
    conf.Set(JTConfig.FrameworkName, JTConfig.YarnFrameworkName);
    conf.SetJobName("wordcount");
    conf.SetInputFormat(typeof(TextInputFormat));
    // the values are counts (ints)
    conf.SetOutputValueClass(typeof(IntWritable));
    // the keys are the messages
    conf.Set(JobContext.OutputKeyClass, "testjar.ExternalWritable");
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReduces);
    conf.Set("mapred.mapper.class", "testjar.ExternalMapperReducer");
    conf.Set("mapred.reducer.class", "testjar.ExternalMapperReducer");
    // set the tests jar file
    conf.SetJarByClass(typeof(TestMiniMRClasspath));
    JobClient.RunJob(conf);
    StringBuilder result = new StringBuilder();
    Path[] fileList = FileUtil.Stat2Paths(fs.ListStatus(outDir,
        new Utils.OutputFileUtils.OutputFilesFilter()));
    for (int i = 0; i < fileList.Length; ++i)
    {
        BufferedReader file = new BufferedReader(new InputStreamReader(fs.Open(fileList[i])));
        string line = file.ReadLine();
        while (line != null)
        {
            result.Append(line);
            line = file.ReadLine();
            result.Append("\n");
        }
        file.Close();
    }
    return result.ToString();
}
/// <exception cref="System.Exception"/>
public static void Launch()
{
    JobConf conf = new JobConf(typeof(Org.Apache.Hadoop.Mapred.TestFieldSelection));
    FileSystem fs = FileSystem.Get(conf);
    int numOfInputLines = 10;
    Path OutputDir = new Path("build/test/output_for_field_selection_test");
    Path InputDir = new Path("build/test/input_for_field_selection_test");
    string inputFile = "input.txt";
    fs.Delete(InputDir, true);
    fs.Mkdirs(InputDir);
    fs.Delete(OutputDir, true);
    StringBuilder inputData = new StringBuilder();
    StringBuilder expectedOutput = new StringBuilder();
    TestMRFieldSelection.ConstructInputOutputData(inputData, expectedOutput, numOfInputLines);
    FSDataOutputStream fileOut = fs.Create(new Path(InputDir, inputFile));
    fileOut.Write(Sharpen.Runtime.GetBytesForString(inputData.ToString(), "utf-8"));
    fileOut.Close();
    System.Console.Out.WriteLine("inputData:");
    System.Console.Out.WriteLine(inputData.ToString());
    JobConf job = new JobConf(conf, typeof(Org.Apache.Hadoop.Mapred.TestFieldSelection));
    FileInputFormat.SetInputPaths(job, InputDir);
    job.SetInputFormat(typeof(TextInputFormat));
    job.SetMapperClass(typeof(FieldSelectionMapReduce));
    job.SetReducerClass(typeof(FieldSelectionMapReduce));
    FileOutputFormat.SetOutputPath(job, OutputDir);
    job.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputFormat(typeof(TextOutputFormat));
    job.SetNumReduceTasks(1);
    job.Set(FieldSelectionHelper.DataFieldSeperator, "-");
    job.Set(FieldSelectionHelper.MapOutputKeyValueSpec, "6,5,1-3:0-");
    job.Set(FieldSelectionHelper.ReduceOutputKeyValueSpec, ":4,3,2,1,0,0-");
    JobClient.RunJob(job);
    //
    // Finally, compare the job's actual output with the expected
    // output constructed above.
    //
    bool success = true;
    Path outPath = new Path(OutputDir, "part-00000");
    string outdata = MapReduceTestUtil.ReadOutput(outPath, job);
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), outdata);
    fs.Delete(OutputDir, true);
    fs.Delete(InputDir, true);
}
public virtual void TestNullKeys()
{
    JobConf conf = new JobConf(typeof(TestMapRed));
    FileSystem fs = FileSystem.GetLocal(conf);
    HashSet<string> values = new HashSet<string>();
    string m = "AAAAAAAAAAAAAA";
    for (int i = 1; i < 11; ++i)
    {
        values.AddItem(m);
        m = m.Replace((char)('A' + i - 1), (char)('A' + i));
    }
    Path testdir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified(fs);
    fs.Delete(testdir, true);
    Path inFile = new Path(testdir, "nullin/blah");
    SequenceFile.Writer w = SequenceFile.CreateWriter(fs, conf, inFile,
        typeof(NullWritable), typeof(Text), SequenceFile.CompressionType.None);
    Text t = new Text();
    foreach (string s in values)
    {
        t.Set(s);
        w.Append(NullWritable.Get(), t);
    }
    w.Close();
    FileInputFormat.SetInputPaths(conf, inFile);
    FileOutputFormat.SetOutputPath(conf, new Path(testdir, "nullout"));
    conf.SetMapperClass(typeof(TestMapRed.NullMapper));
    conf.SetReducerClass(typeof(IdentityReducer));
    conf.SetOutputKeyClass(typeof(NullWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetInputFormat(typeof(SequenceFileInputFormat));
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    conf.SetNumReduceTasks(1);
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    JobClient.RunJob(conf);
    // Since all null keys compare equal, allow any ordering
    SequenceFile.Reader r = new SequenceFile.Reader(fs,
        new Path(testdir, "nullout/part-00000"), conf);
    m = "AAAAAAAAAAAAAA";
    for (int i = 1; r.Next(NullWritable.Get(), t); ++i)
    {
        NUnit.Framework.Assert.IsTrue("Unexpected value: " + t, values.Remove(t.ToString()));
        m = m.Replace((char)('A' + i - 1), (char)('A' + i));
    }
    NUnit.Framework.Assert.IsTrue("Missing values: " + values.ToString(), values.IsEmpty());
}
/// <exception cref="System.Exception"/>
private void RunMergeTest(JobConf job, FileSystem fileSystem, int numMappers,
    int numReducers, int numLines, bool isUber)
{
    fileSystem.Delete(Output, true);
    job.SetJobName("Test");
    JobClient client = new JobClient(job);
    RunningJob submittedJob = null;
    FileInputFormat.SetInputPaths(job, InputDir);
    FileOutputFormat.SetOutputPath(job, Output);
    job.Set("mapreduce.output.textoutputformat.separator", " ");
    job.SetInputFormat(typeof(TextInputFormat));
    job.SetMapOutputKeyClass(typeof(Text));
    job.SetMapOutputValueClass(typeof(Text));
    job.SetOutputKeyClass(typeof(Text));
    job.SetOutputValueClass(typeof(Text));
    job.SetMapperClass(typeof(TestMRIntermediateDataEncryption.MyMapper));
    job.SetPartitionerClass(typeof(TestMRIntermediateDataEncryption.MyPartitioner));
    job.SetOutputFormat(typeof(TextOutputFormat));
    job.SetNumReduceTasks(numReducers);
    job.SetInt("mapreduce.map.maxattempts", 1);
    job.SetInt("mapreduce.reduce.maxattempts", 1);
    job.SetInt("mapred.test.num_lines", numLines);
    if (isUber)
    {
        job.SetBoolean("mapreduce.job.ubertask.enable", true);
    }
    job.SetBoolean(MRJobConfig.MrEncryptedIntermediateData, true);
    try
    {
        submittedJob = client.SubmitJob(job);
        try
        {
            if (!client.MonitorAndPrintJob(job, submittedJob))
            {
                throw new IOException("Job failed!");
            }
        }
        catch (Exception)
        {
            Sharpen.Thread.CurrentThread().Interrupt();
        }
    }
    catch (IOException ioe)
    {
        System.Console.Error.WriteLine("Job failed with: " + ioe);
    }
    finally
    {
        VerifyOutput(submittedJob, fileSystem, numMappers, numLines);
    }
}
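// Note (not in the original test): MRJobConfig.MrEncryptedIntermediateData enables
// encryption of the intermediate (spill/shuffle) data that this test exercises, and
// the single-attempt settings make any failure surface immediately rather than being
// masked by task retries.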