Example #1
        public virtual void ExtractOutputKeyValue(string key, string val, string fieldSep,
                                                  IList<int> keyFieldList, IList<int> valFieldList,
                                                  int allValueFieldsFrom, bool ignoreKey, bool isMap)
        {
            if (!ignoreKey)
            {
                val = key + val;
            }
            string[] fields = val.Split(fieldSep);
            string   newKey = SelectFields(fields, keyFieldList, -1, fieldSep);
            string   newVal = SelectFields(fields, valFieldList, allValueFieldsFrom, fieldSep);

            if (isMap && newKey == null)
            {
                newKey = newVal;
                newVal = null;
            }
            if (newKey != null)
            {
                this.key = new Org.Apache.Hadoop.IO.Text(newKey);
            }
            if (newVal != null)
            {
                this.value = new Org.Apache.Hadoop.IO.Text(newVal);
            }
        }
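Unless ignoreKey is set, the incoming key is prepended to the value before the combined record is split on fieldSep; the output key and value are then rebuilt from the configured field-index lists, with allValueFieldsFrom marking where "the rest of the fields" begin for the value. On the map side, a record whose key selection comes up empty is emitted with the selected value as its key.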
Example #2
        public virtual void TestFormatWithCustomSeparator()
        {
            JobConf job       = new JobConf();
            string  separator = "\u0001";

            job.Set("mapreduce.output.textoutputformat.separator", separator);
            job.Set(JobContext.TaskAttemptId, attempt);
            FileOutputFormat.SetOutputPath(job, workDir.GetParent().GetParent());
            FileOutputFormat.SetWorkOutputPath(job, workDir);
            FileSystem fs = workDir.GetFileSystem(job);

            if (!fs.Mkdirs(workDir))
            {
                NUnit.Framework.Assert.Fail("Failed to create output directory");
            }
            string file = "test_custom.txt";
            // A reporter that does nothing
            Reporter reporter = Reporter.Null;
            TextOutputFormat<object, object> theOutputFormat = new TextOutputFormat<object, object>();
            RecordWriter<object, object> theRecordWriter =
                theOutputFormat.GetRecordWriter(localFs, job, file, reporter);

            Org.Apache.Hadoop.IO.Text key1 = new Org.Apache.Hadoop.IO.Text("key1");
            Org.Apache.Hadoop.IO.Text key2 = new Org.Apache.Hadoop.IO.Text("key2");
            Org.Apache.Hadoop.IO.Text val1 = new Org.Apache.Hadoop.IO.Text("val1");
            Org.Apache.Hadoop.IO.Text val2 = new Org.Apache.Hadoop.IO.Text("val2");
            NullWritable nullWritable      = NullWritable.Get();

            try
            {
                theRecordWriter.Write(key1, val1);
                theRecordWriter.Write(null, nullWritable);
                theRecordWriter.Write(null, val1);
                theRecordWriter.Write(nullWritable, val2);
                theRecordWriter.Write(key2, nullWritable);
                theRecordWriter.Write(key1, null);
                theRecordWriter.Write(null, null);
                theRecordWriter.Write(key2, val2);
            }
            finally
            {
                theRecordWriter.Close(reporter);
            }
            FilePath      expectedFile   = new FilePath(new Path(workDir, file).ToString());
            StringBuilder expectedOutput = new StringBuilder();

            expectedOutput.Append(key1).Append(separator).Append(val1).Append("\n");
            expectedOutput.Append(val1).Append("\n");
            expectedOutput.Append(val2).Append("\n");
            expectedOutput.Append(key2).Append("\n");
            expectedOutput.Append(key1).Append("\n");
            expectedOutput.Append(key2).Append(separator).Append(val2).Append("\n");
            string output = UtilsForTests.Slurp(expectedFile);

            NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), output);
        }
Example #3
 private static void GenRandom(Org.Apache.Hadoop.IO.Text t, int len, StringBuilder sb)
 {
     sb.Length = 0;
     for (int i = 0; i < len; ++i)
     {
         sb.Append(Extensions.ToString(r.Next(26) + 10, 36));
     }
     t.Set(sb.ToString());
 }
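Extensions.ToString(n, 36) is the Sharpen counterpart of Java's Integer.toString(int, radix), so r.Next(26) + 10 renders as a single character in 'a'..'z'; the helper fills t with len random lowercase letters, reusing the caller's StringBuilder.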
Example #4
        /// <exception cref="System.IO.IOException"/>
        public virtual void Reduce(Org.Apache.Hadoop.IO.Text key,
                                   IEnumerator<Org.Apache.Hadoop.IO.Text> values,
                                   OutputCollector<Org.Apache.Hadoop.IO.Text, Org.Apache.Hadoop.IO.Text> output,
                                   Reporter reporter)
        {
            string keyStr = key.ToString() + this.fieldSeparator;

            while (values.HasNext())
            {
                FieldSelectionHelper helper = new FieldSelectionHelper();
                helper.ExtractOutputKeyValue(keyStr, values.Next().ToString(), fieldSeparator,
                                             reduceOutputKeyFieldList, reduceOutputValueFieldList,
                                             allReduceValueFieldsFrom, false, false);
                output.Collect(helper.GetKey(), helper.GetValue());
            }
        }
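Each value is run through the same field selection as Example #1, against the string "key + fieldSeparator + value", with ignoreKey and isMap both false; a fresh FieldSelectionHelper per value keeps the extracted key/value pairs independent.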
Example #5
 public virtual void RunJob(int items)
 {
     try
     {
         JobConf    conf    = new JobConf(typeof(TestMapRed));
         Path       testdir = new Path(TestDir.GetAbsolutePath());
         Path       inDir   = new Path(testdir, "in");
         Path       outDir  = new Path(testdir, "out");
         FileSystem fs      = FileSystem.Get(conf);
         fs.Delete(testdir, true);
         conf.SetInt(JobContext.IoSortMb, 1);
         conf.SetInputFormat(typeof(SequenceFileInputFormat));
         FileInputFormat.SetInputPaths(conf, inDir);
         FileOutputFormat.SetOutputPath(conf, outDir);
         conf.SetMapperClass(typeof(IdentityMapper));
         conf.SetReducerClass(typeof(IdentityReducer));
         conf.SetOutputKeyClass(typeof(Text));
         conf.SetOutputValueClass(typeof(Text));
         conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
         conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
         if (!fs.Mkdirs(testdir))
         {
             throw new IOException("Mkdirs failed to create " + testdir.ToString());
         }
         if (!fs.Mkdirs(inDir))
         {
             throw new IOException("Mkdirs failed to create " + inDir.ToString());
         }
         Path inFile = new Path(inDir, "part0");
         SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile,
                                                                typeof(Text), typeof(Text));
         StringBuilder content = new StringBuilder();
         for (int i = 0; i < 1000; i++)
         {
             content.Append(i).Append(": This is one more line of content\n");
         }
         Org.Apache.Hadoop.IO.Text text = new Org.Apache.Hadoop.IO.Text(content.ToString());
         for (int i_1 = 0; i_1 < items; i_1++)
         {
             writer.Append(new Org.Apache.Hadoop.IO.Text("rec:" + i_1), text);
         }
         writer.Close();
         JobClient.RunJob(conf);
     }
     catch (Exception e)
     {
          NUnit.Framework.Assert.Fail("Threw exception: " + e);
     }
 }
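The test stages a SequenceFile of items records that all share one multi-kilobyte Text payload, then pushes it through an identity mapper and reducer on the local framework; setting JobContext.IoSortMb to 1 (MB) keeps the map-side sort buffer small so the job exercises spill handling.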
Example #6
            /// <exception cref="System.IO.IOException"/>
            public virtual string[] GetSorted()
            {
                string[] ret = new string[indices.Length];
                Org.Apache.Hadoop.IO.Text t   = new Org.Apache.Hadoop.IO.Text();
                DataInputBuffer           dib = new DataInputBuffer();

                for (int i = 0; i < ret.Length; ++i)
                {
                    int ii = indices[i];
                    dib.Reset(bytes, offsets[ii],
                              ((ii + 1 == indices.Length) ? eob : offsets[ii + 1]) - offsets[ii]);
                    t.ReadFields(dib);
                    ret[i] = t.ToString();
                }
                return(ret);
            }
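GetSorted replays the raw records in the order given by indices: each record's bytes are framed by consecutive offsets (the last one runs to eob), wrapped in a DataInputBuffer, and deserialized through Text.ReadFields before being returned as strings.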
Example #7
 /// <summary>Parse the command line arguments into lines and display the result.</summary>
 /// <param name="args"/>
 /// <exception cref="System.Exception"/>
 public static void Main(string[] args)
 {
     foreach (string arg in args)
     {
         System.Console.Out.WriteLine("Working on " + arg);
         LineReader reader = MakeStream(Unquote(arg));
         Org.Apache.Hadoop.IO.Text line = new Org.Apache.Hadoop.IO.Text();
         int size = reader.ReadLine(line);
         while (size > 0)
         {
             System.Console.Out.WriteLine("Got: " + line.ToString());
             size = reader.ReadLine(line);
         }
         reader.Close();
     }
 }
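LineReader.ReadLine returns the number of bytes consumed, so the loop terminates on 0 at end of stream; the same Text instance is reused for every line, which is the usual Hadoop pattern for avoiding per-record allocation.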
Example #8
 private int GenerateSentence(Org.Apache.Hadoop.IO.Text t, int noWords)
 {
     sentence.Length = 0;
     --noWords;
     for (int i = 0; i < noWords; ++i)
     {
         sentence.Append(words[r.Next(words.Length)]);
         sentence.Append(" ");
     }
     if (noWords >= 0)
     {
         sentence.Append(words[r.Next(words.Length)]);
     }
     t.Set(sentence.ToString());
     return(sentence.Length);
 }
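After the initial decrement, the loop emits noWords - 1 words each followed by a space and the trailing if appends the final word, so the sentence contains exactly noWords space-separated words with no trailing space; the return value is the sentence length in characters.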
Example #9
            /// <exception cref="System.IO.IOException"/>
            public override IList <InputSplit> GetSplits(JobContext job)
            {
                Configuration      conf   = job.GetConfiguration();
                Path               src    = new Path(conf.Get(IndirectInputFile, null));
                FileSystem         fs     = src.GetFileSystem(conf);
                IList <InputSplit> splits = new AList <InputSplit>();
                LongWritable       key    = new LongWritable();

                Org.Apache.Hadoop.IO.Text value = new Org.Apache.Hadoop.IO.Text();
                for (SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, conf);
                     sl.Next(key, value);)
                {
                    splits.AddItem(new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit(
                                       new Path(value.ToString()), key.Get()));
                }
                return(splits);
            }
Example #10
            /// <exception cref="System.IO.IOException"/>
            public virtual InputSplit[] GetSplits(JobConf job, int numSplits)
            {
                Path       src = new Path(job.Get(GenericMRLoadGenerator.IndirectInputFile, null));
                FileSystem fs  = src.GetFileSystem(job);
                AList<GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit> splits =
                    new AList<GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit>(numSplits);
                LongWritable key = new LongWritable();

                Org.Apache.Hadoop.IO.Text value = new Org.Apache.Hadoop.IO.Text();
                for (SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, job);
                     sl.Next(key, value);)
                {
                    splits.AddItem(new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit(
                                       new Path(value.ToString()), key.Get()));
                }
                return(Sharpen.Collections.ToArray(
                           splits, new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit[splits.Count]));
            }
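Examples #9 and #10 are the new-API (mapreduce) and old-API (mapred) versions of the same IndirectInputFormat.GetSplits: the file named by IndirectInputFile is a SequenceFile of (LongWritable, Text) records, and each record's Text value becomes the Path of one IndirectSplit, with the long key passed as the split's second constructor argument (plausibly its length, going by how splits normally report size).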
Example #11
            /// <exception cref="System.IO.IOException"/>
            public virtual void Map(Org.Apache.Hadoop.IO.Text key, Org.Apache.Hadoop.IO.Text val,
                                    OutputCollector<Org.Apache.Hadoop.IO.Text, Org.Apache.Hadoop.IO.Text> output,
                                    Reporter reporter)
            {
                long acc     = 0L;
                long recs    = 0;
                int  keydiff = keymax - keymin;
                int  valdiff = valmax - valmin;

                for (long i = 0L; acc < bytesToWrite; ++i)
                {
                    int recacc = 0;
                    recacc += GenerateSentence(key, keymin + (0 == keydiff ? 0 : r.Next(keydiff)));
                    recacc += GenerateSentence(val, valmin + (0 == valdiff ? 0 : r.Next(valdiff)));
                    output.Collect(key, val);
                    ++recs;
                    acc += recacc;
                    reporter.IncrCounter(GenericMRLoadGenerator.Counters.BytesWritten, recacc);
                    reporter.IncrCounter(GenericMRLoadGenerator.Counters.RecordsWritten, 1);
                    reporter.SetStatus(acc + "/" + (bytesToWrite - acc) + " bytes");
                }
                reporter.SetStatus("Wrote " + recs + " records");
            }
Example #12
            /// <exception cref="System.IO.IOException"/>
            /// <exception cref="System.Exception"/>
            protected override void Map(Org.Apache.Hadoop.IO.Text key, Org.Apache.Hadoop.IO.Text val,
                                        Mapper.Context context)
            {
                long acc     = 0L;
                long recs    = 0;
                int  keydiff = keymax - keymin;
                int  valdiff = valmax - valmin;

                for (long i = 0L; acc < bytesToWrite; ++i)
                {
                    int recacc = 0;
                    recacc += GenerateSentence(key, keymin + (0 == keydiff ? 0 : r.Next(keydiff)));
                    recacc += GenerateSentence(val, valmin + (0 == valdiff ? 0 : r.Next(valdiff)));
                    context.Write(key, val);
                    ++recs;
                    acc += recacc;
                    context.GetCounter(GenericMRLoadGenerator.Counters.BytesWritten).Increment(recacc);
                    context.GetCounter(GenericMRLoadGenerator.Counters.RecordsWritten).Increment(1);
                    context.SetStatus(acc + "/" + (bytesToWrite - acc) + " bytes");
                }
                context.SetStatus("Wrote " + recs + " records");
            }
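Examples #11 and #12 are the same mapper written against the old API (OutputCollector plus Reporter) and the new API (Mapper.Context). Both loop until bytesToWrite bytes of random sentences have been produced, drawing key and value word counts uniformly from [keymin, keymax) and [valmin, valmax), reusing the two incoming Text objects for every record, and reporting progress through the BytesWritten and RecordsWritten counters.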
Example #13
 public FieldSelectionHelper(Org.Apache.Hadoop.IO.Text key, Org.Apache.Hadoop.IO.Text val)
 {
     this.key   = key;
     this.value = val;
 }
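A minimal usage sketch (not part of the original listing): it builds the field lists by hand instead of parsing a key/value spec, and assumes the ExtractOutputKeyValue semantics shown in Example #1, where SelectFields joins the chosen field indices with the separator.

 // Hypothetical example: take field 1 as the key and everything from field 2
 // onward as the value, for tab-separated records. Assumes
 // System.Collections.Generic for IList<int>/List<int>.
 FieldSelectionHelper helper    = new FieldSelectionHelper();
 IList<int>           keyFields = new List<int> { 1 };
 IList<int>           valFields = new List<int>();   // empty: fall back to allValueFieldsFrom
 helper.ExtractOutputKeyValue("k", "\ta\tb\tc", "\t", keyFields, valFields,
                              2, false, false);
 // "k" + "\ta\tb\tc" splits into ["k", "a", "b", "c"], so
 // helper.GetKey() holds "a" and helper.GetValue() holds "b\tc".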
Example #14
        public virtual void TestCompress()
        {
            JobConf job = new JobConf();

            job.Set(JobContext.TaskAttemptId, attempt);
            job.Set(FileOutputFormat.Compress, "true");
            FileOutputFormat.SetOutputPath(job, workDir.GetParent().GetParent());
            FileOutputFormat.SetWorkOutputPath(job, workDir);
            FileSystem fs = workDir.GetFileSystem(job);

            if (!fs.Mkdirs(workDir))
            {
                NUnit.Framework.Assert.Fail("Failed to create output directory");
            }
            string file = "test_compress.txt";
            // A reporter that does nothing
            Reporter reporter = Reporter.Null;
            TextOutputFormat<object, object> theOutputFormat = new TextOutputFormat<object, object>();
            RecordWriter<object, object> theRecordWriter =
                theOutputFormat.GetRecordWriter(localFs, job, file, reporter);

            Org.Apache.Hadoop.IO.Text key1 = new Org.Apache.Hadoop.IO.Text("key1");
            Org.Apache.Hadoop.IO.Text key2 = new Org.Apache.Hadoop.IO.Text("key2");
            Org.Apache.Hadoop.IO.Text val1 = new Org.Apache.Hadoop.IO.Text("val1");
            Org.Apache.Hadoop.IO.Text val2 = new Org.Apache.Hadoop.IO.Text("val2");
            NullWritable nullWritable      = NullWritable.Get();

            try
            {
                theRecordWriter.Write(key1, val1);
                theRecordWriter.Write(null, nullWritable);
                theRecordWriter.Write(null, val1);
                theRecordWriter.Write(nullWritable, val2);
                theRecordWriter.Write(key2, nullWritable);
                theRecordWriter.Write(key1, null);
                theRecordWriter.Write(null, null);
                theRecordWriter.Write(key2, val2);
            }
            finally
            {
                theRecordWriter.Close(reporter);
            }
            StringBuilder expectedOutput = new StringBuilder();

            expectedOutput.Append(key1).Append("\t").Append(val1).Append("\n");
            expectedOutput.Append(val1).Append("\n");
            expectedOutput.Append(val2).Append("\n");
            expectedOutput.Append(key2).Append("\n");
            expectedOutput.Append(key1).Append("\n");
            expectedOutput.Append(key2).Append("\t").Append(val2).Append("\n");
            DefaultCodec codec = new DefaultCodec();

            codec.SetConf(job);
            Path                   expectedFile = new Path(workDir, file + codec.GetDefaultExtension());
            FileInputStream        istream      = new FileInputStream(expectedFile.ToString());
            CompressionInputStream cistream     = codec.CreateInputStream(istream);
            LineReader             reader       = new LineReader(cistream);
            string                 output       = string.Empty;

            Org.Apache.Hadoop.IO.Text @out = new Org.Apache.Hadoop.IO.Text();
            while (reader.ReadLine(@out) > 0)
            {
                output += @out;
                output += "\n";
            }
            reader.Close();
            NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), output);
        }
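TestCompress mirrors TestFormatWithCustomSeparator from Example #2, but turns on FileOutputFormat.Compress and keeps the default tab separator. Since the writer compresses its output, the expected file name gains the codec's default extension, and the test reads it back line by line through DefaultCodec's CompressionInputStream before comparing.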