Ejemplo n.º 1
0
        /// <summary>
        /// Multiplies together, across all source files, the number of records in each file
        /// whose key equals <paramref name="key"/>. Files with no matching record are skipped
        /// so they do not force the product to zero.
        /// </summary>
        /// <param name="key">The key to count occurrences of.</param>
        /// <param name="src">The sequence files to scan.</param>
        /// <param name="conf">Configuration used to open each file.</param>
        /// <returns>The product of the non-zero per-file match counts; 1 when nothing matches.</returns>
        /// <exception cref="System.IO.IOException"/>
        private static int CountProduct(IntWritable key, Path[] src, Configuration conf)
        {
            int product = 1;

            foreach (Path p in src)
            {
                int count = 0;
                SequenceFile.Reader r = new SequenceFile.Reader(cluster.GetFileSystem(), p, conf);
                try
                {
                    IntWritable k = new IntWritable();
                    IntWritable v = new IntWritable();
                    while (r.Next(k, v))
                    {
                        if (k.Equals(key))
                        {
                            count++;
                        }
                    }
                }
                finally
                {
                    // Release the reader even when a read fails part-way through the file.
                    r.Close();
                }
                if (count != 0)
                {
                    product *= count;
                }
            }
            return(product);
        }
Ejemplo n.º 2
0
        /// <summary>Read the cut points from the given IFile.</summary>
        /// <param name="fs">The file system</param>
        /// <param name="p">The path to read</param>
        /// <param name="keyClass">The map output key class</param>
        /// <param name="conf">The job config</param>
        /// <returns>An array of the keys read from the file, in file order.</returns>
        /// <exception cref="System.IO.IOException"/>
        private K[] ReadPartitions(FileSystem fs, Path p, Type keyClass, Configuration conf
                                   )
        {
            // matching key types enforced by passing in
            // map output key class
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
            AList <K>           parts  = new AList <K>();

            try
            {
                // Instantiate the first key inside the try so that a failure here
                // still reaches the cleanup in the finally block.
                K            key   = ReflectionUtils.NewInstance(keyClass, conf);
                NullWritable value = NullWritable.Get();

                while (reader.Next(key, value))
                {
                    parts.AddItem(key);
                    // Use a fresh instance per record so stored keys are not overwritten.
                    key = ReflectionUtils.NewInstance(keyClass, conf);
                }
                // Close explicitly on the success path so a close failure propagates;
                // nulling the reference makes the cleanup below act only on the failure path.
                reader.Close();
                reader = null;
            }
            finally
            {
                IOUtils.Cleanup(Log, reader);
            }
            return(Sharpen.Collections.ToArray(parts, (K[])System.Array.CreateInstance(keyClass
                                                                                       , parts.Count)));
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Opens the given sequence file, logs which file is being read, and returns its metadata.
 /// </summary>
 /// <param name="fs">The file system holding the file.</param>
 /// <param name="file">The sequence file whose metadata is wanted.</param>
 /// <returns>The metadata reported by the reader.</returns>
 /// <exception cref="System.IO.IOException"/>
 private SequenceFile.Metadata ReadMetadata(FileSystem fs, Path file)
 {
     Log.Info("reading file: " + file.ToString());
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
     try
     {
         return(reader.GetMetadata());
     }
     finally
     {
         // Close the reader whether or not the metadata lookup succeeded.
         reader.Close();
     }
 }
Ejemplo n.º 4
0
            /// <summary>
            /// Opens a sequence-file reader on <paramref name="path"/> using a new configuration whose
            /// "io.file.buffer.size" is set to <paramref name="osBufferSize"/>, and allocates the
            /// key and value <c>BytesWritable</c> instances.
            /// </summary>
            /// <param name="fs">The file system holding the file.</param>
            /// <param name="path">The sequence file to open.</param>
            /// <param name="osBufferSize">Value stored under "io.file.buffer.size".</param>
            /// <exception cref="System.IO.IOException"/>
            public SeqFileReadable(FileSystem fs, Path path, int osBufferSize)
            {
                Configuration readerSettings = new Configuration();
                readerSettings.SetInt("io.file.buffer.size", osBufferSize);

                reader = new SequenceFile.Reader(fs, path, readerSettings);

                // Key and value holders are created after the reader is open.
                key   = new BytesWritable();
                value = new BytesWritable();
            }
Ejemplo n.º 5
0
        /// <summary>
        /// Runs a job on the local framework over a three-line text input and checks that it
        /// completes successfully and that the reduce output file's compression flag matches
        /// whether <paramref name="redCompression"/> differs from <c>None</c>.
        /// The test directory is deleted before the run and again afterwards.
        /// </summary>
        /// <param name="compressMapOutputs">Whether map outputs are compressed.</param>
        /// <param name="redCompression">Compression type requested for the reduce output.</param>
        /// <param name="includeCombine">Whether an identity combiner is configured.</param>
        /// <exception cref="System.Exception"/>
        private void CheckCompression(bool compressMapOutputs, SequenceFile.CompressionType
                                      redCompression, bool includeCombine)
        {
            JobConf    conf    = new JobConf(typeof(TestMapRed));
            Path       testdir = new Path(TestDir.GetAbsolutePath());
            Path       inDir   = new Path(testdir, "in");
            Path       outDir  = new Path(testdir, "out");
            FileSystem fs      = FileSystem.Get(conf);

            fs.Delete(testdir, true);
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetMapperClass(typeof(TestMapRed.MyMap));
            conf.SetReducerClass(typeof(TestMapRed.MyReduce));
            conf.SetOutputKeyClass(typeof(Text));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
            conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
            if (includeCombine)
            {
                conf.SetCombinerClass(typeof(IdentityReducer));
            }
            conf.SetCompressMapOutput(compressMapOutputs);
            SequenceFileOutputFormat.SetOutputCompressionType(conf, redCompression);
            try
            {
                if (!fs.Mkdirs(testdir))
                {
                    throw new IOException("Mkdirs failed to create " + testdir.ToString());
                }
                if (!fs.Mkdirs(inDir))
                {
                    throw new IOException("Mkdirs failed to create " + inDir.ToString());
                }
                Path             inFile = new Path(inDir, "part0");
                DataOutputStream f      = fs.Create(inFile);
                try
                {
                    f.WriteBytes("Owen was here\n");
                    f.WriteBytes("Hadoop is fun\n");
                    f.WriteBytes("Is this done, yet?\n");
                }
                finally
                {
                    // Close the input file even if a write fails.
                    f.Close();
                }
                RunningJob rj = JobClient.RunJob(conf);
                NUnit.Framework.Assert.IsTrue("job was complete", rj.IsComplete());
                NUnit.Framework.Assert.IsTrue("job was successful", rj.IsSuccessful());
                Path output = new Path(outDir, Task.GetOutputName(0));
                NUnit.Framework.Assert.IsTrue("reduce output exists " + output, fs.Exists(output)
                                              );
                SequenceFile.Reader rdr = new SequenceFile.Reader(fs, output, conf);
                try
                {
                    NUnit.Framework.Assert.AreEqual("is reduce output compressed " + output, redCompression
                                                    != SequenceFile.CompressionType.None, rdr.IsCompressed());
                }
                finally
                {
                    // A failed assertion must not leave the reader open.
                    rdr.Close();
                }
            }
            finally
            {
                fs.Delete(testdir, true);
            }
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Asserts that the given sequence file contains exactly the records (1L, "one") and
 /// (2L, "two"), in that order, followed by end of file.
 /// </summary>
 /// <param name="file">The sequence file to verify.</param>
 /// <exception cref="System.IO.IOException"/>
 private void Verify2Values(Path file)
 {
     SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.File
                                                              (file));
     try
     {
         Assert.Equal(1L, reader.Next((object)null));
         Assert.Equal("one", reader.GetCurrentValue((object)null));
         Assert.Equal(2L, reader.Next((object)null));
         Assert.Equal("two", reader.GetCurrentValue((object)null));
         // A null key signals that no further record exists.
         NUnit.Framework.Assert.IsNull(reader.Next((object)null));
     }
     finally
     {
         // Close the reader even when one of the assertions above fails.
         reader.Close();
     }
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Syncs <paramref name="reader"/> to <paramref name="off"/>, reads the next record and
        /// asserts that its key equals <paramref name="expectedRecord"/> and that its value
        /// starts with the text produced by formatting <c>RecFmt</c> with that record number.
        /// </summary>
        /// <param name="reader">The reader to reposition and read from.</param>
        /// <param name="key">Receives the key of the record read.</param>
        /// <param name="val">Cleared, then receives the value of the record read.</param>
        /// <param name="iter">Not used by this method.</param>
        /// <param name="off">The offset passed to <c>Sync</c>.</param>
        /// <param name="expectedRecord">The key expected for the record read after syncing.</param>
        /// <exception cref="System.IO.IOException"/>
        private static void ForOffset(SequenceFile.Reader reader, IntWritable key, Text val
                                      , int iter, long off, int expectedRecord)
        {
            val.Clear();
            reader.Sync(off);
            reader.Next(key, val);
            // Expected value goes first so a failure message reports the two values correctly.
            Assert.Equal(expectedRecord, key.Get());
            string test = string.Format(RecFmt, expectedRecord, expectedRecord);

            // Find returning 0 means the value begins with the expected text.
            Assert.Equal("Invalid value " + val, 0, val.Find(test, 0));
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Writes ten distinct text values under null keys to a sequence file, runs a
        /// single-reducer job on the local framework over it, and asserts that the job output
        /// contains exactly those values, in any order.
        /// </summary>
        public virtual void TestNullKeys()
        {
            JobConf          conf   = new JobConf(typeof(TestMapRed));
            FileSystem       fs     = FileSystem.GetLocal(conf);
            HashSet <string> values = new HashSet <string>();
            string           m      = "AAAAAAAAAAAAAA";

            // Each pass stores the current string and then shifts every letter up by one.
            for (int i = 1; i < 11; ++i)
            {
                values.AddItem(m);
                m = m.Replace((char)('A' + i - 1), (char)('A' + i));
            }
            Path testdir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified
                               (fs);

            fs.Delete(testdir, true);
            Path inFile = new Path(testdir, "nullin/blah");

            SequenceFile.Writer w = SequenceFile.CreateWriter(fs, conf, inFile, typeof(NullWritable
                                                                                       ), typeof(Text), SequenceFile.CompressionType.None);
            Text t = new Text();

            try
            {
                foreach (string s in values)
                {
                    t.Set(s);
                    w.Append(NullWritable.Get(), t);
                }
            }
            finally
            {
                // Close the writer even if an append fails.
                w.Close();
            }
            FileInputFormat.SetInputPaths(conf, inFile);
            FileOutputFormat.SetOutputPath(conf, new Path(testdir, "nullout"));
            conf.SetMapperClass(typeof(TestMapRed.NullMapper));
            conf.SetReducerClass(typeof(IdentityReducer));
            conf.SetOutputKeyClass(typeof(NullWritable));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetInputFormat(typeof(SequenceFileInputFormat));
            conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
            conf.SetNumReduceTasks(1);
            conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
            JobClient.RunJob(conf);
            // Since null keys all equal, allow any ordering
            SequenceFile.Reader r = new SequenceFile.Reader(fs, new Path(testdir, "nullout/part-00000"
                                                                         ), conf);
            try
            {
                while (r.Next(NullWritable.Get(), t))
                {
                    // Every value read must be one that was written and not yet seen.
                    NUnit.Framework.Assert.IsTrue("Unexpected value: " + t, values.Remove(t.ToString(
                                                                                              )));
                }
            }
            finally
            {
                // The reader was previously never closed.
                r.Close();
            }
            NUnit.Framework.Assert.IsTrue("Missing values: " + values.ToString(), values.IsEmpty
                                              ());
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Writes every key/value pair of the given sequence file to standard output,
        /// one "  Row: key, value" line per record.
        /// </summary>
        /// <param name="fs">The file system holding the file.</param>
        /// <param name="p">The sequence file to print.</param>
        /// <param name="conf">Configuration used to open the file.</param>
        /// <exception cref="System.IO.IOException"/>
        private static void PrintSequenceFile(FileSystem fs, Path p, Configuration conf)
        {
            SequenceFile.Reader r = new SequenceFile.Reader(fs, p, conf);

            try
            {
                object key   = null;
                object value = null;

                // Next returns null once no further record is available.
                while ((key = r.Next(key)) != null)
                {
                    value = r.GetCurrentValue(value);
                    System.Console.Out.WriteLine("  Row: " + key + ", " + value);
                }
            }
            finally
            {
                // Close the reader even when reading or printing fails.
                r.Close();
            }
        }
Ejemplo n.º 10
0
            /// <summary>
            /// Opens a sequence-file reader on the path of <paramref name="f"/>, creates key and
            /// value instances of the classes the reader reports, and allocates the input and
            /// output buffers.
            /// </summary>
            /// <param name="_enclosing">The owning <c>Display</c>; supplies the configuration.</param>
            /// <param name="f">Status of the file to read.</param>
            /// <exception cref="System.IO.IOException"/>
            public TextRecordInputStream(Display _enclosing, FileStatus f)
            {
                this._enclosing = _enclosing;

                Path          location = f.GetPath();
                Configuration settings = this._enclosing.GetConf();

                // The reader is opened first because the key and value classes come from it.
                this.r = new SequenceFile.Reader(settings, SequenceFile.Reader.File(location));

                this.key = ReflectionUtils.NewInstance(
                    this.r.GetKeyClass().AsSubclass <IWritableComparable <> >(), settings);
                this.val = ReflectionUtils.NewInstance(
                    this.r.GetValueClass().AsSubclass <IWritable>(), settings);

                this.inbuf  = new DataInputBuffer();
                this.outbuf = new DataOutputBuffer();
            }
Ejemplo n.º 11
0
        /// <summary>Open the output generated by this format.</summary>
        /// <param name="conf">Configuration used to resolve the file system and open each file.</param>
        /// <param name="dir">Directory whose entries are opened as sequence files.</param>
        /// <returns>One reader per directory entry, ordered by sorted path.</returns>
        /// <exception cref="System.IO.IOException"/>
        public static SequenceFile.Reader[] GetReaders(Configuration conf, Path dir)
        {
            FileSystem fs = dir.GetFileSystem(conf);

            Path[] names = FileUtil.Stat2Paths(fs.ListStatus(dir));
            // sort names, so that hash partitioning works
            Arrays.Sort(names);
            SequenceFile.Reader[] parts     = new SequenceFile.Reader[names.Length];
            bool                  allOpened = false;
            try
            {
                for (int i = 0; i < names.Length; i++)
                {
                    parts[i] = new SequenceFile.Reader(fs, names[i], conf);
                }
                allOpened = true;
            }
            finally
            {
                if (!allOpened)
                {
                    // Opening failed part-way: release the readers already created,
                    // since the caller never receives the array and cannot close them.
                    foreach (SequenceFile.Reader opened in parts)
                    {
                        if (opened == null)
                        {
                            continue;
                        }
                        try
                        {
                            opened.Close();
                        }
                        catch (System.IO.IOException)
                        {
                            // Best effort: the failure that aborted opening is the one
                            // the caller needs to see, so close errors are not rethrown.
                        }
                    }
                }
            }
            return(parts);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Writes <paramref name="lines"/> "key{i}"/"value{i}" records to a block-compressed
        /// sequence file using the named codec, reads them back, asserts that contents and
        /// record count match, and then deletes the file.
        /// </summary>
        /// <param name="conf">Configuration; "io.seqfile.compress.blocksize" is set on it.</param>
        /// <param name="lines">Number of records to write and expect back.</param>
        /// <param name="codecClass">Type name of the compression codec to instantiate.</param>
        /// <param name="blockSize">Value stored under "io.seqfile.compress.blocksize".</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="InstantiationException"/>
        /// <exception cref="System.MemberAccessException"/>
        private static void SequenceFileCodecTest(Configuration conf, int lines, string codecClass
                                                  , int blockSize)
        {
            Path filePath = new Path("SequenceFileCodecTest." + codecClass);

            // Configuration
            conf.SetInt("io.seqfile.compress.blocksize", blockSize);
            // Create the SequenceFile
            FileSystem fs = FileSystem.Get(conf);

            Log.Info("Creating SequenceFile with codec \"" + codecClass + "\"");
            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, filePath, typeof(
                                                                       Text), typeof(Text), SequenceFile.CompressionType.Block, (CompressionCodec)System.Activator.CreateInstance
                                                                       (Runtime.GetType(codecClass)));
            try
            {
                // Write some data
                Log.Info("Writing to SequenceFile...");
                for (int i = 0; i < lines; i++)
                {
                    Text key   = new Text("key" + i);
                    Text value = new Text("value" + i);
                    writer.Append(key, value);
                }
            }
            finally
            {
                // Mirror the reader handling below: close even if an append fails.
                writer.Close();
            }
            // Read the data back and check
            Log.Info("Reading from the SequenceFile...");
            SequenceFile.Reader reader  = new SequenceFile.Reader(fs, filePath, conf);
            Writable            key_1   = (Writable)System.Activator.CreateInstance(reader.GetKeyClass());
            Writable            value_1 = (Writable)System.Activator.CreateInstance(reader.GetValueClass
                                                                                        ());
            int lc = 0;

            try
            {
                while (reader.Next(key_1, value_1))
                {
                    Assert.Equal("key" + lc, key_1.ToString());
                    Assert.Equal("value" + lc, value_1.ToString());
                    lc++;
                }
            }
            finally
            {
                reader.Close();
            }
            Assert.Equal(lines, lc);
            // Delete temporary files
            fs.Delete(filePath, false);
            Log.Info("SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass +
                     "\"");
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Closes one reader twice and then checks that two readers opened afterwards on
        /// different block-compressed files each return their own records when read
        /// alternately.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestClose()
        {
            Configuration   conf = new Configuration();
            LocalFileSystem fs   = FileSystem.GetLocal(conf);
            // create a sequence file 1
            Path path1 = new Path(Runtime.GetProperty("test.build.data", ".") + "/test1.seq");

            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, path1, typeof(Text
                                                                                           ), typeof(NullWritable), SequenceFile.CompressionType.Block);
            writer.Append(new Text("file1-1"), NullWritable.Get());
            writer.Append(new Text("file1-2"), NullWritable.Get());
            writer.Close();
            Path path2 = new Path(Runtime.GetProperty("test.build.data", ".") + "/test2.seq");

            writer = SequenceFile.CreateWriter(fs, conf, path2, typeof(Text), typeof(NullWritable
                                                                                     ), SequenceFile.CompressionType.Block);
            writer.Append(new Text("file2-1"), NullWritable.Get());
            writer.Append(new Text("file2-2"), NullWritable.Get());
            writer.Close();
            // Create a reader which uses 4 BuiltInZLibInflater instances
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, path1, conf);
            // Returns the 4 BuiltInZLibInflater instances to the CodecPool
            reader.Close();
            // The second close _could_ erroneously returns the same
            // 4 BuiltInZLibInflater instances to the CodecPool again
            reader.Close();
            // The first reader gets 4 BuiltInZLibInflater instances from the CodecPool
            SequenceFile.Reader reader1 = new SequenceFile.Reader(fs, path1, conf);
            SequenceFile.Reader reader2 = null;
            try
            {
                // read first value from reader1
                Text text = new Text();

                reader1.Next(text);
                Assert.Equal("file1-1", text.ToString());
                // The second reader _could_ get the same 4 BuiltInZLibInflater
                // instances from the CodePool as reader1
                reader2 = new SequenceFile.Reader(fs, path2, conf);
                // read first value from reader2
                reader2.Next(text);
                Assert.Equal("file2-1", text.ToString());
                // read second value from reader1
                reader1.Next(text);
                Assert.Equal("file1-2", text.ToString());
                // read second value from reader2 (this throws an exception)
                reader2.Next(text);
                Assert.Equal("file2-2", text.ToString());
                NUnit.Framework.Assert.IsFalse(reader1.Next(text));
                NUnit.Framework.Assert.IsFalse(reader2.Next(text));
            }
            finally
            {
                // Both readers were previously left open; close them on every exit path.
                try
                {
                    if (reader2 != null)
                    {
                        reader2.Close();
                    }
                }
                finally
                {
                    reader1.Close();
                }
            }
        }
Ejemplo n.º 14
0
            /// <summary>
            /// Opens a sequence-file reader for the file of <paramref name="split"/>, advances it to
            /// a sync point when the split starts beyond the reader's current position, and records
            /// the start and end positions, the value-bytes holder and whether the split is
            /// already exhausted.
            /// </summary>
            /// <param name="conf">Configuration used to resolve the file system and open the file.</param>
            /// <param name="split">The file split to read.</param>
            /// <exception cref="System.IO.IOException"/>
            public SequenceFileAsBinaryRecordReader(Configuration conf, FileSplit split)
            {
                Path splitPath = split.GetPath();

                this.@in = new SequenceFile.Reader(splitPath.GetFileSystem(conf), splitPath, conf);
                this.end = split.GetStart() + split.GetLength();

                // sync to start
                bool startsPastReader = split.GetStart() > this.@in.GetPosition();
                if (startsPastReader)
                {
                    this.@in.Sync(split.GetStart());
                }

                this.start = this.@in.GetPosition();
                vbytes     = this.@in.CreateValueBytes();
                // Nothing to read when the synced start is not before the end.
                done = !(start < end);
            }
Ejemplo n.º 15
0
        /// <summary>
        /// Opens a sequence-file reader for the file of <paramref name="split"/>, advances it to
        /// a sync point when the split starts beyond the reader's current position, and records
        /// the start and end positions and whether any data remains.
        /// </summary>
        /// <param name="conf">Configuration used to open the file; also stored on this instance.</param>
        /// <param name="split">The file split to read.</param>
        /// <exception cref="System.IO.IOException"/>
        public SequenceFileRecordReader(Configuration conf, FileSplit split)
        {
            Path splitPath = split.GetPath();

            this.@in  = new SequenceFile.Reader(splitPath.GetFileSystem(conf), splitPath, conf);
            this.end  = split.GetStart() + split.GetLength();
            this.conf = conf;

            // sync to start
            bool startsPastReader = split.GetStart() > this.@in.GetPosition();
            if (startsPastReader)
            {
                this.@in.Sync(split.GetStart());
            }

            this.start = this.@in.GetPosition();
            // More data exists only while the synced start lies before the end.
            more = !(start >= end);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Writes a sequence file and then, over five rounds alternating between two reader
        /// constructors, calls <c>ForOffset</c> for offsets 0, 65, 2000 and 0 with expected
        /// records 0, 0, 21 and 0. The file is deleted afterwards.
        /// </summary>
        public virtual void TestLowSyncpoint()
        {
            Configuration conf = new Configuration();
            FileSystem    fs   = FileSystem.GetLocal(conf);
            Path          path = new Path(Runtime.GetProperty("test.build.data", "/tmp"),
                                          "sequencefile.sync.test");
            IntWritable   input = new IntWritable();
            Text          val   = new Text();

            SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
                                                                 typeof(IntWritable), typeof(Text));
            try
            {
                WriteSequenceFile(writer, Numrecords);

                int round = 0;
                while (round < 5)
                {
                    //try different SequenceFile.Reader constructors
                    SequenceFile.Reader reader;
                    bool useStreamConstructor = (round % 2) != 0;
                    if (useStreamConstructor)
                    {
                        FSDataInputStream stream     = fs.Open(path);
                        long              fileLength = fs.GetFileStatus(path).GetLen();
                        int               bufferSize = conf.GetInt("io.file.buffer.size", 4096);
                        reader = new SequenceFile.Reader(stream, bufferSize, 0L, fileLength, conf);
                    }
                    else
                    {
                        reader = new SequenceFile.Reader(fs, path, conf);
                    }

                    try
                    {
                        ForOffset(reader, input, val, round, 0, 0);
                        ForOffset(reader, input, val, round, 65, 0);
                        ForOffset(reader, input, val, round, 2000, 21);
                        ForOffset(reader, input, val, round, 0, 0);
                    }
                    finally
                    {
                        reader.Close();
                    }
                    round++;
                }
            }
            finally
            {
                // Remove the test file whether or not the checks passed.
                fs.Delete(path, false);
            }
        }
Ejemplo n.º 17
0
 /// <summary>
 /// On the first call only, asserts that reduce input file 0 exists and that its
 /// compression flag equals <c>compressInput</c>. Later calls do nothing.
 /// </summary>
 /// <param name="key">Not used by this method.</param>
 /// <param name="values">Not used by this method.</param>
 /// <param name="output">Not used by this method.</param>
 /// <param name="reporter">Not used by this method.</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Reduce(WritableComparable key, IEnumerator values, OutputCollector
                            output, Reporter reporter)
 {
     if (first)
     {
         first = false;
         MapOutputFile mapOutputFile = new MROutputFiles();
         mapOutputFile.SetConf(conf);
         Path       input = mapOutputFile.GetInputFile(0);
         FileSystem fs    = FileSystem.Get(conf);
         NUnit.Framework.Assert.IsTrue("reduce input exists " + input, fs.Exists(input));
         SequenceFile.Reader rdr = new SequenceFile.Reader(fs, input, conf);
         try
         {
             NUnit.Framework.Assert.AreEqual("is reduce input compressed " + input, compressInput
                                             , rdr.IsCompressed());
         }
         finally
         {
             // A failed assertion must not leave the reader open.
             rdr.Close();
         }
     }
 }
Ejemplo n.º 18
0
            /// <summary>
            /// Builds one <c>IndirectSplit</c> per record of the sequence file named by the
            /// <c>IndirectInputFile</c> configuration entry, taking the path from the record's
            /// text value and the number from its long key.
            /// </summary>
            /// <param name="job">The job context supplying the configuration.</param>
            /// <returns>The splits, in file order.</returns>
            /// <exception cref="System.IO.IOException"/>
            public override IList <InputSplit> GetSplits(JobContext job)
            {
                Configuration      conf   = job.GetConfiguration();
                Path               src    = new Path(conf.Get(IndirectInputFile, null));
                FileSystem         fs     = src.GetFileSystem(conf);
                IList <InputSplit> splits = new AList <InputSplit>();
                LongWritable       key    = new LongWritable();

                Org.Apache.Hadoop.IO.Text value = new Org.Apache.Hadoop.IO.Text();
                SequenceFile.Reader       sl    = new SequenceFile.Reader(fs, src, conf);
                try
                {
                    while (sl.Next(key, value))
                    {
                        splits.AddItem(new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit(new Path
                                                                                                        (value.ToString()), key.Get()));
                    }
                }
                finally
                {
                    // The reader was previously never closed.
                    sl.Close();
                }
                return(splits);
            }
Ejemplo n.º 19
0
            /// <summary>
            /// Opens a sequence-file reader for the file of the given split, advances it to a sync
            /// point when the split starts beyond the reader's current position, and records the
            /// start and end positions, the value-bytes holder and whether the split is already
            /// exhausted.
            /// </summary>
            /// <param name="split">The split to read; cast to <c>FileSplit</c>.</param>
            /// <param name="context">Supplies the configuration.</param>
            /// <exception cref="System.IO.IOException"/>
            /// <exception cref="System.Exception"/>
            public override void Initialize(InputSplit split, TaskAttemptContext context)
            {
                FileSplit     fileSplit = (FileSplit)split;
                Path          location  = fileSplit.GetPath();
                Configuration settings  = context.GetConfiguration();

                this.@in = new SequenceFile.Reader(location.GetFileSystem(settings), location, settings);
                this.end = fileSplit.GetStart() + split.GetLength();

                // sync to start
                bool startsPastReader = fileSplit.GetStart() > this.@in.GetPosition();
                if (startsPastReader)
                {
                    this.@in.Sync(fileSplit.GetStart());
                }

                this.start = this.@in.GetPosition();
                vbytes     = this.@in.CreateValueBytes();
                // Nothing to read when the synced start is not before the end.
                done = !(start < end);
            }
Ejemplo n.º 20
0
            /// <summary>
            /// Builds one <c>IndirectSplit</c> per record of the sequence file named by the
            /// <c>IndirectInputFile</c> job entry, taking the path from the record's text value
            /// and the number from its long key.
            /// </summary>
            /// <param name="job">The job configuration.</param>
            /// <param name="numSplits">Used only as the initial capacity of the split list.</param>
            /// <returns>The splits, in file order.</returns>
            /// <exception cref="System.IO.IOException"/>
            public virtual InputSplit[] GetSplits(JobConf job, int numSplits)
            {
                Path       src = new Path(job.Get(GenericMRLoadGenerator.IndirectInputFile, null));
                FileSystem fs  = src.GetFileSystem(job);
                AList <GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit> splits = new AList
                                                                                          <GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit>(numSplits);
                LongWritable key = new LongWritable();

                Org.Apache.Hadoop.IO.Text value = new Org.Apache.Hadoop.IO.Text();
                SequenceFile.Reader       sl    = new SequenceFile.Reader(fs, src, job);
                try
                {
                    while (sl.Next(key, value))
                    {
                        splits.AddItem(new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit(new Path
                                                                                                        (value.ToString()), key.Get()));
                    }
                }
                finally
                {
                    // The reader was previously never closed.
                    sl.Close();
                }
                return(Sharpen.Collections.ToArray(splits, new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit
                                                   [splits.Count]));
            }
Ejemplo n.º 21
0
        /// <summary>
        /// Stores the context's configuration, opens a sequence-file reader for the file of the
        /// given split, advances it to a sync point when the split starts beyond the reader's
        /// current position, and records the start and end positions and whether any data remains.
        /// </summary>
        /// <param name="split">The split to read; cast to <c>FileSplit</c>.</param>
        /// <param name="context">Supplies the configuration.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public override void Initialize(InputSplit split, TaskAttemptContext context)
        {
            FileSplit chunk = (FileSplit)split;

            conf = context.GetConfiguration();

            Path location = chunk.GetPath();
            this.@in = new SequenceFile.Reader(location.GetFileSystem(conf), location, conf);
            this.end = chunk.GetStart() + chunk.GetLength();

            // sync to start
            bool startsPastReader = chunk.GetStart() > this.@in.GetPosition();
            if (startsPastReader)
            {
                this.@in.Sync(chunk.GetStart());
            }

            this.start = this.@in.GetPosition();
            // More data exists only while the synced start lies before the end.
            more = !(start >= end);
        }
        /// <summary>
        /// Writes the records (1L, "one") and (2L, "two") to a sequence file declared with
        /// <c>long</c> keys and <c>string</c> values, then reads the file back and asserts
        /// that exactly those records are returned, followed by end of file.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestJavaSerialization()
        {
            Path file = new Path(Runtime.GetProperty("test.build.data", ".") + "/testseqser.seq"
                                 );

            fs.Delete(file, true);
            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(long
                                                                                          ), typeof(string));
            try
            {
                writer.Append(1L, "one");
                writer.Append(2L, "two");
            }
            finally
            {
                // Close the writer even if an append fails.
                writer.Close();
            }
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
            try
            {
                Assert.Equal(1L, reader.Next((object)null));
                Assert.Equal("one", reader.GetCurrentValue((object)null));
                Assert.Equal(2L, reader.Next((object)null));
                Assert.Equal("two", reader.GetCurrentValue((object)null));
                // A null key signals that no further record exists.
                NUnit.Framework.Assert.IsNull(reader.Next((object)null));
            }
            finally
            {
                // Close the reader even when one of the assertions above fails.
                reader.Close();
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Regenerates <paramref name="count"/> key/value pairs from <paramref name="seed"/> into
        /// a sorted in-memory map and checks that the ".sorted" companion of
        /// <paramref name="file"/> yields the same pairs in the same order.
        /// </summary>
        /// <param name="fs">The file system holding the sorted file.</param>
        /// <param name="count">Number of records to generate and compare.</param>
        /// <param name="seed">Seed for the record generator.</param>
        /// <param name="file">Base path; ".sorted" is appended to find the file to check.</param>
        /// <exception cref="System.IO.IOException"/>
        private void CheckSort(FileSystem fs, int count, int seed, Path file)
        {
            Log.Info("sorting " + count + " records in memory for debug");
            RandomDatum.Generator generator = new RandomDatum.Generator(seed);
            SortedDictionary <RandomDatum, RandomDatum> map = new SortedDictionary <RandomDatum
                                                                                    , RandomDatum>();

            for (int i = 0; i < count; i++)
            {
                generator.Next();
                RandomDatum key   = generator.GetKey();
                RandomDatum value = generator.GetValue();
                map[key] = value;
            }
            Log.Debug("checking order of " + count + " records");
            RandomDatum k = new RandomDatum();
            RandomDatum v = new RandomDatum();
            IEnumerator <KeyValuePair <RandomDatum, RandomDatum> > iterator = map.GetEnumerator(
                );

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.Suffix(".sorted"),
                                                                 conf);
            try
            {
                for (int i_1 = 0; i_1 < count; i_1++)
                {
                    KeyValuePair <RandomDatum, RandomDatum> entry = iterator.Next();
                    RandomDatum key   = entry.Key;
                    RandomDatum value = entry.Value;
                    reader.Next(k, v);
                    if (!k.Equals(key))
                    {
                        throw new RuntimeException("wrong key at " + i_1);
                    }
                    if (!v.Equals(value))
                    {
                        throw new RuntimeException("wrong value at " + i_1);
                    }
                }
            }
            finally
            {
                // A mismatch throws from inside the loop; close the reader on that path too.
                reader.Close();
            }
            Log.Debug("sucessfully checked " + count + " records");
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Asserts that the job produced exactly one non-empty output file and that, for every
        /// record in it, the value equals <c>CountProduct</c> of the record's key over
        /// <paramref name="src"/>.
        /// </summary>
        /// <param name="job">The job whose output path and configuration are used.</param>
        /// <param name="src">The source files passed to <c>CountProduct</c>.</param>
        /// <exception cref="System.IO.IOException"/>
        private static void CheckOuterConsistency(Job job, Path[] src)
        {
            Path outf = FileOutputFormat.GetOutputPath(job);

            FileStatus[] outlist = cluster.GetFileSystem().ListStatus(outf, new Utils.OutputFileUtils.OutputFilesFilter
                                                                          ());
            NUnit.Framework.Assert.AreEqual("number of part files is more than 1. It is" + outlist
                                            .Length, 1, outlist.Length);
            NUnit.Framework.Assert.IsTrue("output file with zero length" + outlist[0].GetLen(
                                              ), 0 < outlist[0].GetLen());
            SequenceFile.Reader r = new SequenceFile.Reader(cluster.GetFileSystem(), outlist[
                                                                0].GetPath(), job.GetConfiguration());
            try
            {
                IntWritable k = new IntWritable();
                IntWritable v = new IntWritable();

                while (r.Next(k, v))
                {
                    NUnit.Framework.Assert.AreEqual("counts does not match", v.Get(), CountProduct(k,
                                                                                                   src, job.GetConfiguration()));
                }
            }
            finally
            {
                // A failed assertion must not leave the reader open.
                r.Close();
            }
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Runs a nested composite join over generated sequence files and checks
        /// the shape of every tuple the job emits.
        /// </summary>
        /// <remarks>
        /// The join expression built here is
        /// <c>outer(inner(S0,...,Sn-1),outer(fake,S0,...,Sn-1,fake))</c>.
        /// Source <c>i</c> holds the keys <c>(i + 2) * (j + 1)</c> for <c>j</c> in
        /// <c>[0, Items)</c>, each paired with the value <c>i</c>.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestNestedJoin()
        {
            // outer(inner(S1,...,Sn),outer(S1,...Sn))
            int     Sources = 3;
            int     Items   = (Sources + 1) * (Sources + 1);
            JobConf job     = new JobConf();
            Path    @base   = cluster.GetFileSystem().MakeQualified(new Path("/nested"));

            // source[i][j] is the j-th key written to source i.
            int[][] source = new int[Sources][];
            for (int i = 0; i < Sources; ++i)
            {
                source[i] = new int[Items];
                for (int j = 0; j < Items; ++j)
                {
                    source[i][j] = (i + 2) * (j + 1);
                }
            }
            Path[] src = new Path[Sources];
            SequenceFile.Writer[] @out = CreateWriters(@base, job, Sources, src);
            IntWritable           k    = new IntWritable();

            // Every record of source i carries the source index as its value.
            for (int i_1 = 0; i_1 < Sources; ++i_1)
            {
                IntWritable v = new IntWritable();
                v.Set(i_1);
                for (int j = 0; j < Items; ++j)
                {
                    k.Set(source[i_1][j]);
                    @out[i_1].Append(k, v);
                }
                @out[i_1].Close();
            }
            @out = null;
            StringBuilder sb = new StringBuilder();

            // Inner join across all real sources.
            sb.Append("outer(inner(");
            for (int i_2 = 0; i_2 < Sources; ++i_2)
            {
                sb.Append(CompositeInputFormat.Compose(typeof(SequenceFileInputFormat), src[i_2].
                                                       ToString()));
                if (i_2 + 1 != Sources)
                {
                    sb.Append(",");
                }
            }
            // Outer join across the same sources, bracketed by two Fake_IF entries.
            sb.Append("),outer(");
            sb.Append(CompositeInputFormat.Compose(typeof(TestDatamerge.Fake_IF), "foobar"));
            sb.Append(",");
            for (int i_3 = 0; i_3 < Sources; ++i_3)
            {
                sb.Append(CompositeInputFormat.Compose(typeof(SequenceFileInputFormat), src[i_3].
                                                       ToString()));
                sb.Append(",");
            }
            sb.Append(CompositeInputFormat.Compose(typeof(TestDatamerge.Fake_IF), "raboof") +
                      "))");
            job.Set("mapreduce.join.expr", sb.ToString());
            job.SetInputFormat(typeof(CompositeInputFormat));
            Path outf = new Path(@base, "out");

            FileOutputFormat.SetOutputPath(job, outf);
            TestDatamerge.Fake_IF.SetKeyClass(job, typeof(IntWritable));
            TestDatamerge.Fake_IF.SetValClass(job, typeof(IntWritable));
            job.SetMapperClass(typeof(IdentityMapper));
            job.SetReducerClass(typeof(IdentityReducer));
            job.SetNumReduceTasks(0);
            job.SetOutputKeyClass(typeof(IntWritable));
            job.SetOutputValueClass(typeof(TupleWritable));
            job.SetOutputFormat(typeof(SequenceFileOutputFormat));
            JobClient.RunJob(job);
            FileStatus[] outlist = cluster.GetFileSystem().ListStatus(outf, new Utils.OutputFileUtils.OutputFilesFilter
                                                                          ());
            NUnit.Framework.Assert.AreEqual(1, outlist.Length);
            NUnit.Framework.Assert.IsTrue(0 < outlist[0].GetLen());
            SequenceFile.Reader r = new SequenceFile.Reader(cluster.GetFileSystem(), outlist[
                                                                0].GetPath(), job);
            TupleWritable v_1 = new TupleWritable();

            // Close the reader even when an assertion below fails.
            try
            {
                while (r.Next(k, v_1))
                {
                    // Positions 0 and Sources + 1 of the second (outer) tuple are the
                    // Fake_IF entries of the join expression; both must be absent.
                    NUnit.Framework.Assert.IsFalse(((TupleWritable)v_1.Get(1)).Has(0));
                    NUnit.Framework.Assert.IsFalse(((TupleWritable)v_1.Get(1)).Has(Sources + 1));
                    bool chk = true;
                    int  ki  = k.Get();
                    for (int i_4 = 2; i_4 < Sources + 2; ++i_4)
                    {
                        // Source (i_4 - 2) wrote the multiples of i_4 up to i_4 * Items.
                        if ((ki % i_4) == 0 && ki <= i_4 * Items)
                        {
                            NUnit.Framework.Assert.AreEqual(i_4 - 2, ((IntWritable)((TupleWritable)v_1.Get(1)
                                                                                    ).Get((i_4 - 1))).Get());
                        }
                        else
                        {
                            chk = false;
                        }
                    }
                    if (chk)
                    {
                        // present in all sources; chk inner
                        NUnit.Framework.Assert.IsTrue(v_1.Has(0));
                        for (int i_5 = 0; i_5 < Sources; ++i_5)
                        {
                            NUnit.Framework.Assert.IsTrue(((TupleWritable)v_1.Get(0)).Has(i_5));
                        }
                    }
                    else
                    {
                        // should not be present in inner join
                        NUnit.Framework.Assert.IsFalse(v_1.Has(0));
                    }
                }
            }
            finally
            {
                r.Close();
            }
            @base.GetFileSystem(job).Delete(@base, true);
        }
Ejemplo n.º 26
0
        /// <summary>Run a map/reduce job for estimating Pi.</summary>
        /// <param name="numMaps">number of map tasks; one input file is written per map</param>
        /// <param name="numPoints">sample size written into each map's input record</param>
        /// <param name="tmpDir">
        /// scratch directory holding the job's "in" and "out" directories; it must
        /// not exist beforehand and is deleted before this method returns
        /// </param>
        /// <param name="conf">configuration used to create the job and the file system</param>
        /// <returns>the estimated value of Pi</returns>
        /// <exception cref="System.IO.IOException">
        /// if <paramref name="tmpDir"/> already exists, the input directory cannot
        /// be created, the job does not succeed, or no result record can be read
        /// </exception>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="System.Exception"/>
        public static BigDecimal EstimatePi(int numMaps, long numPoints, Path tmpDir, Configuration
                                            conf)
        {
            Job job = Job.GetInstance(conf);

            //setup job conf
            job.SetJobName(typeof(QuasiMonteCarlo).Name);
            job.SetJarByClass(typeof(QuasiMonteCarlo));
            job.SetInputFormatClass(typeof(SequenceFileInputFormat));
            job.SetOutputKeyClass(typeof(BooleanWritable));
            job.SetOutputValueClass(typeof(LongWritable));
            job.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
            job.SetMapperClass(typeof(QuasiMonteCarlo.QmcMapper));
            job.SetReducerClass(typeof(QuasiMonteCarlo.QmcReducer));
            job.SetNumReduceTasks(1);
            // turn off speculative execution, because DFS doesn't handle
            // multiple writers to the same file.
            job.SetSpeculativeExecution(false);
            //setup input/output directories
            Path inDir  = new Path(tmpDir, "in");
            Path outDir = new Path(tmpDir, "out");

            FileInputFormat.SetInputPaths(job, inDir);
            FileOutputFormat.SetOutputPath(job, outDir);
            FileSystem fs = FileSystem.Get(conf);

            if (fs.Exists(tmpDir))
            {
                throw new IOException("Tmp directory " + fs.MakeQualified(tmpDir) + " already exists.  Please remove it first."
                                      );
            }
            if (!fs.Mkdirs(inDir))
            {
                throw new IOException("Cannot create input directory " + inDir);
            }
            try
            {
                //generate an input file for each map task
                for (int i = 0; i < numMaps; ++i)
                {
                    Path                file   = new Path(inDir, "part" + i);
                    LongWritable        offset = new LongWritable(i * numPoints);
                    LongWritable        size   = new LongWritable(numPoints);
                    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(LongWritable
                                                                                                  ), typeof(LongWritable), SequenceFile.CompressionType.None);
                    try
                    {
                        writer.Append(offset, size);
                    }
                    finally
                    {
                        writer.Close();
                    }
                    System.Console.Out.WriteLine("Wrote input for Map #" + i);
                }
                //start a map/reduce job
                System.Console.Out.WriteLine("Starting Job");
                long startTime = Runtime.CurrentTimeMillis();
                bool succeeded = job.WaitForCompletion(true);
                double duration = (Runtime.CurrentTimeMillis() - startTime) / 1000.0;
                System.Console.Out.WriteLine("Job Finished in " + duration + " seconds");
                // Fail with a clear message instead of tripping over a missing
                // or incomplete result file further down.
                if (!succeeded)
                {
                    throw new IOException("Job " + typeof(QuasiMonteCarlo).Name + " failed.");
                }
                //read outputs
                Path                inFile     = new Path(outDir, "reduce-out");
                LongWritable        numInside  = new LongWritable();
                LongWritable        numOutside = new LongWritable();
                SequenceFile.Reader reader     = new SequenceFile.Reader(fs, inFile, conf);
                try
                {
                    // An empty result file would otherwise yield a silent estimate of 0.
                    if (!reader.Next(numInside, numOutside))
                    {
                        throw new IOException("No result record found in " + inFile);
                    }
                }
                finally
                {
                    reader.Close();
                }
                //compute estimated value: 4 * numInside / (numMaps * numPoints)
                BigDecimal numTotal = BigDecimal.ValueOf(numMaps).Multiply(BigDecimal.ValueOf(numPoints
                                                                                              ));
                return(BigDecimal.ValueOf(4).SetScale(20).Multiply(BigDecimal.ValueOf(numInside.Get
                                                                                          ())).Divide(numTotal, RoundingMode.HalfUp));
            }
            finally
            {
                // The scratch directory is removed on success and on failure alike.
                fs.Delete(tmpDir, true);
            }
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Runs a job with one text and one sequence-file named output and checks
        /// the produced part files, their contents and, optionally, the counters.
        /// </summary>
        /// <param name="withCounters">
        /// whether counters are enabled on <c>MultipleOutputs</c> for the job; when
        /// true the per-output counter values are asserted as well
        /// </param>
        /// <exception cref="System.Exception"/>
        protected internal virtual void _testMultipleOutputs(bool withCounters)
        {
            string        input = "a\nb\nc\nd\ne\nc\nd\ne";
            Configuration conf  = CreateJobConf();
            Job           job   = MapReduceTestUtil.CreateJob(conf, InDir, OutDir, 2, 1, input);

            job.SetJobName("mo");
            MultipleOutputs.AddNamedOutput(job, Text, typeof(TextOutputFormat), typeof(LongWritable
                                                                                       ), typeof(Text));
            MultipleOutputs.AddNamedOutput(job, Sequence, typeof(SequenceFileOutputFormat), typeof(
                                               IntWritable), typeof(Text));
            MultipleOutputs.SetCountersEnabled(job, withCounters);
            job.SetMapperClass(typeof(TestMRMultipleOutputs.MOMap));
            job.SetReducerClass(typeof(TestMRMultipleOutputs.MOReduce));
            job.WaitForCompletion(true);
            // assert number of named output part files
            int        namedOutputCount      = 0;
            int        valueBasedOutputCount = 0;
            FileSystem fs = OutDir.GetFileSystem(conf);

            FileStatus[] statuses = fs.ListStatus(OutDir);
            foreach (FileStatus status in statuses)
            {
                string fileName = status.GetPath().GetName();
                if (fileName.Equals("text-m-00000") || fileName.Equals("text-m-00001") || fileName
                    .Equals("text-r-00000") || fileName.Equals("sequence_A-m-00000") || fileName.Equals
                        ("sequence_A-m-00001") || fileName.Equals("sequence_B-m-00000") || fileName.Equals
                        ("sequence_B-m-00001") || fileName.Equals("sequence_B-r-00000") || fileName.Equals
                        ("sequence_C-r-00000"))
                {
                    namedOutputCount++;
                }
                else
                {
                    if (fileName.Equals("a-r-00000") || fileName.Equals("b-r-00000") || fileName.Equals
                            ("c-r-00000") || fileName.Equals("d-r-00000") || fileName.Equals("e-r-00000"))
                    {
                        valueBasedOutputCount++;
                    }
                }
            }
            NUnit.Framework.Assert.AreEqual(9, namedOutputCount);
            NUnit.Framework.Assert.AreEqual(5, valueBasedOutputCount);
            // assert TextOutputFormat files correctness
            int            count  = 0;
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.Open(new Path
                                                                                         (FileOutputFormat.GetOutputPath(job), "text-r-00000"))));

            // Close the stream even when an assertion inside the loop fails.
            try
            {
                string line = reader.ReadLine();
                while (line != null)
                {
                    NUnit.Framework.Assert.IsTrue(line.EndsWith(Text));
                    line = reader.ReadLine();
                    count++;
                }
            }
            finally
            {
                reader.Close();
            }
            NUnit.Framework.Assert.IsFalse(count == 0);
            // assert SequenceOutputFormat files correctness
            SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat
                                                                                 .GetOutputPath(job), "sequence_B-r-00000"), conf);

            // Same guarantee for the sequence-file reader.
            try
            {
                NUnit.Framework.Assert.AreEqual(typeof(IntWritable), seqReader.GetKeyClass());
                NUnit.Framework.Assert.AreEqual(typeof(Text), seqReader.GetValueClass());
                count = 0;
                IntWritable key   = new IntWritable();
                Text        value = new Text();

                while (seqReader.Next(key, value))
                {
                    NUnit.Framework.Assert.AreEqual(Sequence, value.ToString());
                    count++;
                }
            }
            finally
            {
                seqReader.Close();
            }
            NUnit.Framework.Assert.IsFalse(count == 0);
            if (withCounters)
            {
                CounterGroup counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName
                                                                   );
                NUnit.Framework.Assert.AreEqual(9, counters.Size());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Text).GetValue());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_A").GetValue
                                                    ());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Sequence + "_B").GetValue
                                                    ());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_C").GetValue
                                                    ());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("a").GetValue());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("b").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("c").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("d").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("e").GetValue());
            }
        }
Ejemplo n.º 28
0
            /// <summary>
            /// Runs the record-stats checker job over both the sort input and the sort
            /// output and verifies that the two statistics records it produces agree.
            /// </summary>
            /// <param name="defaults">base configuration for the checker job and file systems</param>
            /// <param name="sortInput">path that was fed to the sort</param>
            /// <param name="sortOutput">path the sort wrote to</param>
            /// <exception cref="System.IO.IOException">
            /// if either statistics record cannot be read, or their byte counts,
            /// record counts or checksums differ
            /// </exception>
            internal static void CheckRecords(Configuration defaults, Path sortInput, Path sortOutput
                                              )
            {
                FileSystem inputfs   = sortInput.GetFileSystem(defaults);
                FileSystem outputfs  = sortOutput.GetFileSystem(defaults);
                FileSystem defaultfs = FileSystem.Get(defaults);
                JobConf    jobConf   = new JobConf(defaults, typeof(SortValidator.RecordStatsChecker));

                jobConf.SetJobName("sortvalidate-recordstats-checker");
                int noSortReduceTasks = outputfs.ListStatus(sortOutput, sortPathsFilter).Length;

                jobConf.SetInt(SortReduces, noSortReduceTasks);
                int noSortInputpaths = inputfs.ListStatus(sortInput).Length;

                jobConf.SetInputFormat(typeof(SortValidator.RecordStatsChecker.NonSplitableSequenceFileInputFormat
                                              ));
                jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
                jobConf.SetOutputKeyClass(typeof(IntWritable));
                jobConf.SetOutputValueClass(typeof(SortValidator.RecordStatsChecker.RecordStatsWritable
                                                   ));
                jobConf.SetMapperClass(typeof(SortValidator.RecordStatsChecker.Map));
                jobConf.SetCombinerClass(typeof(SortValidator.RecordStatsChecker.Reduce));
                jobConf.SetReducerClass(typeof(SortValidator.RecordStatsChecker.Reduce));
                jobConf.SetNumMapTasks(noSortReduceTasks);
                jobConf.SetNumReduceTasks(1);
                FileInputFormat.SetInputPaths(jobConf, sortInput);
                FileInputFormat.AddInputPath(jobConf, sortOutput);
                // Unique scratch location for the checker's own output.
                Path outputPath = new Path(new Path("/tmp", "sortvalidate"), UUID.RandomUUID().ToString
                                               ());

                if (defaultfs.Exists(outputPath))
                {
                    defaultfs.Delete(outputPath, true);
                }
                FileOutputFormat.SetOutputPath(jobConf, outputPath);
                // Uncomment to run locally in a single process
                //job_conf.set(JTConfig.JT, "local");
                Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
                System.Console.Out.WriteLine("\nSortValidator.RecordStatsChecker: Validate sort "
                                             + "from " + inputPaths[0] + " (" + noSortInputpaths + " files), " + inputPaths[
                                                 1] + " (" + noSortReduceTasks + " files) into " + FileOutputFormat.GetOutputPath
                                                 (jobConf) + " with 1 reducer.");
                // DateTime.Now captures the current time; a default-constructed
                // DateTime would be 0001-01-01 and make the timing output meaningless.
                DateTime startTime = DateTime.Now;

                System.Console.Out.WriteLine("Job started: " + startTime);
                JobClient.RunJob(jobConf);
                try
                {
                    DateTime end_time = DateTime.Now;
                    System.Console.Out.WriteLine("Job ended: " + end_time);
                    // Whole seconds, truncated.
                    System.Console.Out.WriteLine("The job took " + (long)(end_time - startTime).TotalSeconds
                                                 + " seconds.");
                    // Check to ensure that the statistics of the
                    // framework's sort-input and sort-output match
                    SequenceFile.Reader stats = new SequenceFile.Reader(defaultfs, new Path(outputPath
                                                                                            , "part-00000"), defaults);
                    try
                    {
                        IntWritable k1 = new IntWritable();
                        IntWritable k2 = new IntWritable();
                        SortValidator.RecordStatsChecker.RecordStatsWritable v1 = new SortValidator.RecordStatsChecker.RecordStatsWritable
                                                                                      ();
                        SortValidator.RecordStatsChecker.RecordStatsWritable v2 = new SortValidator.RecordStatsChecker.RecordStatsWritable
                                                                                      ();
                        if (!stats.Next(k1, v1))
                        {
                            throw new IOException("Failed to read record #1 from reduce's output");
                        }
                        if (!stats.Next(k2, v2))
                        {
                            throw new IOException("Failed to read record #2 from reduce's output");
                        }
                        if ((v1.GetBytes() != v2.GetBytes()) || (v1.GetRecords() != v2.GetRecords()) || v1
                            .GetChecksum() != v2.GetChecksum())
                        {
                            throw new IOException("(" + v1.GetBytes() + ", " + v1.GetRecords() + ", " + v1.GetChecksum
                                                      () + ") v/s (" + v2.GetBytes() + ", " + v2.GetRecords() + ", " + v2.GetChecksum(
                                                      ) + ")");
                        }
                    }
                    finally
                    {
                        stats.Close();
                    }
                }
                finally
                {
                    // Remove the checker's scratch output whether or not validation passed.
                    defaultfs.Delete(outputPath, true);
                }
            }
Ejemplo n.º 29
0
        /// <summary>
        /// End-to-end MapReduce check: writes a random distribution of ints as an
        /// answer key, runs a generate job, a check job and a merge job over it, and
        /// compares the recomputed key against the original.
        /// </summary>
        /// <exception cref="System.Exception"/>
        private static void Launch()
        {
            //
            // Generate distribution of ints.  This is the answer key.
            //
            Configuration conf       = new Configuration();
            int           countsToGo = counts;

            int[] dist = new int[range];
            for (int i = 0; i < range; i++)
            {
                double avgInts = (1.0 * countsToGo) / (range - i);
                dist[i] = (int)Math.Max(0, Math.Round(avgInts + (Math.Sqrt(avgInts) * r.NextGaussian
                                                                     ())));
                countsToGo -= dist[i];
            }
            if (countsToGo > 0)
            {
                dist[dist.Length - 1] += countsToGo;
            }
            //
            // Write the answer key to a file.
            //
            Path testdir = new Path(TestDir.GetAbsolutePath());

            if (!fs.Mkdirs(testdir))
            {
                throw new IOException("Mkdirs failed to create " + testdir.ToString());
            }
            Path randomIns = new Path(testdir, "genins");

            if (!fs.Mkdirs(randomIns))
            {
                throw new IOException("Mkdirs failed to create " + randomIns.ToString());
            }
            Path answerkey = new Path(randomIns, "answer.key");

            SequenceFile.Writer @out = SequenceFile.CreateWriter(fs, conf, answerkey, typeof(
                                                                     IntWritable), typeof(IntWritable), SequenceFile.CompressionType.None);
            try
            {
                for (int i_1 = 0; i_1 < range; i_1++)
                {
                    @out.Append(new IntWritable(i_1), new IntWritable(dist[i_1]));
                }
            }
            finally
            {
                @out.Close();
            }
            PrintFiles(randomIns, conf);
            //
            // Now we need to generate the random numbers according to
            // the above distribution.
            //
            // We create a lot of map tasks, each of which takes at least
            // one "line" of the distribution.  (That is, a certain number
            // X is to be generated Y number of times.)
            //
            // A map task emits Y key/val pairs.  The val is X.  The key
            // is a randomly-generated number.
            //
            // The reduce task gets its input sorted by key.  That is, sorted
            // in random order.  It then emits a single line of text that
            // for the given values.  It does not emit the key.
            //
            // Because there's just one reduce task, we emit a single big
            // file of random numbers.
            //
            Path randomOuts = new Path(testdir, "genouts");

            fs.Delete(randomOuts, true);
            Job genJob = Job.GetInstance(conf);

            FileInputFormat.SetInputPaths(genJob, randomIns);
            genJob.SetInputFormatClass(typeof(SequenceFileInputFormat));
            genJob.SetMapperClass(typeof(TestMapReduce.RandomGenMapper));
            FileOutputFormat.SetOutputPath(genJob, randomOuts);
            genJob.SetOutputKeyClass(typeof(IntWritable));
            genJob.SetOutputValueClass(typeof(IntWritable));
            genJob.SetReducerClass(typeof(TestMapReduce.RandomGenReducer));
            genJob.SetNumReduceTasks(1);
            genJob.WaitForCompletion(true);
            PrintFiles(randomOuts, conf);
            //
            // Next, we read the big file in and regenerate the
            // original map.  It's split into a number of parts.
            // (That number is 'intermediateReduces'.)
            //
            // We have many map tasks, each of which read at least one
            // of the output numbers.  For each number read in, the
            // map task emits a key/value pair where the key is the
            // number and the value is "1".
            //
            // We have a single reduce task, which receives its input
            // sorted by the key emitted above.  For each key, there will
            // be a certain number of "1" values.  The reduce task sums
            // these values to compute how many times the given key was
            // emitted.
            //
            // The reduce task then emits a key/val pair where the key
            // is the number in question, and the value is the number of
            // times the key was emitted.  This is the same format as the
            // original answer key (except that numbers emitted zero times
            // will not appear in the regenerated key.)  The answer set
            // is split into a number of pieces.  A final MapReduce job
            // will merge them.
            //
            // There's not really a need to go to 10 reduces here
            // instead of 1.  But we want to test what happens when
            // you have multiple reduces at once.
            //
            int  intermediateReduces = 10;
            Path intermediateOuts    = new Path(testdir, "intermediateouts");

            fs.Delete(intermediateOuts, true);
            Job checkJob = Job.GetInstance(conf);

            FileInputFormat.SetInputPaths(checkJob, randomOuts);
            checkJob.SetMapperClass(typeof(TestMapReduce.RandomCheckMapper));
            FileOutputFormat.SetOutputPath(checkJob, intermediateOuts);
            checkJob.SetOutputKeyClass(typeof(IntWritable));
            checkJob.SetOutputValueClass(typeof(IntWritable));
            checkJob.SetOutputFormatClass(typeof(MapFileOutputFormat));
            checkJob.SetReducerClass(typeof(TestMapReduce.RandomCheckReducer));
            checkJob.SetNumReduceTasks(intermediateReduces);
            checkJob.WaitForCompletion(true);
            PrintFiles(intermediateOuts, conf);
            //
            // OK, now we take the output from the last job and
            // merge it down to a single file.  The map() and reduce()
            // functions don't really do anything except reemit tuples.
            // But by having a single reduce task here, we end up merging
            // all the files.
            //
            Path finalOuts = new Path(testdir, "finalouts");

            fs.Delete(finalOuts, true);
            Job mergeJob = Job.GetInstance(conf);

            FileInputFormat.SetInputPaths(mergeJob, intermediateOuts);
            mergeJob.SetInputFormatClass(typeof(SequenceFileInputFormat));
            mergeJob.SetMapperClass(typeof(TestMapReduce.MergeMapper));
            FileOutputFormat.SetOutputPath(mergeJob, finalOuts);
            mergeJob.SetOutputKeyClass(typeof(IntWritable));
            mergeJob.SetOutputValueClass(typeof(IntWritable));
            mergeJob.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
            mergeJob.SetReducerClass(typeof(TestMapReduce.MergeReducer));
            mergeJob.SetNumReduceTasks(1);
            mergeJob.WaitForCompletion(true);
            PrintFiles(finalOuts, conf);
            //
            // Finally, we compare the reconstructed answer key with the
            // original one.  Remember, we need to ignore zero-count items
            // in the original key.
            //
            bool success       = true;
            Path recomputedkey = new Path(finalOuts, "part-r-00000");

            SequenceFile.Reader @in = new SequenceFile.Reader(fs, recomputedkey, conf);
            int totalseen           = 0;

            try
            {
                IntWritable key = new IntWritable();
                IntWritable val = new IntWritable();
                for (int i_1 = 0; i_1 < range; i_1++)
                {
                    if (dist[i_1] == 0)
                    {
                        continue;
                    }
                    // Each non-zero entry of the original key must have a matching
                    // record, in order, in the recomputed key.
                    if (!@in.Next(key, val))
                    {
                        System.Console.Error.WriteLine("Cannot read entry " + i_1);
                        success = false;
                        break;
                    }
                    else
                    {
                        if (!((key.Get() == i_1) && (val.Get() == dist[i_1])))
                        {
                            System.Console.Error.WriteLine("Mismatch!  Pos=" + key.Get() + ", i=" + i_1 + ", val="
                                                           + val.Get() + ", dist[i]=" + dist[i_1]);
                            success = false;
                        }
                        totalseen += val.Get();
                    }
                }
                if (success)
                {
                    // Anything left over means the recomputed key has extra records.
                    if (@in.Next(key, val))
                    {
                        System.Console.Error.WriteLine("Unnecessary lines in recomputed key!");
                        success = false;
                    }
                }
            }
            finally
            {
                @in.Close();
            }
            int originalTotal = 0;

            for (int i_2 = 0; i_2 < dist.Length; i_2++)
            {
                originalTotal += dist[i_2];
            }
            System.Console.Out.WriteLine("Original sum: " + originalTotal);
            System.Console.Out.WriteLine("Recomputed sum: " + totalseen);
            //
            // Write to "results" whether the test succeeded or not.
            //
            Path           resultFile = new Path(testdir, "results");
            BufferedWriter bw         = new BufferedWriter(new OutputStreamWriter(fs.Create(resultFile
                                                                                            )));

            try
            {
                bw.Write("Success=" + success + "\n");
                System.Console.Out.WriteLine("Success=" + success);
            }
            finally
            {
                bw.Close();
            }
            NUnit.Framework.Assert.IsTrue("testMapRed failed", success);
            fs.Delete(testdir, true);
        }
Ejemplo n.º 30
0
        /// <exception cref="System.Exception"/>
        private void ValidateOutput(JobConf conf, RunningJob runningJob, IList <string> mapperBadRecords
                                    , IList <string> redBadRecords)
        {
            Log.Info(runningJob.GetCounters().ToString());
            NUnit.Framework.Assert.IsTrue(runningJob.IsSuccessful());
            //validate counters
            Counters counters = runningJob.GetCounters();

            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.MapSkippedRecords
                                                                 ).GetCounter(), mapperBadRecords.Count);
            int mapRecs = input.Count - mapperBadRecords.Count;

            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.MapInputRecords)
                                            .GetCounter(), mapRecs);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.MapOutputRecords
                                                                 ).GetCounter(), mapRecs);
            int redRecs = mapRecs - redBadRecords.Count;

            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceSkippedRecords
                                                                 ).GetCounter(), redBadRecords.Count);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceSkippedGroups
                                                                 ).GetCounter(), redBadRecords.Count);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceInputGroups
                                                                 ).GetCounter(), redRecs);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceInputRecords
                                                                 ).GetCounter(), redRecs);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceOutputRecords
                                                                 ).GetCounter(), redRecs);
            //validate skipped records
            Path skipDir = SkipBadRecords.GetSkipOutputPath(conf);

            NUnit.Framework.Assert.IsNotNull(skipDir);
            Path[]         skips      = FileUtil.Stat2Paths(GetFileSystem().ListStatus(skipDir));
            IList <string> mapSkipped = new AList <string>();
            IList <string> redSkipped = new AList <string>();

            foreach (Path skipPath in skips)
            {
                Log.Info("skipPath: " + skipPath);
                SequenceFile.Reader reader = new SequenceFile.Reader(GetFileSystem(), skipPath, conf
                                                                     );
                object key   = ReflectionUtils.NewInstance(reader.GetKeyClass(), conf);
                object value = ReflectionUtils.NewInstance(reader.GetValueClass(), conf);
                key = reader.Next(key);
                while (key != null)
                {
                    value = reader.GetCurrentValue(value);
                    Log.Debug("key:" + key + " value:" + value.ToString());
                    if (skipPath.GetName().Contains("_r_"))
                    {
                        redSkipped.AddItem(value.ToString());
                    }
                    else
                    {
                        mapSkipped.AddItem(value.ToString());
                    }
                    key = reader.Next(key);
                }
                reader.Close();
            }
            NUnit.Framework.Assert.IsTrue(mapSkipped.ContainsAll(mapperBadRecords));
            NUnit.Framework.Assert.IsTrue(redSkipped.ContainsAll(redBadRecords));
            Path[] outputFiles = FileUtil.Stat2Paths(GetFileSystem().ListStatus(GetOutputDir(
                                                                                    ), new Utils.OutputFileUtils.OutputFilesFilter()));
            IList <string> mapperOutput = GetProcessed(input, mapperBadRecords);

            Log.Debug("mapperOutput " + mapperOutput.Count);
            IList <string> reducerOutput = GetProcessed(mapperOutput, redBadRecords);

            Log.Debug("reducerOutput " + reducerOutput.Count);
            if (outputFiles.Length > 0)
            {
                InputStream    @is     = GetFileSystem().Open(outputFiles[0]);
                BufferedReader reader  = new BufferedReader(new InputStreamReader(@is));
                string         line    = reader.ReadLine();
                int            counter = 0;
                while (line != null)
                {
                    counter++;
                    StringTokenizer tokeniz = new StringTokenizer(line, "\t");
                    string          key     = tokeniz.NextToken();
                    string          value   = tokeniz.NextToken();
                    Log.Debug("Output: key:" + key + "  value:" + value);
                    NUnit.Framework.Assert.IsTrue(value.Contains("hello"));
                    NUnit.Framework.Assert.IsTrue(reducerOutput.Contains(value));
                    line = reader.ReadLine();
                }
                reader.Close();
                NUnit.Framework.Assert.AreEqual(reducerOutput.Count, counter);
            }
        }