Example No. 1
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestClose()
        {
            Configuration   conf = new Configuration();
            LocalFileSystem fs   = FileSystem.GetLocal(conf);
            // create a sequence file 1
            Path path1 = new Path(Runtime.GetProperty("test.build.data", ".") + "/test1.seq");

            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, path1, typeof(Text
                                                                                           ), typeof(NullWritable), SequenceFile.CompressionType.Block);
            writer.Append(new Text("file1-1"), NullWritable.Get());
            writer.Append(new Text("file1-2"), NullWritable.Get());
            writer.Close();
            Path path2 = new Path(Runtime.GetProperty("test.build.data", ".") + "/test2.seq");

            writer = SequenceFile.CreateWriter(fs, conf, path2, typeof(Text), typeof(NullWritable
                                                                                     ), SequenceFile.CompressionType.Block);
            writer.Append(new Text("file2-1"), NullWritable.Get());
            writer.Append(new Text("file2-2"), NullWritable.Get());
            writer.Close();
            // Create a reader which uses 4 BuiltInZLibInflater instances
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, path1, conf);
            // Returns the 4 BuiltInZLibInflater instances to the CodecPool
            reader.Close();
            // The second close _could_ erroneously return the same
            // 4 BuiltInZLibInflater instances to the CodecPool again
            reader.Close();
            // The first reader gets 4 BuiltInZLibInflater instances from the CodecPool
            SequenceFile.Reader reader1 = new SequenceFile.Reader(fs, path1, conf);
            // read first value from reader1
            Text text = new Text();

            reader1.Next(text);
            Assert.Equal("file1-1", text.ToString());
            // The second reader _could_ get the same 4 BuiltInZLibInflater
            // instances from the CodecPool as reader1
            SequenceFile.Reader reader2 = new SequenceFile.Reader(fs, path2, conf);
            // read first value from reader2
            reader2.Next(text);
            Assert.Equal("file2-1", text.ToString());
            // read second value from reader1
            reader1.Next(text);
            Assert.Equal("file1-2", text.ToString());
            // read second value from reader2 (this throws an exception)
            reader2.Next(text);
            Assert.Equal("file2-2", text.ToString());
            NUnit.Framework.Assert.IsFalse(reader1.Next(text));
            NUnit.Framework.Assert.IsFalse(reader2.Next(text));
        }
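Note: the second Close() above is precisely where decompressors could be handed back to the CodecPool twice. A minimal sketch of the close-once idiom that Example No. 5 below uses, assuming the same Sharpen-ported Hadoop API and that fs, conf, path1, and a Log field are in scope as in the test:

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path1, conf);
    try
    {
        Text key = new Text();
        while (reader.Next(key))
        {
            // process each key here
        }
        reader.Close();
        // Drop the reference so the cleanup below cannot close the same
        // reader (and return its decompressors) a second time.
        reader = null;
    }
    finally
    {
        IOUtils.Cleanup(Log, reader);
    }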
Example No. 2
 /// <exception cref="System.IO.IOException"/>
 public virtual void Close()
 {
     lock (this)
     {
         @in.Close();
     }
 }
Example No. 3
 /// <exception cref="System.IO.IOException"/>
 public override void Close()
 {
     lock (this)
     {
         @in.Close();
     }
 }
Example No. 4
        /// <exception cref="System.IO.IOException"/>
        private static int CountProduct(IntWritable key, Path[] src, Configuration conf)
        {
            int product = 1;

            foreach (Path p in src)
            {
                int count             = 0;
                SequenceFile.Reader r = new SequenceFile.Reader(cluster.GetFileSystem(), p, conf);
                IntWritable         k = new IntWritable();
                IntWritable         v = new IntWritable();
                while (r.Next(k, v))
                {
                    if (k.Equals(key))
                    {
                        count++;
                    }
                }
                r.Close();
                if (count != 0)
                {
                    product *= count;
                }
            }
            return(product);
        }
Example No. 5
        /// <summary>Read the cut points from the given IFile.</summary>
        /// <param name="fs">The file system</param>
        /// <param name="p">The path to read</param>
        /// <param name="keyClass">The map output key class</param>
        /// <param name="conf">The job config</param>
        /// <exception cref="System.IO.IOException"/>
        private K[] ReadPartitions(FileSystem fs, Path p, Type keyClass, Configuration conf
                                   )
        {
            // matching key types enforced by passing in
            // map output key class
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
            AList <K>           parts  = new AList <K>();
            K            key           = ReflectionUtils.NewInstance(keyClass, conf);
            NullWritable value         = NullWritable.Get();

            try
            {
                while (reader.Next(key, value))
                {
                    parts.AddItem(key);
                    key = ReflectionUtils.NewInstance(keyClass, conf);
                }
                reader.Close();
                reader = null;
            }
            finally
            {
                IOUtils.Cleanup(Log, reader);
            }
            return(Sharpen.Collections.ToArray(parts, (K[])System.Array.CreateInstance(keyClass
                                                                                       , parts.Count)));
        }
Example No. 6
 /// <exception cref="System.IO.IOException"/>
 private SequenceFile.Metadata ReadMetadata(FileSystem fs, Path file)
 {
     Log.Info("reading file: " + file.ToString());
     SequenceFile.Reader   reader = new SequenceFile.Reader(fs, file, conf);
     SequenceFile.Metadata meta   = reader.GetMetadata();
     reader.Close();
     return(meta);
 }
Example No. 7
        /// <exception cref="System.Exception"/>
        private void CheckCompression(bool compressMapOutputs, SequenceFile.CompressionType
                                      redCompression, bool includeCombine)
        {
            JobConf    conf    = new JobConf(typeof(TestMapRed));
            Path       testdir = new Path(TestDir.GetAbsolutePath());
            Path       inDir   = new Path(testdir, "in");
            Path       outDir  = new Path(testdir, "out");
            FileSystem fs      = FileSystem.Get(conf);

            fs.Delete(testdir, true);
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetMapperClass(typeof(TestMapRed.MyMap));
            conf.SetReducerClass(typeof(TestMapRed.MyReduce));
            conf.SetOutputKeyClass(typeof(Text));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
            conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
            if (includeCombine)
            {
                conf.SetCombinerClass(typeof(IdentityReducer));
            }
            conf.SetCompressMapOutput(compressMapOutputs);
            SequenceFileOutputFormat.SetOutputCompressionType(conf, redCompression);
            try
            {
                if (!fs.Mkdirs(testdir))
                {
                    throw new IOException("Mkdirs failed to create " + testdir.ToString());
                }
                if (!fs.Mkdirs(inDir))
                {
                    throw new IOException("Mkdirs failed to create " + inDir.ToString());
                }
                Path             inFile = new Path(inDir, "part0");
                DataOutputStream f      = fs.Create(inFile);
                f.WriteBytes("Owen was here\n");
                f.WriteBytes("Hadoop is fun\n");
                f.WriteBytes("Is this done, yet?\n");
                f.Close();
                RunningJob rj = JobClient.RunJob(conf);
                NUnit.Framework.Assert.IsTrue("job was complete", rj.IsComplete());
                NUnit.Framework.Assert.IsTrue("job was successful", rj.IsSuccessful());
                Path output = new Path(outDir, Task.GetOutputName(0));
                NUnit.Framework.Assert.IsTrue("reduce output exists " + output, fs.Exists(output)
                                              );
                SequenceFile.Reader rdr = new SequenceFile.Reader(fs, output, conf);
                NUnit.Framework.Assert.AreEqual("is reduce output compressed " + output, redCompression
                                                != SequenceFile.CompressionType.None, rdr.IsCompressed());
                rdr.Close();
            }
            finally
            {
                fs.Delete(testdir, true);
            }
        }
Example No. 8
 /// <exception cref="System.IO.IOException"/>
 private void Verify2Values(Path file)
 {
     SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.File
                                                              (file));
     Assert.Equal(1L, reader.Next((object)null));
     Assert.Equal("one", reader.GetCurrentValue((object)null));
     Assert.Equal(2L, reader.Next((object)null));
     Assert.Equal("two", reader.GetCurrentValue((object)null));
     NUnit.Framework.Assert.IsNull(reader.Next((object)null));
     reader.Close();
 }
Example No. 9
        /// <exception cref="System.IO.IOException"/>
        private static void PrintSequenceFile(FileSystem fs, Path p, Configuration conf)
        {
            SequenceFile.Reader r = new SequenceFile.Reader(fs, p, conf);
            object key            = null;
            object value          = null;

            while ((key = r.Next(key)) != null)
            {
                value = r.GetCurrentValue(value);
                System.Console.Out.WriteLine("  Row: " + key + ", " + value);
            }
            r.Close();
        }
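Example No. 9 iterates with the untyped Next(object)/GetCurrentValue(object) pair. Most of the other examples instead reuse pre-allocated Writable instances with the typed Next(key, value) overload; a minimal sketch of that loop, assuming the same fs, p, and conf and IntWritable records as in Example No. 4:

    SequenceFile.Reader r = new SequenceFile.Reader(fs, p, conf);
    try
    {
        IntWritable key = new IntWritable();   // one reusable key/value pair
        IntWritable value = new IntWritable();
        while (r.Next(key, value))
        {
            System.Console.Out.WriteLine("  Row: " + key + ", " + value);
        }
    }
    finally
    {
        r.Close();
    }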
Example No. 10
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="InstantiationException"/>
        /// <exception cref="System.MemberAccessException"/>
        private static void SequenceFileCodecTest(Configuration conf, int lines, string codecClass
                                                  , int blockSize)
        {
            Path filePath = new Path("SequenceFileCodecTest." + codecClass);

            // Configuration
            conf.SetInt("io.seqfile.compress.blocksize", blockSize);
            // Create the SequenceFile
            FileSystem fs = FileSystem.Get(conf);

            Log.Info("Creating SequenceFile with codec \"" + codecClass + "\"");
            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, filePath, typeof(
                                                                       Text), typeof(Text), SequenceFile.CompressionType.Block, (CompressionCodec)System.Activator.CreateInstance
                                                                       (Runtime.GetType(codecClass)));
            // Write some data
            Log.Info("Writing to SequenceFile...");
            for (int i = 0; i < lines; i++)
            {
                Text key   = new Text("key" + i);
                Text value = new Text("value" + i);
                writer.Append(key, value);
            }
            writer.Close();
            // Read the data back and check
            Log.Info("Reading from the SequenceFile...");
            SequenceFile.Reader reader  = new SequenceFile.Reader(fs, filePath, conf);
            Writable            key_1   = (Writable)System.Activator.CreateInstance(reader.GetKeyClass());
            Writable            value_1 = (Writable)System.Activator.CreateInstance(reader.GetValueClass
                                                                                        ());
            int lc = 0;

            try
            {
                while (reader.Next(key_1, value_1))
                {
                    Assert.Equal("key" + lc, key_1.ToString());
                    Assert.Equal("value" + lc, value_1.ToString());
                    lc++;
                }
            }
            finally
            {
                reader.Close();
            }
            Assert.Equal(lines, lc);
            // Delete temporary files
            fs.Delete(filePath, false);
            Log.Info("SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass +
                     "\"");
        }
Example No. 11
 /// <exception cref="System.IO.IOException"/>
 public virtual void Reduce(WritableComparable key, IEnumerator values, OutputCollector
                            output, Reporter reporter)
 {
     if (first)
     {
         first = false;
         MapOutputFile mapOutputFile = new MROutputFiles();
         mapOutputFile.SetConf(conf);
         Path       input = mapOutputFile.GetInputFile(0);
         FileSystem fs    = FileSystem.Get(conf);
         NUnit.Framework.Assert.IsTrue("reduce input exists " + input, fs.Exists(input));
         SequenceFile.Reader rdr = new SequenceFile.Reader(fs, input, conf);
         NUnit.Framework.Assert.AreEqual("is reduce input compressed " + input, compressInput
                                         , rdr.IsCompressed());
         rdr.Close();
     }
 }
Example No. 12
        /// <exception cref="System.Exception"/>
        public virtual void TestJavaSerialization()
        {
            Path file = new Path(Runtime.GetProperty("test.build.data", ".") + "/testseqser.seq"
                                 );

            fs.Delete(file, true);
            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(long
                                                                                          ), typeof(string));
            writer.Append(1L, "one");
            writer.Append(2L, "two");
            writer.Close();
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
            Assert.Equal(1L, reader.Next((object)null));
            Assert.Equal("one", reader.GetCurrentValue((object)null));
            Assert.Equal(2L, reader.Next((object)null));
            Assert.Equal("two", reader.GetCurrentValue((object)null));
            NUnit.Framework.Assert.IsNull(reader.Next((object)null));
            reader.Close();
        }
Example No. 13
        /// <exception cref="System.IO.IOException"/>
        private void CheckSort(FileSystem fs, int count, int seed, Path file)
        {
            Log.Info("sorting " + count + " records in memory for debug");
            RandomDatum.Generator generator = new RandomDatum.Generator(seed);
            SortedDictionary <RandomDatum, RandomDatum> map = new SortedDictionary <RandomDatum
                                                                                    , RandomDatum>();

            for (int i = 0; i < count; i++)
            {
                generator.Next();
                RandomDatum key   = generator.GetKey();
                RandomDatum value = generator.GetValue();
                map[key] = value;
            }
            Log.Debug("checking order of " + count + " records");
            RandomDatum k = new RandomDatum();
            RandomDatum v = new RandomDatum();
            IEnumerator <KeyValuePair <RandomDatum, RandomDatum> > iterator = map.GetEnumerator(
                );

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.Suffix(".sorted"),
                                                                 conf);
            for (int i_1 = 0; i_1 < count; i_1++)
            {
                KeyValuePair <RandomDatum, RandomDatum> entry = iterator.Next();
                RandomDatum key   = entry.Key;
                RandomDatum value = entry.Value;
                reader.Next(k, v);
                if (!k.Equals(key))
                {
                    throw new RuntimeException("wrong key at " + i_1);
                }
                if (!v.Equals(value))
                {
                    throw new RuntimeException("wrong value at " + i_1);
                }
            }
            reader.Close();
            Log.Debug("successfully checked " + count + " records");
        }
Example No. 14
        /// <exception cref="System.IO.IOException"/>
        private static void CheckOuterConsistency(Job job, Path[] src)
        {
            Path outf = FileOutputFormat.GetOutputPath(job);

            FileStatus[] outlist = cluster.GetFileSystem().ListStatus(outf, new Utils.OutputFileUtils.OutputFilesFilter
                                                                          ());
            NUnit.Framework.Assert.AreEqual("number of part files is more than 1. It is" + outlist
                                            .Length, 1, outlist.Length);
            NUnit.Framework.Assert.IsTrue("output file with zero length" + outlist[0].GetLen(
                                              ), 0 < outlist[0].GetLen());
            SequenceFile.Reader r = new SequenceFile.Reader(cluster.GetFileSystem(), outlist[
                                                                0].GetPath(), job.GetConfiguration());
            IntWritable k = new IntWritable();
            IntWritable v = new IntWritable();

            while (r.Next(k, v))
            {
                NUnit.Framework.Assert.AreEqual("counts does not match", v.Get(), CountProduct(k,
                                                                                               src, job.GetConfiguration()));
            }
            r.Close();
        }
Example No. 15
        /// <exception cref="System.Exception"/>
        public virtual void TestAppend()
        {
            Path file = new Path(RootPath, "testseqappend.seq");

            fs.Delete(file, true);
            Text key1   = new Text("Key1");
            Text value1 = new Text("Value1");
            Text value2 = new Text("Updated");

            SequenceFile.Metadata metadata = new SequenceFile.Metadata();
            metadata.Set(key1, value1);
            SequenceFile.Writer.Option metadataOption = SequenceFile.Writer.Metadata(metadata
                                                                                     );
            SequenceFile.Writer writer = SequenceFile.CreateWriter(conf, SequenceFile.Writer.
                                                                   File(file), SequenceFile.Writer.KeyClass(typeof(long)), SequenceFile.Writer.ValueClass
                                                                       (typeof(string)), metadataOption);
            writer.Append(1L, "one");
            writer.Append(2L, "two");
            writer.Close();
            Verify2Values(file);
            metadata.Set(key1, value2);
            writer = SequenceFile.CreateWriter(conf, SequenceFile.Writer.File(file), SequenceFile.Writer
                                               .KeyClass(typeof(long)), SequenceFile.Writer.ValueClass(typeof(string)), SequenceFile.Writer
                                               .AppendIfExists(true), metadataOption);
            // Verify the Meta data is not changed
            Assert.Equal(value1, writer.metadata.Get(key1));
            writer.Append(3L, "three");
            writer.Append(4L, "four");
            writer.Close();
            VerifyAll4Values(file);
            // Verify the Meta data readable after append
            SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.File
                                                                     (file));
            Assert.Equal(value1, reader.GetMetadata().Get(key1));
            reader.Close();
            // Verify failure if the compression details are different
            try
            {
                SequenceFile.Writer.Option wrongCompressOption = SequenceFile.Writer.Compression(
                    SequenceFile.CompressionType.Record, new GzipCodec());
                writer = SequenceFile.CreateWriter(conf, SequenceFile.Writer.File(file), SequenceFile.Writer
                                                   .KeyClass(typeof(long)), SequenceFile.Writer.ValueClass(typeof(string)), SequenceFile.Writer
                                                   .AppendIfExists(true), wrongCompressOption);
                writer.Close();
                NUnit.Framework.Assert.Fail("Expected IllegalArgumentException for compression options"
                                            );
            }
            catch (ArgumentException)
            {
            }
            // Expected exception. Ignore it
            try
            {
                SequenceFile.Writer.Option wrongCompressOption = SequenceFile.Writer.Compression(
                    SequenceFile.CompressionType.Block, new DefaultCodec());
                writer = SequenceFile.CreateWriter(conf, SequenceFile.Writer.File(file), SequenceFile.Writer
                                                   .KeyClass(typeof(long)), SequenceFile.Writer.ValueClass(typeof(string)), SequenceFile.Writer
                                                   .AppendIfExists(true), wrongCompressOption);
                writer.Close();
                NUnit.Framework.Assert.Fail("Expected IllegalArgumentException for compression options"
                                            );
            }
            catch (ArgumentException)
            {
            }
            // Expected exception. Ignore it
            fs.DeleteOnExit(file);
        }
Example No. 16
 /// <exception cref="System.IO.IOException"/>
 public virtual void Close()
 {
     reader.Close();
 }
Example No. 17
        /// <exception cref="System.Exception"/>
        protected internal virtual void _testMultipleOutputs(bool withCounters)
        {
            Path             inDir  = GetDir(InDir);
            Path             outDir = GetDir(OutDir);
            JobConf          conf   = CreateJobConf();
            FileSystem       fs     = FileSystem.Get(conf);
            DataOutputStream file   = fs.Create(new Path(inDir, "part-0"));

            file.WriteBytes("a\nb\n\nc\nd\ne");
            file.Close();
            file = fs.Create(new Path(inDir, "part-1"));
            file.WriteBytes("a\nb\n\nc\nd\ne");
            file.Close();
            conf.SetJobName("mo");
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetOutputKeyClass(typeof(LongWritable));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetMapOutputKeyClass(typeof(LongWritable));
            conf.SetMapOutputValueClass(typeof(Text));
            conf.SetOutputFormat(typeof(TextOutputFormat));
            MultipleOutputs.AddNamedOutput(conf, "text", typeof(TextOutputFormat), typeof(LongWritable
                                                                                          ), typeof(Text));
            MultipleOutputs.AddMultiNamedOutput(conf, "sequence", typeof(SequenceFileOutputFormat
                                                                         ), typeof(LongWritable), typeof(Text));
            MultipleOutputs.SetCountersEnabled(conf, withCounters);
            conf.SetMapperClass(typeof(TestMultipleOutputs.MOMap));
            conf.SetReducerClass(typeof(TestMultipleOutputs.MOReduce));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            JobClient  jc  = new JobClient(conf);
            RunningJob job = jc.SubmitJob(conf);

            while (!job.IsComplete())
            {
                Sharpen.Thread.Sleep(100);
            }
            // assert number of named output part files
            int namedOutputCount = 0;

            FileStatus[] statuses = fs.ListStatus(outDir);
            foreach (FileStatus status in statuses)
            {
                if (status.GetPath().GetName().Equals("text-m-00000") || status.GetPath().GetName
                        ().Equals("text-m-00001") || status.GetPath().GetName().Equals("text-r-00000") ||
                    status.GetPath().GetName().Equals("sequence_A-m-00000") || status.GetPath().GetName
                        ().Equals("sequence_A-m-00001") || status.GetPath().GetName().Equals("sequence_B-m-00000"
                                                                                             ) || status.GetPath().GetName().Equals("sequence_B-m-00001") || status.GetPath()
                    .GetName().Equals("sequence_B-r-00000") || status.GetPath().GetName().Equals("sequence_C-r-00000"
                                                                                                 ))
                {
                    namedOutputCount++;
                }
            }
            NUnit.Framework.Assert.AreEqual(9, namedOutputCount);
            // assert TextOutputFormat files correctness
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.Open(new Path
                                                                                         (FileOutputFormat.GetOutputPath(conf), "text-r-00000"))));
            int    count = 0;
            string line  = reader.ReadLine();

            while (line != null)
            {
                NUnit.Framework.Assert.IsTrue(line.EndsWith("text"));
                line = reader.ReadLine();
                count++;
            }
            reader.Close();
            NUnit.Framework.Assert.IsFalse(count == 0);
            // assert SequenceOutputFormat files correctness
            SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat
                                                                                 .GetOutputPath(conf), "sequence_B-r-00000"), conf);
            NUnit.Framework.Assert.AreEqual(typeof(LongWritable), seqReader.GetKeyClass());
            NUnit.Framework.Assert.AreEqual(typeof(Text), seqReader.GetValueClass());
            count = 0;
            LongWritable key   = new LongWritable();
            Text         value = new Text();

            while (seqReader.Next(key, value))
            {
                NUnit.Framework.Assert.AreEqual("sequence", value.ToString());
                count++;
            }
            seqReader.Close();
            NUnit.Framework.Assert.IsFalse(count == 0);
            Counters.Group counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName
                                                                 );
            if (!withCounters)
            {
                NUnit.Framework.Assert.AreEqual(0, counters.Size());
            }
            else
            {
                NUnit.Framework.Assert.AreEqual(4, counters.Size());
                NUnit.Framework.Assert.AreEqual(4, counters.GetCounter("text"));
                NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("sequence_A"));
                NUnit.Framework.Assert.AreEqual(4, counters.GetCounter("sequence_B"));
                NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("sequence_C"));
            }
        }
Example No. 18
 /// <exception cref="System.IO.IOException"/>
 public virtual void Close()
 {
     @in.Close();
 }
Example No. 19
        /// <exception cref="System.Exception"/>
        protected internal virtual void _testMultipleOutputs(bool withCounters)
        {
            string        input = "a\nb\nc\nd\ne\nc\nd\ne";
            Configuration conf  = CreateJobConf();
            Job           job   = MapReduceTestUtil.CreateJob(conf, InDir, OutDir, 2, 1, input);

            job.SetJobName("mo");
            MultipleOutputs.AddNamedOutput(job, Text, typeof(TextOutputFormat), typeof(LongWritable
                                                                                       ), typeof(Text));
            MultipleOutputs.AddNamedOutput(job, Sequence, typeof(SequenceFileOutputFormat), typeof(
                                               IntWritable), typeof(Text));
            MultipleOutputs.SetCountersEnabled(job, withCounters);
            job.SetMapperClass(typeof(TestMRMultipleOutputs.MOMap));
            job.SetReducerClass(typeof(TestMRMultipleOutputs.MOReduce));
            job.WaitForCompletion(true);
            // assert number of named output part files
            int        namedOutputCount      = 0;
            int        valueBasedOutputCount = 0;
            FileSystem fs = OutDir.GetFileSystem(conf);

            FileStatus[] statuses = fs.ListStatus(OutDir);
            foreach (FileStatus status in statuses)
            {
                string fileName = status.GetPath().GetName();
                if (fileName.Equals("text-m-00000") || fileName.Equals("text-m-00001") || fileName
                    .Equals("text-r-00000") || fileName.Equals("sequence_A-m-00000") || fileName.Equals
                        ("sequence_A-m-00001") || fileName.Equals("sequence_B-m-00000") || fileName.Equals
                        ("sequence_B-m-00001") || fileName.Equals("sequence_B-r-00000") || fileName.Equals
                        ("sequence_C-r-00000"))
                {
                    namedOutputCount++;
                }
                else
                {
                    if (fileName.Equals("a-r-00000") || fileName.Equals("b-r-00000") || fileName.Equals
                            ("c-r-00000") || fileName.Equals("d-r-00000") || fileName.Equals("e-r-00000"))
                    {
                        valueBasedOutputCount++;
                    }
                }
            }
            NUnit.Framework.Assert.AreEqual(9, namedOutputCount);
            NUnit.Framework.Assert.AreEqual(5, valueBasedOutputCount);
            // assert TextOutputFormat files correctness
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.Open(new Path
                                                                                         (FileOutputFormat.GetOutputPath(job), "text-r-00000"))));
            int    count = 0;
            string line  = reader.ReadLine();

            while (line != null)
            {
                NUnit.Framework.Assert.IsTrue(line.EndsWith(Text));
                line = reader.ReadLine();
                count++;
            }
            reader.Close();
            NUnit.Framework.Assert.IsFalse(count == 0);
            // assert SequenceOutputFormat files correctness
            SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat
                                                                                 .GetOutputPath(job), "sequence_B-r-00000"), conf);
            NUnit.Framework.Assert.AreEqual(typeof(IntWritable), seqReader.GetKeyClass());
            NUnit.Framework.Assert.AreEqual(typeof(Text), seqReader.GetValueClass());
            count = 0;
            IntWritable key   = new IntWritable();
            Text        value = new Text();

            while (seqReader.Next(key, value))
            {
                NUnit.Framework.Assert.AreEqual(Sequence, value.ToString());
                count++;
            }
            seqReader.Close();
            NUnit.Framework.Assert.IsFalse(count == 0);
            if (withCounters)
            {
                CounterGroup counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName
                                                                   );
                NUnit.Framework.Assert.AreEqual(9, counters.Size());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Text).GetValue());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_A").GetValue
                                                    ());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Sequence + "_B").GetValue
                                                    ());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_C").GetValue
                                                    ());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("a").GetValue());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("b").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("c").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("d").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("e").GetValue());
            }
        }
Example No. 20
        /// <exception cref="System.Exception"/>
        private void ValidateOutput(JobConf conf, RunningJob runningJob, IList <string> mapperBadRecords
                                    , IList <string> redBadRecords)
        {
            Log.Info(runningJob.GetCounters().ToString());
            NUnit.Framework.Assert.IsTrue(runningJob.IsSuccessful());
            //validate counters
            Counters counters = runningJob.GetCounters();

            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.MapSkippedRecords
                                                                 ).GetCounter(), mapperBadRecords.Count);
            int mapRecs = input.Count - mapperBadRecords.Count;

            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.MapInputRecords)
                                            .GetCounter(), mapRecs);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.MapOutputRecords
                                                                 ).GetCounter(), mapRecs);
            int redRecs = mapRecs - redBadRecords.Count;

            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceSkippedRecords
                                                                 ).GetCounter(), redBadRecords.Count);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceSkippedGroups
                                                                 ).GetCounter(), redBadRecords.Count);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceInputGroups
                                                                 ).GetCounter(), redRecs);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceInputRecords
                                                                 ).GetCounter(), redRecs);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceOutputRecords
                                                                 ).GetCounter(), redRecs);
            //validate skipped records
            Path skipDir = SkipBadRecords.GetSkipOutputPath(conf);

            NUnit.Framework.Assert.IsNotNull(skipDir);
            Path[]         skips      = FileUtil.Stat2Paths(GetFileSystem().ListStatus(skipDir));
            IList <string> mapSkipped = new AList <string>();
            IList <string> redSkipped = new AList <string>();

            foreach (Path skipPath in skips)
            {
                Log.Info("skipPath: " + skipPath);
                SequenceFile.Reader reader = new SequenceFile.Reader(GetFileSystem(), skipPath, conf
                                                                     );
                object key   = ReflectionUtils.NewInstance(reader.GetKeyClass(), conf);
                object value = ReflectionUtils.NewInstance(reader.GetValueClass(), conf);
                key = reader.Next(key);
                while (key != null)
                {
                    value = reader.GetCurrentValue(value);
                    Log.Debug("key:" + key + " value:" + value.ToString());
                    if (skipPath.GetName().Contains("_r_"))
                    {
                        redSkipped.AddItem(value.ToString());
                    }
                    else
                    {
                        mapSkipped.AddItem(value.ToString());
                    }
                    key = reader.Next(key);
                }
                reader.Close();
            }
            NUnit.Framework.Assert.IsTrue(mapSkipped.ContainsAll(mapperBadRecords));
            NUnit.Framework.Assert.IsTrue(redSkipped.ContainsAll(redBadRecords));
            Path[] outputFiles = FileUtil.Stat2Paths(GetFileSystem().ListStatus(GetOutputDir(
                                                                                    ), new Utils.OutputFileUtils.OutputFilesFilter()));
            IList <string> mapperOutput = GetProcessed(input, mapperBadRecords);

            Log.Debug("mapperOutput " + mapperOutput.Count);
            IList <string> reducerOutput = GetProcessed(mapperOutput, redBadRecords);

            Log.Debug("reducerOutput " + reducerOutput.Count);
            if (outputFiles.Length > 0)
            {
                InputStream    @is     = GetFileSystem().Open(outputFiles[0]);
                BufferedReader reader  = new BufferedReader(new InputStreamReader(@is));
                string         line    = reader.ReadLine();
                int            counter = 0;
                while (line != null)
                {
                    counter++;
                    StringTokenizer tokeniz = new StringTokenizer(line, "\t");
                    string          key     = tokeniz.NextToken();
                    string          value   = tokeniz.NextToken();
                    Log.Debug("Output: key:" + key + "  value:" + value);
                    NUnit.Framework.Assert.IsTrue(value.Contains("hello"));
                    NUnit.Framework.Assert.IsTrue(reducerOutput.Contains(value));
                    line = reader.ReadLine();
                }
                reader.Close();
                NUnit.Framework.Assert.AreEqual(reducerOutput.Count, counter);
            }
        }
Example No. 21
        /// <exception cref="System.Exception"/>
        private static void Launch()
        {
            //
            // Generate distribution of ints.  This is the answer key.
            //
            Configuration conf       = new Configuration();
            int           countsToGo = counts;

            int[] dist = new int[range];
            for (int i = 0; i < range; i++)
            {
                double avgInts = (1.0 * countsToGo) / (range - i);
                dist[i] = (int)Math.Max(0, Math.Round(avgInts + (Math.Sqrt(avgInts) * r.NextGaussian
                                                                     ())));
                countsToGo -= dist[i];
            }
            if (countsToGo > 0)
            {
                dist[dist.Length - 1] += countsToGo;
            }
            //
            // Write the answer key to a file.
            //
            Path testdir = new Path(TestDir.GetAbsolutePath());

            if (!fs.Mkdirs(testdir))
            {
                throw new IOException("Mkdirs failed to create " + testdir.ToString());
            }
            Path randomIns = new Path(testdir, "genins");

            if (!fs.Mkdirs(randomIns))
            {
                throw new IOException("Mkdirs failed to create " + randomIns.ToString());
            }
            Path answerkey = new Path(randomIns, "answer.key");

            SequenceFile.Writer @out = SequenceFile.CreateWriter(fs, conf, answerkey, typeof(
                                                                     IntWritable), typeof(IntWritable), SequenceFile.CompressionType.None);
            try
            {
                for (int i_1 = 0; i_1 < range; i_1++)
                {
                    @out.Append(new IntWritable(i_1), new IntWritable(dist[i_1]));
                }
            }
            finally
            {
                @out.Close();
            }
            PrintFiles(randomIns, conf);
            //
            // Now we need to generate the random numbers according to
            // the above distribution.
            //
            // We create a lot of map tasks, each of which takes at least
            // one "line" of the distribution.  (That is, a certain number
            // X is to be generated Y number of times.)
            //
            // A map task emits Y key/val pairs.  The val is X.  The key
            // is a randomly-generated number.
            //
            // The reduce task gets its input sorted by key.  That is, sorted
            // in random order.  It then emits a single line of text
            // for the given values.  It does not emit the key.
            //
            // Because there's just one reduce task, we emit a single big
            // file of random numbers.
            //
            Path randomOuts = new Path(testdir, "genouts");

            fs.Delete(randomOuts, true);
            Job genJob = Job.GetInstance(conf);

            FileInputFormat.SetInputPaths(genJob, randomIns);
            genJob.SetInputFormatClass(typeof(SequenceFileInputFormat));
            genJob.SetMapperClass(typeof(TestMapReduce.RandomGenMapper));
            FileOutputFormat.SetOutputPath(genJob, randomOuts);
            genJob.SetOutputKeyClass(typeof(IntWritable));
            genJob.SetOutputValueClass(typeof(IntWritable));
            genJob.SetReducerClass(typeof(TestMapReduce.RandomGenReducer));
            genJob.SetNumReduceTasks(1);
            genJob.WaitForCompletion(true);
            PrintFiles(randomOuts, conf);
            //
            // Next, we read the big file in and regenerate the
            // original map.  It's split into a number of parts.
            // (That number is 'intermediateReduces'.)
            //
            // We have many map tasks, each of which read at least one
            // of the output numbers.  For each number read in, the
            // map task emits a key/value pair where the key is the
            // number and the value is "1".
            //
            // We have a single reduce task, which receives its input
            // sorted by the key emitted above.  For each key, there will
            // be a certain number of "1" values.  The reduce task sums
            // these values to compute how many times the given key was
            // emitted.
            //
            // The reduce task then emits a key/val pair where the key
            // is the number in question, and the value is the number of
            // times the key was emitted.  This is the same format as the
            // original answer key (except that numbers emitted zero times
            // will not appear in the regenerated key.)  The answer set
            // is split into a number of pieces.  A final MapReduce job
            // will merge them.
            //
            // There's not really a need to go to 10 reduces here
            // instead of 1.  But we want to test what happens when
            // you have multiple reduces at once.
            //
            int  intermediateReduces = 10;
            Path intermediateOuts    = new Path(testdir, "intermediateouts");

            fs.Delete(intermediateOuts, true);
            Job checkJob = Job.GetInstance(conf);

            FileInputFormat.SetInputPaths(checkJob, randomOuts);
            checkJob.SetMapperClass(typeof(TestMapReduce.RandomCheckMapper));
            FileOutputFormat.SetOutputPath(checkJob, intermediateOuts);
            checkJob.SetOutputKeyClass(typeof(IntWritable));
            checkJob.SetOutputValueClass(typeof(IntWritable));
            checkJob.SetOutputFormatClass(typeof(MapFileOutputFormat));
            checkJob.SetReducerClass(typeof(TestMapReduce.RandomCheckReducer));
            checkJob.SetNumReduceTasks(intermediateReduces);
            checkJob.WaitForCompletion(true);
            PrintFiles(intermediateOuts, conf);
            //
            // OK, now we take the output from the last job and
            // merge it down to a single file.  The map() and reduce()
            // functions don't really do anything except reemit tuples.
            // But by having a single reduce task here, we end up merging
            // all the files.
            //
            Path finalOuts = new Path(testdir, "finalouts");

            fs.Delete(finalOuts, true);
            Job mergeJob = Job.GetInstance(conf);

            FileInputFormat.SetInputPaths(mergeJob, intermediateOuts);
            mergeJob.SetInputFormatClass(typeof(SequenceFileInputFormat));
            mergeJob.SetMapperClass(typeof(TestMapReduce.MergeMapper));
            FileOutputFormat.SetOutputPath(mergeJob, finalOuts);
            mergeJob.SetOutputKeyClass(typeof(IntWritable));
            mergeJob.SetOutputValueClass(typeof(IntWritable));
            mergeJob.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
            mergeJob.SetReducerClass(typeof(TestMapReduce.MergeReducer));
            mergeJob.SetNumReduceTasks(1);
            mergeJob.WaitForCompletion(true);
            PrintFiles(finalOuts, conf);
            //
            // Finally, we compare the reconstructed answer key with the
            // original one.  Remember, we need to ignore zero-count items
            // in the original key.
            //
            bool success       = true;
            Path recomputedkey = new Path(finalOuts, "part-r-00000");

            SequenceFile.Reader @in = new SequenceFile.Reader(fs, recomputedkey, conf);
            int totalseen           = 0;

            try
            {
                IntWritable key = new IntWritable();
                IntWritable val = new IntWritable();
                for (int i_1 = 0; i_1 < range; i_1++)
                {
                    if (dist[i_1] == 0)
                    {
                        continue;
                    }
                    if (!@in.Next(key, val))
                    {
                        System.Console.Error.WriteLine("Cannot read entry " + i_1);
                        success = false;
                        break;
                    }
                    else
                    {
                        if (!((key.Get() == i_1) && (val.Get() == dist[i_1])))
                        {
                            System.Console.Error.WriteLine("Mismatch!  Pos=" + key.Get() + ", i=" + i_1 + ", val="
                                                           + val.Get() + ", dist[i]=" + dist[i_1]);
                            success = false;
                        }
                        totalseen += val.Get();
                    }
                }
                if (success)
                {
                    if (@in.Next(key, val))
                    {
                        System.Console.Error.WriteLine("Unnecessary lines in recomputed key!");
                        success = false;
                    }
                }
            }
            finally
            {
                @in.Close();
            }
            int originalTotal = 0;

            for (int i_2 = 0; i_2 < dist.Length; i_2++)
            {
                originalTotal += dist[i_2];
            }
            System.Console.Out.WriteLine("Original sum: " + originalTotal);
            System.Console.Out.WriteLine("Recomputed sum: " + totalseen);
            //
            // Write to "results" whether the test succeeded or not.
            //
            Path           resultFile = new Path(testdir, "results");
            BufferedWriter bw         = new BufferedWriter(new OutputStreamWriter(fs.Create(resultFile
                                                                                            )));

            try
            {
                bw.Write("Success=" + success + "\n");
                System.Console.Out.WriteLine("Success=" + success);
            }
            finally
            {
                bw.Close();
            }
            NUnit.Framework.Assert.IsTrue("testMapRed failed", success);
            fs.Delete(testdir, true);
        }
Example No. 22
            /// <exception cref="System.IO.IOException"/>
            internal static void CheckRecords(Configuration defaults, Path sortInput, Path sortOutput
                                              )
            {
                FileSystem inputfs   = sortInput.GetFileSystem(defaults);
                FileSystem outputfs  = sortOutput.GetFileSystem(defaults);
                FileSystem defaultfs = FileSystem.Get(defaults);
                JobConf    jobConf   = new JobConf(defaults, typeof(SortValidator.RecordStatsChecker));

                jobConf.SetJobName("sortvalidate-recordstats-checker");
                int noSortReduceTasks = outputfs.ListStatus(sortOutput, sortPathsFilter).Length;

                jobConf.SetInt(SortReduces, noSortReduceTasks);
                int noSortInputpaths = inputfs.ListStatus(sortInput).Length;

                jobConf.SetInputFormat(typeof(SortValidator.RecordStatsChecker.NonSplitableSequenceFileInputFormat
                                              ));
                jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
                jobConf.SetOutputKeyClass(typeof(IntWritable));
                jobConf.SetOutputValueClass(typeof(SortValidator.RecordStatsChecker.RecordStatsWritable
                                                   ));
                jobConf.SetMapperClass(typeof(SortValidator.RecordStatsChecker.Map));
                jobConf.SetCombinerClass(typeof(SortValidator.RecordStatsChecker.Reduce));
                jobConf.SetReducerClass(typeof(SortValidator.RecordStatsChecker.Reduce));
                jobConf.SetNumMapTasks(noSortReduceTasks);
                jobConf.SetNumReduceTasks(1);
                FileInputFormat.SetInputPaths(jobConf, sortInput);
                FileInputFormat.AddInputPath(jobConf, sortOutput);
                Path outputPath = new Path(new Path("/tmp", "sortvalidate"), UUID.RandomUUID().ToString
                                               ());

                if (defaultfs.Exists(outputPath))
                {
                    defaultfs.Delete(outputPath, true);
                }
                FileOutputFormat.SetOutputPath(jobConf, outputPath);
                // Uncomment to run locally in a single process
                //job_conf.set(JTConfig.JT, "local");
                Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
                System.Console.Out.WriteLine("\nSortValidator.RecordStatsChecker: Validate sort "
                                             + "from " + inputPaths[0] + " (" + noSortInputpaths + " files), " + inputPaths[
                                                 1] + " (" + noSortReduceTasks + " files) into " + FileOutputFormat.GetOutputPath
                                                 (jobConf) + " with 1 reducer.");
                DateTime startTime = new DateTime();

                System.Console.Out.WriteLine("Job started: " + startTime);
                JobClient.RunJob(jobConf);
                try
                {
                    DateTime end_time = new DateTime();
                    System.Console.Out.WriteLine("Job ended: " + end_time);
                    System.Console.Out.WriteLine("The job took " + (end_time.GetTime() - startTime.GetTime
                                                                        ()) / 1000 + " seconds.");
                    // Check to ensure that the statistics of the
                    // framework's sort-input and sort-output match
                    SequenceFile.Reader stats = new SequenceFile.Reader(defaultfs, new Path(outputPath
                                                                                            , "part-00000"), defaults);
                    try
                    {
                        IntWritable k1 = new IntWritable();
                        IntWritable k2 = new IntWritable();
                        SortValidator.RecordStatsChecker.RecordStatsWritable v1 = new SortValidator.RecordStatsChecker.RecordStatsWritable
                                                                                      ();
                        SortValidator.RecordStatsChecker.RecordStatsWritable v2 = new SortValidator.RecordStatsChecker.RecordStatsWritable
                                                                                      ();
                        if (!stats.Next(k1, v1))
                        {
                            throw new IOException("Failed to read record #1 from reduce's output");
                        }
                        if (!stats.Next(k2, v2))
                        {
                            throw new IOException("Failed to read record #2 from reduce's output");
                        }
                        if ((v1.GetBytes() != v2.GetBytes()) || (v1.GetRecords() != v2.GetRecords()) || v1
                            .GetChecksum() != v2.GetChecksum())
                        {
                            throw new IOException("(" + v1.GetBytes() + ", " + v1.GetRecords() + ", " + v1.GetChecksum
                                                      () + ") v/s (" + v2.GetBytes() + ", " + v2.GetRecords() + ", " + v2.GetChecksum(
                                                      ) + ")");
                        }
                    }
                    finally
                    {
                        stats.Close();
                    }
                }
                finally
                {
                    defaultfs.Delete(outputPath, true);
                }
            }
Example No. 23
        /// <exception cref="System.Exception"/>
        public virtual void TestNestedJoin()
        {
            // outer(inner(S1,...,Sn),outer(S1,...Sn))
            int     Sources = 3;
            int     Items   = (Sources + 1) * (Sources + 1);
            JobConf job     = new JobConf();
            Path    @base   = cluster.GetFileSystem().MakeQualified(new Path("/nested"));

            int[][] source = new int[Sources][];
            for (int i = 0; i < Sources; ++i)
            {
                source[i] = new int[Items];
                for (int j = 0; j < Items; ++j)
                {
                    source[i][j] = (i + 2) * (j + 1);
                }
            }
            Path[] src = new Path[Sources];
            SequenceFile.Writer[] @out = CreateWriters(@base, job, Sources, src);
            IntWritable           k    = new IntWritable();

            for (int i_1 = 0; i_1 < Sources; ++i_1)
            {
                IntWritable v = new IntWritable();
                v.Set(i_1);
                for (int j = 0; j < Items; ++j)
                {
                    k.Set(source[i_1][j]);
                    @out[i_1].Append(k, v);
                }
                @out[i_1].Close();
            }
            @out = null;
            StringBuilder sb = new StringBuilder();

            sb.Append("outer(inner(");
            for (int i_2 = 0; i_2 < Sources; ++i_2)
            {
                sb.Append(CompositeInputFormat.Compose(typeof(SequenceFileInputFormat), src[i_2].
                                                       ToString()));
                if (i_2 + 1 != Sources)
                {
                    sb.Append(",");
                }
            }
            sb.Append("),outer(");
            sb.Append(CompositeInputFormat.Compose(typeof(TestDatamerge.Fake_IF), "foobar"));
            sb.Append(",");
            for (int i_3 = 0; i_3 < Sources; ++i_3)
            {
                sb.Append(CompositeInputFormat.Compose(typeof(SequenceFileInputFormat), src[i_3].
                                                       ToString()));
                sb.Append(",");
            }
            sb.Append(CompositeInputFormat.Compose(typeof(TestDatamerge.Fake_IF), "raboof") +
                      "))");
            job.Set("mapreduce.join.expr", sb.ToString());
            job.SetInputFormat(typeof(CompositeInputFormat));
            Path outf = new Path(@base, "out");

            FileOutputFormat.SetOutputPath(job, outf);
            TestDatamerge.Fake_IF.SetKeyClass(job, typeof(IntWritable));
            TestDatamerge.Fake_IF.SetValClass(job, typeof(IntWritable));
            job.SetMapperClass(typeof(IdentityMapper));
            job.SetReducerClass(typeof(IdentityReducer));
            job.SetNumReduceTasks(0);
            job.SetOutputKeyClass(typeof(IntWritable));
            job.SetOutputValueClass(typeof(TupleWritable));
            job.SetOutputFormat(typeof(SequenceFileOutputFormat));
            JobClient.RunJob(job);
            FileStatus[] outlist = cluster.GetFileSystem().ListStatus(outf, new Utils.OutputFileUtils.OutputFilesFilter
                                                                          ());
            NUnit.Framework.Assert.AreEqual(1, outlist.Length);
            NUnit.Framework.Assert.IsTrue(0 < outlist[0].GetLen());
            SequenceFile.Reader r = new SequenceFile.Reader(cluster.GetFileSystem(), outlist[0].GetPath(), job);
            TupleWritable v_1 = new TupleWritable();

            while (r.Next(k, v_1))
            {
                NUnit.Framework.Assert.IsFalse(((TupleWritable)v_1.Get(1)).Has(0));
                NUnit.Framework.Assert.IsFalse(((TupleWritable)v_1.Get(1)).Has(Sources + 1));
                bool chk = true;
                int  ki  = k.Get();
                for (int i_4 = 2; i_4 < Sources + 2; ++i_4)
                {
                    if ((ki % i_4) == 0 && ki <= i_4 * Items)
                    {
                        NUnit.Framework.Assert.AreEqual(i_4 - 2, ((IntWritable)((TupleWritable)v_1.Get(1)).Get(i_4 - 1)).Get());
                    }
                    else
                    {
                        chk = false;
                    }
                }
                if (chk)
                {
                    // present in all sources; chk inner
                    NUnit.Framework.Assert.IsTrue(v_1.Has(0));
                    for (int i_5 = 0; i_5 < Sources; ++i_5)
                    {
                        NUnit.Framework.Assert.IsTrue(((TupleWritable)v_1.Get(0)).Has(i_5));
                    }
                }
                else
                {
                    // should not be present in inner join
                    NUnit.Framework.Assert.IsFalse(v_1.Has(0));
                }
            }
            r.Close();
            @base.GetFileSystem(job).Delete(@base, true);
        }
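A minimal sketch of the kind of expression the test above assembles: assuming two hypothetical source paths under /nested, it composes a single inner() join and registers it the same way TestNestedJoin does, using only CompositeInputFormat.Compose and JobConf.Set from the code above.

            // Sketch only: hypothetical paths; the real test nests inner(S1..Sn)
            // inside outer(Fake_IF, S1..Sn, Fake_IF) as built in the loop above.
            JobConf demo = new JobConf();
            string expr = "inner("
                          + CompositeInputFormat.Compose(typeof(SequenceFileInputFormat), "/nested/0") + ","
                          + CompositeInputFormat.Compose(typeof(SequenceFileInputFormat), "/nested/1")
                          + ")";
            demo.Set("mapreduce.join.expr", expr);
            demo.SetInputFormat(typeof(CompositeInputFormat));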
Example No. 24
0
        /// <exception cref="System.IO.IOException"/>
        private void ReadTest(FileSystem fs, int count, int seed, Path file)
        {
            Log.Debug("reading " + count + " records");
            SequenceFile.Reader   reader    = new SequenceFile.Reader(fs, file, conf);
            RandomDatum.Generator generator = new RandomDatum.Generator(seed);
            RandomDatum           k         = new RandomDatum();
            RandomDatum           v         = new RandomDatum();
            DataOutputBuffer      rawKey    = new DataOutputBuffer();

            SequenceFile.ValueBytes rawValue = reader.CreateValueBytes();
            for (int i = 0; i < count; i++)
            {
                generator.Next();
                RandomDatum key   = generator.GetKey();
                RandomDatum value = generator.GetValue();
                try
                {
                    if ((i % 5) == 0)
                    {
                        // Testing 'raw' apis
                        rawKey.Reset();
                        reader.NextRaw(rawKey, rawValue);
                    }
                    else
                    {
                        // Testing 'non-raw' apis
                        if ((i % 2) == 0)
                        {
                            reader.Next(k);
                            reader.GetCurrentValue(v);
                        }
                        else
                        {
                            reader.Next(k, v);
                        }
                        // Check
                        if (!k.Equals(key))
                        {
                            throw new RuntimeException("wrong key at " + i);
                        }
                        if (!v.Equals(value))
                        {
                            throw new RuntimeException("wrong value at " + i);
                        }
                    }
                }
                catch (IOException ioe)
                {
                    Log.Info("Problem on row " + i);
                    Log.Info("Expected key = " + key);
                    Log.Info("Expected len = " + key.GetLength());
                    Log.Info("Actual key = " + k);
                    Log.Info("Actual len = " + k.GetLength());
                    Log.Info("Expected value = " + value);
                    Log.Info("Expected len = " + value.GetLength());
                    Log.Info("Actual value = " + v);
                    Log.Info("Actual len = " + v.GetLength());
                    Log.Info("Key equals: " + k.Equals(key));
                    Log.Info("value equals: " + v.Equals(value));
                    throw;
                }
            }
            reader.Close();
        }
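ReadTest only verifies; it assumes the file was written from the same seed. A minimal write-side sketch (hypothetical helper name, not the original test's writer) that would produce data the reader above accepts, reusing the same conf field as ReadTest:

        /// <exception cref="System.IO.IOException"/>
        private void WriteTestSketch(FileSystem fs, int count, int seed, Path file)
        {
            // Same seeded generator as ReadTest, so keys and values line up on read.
            RandomDatum.Generator generator = new RandomDatum.Generator(seed);
            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file,
                typeof(RandomDatum), typeof(RandomDatum), SequenceFile.CompressionType.None);
            try
            {
                for (int i = 0; i < count; i++)
                {
                    generator.Next();
                    writer.Append(generator.GetKey(), generator.GetValue());
                }
            }
            finally
            {
                writer.Close();
            }
        }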
Example No. 25
0
 /// <exception cref="System.IO.IOException"/>
 public override void Close()
 {
     @in.Close();
 }
Example No. 26
0
        /// <summary>Run a map/reduce job for estimating Pi.</summary>
        /// <returns>the estimated value of Pi</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="System.Exception"/>
        public static BigDecimal EstimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        {
            Job job = Job.GetInstance(conf);

            //setup job conf
            job.SetJobName(typeof(QuasiMonteCarlo).Name);
            job.SetJarByClass(typeof(QuasiMonteCarlo));
            job.SetInputFormatClass(typeof(SequenceFileInputFormat));
            job.SetOutputKeyClass(typeof(BooleanWritable));
            job.SetOutputValueClass(typeof(LongWritable));
            job.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
            job.SetMapperClass(typeof(QuasiMonteCarlo.QmcMapper));
            job.SetReducerClass(typeof(QuasiMonteCarlo.QmcReducer));
            job.SetNumReduceTasks(1);
            // turn off speculative execution, because DFS doesn't handle
            // multiple writers to the same file.
            job.SetSpeculativeExecution(false);
            //setup input/output directories
            Path inDir  = new Path(tmpDir, "in");
            Path outDir = new Path(tmpDir, "out");

            FileInputFormat.SetInputPaths(job, inDir);
            FileOutputFormat.SetOutputPath(job, outDir);
            FileSystem fs = FileSystem.Get(conf);

            if (fs.Exists(tmpDir))
            {
                throw new IOException("Tmp directory " + fs.MakeQualified(tmpDir) + " already exists.  Please remove it first."
                                      );
            }
            if (!fs.Mkdirs(inDir))
            {
                throw new IOException("Cannot create input directory " + inDir);
            }
            try
            {
                //generate an input file for each map task
                for (int i = 0; i < numMaps; ++i)
                {
                    Path                file   = new Path(inDir, "part" + i);
                    LongWritable        offset = new LongWritable(i * numPoints);
                    LongWritable        size   = new LongWritable(numPoints);
                    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file,
                        typeof(LongWritable), typeof(LongWritable), SequenceFile.CompressionType.None);
                    try
                    {
                        writer.Append(offset, size);
                    }
                    finally
                    {
                        writer.Close();
                    }
                    System.Console.Out.WriteLine("Wrote input for Map #" + i);
                }
                //start a map/reduce job
                System.Console.Out.WriteLine("Starting Job");
                long startTime = Runtime.CurrentTimeMillis();
                job.WaitForCompletion(true);
                double duration = (Runtime.CurrentTimeMillis() - startTime) / 1000.0;
                System.Console.Out.WriteLine("Job Finished in " + duration + " seconds");
                //read outputs
                Path                inFile     = new Path(outDir, "reduce-out");
                LongWritable        numInside  = new LongWritable();
                LongWritable        numOutside = new LongWritable();
                SequenceFile.Reader reader     = new SequenceFile.Reader(fs, inFile, conf);
                try
                {
                    reader.Next(numInside, numOutside);
                }
                finally
                {
                    reader.Close();
                }
                //compute estimated value
                BigDecimal numTotal = BigDecimal.ValueOf(numMaps).Multiply(BigDecimal.ValueOf(numPoints));
                return (BigDecimal.ValueOf(4).SetScale(20)
                        .Multiply(BigDecimal.ValueOf(numInside.Get()))
                        .Divide(numTotal, RoundingMode.HalfUp));
            }
            finally
            {
                fs.Delete(tmpDir, true);
            }
        }
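The BigDecimal block above is the usual quarter-circle estimate, pi ≈ 4 · numInside / (numMaps · numPoints). A rough double-precision sketch with hypothetical counts:

            // Hypothetical counts; the real code reads numInside from reduce-out
            // and keeps 20 digits of scale with BigDecimal.
            long inside = 785398163L;      // points inside the quarter circle
            long total  = 1000000000L;     // numMaps * numPoints
            double piEstimate = 4.0 * inside / total;
            System.Console.Out.WriteLine("pi ~= " + piEstimate);   // 3.141592652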