예제 #1
0
 /// <summary>
 /// Creates a writer for the directory named <paramref name="dirName"/>, forwarding the key
 /// class, value class, compression settings and progress reporter as writer options.
 /// </summary>
 /// <remarks><paramref name="fs"/> is accepted but is not forwarded by this constructor.</remarks>
 public Writer(
     Configuration conf,
     FileSystem fs,
     string dirName,
     Type keyClass,
     Type valClass,
     SequenceFile.CompressionType compress,
     CompressionCodec codec,
     Progressable progress)
     : this(
         conf,
         new Path(dirName),
         KeyClass(keyClass),
         ValueClass(valClass),
         Compression(compress, codec),
         Progressable(progress))
 {
 }
예제 #2
0
 /// <summary>
 /// Creates a writer for the directory named <paramref name="dirName"/>, forwarding the key
 /// comparator, value class, compression type and progress reporter as writer options.
 /// </summary>
 /// <remarks><paramref name="fs"/> is accepted but is not forwarded by this constructor.</remarks>
 public Writer(
     Configuration conf,
     FileSystem fs,
     string dirName,
     WritableComparator comparator,
     Type valClass,
     SequenceFile.CompressionType compress,
     Progressable progress)
     : this(
         conf,
         new Path(dirName),
         Comparator(comparator),
         ValueClass(valClass),
         Compression(compress),
         Progressable(progress))
 {
 }
예제 #3
0
        /// <summary>
        /// Runs a small job with the local framework over three lines of text and verifies that
        /// the reduce output sequence file is compressed exactly when
        /// <paramref name="redCompression"/> is not <c>None</c>.
        /// </summary>
        /// <param name="compressMapOutputs">Value handed to <c>SetCompressMapOutput</c>.</param>
        /// <param name="redCompression">Compression type configured for the job output.</param>
        /// <param name="includeCombine">When true, <c>IdentityReducer</c> is set as the combiner.</param>
        /// <exception cref="System.Exception"/>
        private void CheckCompression(bool compressMapOutputs, SequenceFile.CompressionType
                                      redCompression, bool includeCombine)
        {
            JobConf    conf    = new JobConf(typeof(TestMapRed));
            Path       testdir = new Path(TestDir.GetAbsolutePath());
            Path       inDir   = new Path(testdir, "in");
            Path       outDir  = new Path(testdir, "out");
            FileSystem fs      = FileSystem.Get(conf);

            // Start from a clean test directory.
            fs.Delete(testdir, true);
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetMapperClass(typeof(TestMapRed.MyMap));
            conf.SetReducerClass(typeof(TestMapRed.MyReduce));
            conf.SetOutputKeyClass(typeof(Text));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
            conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
            if (includeCombine)
            {
                conf.SetCombinerClass(typeof(IdentityReducer));
            }
            conf.SetCompressMapOutput(compressMapOutputs);
            SequenceFileOutputFormat.SetOutputCompressionType(conf, redCompression);
            try
            {
                if (!fs.Mkdirs(testdir))
                {
                    throw new IOException("Mkdirs failed to create " + testdir.ToString());
                }
                if (!fs.Mkdirs(inDir))
                {
                    throw new IOException("Mkdirs failed to create " + inDir.ToString());
                }
                Path             inFile = new Path(inDir, "part0");
                DataOutputStream f      = fs.Create(inFile);
                try
                {
                    f.WriteBytes("Owen was here\n");
                    f.WriteBytes("Hadoop is fun\n");
                    f.WriteBytes("Is this done, yet?\n");
                }
                finally
                {
                    // Close the input file even when one of the writes fails.
                    f.Close();
                }
                RunningJob rj = JobClient.RunJob(conf);
                NUnit.Framework.Assert.IsTrue("job was complete", rj.IsComplete());
                NUnit.Framework.Assert.IsTrue("job was successful", rj.IsSuccessful());
                Path output = new Path(outDir, Task.GetOutputName(0));
                NUnit.Framework.Assert.IsTrue("reduce output exists " + output, fs.Exists(output)
                                              );
                SequenceFile.Reader rdr = new SequenceFile.Reader(fs, output, conf);
                try
                {
                    NUnit.Framework.Assert.AreEqual("is reduce output compressed " + output, redCompression
                                                    != SequenceFile.CompressionType.None, rdr.IsCompressed());
                }
                finally
                {
                    // A failed assertion must not leave the reader open.
                    rdr.Close();
                }
            }
            finally
            {
                // Always remove the test directory, whether or not the checks passed.
                fs.Delete(testdir, true);
            }
        }
예제 #4
0
        /// <summary>
        /// Writes a MapFile at <paramref name="path"/> holding <paramref name="records"/> entries.
        /// Each entry uses the zero-padded, three-digit record index ("000", "001", ...) as both
        /// key and value.
        /// </summary>
        /// <param name="conf">Configuration handed to the writer.</param>
        /// <param name="fs">Not used by this method.</param>
        /// <param name="path">Location of the MapFile to create.</param>
        /// <param name="codec">Codec passed to the writer's compression option.</param>
        /// <param name="type">Compression type passed to the writer's compression option.</param>
        /// <param name="records">Number of entries to append.</param>
        /// <exception cref="System.IO.IOException"/>
        private static void CreateMapFile(Configuration conf, FileSystem fs, Path path, CompressionCodec
                                          codec, SequenceFile.CompressionType type, int records)
        {
            MapFile.Writer writer = new MapFile.Writer(
                conf,
                path,
                MapFile.Writer.KeyClass(typeof(Text)),
                MapFile.Writer.ValueClass(typeof(Text)),
                MapFile.Writer.Compression(type, codec));
            try
            {
                Text key = new Text();

                for (int j = 0; j < records; j++)
                {
                    // "%03d" is a Java format string; string.Format would return it verbatim in .NET.
                    // "D3" produces the intended zero-padded three-digit key.
                    key.Set(j.ToString("D3", System.Globalization.CultureInfo.InvariantCulture));
                    writer.Append(key, key);
                }
            }
            finally
            {
                // Close the writer even when an append fails.
                writer.Close();
            }
        }
        /// <summary>
        /// Writes <c>Records</c> identical key/value pairs to a sequence file using
        /// <paramref name="compressionType"/>, merges that single file through a
        /// <c>SequenceFile.Sorter</c>, and asserts the number of merged records and the final
        /// reported progress.
        /// </summary>
        /// <param name="compressionType">Compression type used when writing the sequence file.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void RunTest(SequenceFile.CompressionType compressionType)
        {
            JobConf job = new JobConf();
            FileSystem localFs = FileSystem.GetLocal(job);
            Path baseDir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path seqFile = new Path(baseDir, "test.seq");
            Path mergeTmp = new Path(baseDir, "tmp");

            // Reset the working directory and register it as the job input.
            localFs.Delete(baseDir, true);
            FileInputFormat.SetInputPaths(job, baseDir);
            localFs.Mkdirs(mergeTmp);

            LongWritable recordKey = new LongWritable();
            Text recordValue = new Text();
            SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
                localFs, job, seqFile, typeof(LongWritable), typeof(Text), compressionType, new DefaultCodec());
            try
            {
                for (int written = 0; written < Records; ++written)
                {
                    recordKey.Set(1234);
                    recordValue.Set("valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue");
                    seqWriter.Append(recordKey, recordValue);
                }
            }
            finally
            {
                seqWriter.Close();
            }

            long writtenLength = localFs.GetFileStatus(seqFile).GetLen();
            Log.Info("With compression = " + compressionType + ": compressed length = " + writtenLength);

            SequenceFile.Sorter sorter = new SequenceFile.Sorter(
                localFs,
                job.GetOutputKeyComparator(),
                job.GetMapOutputKeyClass(),
                job.GetMapOutputValueClass(),
                job);
            Path[] inputs = new Path[] { seqFile };
            SequenceFile.Sorter.RawKeyValueIterator merged = sorter.Merge(inputs, mergeTmp, false);

            // Drain the merge iterator, counting every record it yields.
            int mergedCount = 0;
            while (merged.Next())
            {
                mergedCount += 1;
            }

            NUnit.Framework.Assert.AreEqual(Records, mergedCount);
            NUnit.Framework.Assert.AreEqual(1.0f, merged.GetProgress().Get());
        }
예제 #6
0
 /// <summary>
 /// Deletes <paramref name="file"/> and rewrites it as a sequence file containing
 /// <paramref name="count"/> key/value pairs produced by a <c>RandomDatum.Generator</c>
 /// seeded with <paramref name="seed"/>.
 /// </summary>
 /// <param name="fs">File system on which the file is deleted and created.</param>
 /// <param name="count">Number of records to append.</param>
 /// <param name="seed">Seed passed to the generator.</param>
 /// <param name="file">Path of the sequence file.</param>
 /// <param name="compressionType">Compression type passed to the writer.</param>
 /// <param name="codec">Codec passed to the writer.</param>
 /// <exception cref="System.IO.IOException"/>
 private void WriteTest(FileSystem fs, int count, int seed, Path file, SequenceFile.CompressionType
                        compressionType, CompressionCodec codec)
 {
     fs.Delete(file, true);
     Log.Info("creating " + count + " records with " + compressionType + " compression"
              );
     SequenceFile.Writer writer = SequenceFile.CreateWriter(
         fs, conf, file, typeof(RandomDatum), typeof(RandomDatum), compressionType, codec);
     try
     {
         RandomDatum.Generator generator = new RandomDatum.Generator(seed);
         for (int i = 0; i < count; i++)
         {
             generator.Next();
             RandomDatum key   = generator.GetKey();
             RandomDatum value = generator.GetValue();
             writer.Append(key, value);
         }
     }
     finally
     {
         // Close the writer even when an append fails, so the file handle is not leaked.
         writer.Close();
     }
 }
예제 #7
0
        /// <summary>
        /// Creates a MapFile with <paramref name="records"/> entries using a codec instantiated
        /// from <paramref name="clazz"/>, then checks that the keys "002" and "004" can be read back.
        /// </summary>
        /// <param name="clazz">Codec type; instantiated through its parameterless constructor.</param>
        /// <param name="type">Compression type used for the MapFile.</param>
        /// <param name="records">Number of records to write.</param>
        /// <exception cref="System.Exception"/>
        private void CodecTestMapFile(Type clazz, SequenceFile.CompressionType type, int
                                      records)
        {
            FileSystem fs = FileSystem.Get(conf);

            Log.Info("Creating MapFiles with " + records + " records using codec " + clazz.Name
                     );
            Path path = new Path(new Path(Runtime.GetProperty("test.build.data", "/tmp")), clazz
                                 .Name + "-" + type + "-" + records);

            Log.Info("Writing " + path);
            // Activator.CreateInstance returns object, so cast explicitly to the codec interface.
            CompressionCodec codec = (CompressionCodec)System.Activator.CreateInstance(clazz);
            CreateMapFile(conf, fs, path, codec, type, records);
            MapFile.Reader reader = new MapFile.Reader(path, conf);
            try
            {
                Text key1 = new Text("002");

                NUnit.Framework.Assert.IsNotNull(reader.Get(key1, new Text()));
                Text key2 = new Text("004");

                NUnit.Framework.Assert.IsNotNull(reader.Get(key2, new Text()));
            }
            finally
            {
                // Release the reader whether or not the assertions pass.
                reader.Close();
            }
        }
        /// <summary>
        /// Opens a <c>SequenceFile.Writer</c> on the task's output path for <paramref name="name"/>
        /// and wraps it in a record writer for <c>BytesWritable</c> keys and values.
        /// </summary>
        /// <param name="ignored">Not used; the file system is taken from the output path.</param>
        /// <param name="job">Job configuration supplying paths, classes and compression settings.</param>
        /// <param name="name">Output name used to resolve the task output path.</param>
        /// <param name="progress">Progress reporter passed to the sequence file writer.</param>
        /// <returns>A record writer backed by the newly created sequence file writer.</returns>
        /// <exception cref="System.IO.IOException"/>
        public override RecordWriter <BytesWritable, BytesWritable> GetRecordWriter(FileSystem
                                                                                    ignored, JobConf job, string name, Progressable progress)
        {
            // get the path of the temporary output file
            Path             file  = FileOutputFormat.GetTaskOutputPath(job, name);
            FileSystem       fs    = file.GetFileSystem(job);
            CompressionCodec codec = null;

            // No compression unless the job asks for compressed output.
            SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.None;
            if (GetCompressOutput(job))
            {
                // find the kind of compression to do
                compressionType = GetOutputCompressionType(job);
                // find the right codec
                Type codecClass = GetOutputCompressorClass(job, typeof(DefaultCodec));
                // Explicit cast: the instance is created from a runtime Type.
                codec = (CompressionCodec)ReflectionUtils.NewInstance(codecClass, job);
            }
            SequenceFile.Writer @out = SequenceFile.CreateWriter(
                fs,
                job,
                file,
                GetSequenceFileOutputKeyClass(job),
                GetSequenceFileOutputValueClass(job),
                compressionType,
                codec,
                progress);
            return new _RecordWriter_138(@out);
        }
예제 #9
0
        /// <summary>
        /// Creates a <c>SequenceFile.Writer</c> on the task's default work file for the given key
        /// and value classes, applying the output compression settings read from
        /// <paramref name="context"/>.
        /// </summary>
        /// <param name="context">Task attempt context supplying configuration and output settings.</param>
        /// <param name="keyClass">Key class passed to the writer.</param>
        /// <param name="valueClass">Value class passed to the writer.</param>
        /// <returns>The newly created sequence file writer.</returns>
        /// <exception cref="System.IO.IOException"/>
        protected internal virtual SequenceFile.Writer GetSequenceWriter(TaskAttemptContext
                                                                         context, Type keyClass, Type valueClass)
        {
            Configuration configuration = context.GetConfiguration();

            // Compression type and codec stay at None / null unless compressed output is enabled.
            bool compressed = GetCompressOutput(context);
            SequenceFile.CompressionType compression = compressed
                ? GetOutputCompressionType(context)
                : SequenceFile.CompressionType.None;
            CompressionCodec compressionCodec = null;
            if (compressed)
            {
                Type codecType = GetOutputCompressorClass(context, typeof(DefaultCodec));
                compressionCodec = (CompressionCodec)ReflectionUtils.NewInstance(codecType, configuration);
            }

            // Resolve the temporary output file and the file system that owns it.
            Path workFile = GetDefaultWorkFile(context, string.Empty);
            FileSystem workFs = workFile.GetFileSystem(configuration);

            return SequenceFile.CreateWriter(
                workFs,
                configuration,
                workFile,
                keyClass,
                valueClass,
                compression,
                compressionCodec,
                context);
        }
예제 #10
0
        /// <summary>
        /// Opens a <c>MapFile.Writer</c> on the task's output path for <paramref name="name"/> and
        /// wraps it in a record writer.
        /// </summary>
        /// <param name="ignored">Not used; the file system is taken from the output path.</param>
        /// <param name="job">Job configuration supplying paths, classes and compression settings.</param>
        /// <param name="name">Output name used to resolve the task output path.</param>
        /// <param name="progress">Progress reporter passed to the MapFile writer constructor.</param>
        /// <returns>A record writer backed by the newly created MapFile writer.</returns>
        /// <exception cref="System.IO.IOException"/>
        public override RecordWriter <WritableComparable, Writable> GetRecordWriter(FileSystem
                                                                                    ignored, JobConf job, string name, Progressable progress)
        {
            // get the path of the temporary output file
            Path             file  = FileOutputFormat.GetTaskOutputPath(job, name);
            FileSystem       fs    = file.GetFileSystem(job);
            CompressionCodec codec = null;

            // No compression unless the job asks for compressed output.
            SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.None;
            if (GetCompressOutput(job))
            {
                // find the kind of compression to do
                compressionType = SequenceFileOutputFormat.GetOutputCompressionType(job);
                // find the right codec
                Type codecClass = GetOutputCompressorClass(job, typeof(DefaultCodec));
                // Explicit cast: the instance is created from a runtime Type.
                codec = (CompressionCodec)ReflectionUtils.NewInstance(codecClass, job);
            }
            // The progress reporter is handed to the MapFile writer as its last argument.
            MapFile.Writer @out = new MapFile.Writer(
                job,
                fs,
                file.ToString(),
                job.GetOutputKeyClass().AsSubclass <WritableComparable>(),
                job.GetOutputValueClass().AsSubclass <Writable>(),
                compressionType,
                codec,
                progress);
            return new _RecordWriter_72(@out);
        }
예제 #11
0
 /// <summary>
 /// Distributes <paramref name="count"/> generated records round-robin over
 /// <paramref name="factor"/> sequence files, sorts each file individually, and then merges
 /// the sorted files into the path <paramref name="file"/> with a ".sorted" suffix.
 /// </summary>
 /// <param name="fs">File system holding the input, sorted and merged files.</param>
 /// <param name="count">Total number of records to generate.</param>
 /// <param name="seed">Seed passed to the <c>RandomDatum.Generator</c>.</param>
 /// <param name="file">Base path from which the per-file names are derived.</param>
 /// <param name="compressionType">Compression type used for the input files.</param>
 /// <param name="fast">Passed through to <c>NewSorter</c>.</param>
 /// <param name="factor">Number of files to create; also passed to <c>NewSorter</c>.</param>
 /// <param name="megabytes">Passed through to <c>NewSorter</c>.</param>
 /// <exception cref="System.IO.IOException"/>
 private void MergeTest(FileSystem fs, int count, int seed, Path file, SequenceFile.CompressionType
                        compressionType, bool fast, int factor, int megabytes)
 {
     int recordsPerFile = count / factor;
     Log.Debug("creating " + factor + " files with " + recordsPerFile + " records");

     SequenceFile.Writer[] writers = new SequenceFile.Writer[factor];
     Path[] names = new Path[factor];
     Path[] sortedNames = new Path[factor];

     // Derive the unsorted/sorted names, clear any leftovers, and open one writer per file.
     for (int slot = 0; slot < factor; slot++)
     {
         Path unsorted = file.Suffix("." + slot);
         Path sorted = unsorted.Suffix(".sorted");
         names[slot] = unsorted;
         sortedNames[slot] = sorted;
         fs.Delete(unsorted, true);
         fs.Delete(sorted, true);
         writers[slot] = SequenceFile.CreateWriter(
             fs, conf, unsorted, typeof(RandomDatum), typeof(RandomDatum), compressionType);
     }

     // Deal the generated records out round-robin across the writers.
     RandomDatum.Generator generator = new RandomDatum.Generator(seed);
     for (int record = 0; record < count; record++)
     {
         generator.Next();
         RandomDatum datumKey = generator.GetKey();
         RandomDatum datumValue = generator.GetValue();
         writers[record % factor].Append(datumKey, datumValue);
     }

     foreach (SequenceFile.Writer openWriter in writers)
     {
         openWriter.Close();
     }

     // Sort every input file on its own before merging.
     for (int slot = 0; slot < factor; slot++)
     {
         Log.Debug("sorting file " + slot + " with " + recordsPerFile + " records");
         NewSorter(fs, fast, megabytes, factor).Sort(names[slot], sortedNames[slot]);
     }

     Log.Info("merging " + factor + " files with " + recordsPerFile + " debug");
     fs.Delete(new Path(file + ".sorted"), true);
     NewSorter(fs, fast, megabytes, factor).Merge(sortedNames, file.Suffix(".sorted"));
 }
예제 #12
0
        /// <summary>
        /// Opens a <c>MapFile.Writer</c> on the task's default work file and wraps it in a record
        /// writer, applying the output compression settings read from <paramref name="context"/>.
        /// </summary>
        /// <param name="context">Task attempt context supplying configuration and output settings.</param>
        /// <returns>A record writer backed by the newly created MapFile writer.</returns>
        /// <exception cref="System.IO.IOException"/>
        public override RecordWriter <WritableComparable <object>, Writable> GetRecordWriter
            (TaskAttemptContext context)
        {
            Configuration configuration = context.GetConfiguration();

            // Compression type and codec stay at None / null unless compressed output is enabled.
            bool compressed = GetCompressOutput(context);
            SequenceFile.CompressionType compression = compressed
                ? SequenceFileOutputFormat.GetOutputCompressionType(context)
                : SequenceFile.CompressionType.None;
            CompressionCodec compressionCodec = null;
            if (compressed)
            {
                Type codecType = GetOutputCompressorClass(context, typeof(DefaultCodec));
                compressionCodec = (CompressionCodec)ReflectionUtils.NewInstance(codecType, configuration);
            }

            Path workFile = GetDefaultWorkFile(context, string.Empty);
            FileSystem workFs = workFile.GetFileSystem(configuration);

            // The task context is handed to the MapFile writer as its last constructor argument.
            MapFile.Writer mapWriter = new MapFile.Writer(
                configuration,
                workFs,
                workFile.ToString(),
                context.GetOutputKeyClass().AsSubclass <WritableComparable>(),
                context.GetOutputValueClass().AsSubclass <Writable>(),
                compression,
                compressionCodec,
                context);
            return new _RecordWriter_75(mapWriter);
        }
예제 #13
0
 /// <summary>
 /// Creates a set in the directory named <paramref name="dirName"/> using the given element
 /// comparator and compression type; the value class is fixed to <c>NullWritable</c>.
 /// </summary>
 /// <remarks><paramref name="fs"/> is accepted but is not forwarded to the base constructor.</remarks>
 /// <exception cref="System.IO.IOException"/>
 public Writer(
     Configuration conf,
     FileSystem fs,
     string dirName,
     WritableComparator comparator,
     SequenceFile.CompressionType compress)
     : base(
         conf,
         new Path(dirName),
         Comparator(comparator),
         ValueClass(typeof(NullWritable)),
         Compression(compress))
 {
 }
예제 #14
0
 /// <summary>
 /// Creates a set in the directory named <paramref name="dirName"/> for elements of
 /// <paramref name="keyClass"/>, using the comparator returned by
 /// <c>WritableComparator.Get(keyClass, conf)</c> and the given compression type.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 public Writer(
     Configuration conf,
     FileSystem fs,
     string dirName,
     Type keyClass,
     SequenceFile.CompressionType compress)
     : this(
         conf,
         fs,
         dirName,
         WritableComparator.Get(keyClass, conf),
         compress)
 {
 }
 /// <summary>
 /// Enables compressed output on <paramref name="conf"/> and records
 /// <paramref name="style"/> as the
 /// <see cref="Org.Apache.Hadoop.IO.SequenceFile.CompressionType"/>
 /// for the output
 /// <see cref="Org.Apache.Hadoop.IO.SequenceFile"/>.
 /// </summary>
 /// <param name="conf">
 /// the <see cref="JobConf"/> to modify
 /// </param>
 /// <param name="style">
 /// the compression type, stored under <c>FileOutputFormat.CompressType</c> as its string form
 /// </param>
 public static void SetOutputCompressionType(JobConf conf, SequenceFile.CompressionType
                                             style)
 {
     // Choosing a compression type also switches output compression on.
     SetCompressOutput(conf, true);

     string styleName = style.ToString();
     conf.Set(FileOutputFormat.CompressType, styleName);
 }
예제 #16
0
 /// <summary>
 /// Enables compressed output on <paramref name="job"/> and records
 /// <paramref name="style"/> as the
 /// <see cref="Org.Apache.Hadoop.IO.SequenceFile.CompressionType"/>
 /// for the output
 /// <see cref="Org.Apache.Hadoop.IO.SequenceFile"/>.
 /// </summary>
 /// <param name="job">
 /// the <see cref="Org.Apache.Hadoop.Mapreduce.Job"/> to modify
 /// </param>
 /// <param name="style">
 /// the compression type, stored in the job's configuration under
 /// <c>FileOutputFormat.CompressType</c> as its string form
 /// </param>
 public static void SetOutputCompressionType(Job job, SequenceFile.CompressionType
                                             style)
 {
     // Choosing a compression type also switches output compression on.
     SetCompressOutput(job, true);

     string styleName = style.ToString();
     job.GetConfiguration().Set(FileOutputFormat.CompressType, styleName);
 }
예제 #17
0
        /// <summary>
        /// For debugging and testing. Parses the command line, then runs the write/read, sort or
        /// merge, and optional check steps of <c>TestSequenceFile</c> against the given file.
        /// </summary>
        /// <remarks>
        /// Prints the usage text and exits with code -1 when no arguments are given, when no file
        /// argument is given, or when <c>-rwonly</c> is combined with <c>-nocreate</c>,
        /// <c>-merge</c> or <c>-fast</c>.
        /// </remarks>
        /// <param name="args">Command-line arguments; null entries are skipped.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            int    count            = 1024 * 1024;
            int    megabytes        = 1;
            int    factor           = 10;
            bool   create           = true;
            bool   rwonly           = false;
            bool   check            = false;
            bool   fast             = false;
            bool   merge            = false;
            string compressType     = "NONE";
            string compressionCodec = "org.apache.hadoop.io.compress.DefaultCodec";
            Path   file             = null;
            int    seed             = new Random().Next();
            string usage            = "Usage: SequenceFile " + "[-count N] " + "[-seed #] [-check] [-compressType <NONE|RECORD|BLOCK>] "
                                      + "-codec <compressionCodec> " + "[[-rwonly] | {[-megabytes M] [-factor F] [-nocreate] [-fast] [-merge]}] "
                                      + " file";

            if (args.Length == 0)
            {
                System.Console.Error.WriteLine(usage);
                System.Environment.Exit(-1);
            }
            FileSystem fs = null;

            try
            {
                // parse command line
                for (int i = 0; i < args.Length; ++i)
                {
                    string arg = args[i];
                    if (arg == null)
                    {
                        continue;
                    }
                    switch (arg)
                    {
                        case "-count":
                            count = System.Convert.ToInt32(args[++i]);
                            break;

                        case "-megabytes":
                            megabytes = System.Convert.ToInt32(args[++i]);
                            break;

                        case "-factor":
                            factor = System.Convert.ToInt32(args[++i]);
                            break;

                        case "-seed":
                            seed = System.Convert.ToInt32(args[++i]);
                            break;

                        case "-rwonly":
                            rwonly = true;
                            break;

                        case "-nocreate":
                            create = false;
                            break;

                        case "-check":
                            check = true;
                            break;

                        case "-fast":
                            fast = true;
                            break;

                        case "-merge":
                            merge = true;
                            break;

                        case "-compressType":
                            compressType = args[++i];
                            break;

                        case "-codec":
                            compressionCodec = args[++i];
                            break;

                        default:
                            // file is required parameter
                            file = new Path(arg);
                            break;
                    }
                }
                if (file == null)
                {
                    // Without a file there is nothing to operate on; report usage rather than
                    // failing with a null reference below.
                    System.Console.Error.WriteLine(usage);
                    System.Environment.Exit(-1);
                }
                TestSequenceFile test = new TestSequenceFile();
                fs = file.GetFileSystem(test.conf);
                Log.Info("count = " + count);
                Log.Info("megabytes = " + megabytes);
                Log.Info("factor = " + factor);
                Log.Info("create = " + create);
                Log.Info("seed = " + seed);
                Log.Info("rwonly = " + rwonly);
                Log.Info("check = " + check);
                Log.Info("fast = " + fast);
                Log.Info("merge = " + merge);
                Log.Info("compressType = " + compressType);
                Log.Info("compressionCodec = " + compressionCodec);
                Log.Info("file = " + file);
                // -rwonly is mutually exclusive with -nocreate, -merge and -fast.
                if (rwonly && (!create || merge || fast))
                {
                    System.Console.Error.WriteLine(usage);
                    System.Environment.Exit(-1);
                }
                SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.ValueOf
                                                                   (compressType);
                CompressionCodec codec = (CompressionCodec)ReflectionUtils.NewInstance(test.conf.
                                                                                       GetClassByName(compressionCodec), test.conf);
                if (rwonly || (create && !merge))
                {
                    test.WriteTest(fs, count, seed, file, compressionType, codec);
                    test.ReadTest(fs, count, seed, file);
                }
                if (!rwonly)
                {
                    if (merge)
                    {
                        test.MergeTest(fs, count, seed, file, compressionType, fast, factor, megabytes);
                    }
                    else
                    {
                        test.SortTest(fs, count, megabytes, factor, fast, file);
                    }
                }
                if (check)
                {
                    test.CheckSort(fs, count, seed, file);
                }
            }
            finally
            {
                // fs is still null when parsing or file-system lookup failed; closing it
                // unconditionally would hide the original exception behind a null reference.
                if (fs != null)
                {
                    fs.Close();
                }
            }
        }
예제 #18
0
 /// <summary>
 /// Creates the file named <paramref name="file"/> for values of <paramref name="valClass"/>;
 /// the key class is fixed to <c>LongWritable</c>.
 /// </summary>
 /// <remarks><paramref name="fs"/> is accepted but is not forwarded to the base constructor.</remarks>
 /// <exception cref="System.IO.IOException"/>
 public Writer(
     Configuration conf,
     FileSystem fs,
     string file,
     Type valClass,
     SequenceFile.CompressionType compress,
     Progressable progress)
     : base(
         conf,
         new Path(file),
         KeyClass(typeof(LongWritable)),
         ValueClass(valClass),
         Compression(compress),
         Progressable(progress))
 {
 }
예제 #19
0
 /// <summary>
 /// Deletes any existing map file at <paramref name="file"/> and writes every element of
 /// <paramref name="data"/>, in array order, to a new <c>SetFile</c> at that location.
 /// </summary>
 /// <param name="fs">File system holding the set file.</param>
 /// <param name="data">Elements to append.</param>
 /// <param name="file">Name of the set file.</param>
 /// <param name="compress">Compression type passed to the writer.</param>
 /// <exception cref="System.IO.IOException"/>
 private static void WriteTest(FileSystem fs, RandomDatum[] data, string file, SequenceFile.CompressionType
                               compress)
 {
     MapFile.Delete(fs, file);
     Log.Info("creating with " + data.Length + " records");
     SetFile.Writer writer = new SetFile.Writer(
         conf, fs, file, WritableComparator.Get(typeof(RandomDatum)), compress);
     try
     {
         foreach (RandomDatum datum in data)
         {
             writer.Append(datum);
         }
     }
     finally
     {
         // Close the writer even when an append fails, so the file handle is not leaked.
         writer.Close();
     }
 }