Example #1
0
 /// <summary>
 /// Creates the record writer for a task attempt by opening a sequence file
 /// writer with the configured output key/value classes and wrapping it in a
 /// <c>_RecordWriter_140</c>.
 /// </summary>
 /// <param name="context">Task attempt context the output classes and writer are derived from.</param>
 /// <returns>A record writer wrapping the opened sequence file writer.</returns>
 /// <exception cref="System.IO.IOException"/>
 public override RecordWriter<BytesWritable, BytesWritable> GetRecordWriter(TaskAttemptContext context)
 {
     // Resolve the key class first, then the value class, matching the argument order.
     var outputKeyClass = GetSequenceFileOutputKeyClass(context);
     var outputValueClass = GetSequenceFileOutputValueClass(context);
     SequenceFile.Writer sequenceWriter = GetSequenceWriter(context, outputKeyClass, outputValueClass);
     return new _RecordWriter_140(sequenceWriter);
 }
Example #2
0
 /// <summary>
 /// Writes <paramref name="numFiles"/> sequence files named <c>test_N.seq</c>
 /// into <c>workDir</c>. File N holds one record per integer in the N-th range
 /// returned by <c>CreateRanges</c>; each value is a random byte array of
 /// fewer than 10 bytes.
 /// </summary>
 /// <param name="length">Passed through to <c>CreateRanges</c>.</param>
 /// <param name="numFiles">Number of files to create.</param>
 /// <param name="random">Source of randomness for the ranges and the payloads.</param>
 /// <param name="job">Job whose configuration is used to open the writers.</param>
 /// <exception cref="System.IO.IOException"/>
 private static void CreateFiles(int length, int numFiles, Random random, Job job)
 {
     TestCombineSequenceFileInputFormat.Range[] allRanges = CreateRanges(length, numFiles, random);

     for (int fileIndex = 0; fileIndex < numFiles; fileIndex++)
     {
         Path seqPath = new Path(workDir, "test_" + fileIndex + ".seq");
         SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
             localFs, job.GetConfiguration(), seqPath, typeof(IntWritable), typeof(BytesWritable));
         TestCombineSequenceFileInputFormat.Range span = allRanges[fileIndex];
         try
         {
             // One record per integer in [span.start, span.end).
             int entry = span.start;
             while (entry < span.end)
             {
                 IntWritable recordKey = new IntWritable(entry);
                 byte[] payload = new byte[random.Next(10)];
                 random.NextBytes(payload);
                 seqWriter.Append(recordKey, new BytesWritable(payload));
                 entry++;
             }
         }
         finally
         {
             seqWriter.Close();
         }
     }
 }
Example #3
0
        /// <summary>
        /// Writes two records to a block-compressed (gzip) sequence file, reopens
        /// the file with <c>AppendIfExists(true)</c> to add two more, sorts it and
        /// passes the sorted output to <c>VerifyAll4Values</c>.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestAppendSort()
        {
            GenericTestUtils.AssumeInNativeProfile();

            Path unsortedPath = new Path(RootPath, "testseqappendSort.seq");
            fs.Delete(unsortedPath, true);

            Path sortedPath = new Path(RootPath, "testseqappendSort.seq.sort");
            fs.Delete(sortedPath, true);

            SequenceFile.Sorter fileSorter = new SequenceFile.Sorter(
                fs, new JavaSerializationComparator<long>(), typeof(long), typeof(string), conf);
            SequenceFile.Writer.Option blockGzip = SequenceFile.Writer.Compression(
                SequenceFile.CompressionType.Block, new GzipCodec());

            // First pass: create the file with two out-of-order records.
            SequenceFile.Writer initialWriter = SequenceFile.CreateWriter(
                conf,
                SequenceFile.Writer.File(unsortedPath),
                SequenceFile.Writer.KeyClass(typeof(long)),
                SequenceFile.Writer.ValueClass(typeof(string)),
                blockGzip);
            initialWriter.Append(2L, "two");
            initialWriter.Append(1L, "one");
            initialWriter.Close();

            // Second pass: reopen the same file in append mode and add two more records.
            SequenceFile.Writer appendingWriter = SequenceFile.CreateWriter(
                conf,
                SequenceFile.Writer.File(unsortedPath),
                SequenceFile.Writer.KeyClass(typeof(long)),
                SequenceFile.Writer.ValueClass(typeof(string)),
                SequenceFile.Writer.AppendIfExists(true),
                blockGzip);
            appendingWriter.Append(4L, "four");
            appendingWriter.Append(3L, "three");
            appendingWriter.Close();

            // Sort the appended file and check its contents.
            fileSorter.Sort(unsortedPath, sortedPath);
            VerifyAll4Values(sortedPath);

            fs.DeleteOnExit(unsortedPath);
            fs.DeleteOnExit(sortedPath);
        }
        /// <summary>
        /// Writes every element of <paramref name="splits"/> as a key (with a
        /// <c>NullWritable</c> value) to an uncompressed sequence file named
        /// <c>_partition.lst</c> under the test data directory, registers that file
        /// as the partition file on <paramref name="conf"/> and sets the number of
        /// reduces to <c>splits.Length + 1</c>.
        /// </summary>
        /// <param name="testname">Sub-directory name, under the test data directory, that holds the file.</param>
        /// <param name="conf">Configuration that is updated and used to open the writer.</param>
        /// <param name="splits">Split points to write; the key class is taken from the first element.</param>
        /// <returns>The qualified path of the partition file.</returns>
        /// <exception cref="System.IO.IOException"/>
        private static Path WritePartitionFile<T>(string testname, Configuration conf, T[] splits)
            where T : WritableComparable<object>
        {
            FileSystem localFs = FileSystem.GetLocal(conf);
            Path dataRoot = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified(localFs);
            Path partitionFile = new Path(dataRoot, testname + "/_partition.lst");

            TotalOrderPartitioner.SetPartitionFile(conf, partitionFile);
            conf.SetInt(MRJobConfig.NumReduces, splits.Length + 1);

            SequenceFile.Writer partitionWriter = null;
            try
            {
                partitionWriter = SequenceFile.CreateWriter(
                    localFs, conf, partitionFile, splits[0].GetType(), typeof(NullWritable),
                    SequenceFile.CompressionType.None);
                foreach (T splitPoint in splits)
                {
                    partitionWriter.Append(splitPoint, NullWritable.Get());
                }
            }
            finally
            {
                // The writer is still null when CreateWriter itself failed.
                if (partitionWriter != null)
                {
                    partitionWriter.Close();
                }
            }
            return partitionFile;
        }
Example #5
0
 /// <summary>
 /// Recreates <c>ControlDir</c> and writes one control file per test file.
 /// Each control file is an uncompressed sequence file holding a single
 /// record: the file name as key and <paramref name="fileSize"/> as value.
 /// </summary>
 /// <param name="fs">File system the control files are written to.</param>
 /// <param name="fileSize">File size recorded in each control file (logged as megabytes).</param>
 /// <param name="nrFiles">Number of control files to create.</param>
 /// <exception cref="System.IO.IOException">
 /// Any failure while creating or writing a control file; the original
 /// exception is attached as the inner exception.
 /// </exception>
 private static void CreateControlFile(FileSystem fs, int fileSize, int nrFiles)
 {
     // in MB
     Log.Info("creating control file: " + fileSize + " mega bytes, " + nrFiles + " files"
              );
     fs.Delete(ControlDir, true);
     for (int i = 0; i < nrFiles; i++)
     {
         string name                = GetFileName(i);
         Path   controlFile         = new Path(ControlDir, "in_file_" + name);
         SequenceFile.Writer writer = null;
         try
         {
             writer = SequenceFile.CreateWriter(fs, fsConfig, controlFile, typeof(Text), typeof(
                                                    LongWritable), SequenceFile.CompressionType.None);
             writer.Append(new Text(name), new LongWritable(fileSize));
         }
         catch (Exception e)
         {
             // Chain the original exception so its type and stack trace are
             // not lost when the failure is reported as an IOException.
             throw new IOException(e.GetLocalizedMessage(), e);
         }
         finally
         {
             // The writer is still null when CreateWriter itself failed.
             if (writer != null)
             {
                 writer.Close();
             }
         }
     }
     Log.Info("created control files for: " + nrFiles + " files");
 }
Example #6
0
        /// <summary>
        /// Exercises <c>Hflush</c>, <c>Hsync</c> and <c>Close</c> on a
        /// <c>SequenceFile.Writer</c> that writes to a stream created with
        /// <c>CreateFlag.SyncBlock</c>, calling <c>CheckSyncMetric</c> with the
        /// expected count after each step. The mini cluster is shut down even
        /// when one of the checks fails.
        /// </summary>
        public virtual void TestSequenceFileSync()
        {
            Configuration      conf    = new HdfsConfiguration();
            MiniDFSCluster     cluster = new MiniDFSCluster.Builder(conf).Build();

            try
            {
                FileSystem         fs   = cluster.GetFileSystem();
                Path               p    = new Path("/testSequenceFileSync/foo");
                int                len  = 1 << 16;
                FSDataOutputStream @out = fs.Create(p, FsPermission.GetDefault(), EnumSet.Of(CreateFlag
                                                                                             .Create, CreateFlag.Overwrite, CreateFlag.SyncBlock), 4096, (short)1, len, null);

                SequenceFile.Writer w = SequenceFile.CreateWriter(new Configuration(), SequenceFile.Writer
                                                                  .Stream(@out), SequenceFile.Writer.KeyClass(typeof(RandomDatum)), SequenceFile.Writer
                                                                  .ValueClass(typeof(RandomDatum)), SequenceFile.Writer.Compression(SequenceFile.CompressionType
                                                                                                                                    .None, new DefaultCodec()));
                // Expected count is still 0 after a plain hflush.
                w.Hflush();
                CheckSyncMetric(cluster, 0);
                // Expected count rises to 1 after the first hsync.
                w.Hsync();
                CheckSyncMetric(cluster, 1);
                int seed = new Random().Next();

                RandomDatum.Generator generator = new RandomDatum.Generator(seed);
                generator.Next();
                w.Append(generator.GetKey(), generator.GetValue());
                w.Hsync();
                CheckSyncMetric(cluster, 2);
                // Closing the writer leaves the expected count unchanged ...
                w.Close();
                CheckSyncMetric(cluster, 2);
                // ... while closing the underlying SyncBlock stream raises it to 3.
                @out.Close();
                CheckSyncMetric(cluster, 3);
            }
            finally
            {
                // Always release the mini cluster, including when a check above throws.
                cluster.Shutdown();
            }
        }
Example #7
0
        /// <summary>
        /// Recursively walks the tree rooted at <paramref name="rootStatus"/>.
        /// For every regular file it increments <c>nrFiles</c> and appends one
        /// (file name, offset) record per default-block-size step of the file's
        /// length; for anything else it lists the children and recurses into them.
        /// </summary>
        /// <param name="rootStatus">Status of the file or directory to process.</param>
        /// <param name="writer">Sequence file writer that receives the records.</param>
        /// <exception cref="System.IO.IOException">
        /// The listing of a directory could not be obtained; the underlying
        /// <c>FileNotFoundException</c> is attached as the inner exception.
        /// </exception>
        private void ListSubtree(FileStatus rootStatus, SequenceFile.Writer writer)
        {
            Path rootFile = rootStatus.GetPath();

            if (rootStatus.IsFile())
            {
                nrFiles++;
                // For a regular file generate <fName,offset> pairs
                long blockSize  = fs.GetDefaultBlockSize(rootFile);
                long fileLength = rootStatus.GetLen();
                for (long offset = 0; offset < fileLength; offset += blockSize)
                {
                    writer.Append(new Text(rootFile.ToString()), new LongWritable(offset));
                }
                return;
            }
            FileStatus[] children = null;
            try
            {
                children = fs.ListStatus(rootFile);
            }
            catch (FileNotFoundException e)
            {
                // Keep the original exception as the cause instead of discarding it.
                throw new IOException("Could not get listing for " + rootFile, e);
            }
            for (int i = 0; i < children.Length; i++)
            {
                ListSubtree(children[i], writer);
            }
        }
Example #8
0
        /// <summary>
        /// Prepares the job configuration and input data for the test: wipes the
        /// test directory, configures <c>conf</c> for a local sequence-file job
        /// using <c>TextGen</c> / <c>TextReduce</c>, writes a single-record input
        /// file <c>in/part0</c> and creates the <c>JobClient</c>.
        /// </summary>
        /// <exception cref="System.IO.IOException">A required directory could not be created.</exception>
        public virtual void Configure()
        {
            Path rootDir = new Path(TestDir.GetAbsolutePath());
            Path inputDir = new Path(rootDir, "in");
            Path outputDir = new Path(rootDir, "out");
            FileSystem fileSystem = FileSystem.Get(conf);

            fileSystem.Delete(rootDir, true);

            conf.SetInt(JobContext.IoSortMb, 1);
            conf.SetInputFormat(typeof(SequenceFileInputFormat));
            FileInputFormat.SetInputPaths(conf, inputDir);
            FileOutputFormat.SetOutputPath(conf, outputDir);
            conf.SetMapperClass(typeof(TestMapOutputType.TextGen));
            conf.SetReducerClass(typeof(TestMapOutputType.TextReduce));
            conf.SetOutputKeyClass(typeof(Text));
            conf.SetOutputValueClass(typeof(Text));
            conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
            conf.SetOutputFormat(typeof(SequenceFileOutputFormat));

            // Create the root first, then the input directory; stop at the first failure.
            foreach (Path requiredDir in new[] { rootDir, inputDir })
            {
                if (!fileSystem.Mkdirs(requiredDir))
                {
                    throw new IOException("Mkdirs failed to create " + requiredDir.ToString());
                }
            }

            Path inputFile = new Path(inputDir, "part0");
            SequenceFile.Writer inputWriter = SequenceFile.CreateWriter(
                fileSystem, conf, inputFile, typeof(Text), typeof(Text));
            inputWriter.Append(new Text("rec: 1"), new Text("Hello"));
            inputWriter.Close();

            jc = new JobClient(conf);
        }
Example #9
0
        /// <summary>Creates the control files needed before a test run.</summary>
        /// <remarks>
        /// One control file is written per map (<c>numberOfMaps</c> in total). Each
        /// is an uncompressed sequence file under the control directory holding a
        /// single record: the file's own name as key and zero as value.
        /// </remarks>
        /// <exception cref="System.IO.IOException">on error</exception>
        private static void CreateControlFiles()
        {
            FileSystem controlFs = FileSystem.Get(config);

            Log.Info("Creating " + numberOfMaps + " control files");

            int mapIndex = 0;
            while (mapIndex < numberOfMaps)
            {
                string controlName = "NNBench_Controlfile_" + mapIndex;
                Path controlPath = new Path(new Path(baseDir, ControlDirName), controlName);
                SequenceFile.Writer controlWriter = null;
                try
                {
                    controlWriter = SequenceFile.CreateWriter(
                        controlFs, config, controlPath, typeof(Text), typeof(LongWritable),
                        SequenceFile.CompressionType.None);
                    controlWriter.Append(new Text(controlName), new LongWritable(0L));
                }
                finally
                {
                    // The writer is still null when CreateWriter itself failed.
                    if (controlWriter != null)
                    {
                        controlWriter.Close();
                    }
                }
                mapIndex++;
            }
        }
Example #10
0
        /// <summary>
        /// Writes a sequence file containing one empty key/value pair to
        /// <paramref name="name"/>, sets its replication factor and waits for that
        /// replication via <c>DFSTestUtil.WaitReplication</c>.
        /// </summary>
        /// <param name="namenode">Not used by this method.</param>
        /// <param name="conf">Configuration used to obtain the file system and open the writer.</param>
        /// <param name="name">Path of the file to create.</param>
        /// <param name="replication">Replication factor to set and wait for.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Sharpen.TimeoutException"/>
        /// <exception cref="System.Exception"/>
        internal static void WriteFile(NameNode namenode, Configuration conf, Path name, short replication)
        {
            FileSystem targetFs = FileSystem.Get(conf);

            SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
                targetFs, conf, name, typeof(BytesWritable), typeof(BytesWritable),
                SequenceFile.CompressionType.None);
            seqWriter.Append(new BytesWritable(), new BytesWritable());
            seqWriter.Close();

            targetFs.SetReplication(name, replication);
            DFSTestUtil.WaitReplication(targetFs, name, replication);
        }
Example #11
0
        /// <summary>
        /// Checks that <c>SequenceFile.CreateWriter</c> uses the file system
        /// instance passed to it: the local file system is wrapped in a Mockito
        /// spy and the test verifies that <c>GetDefaultReplication</c> was called
        /// on the spy for the target path.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestCreateUsesFsArg()
        {
            FileSystem localFs = FileSystem.GetLocal(conf);
            FileSystem spiedFs = Org.Mockito.Mockito.Spy(localFs);

            string dataDir = Runtime.GetProperty("test.build.data", ".");
            Path target = new Path(dataDir + "/testCreateUsesFSArg.seq");

            SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
                spiedFs, conf, target, typeof(NullWritable), typeof(NullWritable));
            seqWriter.Close();

            Org.Mockito.Mockito.Verify(spiedFs).GetDefaultReplication(target);
        }
Example #12
0
 /// <summary>
 /// Appends a skipped record to the skip output file. The block-compressed
 /// writer is created on first use, in a file named after the task ID inside
 /// the skip output path from the enclosing configuration.
 /// </summary>
 /// <param name="key">Key of the skipped record.</param>
 /// <param name="value">Value of the skipped record.</param>
 /// <exception cref="System.IO.IOException"/>
 private void WriteSkippedRec(KEY key, VALUE value)
 {
     // Open the writer lazily, only once a record actually has to be written.
     if (this.skipWriter == null)
     {
         Path skipOutputDir = SkipBadRecords.GetSkipOutputPath(this._enclosing.conf);
         string taskFileName = this._enclosing.GetTaskID().ToString();
         Path skipOutputFile = new Path(skipOutputDir, taskFileName);
         var skipFileSystem = skipOutputFile.GetFileSystem(this._enclosing.conf);

         this.skipWriter = SequenceFile.CreateWriter(
             skipFileSystem,
             this._enclosing.conf,
             skipOutputFile,
             this.keyClass,
             this.valClass,
             SequenceFile.CompressionType.Block,
             this.reporter);
     }

     this.skipWriter.Append(key, value);
 }
Example #13
0
        /// <summary>
        /// Runs a local job whose input is a sequence file of
        /// (<c>NullWritable</c>, <c>Text</c>) records and checks that the output
        /// file contains exactly the set of input values, in any order. Both the
        /// input writer and the output reader are closed even when the test fails.
        /// </summary>
        public virtual void TestNullKeys()
        {
            JobConf          conf   = new JobConf(typeof(TestMapRed));
            FileSystem       fs     = FileSystem.GetLocal(conf);
            HashSet <string> values = new HashSet <string>();
            string           m      = "AAAAAAAAAAAAAA";

            // Build the distinct values by shifting the letter used in the string one step per iteration.
            for (int i = 1; i < 11; ++i)
            {
                values.AddItem(m);
                m = m.Replace((char)('A' + i - 1), (char)('A' + i));
            }
            Path testdir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified
                               (fs);

            fs.Delete(testdir, true);
            Path inFile = new Path(testdir, "nullin/blah");

            SequenceFile.Writer w = SequenceFile.CreateWriter(fs, conf, inFile, typeof(NullWritable
                                                                                       ), typeof(Text), SequenceFile.CompressionType.None);
            Text t = new Text();

            try
            {
                foreach (string s in values)
                {
                    t.Set(s);
                    w.Append(NullWritable.Get(), t);
                }
            }
            finally
            {
                // Release the input file even if an append fails.
                w.Close();
            }
            FileInputFormat.SetInputPaths(conf, inFile);
            FileOutputFormat.SetOutputPath(conf, new Path(testdir, "nullout"));
            conf.SetMapperClass(typeof(TestMapRed.NullMapper));
            conf.SetReducerClass(typeof(IdentityReducer));
            conf.SetOutputKeyClass(typeof(NullWritable));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetInputFormat(typeof(SequenceFileInputFormat));
            conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
            conf.SetNumReduceTasks(1);
            conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
            JobClient.RunJob(conf);
            // Since null keys all equal, allow any ordering
            SequenceFile.Reader r = new SequenceFile.Reader(fs, new Path(testdir, "nullout/part-00000"
                                                                         ), conf);
            try
            {
                // Every value read must be one that is still outstanding in the set.
                while (r.Next(NullWritable.Get(), t))
                {
                    NUnit.Framework.Assert.IsTrue("Unexpected value: " + t, values.Remove(t.ToString(
                                                                                              )));
                }
            }
            finally
            {
                // The reader was previously never closed.
                r.Close();
            }
            NUnit.Framework.Assert.IsTrue("Missing values: " + values.ToString(), values.IsEmpty
                                              ());
        }
Example #14
0
        /// <summary>
        /// Appends <paramref name="numRecords"/> records to
        /// <paramref name="writer"/> and then closes it. Record i has key i and a
        /// value produced by <c>RandomText</c>.
        /// </summary>
        /// <param name="writer">Writer to fill; it is closed once all records are written.</param>
        /// <param name="numRecords">Number of records to append.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void WriteSequenceFile(SequenceFile.Writer writer, int numRecords)
        {
            // The same key/value objects are reused for every record.
            IntWritable recordKey = new IntWritable();
            Text recordValue = new Text();

            int written = 0;
            while (written < numRecords)
            {
                recordKey.Set(written);
                RandomText(recordValue, written, Recordsize);
                writer.Append(recordKey, recordValue);
                ++written;
            }

            writer.Close();
        }
Example #15
0
            /// <summary>
            /// Called when the reduce task is done: writes the pair
            /// (<c>numInside</c>, <c>numOutside</c>) as a single record to the
            /// uncompressed sequence file <c>reduce-out</c> in the job's output directory.
            /// </summary>
            /// <param name="context">Reducer context that supplies the configuration.</param>
            /// <exception cref="System.IO.IOException"/>
            protected override void Cleanup(Reducer.Context context)
            {
                Configuration jobConf = context.GetConfiguration();
                FileSystem outputFs;
                Path resultFile;

                // Resolve the result file inside the configured output directory.
                Path outputDir = new Path(jobConf.Get(FileOutputFormat.Outdir));
                resultFile = new Path(outputDir, "reduce-out");
                outputFs = FileSystem.Get(jobConf);

                SequenceFile.Writer resultWriter = SequenceFile.CreateWriter(
                    outputFs, jobConf, resultFile, typeof(LongWritable), typeof(LongWritable),
                    SequenceFile.CompressionType.None);
                resultWriter.Append(new LongWritable(numInside), new LongWritable(numOutside));
                resultWriter.Close();
            }
Example #16
0
 /// <summary>
 /// Runs a local identity map/reduce job over a generated sequence file that
 /// holds <paramref name="items"/> records, each carrying the same 1000-line
 /// text value. Any exception is turned into a failed assertion.
 /// </summary>
 /// <param name="items">Number of records written to the input file.</param>
 public virtual void RunJob(int items)
 {
     try
     {
         JobConf jobConf = new JobConf(typeof(TestMapRed));
         Path rootDir = new Path(TestDir.GetAbsolutePath());
         Path inputDir = new Path(rootDir, "in");
         Path outputDir = new Path(rootDir, "out");
         FileSystem fileSystem = FileSystem.Get(jobConf);

         fileSystem.Delete(rootDir, true);

         jobConf.SetInt(JobContext.IoSortMb, 1);
         jobConf.SetInputFormat(typeof(SequenceFileInputFormat));
         FileInputFormat.SetInputPaths(jobConf, inputDir);
         FileOutputFormat.SetOutputPath(jobConf, outputDir);
         jobConf.SetMapperClass(typeof(IdentityMapper));
         jobConf.SetReducerClass(typeof(IdentityReducer));
         jobConf.SetOutputKeyClass(typeof(Text));
         jobConf.SetOutputValueClass(typeof(Text));
         jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
         jobConf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);

         // Create the root first, then the input directory; stop at the first failure.
         foreach (Path requiredDir in new[] { rootDir, inputDir })
         {
             if (!fileSystem.Mkdirs(requiredDir))
             {
                 throw new IOException("Mkdirs failed to create " + requiredDir.ToString());
             }
         }

         Path inputFile = new Path(inputDir, "part0");
         SequenceFile.Writer inputWriter = SequenceFile.CreateWriter(
             fileSystem, jobConf, inputFile, typeof(Text), typeof(Text));

         // Build the shared record value once: 1000 numbered lines.
         StringBuilder lines = new StringBuilder();
         int lineNo = 0;
         while (lineNo < 1000)
         {
             lines.Append(lineNo).Append(": This is one more line of content\n");
             lineNo++;
         }
         Org.Apache.Hadoop.IO.Text sharedValue = new Org.Apache.Hadoop.IO.Text(lines.ToString());

         for (int record = 0; record < items; record++)
         {
             inputWriter.Append(new Org.Apache.Hadoop.IO.Text("rec:" + record), sharedValue);
         }
         inputWriter.Close();

         JobClient.RunJob(jobConf);
     }
     catch (Exception e)
     {
         NUnit.Framework.Assert.IsTrue("Threw exception:" + e, false);
     }
 }
Example #17
0
        /// <summary>
        /// Round-trips <paramref name="lines"/> key/value records through a
        /// block-compressed sequence file that uses the codec named by
        /// <paramref name="codecClass"/>, checks every record and the record count
        /// on read-back, then deletes the file.
        /// </summary>
        /// <param name="conf">Configuration; its <c>io.seqfile.compress.blocksize</c> is overwritten.</param>
        /// <param name="lines">Number of records to write and expect back.</param>
        /// <param name="codecClass">Type name of the compression codec to instantiate.</param>
        /// <param name="blockSize">Value stored under <c>io.seqfile.compress.blocksize</c>.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="InstantiationException"/>
        /// <exception cref="System.MemberAccessException"/>
        private static void SequenceFileCodecTest(Configuration conf, int lines, string codecClass, int blockSize)
        {
            Path seqPath = new Path("SequenceFileCodecTest." + codecClass);

            // Configuration
            conf.SetInt("io.seqfile.compress.blocksize", blockSize);

            // Create the SequenceFile
            FileSystem fileSystem = FileSystem.Get(conf);
            Log.Info("Creating SequenceFile with codec \"" + codecClass + "\"");
            CompressionCodec codec = (CompressionCodec)System.Activator.CreateInstance(Runtime.GetType(codecClass));
            SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
                fileSystem, conf, seqPath, typeof(Text), typeof(Text),
                SequenceFile.CompressionType.Block, codec);

            // Write some data
            Log.Info("Writing to SequenceFile...");
            int lineNo = 0;
            while (lineNo < lines)
            {
                Text outKey = new Text("key" + lineNo);
                Text outValue = new Text("value" + lineNo);
                seqWriter.Append(outKey, outValue);
                lineNo++;
            }
            seqWriter.Close();

            // Read the data back and check
            Log.Info("Reading from the SequenceFile...");
            SequenceFile.Reader seqReader = new SequenceFile.Reader(fileSystem, seqPath, conf);
            Writable inKey = (Writable)System.Activator.CreateInstance(seqReader.GetKeyClass());
            Writable inValue = (Writable)System.Activator.CreateInstance(seqReader.GetValueClass());
            int readCount = 0;

            try
            {
                for (; seqReader.Next(inKey, inValue); readCount++)
                {
                    Assert.Equal("key" + readCount, inKey.ToString());
                    Assert.Equal("value" + readCount, inValue.ToString());
                }
            }
            finally
            {
                seqReader.Close();
            }
            Assert.Equal(lines, readCount);

            // Delete temporary files
            fileSystem.Delete(seqPath, false);
            Log.Info("SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass + "\"");
        }
Example #18
0
 /// <summary>
 /// Fills <paramref name="src"/> with <paramref name="srcs"/> paths under
 /// <paramref name="testdir"/> (named by the base-36 rendering of
 /// <c>i + 10</c>) and opens an <c>IntWritable</c>/<c>IntWritable</c>
 /// sequence file writer for each of them.
 /// </summary>
 /// <param name="testdir">Directory that will contain the files.</param>
 /// <param name="conf">Configuration used to resolve the file system and open the writers.</param>
 /// <param name="srcs">Number of sources to create.</param>
 /// <param name="src">Output array that receives the generated paths.</param>
 /// <returns>One open writer per generated path, in the same order.</returns>
 /// <exception cref="System.IO.IOException"/>
 private static SequenceFile.Writer[] CreateWriters(Path testdir, Configuration conf, int srcs, Path[] src)
 {
     // All paths are assigned before any writer is opened.
     int pathIndex = 0;
     while (pathIndex < srcs)
     {
         src[pathIndex] = new Path(testdir, Sharpen.Extensions.ToString(pathIndex + 10, 36));
         ++pathIndex;
     }

     SequenceFile.Writer[] writers = new SequenceFile.Writer[srcs];
     int writerIndex = 0;
     while (writerIndex < srcs)
     {
         writers[writerIndex] = new SequenceFile.Writer(
             testdir.GetFileSystem(conf), conf, src[writerIndex], typeof(IntWritable), typeof(IntWritable));
         ++writerIndex;
     }
     return writers;
 }
Example #19
0
        /// <summary>
        /// Writes two block-compressed sequence files, closes a reader on the
        /// first file twice, then interleaves reads from two fresh readers (one
        /// per file) and checks that each reader returns its own file's records
        /// and then reports end of file.
        /// </summary>
        /// <remarks>
        /// The statement order matters: the double close comes before the two
        /// readers are opened, and the reads alternate between them.
        /// </remarks>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestClose()
        {
            Configuration   conf = new Configuration();
            LocalFileSystem fs   = FileSystem.GetLocal(conf);
            // create a sequence file 1
            Path path1 = new Path(Runtime.GetProperty("test.build.data", ".") + "/test1.seq");

            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, path1, typeof(Text
                                                                                           ), typeof(NullWritable), SequenceFile.CompressionType.Block);
            writer.Append(new Text("file1-1"), NullWritable.Get());
            writer.Append(new Text("file1-2"), NullWritable.Get());
            writer.Close();
            // create a sequence file 2
            Path path2 = new Path(Runtime.GetProperty("test.build.data", ".") + "/test2.seq");

            writer = SequenceFile.CreateWriter(fs, conf, path2, typeof(Text), typeof(NullWritable
                                                                                     ), SequenceFile.CompressionType.Block);
            writer.Append(new Text("file2-1"), NullWritable.Get());
            writer.Append(new Text("file2-2"), NullWritable.Get());
            writer.Close();
            // Create a reader which uses 4 BuiltInZLibInflater instances
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, path1, conf);
            // Returns the 4 BuiltInZLibInflater instances to the CodecPool
            reader.Close();
            // The second close _could_ erroneously return the same
            // 4 BuiltInZLibInflater instances to the CodecPool again
            reader.Close();
            // The first reader gets 4 BuiltInZLibInflater instances from the CodecPool
            SequenceFile.Reader reader1 = new SequenceFile.Reader(fs, path1, conf);
            // read first value from reader1
            Text text = new Text();

            reader1.Next(text);
            Assert.Equal("file1-1", text.ToString());
            // The second reader _could_ get the same 4 BuiltInZLibInflater
            // instances from the CodecPool as reader1
            SequenceFile.Reader reader2 = new SequenceFile.Reader(fs, path2, conf);
            // read first value from reader2
            reader2.Next(text);
            Assert.Equal("file2-1", text.ToString());
            // read second value from reader1
            reader1.Next(text);
            Assert.Equal("file1-2", text.ToString());
            // read second value from reader2 (this throws an exception
            // if the two readers ended up sharing inflater instances)
            reader2.Next(text);
            Assert.Equal("file2-2", text.ToString());
            // both readers must now be at end of file
            NUnit.Framework.Assert.IsFalse(reader1.Next(text));
            NUnit.Framework.Assert.IsFalse(reader2.Next(text));
        }
Example #20
0
        /// <summary>
        /// Writes <c>Numrecords</c> records to a sequence file, then opens it five
        /// times, alternating between two <c>SequenceFile.Reader</c> constructors,
        /// and runs the same four <c>ForOffset</c> checks on each reader. The file
        /// is deleted afterwards, whether or not the checks pass.
        /// </summary>
        public virtual void TestLowSyncpoint()
        {
            Configuration configuration = new Configuration();
            FileSystem localFs = FileSystem.GetLocal(configuration);
            Path seqPath = new Path(Runtime.GetProperty("test.build.data", "/tmp"), "sequencefile.sync.test");
            IntWritable keyHolder = new IntWritable();
            Text valueHolder = new Text();

            SequenceFile.Writer seqWriter = new SequenceFile.Writer(
                localFs, configuration, seqPath, typeof(IntWritable), typeof(Text));
            try
            {
                WriteSequenceFile(seqWriter, Numrecords);

                for (int pass = 0; pass < 5; pass++)
                {
                    // Odd passes read through an explicitly opened stream,
                    // even passes use the path-based constructor.
                    SequenceFile.Reader seqReader;
                    if (pass % 2 != 0)
                    {
                        FSDataInputStream input = localFs.Open(seqPath);
                        long fileLength = localFs.GetFileStatus(seqPath).GetLen();
                        int bufferSize = configuration.GetInt("io.file.buffer.size", 4096);
                        seqReader = new SequenceFile.Reader(input, bufferSize, 0L, fileLength, configuration);
                    }
                    else
                    {
                        seqReader = new SequenceFile.Reader(localFs, seqPath, configuration);
                    }

                    try
                    {
                        ForOffset(seqReader, keyHolder, valueHolder, pass, 0, 0);
                        ForOffset(seqReader, keyHolder, valueHolder, pass, 65, 0);
                        ForOffset(seqReader, keyHolder, valueHolder, pass, 2000, 21);
                        ForOffset(seqReader, keyHolder, valueHolder, pass, 0, 0);
                    }
                    finally
                    {
                        seqReader.Close();
                    }
                }
            }
            finally
            {
                localFs.Delete(seqPath, false);
            }
        }
Example #21
0
        /// <summary>
        /// Creates <c>part-0</c> in <paramref name="dir"/> and fills it with random
        /// <c>BytesWritable</c> key/value records until at least
        /// <paramref name="fileSizeInMB"/> megabytes of key and value bytes have
        /// been written. Key and value size bounds are read from the configuration.
        /// </summary>
        /// <param name="conf">Configuration that supplies the size bounds and is used to open the writer.</param>
        /// <param name="fs">File system to write to.</param>
        /// <param name="dir">Target directory; it must not exist or must be empty.</param>
        /// <param name="fileSizeInMB">Amount of key/value data to write, in megabytes.</param>
        /// <exception cref="System.IO.IOException">The directory already exists and is not empty.</exception>
        private static void CreateBigMapInputFile(Configuration conf, FileSystem fs, Path dir, long fileSizeInMB)
        {
            // Check if the input path exists and is non-empty
            if (fs.Exists(dir))
            {
                FileStatus[] existing = fs.ListStatus(dir);
                if (existing.Length > 0)
                {
                    throw new IOException("Input path: " + dir + " already exists... ");
                }
            }

            Path file = new Path(dir, "part-0");
            SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
                fs, conf, file, typeof(BytesWritable), typeof(BytesWritable),
                SequenceFile.CompressionType.None);

            long bytesRemaining = fileSizeInMB * 1024 * 1024;
            int minKeySize = conf.GetInt(MinKey, 10);
            int keySizeRange = conf.GetInt(MaxKey, 1000) - minKeySize;
            int minValueSize = conf.GetInt(MinValue, 0);
            int valueSizeRange = conf.GetInt(MaxValue, 20000) - minValueSize;
            BytesWritable keyBuffer = new BytesWritable();
            BytesWritable valueBuffer = new BytesWritable();

            Log.Info("Writing " + bytesRemaining + " bytes to " + file + " with " + "minKeySize: "
                     + minKeySize + " keySizeRange: " + keySizeRange + " minValueSize: " + minValueSize
                     + " valueSizeRange: " + valueSizeRange);
            long startMillis = Runtime.CurrentTimeMillis();

            while (bytesRemaining > 0)
            {
                // A zero range means a fixed size; the random source is not consulted then.
                int keyLength = minKeySize;
                if (keySizeRange != 0)
                {
                    keyLength += random.Next(keySizeRange);
                }
                keyBuffer.SetSize(keyLength);
                RandomizeBytes(keyBuffer.GetBytes(), 0, keyBuffer.GetLength());

                int valueLength = minValueSize;
                if (valueSizeRange != 0)
                {
                    valueLength += random.Next(valueSizeRange);
                }
                valueBuffer.SetSize(valueLength);
                RandomizeBytes(valueBuffer.GetBytes(), 0, valueBuffer.GetLength());

                seqWriter.Append(keyBuffer, valueBuffer);
                bytesRemaining -= keyLength + valueLength;
            }
            seqWriter.Close();

            long endMillis = Runtime.CurrentTimeMillis();
            long elapsedSecs = (endMillis - startMillis) / 1000;
            Log.Info("Created " + file + " of size: " + fileSizeInMB + "MB in " + elapsedSecs + "secs");
        }
        /// <summary>
        /// Writes <c>Records</c> identical records to a sequence file using the
        /// given compression type, merges that single file through a
        /// <c>SequenceFile.Sorter</c> and checks that the merge iterator yields
        /// <c>Records</c> entries and ends with a progress of 1.0.
        /// </summary>
        /// <param name="compressionType">Compression type used when writing the file.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void RunTest(SequenceFile.CompressionType compressionType)
        {
            JobConf jobConf = new JobConf();
            FileSystem localFs = FileSystem.GetLocal(jobConf);
            Path baseDir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path seqFile = new Path(baseDir, "test.seq");
            Path scratchDir = new Path(baseDir, "tmp");

            localFs.Delete(baseDir, true);
            FileInputFormat.SetInputPaths(jobConf, baseDir);
            localFs.Mkdirs(scratchDir);

            LongWritable recordKey = new LongWritable();
            Text recordValue = new Text();

            SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
                localFs, jobConf, seqFile, typeof(LongWritable), typeof(Text),
                compressionType, new DefaultCodec());
            try
            {
                int written = 0;
                while (written < Records)
                {
                    recordKey.Set(1234);
                    recordValue.Set("valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue");
                    seqWriter.Append(recordKey, recordValue);
                    ++written;
                }
            }
            finally
            {
                seqWriter.Close();
            }

            long compressedLength = localFs.GetFileStatus(seqFile).GetLen();
            Log.Info("With compression = " + compressionType + ": " + "compressed length = "
                     + compressedLength);

            SequenceFile.Sorter fileSorter = new SequenceFile.Sorter(
                localFs, jobConf.GetOutputKeyComparator(), jobConf.GetMapOutputKeyClass(),
                jobConf.GetMapOutputValueClass(), jobConf);
            Path[] inputs = new Path[] { seqFile };
            SequenceFile.Sorter.RawKeyValueIterator merged = fileSorter.Merge(inputs, scratchDir, false);

            // Drain the iterator, counting how many entries it produces.
            int seen = 0;
            while (merged.Next())
            {
                seen++;
            }

            NUnit.Framework.Assert.AreEqual(Records, seen);
            NUnit.Framework.Assert.AreEqual(1.0f, merged.GetProgress().Get());
        }
Example #23
0
 /// <summary>
 /// Deletes <paramref name="file"/> and rewrites it as a sequence file holding
 /// <paramref name="count"/> key/value pairs produced by a
 /// <c>RandomDatum.Generator</c> seeded with <paramref name="seed"/>.
 /// </summary>
 /// <param name="fs">file system on which the file is deleted and created</param>
 /// <param name="count">number of records to append</param>
 /// <param name="seed">seed passed to the record generator</param>
 /// <param name="file">path of the sequence file to write</param>
 /// <param name="compressionType">compression type passed to the writer</param>
 /// <param name="codec">compression codec passed to the writer</param>
 /// <param name="metadata">metadata passed to the writer</param>
 /// <exception cref="System.IO.IOException"/>
 private void WriteMetadataTest(FileSystem fs, int count, int seed, Path file, SequenceFile.CompressionType
                                compressionType, CompressionCodec codec, SequenceFile.Metadata metadata)
 {
     fs.Delete(file, true);
     Log.Info("creating " + count + " records with metadata and with " + compressionType
              + " compression");
     SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(RandomDatum
                                                                                   ), typeof(RandomDatum), compressionType, codec, null, metadata);
     try
     {
         RandomDatum.Generator generator = new RandomDatum.Generator(seed);
         for (int i = 0; i < count; i++)
         {
             // Advance the generator, then append the pair it now exposes.
             generator.Next();
             RandomDatum key   = generator.GetKey();
             RandomDatum value = generator.GetValue();
             writer.Append(key, value);
         }
     }
     finally
     {
         // Close even when an append throws so the writer is never left open.
         writer.Close();
     }
 }
Example #24
0
        /// <summary>
        /// Prepares the job configuration and its input data: recreates the test directory,
        /// configures sequence-file input/output formats and key/value classes on
        /// <c>conf</c>, writes two input files ("part0" and "part1") of three
        /// IntWritable pairs each, and finally creates the <c>JobClient</c>.
        /// </summary>
        /// <exception cref="System.IO.IOException">
        /// thrown when the test directory or the input directory cannot be created
        /// </exception>
        public virtual void Configure()
        {
            Path       testdir = new Path(TestDir.GetAbsolutePath());
            Path       inDir   = new Path(testdir, "in");
            Path       outDir  = new Path(testdir, "out");
            FileSystem fs      = FileSystem.Get(conf);

            fs.Delete(testdir, true);
            conf.SetInputFormat(typeof(SequenceFileInputFormat));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetOutputKeyClass(typeof(IntWritable));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetMapOutputValueClass(typeof(IntWritable));
            // set up two map jobs, so we can test merge phase in Reduce also
            conf.SetNumMapTasks(2);
            conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
            conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
            if (!fs.Mkdirs(testdir))
            {
                throw new IOException("Mkdirs failed to create " + testdir.ToString());
            }
            if (!fs.Mkdirs(inDir))
            {
                throw new IOException("Mkdirs failed to create " + inDir.ToString());
            }
            // set up input data in 2 files
            Path inFile = new Path(inDir, "part0");

            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile, typeof(IntWritable
                                                                                            ), typeof(IntWritable));
            try
            {
                writer.Append(new IntWritable(11), new IntWritable(999));
                writer.Append(new IntWritable(23), new IntWritable(456));
                writer.Append(new IntWritable(10), new IntWritable(780));
            }
            finally
            {
                // Close the first writer even if an append fails.
                writer.Close();
            }
            inFile = new Path(inDir, "part1");
            writer = SequenceFile.CreateWriter(fs, conf, inFile, typeof(IntWritable), typeof(
                                                   IntWritable));
            try
            {
                writer.Append(new IntWritable(45), new IntWritable(100));
                writer.Append(new IntWritable(18), new IntWritable(200));
                writer.Append(new IntWritable(27), new IntWritable(300));
            }
            finally
            {
                // Close the second writer even if an append fails.
                writer.Close();
            }
            jc = new JobClient(conf);
        }
        /// <summary>
        /// Writes two (long, string) records to a sequence file and reads them back,
        /// asserting that keys and values come back in the order written and that a
        /// further <c>Next</c> call returns null.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestJavaSerialization()
        {
            Path file = new Path(Runtime.GetProperty("test.build.data", ".") + "/testseqser.seq"
                                 );

            fs.Delete(file, true);
            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(long
                                                                                          ), typeof(string));
            try
            {
                writer.Append(1L, "one");
                writer.Append(2L, "two");
            }
            finally
            {
                // Close the writer even if an append fails.
                writer.Close();
            }
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
            try
            {
                Assert.Equal(1L, reader.Next((object)null));
                Assert.Equal("one", reader.GetCurrentValue((object)null));
                Assert.Equal(2L, reader.Next((object)null));
                Assert.Equal("two", reader.GetCurrentValue((object)null));
                NUnit.Framework.Assert.IsNull(reader.Next((object)null));
            }
            finally
            {
                // A failed assertion must not leave the reader open.
                reader.Close();
            }
        }
Example #26
0
        // clean up after all to restore the system state
        /// <summary>
        /// Calls <c>Cleanup()</c>, creates the sequence file "in_file" under
        /// <c>MapInputDir</c> (Text keys, LongWritable values, no compression), resets
        /// <c>nrFiles</c> to zero and passes the writer to <c>ListSubtree</c> together with
        /// a path built from <paramref name="rootName"/>.
        /// </summary>
        /// <param name="rootName">path string used as the root handed to <c>ListSubtree</c></param>
        /// <exception cref="System.IO.IOException"/>
        private void CreateInputFile(string rootName)
        {
            // clean up if previous run failed
            Cleanup();

            Path mapInput = new Path(MapInputDir, "in_file");
            SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
                fs, fsConfig, mapInput, typeof(Text), typeof(LongWritable),
                SequenceFile.CompressionType.None);
            try
            {
                nrFiles = 0;
                Path root = new Path(rootName);
                ListSubtree(root, seqWriter);
            }
            finally
            {
                // The writer is closed whether or not listing the subtree succeeded.
                seqWriter.Close();
            }
            Log.Info("Created map input files.");
        }
Example #27
0
 /// <summary>
 /// Writes <paramref name="numRecords"/> records to <c>inFile</c>. Each key is the text
 /// form of the 1-based record number; each value wraps a byte array whose length is
 /// <c>random.Next(10)</c> and whose content is filled by <c>random.NextBytes</c>.
 /// </summary>
 /// <param name="numRecords">number of records to append</param>
 /// <exception cref="System.Exception"/>
 private static void CreateSequenceFile(int numRecords)
 {
     SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
         fs, conf, inFile, typeof(Text), typeof(BytesWritable));
     try
     {
         int recordNo = 1;
         while (recordNo <= numRecords)
         {
             Text recordKey = new Text(Sharpen.Extensions.ToString(recordNo));

             // Random-length payload, then random content.
             byte[] payload = new byte[random.Next(10)];
             random.NextBytes(payload);

             seqWriter.Append(recordKey, new BytesWritable(payload));
             recordNo++;
         }
     }
     finally
     {
         seqWriter.Close();
     }
 }
        /// <summary>
        /// Opens a <see cref="SequenceFile.Writer"/> on the task output path for
        /// <paramref name="name"/> and returns it wrapped in a record writer. When
        /// <c>GetCompressOutput</c> reports true for the job, the compression type and
        /// codec are taken from the job; otherwise the writer is created with
        /// <c>CompressionType.None</c> and a null codec.
        /// </summary>
        /// <param name="ignored">not used; the file system is obtained from the output path</param>
        /// <param name="job">job configuration</param>
        /// <param name="name">name used to resolve the task output path</param>
        /// <param name="progress">progress callback passed to the writer</param>
        /// <exception cref="System.IO.IOException"/>
        public override RecordWriter <BytesWritable, BytesWritable> GetRecordWriter(FileSystem
                                                                                    ignored, JobConf job, string name, Progressable progress)
        {
            // Resolve the temporary output file and the file system that owns it.
            Path       outputPath = FileOutputFormat.GetTaskOutputPath(job, name);
            FileSystem outputFs   = outputPath.GetFileSystem(job);

            // Start with the uncompressed settings and override them only on request.
            SequenceFile.CompressionType kind       = SequenceFile.CompressionType.None;
            CompressionCodec             compressor = null;

            bool wantsCompression = GetCompressOutput(job);
            if (wantsCompression)
            {
                kind = GetOutputCompressionType(job);
                Type compressorClass = GetOutputCompressorClass(job, typeof(DefaultCodec));
                compressor = ReflectionUtils.NewInstance(compressorClass, job);
            }

            SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
                outputFs, job, outputPath, GetSequenceFileOutputKeyClass(job),
                GetSequenceFileOutputValueClass(job), kind, compressor, progress);
            return new _RecordWriter_138(seqWriter);
        }
Example #29
0
        /// <summary>Write a partition file for the given job, using the Sampler provided.</summary>
        /// <remarks>
        /// Queries the sampler for a sample keyset, sorts by the output key
        /// comparator, selects the keys for each rank, and writes to the destination
        /// returned from
        /// <see cref="TotalOrderPartitioner{K, V}.GetPartitionFile(Org.Apache.Hadoop.Conf.Configuration)"/>.
        /// An existing file at that destination is deleted first. One key is written
        /// for each of the <c>numPartitions - 1</c> boundaries, with a
        /// <c>NullWritable</c> value.
        /// </remarks>
        /// <param name="job">job supplying the configuration, input format, sort comparator and reduce count</param>
        /// <param name="sampler">sampler queried for the sample keys</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="System.Exception"/>
        public static void WritePartitionFile <K, V>(Job job, InputSampler.Sampler <K, V> sampler
                                                     )
        {
            // getInputFormat, getOutputKeyComparator
            Configuration conf          = job.GetConfiguration();
            InputFormat   inf           = ReflectionUtils.NewInstance(job.GetInputFormatClass(), conf);
            int           numPartitions = job.GetNumReduceTasks();

            K[] samples = (K[])sampler.GetSample(inf, job);
            Log.Info("Using " + samples.Length + " samples");
            RawComparator <K> comparator = (RawComparator <K>)job.GetSortComparator();

            Arrays.Sort(samples, comparator);
            Path       dst = new Path(TotalOrderPartitioner.GetPartitionFile(conf));
            FileSystem fs  = dst.GetFileSystem(conf);

            if (fs.Exists(dst))
            {
                fs.Delete(dst, false);
            }
            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, dst, job.GetMapOutputKeyClass
                                                                       (), typeof(NullWritable));
            try
            {
                NullWritable nullValue = NullWritable.Get();
                float        stepSize  = samples.Length / (float)numPartitions;
                int          last      = -1;

                for (int i = 1; i < numPartitions; ++i)
                {
                    // Explicit conversion: System.Math.Round yields a floating-point
                    // result, which cannot be assigned to an int index implicitly.
                    int k = (int)Math.Round(stepSize * i);
                    // Move forward while k has not passed the previously written index
                    // and its sample compares equal to the previously written sample.
                    while (last >= k && comparator.Compare(samples[last], samples[k]) == 0)
                    {
                        ++k;
                    }
                    writer.Append(samples[k], nullValue);
                    last = k;
                }
            }
            finally
            {
                // Close even when selecting or appending a key throws.
                writer.Close();
            }
        }
Example #30
0
            /// <summary>
            /// Creates an output stream with <c>fs.Create(path, true, osBufferSize)</c> and opens
            /// a sequence file writer with BytesWritable keys and values on it. For "lzo" and
            /// "gz" the writer uses block compression with the matching codec; for "none" it
            /// uses no compression and a null codec.
            /// </summary>
            /// <param name="fs">file system on which the stream is created</param>
            /// <param name="path">path of the file to create</param>
            /// <param name="osBufferSize">buffer size passed to <c>fs.Create</c></param>
            /// <param name="compress">one of "lzo", "gz" or "none"</param>
            /// <param name="minBlkSize">not read by this constructor</param>
            /// <exception cref="System.IO.IOException">
            /// also thrown with "Codec not supported." for any other <paramref name="compress"/> value
            /// </exception>
            public SeqFileAppendable(FileSystem fs, Path path, int osBufferSize, string compress
                                     , int minBlkSize)
            {
                Configuration conf = new Configuration();
                conf.SetBoolean("hadoop.native.lib", true);

                // Resolve the codec before touching the file system so that an
                // unsupported name is rejected without creating the file.
                CompressionCodec codec = null;
                switch (compress)
                {
                    case "lzo":
                        codec = Compression.Algorithm.Lzo.GetCodec();
                        break;

                    case "gz":
                        codec = Compression.Algorithm.Gz.GetCodec();
                        break;

                    case "none":
                        break;

                    default:
                        throw new IOException("Codec not supported.");
                }

                this.fsdos = fs.Create(path, true, osBufferSize);
                if ("none".Equals(compress))
                {
                    writer = SequenceFile.CreateWriter(
                        conf, fsdos, typeof(BytesWritable), typeof(BytesWritable),
                        SequenceFile.CompressionType.None, null);
                }
                else
                {
                    writer = SequenceFile.CreateWriter(
                        conf, fsdos, typeof(BytesWritable), typeof(BytesWritable),
                        SequenceFile.CompressionType.Block, codec);
                }
            }