Exemple #1
0
        /// <summary>
        /// Writes two out-of-order batches of records to a block-compressed sequence
        /// file (the second batch through a writer opened with <c>AppendIfExists(true)</c>),
        /// sorts the file and hands the sorted copy to <c>VerifyAll4Values</c>.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestAppendSort()
        {
            GenericTestUtils.AssumeInNativeProfile();
            Path file = new Path(RootPath, "testseqappendSort.seq");

            fs.Delete(file, true);
            Path sortedFile = new Path(RootPath, "testseqappendSort.seq.sort");

            fs.Delete(sortedFile, true);
            SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, new JavaSerializationComparator<long>(),
                                                                 typeof(long), typeof(string), conf);
            SequenceFile.Writer.Option compressOption = SequenceFile.Writer.Compression(
                SequenceFile.CompressionType.Block, new GzipCodec());
            SequenceFile.Writer writer = SequenceFile.CreateWriter(conf, SequenceFile.Writer.File(file),
                                                                   SequenceFile.Writer.KeyClass(typeof(long)),
                                                                   SequenceFile.Writer.ValueClass(typeof(string)),
                                                                   compressOption);
            try
            {
                // Deliberately written out of key order; the sorter fixes it later.
                writer.Append(2L, "two");
                writer.Append(1L, "one");
            }
            finally
            {
                // Release the writer even when an append fails.
                writer.Close();
            }
            // Re-open the same file in append mode and add a second unsorted batch.
            writer = SequenceFile.CreateWriter(conf, SequenceFile.Writer.File(file),
                                               SequenceFile.Writer.KeyClass(typeof(long)),
                                               SequenceFile.Writer.ValueClass(typeof(string)),
                                               SequenceFile.Writer.AppendIfExists(true), compressOption);
            try
            {
                writer.Append(4L, "four");
                writer.Append(3L, "three");
            }
            finally
            {
                writer.Close();
            }
            // Sort file after append
            sorter.Sort(file, sortedFile);
            VerifyAll4Values(sortedFile);
            fs.DeleteOnExit(file);
            fs.DeleteOnExit(sortedFile);
        }
        /// <summary>
        /// Checks that closing a <c>SequenceFile.Reader</c> twice does not disturb readers
        /// opened afterwards: two block-compressed files are written, one reader is closed
        /// twice, and two fresh readers are then read in an interleaved order.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestClose()
        {
            Configuration   conf = new Configuration();
            LocalFileSystem fs   = FileSystem.GetLocal(conf);
            // create a sequence file 1
            Path path1 = new Path(Runtime.GetProperty("test.build.data", ".") + "/test1.seq");

            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, path1, typeof(Text),
                                                                   typeof(NullWritable),
                                                                   SequenceFile.CompressionType.Block);
            try
            {
                writer.Append(new Text("file1-1"), NullWritable.Get());
                writer.Append(new Text("file1-2"), NullWritable.Get());
            }
            finally
            {
                writer.Close();
            }
            // create a sequence file 2
            Path path2 = new Path(Runtime.GetProperty("test.build.data", ".") + "/test2.seq");

            writer = SequenceFile.CreateWriter(fs, conf, path2, typeof(Text), typeof(NullWritable),
                                               SequenceFile.CompressionType.Block);
            try
            {
                writer.Append(new Text("file2-1"), NullWritable.Get());
                writer.Append(new Text("file2-2"), NullWritable.Get());
            }
            finally
            {
                writer.Close();
            }
            // Create a reader which uses 4 BuiltInZLibInflater instances
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, path1, conf);
            // Returns the 4 BuiltInZLibInflater instances to the CodecPool
            reader.Close();
            // The second close _could_ erroneously return the same
            // 4 BuiltInZLibInflater instances to the CodecPool again
            reader.Close();
            // The first reader gets 4 BuiltInZLibInflater instances from the CodecPool
            SequenceFile.Reader reader1 = new SequenceFile.Reader(fs, path1, conf);
            try
            {
                // read first value from reader1
                Text text = new Text();

                reader1.Next(text);
                Assert.Equal("file1-1", text.ToString());
                // The second reader _could_ get the same 4 BuiltInZLibInflater
                // instances from the CodecPool as reader1
                SequenceFile.Reader reader2 = new SequenceFile.Reader(fs, path2, conf);
                try
                {
                    // read first value from reader2
                    reader2.Next(text);
                    Assert.Equal("file2-1", text.ToString());
                    // read second value from reader1
                    reader1.Next(text);
                    Assert.Equal("file1-2", text.ToString());
                    // read second value from reader2 (this throws an exception
                    // if both readers were handed the same decompressors)
                    reader2.Next(text);
                    Assert.Equal("file2-2", text.ToString());
                    NUnit.Framework.Assert.IsFalse(reader1.Next(text));
                    NUnit.Framework.Assert.IsFalse(reader2.Next(text));
                }
                finally
                {
                    // Previously never closed; release it whether or not the checks pass.
                    reader2.Close();
                }
            }
            finally
            {
                reader1.Close();
            }
        }
        /// <summary>
        /// Prepares the shared job configuration for a local-framework run: wipes and
        /// recreates the test directory, sets the input/output formats, mapper, reducer
        /// and output types, writes a single-record input file and creates the
        /// <c>JobClient</c>.
        /// </summary>
        /// <exception cref="System.IO.IOException">
        /// When the test or input directory cannot be created.
        /// </exception>
        public virtual void Configure()
        {
            Path       testdir = new Path(TestDir.GetAbsolutePath());
            Path       inDir   = new Path(testdir, "in");
            Path       outDir  = new Path(testdir, "out");
            FileSystem fs      = FileSystem.Get(conf);

            // Start from a clean directory tree.
            fs.Delete(testdir, true);
            conf.SetInt(JobContext.IoSortMb, 1);
            conf.SetInputFormat(typeof(SequenceFileInputFormat));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetMapperClass(typeof(TestMapOutputType.TextGen));
            conf.SetReducerClass(typeof(TestMapOutputType.TextReduce));
            conf.SetOutputKeyClass(typeof(Text));
            conf.SetOutputValueClass(typeof(Text));
            conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
            conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
            if (!fs.Mkdirs(testdir))
            {
                throw new IOException("Mkdirs failed to create " + testdir.ToString());
            }
            if (!fs.Mkdirs(inDir))
            {
                throw new IOException("Mkdirs failed to create " + inDir.ToString());
            }
            Path inFile = new Path(inDir, "part0");

            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile, typeof(Text),
                                                                   typeof(Text));
            try
            {
                writer.Append(new Text("rec: 1"), new Text("Hello"));
            }
            finally
            {
                // Close the writer even if the append fails.
                writer.Close();
            }
            jc = new JobClient(conf);
        }
Exemple #4
0
 /// <summary>
 /// Writes <paramref name="numFiles"/> sequence files named <c>test_N.seq</c> into the
 /// work directory. File N gets one record per integer in the N-th range returned by
 /// <c>CreateRanges</c>: the integer as key and a byte array of length
 /// <c>random.Next(10)</c>, filled by <c>random.NextBytes</c>, as value.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 private static void CreateFiles(int length, int numFiles, Random random, Job job)
 {
     TestCombineSequenceFileInputFormat.Range[] allRanges = CreateRanges(length, numFiles, random);
     int fileIndex = 0;
     while (fileIndex < numFiles)
     {
         Path target = new Path(workDir, "test_" + fileIndex + ".seq");
         SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
             localFs, job.GetConfiguration(), target, typeof(IntWritable), typeof(BytesWritable));
         TestCombineSequenceFileInputFormat.Range current = allRanges[fileIndex];
         try
         {
             // One record per integer in [start, end).
             int entry = current.start;
             while (entry < current.end)
             {
                 byte[] payload = new byte[random.Next(10)];
                 random.NextBytes(payload);
                 seqWriter.Append(new IntWritable(entry), new BytesWritable(payload));
                 entry++;
             }
         }
         finally
         {
             seqWriter.Close();
         }
         fileIndex++;
     }
 }
Exemple #5
0
        /// <summary>Create control files before a test run.</summary>
        /// <remarks>
        /// One control file is created per map (<c>numberOfMaps</c> in total). Each file is
        /// an uncompressed sequence file holding a single record: the file name as key
        /// and a zero <c>LongWritable</c> as value.
        /// </remarks>
        /// <exception cref="System.IO.IOException">on error</exception>
        private static void CreateControlFiles()
        {
            FileSystem tempFS = FileSystem.Get(config);

            Log.Info("Creating " + numberOfMaps + " control files");
            for (int i = 0; i < numberOfMaps; i++)
            {
                string strFileName         = "NNBench_Controlfile_" + i;
                Path   filePath            = new Path(new Path(baseDir, ControlDirName), strFileName);
                SequenceFile.Writer writer = null;
                try
                {
                    writer = SequenceFile.CreateWriter(tempFS, config, filePath, typeof(Text),
                                                       typeof(LongWritable), SequenceFile.CompressionType.None);
                    // Upper-case suffix: a lower-case 'l' is easily misread as the digit 1.
                    writer.Append(new Text(strFileName), new LongWritable(0L));
                }
                finally
                {
                    // writer stays null when CreateWriter itself failed.
                    if (writer != null)
                    {
                        writer.Close();
                    }
                }
            }
        }
Exemple #6
0
 /// <summary>
 /// Recreates the control directory and writes one control file per input file.
 /// Each control file is an uncompressed sequence file with a single record mapping
 /// the file name to <paramref name="fileSize"/>.
 /// </summary>
 /// <param name="fs">File system the control files are written to.</param>
 /// <param name="fileSize">Size recorded for every file (logged as megabytes).</param>
 /// <param name="nrFiles">Number of control files to create.</param>
 /// <exception cref="System.IO.IOException">
 /// When creating or writing a control file fails; the original exception is
 /// attached as the inner exception.
 /// </exception>
 private static void CreateControlFile(FileSystem fs, int fileSize, int nrFiles)
 {
     // in MB
     Log.Info("creating control file: " + fileSize + " mega bytes, " + nrFiles + " files"
              );
     fs.Delete(ControlDir, true);
     for (int i = 0; i < nrFiles; i++)
     {
         string name                = GetFileName(i);
         Path   controlFile         = new Path(ControlDir, "in_file_" + name);
         SequenceFile.Writer writer = null;
         try
         {
             writer = SequenceFile.CreateWriter(fs, fsConfig, controlFile, typeof(Text),
                                                typeof(LongWritable), SequenceFile.CompressionType.None);
             writer.Append(new Text(name), new LongWritable(fileSize));
         }
         catch (Exception e)
         {
             // Keep the same message but preserve the cause and its stack trace.
             throw new IOException(e.GetLocalizedMessage(), e);
         }
         finally
         {
             // writer is scoped to this iteration, so no reset to null is needed.
             if (writer != null)
             {
                 writer.Close();
             }
         }
     }
     Log.Info("created control files for: " + nrFiles + " files");
 }
        /// <summary>
        /// Writes <paramref name="splits"/> to <c>&lt;test.build.data&gt;/&lt;testname&gt;/_partition.lst</c>
        /// on the local file system as an uncompressed sequence file with
        /// <c>NullWritable</c> values, registers that file as the partition file in
        /// <paramref name="conf"/> and sets the reducer count to <c>splits.Length + 1</c>.
        /// </summary>
        /// <returns>The path of the partition file.</returns>
        /// <exception cref="System.IO.IOException"/>
        private static Path WritePartitionFile <T>(string testname, Configuration conf, T[]
                                                   splits)
            where T : WritableComparable <object>
        {
            FileSystem localFs = FileSystem.GetLocal(conf);
            Path dataRoot = new Path(Runtime.GetProperty("test.build.data", "/tmp"));
            Path qualifiedRoot = dataRoot.MakeQualified(localFs);
            Path partitionFile = new Path(qualifiedRoot, testname + "/_partition.lst");

            TotalOrderPartitioner.SetPartitionFile(conf, partitionFile);
            conf.SetInt(MRJobConfig.NumReduces, splits.Length + 1);

            SequenceFile.Writer partitionWriter = null;
            try
            {
                // The key class is taken from the runtime type of the first split.
                partitionWriter = SequenceFile.CreateWriter(
                    localFs, conf, partitionFile, splits[0].GetType(), typeof(NullWritable),
                    SequenceFile.CompressionType.None);
                foreach (T split in splits)
                {
                    partitionWriter.Append(split, NullWritable.Get());
                }
            }
            finally
            {
                if (partitionWriter != null)
                {
                    partitionWriter.Close();
                }
            }
            return partitionFile;
        }
Exemple #8
0
        /// <summary>
        /// Writes a sequence file on top of a stream created with
        /// <c>CreateFlag.SyncBlock</c> on a mini DFS cluster and calls
        /// <c>CheckSyncMetric</c> after each hflush/hsync/close step with the expected
        /// count.
        /// </summary>
        public virtual void TestSequenceFileSync()
        {
            Configuration  conf    = new HdfsConfiguration();
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).Build();

            try
            {
                FileSystem         fs   = cluster.GetFileSystem();
                Path               p    = new Path("/testSequenceFileSync/foo");
                int                len  = 1 << 16;
                FSDataOutputStream @out = fs.Create(p, FsPermission.GetDefault(),
                                                    EnumSet.Of(CreateFlag.Create, CreateFlag.Overwrite,
                                                               CreateFlag.SyncBlock),
                                                    4096, (short)1, len, null);

                SequenceFile.Writer w = SequenceFile.CreateWriter(
                    new Configuration(), SequenceFile.Writer.Stream(@out),
                    SequenceFile.Writer.KeyClass(typeof(RandomDatum)),
                    SequenceFile.Writer.ValueClass(typeof(RandomDatum)),
                    SequenceFile.Writer.Compression(SequenceFile.CompressionType.None, new DefaultCodec()));
                w.Hflush();
                CheckSyncMetric(cluster, 0);
                w.Hsync();
                CheckSyncMetric(cluster, 1);
                int seed = new Random().Next();

                RandomDatum.Generator generator = new RandomDatum.Generator(seed);
                generator.Next();
                w.Append(generator.GetKey(), generator.GetValue());
                w.Hsync();
                CheckSyncMetric(cluster, 2);
                // Closing the writer leaves the expected count at 2 ...
                w.Close();
                CheckSyncMetric(cluster, 2);
                // ... while closing the underlying stream raises it to 3.
                @out.Close();
                CheckSyncMetric(cluster, 3);
            }
            finally
            {
                // Always tear the cluster down, even when one of the checks fails.
                cluster.Shutdown();
            }
        }
Exemple #9
0
        /// <summary>
        /// Recursively walks the tree rooted at <paramref name="rootStatus"/>. For every
        /// regular file it increments <c>nrFiles</c> and appends one (file name, offset)
        /// record to <paramref name="writer"/> per default-block-size step of the file
        /// length; for anything else it lists the children and recurses into them.
        /// </summary>
        /// <param name="rootStatus">Status of the file or directory to process.</param>
        /// <param name="writer">Destination for the generated records.</param>
        /// <exception cref="System.IO.IOException">
        /// When the listing of a directory cannot be obtained; the underlying
        /// <c>FileNotFoundException</c> is attached as the inner exception.
        /// </exception>
        private void ListSubtree(FileStatus rootStatus, SequenceFile.Writer writer)
        {
            Path rootFile = rootStatus.GetPath();

            if (rootStatus.IsFile())
            {
                nrFiles++;
                // For a regular file generate (fName, offset) pairs
                long blockSize  = fs.GetDefaultBlockSize(rootFile);
                long fileLength = rootStatus.GetLen();
                for (long offset = 0; offset < fileLength; offset += blockSize)
                {
                    writer.Append(new Text(rootFile.ToString()), new LongWritable(offset));
                }
                return;
            }
            FileStatus[] children;
            try
            {
                children = fs.ListStatus(rootFile);
            }
            catch (FileNotFoundException e)
            {
                // Same message as before, but keep the cause for diagnosis.
                throw new IOException("Could not get listing for " + rootFile, e);
            }
            for (int i = 0; i < children.Length; i++)
            {
                ListSubtree(children[i], writer);
            }
        }
        /// <summary>
        /// Round-trips two (long, string) records through a sequence file and checks
        /// that the reader returns them in order and then reports end of file with
        /// a null key.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestJavaSerialization()
        {
            Path file = new Path(Runtime.GetProperty("test.build.data", ".") + "/testseqser.seq"
                                 );

            fs.Delete(file, true);
            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(long),
                                                                   typeof(string));
            try
            {
                writer.Append(1L, "one");
                writer.Append(2L, "two");
            }
            finally
            {
                writer.Close();
            }
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
            try
            {
                Assert.Equal(1L, reader.Next((object)null));
                Assert.Equal("one", reader.GetCurrentValue((object)null));
                Assert.Equal(2L, reader.Next((object)null));
                Assert.Equal("two", reader.GetCurrentValue((object)null));
                NUnit.Framework.Assert.IsNull(reader.Next((object)null));
            }
            finally
            {
                // Close the reader even when an assertion fails.
                reader.Close();
            }
        }
Exemple #11
0
        /// <summary>
        /// Creates an uncompressed sequence file at <paramref name="name"/> containing a
        /// single empty key/value record, sets its replication factor and waits for
        /// that replication via <c>DFSTestUtil.WaitReplication</c>.
        /// </summary>
        /// <param name="namenode">Not used by this method.</param>
        /// <param name="conf">Configuration used to obtain the file system and writer.</param>
        /// <param name="name">Path of the file to create.</param>
        /// <param name="replication">Replication factor to set and wait for.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Sharpen.TimeoutException"/>
        /// <exception cref="System.Exception"/>
        internal static void WriteFile(NameNode namenode, Configuration conf, Path name,
                                       short replication)
        {
            FileSystem fileSys = FileSystem.Get(conf);

            SequenceFile.Writer writer = SequenceFile.CreateWriter(fileSys, conf, name,
                                                                   typeof(BytesWritable), typeof(BytesWritable),
                                                                   SequenceFile.CompressionType.None);
            try
            {
                writer.Append(new BytesWritable(), new BytesWritable());
            }
            finally
            {
                // Close the writer even if the append fails.
                writer.Close();
            }
            fileSys.SetReplication(name, replication);
            DFSTestUtil.WaitReplication(fileSys, name, replication);
        }
Exemple #12
0
        /// <summary>
        /// Runs a local job over input whose keys are all <c>NullWritable</c> and checks
        /// that every distinct input value shows up in the job output, in any order.
        /// </summary>
        public virtual void TestNullKeys()
        {
            JobConf          conf   = new JobConf(typeof(TestMapRed));
            FileSystem       fs     = FileSystem.GetLocal(conf);
            HashSet <string> values = new HashSet <string>();
            string           m      = "AAAAAAAAAAAAAA";

            // Build ten values by shifting every character one letter per step.
            for (int i = 1; i < 11; ++i)
            {
                values.AddItem(m);
                m = m.Replace((char)('A' + i - 1), (char)('A' + i));
            }
            Path testdir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified
                               (fs);

            fs.Delete(testdir, true);
            Path inFile = new Path(testdir, "nullin/blah");

            SequenceFile.Writer w = SequenceFile.CreateWriter(fs, conf, inFile, typeof(NullWritable),
                                                              typeof(Text), SequenceFile.CompressionType.None);
            Text t = new Text();

            try
            {
                foreach (string s in values)
                {
                    t.Set(s);
                    w.Append(NullWritable.Get(), t);
                }
            }
            finally
            {
                w.Close();
            }
            FileInputFormat.SetInputPaths(conf, inFile);
            FileOutputFormat.SetOutputPath(conf, new Path(testdir, "nullout"));
            conf.SetMapperClass(typeof(TestMapRed.NullMapper));
            conf.SetReducerClass(typeof(IdentityReducer));
            conf.SetOutputKeyClass(typeof(NullWritable));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetInputFormat(typeof(SequenceFileInputFormat));
            conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
            conf.SetNumReduceTasks(1);
            conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
            JobClient.RunJob(conf);
            // Since null keys all equal, allow any ordering
            SequenceFile.Reader r = new SequenceFile.Reader(fs, new Path(testdir, "nullout/part-00000"),
                                                            conf);
            try
            {
                // Each value read must still be pending in the expected set.
                while (r.Next(NullWritable.Get(), t))
                {
                    NUnit.Framework.Assert.IsTrue("Unexpected value: " + t, values.Remove(t.ToString()));
                }
            }
            finally
            {
                // The reader was previously never closed.
                r.Close();
            }
            NUnit.Framework.Assert.IsTrue("Missing values: " + values.ToString(), values.IsEmpty
                                              ());
        }
            /// <summary>Reduce task done, write output to a file.</summary>
            /// <remarks>
            /// Writes a single (<c>numInside</c>, <c>numOutside</c>) record to the uncompressed
            /// sequence file <c>reduce-out</c> inside the directory configured under
            /// <c>FileOutputFormat.Outdir</c>.
            /// </remarks>
            /// <param name="context">Reducer context supplying the configuration.</param>
            /// <exception cref="System.IO.IOException"/>
            protected override void Cleanup(Reducer.Context context)
            {
                //write output to a file
                Configuration conf    = context.GetConfiguration();
                Path          outDir  = new Path(conf.Get(FileOutputFormat.Outdir));
                Path          outFile = new Path(outDir, "reduce-out");
                FileSystem    fileSys = FileSystem.Get(conf);

                SequenceFile.Writer writer = SequenceFile.CreateWriter(fileSys, conf, outFile,
                                                                       typeof(LongWritable), typeof(LongWritable),
                                                                       SequenceFile.CompressionType.None);
                try
                {
                    writer.Append(new LongWritable(numInside), new LongWritable(numOutside));
                }
                finally
                {
                    // Close the writer even if the append fails.
                    writer.Close();
                }
            }
Exemple #14
0
        /// <summary>
        /// Appends <paramref name="numRecords"/> records to <paramref name="writer"/> and then
        /// closes it. Record <c>i</c> has the key <c>i</c> and a value filled in by
        /// <c>RandomText(val, i, Recordsize)</c>.
        /// </summary>
        /// <param name="writer">
        /// Destination writer; this method closes it, including when an append fails.
        /// </param>
        /// <param name="numRecords">Number of records to write.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void WriteSequenceFile(SequenceFile.Writer writer, int numRecords)
        {
            IntWritable key = new IntWritable();
            Text        val = new Text();

            try
            {
                for (int numWritten = 0; numWritten < numRecords; ++numWritten)
                {
                    key.Set(numWritten);
                    RandomText(val, numWritten, Recordsize);
                    writer.Append(key, val);
                }
            }
            finally
            {
                // The method owns the close, so honour it on the failure path too.
                writer.Close();
            }
        }
Exemple #15
0
        /// <summary>
        /// Writes <paramref name="lines"/> key/value records to a block-compressed sequence
        /// file using the codec named by <paramref name="codecClass"/>, reads them back,
        /// checks their content and count, and deletes the file.
        /// </summary>
        /// <param name="conf">Configuration; its <c>io.seqfile.compress.blocksize</c> is overwritten.</param>
        /// <param name="lines">Number of records to write and expect back.</param>
        /// <param name="codecClass">Type name of the compression codec to instantiate.</param>
        /// <param name="blockSize">Value stored under <c>io.seqfile.compress.blocksize</c>.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="InstantiationException"/>
        /// <exception cref="System.MemberAccessException"/>
        private static void SequenceFileCodecTest(Configuration conf, int lines, string codecClass
                                                  , int blockSize)
        {
            Path filePath = new Path("SequenceFileCodecTest." + codecClass);

            // Configuration
            conf.SetInt("io.seqfile.compress.blocksize", blockSize);
            // Create the SequenceFile
            FileSystem fs = FileSystem.Get(conf);

            Log.Info("Creating SequenceFile with codec \"" + codecClass + "\"");
            SequenceFile.Writer writer = SequenceFile.CreateWriter(
                fs, conf, filePath, typeof(Text), typeof(Text), SequenceFile.CompressionType.Block,
                (CompressionCodec)System.Activator.CreateInstance(Runtime.GetType(codecClass)));
            try
            {
                // Write some data
                Log.Info("Writing to SequenceFile...");
                for (int i = 0; i < lines; i++)
                {
                    Text key   = new Text("key" + i);
                    Text value = new Text("value" + i);
                    writer.Append(key, value);
                }
            }
            finally
            {
                // Close the writer even when an append fails, as is done for the reader below.
                writer.Close();
            }
            // Read the data back and check
            Log.Info("Reading from the SequenceFile...");
            SequenceFile.Reader reader  = new SequenceFile.Reader(fs, filePath, conf);
            Writable            key_1   = (Writable)System.Activator.CreateInstance(reader.GetKeyClass());
            Writable            value_1 = (Writable)System.Activator.CreateInstance(reader.GetValueClass
                                                                                        ());
            int lc = 0;

            try
            {
                while (reader.Next(key_1, value_1))
                {
                    Assert.Equal("key" + lc, key_1.ToString());
                    Assert.Equal("value" + lc, value_1.ToString());
                    lc++;
                }
            }
            finally
            {
                reader.Close();
            }
            Assert.Equal(lines, lc);
            // Delete temporary files
            fs.Delete(filePath, false);
            Log.Info("SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass +
                     "\"");
        }
Exemple #16
0
 /// <summary>
 /// Writes <paramref name="items"/> records, each carrying the same 1000-line text
 /// value, to a sequence file and runs an identity map/reduce job over it with the
 /// local framework. Any exception is turned into an assertion failure.
 /// </summary>
 /// <param name="items">Number of records to write to the input file.</param>
 public virtual void RunJob(int items)
 {
     try
     {
         JobConf    conf    = new JobConf(typeof(TestMapRed));
         Path       testdir = new Path(TestDir.GetAbsolutePath());
         Path       inDir   = new Path(testdir, "in");
         Path       outDir  = new Path(testdir, "out");
         FileSystem fs      = FileSystem.Get(conf);
         fs.Delete(testdir, true);
         conf.SetInt(JobContext.IoSortMb, 1);
         conf.SetInputFormat(typeof(SequenceFileInputFormat));
         FileInputFormat.SetInputPaths(conf, inDir);
         FileOutputFormat.SetOutputPath(conf, outDir);
         conf.SetMapperClass(typeof(IdentityMapper));
         conf.SetReducerClass(typeof(IdentityReducer));
         conf.SetOutputKeyClass(typeof(Text));
         conf.SetOutputValueClass(typeof(Text));
         conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
         conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
         if (!fs.Mkdirs(testdir))
         {
             throw new IOException("Mkdirs failed to create " + testdir.ToString());
         }
         if (!fs.Mkdirs(inDir))
         {
             throw new IOException("Mkdirs failed to create " + inDir.ToString());
         }
         Path inFile = new Path(inDir, "part0");
         SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile, typeof(Text),
                                                                typeof(Text));
         try
         {
             // Build one large value that every record shares.
             StringBuilder content = new StringBuilder();
             for (int i = 0; i < 1000; i++)
             {
                 content.Append(i).Append(": This is one more line of content\n");
             }
             Org.Apache.Hadoop.IO.Text text = new Org.Apache.Hadoop.IO.Text(content.ToString());
             for (int i_1 = 0; i_1 < items; i_1++)
             {
                 writer.Append(new Org.Apache.Hadoop.IO.Text("rec:" + i_1), text);
             }
         }
         finally
         {
             // Close the writer even when building or appending the records fails.
             writer.Close();
         }
         JobClient.RunJob(conf);
     }
     catch (Exception e)
     {
         NUnit.Framework.Assert.IsTrue("Threw exception:" + e, false);
     }
 }
Exemple #17
0
        /// <summary>
        /// Writes two records to a record-compressed sequence file, verifies them with
        /// <c>Verify2Values</c>, appends two more through a writer opened with
        /// <c>AppendIfExists(true)</c> and verifies the result with <c>VerifyAll4Values</c>.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestAppendRecordCompression()
        {
            GenericTestUtils.AssumeInNativeProfile();
            Path file = new Path(RootPath, "testseqappendblockcompr.seq");

            fs.Delete(file, true);
            SequenceFile.Writer.Option compressOption = SequenceFile.Writer.Compression(
                SequenceFile.CompressionType.Record, new GzipCodec());
            SequenceFile.Writer writer = SequenceFile.CreateWriter(conf, SequenceFile.Writer.File(file),
                                                                   SequenceFile.Writer.KeyClass(typeof(long)),
                                                                   SequenceFile.Writer.ValueClass(typeof(string)),
                                                                   compressOption);
            try
            {
                writer.Append(1L, "one");
                writer.Append(2L, "two");
            }
            finally
            {
                // Release the writer even when an append fails.
                writer.Close();
            }
            Verify2Values(file);
            // Re-open the existing file in append mode for the second batch.
            writer = SequenceFile.CreateWriter(conf, SequenceFile.Writer.File(file),
                                               SequenceFile.Writer.KeyClass(typeof(long)),
                                               SequenceFile.Writer.ValueClass(typeof(string)),
                                               SequenceFile.Writer.AppendIfExists(true), compressOption);
            try
            {
                writer.Append(3L, "three");
                writer.Append(4L, "four");
            }
            finally
            {
                writer.Close();
            }
            VerifyAll4Values(file);
            fs.DeleteOnExit(file);
        }
Exemple #18
0
        /// <summary>
        /// Fills <c>part-0</c> under <paramref name="dir"/> with random key/value records
        /// until at least <paramref name="fileSizeInMB"/> megabytes of key and value bytes
        /// have been appended. Key and value sizes come from the <c>MinKey</c>/<c>MaxKey</c>
        /// and <c>MinValue</c>/<c>MaxValue</c> configuration entries.
        /// </summary>
        /// <param name="conf">Configuration supplying the size bounds.</param>
        /// <param name="fs">File system to write to.</param>
        /// <param name="dir">Target directory; must not already contain entries.</param>
        /// <param name="fileSizeInMB">Amount of record data to write, in megabytes.</param>
        /// <exception cref="System.IO.IOException">
        /// When <paramref name="dir"/> exists and is not empty, or on a write failure.
        /// </exception>
        private static void CreateBigMapInputFile(Configuration conf, FileSystem fs, Path
                                                  dir, long fileSizeInMB)
        {
            // Check if the input path exists and is non-empty
            if (fs.Exists(dir))
            {
                FileStatus[] list = fs.ListStatus(dir);
                if (list.Length > 0)
                {
                    throw new IOException("Input path: " + dir + " already exists... ");
                }
            }
            Path file = new Path(dir, "part-0");

            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(BytesWritable),
                                                                   typeof(BytesWritable),
                                                                   SequenceFile.CompressionType.None);
            long start;

            try
            {
                long          numBytesToWrite = fileSizeInMB * 1024 * 1024;
                int           minKeySize      = conf.GetInt(MinKey, 10);
                int           keySizeRange    = conf.GetInt(MaxKey, 1000) - minKeySize;
                int           minValueSize    = conf.GetInt(MinValue, 0);
                int           valueSizeRange  = conf.GetInt(MaxValue, 20000) - minValueSize;
                BytesWritable randomKey       = new BytesWritable();
                BytesWritable randomValue     = new BytesWritable();

                Log.Info("Writing " + numBytesToWrite + " bytes to " + file + " with " + "minKeySize: "
                         + minKeySize + " keySizeRange: " + keySizeRange + " minValueSize: " + minValueSize
                         + " valueSizeRange: " + valueSizeRange);
                start = Runtime.CurrentTimeMillis();

                while (numBytesToWrite > 0)
                {
                    // A zero range means a fixed size, so skip the random draw.
                    int keyLength = minKeySize + (keySizeRange != 0 ? random.Next(keySizeRange) : 0);
                    randomKey.SetSize(keyLength);
                    RandomizeBytes(randomKey.GetBytes(), 0, randomKey.GetLength());
                    int valueLength = minValueSize + (valueSizeRange != 0 ? random.Next(valueSizeRange) : 0);
                    randomValue.SetSize(valueLength);
                    RandomizeBytes(randomValue.GetBytes(), 0, randomValue.GetLength());
                    writer.Append(randomKey, randomValue);
                    numBytesToWrite -= keyLength + valueLength;
                }
            }
            finally
            {
                // Close the writer even when generating or appending a record fails.
                writer.Close();
            }
            long end = Runtime.CurrentTimeMillis();

            Log.Info("Created " + file + " of size: " + fileSizeInMB + "MB in " + (end - start
                                                                                   ) / 1000 + "secs");
        }
        /// <summary>
        /// Writes <c>Records</c> identical key/value pairs to a sequence file using the
        /// given compression type, merges that single file through a
        /// <c>SequenceFile.Sorter</c> and asserts that the merge iterator yields
        /// <c>Records</c> entries and ends with a progress of 1.0.
        /// </summary>
        /// <param name="compressionType">Compression type used for the sequence file.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void RunTest(SequenceFile.CompressionType compressionType)
        {
            JobConf    jobConf    = new JobConf();
            FileSystem localFs    = FileSystem.GetLocal(jobConf);
            Path       baseDir    = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path       seqFile    = new Path(baseDir, "test.seq");
            Path       scratchDir = new Path(baseDir, "tmp");

            // Start from an empty directory that also serves as the job input path.
            localFs.Delete(baseDir, true);
            FileInputFormat.SetInputPaths(jobConf, baseDir);
            localFs.Mkdirs(scratchDir);

            LongWritable recordKey   = new LongWritable();
            Text         recordValue = new Text();
            SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
                localFs, jobConf, seqFile, typeof(LongWritable), typeof(Text), compressionType,
                new DefaultCodec());
            try
            {
                int written = 0;
                while (written < Records)
                {
                    recordKey.Set(1234);
                    recordValue.Set("valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue");
                    seqWriter.Append(recordKey, recordValue);
                    ++written;
                }
            }
            finally
            {
                seqWriter.Close();
            }

            long seqFileLength = localFs.GetFileStatus(seqFile).GetLen();
            Log.Info("With compression = " + compressionType + ": " + "compressed length = "
                     + seqFileLength);

            SequenceFile.Sorter merger = new SequenceFile.Sorter(
                localFs, jobConf.GetOutputKeyComparator(), jobConf.GetMapOutputKeyClass(),
                jobConf.GetMapOutputValueClass(), jobConf);
            SequenceFile.Sorter.RawKeyValueIterator merged =
                merger.Merge(new Path[] { seqFile }, scratchDir, false);

            // Drain the iterator, counting how many entries it produces.
            int seen = 0;
            for (; merged.Next(); seen++)
            {
            }
            NUnit.Framework.Assert.AreEqual(Records, seen);
            NUnit.Framework.Assert.AreEqual(1.0f, merged.GetProgress().Get());
        }
 /// <summary>
 /// Replaces <paramref name="file"/> with a sequence file that carries
 /// <paramref name="metadata"/> and <paramref name="count"/> <c>RandomDatum</c> records
 /// produced by a generator seeded with <paramref name="seed"/>.
 /// </summary>
 /// <param name="fs">File system to write to.</param>
 /// <param name="count">Number of records to write.</param>
 /// <param name="seed">Seed for the record generator.</param>
 /// <param name="file">Target file; deleted first if present.</param>
 /// <param name="compressionType">Compression type for the file.</param>
 /// <param name="codec">Compression codec passed to the writer.</param>
 /// <param name="metadata">Metadata stored with the file.</param>
 /// <exception cref="System.IO.IOException"/>
 private void WriteMetadataTest(FileSystem fs, int count, int seed, Path file, SequenceFile.CompressionType
                                compressionType, CompressionCodec codec, SequenceFile.Metadata metadata)
 {
     fs.Delete(file, true);
     Log.Info("creating " + count + " records with metadata and with " + compressionType
              + " compression");
     SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(RandomDatum),
                                                            typeof(RandomDatum), compressionType, codec,
                                                            null, metadata);
     try
     {
         RandomDatum.Generator generator = new RandomDatum.Generator(seed);
         for (int i = 0; i < count; i++)
         {
             generator.Next();
             RandomDatum key   = generator.GetKey();
             RandomDatum value = generator.GetValue();
             writer.Append(key, value);
         }
     }
     finally
     {
         // Close the writer even when generating or appending a record fails.
         writer.Close();
     }
 }
Exemple #21
0
 /// <summary>
 /// Writes <paramref name="numRecords"/> records to <c>inFile</c>: the key is the
 /// 1-based record number as text, the value a random byte array whose length
 /// comes from <c>random.Next(10)</c>.
 /// </summary>
 /// <param name="numRecords">Number of records to write.</param>
 /// <exception cref="System.Exception"/>
 private static void CreateSequenceFile(int numRecords)
 {
     SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(fs, conf, inFile, typeof(Text),
                                                               typeof(BytesWritable));
     try
     {
         int recordId = 1;
         while (recordId <= numRecords)
         {
             // Draw the payload length first, then fill the payload with random bytes.
             byte[] payload = new byte[random.Next(10)];
             random.NextBytes(payload);
             seqWriter.Append(new Text(Sharpen.Extensions.ToString(recordId)), new BytesWritable(payload));
             recordId++;
         }
     }
     finally
     {
         seqWriter.Close();
     }
 }
Exemple #22
0
        /// <summary>Write a partition file for the given job, using the Sampler provided.</summary>
        /// <remarks>
        /// Queries the sampler for a sample keyset, sorts it with the job's sort
        /// comparator, selects one key per partition boundary
        /// (<c>numPartitions - 1</c> keys in total) and writes them to the destination
        /// returned from
        /// <see cref="TotalOrderPartitioner{K, V}.GetPartitionFile(Org.Apache.Hadoop.Conf.Configuration)"/>.
        /// An existing destination file is deleted first.
        /// </remarks>
        /// <param name="job">Job supplying the configuration, input format, comparator and reducer count.</param>
        /// <param name="sampler">Sampler queried for the sample keys.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="System.Exception"/>
        public static void WritePartitionFile <K, V>(Job job, InputSampler.Sampler <K, V> sampler
                                                     )
        {
            // getInputFormat, getOutputKeyComparator
            Configuration conf          = job.GetConfiguration();
            InputFormat   inf           = ReflectionUtils.NewInstance(job.GetInputFormatClass(), conf);
            int           numPartitions = job.GetNumReduceTasks();

            K[] samples = (K[])sampler.GetSample(inf, job);
            Log.Info("Using " + samples.Length + " samples");
            RawComparator <K> comparator = (RawComparator <K>)job.GetSortComparator();

            Arrays.Sort(samples, comparator);
            Path       dst = new Path(TotalOrderPartitioner.GetPartitionFile(conf));
            FileSystem fs  = dst.GetFileSystem(conf);

            if (fs.Exists(dst))
            {
                fs.Delete(dst, false);
            }
            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, dst, job.GetMapOutputKeyClass(),
                                                                   typeof(NullWritable));
            try
            {
                NullWritable nullValue = NullWritable.Get();
                float        stepSize  = samples.Length / (float)numPartitions;
                int          last      = -1;

                for (int i = 1; i < numPartitions; ++i)
                {
                    // Math.Round returns a double, so the cast is required. Midpoints
                    // are rounded away from zero to match the round-half-up result
                    // of Java's Math.round for these non-negative values.
                    int k = (int)Math.Round(stepSize * i, System.MidpointRounding.AwayFromZero);
                    // Skip forward past keys equal to the previously written split point
                    // so that the same key is not emitted twice.
                    // NOTE(review): k is not bounds-checked here; with fewer distinct
                    // samples than partitions it can run past the end of samples.
                    while (last >= k && comparator.Compare(samples[last], samples[k]) == 0)
                    {
                        ++k;
                    }
                    writer.Append(samples[k], nullValue);
                    last = k;
                }
            }
            finally
            {
                // Close even when selecting or appending a key fails.
                writer.Close();
            }
        }
Exemple #23
0
 /// <summary>
 /// Creates an uncompressed sequence file at <paramref name="p"/> containing the
 /// single record ("text", "moretext") and logs the created path.
 /// </summary>
 /// <param name="p">Path of the file to create.</param>
 /// <param name="conf">Configuration passed to the sequence file writer.</param>
 /// <exception cref="System.IO.IOException">
 /// Thrown for any failure while creating or writing the file; the original
 /// exception is kept as the inner exception.
 /// </exception>
 internal virtual void CreateTempFile(Path p, Configuration conf)
 {
     SequenceFile.Writer writer = null;
     try
     {
         writer = SequenceFile.CreateWriter(fs, conf, p, typeof(Text), typeof(Text),
                                            SequenceFile.CompressionType.None);
         writer.Append(new Text("text"), new Text("moretext"));
     }
     catch (Exception e)
     {
         // Pass the cause along so its stack trace is not lost.
         throw new IOException(e.GetLocalizedMessage(), e);
     }
     finally
     {
         // writer is still null when CreateWriter itself failed.
         if (writer != null)
         {
             writer.Close();
         }
     }
     Log.Info("created: " + p);
 }
Exemple #24
0
        /// <summary>
        /// Recreates the control file <c>files</c> under <c>ControlDir</c> and fills it
        /// with (random name, random size) records until the sizes add up to at least
        /// <paramref name="megaBytes"/>.
        /// </summary>
        /// <remarks>
        /// Each size is a non-negative value below
        /// <c>((megaBytes / numFiles) * 2) + 1</c>. Names and sizes come from a
        /// <c>Random</c> seeded with <paramref name="seed"/>.
        /// </remarks>
        /// <param name="fs">File system holding the control file.</param>
        /// <param name="megaBytes">Target for the sum of all generated sizes.</param>
        /// <param name="numFiles">Divisor used to derive the upper bound of a single size.</param>
        /// <param name="seed">Seed for the random generator.</param>
        /// <exception cref="System.Exception"/>
        public static void CreateControlFile(FileSystem fs, long megaBytes, int numFiles,
                                             long seed)
        {
            Log.Info("creating control file: " + megaBytes + " bytes, " + numFiles + " files"
                     );
            Path controlFile = new Path(ControlDir, "files");

            fs.Delete(controlFile, true);
            Random random = new Random(seed);

            // Computed before the writer is opened: the division throws when numFiles
            // is zero, and the writer would otherwise be left unclosed.
            long totalSize = 0;
            long maxSize   = ((megaBytes / numFiles) * 2) + 1;

            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, controlFile, typeof(Text),
                                                                   typeof(LongWritable), SequenceFile.CompressionType.None);
            try
            {
                while (totalSize < megaBytes)
                {
                    Text name = new Text(System.Convert.ToString(random.NextLong()));
                    // Take the remainder before dropping the sign. The result equals
                    // |x| % maxSize for every x, and avoids the overflow of negating
                    // long.MinValue, which would leave a negative size.
                    long size = random.NextLong() % maxSize;
                    if (size < 0)
                    {
                        size = -size;
                    }
                    writer.Append(name, new LongWritable(size));
                    totalSize += size;
                }
            }
            finally
            {
                writer.Close();
            }
            Log.Info("created control file for: " + totalSize + " bytes");
        }
Exemple #25
0
        /// <summary>
        /// Prepares <c>conf</c> and the on-disk input for the test: resets the test
        /// directory, sets the job's formats and key/value classes, writes two small
        /// sequence files (<c>part0</c>, <c>part1</c>) into the input directory and
        /// creates the <c>JobClient</c> stored in <c>jc</c>.
        /// </summary>
        /// <exception cref="System.IO.IOException">
        /// Thrown when the test or input directory cannot be created.
        /// </exception>
        public virtual void Configure()
        {
            Path       testdir = new Path(TestDir.GetAbsolutePath());
            Path       inDir   = new Path(testdir, "in");
            Path       outDir  = new Path(testdir, "out");
            FileSystem fs      = FileSystem.Get(conf);

            fs.Delete(testdir, true);
            conf.SetInputFormat(typeof(SequenceFileInputFormat));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetOutputKeyClass(typeof(IntWritable));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetMapOutputValueClass(typeof(IntWritable));
            // set up two map jobs, so we can test merge phase in Reduce also
            conf.SetNumMapTasks(2);
            conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
            conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
            if (!fs.Mkdirs(testdir))
            {
                throw new IOException("Mkdirs failed to create " + testdir.ToString());
            }
            if (!fs.Mkdirs(inDir))
            {
                throw new IOException("Mkdirs failed to create " + inDir.ToString());
            }
            // set up input data in 2 files; each writer is closed in a finally block
            // so a failing append does not leave the file open
            Path inFile = new Path(inDir, "part0");

            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile, typeof(IntWritable),
                                                                   typeof(IntWritable));
            try
            {
                writer.Append(new IntWritable(11), new IntWritable(999));
                writer.Append(new IntWritable(23), new IntWritable(456));
                writer.Append(new IntWritable(10), new IntWritable(780));
            }
            finally
            {
                writer.Close();
            }
            inFile = new Path(inDir, "part1");
            writer = SequenceFile.CreateWriter(fs, conf, inFile, typeof(IntWritable), typeof(IntWritable));
            try
            {
                writer.Append(new IntWritable(45), new IntWritable(100));
                writer.Append(new IntWritable(18), new IntWritable(200));
                writer.Append(new IntWritable(27), new IntWritable(300));
            }
            finally
            {
                writer.Close();
            }
            jc = new JobClient(conf);
        }
Exemple #26
0
 /// <summary>
 /// Appends one key/value pair by forwarding it unchanged to the <c>writer</c>
 /// member, which is defined outside this method.
 /// </summary>
 /// <param name="key">Key passed to <c>writer.Append</c>.</param>
 /// <param name="value">Value passed to <c>writer.Append</c>.</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Append(BytesWritable key, BytesWritable value)
 {
     writer.Append(key, value);
 }
Exemple #27
0
        /// <summary>
        /// Runs the end-to-end MapReduce check: writes a random distribution as the
        /// "answer key", runs a generate job, a check job and a merge job over it, then
        /// compares the merged output with the answer key.
        /// </summary>
        /// <remarks>
        /// The outcome is written to a <c>results</c> file, asserted, and the test
        /// directory is deleted afterwards. The method reads the <c>counts</c>,
        /// <c>range</c>, <c>r</c>, <c>fs</c> and <c>TestDir</c> members defined outside
        /// this block.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        private static void Launch()
        {
            //
            // Generate distribution of ints.  This is the answer key.
            //
            Configuration conf       = new Configuration();
            int           countsToGo = counts;

            int[] dist = new int[range];
            for (int i = 0; i < range; i++)
            {
                double avgInts = (1.0 * countsToGo) / (range - i);
                dist[i] = (int)Math.Max(0, Math.Round(avgInts + (Math.Sqrt(avgInts) * r.NextGaussian())));
                countsToGo -= dist[i];
            }
            if (countsToGo > 0)
            {
                // Whatever was not handed out goes to the last bucket.
                dist[dist.Length - 1] += countsToGo;
            }
            //
            // Write the answer key to a file.
            //
            Path testdir = new Path(TestDir.GetAbsolutePath());

            if (!fs.Mkdirs(testdir))
            {
                throw new IOException("Mkdirs failed to create " + testdir.ToString());
            }
            Path randomIns = new Path(testdir, "genins");

            if (!fs.Mkdirs(randomIns))
            {
                throw new IOException("Mkdirs failed to create " + randomIns.ToString());
            }
            Path answerkey = new Path(randomIns, "answer.key");

            SequenceFile.Writer @out = SequenceFile.CreateWriter(fs, conf, answerkey, typeof(IntWritable),
                                                                 typeof(IntWritable), SequenceFile.CompressionType.None);
            try
            {
                for (int i_1 = 0; i_1 < range; i_1++)
                {
                    @out.Append(new IntWritable(i_1), new IntWritable(dist[i_1]));
                }
            }
            finally
            {
                @out.Close();
            }
            PrintFiles(randomIns, conf);
            //
            // Now we need to generate the random numbers according to
            // the above distribution.
            //
            // We create a lot of map tasks, each of which takes at least
            // one "line" of the distribution.  (That is, a certain number
            // X is to be generated Y number of times.)
            //
            // A map task emits Y key/val pairs.  The val is X.  The key
            // is a randomly-generated number.
            //
            // The reduce task gets its input sorted by key.  That is, sorted
            // in random order.  It then emits a single line of text that
            // for the given values.  It does not emit the key.
            //
            // Because there's just one reduce task, we emit a single big
            // file of random numbers.
            //
            Path randomOuts = new Path(testdir, "genouts");

            fs.Delete(randomOuts, true);
            Job genJob = Job.GetInstance(conf);

            FileInputFormat.SetInputPaths(genJob, randomIns);
            genJob.SetInputFormatClass(typeof(SequenceFileInputFormat));
            genJob.SetMapperClass(typeof(TestMapReduce.RandomGenMapper));
            FileOutputFormat.SetOutputPath(genJob, randomOuts);
            genJob.SetOutputKeyClass(typeof(IntWritable));
            genJob.SetOutputValueClass(typeof(IntWritable));
            genJob.SetReducerClass(typeof(TestMapReduce.RandomGenReducer));
            genJob.SetNumReduceTasks(1);
            genJob.WaitForCompletion(true);
            PrintFiles(randomOuts, conf);
            //
            // Next, we read the big file in and regenerate the
            // original map.  It's split into a number of parts.
            // (That number is 'intermediateReduces'.)
            //
            // We have many map tasks, each of which read at least one
            // of the output numbers.  For each number read in, the
            // map task emits a key/value pair where the key is the
            // number and the value is "1".
            //
            // We have a single reduce task, which receives its input
            // sorted by the key emitted above.  For each key, there will
            // be a certain number of "1" values.  The reduce task sums
            // these values to compute how many times the given key was
            // emitted.
            //
            // The reduce task then emits a key/val pair where the key
            // is the number in question, and the value is the number of
            // times the key was emitted.  This is the same format as the
            // original answer key (except that numbers emitted zero times
            // will not appear in the regenerated key.)  The answer set
            // is split into a number of pieces.  A final MapReduce job
            // will merge them.
            //
            // There's not really a need to go to 10 reduces here
            // instead of 1.  But we want to test what happens when
            // you have multiple reduces at once.
            //
            int  intermediateReduces = 10;
            Path intermediateOuts    = new Path(testdir, "intermediateouts");

            fs.Delete(intermediateOuts, true);
            Job checkJob = Job.GetInstance(conf);

            FileInputFormat.SetInputPaths(checkJob, randomOuts);
            checkJob.SetMapperClass(typeof(TestMapReduce.RandomCheckMapper));
            FileOutputFormat.SetOutputPath(checkJob, intermediateOuts);
            checkJob.SetOutputKeyClass(typeof(IntWritable));
            checkJob.SetOutputValueClass(typeof(IntWritable));
            checkJob.SetOutputFormatClass(typeof(MapFileOutputFormat));
            checkJob.SetReducerClass(typeof(TestMapReduce.RandomCheckReducer));
            checkJob.SetNumReduceTasks(intermediateReduces);
            checkJob.WaitForCompletion(true);
            PrintFiles(intermediateOuts, conf);
            //
            // OK, now we take the output from the last job and
            // merge it down to a single file.  The map() and reduce()
            // functions don't really do anything except reemit tuples.
            // But by having a single reduce task here, we end up merging
            // all the files.
            //
            Path finalOuts = new Path(testdir, "finalouts");

            fs.Delete(finalOuts, true);
            Job mergeJob = Job.GetInstance(conf);

            FileInputFormat.SetInputPaths(mergeJob, intermediateOuts);
            mergeJob.SetInputFormatClass(typeof(SequenceFileInputFormat));
            mergeJob.SetMapperClass(typeof(TestMapReduce.MergeMapper));
            FileOutputFormat.SetOutputPath(mergeJob, finalOuts);
            mergeJob.SetOutputKeyClass(typeof(IntWritable));
            mergeJob.SetOutputValueClass(typeof(IntWritable));
            mergeJob.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
            mergeJob.SetReducerClass(typeof(TestMapReduce.MergeReducer));
            mergeJob.SetNumReduceTasks(1);
            mergeJob.WaitForCompletion(true);
            PrintFiles(finalOuts, conf);
            //
            // Finally, we compare the reconstructed answer key with the
            // original one.  Remember, we need to ignore zero-count items
            // in the original key.
            //
            bool success       = true;
            Path recomputedkey = new Path(finalOuts, "part-r-00000");

            SequenceFile.Reader @in = new SequenceFile.Reader(fs, recomputedkey, conf);
            int totalseen           = 0;

            try
            {
                IntWritable key = new IntWritable();
                IntWritable val = new IntWritable();
                for (int i_1 = 0; i_1 < range; i_1++)
                {
                    if (dist[i_1] == 0)
                    {
                        continue;
                    }
                    // Every non-zero bucket must have a matching record in the output.
                    if (!@in.Next(key, val))
                    {
                        System.Console.Error.WriteLine("Cannot read entry " + i_1);
                        success = false;
                        break;
                    }
                    else
                    {
                        if (!((key.Get() == i_1) && (val.Get() == dist[i_1])))
                        {
                            System.Console.Error.WriteLine("Mismatch!  Pos=" + key.Get() + ", i=" + i_1 + ", val="
                                                           + val.Get() + ", dist[i]=" + dist[i_1]);
                            success = false;
                        }
                        totalseen += val.Get();
                    }
                }
                if (success)
                {
                    // Anything left in the reader is an entry the answer key does not have.
                    if (@in.Next(key, val))
                    {
                        System.Console.Error.WriteLine("Unnecessary lines in recomputed key!");
                        success = false;
                    }
                }
            }
            finally
            {
                @in.Close();
            }
            int originalTotal = 0;

            for (int i_2 = 0; i_2 < dist.Length; i_2++)
            {
                originalTotal += dist[i_2];
            }
            System.Console.Out.WriteLine("Original sum: " + originalTotal);
            System.Console.Out.WriteLine("Recomputed sum: " + totalseen);
            //
            // Write to "results" whether the test succeeded or not.
            //
            Path           resultFile = new Path(testdir, "results");
            BufferedWriter bw         = new BufferedWriter(new OutputStreamWriter(fs.Create(resultFile)));

            try
            {
                bw.Write("Success=" + success + "\n");
                System.Console.Out.WriteLine("Success=" + success);
            }
            finally
            {
                bw.Close();
            }
            NUnit.Framework.Assert.IsTrue("testMapRed failed", success);
            fs.Delete(testdir, true);
        }
Exemple #28
0
        /// <summary>
        /// Writes <c>Records</c> random Text pairs to a sequence file, reads them back
        /// through <c>SequenceFileAsBinaryInputFormat</c>, and checks that every raw
        /// key/value decodes to the pair that was written.
        /// </summary>
        /// <remarks>
        /// The random generator is re-seeded with the same seed before reading, so the
        /// expected pairs are regenerated in the order they were written.
        /// </remarks>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public virtual void TestBinary()
        {
            Job        job     = Job.GetInstance();
            FileSystem localFs = FileSystem.GetLocal(job.GetConfiguration());
            Path       baseDir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path       seqFile = new Path(baseDir, "testbinary.seq");
            Random     rng     = new Random();
            long       seed    = rng.NextLong();

            rng.SetSeed(seed);
            localFs.Delete(baseDir, true);
            FileInputFormat.SetInputPaths(job, baseDir);
            Text expectedKey = new Text();
            Text expectedVal = new Text();

            // Write phase: one int-derived key and one long-derived value per record.
            SequenceFile.Writer seqWriter = new SequenceFile.Writer(localFs, job.GetConfiguration(),
                                                                    seqFile, typeof(Text), typeof(Text));
            try
            {
                int written = 0;
                while (written < Records)
                {
                    expectedKey.Set(Sharpen.Extensions.ToString(rng.Next(), 36));
                    expectedVal.Set(System.Convert.ToString(rng.NextLong(), 36));
                    seqWriter.Append(expectedKey, expectedVal);
                    ++written;
                }
            }
            finally
            {
                seqWriter.Close();
            }

            TaskAttemptContext attemptContext = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(
                job.GetConfiguration());
            InputFormat <BytesWritable, BytesWritable> binaryFormat = new SequenceFileAsBinaryInputFormat();
            int matched = 0;

            // Restart the random sequence so the expected values can be regenerated.
            rng.SetSeed(seed);
            Text            actualKey = new Text();
            Text            actualVal = new Text();
            DataInputBuffer input     = new DataInputBuffer();

            FileInputFormat.SetInputPaths(job, seqFile);
            foreach (InputSplit split in binaryFormat.GetSplits(job))
            {
                RecordReader <BytesWritable, BytesWritable> reader =
                    binaryFormat.CreateRecordReader(split, attemptContext);
                MapContext <BytesWritable, BytesWritable, BytesWritable, BytesWritable> mapContext =
                    new MapContextImpl <BytesWritable, BytesWritable, BytesWritable, BytesWritable>(
                        job.GetConfiguration(), attemptContext.GetTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.CreateDummyReporter(), split);
                reader.Initialize(split, mapContext);
                try
                {
                    while (reader.NextKeyValue())
                    {
                        BytesWritable rawKey = reader.GetCurrentKey();
                        BytesWritable rawVal = reader.GetCurrentValue();

                        // Regenerate what was written for this record (key first, then value).
                        expectedKey.Set(Sharpen.Extensions.ToString(rng.Next(), 36));
                        expectedVal.Set(System.Convert.ToString(rng.NextLong(), 36));

                        // Decode the raw bytes back into Text objects.
                        input.Reset(rawKey.GetBytes(), rawKey.GetLength());
                        actualKey.ReadFields(input);
                        input.Reset(rawVal.GetBytes(), rawVal.GetLength());
                        actualVal.ReadFields(input);

                        string keyMessage = "Keys don't match: " + "*" + actualKey.ToString() + ":"
                                            + expectedKey.ToString() + "*";
                        NUnit.Framework.Assert.IsTrue(keyMessage,
                                                      actualKey.ToString().Equals(expectedKey.ToString()));
                        string valMessage = "Vals don't match: " + "*" + actualVal.ToString() + ":"
                                            + expectedVal.ToString() + "*";
                        NUnit.Framework.Assert.IsTrue(valMessage,
                                                      actualVal.ToString().Equals(expectedVal.ToString()));
                        ++matched;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some records not found", Records, matched);
        }
        /// <summary>Run a map/reduce job for estimating Pi.</summary>
        /// <remarks>
        /// Writes one input file per map task under <c>tmpDir/in</c>, runs the job,
        /// reads the first record of <c>tmpDir/out/reduce-out</c> and derives the
        /// estimate from it. <paramref name="tmpDir"/> must not exist beforehand and is
        /// deleted before returning, whether or not the run succeeded.
        /// </remarks>
        /// <param name="numMaps">Number of map tasks, and of input files generated.</param>
        /// <param name="numPoints">Number of points assigned to each map task.</param>
        /// <param name="tmpDir">Scratch directory for the job's input and output.</param>
        /// <param name="conf">Configuration used to create the job and the file system.</param>
        /// <returns>the estimated value of Pi</returns>
        /// <exception cref="System.IO.IOException">
        /// Thrown when <paramref name="tmpDir"/> already exists, the input directory
        /// cannot be created, or the job does not complete successfully.
        /// </exception>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="System.Exception"/>
        public static BigDecimal EstimatePi(int numMaps, long numPoints, Path tmpDir, Configuration
                                            conf)
        {
            Job job = Job.GetInstance(conf);

            //setup job conf
            job.SetJobName(typeof(QuasiMonteCarlo).Name);
            job.SetJarByClass(typeof(QuasiMonteCarlo));
            job.SetInputFormatClass(typeof(SequenceFileInputFormat));
            job.SetOutputKeyClass(typeof(BooleanWritable));
            job.SetOutputValueClass(typeof(LongWritable));
            job.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
            job.SetMapperClass(typeof(QuasiMonteCarlo.QmcMapper));
            job.SetReducerClass(typeof(QuasiMonteCarlo.QmcReducer));
            job.SetNumReduceTasks(1);
            // turn off speculative execution, because DFS doesn't handle
            // multiple writers to the same file.
            job.SetSpeculativeExecution(false);
            //setup input/output directories
            Path inDir  = new Path(tmpDir, "in");
            Path outDir = new Path(tmpDir, "out");

            FileInputFormat.SetInputPaths(job, inDir);
            FileOutputFormat.SetOutputPath(job, outDir);
            FileSystem fs = FileSystem.Get(conf);

            if (fs.Exists(tmpDir))
            {
                throw new IOException("Tmp directory " + fs.MakeQualified(tmpDir) + " already exists.  Please remove it first."
                                      );
            }
            if (!fs.Mkdirs(inDir))
            {
                throw new IOException("Cannot create input directory " + inDir);
            }
            try
            {
                //generate an input file for each map task
                for (int i = 0; i < numMaps; ++i)
                {
                    Path                file   = new Path(inDir, "part" + i);
                    LongWritable        offset = new LongWritable(i * numPoints);
                    LongWritable        size   = new LongWritable(numPoints);
                    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(LongWritable),
                                                                           typeof(LongWritable), SequenceFile.CompressionType.None);
                    try
                    {
                        writer.Append(offset, size);
                    }
                    finally
                    {
                        writer.Close();
                    }
                    System.Console.Out.WriteLine("Wrote input for Map #" + i);
                }
                //start a map/reduce job
                System.Console.Out.WriteLine("Starting Job");
                long startTime = Runtime.CurrentTimeMillis();
                bool succeeded = job.WaitForCompletion(true);
                double duration = (Runtime.CurrentTimeMillis() - startTime) / 1000.0;
                System.Console.Out.WriteLine("Job Finished in " + duration + " seconds");
                // Do not read the output of a failed job as if it were a valid result.
                if (!succeeded)
                {
                    throw new IOException("Job " + typeof(QuasiMonteCarlo).Name + " failed");
                }
                //read outputs
                Path                inFile     = new Path(outDir, "reduce-out");
                LongWritable        numInside  = new LongWritable();
                LongWritable        numOutside = new LongWritable();
                SequenceFile.Reader reader     = new SequenceFile.Reader(fs, inFile, conf);
                try
                {
                    reader.Next(numInside, numOutside);
                }
                finally
                {
                    reader.Close();
                }
                //compute estimated value: 4 * numInside / (numMaps * numPoints)
                BigDecimal numTotal = BigDecimal.ValueOf(numMaps).Multiply(BigDecimal.ValueOf(numPoints));
                return(BigDecimal.ValueOf(4).SetScale(20).Multiply(BigDecimal.ValueOf(numInside.Get()))
                       .Divide(numTotal, RoundingMode.HalfUp));
            }
            finally
            {
                // Always remove the scratch directory, also on failure.
                fs.Delete(tmpDir, true);
            }
        }
Exemple #30
0
        /// <summary>
        /// Checks appending to an existing sequence file: records written by a second
        /// writer opened with <c>AppendIfExists(true)</c> end up in the same file, the
        /// metadata stays what it was at creation, and reopening with different
        /// compression options is rejected with an <c>ArgumentException</c>.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestAppend()
        {
            Path seqFile = new Path(RootPath, "testseqappend.seq");

            fs.Delete(seqFile, true);
            Text metaKey      = new Text("Key1");
            Text originalMeta = new Text("Value1");
            Text changedMeta  = new Text("Updated");

            SequenceFile.Metadata fileMetadata = new SequenceFile.Metadata();
            fileMetadata.Set(metaKey, originalMeta);
            SequenceFile.Writer.Option metaOption = SequenceFile.Writer.Metadata(fileMetadata);

            // First pass: create the file with two records.
            SequenceFile.Writer writer = SequenceFile.CreateWriter(
                conf,
                SequenceFile.Writer.File(seqFile),
                SequenceFile.Writer.KeyClass(typeof(long)),
                SequenceFile.Writer.ValueClass(typeof(string)),
                metaOption);
            writer.Append(1L, "one");
            writer.Append(2L, "two");
            writer.Close();
            Verify2Values(seqFile);

            // Second pass: change the metadata object, then reopen in append mode.
            fileMetadata.Set(metaKey, changedMeta);
            writer = SequenceFile.CreateWriter(
                conf,
                SequenceFile.Writer.File(seqFile),
                SequenceFile.Writer.KeyClass(typeof(long)),
                SequenceFile.Writer.ValueClass(typeof(string)),
                SequenceFile.Writer.AppendIfExists(true),
                metaOption);
            // The appending writer must still report the metadata the file was created with.
            Assert.Equal(originalMeta, writer.metadata.Get(metaKey));
            writer.Append(3L, "three");
            writer.Append(4L, "four");
            writer.Close();
            VerifyAll4Values(seqFile);

            // The metadata read back from disk is the original value as well.
            SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.File(seqFile));
            Assert.Equal(originalMeta, reader.GetMetadata().Get(metaKey));
            reader.Close();

            // Appending with record compression and a gzip codec must be rejected.
            try
            {
                SequenceFile.Writer.Option mismatchedOption = SequenceFile.Writer.Compression(
                    SequenceFile.CompressionType.Record, new GzipCodec());
                writer = SequenceFile.CreateWriter(
                    conf,
                    SequenceFile.Writer.File(seqFile),
                    SequenceFile.Writer.KeyClass(typeof(long)),
                    SequenceFile.Writer.ValueClass(typeof(string)),
                    SequenceFile.Writer.AppendIfExists(true),
                    mismatchedOption);
                writer.Close();
                NUnit.Framework.Assert.Fail("Expected IllegalArgumentException for compression options");
            }
            catch (ArgumentException)
            {
                // Expected: the compression details differ from the existing file.
            }

            // Appending with block compression and the default codec must be rejected too.
            try
            {
                SequenceFile.Writer.Option mismatchedOption = SequenceFile.Writer.Compression(
                    SequenceFile.CompressionType.Block, new DefaultCodec());
                writer = SequenceFile.CreateWriter(
                    conf,
                    SequenceFile.Writer.File(seqFile),
                    SequenceFile.Writer.KeyClass(typeof(long)),
                    SequenceFile.Writer.ValueClass(typeof(string)),
                    SequenceFile.Writer.AppendIfExists(true),
                    mismatchedOption);
                writer.Close();
                NUnit.Framework.Assert.Fail("Expected IllegalArgumentException for compression options");
            }
            catch (ArgumentException)
            {
                // Expected: the compression details differ from the existing file.
            }

            fs.DeleteOnExit(seqFile);
        }