/// <exception cref="System.Exception"/>
        /// <summary>
        /// Writes a sequence file with a growing, randomly stepped number of records and, for
        /// each size, asks <c>SequenceFileInputFormat</c> for several random split counts,
        /// asserting that the splits together return every key exactly once.
        /// </summary>
        public virtual void TestFormat()
        {
            JobConf job = new JobConf(conf);
            FileSystem fs = FileSystem.GetLocal(conf);
            Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path file = new Path(dir, "test.seq");
            Reporter reporter = Reporter.Null;

            // The seed is drawn separately so that it can be logged to reproduce a failing run.
            int seed = new Random().Next();
            //LOG.info("seed = "+seed);
            Random random = new Random(seed);

            fs.Delete(dir, true);
            FileInputFormat.SetInputPaths(job, dir);

            // Try a variety of record counts; the step is random but always at least 1.
            for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) + 1)
            {
                // Write `length` entries keyed 0 .. length-1, each with a small random payload.
                SequenceFile.Writer writer = SequenceFile.CreateWriter(
                    fs, conf, file, typeof(IntWritable), typeof(BytesWritable));
                try
                {
                    for (int i = 0; i < length; i++)
                    {
                        byte[] data = new byte[random.Next(10)];
                        random.NextBytes(data);
                        writer.Append(new IntWritable(i), new BytesWritable(data));
                    }
                }
                finally
                {
                    writer.Close();
                }

                // Read the file back through a variety of split counts.
                InputFormat<IntWritable, BytesWritable> format =
                    new SequenceFileInputFormat<IntWritable, BytesWritable>();
                IntWritable readKey = new IntWritable();
                BytesWritable readValue = new BytesWritable();
                for (int attemptIndex = 0; attemptIndex < 3; attemptIndex++)
                {
                    int numSplits = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1;
                    InputSplit[] splits = format.GetSplits(job, numSplits);

                    // One bit per key; a bit is set the first time any split yields that key.
                    BitSet bits = new BitSet(length);
                    for (int j = 0; j < splits.Length; j++)
                    {
                        RecordReader<IntWritable, BytesWritable> reader =
                            format.GetRecordReader(splits[j], job, reporter);
                        try
                        {
                            while (reader.Next(readKey, readValue))
                            {
                                // NUnit takes the condition first and the message second
                                // (the reverse of JUnit).
                                NUnit.Framework.Assert.IsFalse(
                                    bits.Get(readKey.Get()), "Key in multiple partitions.");
                                bits.Set(readKey.Get());
                            }
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }

                    // Every key written must have been seen in some split.
                    NUnit.Framework.Assert.AreEqual(
                        length, bits.Cardinality(), "Some keys in no partition.");
                }
            }
        }
        // A random task attempt id for testing.
        /// <summary>
        /// Writes <c>Records</c> random int/double pairs through
        /// <c>SequenceFileAsBinaryOutputFormat</c> as pre-serialized <c>BytesWritable</c>
        /// key/value bytes, then reads the file back with a typed
        /// <c>SequenceFileInputFormat</c> and checks each record against a replay of the same
        /// pseudo-random sequence.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestBinary()
        {
            JobConf job = new JobConf();
            FileSystem fs = FileSystem.GetLocal(job);
            Path dir = new Path(
                new Path(new Path(Runtime.GetProperty("test.build.data", ".")), FileOutputCommitter.TempDirName),
                "_" + attempt);
            Path file = new Path(dir, "testbinary.seq");

            // Remember the seed so the identical sequence can be regenerated for verification.
            // A fresh generator is constructed from the seed (the same pattern TestFormat uses)
            // instead of re-seeding one in place.
            int seed = new Random().Next();
            Random r = new Random(seed);

            fs.Delete(dir, true);
            if (!fs.Mkdirs(dir))
            {
                NUnit.Framework.Assert.Fail("Failed to create output directory");
            }

            job.Set(JobContext.TaskAttemptId, attempt);
            FileOutputFormat.SetOutputPath(job, dir.GetParent().GetParent());
            FileOutputFormat.SetWorkOutputPath(job, dir);
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable));
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable));
            SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
            SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType.Block);

            BytesWritable bkey = new BytesWritable();
            BytesWritable bval = new BytesWritable();
            RecordWriter<BytesWritable, BytesWritable> writer =
                new SequenceFileAsBinaryOutputFormat().GetRecordWriter(fs, job, file.ToString(), Reporter.Null);
            IntWritable iwritable = new IntWritable();
            DoubleWritable dwritable = new DoubleWritable();
            DataOutputBuffer outbuf = new DataOutputBuffer();

            Log.Info("Creating data by SequenceFileAsBinaryOutputFormat");
            try
            {
                for (int i = 0; i < Records; ++i)
                {
                    // Serialize the typed key into the scratch buffer and hand the bytes over.
                    iwritable = new IntWritable(r.Next());
                    iwritable.Write(outbuf);
                    bkey.Set(outbuf.GetData(), 0, outbuf.GetLength());
                    outbuf.Reset();

                    // Same for the value.
                    dwritable = new DoubleWritable(r.NextDouble());
                    dwritable.Write(outbuf);
                    bval.Set(outbuf.GetData(), 0, outbuf.GetLength());
                    outbuf.Reset();

                    writer.Write(bkey, bval);
                }
            }
            finally
            {
                writer.Close(Reporter.Null);
            }

            InputFormat<IntWritable, DoubleWritable> iformat =
                new SequenceFileInputFormat<IntWritable, DoubleWritable>();
            int count = 0;
            int numSplits = 3;

            // Restart the generator so it replays exactly the values written above.
            r = new Random(seed);

            SequenceFileInputFormat.AddInputPath(job, file);
            Log.Info("Reading data by SequenceFileInputFormat");
            foreach (InputSplit split in iformat.GetSplits(job, numSplits))
            {
                RecordReader<IntWritable, DoubleWritable> reader =
                    iformat.GetRecordReader(split, job, Reporter.Null);
                try
                {
                    while (reader.Next(iwritable, dwritable))
                    {
                        // Draw in the same order as the writer: int key first, then double value.
                        int sourceInt = r.Next();
                        double sourceDouble = r.NextDouble();

                        // NUnit order is (expected, actual, message) / (condition, message).
                        NUnit.Framework.Assert.AreEqual(
                            sourceInt,
                            iwritable.Get(),
                            "Keys don't match: " + "*" + iwritable.Get() + ":" + sourceInt + "*");
                        NUnit.Framework.Assert.IsTrue(
                            dwritable.Get().CompareTo(sourceDouble) == 0,
                            "Vals don't match: " + "*" + dwritable.Get() + ":" + sourceDouble + "*");
                        ++count;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }

            NUnit.Framework.Assert.AreEqual(Records, count, "Some records not found");
        }
// Example #3
// 0
        /// <summary>
        /// Calls <c>GetSplitHosts</c> on a <c>SequenceFileInputFormat</c> for a split covering
        /// three block locations, first with rack paths attached to every replica and then
        /// without, and checks how many hosts come back and in which order.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestGetSplitHosts()
        {
            int numBlocks = 3;
            int firstLength = 100;
            int secondLength = 150;
            int thirdLength = 75;
            int secondOffset = firstLength;
            int thirdOffset = firstLength + secondLength;
            int totalLength = thirdOffset + thirdLength;
            int replicationFactor = 3;
            NetworkTopology topology = new NetworkTopology();
            BlockLocation[] locations = new BlockLocation[numBlocks];

            // Replica descriptions for each block: host name, "host:port" name, and rack.
            string[] firstHosts = { "host1", "host2", "host3" };
            string[] firstNames = { "host1:100", "host2:100", "host3:100" };
            string[] firstRacks = { "/rack1/", "/rack1/", "/rack2/" };

            string[] secondHosts = { "host4", "host5", "host6" };
            string[] secondNames = { "host4:100", "host5:100", "host6:100" };
            string[] secondRacks = { "/rack2/", "/rack3/", "/rack3/" };

            string[] thirdHosts = { "host1", "host7", "host8" };
            string[] thirdNames = { "host1:100", "host7:100", "host8:100" };
            string[] thirdRacks = { "/rack1/", "/rack4/", "/rack4/" };

            // A replica's topology path is its rack prefix followed by its "host:port" name.
            string[] firstPaths = new string[replicationFactor];
            string[] secondPaths = new string[replicationFactor];
            string[] thirdPaths = new string[replicationFactor];
            for (int replica = 0; replica < replicationFactor; ++replica)
            {
                firstPaths[replica] = firstRacks[replica] + firstNames[replica];
                secondPaths[replica] = secondRacks[replica] + secondNames[replica];
                thirdPaths[replica] = thirdRacks[replica] + thirdNames[replica];
            }

            locations[0] = new BlockLocation(firstNames, firstHosts, firstPaths, 0, firstLength);
            locations[1] = new BlockLocation(secondNames, secondHosts, secondPaths, secondOffset, secondLength);
            locations[2] = new BlockLocation(thirdNames, thirdHosts, thirdPaths, thirdOffset, thirdLength);

            SequenceFileInputFormat<string, string> inputFormat = new SequenceFileInputFormat<string, string>();

            // With rack paths present the expected order is host4, host3, host1. The original
            // author's note attributes this to ranking by per-rack contribution (rack2 first)
            // rather than by each host's individual contribution.
            string[] chosen = inputFormat.GetSplitHosts(locations, 0, totalLength, topology);
            NUnit.Framework.Assert.IsTrue(chosen.Length == replicationFactor);
            NUnit.Framework.Assert.IsTrue(Sharpen.Runtime.EqualsIgnoreCase(chosen[0], "host4"));
            NUnit.Framework.Assert.IsTrue(Sharpen.Runtime.EqualsIgnoreCase(chosen[1], "host3"));
            NUnit.Framework.Assert.IsTrue(Sharpen.Runtime.EqualsIgnoreCase(chosen[2], "host1"));

            // Rebuild the same blocks without any topology paths.
            locations[0] = new BlockLocation(firstNames, firstHosts, 0, firstLength);
            locations[1] = new BlockLocation(secondNames, secondHosts, secondOffset, secondLength);
            locations[2] = new BlockLocation(thirdNames, thirdHosts, thirdOffset, thirdLength);

            // host1 holds replicas of the first and third blocks (100 + 75 bytes), more than any
            // other single host, so it is expected to be listed first.
            chosen = inputFormat.GetSplitHosts(locations, 0, totalLength, topology);
            NUnit.Framework.Assert.IsTrue(chosen.Length == replicationFactor);
            NUnit.Framework.Assert.IsTrue(Sharpen.Runtime.EqualsIgnoreCase(chosen[0], "host1"));
        }