예제 #1
0
 /// <summary>
 /// Advance the underlying record reader and cache its current key and value
 /// as the head of this object.
 /// </summary>
 /// <returns>
 /// true when the record reader produced another k,v pair; false when it did
 /// not, in which case <c>empty</c> has been set to true.
 /// </returns>
 /// <exception cref="System.IO.IOException"/>
 /// <exception cref="System.Exception"/>
 private bool Next()
 {
     bool advanced = rr.NextKeyValue();

     // 'empty' is the inverse of what the reader just reported.
     empty = !advanced;
     key   = rr.GetCurrentKey();
     value = rr.GetCurrentValue();
     return advanced;
 }
예제 #2
0
        /// <summary>
        /// Reads every split produced by a <c>CompositeInputFormat</c> for a job built
        /// from <paramref name="conf"/>, validates each key/value pair with
        /// <c>ValidateKeyValue</c> and counts the records read.
        /// </summary>
        /// <param name="conf">Configuration used to create the job and the task contexts.</param>
        /// <param name="tupleSize">Forwarded unchanged to <c>ValidateKeyValue</c>.</param>
        /// <param name="firstTuple">Forwarded unchanged to <c>ValidateKeyValue</c>.</param>
        /// <param name="secondTuple">Forwarded unchanged to <c>ValidateKeyValue</c>.</param>
        /// <param name="ttype">Forwarded unchanged to <c>ValidateKeyValue</c>.</param>
        /// <returns>The total number of records read across all splits.</returns>
        /// <exception cref="System.Exception"/>
        public virtual int TestFormat(Configuration conf, int tupleSize, bool firstTuple,
                                      bool secondTuple, TestJoinProperties.TestType ttype)
        {
            Job job = Job.GetInstance(conf);
            CompositeInputFormat format = new CompositeInputFormat();
            int count = 0;

            foreach (InputSplit split in (IList <InputSplit>)format.GetSplits(job))
            {
                TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(conf);
                RecordReader reader   = format.CreateRecordReader(split, context);
                MapContext   mcontext = new MapContextImpl(conf, context.GetTaskAttemptID(), reader,
                                                           null, null, MapReduceTestUtil.CreateDummyReporter(), split);
                try
                {
                    reader.Initialize(split, mcontext);
                    while (reader.NextKeyValue())
                    {
                        WritableComparable key   = (WritableComparable)reader.GetCurrentKey();
                        Writable           value = (Writable)reader.GetCurrentValue();
                        ValidateKeyValue(key, value, tupleSize, firstTuple, secondTuple, ttype);
                        count++;
                    }
                }
                finally
                {
                    // Release the reader even when reading or validation throws.
                    reader.Close();
                }
            }
            return(count);
        }
예제 #3
0
            /// <summary>
            /// Samples keys from the split at index <c>sampleStep * idx</c>: each key read is
            /// copied into a new <c>Text</c> and handed to <c>sampler.AddKey</c>, stopping once
            /// at least <c>recordsPerSample</c> records have been read or the split is exhausted.
            /// </summary>
            /// <remarks>
            /// An <c>IOException</c> is written to standard error and rethrown wrapped in a
            /// <c>RuntimeException</c>. Any other exception is written to standard error and
            /// otherwise ignored.
            /// </remarks>
            public override void Run()
            {
                long records = 0;

                try
                {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.GetConfiguration(), new
                                                                            TaskAttemptID());
                    RecordReader <Text, Text> reader = inFormat.CreateRecordReader(splits[sampleStep *
                                                                                          idx], context);
                    try
                    {
                        reader.Initialize(splits[sampleStep * idx], context);
                        while (reader.NextKeyValue())
                        {
                            sampler.AddKey(new Text(reader.GetCurrentKey()));
                            records += 1;
                            if (recordsPerSample <= records)
                            {
                                break;
                            }
                        }
                    }
                    finally
                    {
                        // Closed inside the outer try so that a failure in Close() is handled
                        // by the same catch clauses as any other failure while reading.
                        reader.Close();
                    }
                }
                catch (IOException ie)
                {
                    System.Console.Error.WriteLine("Got an exception while reading splits " + StringUtils
                                                   .StringifyException(ie));
                    throw new RuntimeException(ie);
                }
                catch (Exception e)
                {
                    // Still not rethrown (callers see no new failure), but no longer silent.
                    System.Console.Error.WriteLine("Ignoring exception while sampling splits: " + e);
                }
            }
        /// <summary>
        /// Reads every record of <paramref name="split"/> with a reader created by
        /// <paramref name="format"/> and returns each value converted to a string via
        /// <c>Sharpen.Runtime.GetStringForBytes</c>.
        /// </summary>
        /// <param name="format">Input format that creates the record reader.</param>
        /// <param name="split">The split to read.</param>
        /// <param name="job">Job whose configuration is used for the task and map contexts.</param>
        /// <returns>One string per record, in the order the records were read.</returns>
        /// <exception cref="System.Exception"/>
        private static IList <string> ReadSplit(FixedLengthInputFormat format, InputSplit
                                                split, Job job)
        {
            IList <string>     records        = new AList <string>();
            TaskAttemptContext attemptContext = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(
                job.GetConfiguration());
            RecordReader <LongWritable, BytesWritable> recordReader =
                format.CreateRecordReader(split, attemptContext);
            MapContext <LongWritable, BytesWritable, LongWritable, BytesWritable> mapContext =
                new MapContextImpl <LongWritable, BytesWritable, LongWritable, BytesWritable>(
                    job.GetConfiguration(), attemptContext.GetTaskAttemptID(), recordReader, null, null,
                    MapReduceTestUtil.CreateDummyReporter(), split);

            try
            {
                recordReader.Initialize(split, mapContext);
                for (bool more = recordReader.NextKeyValue(); more; more = recordReader.NextKeyValue())
                {
                    // The key is fetched but only the value contributes to the result.
                    LongWritable  currentKey   = recordReader.GetCurrentKey();
                    BytesWritable currentValue = recordReader.GetCurrentValue();
                    string        text         = Sharpen.Runtime.GetStringForBytes(currentValue.GetBytes(), 0,
                                                                                   currentValue.GetLength());
                    records.AddItem(text);
                }
            }
            finally
            {
                // Always release the reader, whether or not reading succeeded.
                recordReader.Close();
            }
            return records;
        }
예제 #5
0
            /// <summary>From each split sampled, take the first numSamples / numSplits records.</summary>
            /// <param name="inf">Input format used to compute the splits and create record readers.</param>
            /// <param name="job">Job whose configuration is used for splitting, reading and key copying.</param>
            /// <returns>An array holding a copy of every sampled key.</returns>
            /// <exception cref="System.IO.IOException"/>
            /// <exception cref="System.Exception"/>
            public virtual K[] GetSample(InputFormat <K, V> inf, Job job)
            {
                IList <InputSplit> splits  = inf.GetSplits(job);
                AList <K>          samples = new AList <K>(numSamples);
                int  splitsToSample        = Math.Min(maxSplitsSampled, splits.Count);
                // NOTE(review): this integer division throws when splitsToSample is 0
                // (no splits, or maxSplitsSampled == 0); left as-is for callers.
                int  samplesPerSplit       = numSamples / splitsToSample;
                long records = 0;

                for (int i = 0; i < splitsToSample; ++i)
                {
                    TaskAttemptContext samplingContext = new TaskAttemptContextImpl(job.GetConfiguration
                                                                                        (), new TaskAttemptID());
                    RecordReader <K, V> reader = inf.CreateRecordReader(splits[i], samplingContext);
                    try
                    {
                        reader.Initialize(splits[i], samplingContext);
                        while (reader.NextKeyValue())
                        {
                            samples.AddItem(ReflectionUtils.Copy(job.GetConfiguration(), reader.GetCurrentKey
                                                                     (), null));
                            ++records;
                            // 'records' is cumulative across splits, so the threshold grows with
                            // each split; a later split can make up for an earlier short one.
                            if ((i + 1) * samplesPerSplit <= records)
                            {
                                break;
                            }
                        }
                    }
                    finally
                    {
                        // Close the reader even when initialisation, reading or copying throws.
                        reader.Close();
                    }
                }
                // ArrayList::toArray doesn't preserve type
                return((K[])Sharpen.Collections.ToArray(samples));
            }
예제 #6
0
            /// <summary>
            /// Randomize the split order, then take the specified number of keys from
            /// each split sampled, where each key is selected with the specified
            /// probability and possibly replaced by a subsequently selected key when
            /// the quota of keys from that split is satisfied.
            /// </summary>
            /// <param name="inf">Input format used to compute the splits and create record readers.</param>
            /// <param name="job">Job whose configuration is used for splitting, reading and key copying.</param>
            /// <returns>An array holding a copy of every key that remained selected.</returns>
            /// <exception cref="System.IO.IOException"/>
            /// <exception cref="System.Exception"/>
            public virtual K[] GetSample(InputFormat <K, V> inf, Job job)
            {
                IList <InputSplit> splits  = inf.GetSplits(job);
                AList <K>          samples = new AList <K>(numSamples);
                int    splitsToSample      = Math.Min(maxSplitsSampled, splits.Count);
                Random r    = new Random();
                long   seed = r.NextLong();

                // Re-seed with a value that is also written to the debug log.
                r.SetSeed(seed);
                Log.Debug("seed: " + seed);
                // shuffle splits
                for (int i = 0; i < splits.Count; ++i)
                {
                    InputSplit tmp = splits[i];
                    int        j   = r.Next(splits.Count);
                    splits.Set(i, splits[j]);
                    splits.Set(j, tmp);
                }
                // our target rate is in terms of the maximum number of sample splits,
                // but we accept the possibility of sampling additional splits to hit
                // the target sample keyset
                for (int i_1 = 0; i_1 < splitsToSample || (i_1 < splits.Count && samples.Count <
                                                           numSamples); ++i_1)
                {
                    TaskAttemptContext samplingContext = new TaskAttemptContextImpl(job.GetConfiguration
                                                                                        (), new TaskAttemptID());
                    RecordReader <K, V> reader = inf.CreateRecordReader(splits[i_1], samplingContext);
                    try
                    {
                        reader.Initialize(splits[i_1], samplingContext);
                        while (reader.NextKeyValue())
                        {
                            if (r.NextDouble() <= freq)
                            {
                                if (samples.Count < numSamples)
                                {
                                    samples.AddItem(ReflectionUtils.Copy(job.GetConfiguration(), reader.GetCurrentKey
                                                                             (), null));
                                }
                                else
                                {
                                    // When exceeding the maximum number of samples, replace a
                                    // random element with this one, then adjust the frequency
                                    // to reflect the possibility of existing elements being
                                    // pushed out
                                    int ind = r.Next(numSamples);
                                    // NOTE(review): presumably Next(numSamples) never returns
                                    // numSamples, making this check always true — confirm.
                                    if (ind != numSamples)
                                    {
                                        samples.Set(ind, ReflectionUtils.Copy(job.GetConfiguration(), reader.GetCurrentKey
                                                                                  (), null));
                                    }
                                    // NOTE(review): freq is declared outside this method, so this
                                    // decay outlives the current call — confirm that is intended.
                                    freq *= (numSamples - 1) / (double)numSamples;
                                }
                            }
                        }
                    }
                    finally
                    {
                        // Close the reader even when initialisation, reading or copying throws.
                        reader.Close();
                    }
                }
                // ArrayList::toArray doesn't preserve type
                return((K[])Sharpen.Collections.ToArray(samples));
            }
 /// <summary>
 /// Advances to the next key/value pair, calling <c>InitNextRecordReader</c>
 /// whenever there is no current reader or the current reader has no further pair.
 /// </summary>
 /// <returns>
 /// true once the current reader yields a pair; false as soon as
 /// <c>InitNextRecordReader</c> returns false.
 /// </returns>
 /// <exception cref="System.IO.IOException"/>
 /// <exception cref="System.Exception"/>
 public override bool NextKeyValue()
 {
     for (;;)
     {
         if (curReader != null && curReader.NextKeyValue())
         {
             return true;
         }
         if (!InitNextRecordReader())
         {
             return false;
         }
     }
 }
예제 #8
0
        /// <summary>
        /// Splits the job input with <c>NLineInputFormat</c> and checks every split: it
        /// must report zero locations, its reader must be a <c>LineRecordReader</c>, and
        /// it must contain the expected number of records.
        /// </summary>
        /// <param name="job">Job describing the input to split.</param>
        /// <param name="expectedN">Record count required of every split except the last.</param>
        /// <param name="lastN">Record count required of the last split.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        internal virtual void CheckFormat(Job job, int expectedN, int lastN)
        {
            NLineInputFormat   inputFormat = new NLineInputFormat();
            IList <InputSplit> allSplits   = inputFormat.GetSplits(job);
            int splitIndex = 0;

            foreach (InputSplit current in allSplits)
            {
                NUnit.Framework.Assert.AreEqual("There are no split locations", 0,
                                                current.GetLocations().Length);
                TaskAttemptContext attemptContext = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(
                    job.GetConfiguration());
                RecordReader <LongWritable, Text> lineReader = inputFormat.CreateRecordReader(current,
                                                                                              attemptContext);
                NUnit.Framework.Assert.AreEqual("reader class is LineRecordReader.",
                                                typeof(LineRecordReader), lineReader.GetType());
                MapContext <LongWritable, Text, LongWritable, Text> mapContext =
                    new MapContextImpl <LongWritable, Text, LongWritable, Text>(
                        job.GetConfiguration(), attemptContext.GetTaskAttemptID(), lineReader, null, null,
                        MapReduceTestUtil.CreateDummyReporter(), current);
                lineReader.Initialize(current, mapContext);

                // Count the records of this split only.
                int lineCount = 0;
                try
                {
                    while (lineReader.NextKeyValue())
                    {
                        lineCount++;
                    }
                }
                finally
                {
                    lineReader.Close();
                }

                // Only the final split is held to lastN; all others to expectedN.
                bool isLastSplit = splitIndex == allSplits.Count - 1;
                int  wanted      = isLastSplit ? lastN : expectedN;
                NUnit.Framework.Assert.AreEqual("number of lines in split(" + splitIndex + ") is wrong",
                                                wanted, lineCount);
                splitIndex++;
            }
        }
예제 #9
0
        /// <summary>
        /// Reads every record of <paramref name="split"/> through a reader created by
        /// <paramref name="format"/> and returns a new <c>Text</c> built from each value.
        /// </summary>
        /// <param name="format">Input format that creates the record reader.</param>
        /// <param name="split">The split to read.</param>
        /// <param name="job">Job whose configuration is used for the task and map contexts.</param>
        /// <returns>One <c>Text</c> per record, in the order the records were read.</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        private static IList <Text> ReadSplit(InputFormat <LongWritable, Text> format, InputSplit
                                              split, Job job)
        {
            IList <Text>       result  = new AList <Text>();
            Configuration      conf    = job.GetConfiguration();
            TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(conf
                                                                                            );
            RecordReader <LongWritable, Text> reader = format.CreateRecordReader(split, MapReduceTestUtil
                                                                                 .CreateDummyMapTaskAttemptContext(conf));
            MapContext <LongWritable, Text, LongWritable, Text> mcontext = new MapContextImpl <
                LongWritable, Text, LongWritable, Text>(conf, context.GetTaskAttemptID(), reader
                                                        , null, null, MapReduceTestUtil.CreateDummyReporter(), split);

            try
            {
                reader.Initialize(split, mcontext);
                while (reader.NextKeyValue())
                {
                    // Store a new Text rather than the reader's own value object
                    // (presumably because the reader reuses it — confirm).
                    result.AddItem(new Text(reader.GetCurrentValue()));
                }
            }
            finally
            {
                // Release the reader even when initialisation or reading throws.
                reader.Close();
            }
            return(result);
        }
예제 #10
0
        /// <summary>
        /// Counts the records returned by a <c>SequenceFileInputFilter</c> over the job
        /// input, after capping the split size at the length of <c>inFile</c> divided by
        /// the number of splits.
        /// </summary>
        /// <param name="numSplits">
        /// Divisor for the maximum split size; when 0, a random value of at least 1 is
        /// drawn from <c>random</c> instead.
        /// </param>
        /// <returns>The number of records read across all splits.</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        private int CountRecords(int numSplits)
        {
            InputFormat <Text, BytesWritable> filter =
                new SequenceFileInputFilter <Text, BytesWritable>();
            int splitTarget = numSplits;

            if (splitTarget == 0)
            {
                splitTarget = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1;
            }
            FileInputFormat.SetMaxInputSplitSize(job, fs.GetFileStatus(inFile).GetLen() / splitTarget);
            TaskAttemptContext attemptContext = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(
                job.GetConfiguration());

            // Walk every split and tally the records read.
            int accepted = 0;

            foreach (InputSplit current in filter.GetSplits(job))
            {
                RecordReader <Text, BytesWritable> recordReader = filter.CreateRecordReader(current,
                                                                                            attemptContext);
                MapContext <Text, BytesWritable, Text, BytesWritable> mapContext =
                    new MapContextImpl <Text, BytesWritable, Text, BytesWritable>(
                        job.GetConfiguration(), attemptContext.GetTaskAttemptID(), recordReader, null, null,
                        MapReduceTestUtil.CreateDummyReporter(), current);
                recordReader.Initialize(current, mapContext);
                try
                {
                    for (bool more = recordReader.NextKeyValue(); more; more = recordReader.NextKeyValue())
                    {
                        Log.Info("Accept record " + recordReader.GetCurrentKey().ToString());
                        accepted++;
                    }
                }
                finally
                {
                    recordReader.Close();
                }
            }
            return accepted;
        }
예제 #11
0
 /// <summary>
 /// Forwards to <c>reader.NextKeyValue()</c> and returns its result unchanged.
 /// </summary>
 /// <returns>Whatever <c>reader.NextKeyValue()</c> returned.</returns>
 /// <exception cref="System.IO.IOException"/>
 /// <exception cref="System.Exception"/>
 public override bool NextKeyValue()
 {
     bool hasNext = reader.NextKeyValue();
     return hasNext;
 }
예제 #12
0
 /// <summary>
 /// Forwards to <c>originalRR.NextKeyValue()</c> and returns its result unchanged.
 /// </summary>
 /// <returns>Whatever <c>originalRR.NextKeyValue()</c> returned.</returns>
 /// <exception cref="System.IO.IOException"/>
 /// <exception cref="System.Exception"/>
 public override bool NextKeyValue()
 {
     bool advanced = originalRR.NextKeyValue();
     return advanced;
 }
예제 #13
0
 /// <summary>
 /// Forwards to <c>delegate_.NextKeyValue()</c> and returns its result unchanged.
 /// </summary>
 /// <returns>Whatever <c>delegate_.NextKeyValue()</c> returned.</returns>
 /// <exception cref="System.IO.IOException"/>
 /// <exception cref="System.Exception"/>
 public override bool NextKeyValue()
 {
     bool produced = delegate_.NextKeyValue();
     return produced;
 }
예제 #14
0
        /// <summary>
        /// Writes <c>Records</c> pseudo-random <c>Text</c> key/value pairs to a sequence
        /// file, reads them back as raw bytes through
        /// <c>SequenceFileAsBinaryInputFormat</c>, and checks that each pair decodes to
        /// the values regenerated from the same seed and that exactly <c>Records</c>
        /// pairs were read.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public virtual void TestBinary()
        {
            Job        job  = Job.GetInstance();
            FileSystem fs   = FileSystem.GetLocal(job.GetConfiguration());
            Path       dir  = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path       file = new Path(dir, "testbinary.seq");
            Random     r    = new Random();
            long       seed = r.NextLong();

            // Seed explicitly so the same sequence can be replayed when verifying below.
            r.SetSeed(seed);
            fs.Delete(dir, true);
            FileInputFormat.SetInputPaths(job, dir);
            Text tkey = new Text();
            Text tval = new Text();

            SequenceFile.Writer writer = new SequenceFile.Writer(fs, job.GetConfiguration(),
                                                                 file, typeof(Text), typeof(Text));
            try
            {
                // Each record draws one Next() for the key, then one NextLong() for the value.
                for (int i = 0; i < Records; ++i)
                {
                    tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
                    tval.Set(System.Convert.ToString(r.NextLong(), 36));
                    writer.Append(tkey, tval);
                }
            }
            finally
            {
                writer.Close();
            }
            TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                            .GetConfiguration());
            InputFormat <BytesWritable, BytesWritable> bformat = new SequenceFileAsBinaryInputFormat
                                                                     ();
            int count = 0;

            // Restart the random sequence; the read loop must draw in the same order as the write loop.
            r.SetSeed(seed);
            BytesWritable   bkey   = new BytesWritable();
            BytesWritable   bval   = new BytesWritable();
            Text            cmpkey = new Text();
            Text            cmpval = new Text();
            DataInputBuffer buf    = new DataInputBuffer();

            FileInputFormat.SetInputPaths(job, file);
            foreach (InputSplit split in bformat.GetSplits(job))
            {
                RecordReader <BytesWritable, BytesWritable> reader = bformat.CreateRecordReader(split
                                                                                                , context);
                MapContext <BytesWritable, BytesWritable, BytesWritable, BytesWritable> mcontext =
                    new MapContextImpl <BytesWritable, BytesWritable, BytesWritable, BytesWritable>(job
                                                                                                    .GetConfiguration(), context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil
                                                                                                    .CreateDummyReporter(), split);
                reader.Initialize(split, mcontext);
                try
                {
                    while (reader.NextKeyValue())
                    {
                        bkey = reader.GetCurrentKey();
                        bval = reader.GetCurrentValue();
                        // Regenerate the expected key and value for this record.
                        tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
                        tval.Set(System.Convert.ToString(r.NextLong(), 36));
                        // Decode the raw key and value bytes back into Text for comparison.
                        buf.Reset(bkey.GetBytes(), bkey.GetLength());
                        cmpkey.ReadFields(buf);
                        buf.Reset(bval.GetBytes(), bval.GetLength());
                        cmpval.ReadFields(buf);
                        NUnit.Framework.Assert.IsTrue("Keys don't match: " + "*" + cmpkey.ToString() + ":"
                                                      + tkey.ToString() + "*", cmpkey.ToString().Equals(tkey.ToString()));
                        NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + cmpval.ToString() + ":"
                                                      + tval.ToString() + "*", cmpval.ToString().Equals(tval.ToString()));
                        ++count;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
        }
예제 #15
0
        /// <summary>
        /// Writes files of varying length through the codec loaded by the name
        /// "org.apache.hadoop.io.compress.BZip2Codec", splits them with
        /// <c>KeyValueTextInputFormat</c>, and checks that every record appears in
        /// exactly one split with a consistent key and value.
        /// </summary>
        /// <remarks>
        /// Record <c>i</c> is written as the line "2*i TAB i". For each file the split
        /// and read check is repeated three times.
        /// </remarks>
        public virtual void TestSplitableCodecs()
        {
            Job           job  = Job.GetInstance(defaultConf);
            Configuration conf = job.GetConfiguration();
            // Create the codec
            CompressionCodec codec = null;

            try
            {
                codec = (CompressionCodec)ReflectionUtils.NewInstance(conf.GetClassByName("org.apache.hadoop.io.compress.BZip2Codec"
                                                                                          ), conf);
            }
            catch (TypeLoadException)
            {
                throw new IOException("Illegal codec!");
            }
            Path file = new Path(workDir, "test" + codec.GetDefaultExtension());
            int  seed = new Random().Next();

            // The seed is logged and then used for every random draw below.
            Log.Info("seed = " + seed);
            Random random = new Random(seed);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            int MaxLength = 500000;

            FileInputFormat.SetMaxInputSplitSize(job, MaxLength / 20);
            // for a variety of lengths
            for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 4) + 1)
            {
                Log.Info("creating; entries = " + length);
                // create a file with length entries
                TextWriter writer = new OutputStreamWriter(codec.CreateOutputStream(localFs.Create
                                                                                        (file)));
                try
                {
                    for (int i = 0; i < length; i++)
                    {
                        writer.Write(Sharpen.Extensions.ToString(i * 2));
                        writer.Write("\t");
                        writer.Write(Sharpen.Extensions.ToString(i));
                        writer.Write("\n");
                    }
                }
                finally
                {
                    writer.Close();
                }
                // try splitting the file in a variety of sizes
                KeyValueTextInputFormat format = new KeyValueTextInputFormat();
                NUnit.Framework.Assert.IsTrue("KVTIF claims not splittable", format.IsSplitable(job
                                                                                                , file));
                for (int i_1 = 0; i_1 < 3; i_1++)
                {
                    // numSplits is only logged; it is not passed to GetSplits, but the draw
                    // still advances 'random'.
                    int numSplits = random.Next(MaxLength / 2000) + 1;
                    Log.Info("splitting: requesting = " + numSplits);
                    IList <InputSplit> splits = format.GetSplits(job);
                    Log.Info("splitting: got =        " + splits.Count);
                    // check each split
                    // One bit per record value; a bit is set once that value has been read.
                    BitSet bits = new BitSet(length);
                    for (int j = 0; j < splits.Count; j++)
                    {
                        Log.Debug("split[" + j + "]= " + splits[j]);
                        TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                                        .GetConfiguration());
                        RecordReader <Text, Text> reader = format.CreateRecordReader(splits[j], context);
                        // clazz is not referenced again within this method.
                        Type clazz = reader.GetType();
                        MapContext <Text, Text, Text, Text> mcontext = new MapContextImpl <Text, Text, Text
                                                                                           , Text>(job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                                                                                                   MapReduceTestUtil.CreateDummyReporter(), splits[j]);
                        reader.Initialize(splits[j], mcontext);
                        Text key   = null;
                        Text value = null;
                        try
                        {
                            int count = 0;
                            while (reader.NextKeyValue())
                            {
                                key   = reader.GetCurrentKey();
                                value = reader.GetCurrentValue();
                                int k = System.Convert.ToInt32(key.ToString());
                                int v = System.Convert.ToInt32(value.ToString());
                                // Keys were written as twice the value, so they must be even
                                // and halve back to the value.
                                NUnit.Framework.Assert.AreEqual("Bad key", 0, k % 2);
                                NUnit.Framework.Assert.AreEqual("Mismatched key/value", k / 2, v);
                                Log.Debug("read " + k + "," + v);
                                // A bit that is already set means this record was read before.
                                NUnit.Framework.Assert.IsFalse(k + "," + v + " in multiple partitions.", bits.Get
                                                                   (v));
                                bits.Set(v);
                                count++;
                            }
                            // Non-empty splits are reported at Info level, empty ones at Debug.
                            if (count > 0)
                            {
                                Log.Info("splits[" + j + "]=" + splits[j] + " count=" + count);
                            }
                            else
                            {
                                Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                            }
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }
                    // Every one of the 'length' records must have been seen in some split.
                    NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality
                                                        ());
                }
            }
        }
        /// <exception cref="System.Exception"/>
        private void RunRandomTests(CompressionCodec codec)
        {
            StringBuilder fileName = new StringBuilder("testFormat.txt");

            if (codec != null)
            {
                fileName.Append(".gz");
            }
            localFs.Delete(workDir, true);
            Path file = new Path(workDir, fileName.ToString());
            int  seed = new Random().Next();

            Log.Info("Seed = " + seed);
            Random        random   = new Random(seed);
            int           MaxTests = 20;
            LongWritable  key;
            BytesWritable value;

            for (int i = 0; i < MaxTests; i++)
            {
                Log.Info("----------------------------------------------------------");
                // Maximum total records of 999
                int totalRecords = random.Next(999) + 1;
                // Test an empty file
                if (i == 8)
                {
                    totalRecords = 0;
                }
                // Maximum bytes in a record of 100K
                int recordLength = random.Next(1024 * 100) + 1;
                // For the 11th test, force a record length of 1
                if (i == 10)
                {
                    recordLength = 1;
                }
                // The total bytes in the test file
                int fileSize = (totalRecords * recordLength);
                Log.Info("totalRecords=" + totalRecords + " recordLength=" + recordLength);
                // Create the job
                Job job = Job.GetInstance(defaultConf);
                if (codec != null)
                {
                    ReflectionUtils.SetConf(codec, job.GetConfiguration());
                }
                // Create the test file
                AList <string> recordList = CreateFile(file, codec, recordLength, totalRecords);
                NUnit.Framework.Assert.IsTrue(localFs.Exists(file));
                //set the fixed length record length config property for the job
                FixedLengthInputFormat.SetRecordLength(job.GetConfiguration(), recordLength);
                int numSplits = 1;
                // Arbitrarily set number of splits.
                if (i > 0)
                {
                    if (i == (MaxTests - 1))
                    {
                        // Test a split size that is less than record len
                        numSplits = (int)(fileSize / Math.Floor(recordLength / 2));
                    }
                    else
                    {
                        if (MaxTests % i == 0)
                        {
                            // Let us create a split size that is forced to be
                            // smaller than the end file itself, (ensures 1+ splits)
                            numSplits = fileSize / (fileSize - random.Next(fileSize));
                        }
                        else
                        {
                            // Just pick a random split size with no upper bound
                            numSplits = Math.Max(1, fileSize / random.Next(int.MaxValue));
                        }
                    }
                    Log.Info("Number of splits set to: " + numSplits);
                }
                job.GetConfiguration().SetLong("mapreduce.input.fileinputformat.split.maxsize", (
                                                   long)(fileSize / numSplits));
                // setup the input path
                FileInputFormat.SetInputPaths(job, workDir);
                // Try splitting the file in a variety of sizes
                FixedLengthInputFormat format = new FixedLengthInputFormat();
                IList <InputSplit>     splits = format.GetSplits(job);
                Log.Info("Actual number of splits = " + splits.Count);
                // Test combined split lengths = total file size
                long recordOffset = 0;
                int  recordNumber = 0;
                foreach (InputSplit split in splits)
                {
                    TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                                    .GetConfiguration());
                    RecordReader <LongWritable, BytesWritable> reader = format.CreateRecordReader(split
                                                                                                  , context);
                    MapContext <LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext = new
                                                                                                     MapContextImpl <LongWritable, BytesWritable, LongWritable, BytesWritable>(job.GetConfiguration
                                                                                                                                                                                   (), context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil.CreateDummyReporter
                                                                                                                                                                                   (), split);
                    reader.Initialize(split, mcontext);
                    Type clazz = reader.GetType();
                    NUnit.Framework.Assert.AreEqual("RecordReader class should be FixedLengthRecordReader:"
                                                    , typeof(FixedLengthRecordReader), clazz);
                    // Plow through the records in this split
                    while (reader.NextKeyValue())
                    {
                        key   = reader.GetCurrentKey();
                        value = reader.GetCurrentValue();
                        NUnit.Framework.Assert.AreEqual("Checking key", (long)(recordNumber * recordLength
                                                                               ), key.Get());
                        string valueString = Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value
                                                                               .GetLength());
                        NUnit.Framework.Assert.AreEqual("Checking record length:", recordLength, value.GetLength
                                                            ());
                        NUnit.Framework.Assert.IsTrue("Checking for more records than expected:", recordNumber
                                                      < totalRecords);
                        string origRecord = recordList[recordNumber];
                        NUnit.Framework.Assert.AreEqual("Checking record content:", origRecord, valueString
                                                        );
                        recordNumber++;
                    }
                    reader.Close();
                }
                NUnit.Framework.Assert.AreEqual("Total original records should be total read records:"
                                                , recordList.Count, recordNumber);
            }
        }
예제 #17
0
        /// <summary>
        /// Round-trips tab-separated key/value lines through
        /// <c>KeyValueTextInputFormat</c> for a series of randomly sized files.
        /// Line <c>n</c> of each file is written with key <c>2 * n</c> and value
        /// <c>n</c>; every split is then read back, and each value must be seen
        /// exactly once across all splits.
        /// </summary>
        public virtual void TestFormat()
        {
            const int maxEntries = 10000;
            Job  job      = Job.GetInstance(new Configuration(defaultConf));
            Path dataFile = new Path(workDir, "test.txt");

            // The seed is logged so that a failing run can be replayed.
            int seed = new Random().Next();
            Log.Info("seed = " + seed);
            Random rng = new Random(seed);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);

            // Step through a variety of entry counts using random strides.
            int entryCount = 0;
            while (entryCount < maxEntries)
            {
                Log.Debug("creating; entries = " + entryCount);
                // (Re)create the file with entryCount lines of "<2*n>\t<n>\n".
                TextWriter output = new OutputStreamWriter(localFs.Create(dataFile));
                try
                {
                    for (int n = 0; n < entryCount; n++)
                    {
                        output.Write(Sharpen.Extensions.ToString(n * 2));
                        output.Write("\t");
                        output.Write(Sharpen.Extensions.ToString(n));
                        output.Write("\n");
                    }
                }
                finally
                {
                    output.Close();
                }

                // Read the file back three times.
                KeyValueTextInputFormat format = new KeyValueTextInputFormat();
                for (int round = 0; round < 3; round++)
                {
                    // The requested count is only logged, but the draw itself must stay:
                    // it advances the generator that also picks the next entry count.
                    int requested = rng.Next(maxEntries / 20) + 1;
                    Log.Debug("splitting: requesting = " + requested);
                    IList <InputSplit> splits = format.GetSplits(job);
                    Log.Debug("splitting: got =        " + splits.Count);

                    // One bit per value that has been read from any split.
                    BitSet seen = new BitSet(entryCount);
                    for (int splitIndex = 0; splitIndex < splits.Count; splitIndex++)
                    {
                        InputSplit split = splits[splitIndex];
                        Log.Debug("split[" + splitIndex + "]= " + split);
                        TaskAttemptContext context =
                            MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
                        RecordReader <Text, Text> reader = format.CreateRecordReader(split, context);
                        NUnit.Framework.Assert.AreEqual("reader class is KeyValueLineRecordReader.",
                                                        typeof(KeyValueLineRecordReader), reader.GetType());
                        MapContext <Text, Text, Text, Text> mcontext =
                            new MapContextImpl <Text, Text, Text, Text>(
                                job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                                MapReduceTestUtil.CreateDummyReporter(), split);
                        reader.Initialize(split, mcontext);
                        try
                        {
                            int recordsInSplit = 0;
                            while (reader.NextKeyValue())
                            {
                                Text key = reader.GetCurrentKey();
                                NUnit.Framework.Assert.AreEqual("Key class is Text.", typeof(Text), key.GetType());
                                Text value = reader.GetCurrentValue();
                                NUnit.Framework.Assert.AreEqual("Value class is Text.", typeof(Text), value.GetType());

                                // Keys were written as twice the value, so they must be even.
                                int parsedKey   = System.Convert.ToInt32(key.ToString());
                                int parsedValue = System.Convert.ToInt32(value.ToString());
                                NUnit.Framework.Assert.AreEqual("Bad key", 0, parsedKey % 2);
                                NUnit.Framework.Assert.AreEqual("Mismatched key/value", parsedKey / 2, parsedValue);
                                Log.Debug("read " + parsedValue);

                                // A value that is already marked was served by more than one split.
                                NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", seen.Get(parsedValue));
                                seen.Set(parsedValue);
                                recordsInSplit++;
                            }
                            Log.Debug("splits[" + splitIndex + "]=" + split + " count=" + recordsInSplit);
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }
                    // Every written entry must have been read from some split.
                    NUnit.Framework.Assert.AreEqual("Some keys in no partition.", entryCount, seen.Cardinality());
                }

                entryCount += rng.Next(maxEntries / 10) + 1;
            }
        }
        /// <summary>
        /// Writes <c>Records</c> pseudo-random int/double pairs as serialized bytes through
        /// <c>SequenceFileAsBinaryOutputFormat</c> with block compression enabled, then reads
        /// them back with <c>SequenceFileInputFormat</c> and compares every pair against the
        /// same sequence regenerated from the saved seed.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public virtual void TestBinary()
        {
            Configuration conf   = new Configuration();
            Job           job    = Job.GetInstance(conf);
            Path          outdir = new Path(Runtime.GetProperty("test.build.data", "/tmp"), "outseq");
            Random        r      = new Random();
            long          seed   = r.NextLong();

            // Re-seed with a known value so the read phase can replay the same sequence.
            r.SetSeed(seed);
            FileOutputFormat.SetOutputPath(job, outdir);
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable));
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable));
            SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
            SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType.Block);
            BytesWritable      bkey    = new BytesWritable();
            BytesWritable      bval    = new BytesWritable();
            TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(
                job.GetConfiguration());
            OutputFormat <BytesWritable, BytesWritable> outputFormat =
                new SequenceFileAsBinaryOutputFormat();
            OutputCommitter committer = outputFormat.GetOutputCommitter(context);

            committer.SetupJob(job);
            RecordWriter <BytesWritable, BytesWritable> writer = outputFormat.GetRecordWriter(context);
            IntWritable      iwritable = new IntWritable();
            DoubleWritable   dwritable = new DoubleWritable();
            DataOutputBuffer outbuf    = new DataOutputBuffer();

            Log.Info("Creating data by SequenceFileAsBinaryOutputFormat");
            try
            {
                for (int i = 0; i < Records; ++i)
                {
                    // Serialize the key into the scratch buffer and hand the raw bytes to bkey.
                    iwritable = new IntWritable(r.Next());
                    iwritable.Write(outbuf);
                    bkey.Set(outbuf.GetData(), 0, outbuf.GetLength());
                    outbuf.Reset();
                    // Same for the value.
                    dwritable = new DoubleWritable(r.NextDouble());
                    dwritable.Write(outbuf);
                    bval.Set(outbuf.GetData(), 0, outbuf.GetLength());
                    outbuf.Reset();
                    writer.Write(bkey, bval);
                }
            }
            finally
            {
                writer.Close(context);
            }
            committer.CommitTask(context);
            committer.CommitJob(job);
            InputFormat <IntWritable, DoubleWritable> iformat =
                new SequenceFileInputFormat <IntWritable, DoubleWritable>();
            int count = 0;

            // Rewind the generator so it reproduces the values written above.
            r.SetSeed(seed);
            SequenceFileInputFormat.SetInputPaths(job, outdir);
            Log.Info("Reading data by SequenceFileInputFormat");
            foreach (InputSplit split in iformat.GetSplits(job))
            {
                RecordReader <IntWritable, DoubleWritable> reader = iformat.CreateRecordReader(split, context);
                MapContext <IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
                    new MapContextImpl <IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
                        job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.CreateDummyReporter(), split);
                reader.Initialize(split, mcontext);
                try
                {
                    int    sourceInt;
                    double sourceDouble;
                    while (reader.NextKeyValue())
                    {
                        // Draw in the same order as the write loop: int first, then double.
                        sourceInt    = r.Next();
                        sourceDouble = r.NextDouble();
                        iwritable    = reader.GetCurrentKey();
                        dwritable    = reader.GetCurrentValue();
                        NUnit.Framework.Assert.AreEqual("Keys don't match: " + "*" + iwritable.Get() + ":"
                                                        + sourceInt + "*", sourceInt, iwritable.Get());
                        // System.Double has no static Compare (a Java leftover); the instance
                        // CompareTo returns 0 when the two values are equal.
                        NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + dwritable.Get() + ":"
                                                      + sourceDouble + "*", dwritable.Get().CompareTo(sourceDouble) == 0);
                        ++count;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            // Every written record must have been read back.
            NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
        }
예제 #19
0
 /// <summary>
 /// Advances to the next key/value pair by delegating to the wrapped
 /// <c>reader</c>.
 /// </summary>
 /// <returns>The result of the wrapped reader's <c>NextKeyValue</c> call.</returns>
 /// <exception cref="System.IO.IOException"/>
 /// <exception cref="System.Exception"/>
 public virtual bool NextKeyValue()
 {
     bool advanced = reader.NextKeyValue();
     return advanced;
 }
예제 #20
0
        /// <summary>
        /// Reads the files produced by <c>CreateFiles</c> back through
        /// <c>CombineTextInputFormat</c> three times. Each pass expects exactly one
        /// <c>CombineFileSplit</c>, checks that no parsed value is seen twice, and
        /// checks that the number of distinct values equals the requested length.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            Job job = Job.GetInstance(new Configuration(defaultConf));

            // Draw a seed, log it, then re-seed so a failing run can be replayed.
            Random rng  = new Random();
            long   seed = rng.NextLong();
            Log.Info("seed = " + seed);
            rng.SetSeed(seed);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);

            // Create files with various lengths.
            int totalEntries = 10000;
            int fileCount    = 10;
            CreateFiles(totalEntries, fileCount, rng);

            // A single format instance serves every pass below.
            CombineTextInputFormat format = new CombineTextInputFormat();

            int round = 0;
            while (round < 3)
            {
                int requested = rng.Next(totalEntries / 20) + 1;
                Log.Info("splitting: requesting = " + requested);
                IList <InputSplit> splits = format.GetSplits(job);
                Log.Info("splitting: got =        " + splits.Count);

                // The data is comfortably smaller than the block size, so a single
                // combined split is expected.
                NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Count);
                InputSplit only = splits[0];
                NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit",
                                                typeof(CombineFileSplit), only.GetType());

                // One bit per value read from the split.
                BitSet seen = new BitSet(totalEntries);
                Log.Debug("split= " + only);
                TaskAttemptContext context =
                    MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
                RecordReader <LongWritable, Text> reader = format.CreateRecordReader(only, context);
                NUnit.Framework.Assert.AreEqual("reader class is CombineFileRecordReader.",
                                                typeof(CombineFileRecordReader), reader.GetType());
                MapContext <LongWritable, Text, LongWritable, Text> mcontext =
                    new MapContextImpl <LongWritable, Text, LongWritable, Text>(
                        job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.CreateDummyReporter(), only);
                reader.Initialize(only, mcontext);
                try
                {
                    int recordsRead = 0;
                    while (reader.NextKeyValue())
                    {
                        LongWritable key = reader.GetCurrentKey();
                        NUnit.Framework.Assert.IsNotNull("Key should not be null.", key);
                        Text value  = reader.GetCurrentValue();
                        int  parsed = System.Convert.ToInt32(value.ToString());
                        Log.Debug("read " + parsed);
                        // A value that is already marked was delivered more than once.
                        NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", seen.Get(parsed));
                        seen.Set(parsed);
                        recordsRead++;
                    }
                    Log.Debug("split=" + only + " count=" + recordsRead);
                }
                finally
                {
                    reader.Close();
                }
                // Every value in the requested range must have been read.
                NUnit.Framework.Assert.AreEqual("Some keys in no partition.", totalEntries, seen.Cardinality());

                round++;
            }
        }
예제 #21
0
        /// <summary>
        /// Round-trips integer-keyed records through <c>SequenceFileAsTextInputFormat</c>.
        /// For a series of randomly stepped lengths, a sequence file holding the pairs
        /// (<c>i</c>, <c>10 * i</c>) for <c>i</c> in <c>[0, length)</c> is written; it is then
        /// read back three times with a randomly chosen maximum split size, and every key
        /// must be seen exactly once across all splits.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            Job        job    = Job.GetInstance(conf);
            FileSystem fs     = FileSystem.GetLocal(conf);
            Path       dir    = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path       file   = new Path(dir, "test.seq");
            int        seed   = new Random().Next();
            Random     random = new Random(seed);

            fs.Delete(dir, true);
            FileInputFormat.SetInputPaths(job, dir);
            // for a variety of lengths
            for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) + 1)
            {
                // create a file with length entries
                SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(IntWritable),
                                                                       typeof(LongWritable));
                try
                {
                    for (int i = 0; i < length; i++)
                    {
                        IntWritable  key   = new IntWritable(i);
                        LongWritable value = new LongWritable(10 * i);
                        writer.Append(key, value);
                    }
                }
                finally
                {
                    writer.Close();
                }
                TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(
                    job.GetConfiguration());
                // try splitting the file in a variety of sizes
                InputFormat <Text, Text> format = new SequenceFileAsTextInputFormat();
                for (int i_1 = 0; i_1 < 3; i_1++)
                {
                    // One bit per key read from any split of this pass.
                    BitSet bits      = new BitSet(length);
                    int    numSplits = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1;
                    // Cap the split size so the file is divided into roughly numSplits pieces.
                    FileInputFormat.SetMaxInputSplitSize(job, fs.GetFileStatus(file).GetLen() / numSplits);
                    foreach (InputSplit split in format.GetSplits(job))
                    {
                        RecordReader <Text, Text>           reader   = format.CreateRecordReader(split, context);
                        MapContext <Text, Text, Text, Text> mcontext = new MapContextImpl <Text, Text, Text, Text>(
                            job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                            MapReduceTestUtil.CreateDummyReporter(), split);
                        reader.Initialize(split, mcontext);
                        Type readerClass = reader.GetType();
                        NUnit.Framework.Assert.AreEqual("reader class is SequenceFileAsTextRecordReader."
                                                        , typeof(SequenceFileAsTextRecordReader), readerClass);
                        try
                        {
                            while (reader.NextKeyValue())
                            {
                                // Keys come back as text; parse them to index the bit set.
                                Text currentKey = reader.GetCurrentKey();
                                int  keyInt     = System.Convert.ToInt32(currentKey.ToString());
                                // A key that is already marked was served by more than one split.
                                NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(keyInt));
                                bits.Set(keyInt);
                            }
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }
                    // Every written key must have been read from some split.
                    NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality());
                }
            }
        }