Example #1
0
        /// <summary>
        /// Checks that a <c>BytesWritable</c> built with the (array, length) constructor
        /// exposes the very array it was given, compares equal to one built from the same
        /// array alone, and still holds the right contents and length after being set to
        /// a larger array and then back to the original bytes.
        /// </summary>
        public virtual void TestZeroCopy()
        {
            byte[] source       = GetBytesForString("brock");
            int    sourceLength = source.Length;

            // Built through the (array, length) constructor, which must not copy.
            BytesWritable shared = new BytesWritable(source, sourceLength);
            // Built from the array alone; serves as the reference value below.
            BytesWritable reference = new BytesWritable(source);

            // The backing array must be the same object that was handed in.
            bool sameArray = source == shared.Bytes;
            Assert.True(sameArray, "copy took place, backing array != array passed to constructor");

            bool sameLength = shared.Length == sourceLength;
            Assert.True(sameLength, "length of BW should backing byte array");

            Assert.Equal(shared, reference, "objects with same backing array should be equal");

            string sharedText    = shared.ToString();
            string referenceText = reference.ToString();
            Assert.Equal(sharedText, referenceText,
                         "string repr of objects with same backing array should be equal");

            int ordering = shared.CompareTo(reference);
            Assert.True(ordering == 0, "compare order objects with same backing array should be equal");

            int sharedHash    = shared.GetHashCode();
            int referenceHash = reference.GetHashCode();
            Assert.True(sharedHash == referenceHash,
                        "hash of objects with same backing array should be equal");

            // Make sure growing the buffer works for an instance created via the
            // zero-copy constructor: first set contents five times larger, then go
            // back to the original bytes.
            byte[] enlarged = new byte[sourceLength * 5];
            shared.Set(enlarged, 0, enlarged.Length);
            shared.Set(source, 0, sourceLength);

            Assert.Equal(shared, reference, "buffer created with (array, len) has bad contents");
            bool lengthsAgree = shared.Length == reference.Length;
            Assert.True(lengthsAgree, "buffer created with (array, len) has bad length");
        }
Example #2
0
        /// <summary>
        /// Generates a new key into <paramref name="o"/>: the bytes after the first
        /// <c>MinKeyLen</c> positions are filled with randomly chosen dictionary words,
        /// the first <c>MinKeyLen</c> bytes receive the current prefix, and the result
        /// is remembered in <c>lastKey</c>.
        /// </summary>
        /// <param name="o">Writable that is resized and overwritten with the new key.</param>
        private void FillKey(BytesWritable o)
        {
            // The generated length is never allowed below the prefix length.
            int keyLen = Math.Max(keyLenRNG.NextInt(), MinKeyLen);
            o.SetSize(keyLen);

            // Append dictionary words after the prefix area, cutting the last word
            // short so the key ends exactly at keyLen.
            for (int pos = MinKeyLen; pos < keyLen;)
            {
                byte[] word  = dict[random.Next(dict.Length)];
                int    chunk = Math.Min(word.Length, keyLen - pos);
                System.Array.Copy(word, 0, o.Get(), pos, chunk);
                pos += chunk;
            }

            // In sorted mode, call IncrementPrefix() when the previous key's suffix
            // compares greater than the suffix just generated.
            if (sorted)
            {
                int order = WritableComparator.CompareBytes(
                    lastKey.Get(), MinKeyLen, lastKey.GetSize() - MinKeyLen,
                    o.Get(), MinKeyLen, o.GetSize() - MinKeyLen);
                if (order > 0)
                {
                    IncrementPrefix();
                }
            }

            // Stamp the prefix over the first MinKeyLen bytes and remember this key.
            System.Array.Copy(prefix, 0, o.Get(), 0, MinKeyLen);
            lastKey.Set(o);
        }
Example #3
0
        /// <summary>
        /// Returns the string form of a <c>BytesWritable</c> that has been set to the
        /// first <c>t.GetLength()</c> bytes of <paramref name="t"/>.
        /// </summary>
        /// <param name="t">Text whose bytes are rendered.</param>
        /// <returns>The result of <c>ToString()</c> on the temporary writable.</returns>
        private static string TextifyBytes(Text t)
        {
            BytesWritable holder = new BytesWritable();
            byte[]        raw    = t.GetBytes();
            int           count  = t.GetLength();

            holder.Set(raw, 0, count);
            return holder.ToString();
        }
Example #4
0
 /// <summary>
 /// Produces the next key/value pair. The key is either a repeat of the last
 /// generated key or a newly generated one; the value is always newly filled.
 /// </summary>
 /// <param name="key">Receives the key.</param>
 /// <param name="value">Receives the value.</param>
 /// <param name="dupKey">When true, <paramref name="key"/> is set from <c>lastKey</c> instead of being regenerated.</param>
 public virtual void Next(BytesWritable key, BytesWritable value, bool dupKey)
 {
     if (!dupKey)
     {
         FillKey(key);
     }
     else
     {
         key.Set(lastKey);
     }

     FillValue(value);
 }
Example #5
0
        /// <summary>
        /// Fills the first <paramref name="len"/> bytes of <paramref name="tmp"/> with
        /// randomly chosen dictionary words (the last one truncated to fit) and then sets
        /// <paramref name="bw"/> from that range.
        /// </summary>
        /// <param name="rng">Source of the random dictionary indices.</param>
        /// <param name="bw">Writable set from <c>tmp[0..len)</c> once the buffer is filled.</param>
        /// <param name="tmp">Scratch buffer that is overwritten.</param>
        /// <param name="len">Number of bytes to produce.</param>
        private void FillBuffer(Random rng, BytesWritable bw, byte[] tmp, int len)
        {
            for (int filled = 0; filled < len;)
            {
                byte[] word  = dictionary[rng.Next(dictionary.Length)];
                int    chunk = Math.Min(word.Length, len - filled);

                System.Array.Copy(word, 0, tmp, filled, chunk);
                filled += chunk;
            }

            bw.Set(tmp, 0, len);
        }
Example #6
0
        /// <summary>
        /// Writes <paramref name="val"/> with <c>WritableUtils.WriteVInt</c>, reads it back,
        /// and checks that the written length, <c>GetVIntSize</c> and <c>DecodeVIntSize</c>
        /// of the first written byte all equal <paramref name="vintlen"/>.
        /// </summary>
        /// <param name="val">Value to round-trip.</param>
        /// <param name="vintlen">Expected number of bytes in the written form.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void TestValue(int val, int vintlen)
        {
            DataOutputBuffer encoded = new DataOutputBuffer();
            DataInputBuffer  decoder = new DataInputBuffer();

            WritableUtils.WriteVInt(encoded, val);

            // Dump the value and the raw written bytes when debug logging is on.
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Value = " + val);
                BytesWritable dump = new BytesWritable();
                dump.Set(encoded.GetData(), 0, encoded.GetLength());
                Log.Debug("Buffer = " + dump);
            }

            // Round trip: reading the written bytes must give back the value.
            decoder.Reset(encoded.GetData(), 0, encoded.GetLength());
            Assert.Equal(val, WritableUtils.ReadVInt(decoder));

            // The expected size must agree with all three ways of obtaining it.
            Assert.Equal(vintlen, encoded.GetLength());
            Assert.Equal(vintlen, WritableUtils.GetVIntSize(val));

            byte firstByte = encoded.GetData()[0];
            Assert.Equal(vintlen, WritableUtils.DecodeVIntSize(firstByte));
        }
Example #7
0
        /// <summary>Advance to the next key/value pair.</summary>
        /// <returns>
        /// <c>false</c> when there is no further input (key and value are then set to null);
        /// otherwise <c>true</c> after the current key and value have been updated.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public override bool NextKeyValue()
        {
            // Input exhausted: clear the current pair and report the end.
            if (!hasMore)
            {
                key   = null;
                value = null;
                return(false);
            }
            // The pair returned now is the first for its key exactly when the lookahead
            // done by the previous call did not find an equal key.
            firstValue = !nextKeyIsSame;
            DataInputBuffer nextKey = input.GetKey();

            // Store the unread part of the key buffer (position up to length) as the current raw key.
            currentRawKey.Set(nextKey.GetData(), nextKey.GetPosition(), nextKey.GetLength() -
                              nextKey.GetPosition());
            // Point 'buffer' at the raw key bytes before deserializing.
            // NOTE(review): presumably keyDeserializer reads from 'buffer'; that wiring is not visible here - confirm.
            buffer.Reset(currentRawKey.GetBytes(), 0, currentRawKey.GetLength());
            key = keyDeserializer.Deserialize(key);
            DataInputBuffer nextVal = input.GetValue();

            // Same for the value: point 'buffer' at its unread bytes, then deserialize.
            buffer.Reset(nextVal.GetData(), nextVal.GetPosition(), nextVal.GetLength() - nextVal
                         .GetPosition());
            value              = valueDeserializer.Deserialize(value);
            // Record the sizes of the unread key and value regions.
            currentKeyLength   = nextKey.GetLength() - nextKey.GetPosition();
            currentValueLength = nextVal.GetLength() - nextVal.GetPosition();
            // While marked, the raw pair is also written to the backup store.
            if (isMarked)
            {
                backupStore.Write(nextKey, nextVal);
            }
            // Look ahead: advance the input and compare the upcoming raw key with the one
            // just stored, so the following call knows whether the key changed.
            hasMore = input.Next();
            if (hasMore)
            {
                nextKey       = input.GetKey();
                nextKeyIsSame = comparator.Compare(currentRawKey.GetBytes(), 0, currentRawKey.GetLength
                                                       (), nextKey.GetData(), nextKey.GetPosition(), nextKey.GetLength() - nextKey.GetPosition
                                                       ()) == 0;
            }
            else
            {
                nextKeyIsSame = false;
            }
            // One more value has been handed out.
            inputValueCounter.Increment(1);
            return(true);
        }
Example #8
0
        /// <summary>
        /// Appends randomly generated key/value pairs to <paramref name="appendable"/> until the
        /// file at <paramref name="path"/> is at least <paramref name="fileSize"/> bytes long,
        /// timing the loop and reporting the total number of key and value bytes written.
        /// </summary>
        /// <param name="path">File whose length is polled to decide when to stop; its name is also logged.</param>
        /// <param name="appendable">Destination of the pairs; it is closed before this method returns or throws.</param>
        /// <param name="baseKlen">Minimum key length; each key is <c>baseKlen + rng.Next(baseKlen)</c> bytes.</param>
        /// <param name="baseVlen">Minimum value length; each value is <c>baseVlen + rng.Next(baseVlen)</c> bytes.</param>
        /// <param name="fileSize">File length, in bytes, at which writing stops.</param>
        /// <exception cref="System.IO.IOException"/>
        private void TimeWrite(Path path, TestTFileSeqFileComparison.KVAppendable appendable
                               , int baseKlen, int baseVlen, long fileSize)
        {
            // Lengths are always below twice the base, so buffers of 2 * base are large enough.
            int           maxKlen = baseKlen * 2;
            int           maxVlen = baseVlen * 2;
            BytesWritable key     = new BytesWritable();
            BytesWritable value   = new BytesWritable();

            byte[] keyBuffer   = new byte[maxKlen];
            byte[] valueBuffer = new byte[maxVlen];
            Random rng         = new Random(options.seed);
            long   totalBytes  = 0;

            PrintlnWithTimestamp("Start writing: " + path.GetName() + "...");
            StartTime();
            try
            {
                for (long i = 0; true; ++i)
                {
                    if (i % 1000 == 0)
                    {
                        // test the size for every 1000 rows.
                        if (fs.GetFileStatus(path).GetLen() >= fileSize)
                        {
                            break;
                        }
                    }
                    int klen = rng.Next(baseKlen) + baseKlen;
                    int vlen = rng.Next(baseVlen) + baseVlen;
                    FillBuffer(rng, key, keyBuffer, klen);
                    FillBuffer(rng, value, valueBuffer, vlen);
                    // NOTE(review): FillBuffer receives the same writable, buffer and length; if it
                    // already sets the writable, these two calls are redundant - confirm before removing.
                    key.Set(keyBuffer, 0, klen);
                    value.Set(valueBuffer, 0, vlen);
                    appendable.Append(key, value);
                    totalBytes += klen;
                    totalBytes += vlen;
                }
                StopTime();
            }
            finally
            {
                // Close the writer even when the loop throws, so it is not left open.
                // On the normal path the order stays StopTime -> Close -> ReportStats.
                appendable.Close();
            }
            ReportStats(path, totalBytes);
        }
        /// <summary>
        /// Writes <c>Records</c> random int/double pairs as raw bytes through
        /// <c>SequenceFileAsBinaryOutputFormat</c>, commits the output, then reads it back
        /// with <c>SequenceFileInputFormat</c> and compares every pair against the same
        /// seeded random sequence.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public virtual void TestBinary()
        {
            Job  job       = Job.GetInstance(new Configuration());
            Path outputDir = new Path(Runtime.GetProperty("test.build.data", "/tmp"), "outseq");

            // Draw a seed and re-seed with it so the same sequence can be replayed when reading.
            Random rng  = new Random();
            long   seed = rng.NextLong();
            rng.SetSeed(seed);

            // Output configuration: int keys, double values, block compression.
            FileOutputFormat.SetOutputPath(job, outputDir);
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable));
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable));
            SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
            SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType.Block);

            BytesWritable rawKey   = new BytesWritable();
            BytesWritable rawValue = new BytesWritable();
            TaskAttemptContext context =
                MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
            OutputFormat<BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat();
            OutputCommitter committer = outputFormat.GetOutputCommitter(context);

            committer.SetupJob(job);
            RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.GetRecordWriter(context);

            IntWritable      intValue    = new IntWritable();
            DoubleWritable   doubleValue = new DoubleWritable();
            DataOutputBuffer scratch     = new DataOutputBuffer();

            Log.Info("Creating data by SequenceFileAsBinaryOutputFormat");
            try
            {
                for (int written = 0; written < Records; ++written)
                {
                    // Serialize a random int into the scratch buffer and use its bytes as the key.
                    intValue = new IntWritable(rng.Next());
                    intValue.Write(scratch);
                    rawKey.Set(scratch.GetData(), 0, scratch.GetLength());
                    scratch.Reset();

                    // Same for a random double as the value.
                    doubleValue = new DoubleWritable(rng.NextDouble());
                    doubleValue.Write(scratch);
                    rawValue.Set(scratch.GetData(), 0, scratch.GetLength());
                    scratch.Reset();

                    writer.Write(rawKey, rawValue);
                }
            }
            finally
            {
                writer.Close(context);
            }
            committer.CommitTask(context);
            committer.CommitJob(job);

            InputFormat<IntWritable, DoubleWritable> inputFormat =
                new SequenceFileInputFormat<IntWritable, DoubleWritable>();
            int matched = 0;

            // Replay the random sequence from the start for verification.
            rng.SetSeed(seed);
            SequenceFileInputFormat.SetInputPaths(job, outputDir);
            Log.Info("Reading data by SequenceFileInputFormat");
            foreach (InputSplit split in inputFormat.GetSplits(job))
            {
                RecordReader<IntWritable, DoubleWritable> reader =
                    inputFormat.CreateRecordReader(split, context);
                MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mapContext =
                    new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
                        job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.CreateDummyReporter(), split);
                reader.Initialize(split, mapContext);
                try
                {
                    while (reader.NextKeyValue())
                    {
                        // Expected values come from the replayed sequence, in write order.
                        int    expectedInt    = rng.Next();
                        double expectedDouble = rng.NextDouble();
                        intValue    = reader.GetCurrentKey();
                        doubleValue = reader.GetCurrentValue();

                        string keyMessage = "Keys don't match: " + "*" + intValue.Get() + ":"
                                            + expectedInt + "*";
                        NUnit.Framework.Assert.AreEqual(keyMessage, expectedInt, intValue.Get());

                        string valueMessage = "Vals don't match: " + "*" + doubleValue.Get() + ":"
                                              + expectedDouble + "*";
                        NUnit.Framework.Assert.IsTrue(valueMessage,
                                                      double.Compare(doubleValue.Get(), expectedDouble) == 0);
                        ++matched;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some records not found", Records, matched);
        }
        // A random task attempt id for testing.
        /// <summary>
        /// Writes <c>Records</c> random int/double pairs as raw bytes through
        /// <c>SequenceFileAsBinaryOutputFormat</c> into a task work directory on the local
        /// file system, then reads the file back with <c>SequenceFileInputFormat</c> and
        /// compares every pair against the same seeded random sequence.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestBinary()
        {
            JobConf    job = new JobConf();
            FileSystem fs  = FileSystem.GetLocal(job);

            // <test.build.data>/<TempDirName>/_<attempt>/testbinary.seq
            Path baseDir = new Path(Runtime.GetProperty("test.build.data", "."));
            Path tempDir = new Path(baseDir, FileOutputCommitter.TempDirName);
            Path dir     = new Path(tempDir, "_" + attempt);
            Path file    = new Path(dir, "testbinary.seq");

            // Draw a seed and re-seed with it so the same sequence can be replayed when reading.
            Random rng  = new Random();
            long   seed = rng.NextLong();
            rng.SetSeed(seed);

            // Start from an empty work directory.
            fs.Delete(dir, true);
            bool created = fs.Mkdirs(dir);
            if (!created)
            {
                Fail("Failed to create output directory");
            }

            // Output configuration: int keys, double values, block compression.
            job.Set(JobContext.TaskAttemptId, attempt);
            FileOutputFormat.SetOutputPath(job, dir.GetParent().GetParent());
            FileOutputFormat.SetWorkOutputPath(job, dir);
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable));
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable));
            SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
            SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType.Block);

            BytesWritable rawKey   = new BytesWritable();
            BytesWritable rawValue = new BytesWritable();
            RecordWriter<BytesWritable, BytesWritable> writer =
                new SequenceFileAsBinaryOutputFormat().GetRecordWriter(fs, job, file.ToString(), Reporter.Null);

            IntWritable      intValue    = new IntWritable();
            DoubleWritable   doubleValue = new DoubleWritable();
            DataOutputBuffer scratch     = new DataOutputBuffer();

            Log.Info("Creating data by SequenceFileAsBinaryOutputFormat");
            try
            {
                for (int written = 0; written < Records; ++written)
                {
                    // Serialize a random int into the scratch buffer and use its bytes as the key.
                    intValue = new IntWritable(rng.Next());
                    intValue.Write(scratch);
                    rawKey.Set(scratch.GetData(), 0, scratch.GetLength());
                    scratch.Reset();

                    // Same for a random double as the value.
                    doubleValue = new DoubleWritable(rng.NextDouble());
                    doubleValue.Write(scratch);
                    rawValue.Set(scratch.GetData(), 0, scratch.GetLength());
                    scratch.Reset();

                    writer.Write(rawKey, rawValue);
                }
            }
            finally
            {
                writer.Close(Reporter.Null);
            }

            InputFormat<IntWritable, DoubleWritable> inputFormat =
                new SequenceFileInputFormat<IntWritable, DoubleWritable>();
            int matched = 0;

            // Replay the random sequence from the start for verification.
            rng.SetSeed(seed);
            DataInputBuffer buf        = new DataInputBuffer();
            int             splitCount = 3;

            SequenceFileInputFormat.AddInputPath(job, file);
            Log.Info("Reading data by SequenceFileInputFormat");
            foreach (InputSplit split in inputFormat.GetSplits(job, splitCount))
            {
                RecordReader<IntWritable, DoubleWritable> reader =
                    inputFormat.GetRecordReader(split, job, Reporter.Null);
                try
                {
                    // The reader fills the same two writables on every iteration.
                    while (reader.Next(intValue, doubleValue))
                    {
                        // Expected values come from the replayed sequence, in write order.
                        int    expectedInt    = rng.Next();
                        double expectedDouble = rng.NextDouble();

                        string keyMessage = "Keys don't match: " + "*" + intValue.Get() + ":"
                                            + expectedInt + "*";
                        NUnit.Framework.Assert.AreEqual(keyMessage, expectedInt, intValue.Get());

                        string valueMessage = "Vals don't match: " + "*" + doubleValue.Get() + ":"
                                              + expectedDouble + "*";
                        NUnit.Framework.Assert.IsTrue(valueMessage,
                                                      double.Compare(doubleValue.Get(), expectedDouble) == 0);
                        ++matched;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some records not found", Records, matched);
        }