Example #1
0
 /// <summary>
 /// Builds a new array holding the first <c>GetLength()</c> bytes of
 /// <paramref name="a"/> followed by the first <c>GetLength()</c> bytes of
 /// <paramref name="b"/>.
 /// </summary>
 /// <param name="a">writable whose bytes come first in the result</param>
 /// <param name="b">writable whose bytes are appended after a's bytes</param>
 /// <returns>a freshly allocated array of the combined length</returns>
 private static byte[] Pair(BytesWritable a, BytesWritable b)
 {
     int headLength = a.GetLength();
     int tailLength = b.GetLength();
     byte[] joined = new byte[headLength + tailLength];

     // Head goes at offset 0, tail starts right after it.
     System.Array.Copy(a.GetBytes(), 0, joined, 0, headLength);
     System.Array.Copy(b.GetBytes(), 0, joined, headLength, tailLength);
     return joined;
 }
Example #2
0
 /// <summary>Serialize the given object onto the stream with a length prefix.</summary>
 /// <remarks>
 /// Text and BytesWritable instances are written directly: a vint length
 /// followed by that many bytes of their backing array. Any other writable
 /// is first serialized into the shared buffer, after which the buffered
 /// length and data are written to the stream in the same layout.
 /// </remarks>
 /// <param name="obj">the object to write</param>
 /// <exception cref="System.IO.IOException"/>
 private void WriteObject(Writable obj)
 {
     // Text and BytesWritable are encoded directly, so that they end up
     // in C++ as the natural translations.
     Text text = obj as Text;
     if (text != null)
     {
         int textLength = text.GetLength();
         WritableUtils.WriteVInt(stream, textLength);
         stream.Write(text.GetBytes(), 0, textLength);
         return;
     }

     BytesWritable bytes = obj as BytesWritable;
     if (bytes != null)
     {
         int byteCount = bytes.GetLength();
         WritableUtils.WriteVInt(stream, byteCount);
         stream.Write(bytes.GetBytes(), 0, byteCount);
         return;
     }

     // Generic path: serialize into the scratch buffer, then emit it.
     buffer.Reset();
     obj.Write(buffer);
     int serializedLength = buffer.GetLength();
     WritableUtils.WriteVInt(stream, serializedLength);
     stream.Write(buffer.GetData(), 0, serializedLength);
 }
Example #3
0
        /// <summary>Advance to the next key/value pair.</summary>
        /// <remarks>
        /// Deserializes the record currently exposed by <c>input</c> into
        /// <c>key</c> and <c>value</c>, records its raw key/value lengths,
        /// then advances <c>input</c> and remembers whether the following
        /// record carries an equal key (per <c>comparator</c>).
        /// </remarks>
        /// <returns>
        /// false (with key and value cleared) when no more input is available,
        /// true otherwise
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public override bool NextKeyValue()
        {
            // Nothing left: clear the current pair and report exhaustion.
            if (!hasMore)
            {
                key   = null;
                value = null;
                return(false);
            }
            // This record is the first of its key unless the previous call saw
            // that the upcoming key compared equal to the then-current key.
            firstValue = !nextKeyIsSame;
            DataInputBuffer nextKey = input.GetKey();

            // Keep a copy of the unread portion of the raw key; it is compared
            // against the following record's key further down.
            currentRawKey.Set(nextKey.GetData(), nextKey.GetPosition(), nextKey.GetLength() -
                              nextKey.GetPosition());
            // Point the shared buffer at the raw key before deserializing.
            // NOTE(review): presumably keyDeserializer reads from `buffer` - confirm where it is opened.
            buffer.Reset(currentRawKey.GetBytes(), 0, currentRawKey.GetLength());
            key = keyDeserializer.Deserialize(key);
            DataInputBuffer nextVal = input.GetValue();

            // Re-point the shared buffer at the unread portion of the raw value.
            buffer.Reset(nextVal.GetData(), nextVal.GetPosition(), nextVal.GetLength() - nextVal
                         .GetPosition());
            value              = valueDeserializer.Deserialize(value);
            // Remaining (unread) byte counts of the raw key and value.
            currentKeyLength   = nextKey.GetLength() - nextKey.GetPosition();
            currentValueLength = nextVal.GetLength() - nextVal.GetPosition();
            // While a mark is active, also record the raw pair in the backup store.
            if (isMarked)
            {
                backupStore.Write(nextKey, nextVal);
            }
            // Advance the underlying input and look ahead at the next key.
            hasMore = input.Next();
            if (hasMore)
            {
                nextKey       = input.GetKey();
                // Equal keys (comparator result 0) mean the next record has the same key.
                nextKeyIsSame = comparator.Compare(currentRawKey.GetBytes(), 0, currentRawKey.GetLength
                                                       (), nextKey.GetData(), nextKey.GetPosition(), nextKey.GetLength() - nextKey.GetPosition
                                                       ()) == 0;
            }
            else
            {
                nextKeyIsSame = false;
            }
            // Count the value that was just consumed.
            inputValueCounter.Increment(1);
            return(true);
        }
Example #4
0
        /// <summary>
        /// Creates the sequence file <c>part-0</c> under <paramref name="dir"/> and fills
        /// it with randomly sized, randomly filled BytesWritable key/value pairs until
        /// roughly <paramref name="fileSizeInMB"/> megabytes of key and value bytes have
        /// been appended.
        /// </summary>
        /// <param name="conf">supplies the MinKey/MaxKey/MinValue/MaxValue size settings</param>
        /// <param name="fs">file system on which the file is created</param>
        /// <param name="dir">input directory; must not already contain entries</param>
        /// <param name="fileSizeInMB">target payload size in megabytes</param>
        /// <exception cref="System.IO.IOException">
        /// if <paramref name="dir"/> exists and is non-empty
        /// </exception>
        private static void CreateBigMapInputFile(Configuration conf, FileSystem fs, Path
                                                  dir, long fileSizeInMB)
        {
            // Check if the input path exists and is non-empty
            if (fs.Exists(dir))
            {
                FileStatus[] list = fs.ListStatus(dir);
                if (list.Length > 0)
                {
                    throw new IOException("Input path: " + dir + " already exists... ");
                }
            }
            Path file = new Path(dir, "part-0");

            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(BytesWritable
                                                                                          ), typeof(BytesWritable), SequenceFile.CompressionType.None);
            long start;

            // Everything that can throw after the writer exists runs inside the
            // try block so the writer is closed even when an append fails.
            try
            {
                long          numBytesToWrite = fileSizeInMB * 1024 * 1024;
                int           minKeySize      = conf.GetInt(MinKey, 10);
                int           keySizeRange    = conf.GetInt(MaxKey, 1000) - minKeySize;
                int           minValueSize    = conf.GetInt(MinValue, 0);
                int           valueSizeRange  = conf.GetInt(MaxValue, 20000) - minValueSize;
                BytesWritable randomKey       = new BytesWritable();
                BytesWritable randomValue     = new BytesWritable();

                Log.Info("Writing " + numBytesToWrite + " bytes to " + file + " with " + "minKeySize: "
                         + minKeySize + " keySizeRange: " + keySizeRange + " minValueSize: " + minValueSize
                         + " valueSizeRange: " + valueSizeRange);
                start = Runtime.CurrentTimeMillis();

                while (numBytesToWrite > 0)
                {
                    // A zero range means a fixed size, so the random draw is skipped.
                    int keyLength = minKeySize + (keySizeRange != 0 ? random.Next(keySizeRange) : 0);
                    randomKey.SetSize(keyLength);
                    RandomizeBytes(randomKey.GetBytes(), 0, randomKey.GetLength());
                    int valueLength = minValueSize + (valueSizeRange != 0 ? random.Next(valueSizeRange
                                                                                        ) : 0);
                    randomValue.SetSize(valueLength);
                    RandomizeBytes(randomValue.GetBytes(), 0, randomValue.GetLength());
                    writer.Append(randomKey, randomValue);
                    // Only payload bytes count towards the target size.
                    numBytesToWrite -= keyLength + valueLength;
                }
            }
            finally
            {
                writer.Close();
            }
            // As before, the elapsed time includes closing the writer.
            long end = Runtime.CurrentTimeMillis();

            Log.Info("Created " + file + " of size: " + fileSizeInMB + "MB in " + (end - start
                                                                                   ) / 1000 + "secs");
        }
Example #5
0
        /// <summary>Writes a writable to the stream with a length prefix, then flushes.</summary>
        /// <remarks>
        /// Text and BytesWritable are written as a vlong length followed by that
        /// many bytes of their backing array. Any other writable is serialized
        /// into a temporary buffer whose length (as a vint) and data are then
        /// written. The stream is flushed before returning.
        /// </remarks>
        /// <param name="obj">the object to write</param>
        /// <param name="stream">destination stream</param>
        /// <exception cref="System.IO.IOException"/>
        protected internal virtual void WriteObject(Writable obj, DataOutputStream stream
                                                    )
        {
            // For Text and BytesWritable, encode them directly, so that they end up
            // in C++ as the natural translations.
            if (obj is Text)
            {
                Text t   = (Text)obj;
                int  len = t.GetLength();
                WritableUtils.WriteVLong(stream, len);
                // The intermediate flushes of the original are kept so the
                // length reaches the peer before the payload, as before.
                stream.Flush();
                stream.Write(t.GetBytes(), 0, len);
                stream.Flush();
            }
            else if (obj is BytesWritable)
            {
                BytesWritable b   = (BytesWritable)obj;
                int           len = b.GetLength();
                WritableUtils.WriteVLong(stream, len);
                stream.Write(b.GetBytes(), 0, len);
            }
            else
            {
                // Only this branch needs scratch space, so the buffer is allocated
                // here rather than unconditionally on every call.
                DataOutputBuffer buffer = new DataOutputBuffer();
                buffer.Reset();
                obj.Write(buffer);
                int length = buffer.GetLength();
                WritableUtils.WriteVInt(stream, length);
                stream.Write(buffer.GetData(), 0, length);
            }
            stream.Flush();
        }
Example #6
0
        /// <summary>
        /// Reads every record of one split and returns each value converted to a string.
        /// </summary>
        /// <param name="format">input format used to obtain the record reader</param>
        /// <param name="split">the split to read</param>
        /// <param name="job">job configuration passed to the record reader</param>
        /// <returns>the record values in the order they were read</returns>
        /// <exception cref="System.IO.IOException"/>
        private static IList <string> ReadSplit(FixedLengthInputFormat format, InputSplit
                                                split, JobConf job)
        {
            IList <string> records = new AList <string>();
            RecordReader <LongWritable, BytesWritable> recordReader =
                format.GetRecordReader(split, job, voidReporter);
            LongWritable  offset  = recordReader.CreateKey();
            BytesWritable payload = recordReader.CreateValue();

            try
            {
                while (recordReader.Next(offset, payload))
                {
                    // Only the first GetLength() bytes of the backing array are used.
                    string text = Sharpen.Runtime.GetStringForBytes(
                        payload.GetBytes(), 0, payload.GetLength());
                    records.AddItem(text);
                }
            }
            finally
            {
                // Close the reader whether or not reading succeeded.
                recordReader.Close();
            }
            return records;
        }
Example #7
0
        /// <summary>
        /// Runs a series of randomized round-trip checks of FixedLengthInputFormat.
        /// </summary>
        /// <remarks>
        /// Each iteration creates a file with a random record count and record
        /// length, splits it into a varying number of splits, reads all splits
        /// back and checks keys, record lengths and record contents against what
        /// was written. The seed is logged so a failing run can be reproduced.
        /// </remarks>
        /// <param name="codec">
        /// compression codec for the test file, or null for an uncompressed file
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        private void RunRandomTests(CompressionCodec codec)
        {
            StringBuilder fileName = new StringBuilder("testFormat.txt");

            if (codec != null)
            {
                fileName.Append(".gz");
            }
            localFs.Delete(workDir, true);
            Path file = new Path(workDir, fileName.ToString());
            int  seed = new Random().Next();

            Log.Info("Seed = " + seed);
            Random        random   = new Random(seed);
            int           MaxTests = 20;
            LongWritable  key      = new LongWritable();
            BytesWritable value    = new BytesWritable();

            for (int i = 0; i < MaxTests; i++)
            {
                Log.Info("----------------------------------------------------------");
                // Maximum total records of 999
                int totalRecords = random.Next(999) + 1;
                // Test an empty file
                if (i == 8)
                {
                    totalRecords = 0;
                }
                // Maximum bytes in a record of 100K
                int recordLength = random.Next(1024 * 100) + 1;
                // For the 11th test, force a record length of 1
                if (i == 10)
                {
                    recordLength = 1;
                }
                // The total bytes in the test file
                int fileSize = (totalRecords * recordLength);
                Log.Info("totalRecords=" + totalRecords + " recordLength=" + recordLength);
                // Create the job
                JobConf job = new JobConf(defaultConf);
                if (codec != null)
                {
                    ReflectionUtils.SetConf(codec, job);
                }
                // Create the test file
                AList <string> recordList = CreateFile(file, codec, recordLength, totalRecords);
                NUnit.Framework.Assert.IsTrue(localFs.Exists(file));
                //set the fixed length record length config property for the job
                FixedLengthInputFormat.SetRecordLength(job, recordLength);
                int numSplits = 1;
                // Arbitrarily set number of splits.
                if (i > 0)
                {
                    if (i == (MaxTests - 1))
                    {
                        // Test a split size that is less than record len.
                        // Integer division already floors; clamping to 1 keeps a
                        // 1-byte record from producing a zero divisor.
                        int halfRecordLength = Math.Max(1, recordLength / 2);
                        numSplits = fileSize / halfRecordLength;
                    }
                    else
                    {
                        if (MaxTests % i == 0)
                        {
                            // Let us create a split size that is forced to be
                            // smaller than the end file itself, (ensures 1+ splits)
                            numSplits = fileSize / (fileSize - random.Next(fileSize));
                        }
                        else
                        {
                            // Just pick a random split size with no upper bound.
                            // The random draw may be 0, so clamp the divisor.
                            int splitSize = Math.Max(1, random.Next(int.MaxValue));
                            numSplits = Math.Max(1, fileSize / splitSize);
                        }
                    }
                    Log.Info("Number of splits set to: " + numSplits);
                }
                // Setup the input path
                FileInputFormat.SetInputPaths(job, workDir);
                // Try splitting the file in a variety of sizes
                FixedLengthInputFormat format = new FixedLengthInputFormat();
                format.Configure(job);
                InputSplit[] splits = format.GetSplits(job, numSplits);
                Log.Info("Actual number of splits = " + splits.Length);
                // Test combined split lengths = total file size
                int recordNumber = 0;
                foreach (InputSplit split in splits)
                {
                    RecordReader <LongWritable, BytesWritable> reader = format.GetRecordReader(split,
                                                                                               job, voidReporter);
                    // Close the reader even when one of the assertions below throws.
                    try
                    {
                        Type clazz = reader.GetType();
                        NUnit.Framework.Assert.AreEqual("RecordReader class should be FixedLengthRecordReader:"
                                                        , typeof(FixedLengthRecordReader), clazz);
                        // Plow through the records in this split
                        while (reader.Next(key, value))
                        {
                            // The expected key is the record index times the record length.
                            NUnit.Framework.Assert.AreEqual("Checking key", (long)(recordNumber * recordLength
                                                                                   ), key.Get());
                            string valueString = Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value
                                                                                   .GetLength());
                            NUnit.Framework.Assert.AreEqual("Checking record length:", recordLength, value.GetLength
                                                                ());
                            NUnit.Framework.Assert.IsTrue("Checking for more records than expected:", recordNumber
                                                          < totalRecords);
                            string origRecord = recordList[recordNumber];
                            NUnit.Framework.Assert.AreEqual("Checking record content:", origRecord, valueString
                                                            );
                            recordNumber++;
                        }
                    }
                    finally
                    {
                        reader.Close();
                    }
                }
                NUnit.Framework.Assert.AreEqual("Total original records should be total read records:"
                                                , recordList.Count, recordNumber);
            }
        }
Example #8
0
        /// <summary>
        /// Writes <c>Records</c> random Text key/value pairs to a sequence file and
        /// reads them back through SequenceFileAsBinaryInputFormat.
        /// </summary>
        /// <remarks>
        /// The random generator is re-seeded with the same seed before reading, so
        /// the expected keys and values can be regenerated and compared with the
        /// Text instances decoded from the raw bytes of each record.
        /// </remarks>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public virtual void TestBinary()
        {
            Job        job  = Job.GetInstance();
            FileSystem fs   = FileSystem.GetLocal(job.GetConfiguration());
            Path       dir  = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path       file = new Path(dir, "testbinary.seq");
            Random     r    = new Random();
            long       seed = r.NextLong();

            r.SetSeed(seed);
            fs.Delete(dir, true);
            FileInputFormat.SetInputPaths(job, dir);
            Text tkey = new Text();
            Text tval = new Text();

            SequenceFile.Writer writer = new SequenceFile.Writer(fs, job.GetConfiguration(),
                                                                 file, typeof(Text), typeof(Text));
            try
            {
                for (int i = 0; i < Records; ++i)
                {
                    tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
                    // System.Convert.ToString(long, int) only accepts bases
                    // 2, 8, 10 and 16, so base 36 is rendered by a local helper.
                    tval.Set(ToBase36(r.NextLong()));
                    writer.Append(tkey, tval);
                }
            }
            finally
            {
                writer.Close();
            }
            TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                            .GetConfiguration());
            InputFormat <BytesWritable, BytesWritable> bformat = new SequenceFileAsBinaryInputFormat
                                                                     ();
            int count = 0;

            // Replay the same random sequence that produced the file contents.
            r.SetSeed(seed);
            BytesWritable   bkey   = new BytesWritable();
            BytesWritable   bval   = new BytesWritable();
            Text            cmpkey = new Text();
            Text            cmpval = new Text();
            DataInputBuffer buf    = new DataInputBuffer();

            FileInputFormat.SetInputPaths(job, file);
            foreach (InputSplit split in bformat.GetSplits(job))
            {
                RecordReader <BytesWritable, BytesWritable> reader = bformat.CreateRecordReader(split
                                                                                                , context);
                MapContext <BytesWritable, BytesWritable, BytesWritable, BytesWritable> mcontext =
                    new MapContextImpl <BytesWritable, BytesWritable, BytesWritable, BytesWritable>(job
                                                                                                    .GetConfiguration(), context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil
                                                                                                    .CreateDummyReporter(), split);
                reader.Initialize(split, mcontext);
                try
                {
                    while (reader.NextKeyValue())
                    {
                        bkey = reader.GetCurrentKey();
                        bval = reader.GetCurrentValue();
                        // Regenerate the expected pair in the same order it was written.
                        tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
                        tval.Set(ToBase36(r.NextLong()));
                        // Decode the raw record bytes back into Text for comparison.
                        buf.Reset(bkey.GetBytes(), bkey.GetLength());
                        cmpkey.ReadFields(buf);
                        buf.Reset(bval.GetBytes(), bval.GetLength());
                        cmpval.ReadFields(buf);
                        NUnit.Framework.Assert.IsTrue("Keys don't match: " + "*" + cmpkey.ToString() + ":"
                                                      + tkey.ToString() + "*", cmpkey.ToString().Equals(tkey.ToString()));
                        NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + cmpval.ToString() + ":"
                                                      + tval.ToString() + "*", cmpval.ToString().Equals(tval.ToString()));
                        ++count;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
        }

        /// <summary>
        /// Renders a long in base 36 using digits 0-9 and a-z, with a leading
        /// minus sign for negative values.
        /// </summary>
        /// <param name="value">the number to render</param>
        /// <returns>the base-36 text of <paramref name="value"/></returns>
        private static string ToBase36(long value)
        {
            const string digits = "0123456789abcdefghijklmnopqrstuvwxyz";

            if (value == 0)
            {
                return "0";
            }
            bool negative = value < 0;
            // Work on the non-positive magnitude so long.MinValue cannot overflow.
            long remaining = negative ? value : -value;
            System.Text.StringBuilder text = new System.Text.StringBuilder();
            while (remaining != 0)
            {
                // remaining is <= 0 here, so the remainder is in [-35, 0].
                int digit = (int)(-(remaining % 36));
                text.Insert(0, digits[digit]);
                remaining /= 36;
            }
            if (negative)
            {
                text.Insert(0, '-');
            }
            return text.ToString();
        }
 /// <summary>
 /// Computes the hash code of a key by delegating to the
 /// (byte[], int, int) overload over the first <c>GetLength()</c> bytes of
 /// the key's backing array.
 /// </summary>
 /// <param name="key">the key to hash</param>
 /// <returns>the value returned by the byte-range overload</returns>
 /// <exception cref="Sharpen.DigestException"/>
 private long MD5Hashcode(BytesWritable key)
 {
     byte[] keyBytes  = key.GetBytes();
     int    keyLength = key.GetLength();

     return MD5Hashcode(keyBytes, 0, keyLength);
 }
        /// <summary>
        /// Writes <c>Nentry</c> entries with LongWritable keys into a "gz" TFile and
        /// then scans the file, checking that each value equals <c>BuildValue</c> of
        /// its position in scan order.
        /// </summary>
        public virtual void TestSortedLongWritable()
        {
            Configuration conf       = new Configuration();
            Path          tfilePath  = new Path(Root, name);
            FileSystem    fileSystem = tfilePath.GetFileSystem(conf);

            // Write phase: one key/value pair per index.
            FSDataOutputStream output = fileSystem.Create(tfilePath);
            try
            {
                TFile.Writer tfileWriter = new TFile.Writer(output, BlockSize, "gz",
                                                            jClassLongWritableComparator, conf);
                try
                {
                    LongWritable sortKey = new LongWritable(0);
                    long         index   = 0;
                    while (index < Nentry)
                    {
                        // Keys are the cube of the index shifted by half the entry count.
                        sortKey.Set(Cube(index - Nentry / 2));

                        DataOutputStream keyStream = tfileWriter.PrepareAppendKey(-1);
                        try
                        {
                            sortKey.Write(keyStream);
                        }
                        finally
                        {
                            keyStream.Close();
                        }

                        DataOutputStream valueStream = tfileWriter.PrepareAppendValue(-1);
                        try
                        {
                            valueStream.Write(Runtime.GetBytesForString(BuildValue(index)));
                        }
                        finally
                        {
                            valueStream.Close();
                        }
                        ++index;
                    }
                }
                finally
                {
                    tfileWriter.Close();
                }
            }
            finally
            {
                output.Close();
            }

            // Read phase: scan the file and compare values in scan order.
            FSDataInputStream input = fileSystem.Open(tfilePath);
            try
            {
                long         fileLength  = fileSystem.GetFileStatus(tfilePath).GetLen();
                TFile.Reader tfileReader = new TFile.Reader(input, fileLength, conf);
                try
                {
                    TFile.Reader.Scanner cursor   = tfileReader.CreateScanner();
                    long                 position = 0;
                    BytesWritable        payload  = new BytesWritable();
                    while (!cursor.AtEnd())
                    {
                        cursor.Entry().GetValue(payload);
                        string expected = BuildValue(position);
                        string actual   = Runtime.GetStringForBytes(
                            payload.GetBytes(), 0, payload.GetLength());
                        Assert.Equal(expected, actual);
                        ++position;
                        cursor.Advance();
                    }
                }
                finally
                {
                    tfileReader.Close();
                }
            }
            finally
            {
                input.Close();
            }
        }
Example #11
0
        /// <summary>
        /// Writes <c>Records</c> random Text key/value pairs to a sequence file and
        /// reads them back through SequenceFileAsBinaryInputFormat over three
        /// requested splits.
        /// </summary>
        /// <remarks>
        /// The random generator is re-seeded with the same seed before reading, so
        /// the expected keys and values can be regenerated and compared with the
        /// Text instances decoded from the raw bytes of each record.
        /// </remarks>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestBinary()
        {
            JobConf    job  = new JobConf();
            FileSystem fs   = FileSystem.GetLocal(job);
            Path       dir  = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path       file = new Path(dir, "testbinary.seq");
            Random     r    = new Random();
            long       seed = r.NextLong();

            r.SetSeed(seed);
            fs.Delete(dir, true);
            FileInputFormat.SetInputPaths(job, dir);
            Text tkey = new Text();
            Text tval = new Text();

            SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, file, typeof(Text),
                                                                 typeof(Text));
            try
            {
                for (int i = 0; i < Records; ++i)
                {
                    tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
                    // System.Convert.ToString(long, int) only accepts bases
                    // 2, 8, 10 and 16, so base 36 is rendered by a local helper.
                    tval.Set(LongToBase36(r.NextLong()));
                    writer.Append(tkey, tval);
                }
            }
            finally
            {
                writer.Close();
            }
            InputFormat <BytesWritable, BytesWritable> bformat = new SequenceFileAsBinaryInputFormat
                                                                     ();
            int count = 0;

            // Replay the same random sequence that produced the file contents.
            r.SetSeed(seed);
            BytesWritable   bkey      = new BytesWritable();
            BytesWritable   bval      = new BytesWritable();
            Text            cmpkey    = new Text();
            Text            cmpval    = new Text();
            DataInputBuffer buf       = new DataInputBuffer();
            int             NumSplits = 3;

            FileInputFormat.SetInputPaths(job, file);
            foreach (InputSplit split in bformat.GetSplits(job, NumSplits))
            {
                RecordReader <BytesWritable, BytesWritable> reader = bformat.GetRecordReader(split
                                                                                             , job, Reporter.Null);
                try
                {
                    while (reader.Next(bkey, bval))
                    {
                        // Regenerate the expected pair in the same order it was written.
                        tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
                        tval.Set(LongToBase36(r.NextLong()));
                        // Decode the raw record bytes back into Text for comparison.
                        buf.Reset(bkey.GetBytes(), bkey.GetLength());
                        cmpkey.ReadFields(buf);
                        buf.Reset(bval.GetBytes(), bval.GetLength());
                        cmpval.ReadFields(buf);
                        NUnit.Framework.Assert.IsTrue("Keys don't match: " + "*" + cmpkey.ToString() + ":"
                                                      + tkey.ToString() + "*", cmpkey.ToString().Equals(tkey.ToString()));
                        NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + cmpval.ToString() + ":"
                                                      + tval.ToString() + "*", cmpval.ToString().Equals(tval.ToString()));
                        ++count;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
        }

        /// <summary>
        /// Renders a long in base 36 using digits 0-9 and a-z, with a leading
        /// minus sign for negative values.
        /// </summary>
        /// <param name="value">the number to render</param>
        /// <returns>the base-36 text of <paramref name="value"/></returns>
        private static string LongToBase36(long value)
        {
            const string digits = "0123456789abcdefghijklmnopqrstuvwxyz";

            if (value == 0)
            {
                return "0";
            }
            bool negative = value < 0;
            // Work on the non-positive magnitude so long.MinValue cannot overflow.
            long remaining = negative ? value : -value;
            System.Text.StringBuilder text = new System.Text.StringBuilder();
            while (remaining != 0)
            {
                // remaining is <= 0 here, so the remainder is in [-35, 0].
                int digit = (int)(-(remaining % 36));
                text.Insert(0, digits[digit]);
                remaining /= 36;
            }
            if (negative)
            {
                text.Insert(0, '-');
            }
            return text.ToString();
        }
Example #12
0
 /// <summary>
 /// Writes the first <c>GetLength()</c> bytes of this instance's value to the
 /// given stream.
 /// </summary>
 /// <param name="outStream">stream that receives the bytes</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void WriteUncompressedBytes(DataOutputStream outStream)
 {
     byte[] payload       = value.GetBytes();
     int    payloadLength = value.GetLength();

     outStream.Write(payload, 0, payloadLength);
 }