/// <exception cref="System.IO.IOException"/>
 public virtual void Close()
 {
     if (curReader != null)
     {
         curReader.Close();
         curReader = null;
     }
 }
 /// <exception cref="System.IO.IOException"/>
 public override void Close()
 {
     if (curReader != null)
     {
         curReader.Close();
         curReader = null;
     }
 }
예제 #3
0
            /// <summary>From each split sampled, take the first numSamples / numSplits records.</summary>
            /// <exception cref="System.IO.IOException"/>
            /// <exception cref="System.Exception"/>
            public virtual K[] GetSample(InputFormat <K, V> inf, Job job)
            {
                // ArrayList::toArray doesn't preserve type
                IList <InputSplit> splits  = inf.GetSplits(job);
                AList <K>          samples = new AList <K>(numSamples);
                int  splitsToSample        = Math.Min(maxSplitsSampled, splits.Count);
                int  samplesPerSplit       = numSamples / splitsToSample;
                long records = 0;

                for (int i = 0; i < splitsToSample; ++i)
                {
                    TaskAttemptContext samplingContext = new TaskAttemptContextImpl(job.GetConfiguration
                                                                                        (), new TaskAttemptID());
                    RecordReader <K, V> reader = inf.CreateRecordReader(splits[i], samplingContext);
                    reader.Initialize(splits[i], samplingContext);
                    while (reader.NextKeyValue())
                    {
                        samples.AddItem(ReflectionUtils.Copy(job.GetConfiguration(), reader.GetCurrentKey
                                                                 (), null));
                        ++records;
                        if ((i + 1) * samplesPerSplit <= records)
                        {
                            break;
                        }
                    }
                    reader.Close();
                }
                return((K[])Sharpen.Collections.ToArray(samples));
            }
예제 #4
0
            /// <summary>From each split sampled, take the first numSamples / numSplits records.</summary>
            /// <exception cref="System.IO.IOException"/>
            public virtual K[] GetSample(InputFormat <K, V> inf, JobConf job)
            {
                // ArrayList::toArray doesn't preserve type
                InputSplit[] splits          = inf.GetSplits(job, job.GetNumMapTasks());
                AList <K>    samples         = new AList <K>(numSamples);
                int          splitsToSample  = Math.Min(maxSplitsSampled, splits.Length);
                int          splitStep       = splits.Length / splitsToSample;
                int          samplesPerSplit = numSamples / splitsToSample;
                long         records         = 0;

                for (int i = 0; i < splitsToSample; ++i)
                {
                    RecordReader <K, V> reader = inf.GetRecordReader(splits[i * splitStep], job, Reporter
                                                                     .Null);
                    K key   = reader.CreateKey();
                    V value = reader.CreateValue();
                    while (reader.Next(key, value))
                    {
                        samples.AddItem(key);
                        key = reader.CreateKey();
                        ++records;
                        if ((i + 1) * samplesPerSplit <= records)
                        {
                            break;
                        }
                    }
                    reader.Close();
                }
                return((K[])Sharpen.Collections.ToArray(samples));
            }
        /// <exception cref="System.Exception"/>
        private static IList <string> ReadSplit(FixedLengthInputFormat format, InputSplit
                                                split, Job job)
        {
            IList <string>     result  = new AList <string>();
            TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                            .GetConfiguration());
            RecordReader <LongWritable, BytesWritable> reader = format.CreateRecordReader(split
                                                                                          , context);
            MapContext <LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext = new
                                                                                             MapContextImpl <LongWritable, BytesWritable, LongWritable, BytesWritable>(job.GetConfiguration
                                                                                                                                                                           (), context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil.CreateDummyReporter
                                                                                                                                                                           (), split);
            LongWritable  key;
            BytesWritable value;

            try
            {
                reader.Initialize(split, mcontext);
                while (reader.NextKeyValue())
                {
                    key   = reader.GetCurrentKey();
                    value = reader.GetCurrentValue();
                    result.AddItem(Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value.GetLength
                                                                         ()));
                }
            }
            finally
            {
                reader.Close();
            }
            return(result);
        }
예제 #6
0
        /// <exception cref="System.IO.IOException"/>
        private static IList <Text> ReadSplit(KeyValueTextInputFormat format, InputSplit split
                                              , JobConf job)
        {
            IList <Text> result = new AList <Text>();
            RecordReader <Text, Text> reader = null;

            try
            {
                reader = format.GetRecordReader(split, job, voidReporter);
                Text key   = reader.CreateKey();
                Text value = reader.CreateValue();
                while (reader.Next(key, value))
                {
                    result.AddItem(value);
                    value = (Text)reader.CreateValue();
                }
            }
            finally
            {
                if (reader != null)
                {
                    reader.Close();
                }
            }
            return(result);
        }
예제 #7
0
        /// <exception cref="System.IO.IOException"/>
        private int CountRecords(int numSplits)
        {
            InputFormat <Text, BytesWritable> format = new SequenceFileInputFilter <Text, BytesWritable
                                                                                    >();
            Text          key   = new Text();
            BytesWritable value = new BytesWritable();

            if (numSplits == 0)
            {
                numSplits = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1;
            }
            InputSplit[] splits = format.GetSplits(job, numSplits);
            // check each split
            int count = 0;

            Log.Info("Generated " + splits.Length + " splits.");
            for (int j = 0; j < splits.Length; j++)
            {
                RecordReader <Text, BytesWritable> reader = format.GetRecordReader(splits[j], job,
                                                                                   reporter);
                try
                {
                    while (reader.Next(key, value))
                    {
                        Log.Info("Accept record " + key.ToString());
                        count++;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            return(count);
        }
예제 #8
0
        /// <summary>test DBInputFormat class.</summary>
        /// <remarks>test DBInputFormat class. Class should split result for chunks</remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestDBInputFormat()
        {
            JobConf configuration = new JobConf();

            SetupDriver(configuration);
            DBInputFormat <DBInputFormat.NullDBWritable> format = new DBInputFormat <DBInputFormat.NullDBWritable
                                                                                     >();

            format.SetConf(configuration);
            format.SetConf(configuration);
            DBInputFormat.DBInputSplit splitter = new DBInputFormat.DBInputSplit(1, 10);
            Reporter reporter = Org.Mockito.Mockito.Mock <Reporter>();
            RecordReader <LongWritable, DBInputFormat.NullDBWritable> reader = format.GetRecordReader
                                                                                   (splitter, configuration, reporter);

            configuration.SetInt(MRJobConfig.NumMaps, 3);
            InputSplit[] lSplits = format.GetSplits(configuration, 3);
            NUnit.Framework.Assert.AreEqual(5, lSplits[0].GetLength());
            NUnit.Framework.Assert.AreEqual(3, lSplits.Length);
            // test reader .Some simple tests
            NUnit.Framework.Assert.AreEqual(typeof(LongWritable), reader.CreateKey().GetType(
                                                ));
            NUnit.Framework.Assert.AreEqual(0, reader.GetPos());
            NUnit.Framework.Assert.AreEqual(0, reader.GetProgress(), 0.001);
            reader.Close();
        }
예제 #9
0
        /// <exception cref="System.IO.IOException"/>
        internal static long ReadBench(JobConf conf)
        {
            // InputFormat instantiation
            InputFormat  inf = conf.GetInputFormat();
            string       fn  = conf.Get("test.filebench.name", string.Empty);
            Path         pin = new Path(FileInputFormat.GetInputPaths(conf)[0], fn);
            FileStatus   @in = pin.GetFileSystem(conf).GetFileStatus(pin);
            RecordReader rr  = inf.GetRecordReader(new FileSplit(pin, 0, @in.GetLen(), (string
                                                                                        [])null), conf, Reporter.Null);

            try
            {
                object   key   = rr.CreateKey();
                object   val   = rr.CreateValue();
                DateTime start = new DateTime();
                while (rr.Next(key, val))
                {
                }
                DateTime end = new DateTime();
                return(end.GetTime() - start.GetTime());
            }
            finally
            {
                rr.Close();
            }
        }
예제 #10
0
            /// <summary>
            /// Randomize the split order, then take the specified number of keys from
            /// each split sampled, where each key is selected with the specified
            /// probability and possibly replaced by a subsequently selected key when
            /// the quota of keys from that split is satisfied.
            /// </summary>
            /// <exception cref="System.IO.IOException"/>
            /// <exception cref="System.Exception"/>
            public virtual K[] GetSample(InputFormat <K, V> inf, Job job)
            {
                // ArrayList::toArray doesn't preserve type
                IList <InputSplit> splits  = inf.GetSplits(job);
                AList <K>          samples = new AList <K>(numSamples);
                int    splitsToSample      = Math.Min(maxSplitsSampled, splits.Count);
                Random r    = new Random();
                long   seed = r.NextLong();

                r.SetSeed(seed);
                Log.Debug("seed: " + seed);
                // shuffle splits
                for (int i = 0; i < splits.Count; ++i)
                {
                    InputSplit tmp = splits[i];
                    int        j   = r.Next(splits.Count);
                    splits.Set(i, splits[j]);
                    splits.Set(j, tmp);
                }
                // our target rate is in terms of the maximum number of sample splits,
                // but we accept the possibility of sampling additional splits to hit
                // the target sample keyset
                for (int i_1 = 0; i_1 < splitsToSample || (i_1 < splits.Count && samples.Count <
                                                           numSamples); ++i_1)
                {
                    TaskAttemptContext samplingContext = new TaskAttemptContextImpl(job.GetConfiguration
                                                                                        (), new TaskAttemptID());
                    RecordReader <K, V> reader = inf.CreateRecordReader(splits[i_1], samplingContext);
                    reader.Initialize(splits[i_1], samplingContext);
                    while (reader.NextKeyValue())
                    {
                        if (r.NextDouble() <= freq)
                        {
                            if (samples.Count < numSamples)
                            {
                                samples.AddItem(ReflectionUtils.Copy(job.GetConfiguration(), reader.GetCurrentKey
                                                                         (), null));
                            }
                            else
                            {
                                // When exceeding the maximum number of samples, replace a
                                // random element with this one, then adjust the frequency
                                // to reflect the possibility of existing elements being
                                // pushed out
                                int ind = r.Next(numSamples);
                                if (ind != numSamples)
                                {
                                    samples.Set(ind, ReflectionUtils.Copy(job.GetConfiguration(), reader.GetCurrentKey
                                                                              (), null));
                                }
                                freq *= (numSamples - 1) / (double)numSamples;
                            }
                        }
                    }
                    reader.Close();
                }
                return((K[])Sharpen.Collections.ToArray(samples));
            }
예제 #11
0
            /// <summary>
            /// Randomize the split order, then take the specified number of keys from
            /// each split sampled, where each key is selected with the specified
            /// probability and possibly replaced by a subsequently selected key when
            /// the quota of keys from that split is satisfied.
            /// </summary>
            /// <exception cref="System.IO.IOException"/>
            public virtual K[] GetSample(InputFormat <K, V> inf, JobConf job)
            {
                // ArrayList::toArray doesn't preserve type
                InputSplit[] splits         = inf.GetSplits(job, job.GetNumMapTasks());
                AList <K>    samples        = new AList <K>(numSamples);
                int          splitsToSample = Math.Min(maxSplitsSampled, splits.Length);
                Random       r    = new Random();
                long         seed = r.NextLong();

                r.SetSeed(seed);
                Log.Debug("seed: " + seed);
                // shuffle splits
                for (int i = 0; i < splits.Length; ++i)
                {
                    InputSplit tmp = splits[i];
                    int        j   = r.Next(splits.Length);
                    splits[i] = splits[j];
                    splits[j] = tmp;
                }
                // our target rate is in terms of the maximum number of sample splits,
                // but we accept the possibility of sampling additional splits to hit
                // the target sample keyset
                for (int i_1 = 0; i_1 < splitsToSample || (i_1 < splits.Length && samples.Count <
                                                           numSamples); ++i_1)
                {
                    RecordReader <K, V> reader = inf.GetRecordReader(splits[i_1], job, Reporter.Null);
                    K key   = reader.CreateKey();
                    V value = reader.CreateValue();
                    while (reader.Next(key, value))
                    {
                        if (r.NextDouble() <= freq)
                        {
                            if (samples.Count < numSamples)
                            {
                                samples.AddItem(key);
                            }
                            else
                            {
                                // When exceeding the maximum number of samples, replace a
                                // random element with this one, then adjust the frequency
                                // to reflect the possibility of existing elements being
                                // pushed out
                                int ind = r.Next(numSamples);
                                if (ind != numSamples)
                                {
                                    samples.Set(ind, key);
                                }
                                freq *= (numSamples - 1) / (double)numSamples;
                            }
                            key = reader.CreateKey();
                        }
                    }
                    reader.Close();
                }
                return((K[])Sharpen.Collections.ToArray(samples));
            }
예제 #12
0
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            JobConf  job      = new JobConf(conf);
            Reporter reporter = Reporter.Null;
            Random   random   = new Random();
            long     seed     = random.NextLong();

            Log.Info("seed = " + seed);
            random.SetSeed(seed);
            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            int length   = 10000;
            int numFiles = 10;

            // create a file with various lengths
            CreateFiles(length, numFiles, random);
            // create a combine split for the files
            InputFormat <IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat
                                                              <IntWritable, BytesWritable>();
            IntWritable   key   = new IntWritable();
            BytesWritable value = new BytesWritable();

            for (int i = 0; i < 3; i++)
            {
                int numSplits = random.Next(length / (SequenceFile.SyncInterval / 20)) + 1;
                Log.Info("splitting: requesting = " + numSplits);
                InputSplit[] splits = format.GetSplits(job, numSplits);
                Log.Info("splitting: got =        " + splits.Length);
                // we should have a single split as the length is comfortably smaller than
                // the block size
                NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Length);
                InputSplit split = splits[0];
                NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit
                                                                                        ), split.GetType());
                // check each split
                BitSet bits = new BitSet(length);
                RecordReader <IntWritable, BytesWritable> reader = format.GetRecordReader(split, job
                                                                                          , reporter);
                try
                {
                    while (reader.Next(key, value))
                    {
                        NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(key.Get())
                                                       );
                        bits.Set(key.Get());
                    }
                }
                finally
                {
                    reader.Close();
                }
                NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality
                                                    ());
            }
        }
예제 #13
0
 public static DataRecord.ScopeData  Read(RecordReader ins0)
 {
     if (ins0.Open("ScopeData"))
     {
         DataRecord.ScopeData scope = new DataRecord.ScopeData();
         scope.prefix = ins0.String("prefix");
         scope.requiresDigitPrefix = ins0.Bool("requiresDigitPrefix");
         scope.suffix = ins0.String("suffix");
         if (ins0.Close())
         {
             return(scope);
         }
     }
     return(null);
 }
        /// <exception cref="System.IO.IOException"/>
        private static IList <Text> ReadSplit(TextInputFormat format, InputSplit split, JobConf
                                              jobConf)
        {
            IList <Text> result = new AList <Text>();
            RecordReader <LongWritable, Text> reader = format.GetRecordReader(split, jobConf,
                                                                              voidReporter);
            LongWritable key   = reader.CreateKey();
            Text         value = reader.CreateValue();

            while (reader.Next(key, value))
            {
                result.AddItem(value);
                value = reader.CreateValue();
            }
            reader.Close();
            return(result);
        }
예제 #15
0
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        internal virtual void CheckFormat(Job job, int expectedN, int lastN)
        {
            NLineInputFormat   format = new NLineInputFormat();
            IList <InputSplit> splits = format.GetSplits(job);
            int count = 0;

            for (int i = 0; i < splits.Count; i++)
            {
                NUnit.Framework.Assert.AreEqual("There are no split locations", 0, splits[i].GetLocations
                                                    ().Length);
                TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                                .GetConfiguration());
                RecordReader <LongWritable, Text> reader = format.CreateRecordReader(splits[i], context
                                                                                     );
                Type clazz = reader.GetType();
                NUnit.Framework.Assert.AreEqual("reader class is LineRecordReader.", typeof(LineRecordReader
                                                                                            ), clazz);
                MapContext <LongWritable, Text, LongWritable, Text> mcontext = new MapContextImpl <
                    LongWritable, Text, LongWritable, Text>(job.GetConfiguration(), context.GetTaskAttemptID
                                                                (), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), splits[i]);
                reader.Initialize(splits[i], mcontext);
                try
                {
                    count = 0;
                    while (reader.NextKeyValue())
                    {
                        count++;
                    }
                }
                finally
                {
                    reader.Close();
                }
                if (i == splits.Count - 1)
                {
                    NUnit.Framework.Assert.AreEqual("number of lines in split(" + i + ") is wrong", lastN
                                                    , count);
                }
                else
                {
                    NUnit.Framework.Assert.AreEqual("number of lines in split(" + i + ") is wrong", expectedN
                                                    , count);
                }
            }
        }
예제 #16
0
        // A reporter that does nothing
        /// <exception cref="System.IO.IOException"/>
        internal virtual void CheckFormat(JobConf job, int expectedN)
        {
            NLineInputFormat format = new NLineInputFormat();

            format.Configure(job);
            int ignoredNumSplits = 1;

            InputSplit[] splits = format.GetSplits(job, ignoredNumSplits);
            // check all splits except last one
            int count = 0;

            for (int j = 0; j < splits.Length - 1; j++)
            {
                NUnit.Framework.Assert.AreEqual("There are no split locations", 0, splits[j].GetLocations
                                                    ().Length);
                RecordReader <LongWritable, Text> reader = format.GetRecordReader(splits[j], job,
                                                                                  voidReporter);
                Type readerClass = reader.GetType();
                NUnit.Framework.Assert.AreEqual("reader class is LineRecordReader.", typeof(LineRecordReader
                                                                                            ), readerClass);
                LongWritable key      = reader.CreateKey();
                Type         keyClass = key.GetType();
                NUnit.Framework.Assert.AreEqual("Key class is LongWritable.", typeof(LongWritable
                                                                                     ), keyClass);
                Text value      = reader.CreateValue();
                Type valueClass = value.GetType();
                NUnit.Framework.Assert.AreEqual("Value class is Text.", typeof(Text), valueClass);
                try
                {
                    count = 0;
                    while (reader.Next(key, value))
                    {
                        count++;
                    }
                }
                finally
                {
                    reader.Close();
                }
                NUnit.Framework.Assert.AreEqual("number of lines in split is " + expectedN, expectedN
                                                , count);
            }
        }
예제 #17
0
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        private static IList <Text> ReadSplit(KeyValueTextInputFormat format, InputSplit split
                                              , Job job)
        {
            IList <Text>       result  = new AList <Text>();
            Configuration      conf    = job.GetConfiguration();
            TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(conf
                                                                                            );
            RecordReader <Text, Text> reader = format.CreateRecordReader(split, MapReduceTestUtil
                                                                         .CreateDummyMapTaskAttemptContext(conf));
            MapContext <Text, Text, Text, Text> mcontext = new MapContextImpl <Text, Text, Text
                                                                               , Text>(conf, context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil.
                                                                                       CreateDummyReporter(), split);

            reader.Initialize(split, mcontext);
            while (reader.NextKeyValue())
            {
                result.AddItem(new Text(reader.GetCurrentValue()));
            }
            reader.Close();
            return(result);
        }
예제 #18
0
        public void Extract(out TableData data)
        {
            data = new TableData();
            string[] record;

            // Skip records.
            for (int i = 0; i < idOffset - 1; i++)
            {
                reader.ReadNextRecord(out record);
            }

            int recordCounter = 0;

            while (reader.ReadNextRecord(out record))
            {
                // Skip header and record column count.
                if (recordCounter == 0)
                {
                    data.header = record;
                    recordCounter++;

                    int columnCount = record.Length;
                    writer.CreateFields(columnCount + ADDITIONAL_COLUMNS_COUNT);
                    data.columnCount = columnCount;

                    continue;
                }

                string[] hashedRecord = BuildHashedRecord(record);
                writer.Write(hashedRecord.ToArray());
                recordCounter++;
                if (recordCounter % 1000 == 0)
                {
                    Console.WriteLine("Extracted {0} rows.", recordCounter);
                }
            }
            reader.Close();

            data.recordCount = recordCounter;
        }
예제 #19
0
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        private int CountRecords(int numSplits)
        {
            InputFormat <Text, BytesWritable> format = new SequenceFileInputFilter <Text, BytesWritable
                                                                                    >();

            if (numSplits == 0)
            {
                numSplits = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1;
            }
            FileInputFormat.SetMaxInputSplitSize(job, fs.GetFileStatus(inFile).GetLen() / numSplits
                                                 );
            TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                            .GetConfiguration());
            // check each split
            int count = 0;

            foreach (InputSplit split in format.GetSplits(job))
            {
                RecordReader <Text, BytesWritable> reader = format.CreateRecordReader(split, context
                                                                                      );
                MapContext <Text, BytesWritable, Text, BytesWritable> mcontext = new MapContextImpl
                                                                                 <Text, BytesWritable, Text, BytesWritable>(job.GetConfiguration(), context.GetTaskAttemptID
                                                                                                                                (), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), split);
                reader.Initialize(split, mcontext);
                try
                {
                    while (reader.NextKeyValue())
                    {
                        Log.Info("Accept record " + reader.GetCurrentKey().ToString());
                        count++;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            return(count);
        }
예제 #20
0
        /// <exception cref="System.IO.IOException"/>
        private static IList <string> ReadSplit(FixedLengthInputFormat format, InputSplit
                                                split, JobConf job)
        {
            IList <string> result = new AList <string>();
            RecordReader <LongWritable, BytesWritable> reader = format.GetRecordReader(split,
                                                                                       job, voidReporter);
            LongWritable  key   = reader.CreateKey();
            BytesWritable value = reader.CreateValue();

            try
            {
                while (reader.Next(key, value))
                {
                    result.AddItem(Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value.GetLength
                                                                         ()));
                }
            }
            finally
            {
                reader.Close();
            }
            return(result);
        }
예제 #21
0
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            Job    job    = Job.GetInstance(new Configuration(defaultConf));
            Random random = new Random();
            long   seed   = random.NextLong();

            Log.Info("seed = " + seed);
            random.SetSeed(seed);
            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            int length   = 10000;
            int numFiles = 10;

            // create files with various lengths
            CreateFiles(length, numFiles, random);
            // create a combined split for the files
            CombineTextInputFormat format = new CombineTextInputFormat();

            for (int i = 0; i < 3; i++)
            {
                int numSplits = random.Next(length / 20) + 1;
                Log.Info("splitting: requesting = " + numSplits);
                IList <InputSplit> splits = format.GetSplits(job);
                Log.Info("splitting: got =        " + splits.Count);
                // we should have a single split as the length is comfortably smaller than
                // the block size
                NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Count);
                InputSplit split = splits[0];
                NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit
                                                                                        ), split.GetType());
                // check the split
                BitSet bits = new BitSet(length);
                Log.Debug("split= " + split);
                TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                                .GetConfiguration());
                RecordReader <LongWritable, Text> reader = format.CreateRecordReader(split, context
                                                                                     );
                NUnit.Framework.Assert.AreEqual("reader class is CombineFileRecordReader.", typeof(
                                                    CombineFileRecordReader), reader.GetType());
                MapContext <LongWritable, Text, LongWritable, Text> mcontext = new MapContextImpl <
                    LongWritable, Text, LongWritable, Text>(job.GetConfiguration(), context.GetTaskAttemptID
                                                                (), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), split);
                reader.Initialize(split, mcontext);
                try
                {
                    int count = 0;
                    while (reader.NextKeyValue())
                    {
                        LongWritable key = reader.GetCurrentKey();
                        NUnit.Framework.Assert.IsNotNull("Key should not be null.", key);
                        Text value = reader.GetCurrentValue();
                        int  v     = System.Convert.ToInt32(value.ToString());
                        Log.Debug("read " + v);
                        NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v));
                        bits.Set(v);
                        count++;
                    }
                    Log.Debug("split=" + split + " count=" + count);
                }
                finally
                {
                    reader.Close();
                }
                NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality
                                                    ());
            }
        }
 /// <exception cref="System.IO.IOException"/>
 public virtual void Close()
 {
     delegate_.Close();
 }
예제 #23
0
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            JobConf job  = new JobConf();
            Path    file = new Path(workDir, "test.txt");
            // A reporter that does nothing
            Reporter reporter = Reporter.Null;
            int      seed     = new Random().Next();

            Log.Info("seed = " + seed);
            Random random = new Random(seed);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            // for a variety of lengths
            for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) +
                                                               1)
            {
                Log.Debug("creating; entries = " + length);
                // create a file with length entries
                TextWriter writer = new OutputStreamWriter(localFs.Create(file));
                try
                {
                    for (int i = 0; i < length; i++)
                    {
                        writer.Write(Sharpen.Extensions.ToString(i * 2));
                        writer.Write("\t");
                        writer.Write(Sharpen.Extensions.ToString(i));
                        writer.Write("\n");
                    }
                }
                finally
                {
                    writer.Close();
                }
                // try splitting the file in a variety of sizes
                KeyValueTextInputFormat format = new KeyValueTextInputFormat();
                format.Configure(job);
                for (int i_1 = 0; i_1 < 3; i_1++)
                {
                    int numSplits = random.Next(MaxLength / 20) + 1;
                    Log.Debug("splitting: requesting = " + numSplits);
                    InputSplit[] splits = format.GetSplits(job, numSplits);
                    Log.Debug("splitting: got =        " + splits.Length);
                    // check each split
                    BitSet bits = new BitSet(length);
                    for (int j = 0; j < splits.Length; j++)
                    {
                        Log.Debug("split[" + j + "]= " + splits[j]);
                        RecordReader <Text, Text> reader = format.GetRecordReader(splits[j], job, reporter
                                                                                  );
                        Type readerClass = reader.GetType();
                        NUnit.Framework.Assert.AreEqual("reader class is KeyValueLineRecordReader.", typeof(
                                                            KeyValueLineRecordReader), readerClass);
                        Text key        = reader.CreateKey();
                        Type keyClass   = key.GetType();
                        Text value      = reader.CreateValue();
                        Type valueClass = value.GetType();
                        NUnit.Framework.Assert.AreEqual("Key class is Text.", typeof(Text), keyClass);
                        NUnit.Framework.Assert.AreEqual("Value class is Text.", typeof(Text), valueClass);
                        try
                        {
                            int count = 0;
                            while (reader.Next(key, value))
                            {
                                int v = System.Convert.ToInt32(value.ToString());
                                Log.Debug("read " + v);
                                if (bits.Get(v))
                                {
                                    Log.Warn("conflict with " + v + " in split " + j + " at position " + reader.GetPos
                                                 ());
                                }
                                NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v));
                                bits.Set(v);
                                count++;
                            }
                            Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }
                    NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality
                                                        ());
                }
            }
        }
예제 #24
0
 /// <summary>Forward close request to proxied RR.</summary>
 /// <exception cref="System.IO.IOException"/>
 public override void Close()
 {
     rr.Close();
 }
예제 #25
0
 /// <summary>Forward close request to proxied RR.</summary>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Close()
 {
     rr.Close();
 }
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public virtual void TestBinary()
        {
            Configuration conf   = new Configuration();
            Job           job    = Job.GetInstance(conf);
            Path          outdir = new Path(Runtime.GetProperty("test.build.data", "/tmp"), "outseq");
            Random        r      = new Random();
            long          seed   = r.NextLong();

            r.SetSeed(seed);
            FileOutputFormat.SetOutputPath(job, outdir);
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable
                                                                                       ));
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable
                                                                                         ));
            SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
            SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType
                                                                      .Block);
            BytesWritable      bkey    = new BytesWritable();
            BytesWritable      bval    = new BytesWritable();
            TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                            .GetConfiguration());
            OutputFormat <BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat
                                                                           ();
            OutputCommitter committer = outputFormat.GetOutputCommitter(context);

            committer.SetupJob(job);
            RecordWriter <BytesWritable, BytesWritable> writer = outputFormat.GetRecordWriter(
                context);
            IntWritable      iwritable = new IntWritable();
            DoubleWritable   dwritable = new DoubleWritable();
            DataOutputBuffer outbuf    = new DataOutputBuffer();

            Log.Info("Creating data by SequenceFileAsBinaryOutputFormat");
            try
            {
                for (int i = 0; i < Records; ++i)
                {
                    iwritable = new IntWritable(r.Next());
                    iwritable.Write(outbuf);
                    bkey.Set(outbuf.GetData(), 0, outbuf.GetLength());
                    outbuf.Reset();
                    dwritable = new DoubleWritable(r.NextDouble());
                    dwritable.Write(outbuf);
                    bval.Set(outbuf.GetData(), 0, outbuf.GetLength());
                    outbuf.Reset();
                    writer.Write(bkey, bval);
                }
            }
            finally
            {
                writer.Close(context);
            }
            committer.CommitTask(context);
            committer.CommitJob(job);
            InputFormat <IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat <IntWritable
                                                                                             , DoubleWritable>();
            int count = 0;

            r.SetSeed(seed);
            SequenceFileInputFormat.SetInputPaths(job, outdir);
            Log.Info("Reading data by SequenceFileInputFormat");
            foreach (InputSplit split in iformat.GetSplits(job))
            {
                RecordReader <IntWritable, DoubleWritable> reader = iformat.CreateRecordReader(split
                                                                                               , context);
                MapContext <IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
                    new MapContextImpl <IntWritable, DoubleWritable, BytesWritable, BytesWritable>(job
                                                                                                   .GetConfiguration(), context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil
                                                                                                   .CreateDummyReporter(), split);
                reader.Initialize(split, mcontext);
                try
                {
                    int    sourceInt;
                    double sourceDouble;
                    while (reader.NextKeyValue())
                    {
                        sourceInt    = r.Next();
                        sourceDouble = r.NextDouble();
                        iwritable    = reader.GetCurrentKey();
                        dwritable    = reader.GetCurrentValue();
                        NUnit.Framework.Assert.AreEqual("Keys don't match: " + "*" + iwritable.Get() + ":"
                                                        + sourceInt + "*", sourceInt, iwritable.Get());
                        NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + dwritable.Get() + ":"
                                                      + sourceDouble + "*", double.Compare(dwritable.Get(), sourceDouble) == 0);
                        ++count;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
        }
        // A random task attempt id for testing.
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestBinary()
        {
            JobConf    job = new JobConf();
            FileSystem fs  = FileSystem.GetLocal(job);
            Path       dir = new Path(new Path(new Path(Runtime.GetProperty("test.build.data", ".")
                                                        ), FileOutputCommitter.TempDirName), "_" + attempt);
            Path   file = new Path(dir, "testbinary.seq");
            Random r    = new Random();
            long   seed = r.NextLong();

            r.SetSeed(seed);
            fs.Delete(dir, true);
            if (!fs.Mkdirs(dir))
            {
                Fail("Failed to create output directory");
            }
            job.Set(JobContext.TaskAttemptId, attempt);
            FileOutputFormat.SetOutputPath(job, dir.GetParent().GetParent());
            FileOutputFormat.SetWorkOutputPath(job, dir);
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable
                                                                                       ));
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable
                                                                                         ));
            SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
            SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType
                                                                      .Block);
            BytesWritable bkey = new BytesWritable();
            BytesWritable bval = new BytesWritable();
            RecordWriter <BytesWritable, BytesWritable> writer = new SequenceFileAsBinaryOutputFormat
                                                                     ().GetRecordWriter(fs, job, file.ToString(), Reporter.Null);
            IntWritable      iwritable = new IntWritable();
            DoubleWritable   dwritable = new DoubleWritable();
            DataOutputBuffer outbuf    = new DataOutputBuffer();

            Log.Info("Creating data by SequenceFileAsBinaryOutputFormat");
            try
            {
                for (int i = 0; i < Records; ++i)
                {
                    iwritable = new IntWritable(r.Next());
                    iwritable.Write(outbuf);
                    bkey.Set(outbuf.GetData(), 0, outbuf.GetLength());
                    outbuf.Reset();
                    dwritable = new DoubleWritable(r.NextDouble());
                    dwritable.Write(outbuf);
                    bval.Set(outbuf.GetData(), 0, outbuf.GetLength());
                    outbuf.Reset();
                    writer.Write(bkey, bval);
                }
            }
            finally
            {
                writer.Close(Reporter.Null);
            }
            InputFormat <IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat <IntWritable
                                                                                             , DoubleWritable>();
            int count = 0;

            r.SetSeed(seed);
            DataInputBuffer buf       = new DataInputBuffer();
            int             NumSplits = 3;

            SequenceFileInputFormat.AddInputPath(job, file);
            Log.Info("Reading data by SequenceFileInputFormat");
            foreach (InputSplit split in iformat.GetSplits(job, NumSplits))
            {
                RecordReader <IntWritable, DoubleWritable> reader = iformat.GetRecordReader(split,
                                                                                            job, Reporter.Null);
                try
                {
                    int    sourceInt;
                    double sourceDouble;
                    while (reader.Next(iwritable, dwritable))
                    {
                        sourceInt    = r.Next();
                        sourceDouble = r.NextDouble();
                        NUnit.Framework.Assert.AreEqual("Keys don't match: " + "*" + iwritable.Get() + ":"
                                                        + sourceInt + "*", sourceInt, iwritable.Get());
                        NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + dwritable.Get() + ":"
                                                      + sourceDouble + "*", double.Compare(dwritable.Get(), sourceDouble) == 0);
                        ++count;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
        }
예제 #28
0
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestBinary()
        {
            JobConf    job  = new JobConf();
            FileSystem fs   = FileSystem.GetLocal(job);
            Path       dir  = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path       file = new Path(dir, "testbinary.seq");
            Random     r    = new Random();
            long       seed = r.NextLong();

            r.SetSeed(seed);
            fs.Delete(dir, true);
            FileInputFormat.SetInputPaths(job, dir);
            Text tkey = new Text();
            Text tval = new Text();

            SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, file, typeof(Text),
                                                                 typeof(Text));
            try
            {
                for (int i = 0; i < Records; ++i)
                {
                    tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
                    tval.Set(System.Convert.ToString(r.NextLong(), 36));
                    writer.Append(tkey, tval);
                }
            }
            finally
            {
                writer.Close();
            }
            InputFormat <BytesWritable, BytesWritable> bformat = new SequenceFileAsBinaryInputFormat
                                                                     ();
            int count = 0;

            r.SetSeed(seed);
            BytesWritable   bkey      = new BytesWritable();
            BytesWritable   bval      = new BytesWritable();
            Text            cmpkey    = new Text();
            Text            cmpval    = new Text();
            DataInputBuffer buf       = new DataInputBuffer();
            int             NumSplits = 3;

            FileInputFormat.SetInputPaths(job, file);
            foreach (InputSplit split in bformat.GetSplits(job, NumSplits))
            {
                RecordReader <BytesWritable, BytesWritable> reader = bformat.GetRecordReader(split
                                                                                             , job, Reporter.Null);
                try
                {
                    while (reader.Next(bkey, bval))
                    {
                        tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
                        tval.Set(System.Convert.ToString(r.NextLong(), 36));
                        buf.Reset(bkey.GetBytes(), bkey.GetLength());
                        cmpkey.ReadFields(buf);
                        buf.Reset(bval.GetBytes(), bval.GetLength());
                        cmpval.ReadFields(buf);
                        NUnit.Framework.Assert.IsTrue("Keys don't match: " + "*" + cmpkey.ToString() + ":"
                                                      + tkey.ToString() + "*", cmpkey.ToString().Equals(tkey.ToString()));
                        NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + cmpval.ToString() + ":"
                                                      + tval.ToString() + "*", cmpval.ToString().Equals(tval.ToString()));
                        ++count;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
        }
예제 #29
0
        /// <exception cref="System.IO.IOException"/>
        private void RunRandomTests(CompressionCodec codec)
        {
            StringBuilder fileName = new StringBuilder("testFormat.txt");

            if (codec != null)
            {
                fileName.Append(".gz");
            }
            localFs.Delete(workDir, true);
            Path file = new Path(workDir, fileName.ToString());
            int  seed = new Random().Next();

            Log.Info("Seed = " + seed);
            Random        random   = new Random(seed);
            int           MaxTests = 20;
            LongWritable  key      = new LongWritable();
            BytesWritable value    = new BytesWritable();

            for (int i = 0; i < MaxTests; i++)
            {
                Log.Info("----------------------------------------------------------");
                // Maximum total records of 999
                int totalRecords = random.Next(999) + 1;
                // Test an empty file
                if (i == 8)
                {
                    totalRecords = 0;
                }
                // Maximum bytes in a record of 100K
                int recordLength = random.Next(1024 * 100) + 1;
                // For the 11th test, force a record length of 1
                if (i == 10)
                {
                    recordLength = 1;
                }
                // The total bytes in the test file
                int fileSize = (totalRecords * recordLength);
                Log.Info("totalRecords=" + totalRecords + " recordLength=" + recordLength);
                // Create the job
                JobConf job = new JobConf(defaultConf);
                if (codec != null)
                {
                    ReflectionUtils.SetConf(codec, job);
                }
                // Create the test file
                AList <string> recordList = CreateFile(file, codec, recordLength, totalRecords);
                NUnit.Framework.Assert.IsTrue(localFs.Exists(file));
                //set the fixed length record length config property for the job
                FixedLengthInputFormat.SetRecordLength(job, recordLength);
                int numSplits = 1;
                // Arbitrarily set number of splits.
                if (i > 0)
                {
                    if (i == (MaxTests - 1))
                    {
                        // Test a split size that is less than record len
                        numSplits = (int)(fileSize / Math.Floor(recordLength / 2));
                    }
                    else
                    {
                        if (MaxTests % i == 0)
                        {
                            // Let us create a split size that is forced to be
                            // smaller than the end file itself, (ensures 1+ splits)
                            numSplits = fileSize / (fileSize - random.Next(fileSize));
                        }
                        else
                        {
                            // Just pick a random split size with no upper bound
                            numSplits = Math.Max(1, fileSize / random.Next(int.MaxValue));
                        }
                    }
                    Log.Info("Number of splits set to: " + numSplits);
                }
                // Setup the input path
                FileInputFormat.SetInputPaths(job, workDir);
                // Try splitting the file in a variety of sizes
                FixedLengthInputFormat format = new FixedLengthInputFormat();
                format.Configure(job);
                InputSplit[] splits = format.GetSplits(job, numSplits);
                Log.Info("Actual number of splits = " + splits.Length);
                // Test combined split lengths = total file size
                long recordOffset = 0;
                int  recordNumber = 0;
                foreach (InputSplit split in splits)
                {
                    RecordReader <LongWritable, BytesWritable> reader = format.GetRecordReader(split,
                                                                                               job, voidReporter);
                    Type clazz = reader.GetType();
                    NUnit.Framework.Assert.AreEqual("RecordReader class should be FixedLengthRecordReader:"
                                                    , typeof(FixedLengthRecordReader), clazz);
                    // Plow through the records in this split
                    while (reader.Next(key, value))
                    {
                        NUnit.Framework.Assert.AreEqual("Checking key", (long)(recordNumber * recordLength
                                                                               ), key.Get());
                        string valueString = Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value
                                                                               .GetLength());
                        NUnit.Framework.Assert.AreEqual("Checking record length:", recordLength, value.GetLength
                                                            ());
                        NUnit.Framework.Assert.IsTrue("Checking for more records than expected:", recordNumber
                                                      < totalRecords);
                        string origRecord = recordList[recordNumber];
                        NUnit.Framework.Assert.AreEqual("Checking record content:", origRecord, valueString
                                                        );
                        recordNumber++;
                    }
                    reader.Close();
                }
                NUnit.Framework.Assert.AreEqual("Total original records should be total read records:"
                                                , recordList.Count, recordNumber);
            }
        }
예제 #30
0
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public virtual void TestBinary()
        {
            Job        job  = Job.GetInstance();
            FileSystem fs   = FileSystem.GetLocal(job.GetConfiguration());
            Path       dir  = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path       file = new Path(dir, "testbinary.seq");
            Random     r    = new Random();
            long       seed = r.NextLong();

            r.SetSeed(seed);
            fs.Delete(dir, true);
            FileInputFormat.SetInputPaths(job, dir);
            Text tkey = new Text();
            Text tval = new Text();

            SequenceFile.Writer writer = new SequenceFile.Writer(fs, job.GetConfiguration(),
                                                                 file, typeof(Text), typeof(Text));
            try
            {
                for (int i = 0; i < Records; ++i)
                {
                    tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
                    tval.Set(System.Convert.ToString(r.NextLong(), 36));
                    writer.Append(tkey, tval);
                }
            }
            finally
            {
                writer.Close();
            }
            TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                            .GetConfiguration());
            InputFormat <BytesWritable, BytesWritable> bformat = new SequenceFileAsBinaryInputFormat
                                                                     ();
            int count = 0;

            r.SetSeed(seed);
            BytesWritable   bkey   = new BytesWritable();
            BytesWritable   bval   = new BytesWritable();
            Text            cmpkey = new Text();
            Text            cmpval = new Text();
            DataInputBuffer buf    = new DataInputBuffer();

            FileInputFormat.SetInputPaths(job, file);
            foreach (InputSplit split in bformat.GetSplits(job))
            {
                RecordReader <BytesWritable, BytesWritable> reader = bformat.CreateRecordReader(split
                                                                                                , context);
                MapContext <BytesWritable, BytesWritable, BytesWritable, BytesWritable> mcontext =
                    new MapContextImpl <BytesWritable, BytesWritable, BytesWritable, BytesWritable>(job
                                                                                                    .GetConfiguration(), context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil
                                                                                                    .CreateDummyReporter(), split);
                reader.Initialize(split, mcontext);
                try
                {
                    while (reader.NextKeyValue())
                    {
                        bkey = reader.GetCurrentKey();
                        bval = reader.GetCurrentValue();
                        tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
                        tval.Set(System.Convert.ToString(r.NextLong(), 36));
                        buf.Reset(bkey.GetBytes(), bkey.GetLength());
                        cmpkey.ReadFields(buf);
                        buf.Reset(bval.GetBytes(), bval.GetLength());
                        cmpval.ReadFields(buf);
                        NUnit.Framework.Assert.IsTrue("Keys don't match: " + "*" + cmpkey.ToString() + ":"
                                                      + tkey.ToString() + "*", cmpkey.ToString().Equals(tkey.ToString()));
                        NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + cmpval.ToString() + ":"
                                                      + tval.ToString() + "*", cmpval.ToString().Equals(tval.ToString()));
                        ++count;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
        }