Esempio n. 1
0
 /// <summary>
 /// Builds the composable record reader registered under this node's identifier,
 /// handing it the reader produced by the underlying input format.
 /// </summary>
 /// <param name="split">Split passed to the underlying input format.</param>
 /// <param name="job">Job configuration; run through <c>GetConf</c> before use.</param>
 /// <param name="reporter">Reporter forwarded to the underlying input format.</param>
 /// <returns>The reader created by the registered constructor.</returns>
 /// <exception cref="System.IO.IOException">
 /// No constructor is registered for the identifier, or the reflective
 /// construction failed (the original failure is attached as the cause).
 /// </exception>
 public override ComposableRecordReader GetRecordReader(InputSplit split, JobConf
                                                         job, Reporter reporter)
 {
     try
     {
         if (rrCstrMap.Contains(ident))
         {
             // Look the constructor up first so evaluation order matches a
             // single chained call: lookup, then the arguments left to right.
             var readerCtor = rrCstrMap[ident];
             return readerCtor.NewInstance(id, inf.GetRecordReader(split, GetConf(job), reporter), cmpcl);
         }
         throw new IOException("No RecordReader for " + ident);
     }
     catch (MemberAccessException accessFailure)
     {
         throw WrapAsIOException(accessFailure);
     }
     catch (InstantiationException instantiationFailure)
     {
         throw WrapAsIOException(instantiationFailure);
     }
     catch (TargetInvocationException invocationFailure)
     {
         throw WrapAsIOException(invocationFailure);
     }
 }

 /// <summary>Creates a message-less IOException whose cause is set to <paramref name="cause"/>.</summary>
 private static IOException WrapAsIOException(Exception cause)
 {
     return (IOException)Sharpen.Extensions.InitCause(new IOException(), cause);
 }
Esempio n. 2
0
        /// <summary>
        /// Reads every record of the benchmark file through the configured input
        /// format and reports how long the read loop took.
        /// </summary>
        /// <param name="conf">
        /// Job configuration supplying the input format, the first input path and
        /// the <c>test.filebench.name</c> file name (empty string when unset).
        /// </param>
        /// <returns>Elapsed time of the read loop, in milliseconds.</returns>
        /// <exception cref="System.IO.IOException"/>
        internal static long ReadBench(JobConf conf)
        {
            // InputFormat instantiation
            InputFormat  inf = conf.GetInputFormat();
            string       fn  = conf.Get("test.filebench.name", string.Empty);
            Path         pin = new Path(FileInputFormat.GetInputPaths(conf)[0], fn);
            FileStatus   @in = pin.GetFileSystem(conf).GetFileStatus(pin);
            // One split covering the whole file; the cast on null selects the
            // string[] overload of the FileSplit constructor.
            RecordReader rr  = inf.GetRecordReader(new FileSplit(pin, 0, @in.GetLen(), (string
                                                                                        [])null), conf, Reporter.Null);

            try
            {
                object key = rr.CreateKey();
                object val = rr.CreateValue();

                // `new DateTime()` yields DateTime.MinValue rather than the current
                // time, so two of them always differ by zero. Time the loop with a
                // Stopwatch instead.
                System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
                while (rr.Next(key, val))
                {
                    // Drain the reader; only the elapsed time matters.
                }
                timer.Stop();
                return(timer.ElapsedMilliseconds);
            }
            finally
            {
                rr.Close();
            }
        }
Esempio n. 3
0
            /// <summary>From each split sampled, take the first numSamples / numSplits records.</summary>
            /// <param name="inf">Input format used to compute the splits and open a reader on each sampled split.</param>
            /// <param name="job">Job configuration; its map-task count is passed to <c>GetSplits</c>.</param>
            /// <returns>
            /// The sampled keys. Empty when there are no splits to sample
            /// (no input splits, or <c>maxSplitsSampled</c> is not positive).
            /// </returns>
            /// <exception cref="System.IO.IOException"/>
            public virtual K[] GetSample(InputFormat <K, V> inf, JobConf job)
            {
                // ArrayList::toArray doesn't preserve type
                InputSplit[] splits         = inf.GetSplits(job, job.GetNumMapTasks());
                AList <K>    samples        = new AList <K>(numSamples);
                int          splitsToSample = Math.Min(maxSplitsSampled, splits.Length);

                if (splitsToSample <= 0)
                {
                    // Nothing to sample. Returning here also keeps the divisions
                    // below from throwing DivideByZeroException.
                    return((K[])Sharpen.Collections.ToArray(samples));
                }

                int  splitStep       = splits.Length / splitsToSample;
                int  samplesPerSplit = numSamples / splitsToSample;
                long records         = 0;

                for (int i = 0; i < splitsToSample; ++i)
                {
                    RecordReader <K, V> reader = inf.GetRecordReader(splits[i * splitStep], job, Reporter
                                                                     .Null);
                    try
                    {
                        K key   = reader.CreateKey();
                        V value = reader.CreateValue();
                        while (reader.Next(key, value))
                        {
                            samples.AddItem(key);
                            // Fresh key object so the stored sample is not overwritten
                            // by the next read.
                            key = reader.CreateKey();
                            ++records;
                            // `records` is cumulative across splits, so the quota for
                            // split i is reached at (i + 1) * samplesPerSplit.
                            if ((i + 1) * samplesPerSplit <= records)
                            {
                                break;
                            }
                        }
                    }
                    finally
                    {
                        // Close the reader even when reading throws.
                        reader.Close();
                    }
                }
                return((K[])Sharpen.Collections.ToArray(samples));
            }
Esempio n. 4
0
            /// <summary>
            /// Randomize the split order, then take the specified number of keys from
            /// each split sampled, where each key is selected with the specified
            /// probability and possibly replaced by a subsequently selected key when
            /// the quota of keys from that split is satisfied.
            /// </summary>
            /// <param name="inf">Input format used to compute the splits and open a reader on each sampled split.</param>
            /// <param name="job">Job configuration; its map-task count is passed to <c>GetSplits</c>.</param>
            /// <returns>The sampled keys (at most <c>numSamples</c> of them).</returns>
            /// <exception cref="System.IO.IOException"/>
            public virtual K[] GetSample(InputFormat <K, V> inf, JobConf job)
            {
                // ArrayList::toArray doesn't preserve type
                InputSplit[] splits         = inf.GetSplits(job, job.GetNumMapTasks());
                AList <K>    samples        = new AList <K>(numSamples);
                int          splitsToSample = Math.Min(maxSplitsSampled, splits.Length);
                Random       r    = new Random();
                long         seed = r.NextLong();

                // Re-seed with a known value and log it so a run can be reproduced.
                r.SetSeed(seed);
                Log.Debug("seed: " + seed);
                // shuffle splits
                for (int i = 0; i < splits.Length; ++i)
                {
                    InputSplit tmp = splits[i];
                    int        j   = r.Next(splits.Length);
                    splits[i] = splits[j];
                    splits[j] = tmp;
                }
                // our target rate is in terms of the maximum number of sample splits,
                // but we accept the possibility of sampling additional splits to hit
                // the target sample keyset
                for (int i_1 = 0; i_1 < splitsToSample || (i_1 < splits.Length && samples.Count <
                                                           numSamples); ++i_1)
                {
                    RecordReader <K, V> reader = inf.GetRecordReader(splits[i_1], job, Reporter.Null);
                    try
                    {
                        K key   = reader.CreateKey();
                        V value = reader.CreateValue();
                        while (reader.Next(key, value))
                        {
                            if (r.NextDouble() <= freq)
                            {
                                if (samples.Count < numSamples)
                                {
                                    samples.AddItem(key);
                                }
                                else
                                {
                                    // When exceeding the maximum number of samples, replace a
                                    // random element with this one, then adjust the frequency
                                    // to reflect the possibility of existing elements being
                                    // pushed out
                                    int ind = r.Next(numSamples);
                                    // NOTE(review): if Next(n) never returns n this check is
                                    // always true — confirm against the Random type in use.
                                    if (ind != numSamples)
                                    {
                                        samples.Set(ind, key);
                                    }
                                    // NOTE(review): freq is not a local, so this adjustment
                                    // outlives the call — confirm that is intended.
                                    freq *= (numSamples - 1) / (double)numSamples;
                                }
                                // Fresh key object so the stored sample is not overwritten
                                // by the next read.
                                key = reader.CreateKey();
                            }
                        }
                    }
                    finally
                    {
                        // Close the reader even when reading throws.
                        reader.Close();
                    }
                }
                return((K[])Sharpen.Collections.ToArray(samples));
            }
Esempio n. 5
0
            /// <summary>
            /// Instantiates the input format class obtained from the job configuration
            /// (falling back to <c>SequenceFileInputFormat</c>) and asks it for a reader
            /// over the file described by the given indirect split.
            /// </summary>
            /// <param name="split">Must be an <c>IndirectSplit</c>; its path and length define the file split to read.</param>
            /// <param name="job">Job configuration used for the class lookup, instantiation and the reader.</param>
            /// <param name="reporter">Reporter forwarded to the instantiated input format.</param>
            /// <returns>The reader created by the instantiated input format.</returns>
            /// <exception cref="System.IO.IOException"/>
            public virtual RecordReader GetRecordReader(InputSplit split, JobConf job, Reporter
                                                        reporter)
            {
                // NOTE(review): the first argument to GetClass looks like a type name
                // where a configuration key would be expected — confirm.
                var formatClass = job.GetClass(GenericMRLoadGenerator.IndirectInputFormat, typeof(SequenceFileInputFormat));
                InputFormat delegateFormat = (InputFormat)ReflectionUtils.NewInstance(formatClass, job);

                var indirectSplit = (GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit)split;

                // Read the referenced file from offset 0 for its full length; the cast
                // on null selects the string[] overload of the FileSplit constructor.
                FileSplit wholeFile = new FileSplit(indirectSplit.GetPath(), 0, indirectSplit.GetLength(), (string[])null);
                return delegateFormat.GetRecordReader(wholeFile, job, reporter);
            }
Esempio n. 6
0
        /// <summary>
        /// Resolves the input format carried by a tagged split, instantiates it and
        /// delegates reader creation to it for the wrapped split.
        /// </summary>
        /// <param name="split">Must be a <c>TaggedInputSplit</c>.</param>
        /// <param name="conf">Job configuration used to instantiate the input format and create the reader.</param>
        /// <param name="reporter">Reporter forwarded to the delegate input format.</param>
        /// <returns>The reader produced by the delegate input format.</returns>
        /// <exception cref="System.IO.IOException"/>
        public virtual RecordReader <K, V> GetRecordReader(InputSplit split, JobConf conf,
                                                           Reporter reporter)
        {
            TaggedInputSplit tagged = (TaggedInputSplit)split;

            // The tagged split records which input format class should read it.
            var formatClass = tagged.GetInputFormatClass();
            InputFormat <K, V> delegateFormat = (InputFormat <K, V>)ReflectionUtils.NewInstance(formatClass, conf);

            // Hand the delegate the split wrapped inside the tagged one.
            var wrappedSplit = tagged.GetInputSplit();
            return delegateFormat.GetRecordReader(wrappedSplit, conf, reporter);
        }
Esempio n. 7
0
        /// <summary>
        /// Reads every record of a split and returns the values in read order.
        /// </summary>
        /// <param name="format">Input format used to open a reader on the split.</param>
        /// <param name="split">Split to read.</param>
        /// <param name="job">Job configuration passed to the reader.</param>
        /// <returns>One <c>Text</c> value per record read.</returns>
        /// <exception cref="System.IO.IOException"/>
        private static IList <Text> ReadSplit(InputFormat <LongWritable, Text> format, InputSplit
                                              split, JobConf job)
        {
            IList <Text> result = new AList <Text>();
            RecordReader <LongWritable, Text> reader = format.GetRecordReader(split, job, voidReporter
                                                                              );

            try
            {
                LongWritable key   = reader.CreateKey();
                Text         value = reader.CreateValue();

                while (reader.Next(key, value))
                {
                    result.AddItem(value);
                    // Fresh value object so the stored one is not overwritten by the
                    // next read.
                    value = reader.CreateValue();
                }
            }
            finally
            {
                // Close the reader even when reading throws.
                reader.Close();
            }
            return(result);
        }