Пример #1
0
        /// <summary>
        /// Retrieves a map of
        /// <see cref="Org.Apache.Hadoop.FS.Path"/>
        /// s to the
        /// <see cref="Org.Apache.Hadoop.Mapred.Mapper{K1, V1, K2, V2}"/>
        /// class that should be used for them.
        /// </summary>
        /// <remarks>
        /// The mapping is read from the <c>mapreduce.input.multipleinputs.dir.mappers</c>
        /// property: a comma-separated list of entries, each of the form
        /// <c>path;mapperClassName</c>.
        /// </remarks>
        /// <param name="conf">The configuration of the job</param>
        /// <seealso cref="AddInputPath(Org.Apache.Hadoop.Mapred.JobConf, Org.Apache.Hadoop.FS.Path, System.Type{T}, System.Type{T})"/>
        /// <returns>
        /// A map of paths to mappers for the job; an empty map when the property is not set
        /// </returns>
        internal static IDictionary <Path, Type> GetMapperTypeMap(JobConf conf)
        {
            // Read the property once so the null check and the parsing below
            // are guaranteed to operate on the same value.
            string mappings = conf.Get("mapreduce.input.multipleinputs.dir.mappers");

            if (mappings == null)
            {
                return(Sharpen.Collections.EmptyMap());
            }
            IDictionary <Path, Type> m = new Dictionary <Path, Type>();

            foreach (string pathMapping in mappings.Split(","))
            {
                // split[0] is the input path, split[1] the mapper class name.
                string[] split = pathMapping.Split(";");
                Type     mapClass;
                try
                {
                    mapClass = (Type)conf.GetClassByName(split[1]);
                }
                catch (TypeLoadException e)
                {
                    // Surface an unresolvable mapper class as an unchecked failure.
                    throw new RuntimeException(e);
                }
                m[new Path(split[0])] = mapClass;
            }
            return(m);
        }
 /// <summary>
 /// Tries to resolve the <c>testjar.ClassWordCount</c> class through the
 /// given job configuration.
 /// </summary>
 /// <param name="jobConf">The job configuration used to look the class up</param>
 /// <returns>
 /// The resolved type, or <c>null</c> when the lookup raises a
 /// <see cref="System.TypeLoadException"/>
 /// </returns>
 private Type LoadLibJar(JobConf jobConf)
 {
     Type resolved = null;

     try
     {
         resolved = jobConf.GetClassByName("testjar.ClassWordCount");
     }
     catch (TypeLoadException)
     {
         // Intentionally ignored: an unresolvable class is reported as null.
     }
     return(resolved);
 }
Пример #3
0
        /// <summary>
        /// Resolves the
        /// <see cref="Org.Apache.Hadoop.IO.Compress.CompressionCodec"/>
        /// type used for compressing the job outputs.
        /// </summary>
        /// <param name="conf">
        /// the
        /// <see cref="JobConf"/>
        /// whose <c>FileOutputFormat.CompressCodec</c> property is consulted
        /// </param>
        /// <param name="defaultValue">
        /// the codec type returned when that property is not set
        /// </param>
        /// <returns>
        /// the configured codec type narrowed to
        /// <see cref="Org.Apache.Hadoop.IO.Compress.CompressionCodec"/>
        /// , or <paramref name="defaultValue"/> when nothing is configured
        /// </returns>
        /// <exception cref="System.ArgumentException">
        /// if a class name was specified, but the class was not found
        /// </exception>
        public static Type GetOutputCompressorClass(JobConf conf, Type defaultValue)
        {
            string codecName = conf.Get(FileOutputFormat.CompressCodec);

            // Nothing configured: fall back to the caller-supplied default.
            if (codecName == null)
            {
                return(defaultValue);
            }
            try
            {
                return(conf.GetClassByName(codecName).AsSubclass <CompressionCodec>());
            }
            catch (TypeLoadException cause)
            {
                throw new ArgumentException("Compression codec " + codecName + " was not found.", cause);
            }
        }
Пример #4
0
            /// <summary>
            /// Let the first actual define the InputFormat and the second define
            /// the <c>mapred.input.dir</c> property.
            /// </summary>
            /// <remarks>
            /// The text of every token before the first comma is concatenated into a
            /// class name, which is instantiated and stored in <c>inf</c>. The token
            /// following the comma must be a quoted string; its text is stored in
            /// <c>indir</c>.
            /// </remarks>
            /// <param name="ll">The token list to consume</param>
            /// <param name="job">The job configuration used to resolve and instantiate the class</param>
            /// <exception cref="System.IO.IOException">
            /// if the class cannot be loaded or instantiated, if no token follows the
            /// comma (or no comma is present), or if that token is not a quoted string
            /// </exception>
            internal override void Parse(IList <Parser.Token> ll, JobConf job)
            {
                IEnumerator <Parser.Token> tokens = ll.GetEnumerator();
                StringBuilder className           = new StringBuilder();

                while (tokens.HasNext())
                {
                    Parser.Token current = tokens.Next();
                    if (!Parser.TType.Comma.Equals(current.GetType()))
                    {
                        // Still inside the class name: keep accumulating.
                        className.Append(current.GetStr());
                        continue;
                    }
                    // Reached the separator: the accumulated text names the InputFormat.
                    try
                    {
                        inf = (InputFormat)ReflectionUtils.NewInstance(job.GetClassByName(className.ToString()), job);
                    }
                    catch (TypeLoadException e)
                    {
                        throw (IOException)Sharpen.Extensions.InitCause(new IOException(), e);
                    }
                    catch (ArgumentException e)
                    {
                        throw (IOException)Sharpen.Extensions.InitCause(new IOException(), e);
                    }
                    break;
                }
                // Either no comma was seen or nothing follows it.
                if (!tokens.HasNext())
                {
                    throw new IOException("Parse error");
                }
                Parser.Token dirToken = tokens.Next();
                if (Parser.TType.Quot.Equals(dirToken.GetType()))
                {
                    indir = dirToken.GetStr();
                    return;
                }
                throw new IOException("Expected quoted string");
            }
Пример #5
0
        /// <summary>
        /// Retrieves a map of
        /// <see cref="Org.Apache.Hadoop.FS.Path"/>
        /// s to the
        /// <see cref="Org.Apache.Hadoop.Mapred.InputFormat{K, V}"/>
        /// instance that should be used for them.
        /// </summary>
        /// <remarks>
        /// The mapping is read from the <c>mapreduce.input.multipleinputs.dir.formats</c>
        /// property: a comma-separated list of entries, each of the form
        /// <c>path;inputFormatClassName</c>. The property value is used without a
        /// null check.
        /// </remarks>
        /// <param name="conf">The configuration of the job</param>
        /// <seealso cref="AddInputPath(Org.Apache.Hadoop.Mapred.JobConf, Org.Apache.Hadoop.FS.Path, System.Type{T})"/>
        /// <returns>A map of paths to inputformats for the job</returns>
        internal static IDictionary <Path, InputFormat> GetInputFormatMap(JobConf conf)
        {
            IDictionary <Path, InputFormat> formatsByPath = new Dictionary <Path, InputFormat>();
            string   rawMappings = conf.Get("mapreduce.input.multipleinputs.dir.formats");
            string[] entries     = rawMappings.Split(",");

            for (int idx = 0; idx < entries.Length; idx++)
            {
                // parts[0] is the input path, parts[1] the InputFormat class name.
                string[]    parts = entries[idx].Split(";");
                InputFormat instance;
                try
                {
                    instance = (InputFormat)ReflectionUtils.NewInstance(conf.GetClassByName(parts[1]), conf);
                }
                catch (TypeLoadException e)
                {
                    throw new RuntimeException(e);
                }
                formatsByPath[new Path(parts[0])] = instance;
            }
            return(formatsByPath);
        }
Пример #6
0
        /// <summary>
        /// Writes BZip2-compressed text files containing the numbers
        /// <c>0 .. length-1</c> (one per line) for several lengths, splits each
        /// file with randomly chosen split counts, and verifies that every number
        /// is read by exactly one split.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestSplitableCodecs()
        {
            JobConf conf = new JobConf(defaultConf);
            // The seed is logged below so a failing run can be reproduced.
            int     seed = new Random().Next();
            // Create the codec
            CompressionCodec codec = null;

            try
            {
                codec = (CompressionCodec)ReflectionUtils.NewInstance(
                    conf.GetClassByName("org.apache.hadoop.io.compress.BZip2Codec"), conf);
            }
            catch (TypeLoadException e)
            {
                // Keep the original failure as the inner exception for diagnosis.
                throw new IOException("Illegal codec!", e);
            }
            Path file = new Path(workDir, "test" + codec.GetDefaultExtension());
            // A reporter that does nothing
            Reporter reporter = Reporter.Null;

            Log.Info("seed = " + seed);
            Random     random  = new Random(seed);
            FileSystem localFs = FileSystem.GetLocal(conf);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(conf, workDir);
            int MaxLength = 500000;

            // for a variety of lengths
            for (int length = MaxLength / 2; length < MaxLength; length += random.Next(MaxLength / 4) + 1)
            {
                Log.Info("creating; entries = " + length);
                // create a file with length entries
                TextWriter writer = new OutputStreamWriter(codec.CreateOutputStream(localFs.Create(file)));
                try
                {
                    for (int i = 0; i < length; i++)
                    {
                        writer.Write(Sharpen.Extensions.ToString(i));
                        writer.Write("\n");
                    }
                }
                finally
                {
                    writer.Close();
                }
                // try splitting the file in a variety of sizes
                TextInputFormat format = new TextInputFormat();
                format.Configure(conf);
                LongWritable key   = new LongWritable();
                Text         value = new Text();
                for (int i_1 = 0; i_1 < 3; i_1++)
                {
                    int numSplits = random.Next(MaxLength / 2000) + 1;
                    Log.Info("splitting: requesting = " + numSplits);
                    InputSplit[] splits = format.GetSplits(conf, numSplits);
                    Log.Info("splitting: got =        " + splits.Length);
                    // check each split; bit v is set once the value v has been read
                    BitSet bits = new BitSet(length);
                    for (int j = 0; j < splits.Length; j++)
                    {
                        Log.Debug("split[" + j + "]= " + splits[j]);
                        RecordReader <LongWritable, Text> reader = format.GetRecordReader(splits[j], conf, reporter);
                        try
                        {
                            int counter = 0;
                            while (reader.Next(key, value))
                            {
                                int v = System.Convert.ToInt32(value.ToString());
                                Log.Debug("read " + v);
                                if (bits.Get(v))
                                {
                                    Log.Warn("conflict with " + v + " in split " + j + " at position " + reader.GetPos());
                                }
                                // NUnit takes the condition first and the message second.
                                NUnit.Framework.Assert.IsFalse(bits.Get(v), "Key in multiple partitions.");
                                bits.Set(v);
                                counter++;
                            }
                            if (counter > 0)
                            {
                                Log.Info("splits[" + j + "]=" + splits[j] + " count=" + counter);
                            }
                            else
                            {
                                Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + counter);
                            }
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }
                    // NUnit order is (expected, actual, message).
                    NUnit.Framework.Assert.AreEqual(length, bits.Cardinality(), "Some keys in no partition.");
                }
            }
        }
Пример #7
0
 /// <summary>
 /// Looks up the class named by a command-line option and narrows it to
 /// <typeparamref name="InterfaceType"/>.
 /// </summary>
 /// <param name="cl">The parsed command line holding the option value</param>
 /// <param name="key">The option whose value is the class name</param>
 /// <param name="conf">The job configuration used to resolve the class name</param>
 /// <returns>The resolved class, as a subclass of <typeparamref name="InterfaceType"/></returns>
 /// <exception cref="System.TypeLoadException"/>
 private static Type GetClass <InterfaceType>(CommandLine cl, string key, JobConf conf)
 {
     string className = cl.GetOptionValue(key);

     return(conf.GetClassByName(className).AsSubclass(typeof(InterfaceType)));
 }