Example #1
0
        /// <summary>
        /// Reduce phase: for each value of this key, re-extracts the configured output
        /// key/value fields via <see cref="FieldSelectionHelper"/> and emits the result.
        /// </summary>
        /// <param name="key">Input key; its text plus the field separator prefixes every record.</param>
        /// <param name="values">All values grouped under <paramref name="key"/>.</param>
        /// <param name="output">Collector receiving the extracted key/value pairs.</param>
        /// <param name="reporter">Progress reporter (unused here).</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Reduce(Org.Apache.Hadoop.IO.Text key, IEnumerator<Org.Apache.Hadoop.IO.Text> values,
                                   OutputCollector<Org.Apache.Hadoop.IO.Text, Org.Apache.Hadoop.IO.Text> output,
                                   Reporter reporter)
        {
            // Every record is prefixed with the key text and the separator before
            // field extraction, mirroring the map-side record layout.
            string prefixedKey = key.ToString() + this.fieldSeparator;

            while (values.HasNext())
            {
                FieldSelectionHelper selector = new FieldSelectionHelper();
                selector.ExtractOutputKeyValue(prefixedKey, values.Next().ToString(), fieldSeparator,
                                               reduceOutputKeyFieldList, reduceOutputValueFieldList,
                                               allReduceValueFieldsFrom, false, false);
                output.Collect(selector.GetKey(), selector.GetValue());
            }
        }
Example #2
0
            /// <summary>
            /// Materializes the records in sorted order: walks the index array and
            /// deserializes each record's byte range back into a string.
            /// </summary>
            /// <returns>The record strings in the order given by <c>indices</c>.</returns>
            /// <exception cref="System.IO.IOException"/>
            public virtual string[] GetSorted()
            {
                string[] sorted = new string[indices.Length];
                Org.Apache.Hadoop.IO.Text text = new Org.Apache.Hadoop.IO.Text();
                DataInputBuffer buffer = new DataInputBuffer();

                for (int pos = 0; pos < sorted.Length; ++pos)
                {
                    int idx = indices[pos];
                    // A record ends where the next one begins; the last record ends at
                    // the end-of-buffer mark (eob).
                    int end = (idx + 1 == indices.Length) ? eob : offsets[idx + 1];
                    buffer.Reset(bytes, offsets[idx], end - offsets[idx]);
                    text.ReadFields(buffer);
                    sorted[pos] = text.ToString();
                }
                return sorted;
            }
 /// <summary>Parse the command line arguments into lines and display the result.</summary>
 /// <param name="args">Each argument is unquoted and opened as a line stream.</param>
 /// <exception cref="System.Exception"/>
 public static void Main(string[] args)
 {
     foreach (string arg in args)
     {
         System.Console.Out.WriteLine("Working on " + arg);
         LineReader reader = MakeStream(Unquote(arg));
         try
         {
             Org.Apache.Hadoop.IO.Text line = new Org.Apache.Hadoop.IO.Text();
             // ReadLine returns the number of bytes consumed; 0 means end of stream.
             int size = reader.ReadLine(line);
             while (size > 0)
             {
                 System.Console.Out.WriteLine("Got: " + line.ToString());
                 size = reader.ReadLine(line);
             }
         }
         finally
         {
             // Close even when ReadLine throws (the original leaked the reader on error).
             reader.Close();
         }
     }
 }
Example #4
0
            /// <summary>
            /// Reads (position, path) records from the indirect input file and wraps
            /// each path in an <c>IndirectSplit</c> (old mapred API variant).
            /// </summary>
            /// <param name="job">Job configuration; must set <c>GenericMRLoadGenerator.IndirectInputFile</c>.</param>
            /// <param name="numSplits">Capacity hint for the split list.</param>
            /// <returns>One split per record in the indirect input file.</returns>
            /// <exception cref="System.IO.IOException"/>
            public virtual InputSplit[] GetSplits(JobConf job, int numSplits)
            {
                Path       src = new Path(job.Get(GenericMRLoadGenerator.IndirectInputFile, null));
                FileSystem fs  = src.GetFileSystem(job);
                AList<GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit> splits =
                    new AList<GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit>(numSplits);
                LongWritable key = new LongWritable();

                Org.Apache.Hadoop.IO.Text value = new Org.Apache.Hadoop.IO.Text();
                // The original never closed the reader; close it even if Next() throws.
                SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, job);
                try
                {
                    while (sl.Next(key, value))
                    {
                        splits.AddItem(new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit(
                                           new Path(value.ToString()), key.Get()));
                    }
                }
                finally
                {
                    sl.Close();
                }
                return Sharpen.Collections.ToArray(
                    splits,
                    new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit[splits.Count]);
            }
            /// <summary>
            /// Reads (position, path) records from the indirect input file and wraps
            /// each path in an <c>IndirectSplit</c> (new mapreduce API variant).
            /// </summary>
            /// <param name="job">Job context; its configuration must set <c>IndirectInputFile</c>.</param>
            /// <returns>One split per record in the indirect input file.</returns>
            /// <exception cref="System.IO.IOException"/>
            public override IList<InputSplit> GetSplits(JobContext job)
            {
                Configuration      conf   = job.GetConfiguration();
                Path               src    = new Path(conf.Get(IndirectInputFile, null));
                FileSystem         fs     = src.GetFileSystem(conf);
                IList<InputSplit>  splits = new AList<InputSplit>();
                LongWritable       key    = new LongWritable();

                Org.Apache.Hadoop.IO.Text value = new Org.Apache.Hadoop.IO.Text();
                // The original never closed the reader; close it even if Next() throws.
                SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, conf);
                try
                {
                    while (sl.Next(key, value))
                    {
                        splits.AddItem(new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit(
                                           new Path(value.ToString()), key.Get()));
                    }
                }
                finally
                {
                    sl.Close();
                }
                return splits;
            }