예제 #1
0
 /// <summary>
 /// Emits 4096 fixed-size records followed by one small and one large
 /// "tagged" record. The input key and value are not read.
 /// </summary>
 /// <param name="nk">Input key (unused).</param>
 /// <param name="nv">Input value (unused).</param>
 /// <param name="output">Collector receiving every generated record.</param>
 /// <param name="reporter">Progress reporter (unused).</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Map(
     NullWritable nk,
     NullWritable nv,
     OutputCollector<Org.Apache.Hadoop.IO.Text, Org.Apache.Hadoop.IO.Text> output,
     Reporter reporter)
 {
     // Bulk phase: 4096 records sharing one 1000-byte value whose first
     // byte is overwritten with (byte)id.
     val.Set(b, 0, 1000);
     val.GetBytes()[0] = unchecked ((byte)id);
     int record = 0;
     while (record < 4096)
     {
         string bulkKey = fmt.Format(tagfmt, record).ToString();
         key.Set(bulkKey);
         output.Collect(key, val);
         // Cut the formatter's buffer back to keylen before formatting again.
         ((StringBuilder)fmt.Out()).Length = keylen;
         record++;
     }

     // Emit two "tagged" records from the map. To validate the merge, segments
     // should have both a small and large record such that reading a large
     // record from an on-disk segment into an in-memory segment will write
     // over the beginning of a record in the in-memory segment, causing the
     // merge and/or validation to fail.

     // Small tagged record: 128 bytes shorter than the large one, key prefix "A".
     val.Set(b, 0, GetValLen(id, nMaps) - 128);
     val.GetBytes()[0] = unchecked ((byte)id);
     ((StringBuilder)fmt.Out()).Length = keylen;
     string smallKey = "A" + fmt.Format(tagfmt, id).ToString();
     key.Set(smallKey);
     output.Collect(key, val);

     // Large tagged record: full GetValLen(id, nMaps) bytes, key prefix "B".
     val.Set(b, 0, GetValLen(id, nMaps));
     val.GetBytes()[0] = unchecked ((byte)id);
     ((StringBuilder)fmt.Out()).Length = keylen;
     string largeKey = "B" + fmt.Format(tagfmt, id).ToString();
     key.Set(largeKey);
     output.Collect(key, val);
 }
예제 #2
0
            /// <summary>
            /// Emits the statistics for one processed block. When
            /// <paramref name="corruptedBlock"/> is a string it is reported under
            /// "badBlocks" and nothing else is emitted apart from the block count;
            /// otherwise it is unboxed as a <c>long</c> byte count and the size,
            /// time and rate entries are emitted.
            /// </summary>
            /// <param name="output">Collector receiving the statistic entries.</param>
            /// <param name="name">Not read by this method.</param>
            /// <param name="execTime">Execution time used as the rate divisor and emitted under "time".</param>
            /// <param name="corruptedBlock">Either a <c>string</c> or a boxed <c>long</c>.</param>
            /// <exception cref="System.IO.IOException"/>
            internal override void CollectStats(OutputCollector<Text, Text> output, string name
                                                , long execTime, object corruptedBlock)
            {
                const int BytesPerMega = 0x100000;

                output.Collect(new Text(AccumulatingReducer.ValueTypeLong + "blocks"), new Text(1.ToString()));

                // Test the runtime type itself rather than whether the type's name
                // happens to end in "String": any unrelated type with such a name
                // used to reach the string cast below and fail there.
                string badBlock = corruptedBlock as string;
                if (badBlock != null)
                {
                    output.Collect(new Text(AccumulatingReducer.ValueTypeString + "badBlocks"), new Text(badBlock));
                    return;
                }

                long  totalSize   = ((long)corruptedBlock);
                float ioRateMbSec = (float)totalSize * 1000 / (execTime * BytesPerMega);

                Log.Info("Number of bytes processed = " + totalSize);
                Log.Info("Exec time = " + execTime);
                Log.Info("IO rate = " + ioRateMbSec);
                output.Collect(new Text(AccumulatingReducer.ValueTypeLong + "size"), new Text(totalSize.ToString()));
                output.Collect(new Text(AccumulatingReducer.ValueTypeLong + "time"), new Text(execTime.ToString()));
                // The rate is emitted multiplied by 1000.
                output.Collect(new Text(AccumulatingReducer.ValueTypeFloat + "rate"), new Text((ioRateMbSec * 1000).ToString()));
            }
예제 #3
0
        /// <summary>
        /// Feeds every value of a key into the aggregator selected by the key's
        /// type prefix and emits the aggregator's combiner output under the same key.
        /// </summary>
        /// <param name="key">
        /// a Text key whose prefix, up to the type separator, names the
        /// aggregation to apply.
        /// </param>
        /// <param name="values">the values to combine</param>
        /// <param name="output">receives the combined values</param>
        /// <param name="reporter">not used by this method</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Reduce(
            Text key,
            IEnumerator<Text> values,
            OutputCollector<Text, Text> output,
            Reporter reporter)
        {
            string fullKey = key.ToString();
            int separatorAt = fullKey.IndexOf(ValueAggregatorDescriptor.TypeSeparator);
            string aggregationType = Sharpen.Runtime.Substring(fullKey, 0, separatorAt);
            ValueAggregator combiner = ValueAggregatorBaseDescriptor.GenerateValueAggregator(aggregationType);

            for (; values.HasNext();)
            {
                combiner.AddNextValue(values.Next());
            }

            // Non-Text results are converted through ToString() before being emitted.
            IEnumerator combined = combiner.GetCombinerOutput().GetEnumerator();
            while (combined.HasNext())
            {
                object item = combined.Next();
                Text asText = item as Text;
                output.Collect(key, asText != null ? asText : new Text(item.ToString()));
            }
        }
예제 #4
0
            /// <summary>
            /// Aggregates the values of a key according to the key's prefix:
            /// "l:" sums them, "min:" keeps the smallest and "max:" keeps the
            /// largest. Keys with any other prefix produce no output.
            /// </summary>
            /// <param name="key">Key whose string form selects the aggregation.</param>
            /// <param name="values">Values, each parsed with <c>long.Parse</c>.</param>
            /// <param name="output">Receives at most one (key, result) pair.</param>
            /// <param name="reporter">Receives a status line before and after the work.</param>
            /// <exception cref="System.IO.IOException"/>
            public virtual void Reduce(
                Text key,
                IEnumerator<Text> values,
                OutputCollector<Text, Text> output,
                Reporter reporter)
            {
                string field = key.ToString();

                reporter.SetStatus("starting " + field + " ::host = " + hostName);

                if (field.StartsWith("l:"))
                {
                    // Plain sum of all values.
                    long total = 0;
                    for (; values.HasNext();)
                    {
                        total += long.Parse(values.Next().ToString());
                    }
                    output.Collect(key, new Text(total.ToString()));
                }
                else if (field.StartsWith("min:"))
                {
                    // -1 marks "nothing seen yet"; once a value is held, zeros are skipped.
                    long smallest = -1;
                    for (; values.HasNext();)
                    {
                        long candidate = long.Parse(values.Next().ToString());
                        if (smallest == -1 || (candidate != 0 && candidate < smallest))
                        {
                            smallest = candidate;
                        }
                    }
                    output.Collect(key, new Text(smallest.ToString()));
                }
                else if (field.StartsWith("max:"))
                {
                    // -1 marks "nothing seen yet".
                    long largest = -1;
                    for (; values.HasNext();)
                    {
                        long candidate = long.Parse(values.Next().ToString());
                        if (largest == -1 || candidate > largest)
                        {
                            largest = candidate;
                        }
                    }
                    output.Collect(key, new Text(largest.ToString()));
                }

                reporter.SetStatus("finished " + field + " ::host = " + hostName);
            }
예제 #5
0
            /// <summary>
            /// Emits the lower-cased text of <paramref name="value"/> as the key,
            /// paired with the original, unmodified value.
            /// </summary>
            /// <param name="key">Input key (unused).</param>
            /// <param name="value">Text to lower-case and re-emit.</param>
            /// <param name="output">Receives the (lower-cased text, value) pair.</param>
            /// <param name="reporter">Progress reporter (unused).</param>
            /// <exception cref="System.IO.IOException"/>
            public virtual void Map(
                WritableComparable key,
                Text value,
                OutputCollector<Text, Text> output,
                Reporter reporter)
            {
                Text loweredKey = new Text(StringUtils.ToLowerCase(value.ToString()));
                output.Collect(loweredKey, value);
            }
예제 #6
0
            /// <summary>
            /// Checks the shape of a tuple against its key and then emits
            /// (key, one). When the key is a multiple of <c>srcs * srcs</c> every
            /// slot must hold an IntWritable satisfying
            /// <c>(v - i) * srcs == 10 * k</c>; otherwise only slot
            /// <c>k % srcs</c> may be populated and must satisfy
            /// <c>srcs * (v - i) == 10 * (k - i)</c>.
            /// </summary>
            /// <param name="key">Key of the tuple.</param>
            /// <param name="val">Tuple under test.</param>
            /// <param name="out">Receives (key, one) once all checks pass.</param>
            /// <param name="reporter">Progress reporter (unused).</param>
            /// <exception cref="System.IO.IOException"/>
            public override void Map(IntWritable key, TupleWritable val, OutputCollector<IntWritable
                                                                                         , IntWritable> @out, Reporter reporter)
            {
                int    k     = key.Get();
                string kvstr = "Unexpected tuple: " + Stringify(key, val);

                // NUnit's Assert.IsTrue takes the condition first and the failure
                // message second; the calls below follow that order.
                if (0 == k % (srcs * srcs))
                {
                    for (int i = 0; i < val.Size(); ++i)
                    {
                        NUnit.Framework.Assert.IsTrue(val.Get(i) is IntWritable, kvstr);
                        int vali = ((IntWritable)val.Get(i)).Get();
                        NUnit.Framework.Assert.IsTrue((vali - i) * srcs == 10 * k, kvstr);
                    }
                }
                else
                {
                    for (int i = 0; i < val.Size(); ++i)
                    {
                        if (i == k % srcs)
                        {
                            NUnit.Framework.Assert.IsTrue(val.Get(i) is IntWritable, kvstr);
                            int vali = ((IntWritable)val.Get(i)).Get();
                            NUnit.Framework.Assert.IsTrue(srcs * (vali - i) == 10 * (k - i), kvstr);
                        }
                        else
                        {
                            // Every other slot must be empty.
                            NUnit.Framework.Assert.IsTrue(!val.Has(i), kvstr);
                        }
                    }
                }
                @out.Collect(key, one);
            }
예제 #7
0
            /// <summary>
            /// Emits random key/value records until <c>numBytesToWrite</c> is no
            /// longer positive. The input key and value are not read.
            /// </summary>
            /// <param name="key">Input key (unused).</param>
            /// <param name="value">Input value (unused).</param>
            /// <param name="output">Receives each generated record.</param>
            /// <param name="reporter">Receives the byte/record counters and status lines.</param>
            /// <exception cref="System.IO.IOException"/>
            public virtual void Map(WritableComparable key, Writable value, OutputCollector<BytesWritable
                                                                                            , BytesWritable> output, Reporter reporter)
            {
                int itemCount = 0;

                while (numBytesToWrite > 0)
                {
                    // Key length: minKeySize plus a random offset when a range is configured.
                    int keyLength = minKeySize + (keySizeRange != 0 ? random.Next(keySizeRange) : 0);
                    randomKey.SetSize(keyLength);
                    RandomizeBytes(randomKey.GetBytes(), 0, randomKey.GetLength());

                    // Value length: same scheme with the value settings.
                    int valueLength = minValueSize + (valueSizeRange != 0 ? random.Next(valueSizeRange) : 0);
                    randomValue.SetSize(valueLength);
                    RandomizeBytes(randomValue.GetBytes(), 0, randomValue.GetLength());

                    output.Collect(randomKey, randomValue);

                    int recordBytes = keyLength + valueLength;
                    numBytesToWrite -= recordBytes;
                    // Count the bytes actually produced for this record; the counter
                    // was previously bumped by 1 per record, duplicating RecordsWritten.
                    reporter.IncrCounter(ThreadedMapBenchmark.Counters.BytesWritten, recordBytes);
                    reporter.IncrCounter(ThreadedMapBenchmark.Counters.RecordsWritten, 1);
                    if (++itemCount % 200 == 0)
                    {
                        // Status refresh every 200 records.
                        reporter.SetStatus("wrote record " + itemCount + ". " + numBytesToWrite + " bytes left.");
                    }
                }
                reporter.SetStatus("done with " + itemCount + " records.");
            }
예제 #8
0
 /// <summary>
 /// Passes every value through to the output, reacting to two marker
 /// records on the way: a value equal to <c>ReducerBadRecords[0]</c>
 /// terminates the process with exit code -1, and a value equal to
 /// <c>ReducerBadRecords[1]</c> sleeps for 15 minutes before the value is
 /// emitted.
 /// </summary>
 /// <param name="key">Key emitted with every value.</param>
 /// <param name="values">Values to inspect and forward.</param>
 /// <param name="output">Receives each (key, value) pair.</param>
 /// <param name="reporter">Progress reporter (unused).</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Reduce(
     LongWritable key,
     IEnumerator<Text> values,
     OutputCollector<LongWritable, Text> output,
     Reporter reporter)
 {
     while (values.HasNext())
     {
         Text value = values.Next();
         Log.Debug("REDUCE key:" + key + "  value:" + value);

         string text = value.ToString();
         if (ReducerBadRecords[0].Equals(text))
         {
             Log.Warn("REDUCE Encountered BAD record");
             System.Environment.Exit(-1);
         }
         else if (ReducerBadRecords[1].Equals(text))
         {
             try
             {
                 Log.Warn("REDUCE Encountered BAD record");
                 // 15 minutes, expressed in milliseconds.
                 Sharpen.Thread.Sleep(15 * 60 * 1000);
             }
             catch (Exception e)
             {
                 // A failure while sleeping is only printed; the record is still emitted.
                 Sharpen.Runtime.PrintStackTrace(e);
             }
         }

         output.Collect(key, value);
     }
 }
예제 #9
0
            // keep track of the last key we've seen
            /// <summary>
            /// Verifies that keys arrive in non-increasing order, that the values
            /// of each key are in non-increasing order, and that each key carries
            /// exactly five values; then emits (key, "success").
            /// </summary>
            /// <param name="key">Current key; compared against the previously seen key.</param>
            /// <param name="values">Values belonging to the key.</param>
            /// <param name="out">Receives (key, "success") when every check passes.</param>
            /// <param name="reporter">Progress reporter (unused).</param>
            /// <exception cref="System.IO.IOException"/>
            public virtual void Reduce(
                IntWritable key,
                IEnumerator<IntWritable> values,
                OutputCollector<IntWritable, Text> @out,
                Reporter reporter)
            {
                // Key order: the current key must not exceed the previous one.
                int keyNow = key.Get();
                if (keyNow > lastKey)
                {
                    NUnit.Framework.Assert.Fail("Keys not in sorted descending order");
                }
                lastKey = keyNow;

                // Value order: start from the largest int so the first value always passes.
                IntWritable prior = new IntWritable(int.MaxValue);
                int seen = 0;
                for (; values.HasNext(); ++seen)
                {
                    IntWritable item = values.Next();
                    if (item.CompareTo(prior) > 0)
                    {
                        NUnit.Framework.Assert.Fail("Values generated by Mapper not in order");
                    }
                    prior = item;
                }

                if (seen != 5)
                {
                    NUnit.Framework.Assert.Fail("Values not grouped by primary key");
                }
                @out.Collect(key, new Text("success"));
            }
예제 #10
0
 /// <summary>
 /// Forwards the record unchanged, then increments one enum-keyed and one
 /// string-keyed "MapRecords" counter by one.
 /// </summary>
 /// <param name="key">Key to forward.</param>
 /// <param name="value">Value to forward.</param>
 /// <param name="output">Receives the unchanged (key, value) pair.</param>
 /// <param name="reporter">Receives both counter increments.</param>
 /// <exception cref="System.IO.IOException"/>
 public override void Map(K key, V value, OutputCollector<K, V> output, Reporter reporter)
 {
     const string counterGroup = "StringCounter";
     const string counterName = "MapRecords";

     output.Collect(key, value);

     reporter.IncrCounter(TestUserDefinedCounters.EnumCounter.MapRecords, 1);
     reporter.IncrCounter(counterGroup, counterName, 1);
 }
예제 #11
0
 /// <summary>
 /// Emits every incoming value as an output key paired with a null value.
 /// The incoming key is not used.
 /// </summary>
 /// <param name="key">Input key (unused).</param>
 /// <param name="it">Values to re-emit as keys.</param>
 /// <param name="out">Receives one (value, null) pair per input value.</param>
 /// <param name="reporter">Progress reporter (unused).</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Reduce(
     IntWritable key,
     IEnumerator<IntWritable> it,
     OutputCollector<IntWritable, IntWritable> @out,
     Reporter reporter)
 {
     for (; it.HasNext();)
     {
         IntWritable promoted = it.Next();
         @out.Collect(promoted, null);
     }
 }
 /// <summary>Forwards each value to the output under the unchanged key.</summary>
 /// <param name="key">Key emitted with every value.</param>
 /// <param name="values">Values to forward.</param>
 /// <param name="output">Receives one (key, value) pair per input value.</param>
 /// <param name="reporter">Progress reporter (unused).</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Reduce(
     LongWritable key,
     IEnumerator<Text> values,
     OutputCollector<LongWritable, Text> output,
     Reporter reporter)
 {
     for (; values.HasNext();)
     {
         Text forwarded = values.Next();
         output.Collect(key, forwarded);
     }
 }
예제 #13
0
			/// <summary>
			/// Emits, for each input value, a new UTF8 built from the value's
			/// string form, paired with the unchanged key.
			/// </summary>
			/// <param name="key">Key emitted with every value.</param>
			/// <param name="values">Values to copy.</param>
			/// <param name="output">Receives one (key, copy) pair per input value.</param>
			/// <param name="reporter">Progress reporter (unused).</param>
			/// <exception cref="System.IO.IOException"/>
			public virtual void Reduce(
				UTF8 key,
				IEnumerator<UTF8> values,
				OutputCollector<UTF8, UTF8> output,
				Reporter reporter)
			{
				for (; values.HasNext();)
				{
					string text = values.Next().ToString();
					output.Collect(key, new UTF8(text));
				}
			}
예제 #14
0
 /// <summary>Writes every value to the output under its unchanged key.</summary>
 /// <param name="key">Key emitted with every value.</param>
 /// <param name="values">Values to forward.</param>
 /// <param name="output">Receives one (key, value) pair per input value.</param>
 /// <param name="reporter">Progress reporter (unused).</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Reduce(
     K key,
     IEnumerator<V> values,
     OutputCollector<K, V> output,
     Reporter reporter)
 {
     for (; values.HasNext();)
     {
         V forwarded = values.Next();
         output.Collect(key, forwarded);
     }
 }
예제 #15
0
            /// <summary>
            /// Emits fresh IntWritable copies of the incoming key and value.
            /// </summary>
            /// <param name="key">Key whose int payload is copied.</param>
            /// <param name="val">Value whose int payload is copied.</param>
            /// <param name="out">Receives the copied pair.</param>
            /// <param name="reporter">Progress reporter (unused).</param>
            /// <exception cref="System.IO.IOException"/>
            public virtual void Map(
                IntWritable key,
                IntWritable val,
                OutputCollector<IntWritable, IntWritable> @out,
                Reporter reporter)
            {
                IntWritable keyCopy = new IntWritable(key.Get());
                IntWritable valCopy = new IntWritable(val.Get());
                @out.Collect(keyCopy, valCopy);
            }
예제 #16
0
                /// <summary>
                /// Emits a composite key built by <c>Pair(key, value)</c>, paired with
                /// this instance's <c>value</c> field (not the <paramref name="value"/>
                /// parameter, which only contributes to the composite key).
                /// </summary>
                /// <param name="key">First half of the composite key.</param>
                /// <param name="value">Second half of the composite key.</param>
                /// <param name="output">Receives the (composite key, field value) pair.</param>
                /// <param name="reporter">Progress reporter (unused).</param>
                /// <exception cref="System.IO.IOException"/>
                public virtual void Map(
                    BytesWritable key,
                    BytesWritable value,
                    OutputCollector<BytesWritable, IntWritable> output,
                    Reporter reporter)
                {
                    // The parameter shadows the field, so the field must be reached via this.
                    output.Collect(new BytesWritable(Pair(key, value)), this.value);
                }
예제 #17
0
            /// <summary>
            /// Splits the line at its first comma and emits the text before it as
            /// the key and the text after it, parsed as a long, as the value.
            /// </summary>
            /// <param name="key">Input key (unused).</param>
            /// <param name="value">Line to split.</param>
            /// <param name="output">Receives the (text, number) pair.</param>
            /// <param name="reporter">Progress reporter (unused).</param>
            /// <exception cref="System.IO.IOException"/>
            public virtual void Map(
                LongWritable key,
                Text value,
                OutputCollector<Text, LongWritable> output,
                Reporter reporter)
            {
                string line = value.ToString();
                int comma = line.IndexOf(",");

                string label = Sharpen.Runtime.Substring(line, 0, comma);
                string number = Sharpen.Runtime.Substring(line, comma + 1);
                output.Collect(new Text(label), new LongWritable(long.Parse(number)));
            }
예제 #18
0
 /// <summary>
 /// Forwards the record unchanged unless this instance's <c>id</c> ends
 /// with "0_0", in which case nothing is emitted.
 /// </summary>
 /// <param name="key">Key to forward.</param>
 /// <param name="val">Value to forward.</param>
 /// <param name="output">Receives the record when it is not suppressed.</param>
 /// <param name="reporter">Progress reporter (unused).</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Map(
     LongWritable key,
     Text val,
     OutputCollector<LongWritable, Text> output,
     Reporter reporter)
 {
     // Id 0 stays silent; everybody else outputs.
     if (id.EndsWith("0_0"))
     {
         return;
     }

     output.Collect(key, val);
 }
예제 #19
0
 /// <summary>
 /// Asserts that <c>loader</c> has been set, invokes it, and then forwards
 /// the record unchanged.
 /// </summary>
 /// <param name="key">Key to forward.</param>
 /// <param name="val">Value to forward.</param>
 /// <param name="output">Receives the unchanged (key, val) pair.</param>
 /// <param name="reporter">Progress reporter (unused).</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Map(WritableComparable key, Writable val, OutputCollector<WritableComparable
                                                                               , Writable> output, Reporter reporter)
 {
     // NUnit takes the object under test first and the message second; with
     // the arguments reversed the check would target the message literal.
     NUnit.Framework.Assert.IsNotNull(loader, "Mapper not configured!");
     // load the memory
     loader.Load();
     // work as identity mapper
     output.Collect(key, val);
 }
예제 #20
0
 /// <summary>
 /// Counts the record in <c>total</c> and then emits it repeatedly, bumping
 /// <c>kept</c> before each emission, for as long as the ratio
 /// <c>kept / total</c> is below <c>keep</c>.
 /// </summary>
 /// <param name="key">Key to emit.</param>
 /// <param name="val">Value to emit.</param>
 /// <param name="out">Receives zero or more copies of the (key, val) pair.</param>
 /// <exception cref="System.IO.IOException"/>
 protected internal virtual void Emit(K key, V val, OutputCollector<K, V> @out)
 {
     ++total;
     for (;;)
     {
         // Negating the original "<" test keeps the loop's behavior for every
         // outcome of the comparison.
         if (!((float)kept / total < keep))
         {
             break;
         }
         ++kept;
         @out.Collect(key, val);
     }
 }
예제 #21
0
 /// <summary>
 /// Asserts that <c>loader</c> has been set, invokes it, and then emits
 /// the key once, using it as both output key and output value. The
 /// incoming values are not read.
 /// </summary>
 /// <param name="key">Key emitted as both key and value.</param>
 /// <param name="val">Incoming values (unused).</param>
 /// <param name="output">Receives a single (key, key) pair.</param>
 /// <param name="reporter">Progress reporter (unused).</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Reduce(WritableComparable key, IEnumerator<Writable> val, OutputCollector
                            <WritableComparable, Writable> output, Reporter reporter)
 {
     // NUnit takes the object under test first and the message second; with
     // the arguments reversed the check would target the message literal.
     NUnit.Framework.Assert.IsNotNull(loader, "Reducer not configured!");
     // load the memory
     loader.Load();
     // emit the key as its own value; the input values are never iterated
     output.Collect(key, key);
 }
예제 #22
0
                /// <summary>
                /// Validates one record and emits its statistics under <c>this.key</c>.
                /// When <c>this.key</c> is the <c>sortOutput</c> instance, the record is
                /// additionally checked for a consistent key type, for key ordering
                /// relative to the previous record, and for the expected partition.
                /// </summary>
                /// <param name="key">Record key.</param>
                /// <param name="value">Record value.</param>
                /// <param name="output">Receives (this.key, record statistics).</param>
                /// <param name="reporter">Progress reporter (unused).</param>
                /// <exception cref="System.IO.IOException">
                /// Thrown when the key type changes, when the previous key compares
                /// greater than the current one, or when the computed partition differs
                /// from <c>partition</c>.
                /// </exception>
                public virtual void Map(WritableComparable key, Writable value, OutputCollector <IntWritable
                                                                                                 , SortValidator.RecordStatsChecker.RecordStatsWritable> output, Reporter reporter
                                        )
                {
                    // Set up rawKey and rawValue on the first call to 'map'
                    // (recordId is -1 until the first record has been counted)
                    if (recordId == -1)
                    {
                        rawKey   = CreateRaw(key.GetType());
                        rawValue = CreateRaw(value.GetType());
                    }
                    ++recordId;
                    // Reference comparison: the checks below run only when this.key
                    // is the very same object as sortOutput.
                    if (this.key == sortOutput)
                    {
                        // Check if keys are 'sorted' if this
                        // record is from sort's output
                        if (prevKey == null)
                        {
                            // First record: remember the key and its type as the baseline.
                            prevKey  = key;
                            keyClass = prevKey.GetType();
                        }
                        else
                        {
                            // Sanity check
                            if (keyClass != key.GetType())
                            {
                                throw new IOException("Type mismatch in key: expected " + keyClass.FullName + ", received "
                                                      + key.GetType().FullName);
                            }
                            // Check if they were sorted correctly
                            if (prevKey.CompareTo(key) > 0)
                            {
                                throw new IOException("The 'map-reduce' framework wrongly" + " classifed (" + prevKey
                                                      + ") > (" + key + ") " + "for record# " + recordId);
                            }
                            prevKey = key;
                        }
                        // Check if the sorted output is 'partitioned' right
                        int keyPartition = partitioner.GetPartition(key, value, noSortReducers);
                        if (partition != keyPartition)
                        {
                            throw new IOException("Partitions do not match for record# " + recordId + " ! - '"
                                                  + partition + "' v/s '" + keyPartition + "'");
                        }
                    }
                    // Construct the record-stats and output (this.key, record-stats)
                    byte[] keyBytes    = rawKey.GetRawBytes(key);
                    int    keyBytesLen = rawKey.GetRawBytesLength(key);

                    byte[] valueBytes       = rawValue.GetRawBytes(value);
                    int    valueBytesLen    = rawValue.GetRawBytesLength(value);
                    // Checksum: XOR of the hashes of the raw key bytes and raw value bytes.
                    int    keyValueChecksum = (WritableComparator.HashBytes(keyBytes, keyBytesLen) ^ WritableComparator
                                               .HashBytes(valueBytes, valueBytesLen));

                    // Stats carry the combined byte length, a count of 1, and the checksum.
                    output.Collect(this.key, new SortValidator.RecordStatsChecker.RecordStatsWritable
                                       ((keyBytesLen + valueBytesLen), 1, keyValueChecksum));
                }
예제 #23
0
            /// <summary>
            /// Splits the line at its first blank and emits the text before it as
            /// the key and the text after it as the value, reusing the
            /// <c>keyText</c> and <c>valueText</c> instances.
            /// </summary>
            /// <param name="key">Input key (unused).</param>
            /// <param name="value">Line to split.</param>
            /// <param name="output">Receives the (keyText, valueText) pair.</param>
            /// <param name="reporter">Progress reporter (unused).</param>
            /// <exception cref="System.IO.IOException"/>
            public virtual void Map(
                LongWritable key,
                Text value,
                OutputCollector<Text, Text> output,
                Reporter reporter)
            {
                string line = value.ToString();
                int blankAt = line.IndexOf(" ");

                string head = Sharpen.Runtime.Substring(line, 0, blankAt);
                keyText.Set(head);

                string tail = Sharpen.Runtime.Substring(line, blankAt + 1);
                valueText.Set(tail);

                output.Collect(keyText, valueText);
            }
예제 #24
0
        /// <summary>
        /// Runs field selection over the string forms of the input key and value
        /// and emits the key/value pair produced by the selection helper.
        /// </summary>
        /// <param name="key">Input key; its string form is passed to the helper.</param>
        /// <param name="val">Input value; its string form is passed to the helper.</param>
        /// <param name="output">Receives the helper's resulting key and value.</param>
        /// <param name="reporter">Progress reporter (unused).</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Map(
            K key,
            V val,
            OutputCollector<Org.Apache.Hadoop.IO.Text, Org.Apache.Hadoop.IO.Text> output,
            Reporter reporter)
        {
            // The helper starts out with the shared empty text for both key and value.
            FieldSelectionHelper selector = new FieldSelectionHelper(
                FieldSelectionHelper.emptyText,
                FieldSelectionHelper.emptyText);

            selector.ExtractOutputKeyValue(
                key.ToString(),
                val.ToString(),
                fieldSeparator,
                mapOutputKeyFieldList,
                mapOutputValueFieldList,
                allMapValueFieldsFrom,
                ignoreInputKey,
                true);

            output.Collect(selector.GetKey(), selector.GetValue());
        }
예제 #25
0
        /// <summary>
        /// Accumulates the values of a key according to the key's type prefix:
        /// string values are concatenated with a trailing ";" after each one,
        /// float values are summed as <c>float</c>, and long values are summed
        /// as <c>long</c>. Keys with any other prefix produce no output.
        /// </summary>
        /// <param name="key">Key whose string form selects the accumulation.</param>
        /// <param name="values">Values to accumulate.</param>
        /// <param name="output">Receives at most one (key, result) pair.</param>
        /// <param name="reporter">Receives a status line before and after the work.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Reduce(Text key, IEnumerator<Text> values, OutputCollector<Text
                                                                                       , Text> output, Reporter reporter)
        {
            string field = key.ToString();

            reporter.SetStatus("starting " + field + " ::host = " + hostName);
            if (field.StartsWith(ValueTypeString))
            {
                // concatenate strings
                StringBuilder sSum = new StringBuilder();
                while (values.HasNext())
                {
                    sSum.Append(values.Next().ToString()).Append(";");
                }
                output.Collect(key, new Org.Apache.Hadoop.IO.Text(sSum.ToString()));
            }
            else if (field.StartsWith(ValueTypeFloat))
            {
                // sum float values
                float fSum = 0;
                while (values.HasNext())
                {
                    // float.Parse is the C# counterpart of Java's Float.parseFloat;
                    // "float.ParseFloat" does not exist. Default culture handling is
                    // kept so values written with a plain ToString() round-trip.
                    fSum += float.Parse(values.Next().ToString());
                }
                output.Collect(key, new Org.Apache.Hadoop.IO.Text(fSum.ToString()));
            }
            else if (field.StartsWith(ValueTypeLong))
            {
                // sum long values
                long lSum = 0;
                while (values.HasNext())
                {
                    lSum += long.Parse(values.Next().ToString());
                }
                output.Collect(key, new Org.Apache.Hadoop.IO.Text(lSum.ToString()));
            }
            reporter.SetStatus("finished " + field + " ::host = " + hostName);
        }
예제 #26
0
            /// <summary>
            /// Emits the key's int payload <c>val.Get()</c> times, each time under
            /// a new key taken from <c>Math.Abs(r.Next())</c>.
            /// </summary>
            /// <param name="key">Supplies the int emitted as every output value.</param>
            /// <param name="val">Supplies the number of records to emit.</param>
            /// <param name="out">Receives the generated records.</param>
            /// <param name="reporter">Progress reporter (unused).</param>
            /// <exception cref="System.IO.IOException"/>
            public virtual void Map(
                IntWritable key,
                IntWritable val,
                OutputCollector<IntWritable, IntWritable> @out,
                Reporter reporter)
            {
                int payload = key.Get();
                int remaining = val.Get();

                // Counts down instead of up; a non-positive count emits nothing.
                while (remaining-- > 0)
                {
                    IntWritable shuffledKey = new IntWritable(Math.Abs(r.Next()));
                    @out.Collect(shuffledKey, new IntWritable(payload));
                }
            }
예제 #27
0
 /// <summary>
 /// Emits every value under one shared key holding the empty string; the
 /// incoming key is discarded.
 /// </summary>
 /// <param name="key">Input key (unused).</param>
 /// <param name="values">Values to forward.</param>
 /// <param name="output">Receives one (empty key, value) pair per input value.</param>
 /// <param name="reporter">Progress reporter (unused).</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Reduce(
     Org.Apache.Hadoop.IO.Text key,
     IEnumerator<Org.Apache.Hadoop.IO.Text> values,
     OutputCollector<Org.Apache.Hadoop.IO.Text, Org.Apache.Hadoop.IO.Text> output,
     Reporter reporter)
 {
     // One key instance is reused for every emitted record.
     Org.Apache.Hadoop.IO.Text blankKey = new Org.Apache.Hadoop.IO.Text(string.Empty);
     for (; values.HasNext();)
     {
         output.Collect(blankKey, values.Next());
     }
 }
예제 #28
0
            /// <summary>
            /// Adds up the int payloads of all values and emits the total under
            /// the unchanged key.
            /// </summary>
            /// <param name="key">Key emitted with the total.</param>
            /// <param name="values">Values to add up.</param>
            /// <param name="output">Receives the single (key, total) pair.</param>
            /// <param name="reporter">Progress reporter (unused).</param>
            /// <exception cref="System.IO.IOException"/>
            public virtual void Reduce(
                Text key,
                IEnumerator<IntWritable> values,
                OutputCollector<Text, IntWritable> output,
                Reporter reporter)
            {
                int total = 0;
                for (; values.HasNext();)
                {
                    IntWritable term = values.Next();
                    total += term.Get();
                }

                output.Collect(key, new IntWritable(total));
            }
예제 #29
0
        /// <summary>
        /// Emits, for every match of <c>pattern</c> in the value's text, the
        /// configured capture group as the key with a count of 1.
        /// </summary>
        /// <param name="key">Input key (unused).</param>
        /// <param name="value">Text searched for matches.</param>
        /// <param name="output">Receives one (matched group, 1) pair per match.</param>
        /// <param name="reporter">Progress reporter (unused).</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Map(
            K key,
            Text value,
            OutputCollector<Text, LongWritable> output,
            Reporter reporter)
        {
            for (Matcher hits = pattern.Matcher(value.ToString()); hits.Find();)
            {
                Text matched = new Text(hits.Group(group));
                output.Collect(matched, new LongWritable(1));
            }
        }
예제 #30
0
            /// <summary>
            /// Logs the byte count, execution time and I/O rate of one task and
            /// emits five entries: "tasks" (always 1), "size", "time", "rate"
            /// (rate multiplied by 1000) and "sqrate" (squared rate multiplied by 1000).
            /// </summary>
            /// <param name="output">Collector receiving the statistic entries.</param>
            /// <param name="name">Not read by this method.</param>
            /// <param name="execTime">Execution time used as the rate divisor and emitted under "time".</param>
            /// <param name="objSize">Number of bytes processed.</param>
            /// <exception cref="System.IO.IOException"/>
            internal override void CollectStats(
                OutputCollector<Text, Text> output,
                string name,
                long execTime,
                long objSize)
            {
                float ioRateMbSec = (float)objSize * 1000 / (execTime * Mega);

                Log.Info("Number of bytes processed = " + objSize);
                Log.Info("Exec time = " + execTime);
                Log.Info("IO rate = " + ioRateMbSec);

                string longPrefix = AccumulatingReducer.ValueTypeLong;
                output.Collect(new Text(longPrefix + "tasks"), new Text(1.ToString()));
                output.Collect(new Text(longPrefix + "size"), new Text(objSize.ToString()));
                output.Collect(new Text(longPrefix + "time"), new Text(execTime.ToString()));

                string floatPrefix = AccumulatingReducer.ValueTypeFloat;
                output.Collect(new Text(floatPrefix + "rate"), new Text((ioRateMbSec * 1000).ToString()));
                output.Collect(new Text(floatPrefix + "sqrate"), new Text((ioRateMbSec * ioRateMbSec * 1000).ToString()));
            }