/// <summary>
/// Emits 4096 fixed-size records, then one small and one large "tagged"
/// record whose keys are prefixed with "A" and "B" respectively.
/// </summary>
/// <remarks>
/// To validate the merge, segments should contain both a small and a large
/// record, such that reading a large record from an on-disk segment into an
/// in-memory segment will write over the beginning of a record in the
/// in-memory segment, causing the merge and/or validation to fail.
/// </remarks>
/// <param name="nk">Input key; not read.</param>
/// <param name="nv">Input value; not read.</param>
/// <param name="output">Collector receiving every emitted record.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Map(NullWritable nk, NullWritable nv, OutputCollector<Org.Apache.Hadoop.IO.Text, Org.Apache.Hadoop.IO.Text> output, Reporter reporter)
{
    // The first byte of every value carries this mapper's id.
    byte idTag = unchecked((byte)id);

    // Fixed-size records: 1000-byte values keyed by the formatted sequence number.
    val.Set(b, 0, 1000);
    val.GetBytes()[0] = idTag;
    for (int seq = 0; seq < 4096; seq++)
    {
        string fixedKey = fmt.Format(tagfmt, seq).ToString();
        key.Set(fixedKey);
        output.Collect(key, val);
        // Truncate the formatter's buffer back to keylen characters for the next key.
        ((StringBuilder)fmt.Out()).Length = keylen;
    }

    // Small tagged record: 128 bytes shorter than the large one.
    int smallLength = GetValLen(id, nMaps) - 128;
    val.Set(b, 0, smallLength);
    val.GetBytes()[0] = idTag;
    ((StringBuilder)fmt.Out()).Length = keylen;
    string smallKey = "A" + fmt.Format(tagfmt, id).ToString();
    key.Set(smallKey);
    output.Collect(key, val);

    // Large tagged record.
    int largeLength = GetValLen(id, nMaps);
    val.Set(b, 0, largeLength);
    val.GetBytes()[0] = idTag;
    ((StringBuilder)fmt.Out()).Length = keylen;
    string largeKey = "B" + fmt.Format(tagfmt, id).ToString();
    key.Set(largeKey);
    output.Collect(key, val);
}
/// <summary>
/// Emits the statistics of one processed block: always a "blocks" count of 1,
/// then either the bad-block description or the size/time/rate figures.
/// </summary>
/// <param name="output">Collector receiving the statistics records.</param>
/// <param name="name">Not used by this method.</param>
/// <param name="execTime">Execution time; used as the divisor of the IO rate.</param>
/// <param name="corruptedBlock">
/// Either a <c>string</c>, which is emitted under "badBlocks", or a boxed
/// <c>long</c> holding the number of bytes processed.
/// </param>
/// <exception cref="System.IO.IOException"/>
internal override void CollectStats(OutputCollector<Text, Text> output, string name, long execTime, object corruptedBlock)
{
    output.Collect(new Text(AccumulatingReducer.ValueTypeLong + "blocks"), new Text(1.ToString()));

    // Test the runtime type directly. Matching on the type *name* ending in
    // "String" would also accept unrelated types and then fail on the cast.
    string badBlock = corruptedBlock as string;
    if (badBlock != null)
    {
        output.Collect(new Text(AccumulatingReducer.ValueTypeString + "badBlocks"), new Text(badBlock));
        return;
    }

    long totalSize = (long)corruptedBlock;
    // bytes * 1000 / (execTime * 2^20)
    float ioRateMbSec = (float)totalSize * 1000 / (execTime * 0x100000);
    Log.Info("Number of bytes processed = " + totalSize);
    Log.Info("Exec time = " + execTime);
    Log.Info("IO rate = " + ioRateMbSec);

    output.Collect(new Text(AccumulatingReducer.ValueTypeLong + "size"), new Text(totalSize.ToString()));
    output.Collect(new Text(AccumulatingReducer.ValueTypeLong + "time"), new Text(execTime.ToString()));
    // The rate is emitted multiplied by 1000.
    output.Collect(new Text(AccumulatingReducer.ValueTypeFloat + "rate"), new Text((ioRateMbSec * 1000).ToString()));
}
/// <summary>Combines the values of one key through a value aggregator.</summary>
/// <param name="key">
/// Expected to be a Text object whose prefix, up to the type separator,
/// names the kind of aggregation applied to the values.
/// </param>
/// <param name="values">The values to combine.</param>
/// <param name="output">Collector receiving the combined values.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public override void Reduce(Text key, IEnumerator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
{
    string fullKey = key.ToString();
    int separatorAt = fullKey.IndexOf(ValueAggregatorDescriptor.TypeSeparator);
    string aggregatorType = Sharpen.Runtime.Substring(fullKey, 0, separatorAt);
    ValueAggregator aggregator = ValueAggregatorBaseDescriptor.GenerateValueAggregator(aggregatorType);

    // Feed every incoming value to the aggregator.
    while (values.HasNext())
    {
        aggregator.AddNextValue(values.Next());
    }

    // Emit each combiner output under the original key; anything that is not
    // already a Text is wrapped via its string form.
    IEnumerator combined = aggregator.GetCombinerOutput().GetEnumerator();
    while (combined.HasNext())
    {
        object item = combined.Next();
        Text asText = item as Text;
        if (asText == null)
        {
            asText = new Text(item.ToString());
        }
        output.Collect(key, asText);
    }
}
/// <summary>
/// Aggregates the values of one key according to the key's prefix:
/// "l:" sums them, "min:" keeps the minimum, "max:" keeps the maximum.
/// </summary>
/// <remarks>
/// Both the minimum and the maximum use -1 as the "nothing seen yet" marker.
/// Once a minimum has been recorded, later values equal to 0 never replace it.
/// </remarks>
/// <param name="key">Key whose string form selects the aggregation.</param>
/// <param name="values">Values, each parsed as a long.</param>
/// <param name="output">Collector receiving the aggregated result.</param>
/// <param name="reporter">Receives "starting" and "finished" status messages.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Reduce(Text key, IEnumerator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
{
    string field = key.ToString();
    reporter.SetStatus("starting " + field + " ::host = " + hostName);

    if (field.StartsWith("l:"))
    {
        long total = 0;
        while (values.HasNext())
        {
            total += long.Parse(values.Next().ToString());
        }
        output.Collect(key, new Text(total.ToString()));
    }

    if (field.StartsWith("min:"))
    {
        long smallest = -1;
        while (values.HasNext())
        {
            long candidate = long.Parse(values.Next().ToString());
            // Take the candidate when nothing is recorded yet, or when it is
            // a non-zero value below the current minimum.
            if (smallest == -1 || (candidate != 0 && candidate < smallest))
            {
                smallest = candidate;
            }
        }
        output.Collect(key, new Text(smallest.ToString()));
    }

    if (field.StartsWith("max:"))
    {
        long largest = -1;
        while (values.HasNext())
        {
            long candidate = long.Parse(values.Next().ToString());
            if (largest == -1 || candidate > largest)
            {
                largest = candidate;
            }
        }
        output.Collect(key, new Text(largest.ToString()));
    }

    reporter.SetStatus("finished " + field + " ::host = " + hostName);
}
/// <summary>
/// Emits the input value keyed by its own lower-cased string form.
/// </summary>
/// <param name="key">Input key; not read.</param>
/// <param name="value">Text emitted unchanged as the output value.</param>
/// <param name="output">Collector receiving the (lower-cased text, value) pair.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Map(WritableComparable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
{
    Text loweredKey = new Text(StringUtils.ToLowerCase(value.ToString()));
    output.Collect(loweredKey, value);
}
/// <summary>
/// Checks the contents of a joined tuple against its key and then emits
/// (key, one).
/// </summary>
/// <remarks>
/// When the key is a multiple of srcs * srcs, every tuple position must hold
/// an IntWritable satisfying (value - i) * srcs == 10 * key. Otherwise only
/// position key % srcs may be populated, and it must satisfy
/// srcs * (value - i) == 10 * (key - i); all other positions must be empty.
/// </remarks>
/// <param name="key">Join key.</param>
/// <param name="val">Tuple to validate.</param>
/// <param name="out">Collector receiving (key, one).</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public override void Map(IntWritable key, TupleWritable val, OutputCollector<IntWritable, IntWritable> @out, Reporter reporter)
{
    int k = key.Get();
    string failureText = "Unexpected tuple: " + Stringify(key, val);
    bool expectAllSources = 0 == k % (srcs * srcs);

    for (int pos = 0; pos < val.Size(); ++pos)
    {
        if (expectAllSources)
        {
            NUnit.Framework.Assert.IsTrue(failureText, val.Get(pos) is IntWritable);
            int actual = ((IntWritable)val.Get(pos)).Get();
            NUnit.Framework.Assert.IsTrue(failureText, (actual - pos) * srcs == 10 * k);
        }
        else if (pos == k % srcs)
        {
            NUnit.Framework.Assert.IsTrue(failureText, val.Get(pos) is IntWritable);
            int actual = ((IntWritable)val.Get(pos)).Get();
            NUnit.Framework.Assert.IsTrue(failureText, srcs * (actual - pos) == 10 * (k - pos));
        }
        else
        {
            NUnit.Framework.Assert.IsTrue(failureText, !val.Has(pos));
        }
    }

    @out.Collect(key, one);
}
/// <summary>
/// Writes randomly sized, randomly filled key/value records until
/// numBytesToWrite drops to zero or below.
/// </summary>
/// <remarks>
/// Each record's key length is minKeySize plus a random offset below
/// keySizeRange (no offset when the range is 0); value lengths are chosen the
/// same way from minValueSize and valueSizeRange. A status line is reported
/// every 200 records and once more at the end.
/// NOTE(review): if a record can have a combined length of 0, numBytesToWrite
/// never decreases and the loop does not terminate — confirm the configured
/// minimum sizes rule that out.
/// </remarks>
/// <param name="key">Input key; not read.</param>
/// <param name="value">Input value; not read.</param>
/// <param name="output">Collector receiving the generated records.</param>
/// <param name="reporter">Receives the byte/record counters and status text.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Map(WritableComparable key, Writable value, OutputCollector<BytesWritable, BytesWritable> output, Reporter reporter)
{
    int itemCount = 0;
    while (numBytesToWrite > 0)
    {
        int keyLength = minKeySize + (keySizeRange != 0 ? random.Next(keySizeRange) : 0);
        randomKey.SetSize(keyLength);
        RandomizeBytes(randomKey.GetBytes(), 0, randomKey.GetLength());

        int valueLength = minValueSize + (valueSizeRange != 0 ? random.Next(valueSizeRange) : 0);
        randomValue.SetSize(valueLength);
        RandomizeBytes(randomValue.GetBytes(), 0, randomValue.GetLength());

        output.Collect(randomKey, randomValue);

        int recordBytes = keyLength + valueLength;
        numBytesToWrite -= recordBytes;

        // BytesWritten advances by the size of the record just written;
        // incrementing it by 1 would make it a duplicate of RecordsWritten.
        reporter.IncrCounter(ThreadedMapBenchmark.Counters.BytesWritten, recordBytes);
        reporter.IncrCounter(ThreadedMapBenchmark.Counters.RecordsWritten, 1);

        if (++itemCount % 200 == 0)
        {
            reporter.SetStatus("wrote record " + itemCount + ". " + numBytesToWrite + " bytes left.");
        }
    }
    reporter.SetStatus("done with " + itemCount + " records.");
}
/// <summary>
/// Forwards every value under its key, deliberately misbehaving on the two
/// configured "bad" records.
/// </summary>
/// <remarks>
/// A value equal to ReducerBadRecords[0] terminates the process with exit
/// code -1. A value equal to ReducerBadRecords[1] sleeps for 15 minutes (any
/// exception raised while sleeping is printed and ignored) and is then
/// forwarded like any other value.
/// </remarks>
/// <param name="key">Key emitted with each value.</param>
/// <param name="values">Values to forward.</param>
/// <param name="output">Collector receiving the forwarded pairs.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Reduce(LongWritable key, IEnumerator<Text> values, OutputCollector<LongWritable, Text> output, Reporter reporter)
{
    while (values.HasNext())
    {
        Text value = values.Next();
        Log.Debug("REDUCE key:" + key + " value:" + value);

        if (ReducerBadRecords[0].Equals(value.ToString()))
        {
            Log.Warn("REDUCE Encountered BAD record");
            System.Environment.Exit(-1);
        }
        else if (ReducerBadRecords[1].Equals(value.ToString()))
        {
            try
            {
                Log.Warn("REDUCE Encountered BAD record");
                Sharpen.Thread.Sleep(15 * 60 * 1000);
            }
            catch (Exception e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }

        output.Collect(key, value);
    }
}
/// <summary>
/// Verifies that keys arrive in descending order, that each key's values are
/// in descending order, and that each key has exactly five values; then emits
/// (key, "success").
/// </summary>
/// <remarks>
/// The lastKey field keeps track of the last key seen across calls and is
/// updated on every invocation.
/// </remarks>
/// <param name="key">Current key.</param>
/// <param name="values">Values grouped under the key.</param>
/// <param name="out">Collector receiving the success marker.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Reduce(IntWritable key, IEnumerator<IntWritable> values, OutputCollector<IntWritable, Text> @out, Reporter reporter)
{
    // Key order: a key larger than its predecessor breaks descending order.
    int thisKey = key.Get();
    if (thisKey > lastKey)
    {
        NUnit.Framework.Assert.Fail("Keys not in sorted descending order");
    }
    lastKey = thisKey;

    // Value order: start from the largest int so the first value always passes.
    IntWritable prior = new IntWritable(int.MaxValue);
    int seen = 0;
    for (; values.HasNext(); ++seen)
    {
        IntWritable next = values.Next();
        if (next.CompareTo(prior) > 0)
        {
            NUnit.Framework.Assert.Fail("Values generated by Mapper not in order");
        }
        prior = next;
    }

    if (seen != 5)
    {
        NUnit.Framework.Assert.Fail("Values not grouped by primary key");
    }

    @out.Collect(key, new Text("success"));
}
/// <summary>
/// Forwards the input pair unchanged and counts it under both the enum
/// counter and the string-named counter.
/// </summary>
/// <param name="key">Key forwarded to the output.</param>
/// <param name="value">Value forwarded to the output.</param>
/// <param name="output">Collector receiving the pair.</param>
/// <param name="reporter">Receives the two counter increments.</param>
/// <exception cref="System.IO.IOException"/>
public override void Map(K key, V value, OutputCollector<K, V> output, Reporter reporter)
{
    output.Collect(key, value);

    // Same logical counter, addressed once by enum and once by group/name strings.
    reporter.IncrCounter(TestUserDefinedCounters.EnumCounter.MapRecords, 1);
    reporter.IncrCounter("StringCounter", "MapRecords", 1);
}
/// <summary>
/// Emits every incoming value as an output key, paired with a null value.
/// </summary>
/// <param name="key">Input key; not read.</param>
/// <param name="it">Values that become the output keys.</param>
/// <param name="out">Collector receiving (value, null) pairs.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Reduce(IntWritable key, IEnumerator<IntWritable> it, OutputCollector<IntWritable, IntWritable> @out, Reporter reporter)
{
    for (; it.HasNext(); )
    {
        IntWritable promoted = it.Next();
        @out.Collect(promoted, null);
    }
}
/// <summary>Emits each value unchanged under the given key.</summary>
/// <param name="key">Key emitted with every value.</param>
/// <param name="values">Values to forward.</param>
/// <param name="output">Collector receiving the pairs.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Reduce(LongWritable key, IEnumerator<Text> values, OutputCollector<LongWritable, Text> output, Reporter reporter)
{
    for (; values.HasNext(); )
    {
        Text forwarded = values.Next();
        output.Collect(key, forwarded);
    }
}
/// <summary>
/// Emits, for each value, a new UTF8 built from the value's string form,
/// under the given key.
/// </summary>
/// <param name="key">Key emitted with every value.</param>
/// <param name="values">Values to copy.</param>
/// <param name="output">Collector receiving the pairs.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Reduce(UTF8 key, IEnumerator<UTF8> values, OutputCollector<UTF8, UTF8> output, Reporter reporter)
{
    for (; values.HasNext(); )
    {
        // A fresh UTF8 instance is created rather than forwarding the iterated one.
        UTF8 copy = new UTF8(values.Next().ToString());
        output.Collect(key, copy);
    }
}
/// <summary>Writes the key together with each of its values directly to output.</summary>
/// <param name="key">Key emitted with every value.</param>
/// <param name="values">Values to forward.</param>
/// <param name="output">Collector receiving the pairs.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Reduce(K key, IEnumerator<V> values, OutputCollector<K, V> output, Reporter reporter)
{
    for (; values.HasNext(); )
    {
        V forwarded = values.Next();
        output.Collect(key, forwarded);
    }
}
/// <summary>
/// Emits new IntWritable instances holding the same integers as the input
/// key and value.
/// </summary>
/// <param name="key">Key whose integer is copied.</param>
/// <param name="val">Value whose integer is copied.</param>
/// <param name="out">Collector receiving the copied pair.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Map(IntWritable key, IntWritable val, OutputCollector<IntWritable, IntWritable> @out, Reporter reporter)
{
    IntWritable keyCopy = new IntWritable(key.Get());
    IntWritable valCopy = new IntWritable(val.Get());
    @out.Collect(keyCopy, valCopy);
}
/// <summary>
/// Emits a key built by Pair(key, value), paired with this instance's
/// value field.
/// </summary>
/// <param name="key">Input key, passed to Pair.</param>
/// <param name="value">Input value, passed to Pair.</param>
/// <param name="output">Collector receiving (paired key, this.value).</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Map(BytesWritable key, BytesWritable value, OutputCollector<BytesWritable, IntWritable> output, Reporter reporter)
{
    // The emitted value is the instance field, not the input parameter of the same name.
    output.Collect(new BytesWritable(Pair(key, value)), this.value);
}
/// <summary>
/// Splits the input text at its first comma and emits the part before it as
/// a Text key and the part after it, parsed as a long, as the value.
/// </summary>
/// <param name="key">Input key; not read.</param>
/// <param name="value">Text to split at its first comma.</param>
/// <param name="output">Collector receiving the parsed pair.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output, Reporter reporter)
{
    string line = value.ToString();
    int commaAt = line.IndexOf(",");
    string head = Sharpen.Runtime.Substring(line, 0, commaAt);
    string tail = Sharpen.Runtime.Substring(line, commaAt + 1);
    output.Collect(new Text(head), new LongWritable(long.Parse(tail)));
}
/// <summary>
/// Forwards the input pair unless this instance's id ends with "0_0", in
/// which case nothing is emitted.
/// </summary>
/// <param name="key">Key forwarded to the output.</param>
/// <param name="val">Value forwarded to the output.</param>
/// <param name="output">Collector receiving the pair.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Map(LongWritable key, Text val, OutputCollector<LongWritable, Text> output, Reporter reporter)
{
    // Everybody other than id 0 outputs.
    if (id.EndsWith("0_0"))
    {
        return;
    }
    output.Collect(key, val);
}
/// <summary>
/// Runs the "Mapper not configured!" assertion on the loader, calls
/// loader.Load(), then forwards the input pair unchanged.
/// </summary>
/// <param name="key">Key forwarded to the output.</param>
/// <param name="val">Value forwarded to the output.</param>
/// <param name="output">Collector receiving the pair.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Map(WritableComparable key, Writable val, OutputCollector<WritableComparable, Writable> output, Reporter reporter)
{
    NUnit.Framework.Assert.IsNotNull("Mapper not configured!", loader);

    // Load the memory before emitting anything.
    loader.Load();

    // Identity mapping.
    output.Collect(key, val);
}
/// <summary>
/// Counts one more record in total and emits the pair as many times as
/// needed for the ratio kept / total to reach keep.
/// </summary>
/// <remarks>
/// The pair may be emitted zero times or more than once in a single call;
/// kept is incremented before each emission.
/// </remarks>
/// <param name="key">Key to emit.</param>
/// <param name="val">Value to emit.</param>
/// <param name="out">Collector receiving the emitted pairs.</param>
/// <exception cref="System.IO.IOException"/>
protected internal virtual void Emit(K key, V val, OutputCollector<K, V> @out)
{
    total++;
    while (keep > (float)kept / total)
    {
        kept++;
        @out.Collect(key, val);
    }
}
/// <summary>
/// Runs the "Reducer not configured!" assertion on the loader, calls
/// loader.Load(), then emits the key as both output key and output value.
/// </summary>
/// <param name="key">Emitted as both key and value.</param>
/// <param name="val">Incoming values; not read.</param>
/// <param name="output">Collector receiving the (key, key) pair.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Reduce(WritableComparable key, IEnumerator<Writable> val, OutputCollector<WritableComparable, Writable> output, Reporter reporter)
{
    NUnit.Framework.Assert.IsNotNull("Reducer not configured!", loader);

    // Load the memory before emitting anything.
    loader.Load();

    // One record per key: the key is written in the value slot as well.
    output.Collect(key, key);
}
/// <summary>
/// Validates a record when it comes from the sort output, then emits
/// record statistics keyed by this instance's key field.
/// </summary>
/// <remarks>
/// On the first call (recordId == -1) the raw key/value helpers are created
/// from the runtime types of the inputs. When this.key is the sortOutput
/// instance, the method checks that the key type is stable, that keys are
/// non-decreasing under CompareTo, and that the partitioner assigns the
/// record to this instance's partition. The emitted statistics hold the
/// combined raw byte length, a record count of 1, and the XOR of the key and
/// value byte hashes.
/// </remarks>
/// <param name="key">Record key.</param>
/// <param name="value">Record value.</param>
/// <param name="output">Collector receiving (this.key, record statistics).</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException">
/// Thrown on a key type mismatch, an out-of-order key, or a partition mismatch.
/// </exception>
public virtual void Map(WritableComparable key, Writable value, OutputCollector<IntWritable, SortValidator.RecordStatsChecker.RecordStatsWritable> output, Reporter reporter)
{
    // Set up rawKey and rawValue on the first call to 'map'.
    if (recordId == -1)
    {
        rawKey = CreateRaw(key.GetType());
        rawValue = CreateRaw(value.GetType());
    }
    ++recordId;

    // Ordering and partitioning are only checked for records from sort's output.
    if (this.key == sortOutput)
    {
        if (prevKey != null)
        {
            // Sanity check: every key must have the type of the first one.
            if (keyClass != key.GetType())
            {
                throw new IOException("Type mismatch in key: expected " + keyClass.FullName + ", received " + key.GetType().FullName);
            }

            // Check that the keys were sorted correctly.
            if (prevKey.CompareTo(key) > 0)
            {
                throw new IOException("The 'map-reduce' framework wrongly" + " classifed (" + prevKey + ") > (" + key + ") " + "for record# " + recordId);
            }
            prevKey = key;
        }
        else
        {
            // First record: remember it and its type as the baseline.
            prevKey = key;
            keyClass = prevKey.GetType();
        }

        // Check that the sorted output is 'partitioned' right.
        int expectedPartition = partitioner.GetPartition(key, value, noSortReducers);
        if (partition != expectedPartition)
        {
            throw new IOException("Partitions do not match for record# " + recordId + " ! - '" + partition + "' v/s '" + expectedPartition + "'");
        }
    }

    // Construct the record-stats and output (this.key, record-stats).
    byte[] rawKeyBytes = rawKey.GetRawBytes(key);
    int rawKeyLength = rawKey.GetRawBytesLength(key);
    byte[] rawValueBytes = rawValue.GetRawBytes(value);
    int rawValueLength = rawValue.GetRawBytesLength(value);
    int checksum = WritableComparator.HashBytes(rawKeyBytes, rawKeyLength) ^ WritableComparator.HashBytes(rawValueBytes, rawValueLength);

    output.Collect(this.key, new SortValidator.RecordStatsChecker.RecordStatsWritable(rawKeyLength + rawValueLength, 1, checksum));
}
/// <summary>
/// Splits the input text at its first blank and emits the part before it as
/// the key and the part after it as the value.
/// </summary>
/// <remarks>
/// The keyText and valueText fields are overwritten and emitted on every call.
/// </remarks>
/// <param name="key">Input key; not read.</param>
/// <param name="value">Text to split at its first blank.</param>
/// <param name="output">Collector receiving (keyText, valueText).</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
{
    string line = value.ToString();
    int firstBlank = line.IndexOf(" ");

    string head = Sharpen.Runtime.Substring(line, 0, firstBlank);
    keyText.Set(head);

    string tail = Sharpen.Runtime.Substring(line, firstBlank + 1);
    valueText.Set(tail);

    output.Collect(keyText, valueText);
}
/// <summary>
/// Builds the output key and value by running field selection over the
/// string forms of the input key and value, then emits the result.
/// </summary>
/// <remarks>
/// A new FieldSelectionHelper, initialised with the empty text for both key
/// and value, is created on every call. The selection is driven by this
/// instance's field separator, key/value field lists, allMapValueFieldsFrom
/// and ignoreInputKey settings.
/// </remarks>
/// <param name="key">Input key; its string form is passed to the helper.</param>
/// <param name="val">Input value; its string form is passed to the helper.</param>
/// <param name="output">Collector receiving the helper's key and value.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Map(K key, V val, OutputCollector<Org.Apache.Hadoop.IO.Text, Org.Apache.Hadoop.IO.Text> output, Reporter reporter)
{
    FieldSelectionHelper selector = new FieldSelectionHelper(FieldSelectionHelper.emptyText, FieldSelectionHelper.emptyText);

    string keyString = key.ToString();
    string valueString = val.ToString();
    selector.ExtractOutputKeyValue(keyString, valueString, fieldSeparator, mapOutputKeyFieldList, mapOutputValueFieldList, allMapValueFieldsFrom, ignoreInputKey, true);

    output.Collect(selector.GetKey(), selector.GetValue());
}
/// <summary>
/// Accumulates the values of one key according to the key's type prefix:
/// string values are concatenated with a trailing ";" each, float values are
/// summed as floats, and long values are summed as longs.
/// </summary>
/// <remarks>
/// Only the first matching prefix is applied. A key matching none of the
/// prefixes produces no output, but both status messages are still reported.
/// </remarks>
/// <param name="key">Key whose string form starts with the value-type prefix.</param>
/// <param name="values">Values to accumulate.</param>
/// <param name="output">Collector receiving the accumulated result.</param>
/// <param name="reporter">Receives "starting" and "finished" status messages.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Reduce(Text key, IEnumerator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
{
    string field = key.ToString();
    reporter.SetStatus("starting " + field + " ::host = " + hostName);

    if (field.StartsWith(ValueTypeString))
    {
        // Concatenate strings, each followed by ";".
        StringBuilder sSum = new StringBuilder();
        while (values.HasNext())
        {
            sSum.Append(values.Next().ToString()).Append(";");
        }
        output.Collect(key, new Org.Apache.Hadoop.IO.Text(sSum.ToString()));
    }
    else if (field.StartsWith(ValueTypeFloat))
    {
        // Sum float values. float.Parse is the C# counterpart of Java's
        // Float.parseFloat; there is no float.ParseFloat.
        float fSum = 0;
        while (values.HasNext())
        {
            fSum += float.Parse(values.Next().ToString());
        }
        output.Collect(key, new Org.Apache.Hadoop.IO.Text(fSum.ToString()));
    }
    else if (field.StartsWith(ValueTypeLong))
    {
        // Sum long values.
        long lSum = 0;
        while (values.HasNext())
        {
            lSum += long.Parse(values.Next().ToString());
        }
        output.Collect(key, new Org.Apache.Hadoop.IO.Text(lSum.ToString()));
    }

    reporter.SetStatus("finished " + field + " ::host = " + hostName);
}
/// <summary>
/// Emits the input key's integer as a value, repeated as many times as the
/// input value's integer says, each time under a key taken from
/// Math.Abs(r.Next()).
/// </summary>
/// <param name="key">Holds the integer emitted as each output value.</param>
/// <param name="val">Holds the number of records to emit.</param>
/// <param name="out">Collector receiving the generated records.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Map(IntWritable key, IntWritable val, OutputCollector<IntWritable, IntWritable> @out, Reporter reporter)
{
    int payload = key.Get();
    int copies = val.Get();
    for (int remaining = copies; remaining > 0; remaining--)
    {
        IntWritable generatedKey = new IntWritable(Math.Abs(r.Next()));
        @out.Collect(generatedKey, new IntWritable(payload));
    }
}
/// <summary>
/// Emits every value under one shared empty-string key, discarding the
/// input key.
/// </summary>
/// <param name="key">Input key; not read.</param>
/// <param name="values">Values to forward.</param>
/// <param name="output">Collector receiving (empty key, value) pairs.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Reduce(Org.Apache.Hadoop.IO.Text key, IEnumerator<Org.Apache.Hadoop.IO.Text> values, OutputCollector<Org.Apache.Hadoop.IO.Text, Org.Apache.Hadoop.IO.Text> output, Reporter reporter)
{
    // One empty key instance is created per call and reused for all values.
    Org.Apache.Hadoop.IO.Text emptyKey = new Org.Apache.Hadoop.IO.Text(string.Empty);
    for (; values.HasNext(); )
    {
        output.Collect(emptyKey, values.Next());
    }
}
/// <summary>Emits the key with the integer sum of all its values.</summary>
/// <param name="key">Key emitted with the sum.</param>
/// <param name="values">Values whose integers are added up.</param>
/// <param name="output">Collector receiving (key, sum).</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Reduce(Text key, IEnumerator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter)
{
    int total = 0;
    for (; values.HasNext(); )
    {
        IntWritable addend = values.Next();
        total += addend.Get();
    }
    output.Collect(key, new IntWritable(total));
}
/// <summary>
/// Emits (matched group text, 1) for every match of the configured pattern
/// in the input value.
/// </summary>
/// <param name="key">Input key; not read.</param>
/// <param name="value">Text searched for matches.</param>
/// <param name="output">Collector receiving one record per match.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Map(K key, Text value, OutputCollector<Text, LongWritable> output, Reporter reporter)
{
    for (Matcher hits = pattern.Matcher(value.ToString()); hits.Find(); )
    {
        // The emitted key is the capture group selected by the group field.
        Text matched = new Text(hits.Group(group));
        output.Collect(matched, new LongWritable(1));
    }
}
/// <summary>
/// Logs and emits the statistics of one task: a "tasks" count of 1, the
/// processed size, the execution time, the IO rate and the squared IO rate.
/// </summary>
/// <remarks>
/// The IO rate is computed as size * 1000 / (execTime * Mega). Both "rate"
/// and "sqrate" are emitted multiplied by 1000.
/// </remarks>
/// <param name="output">Collector receiving the statistics records.</param>
/// <param name="name">Not used by this method.</param>
/// <param name="execTime">Execution time; used as the divisor of the IO rate.</param>
/// <param name="objSize">Number of bytes processed.</param>
/// <exception cref="System.IO.IOException"/>
internal override void CollectStats(OutputCollector<Text, Text> output, string name, long execTime, long objSize)
{
    long totalSize = objSize;
    float ioRateMbSec = (float)totalSize * 1000 / (execTime * Mega);

    Log.Info("Number of bytes processed = " + totalSize);
    Log.Info("Exec time = " + execTime);
    Log.Info("IO rate = " + ioRateMbSec);

    // Long-typed statistics.
    Text tasksKey = new Text(AccumulatingReducer.ValueTypeLong + "tasks");
    output.Collect(tasksKey, new Text(1.ToString()));
    Text sizeKey = new Text(AccumulatingReducer.ValueTypeLong + "size");
    output.Collect(sizeKey, new Text(totalSize.ToString()));
    Text timeKey = new Text(AccumulatingReducer.ValueTypeLong + "time");
    output.Collect(timeKey, new Text(execTime.ToString()));

    // Float-typed statistics.
    Text rateKey = new Text(AccumulatingReducer.ValueTypeFloat + "rate");
    output.Collect(rateKey, new Text((ioRateMbSec * 1000).ToString()));
    Text sqrateKey = new Text(AccumulatingReducer.ValueTypeFloat + "sqrate");
    output.Collect(sqrateKey, new Text((ioRateMbSec * ioRateMbSec * 1000).ToString()));
}