// JobConfigurable
/// <summary>
/// Routes each record to a reducer based on its operation type, so all output
/// for one operation lands in the same reducer.
/// </summary>
/// <param name="key">the operation output key</param>
/// <param name="value">the operation output value</param>
/// <param name="numPartitions">total number of reduce partitions</param>
/// <returns>a partition index in [0, numPartitions)</returns>
public virtual int GetPartition(Text key, Text value, int numPartitions)
{
    // Partitioner
    OperationOutput parsed = new OperationOutput(key, value);
    int typeHash = parsed.GetOperationType().GetHashCode();
    // Mask off the sign bit so the modulus is always non-negative.
    return (typeHash & int.MaxValue) % numPartitions;
}
/*
 * (non-Javadoc)
 *
 * @see org.apache.hadoop.mapred.Reducer#reduce(java.lang.Object,
 * java.util.Iterator, org.apache.hadoop.mapred.OutputCollector,
 * org.apache.hadoop.mapred.Reporter)
 */
/// <exception cref="System.IO.IOException"/>
public virtual void Reduce(Text key, IEnumerator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
{
    // Reducer: fold every value for this key into one merged OperationOutput.
    OperationOutput merged = null;
    int mergedCount = 0;
    int failureCount = 0;
    LogAndSetStatus(reporter, "Iterating over reduction values for key " + key);
    while (values.HasNext())
    {
        Text value = values.Next();
        try
        {
            OperationOutput parsed = new OperationOutput(key, value);
            // First value seeds the accumulator; later values merge into it.
            merged = (merged == null) ? parsed : OperationOutput.Merge(merged, parsed);
            Log.Info("Combined " + parsed + " into/with " + merged);
            ++mergedCount;
        }
        catch (Exception e)
        {
            ++failureCount;
            LogAndSetStatus(reporter, "Error iterating over reduction input " + value + " due to : " + StringUtils.StringifyException(e));
            if (GetConfig().ShouldExitOnFirstError())
            {
                break;
            }
        }
    }
    LogAndSetStatus(reporter, "Reduced " + mergedCount + " values with " + failureCount + " errors");
    if (merged != null)
    {
        LogAndSetStatus(reporter, "Writing output " + merged.GetKey() + " : " + merged.GetOutputValue());
        output.Collect(merged.GetKey(), merged.GetOutputValue());
    }
}
/// <summary>Provides a more detailed report for a given operation.</summary>
/// <remarks>
/// Provides a more detailed report for a given operation. This will output the
/// keys and values for all input and then sort based on measurement type and
/// attempt to show rates for various metrics which have expected types to be
/// able to measure their rate. Currently this will show rates for bytes
/// written, success count, files created, directory entries, op count and
/// bytes read if the variable for time taken is available for each measurement
/// type.
/// </remarks>
/// <param name="operation">the operation that is being reported on.</param>
/// <param name="input">the set of data for that operation.</param>
/// <param name="os">
/// any print writer for which output should be written to (along with
/// the logging library)
/// </param>
internal virtual void OpReport(string operation, IList<OperationOutput> input, PrintWriter os)
{
    WriteMessage("Basic report for operation type " + operation, os);
    WriteMessage(GetSectionDelimiter(), os);
    foreach (OperationOutput data in input)
    {
        WriteMessage("Measurement \"" + data.GetMeasurementType() + "\" = " + data.GetValue(), os);
    }
    // split up into measurement types for rates...
    IDictionary<string, OperationOutput> combined = new SortedDictionary<string, OperationOutput>();
    foreach (OperationOutput data in input)
    {
        string type = data.GetMeasurementType();
        // FIX: TryGetValue instead of Contains(key) — IDictionary<TKey,TValue>
        // has no Contains(TKey) overload, so the original did not compile; this
        // also avoids the double lookup.
        OperationOutput curr;
        if (combined.TryGetValue(type, out curr))
        {
            combined[type] = OperationOutput.Merge(curr, data);
        }
        else
        {
            combined[type] = data;
        }
    }
    // handle the known types
    // FIX: the SortedDictionary indexer throws KeyNotFoundException when the
    // time-taken measurement is absent; the Java original relied on Map.get
    // returning null (hence the null check), so use TryGetValue.
    OperationOutput timeTaken;
    if (combined.TryGetValue(OkTimeTaken, out timeTaken) && timeTaken != null)
    {
        long mTaken = long.Parse(timeTaken.GetValue().ToString());
        if (mTaken > 0)
        {
            NumberFormat formatter = Formatter.GetDecimalFormatter();
            double seconds = mTaken / 1000.0d;
            foreach (string measurementType in combined.Keys)
            {
                // FIX: double? rather than double — a plain double cannot hold
                // null; null marks "no rate defined for this measurement type".
                double? rate = null;
                string rateType = string.Empty;
                if (measurementType.Equals(BytesWritten))
                {
                    long mbWritten = long.Parse(combined[measurementType].GetValue().ToString()) / (Constants.Megabytes);
                    rate = (double)mbWritten / seconds;
                    rateType = "MB/sec";
                }
                else if (measurementType.Equals(Successes))
                {
                    long succ = long.Parse(combined[measurementType].GetValue().ToString());
                    rate = (double)succ / seconds;
                    rateType = "successes/sec";
                }
                else if (measurementType.Equals(FilesCreated))
                {
                    long filesCreated = long.Parse(combined[measurementType].GetValue().ToString());
                    rate = (double)filesCreated / seconds;
                    rateType = "files created/sec";
                }
                else if (measurementType.Equals(DirEntries))
                {
                    long entries = long.Parse(combined[measurementType].GetValue().ToString());
                    rate = (double)entries / seconds;
                    rateType = "directory entries/sec";
                }
                else if (measurementType.Equals(OpCount))
                {
                    long opCount = long.Parse(combined[measurementType].GetValue().ToString());
                    rate = (double)opCount / seconds;
                    rateType = "operations/sec";
                }
                else if (measurementType.Equals(BytesRead))
                {
                    long mbRead = long.Parse(combined[measurementType].GetValue().ToString()) / (Constants.Megabytes);
                    rate = (double)mbRead / seconds;
                    rateType = "MB/sec";
                }
                if (rate != null)
                {
                    WriteMessage("Rate for measurement \"" + measurementType + "\" = " + formatter.Format(rate.Value) + " " + rateType, os);
                }
            }
        }
    }
    WriteMessage(GetSectionDelimiter(), os);
}
/*
 * (non-Javadoc)
 *
 * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object,
 * java.lang.Object, org.apache.hadoop.mapred.OutputCollector,
 * org.apache.hadoop.mapred.Reporter)
 */
/// <exception cref="System.IO.IOException"/>
public virtual void Map(object key, object value, OutputCollector<Text, Text> output, Reporter reporter)
{
    // Mapper: keys/values are ignored; we just run randomly selected
    // operations until the configured duration expires.
    LogAndSetStatus(reporter, "Running slive mapper for dummy key " + key + " and dummy value " + value);
    //Add taskID to randomSeed to deterministically seed rnd.
    Random rnd = config.GetRandomSeed() != null
        ? new Random(this.taskId + config.GetRandomSeed())
        : new Random();
    WeightSelector selector = new WeightSelector(config, rnd);
    long startTime = Timer.Now();
    long opsRun = 0;
    long sleepsRun = 0;
    int duration = GetConfig().GetDurationMilliseconds();
    Range<long> sleepRange = GetConfig().GetSleepRange();
    Operation sleeper = (sleepRange != null) ? new SleepOp(GetConfig(), rnd) : null;
    while (Timer.Elapsed(startTime) < duration)
    {
        try
        {
            LogAndSetStatus(reporter, "Attempting to select operation #" + (opsRun + 1));
            int currElapsed = (int)(Timer.Elapsed(startTime));
            Operation op = selector.Select(currElapsed, duration);
            if (op == null)
            {
                // no ops left
                break;
            }
            // got a good op
            ++opsRun;
            RunOperation(op, reporter, output, opsRun);
            // do a sleep??
            if (sleeper != null)
            {
                // these don't count against the number of operations
                ++sleepsRun;
                RunOperation(sleeper, reporter, output, sleepsRun);
            }
        }
        catch (Exception e)
        {
            LogAndSetStatus(reporter, "Failed at running due to " + StringUtils.StringifyException(e));
            if (GetConfig().ShouldExitOnFirstError())
            {
                break;
            }
        }
    }
    // write out any accumulated mapper stats
    long timeTaken = Timer.Elapsed(startTime);
    OperationOutput opCount = new OperationOutput(OperationOutput.OutputType.Long, OpType, ReportWriter.OpCount, opsRun);
    output.Collect(opCount.GetKey(), opCount.GetOutputValue());
    OperationOutput overallTime = new OperationOutput(OperationOutput.OutputType.Long, OpType, ReportWriter.OkTimeTaken, timeTaken);
    output.Collect(overallTime.GetKey(), overallTime.GetOutputValue());
    LogAndSetStatus(reporter, "Finished " + opsRun + " operations in " + timeTaken + " milliseconds");
}
/// <summary>
/// Attempts to write the report to the given output using the specified
/// config.
/// </summary>
/// <remarks>
/// Attempts to write the report to the given output using the specified
/// config. It will open up the expected reducer output file and read in its
/// contents and then split up by operation output and sort by operation type
/// and then for each operation type it will generate a report to the specified
/// result file and the console.
/// </remarks>
/// <param name="cfg">the config specifying the files and output</param>
/// <exception cref="System.Exception">if files can not be opened/closed/read or invalid format
/// </exception>
private void WriteReport(ConfigExtractor cfg)
{
    Path dn = cfg.GetOutputPath();
    Log.Info("Writing report using contents of " + dn);
    FileSystem fs = dn.GetFileSystem(cfg.GetConfig());
    FileStatus[] reduceFiles = fs.ListStatus(dn);
    BufferedReader fileReader = null;
    PrintWriter reportWriter = null;
    try
    {
        IList<OperationOutput> noOperations = new AList<OperationOutput>();
        IDictionary<string, IList<OperationOutput>> splitTypes = new SortedDictionary<string, IList<OperationOutput>>();
        foreach (FileStatus fn in reduceFiles)
        {
            // only reducer output files ("part-*") are report input
            if (!fn.GetPath().GetName().StartsWith("part"))
            {
                continue;
            }
            fileReader = new BufferedReader(new InputStreamReader(new DataInputStream(fs.Open(fn.GetPath()))));
            string line;
            while ((line = fileReader.ReadLine()) != null)
            {
                // reducer output is tab-separated key/value pairs
                string[] pieces = line.Split("\t", 2);
                if (pieces.Length == 2)
                {
                    OperationOutput data = new OperationOutput(pieces[0], pieces[1]);
                    string op = (data.GetOperationType());
                    if (op != null)
                    {
                        // FIX: TryGetValue instead of the indexer — the
                        // SortedDictionary indexer throws KeyNotFoundException
                        // for an operation type seen for the first time; the
                        // original's "opList == null" check was Java Map.get
                        // semantics and was unreachable in C#.
                        IList<OperationOutput> opList;
                        if (!splitTypes.TryGetValue(op, out opList))
                        {
                            opList = new AList<OperationOutput>();
                        }
                        opList.AddItem(data);
                        splitTypes[op] = opList;
                    }
                    else
                    {
                        noOperations.AddItem(data);
                    }
                }
                else
                {
                    throw new IOException("Unparseable line " + line);
                }
            }
            fileReader.Close();
            // clear so the finally block does not double-close this reader
            fileReader = null;
        }
        FilePath resFile = null;
        if (cfg.GetResultFile() != null)
        {
            resFile = new FilePath(cfg.GetResultFile());
        }
        if (resFile != null)
        {
            Log.Info("Report results being placed to logging output and to file " + resFile.GetCanonicalPath());
            reportWriter = new PrintWriter(new FileOutputStream(resFile));
        }
        else
        {
            Log.Info("Report results being placed to logging output");
        }
        ReportWriter reporter = new ReportWriter();
        if (!noOperations.IsEmpty())
        {
            reporter.BasicReport(noOperations, reportWriter);
        }
        foreach (string opType in splitTypes.Keys)
        {
            reporter.OpReport(opType, splitTypes[opType], reportWriter);
        }
    }
    finally
    {
        // release whichever streams are still open, even on error
        if (fileReader != null)
        {
            fileReader.Close();
        }
        if (reportWriter != null)
        {
            reportWriter.Close();
        }
    }
}