示例#1
0
        /// <summary>
        /// Selects the partition for a key/value pair based on the operation type
        /// parsed out of it.
        /// </summary>
        /// <remarks>
        /// The pair is wrapped in an <c>OperationOutput</c> and the hash of its
        /// operation type, forced non-negative, is reduced modulo
        /// <paramref name="numPartitions"/>. The hash is computed by
        /// <see cref="StableOperationTypeHash"/> rather than
        /// <c>string.GetHashCode()</c>, because the latter is not guaranteed to
        /// return the same value in different processes or runtimes, and a
        /// partitioner must give the same answer for the same key everywhere.
        /// </remarks>
        /// <param name="key">the record key handed to <c>OperationOutput</c></param>
        /// <param name="value">the record value handed to <c>OperationOutput</c></param>
        /// <param name="numPartitions">the number of partitions to choose from</param>
        /// <returns>
        /// a partition index in the range <c>[0, numPartitions)</c> for a positive
        /// <paramref name="numPartitions"/>
        /// </returns>
        public virtual int GetPartition(Text key, Text value, int numPartitions)
        {
            OperationOutput oo = new OperationOutput(key, value);
            int hash = StableOperationTypeHash(oo.GetOperationType());

            // Masking with int.MaxValue clears the sign bit so the remainder is never negative.
            return (hash & int.MaxValue) % numPartitions;
        }

        /// <summary>
        /// Computes a deterministic hash of <paramref name="text"/> using the
        /// polynomial <c>h = 31 * h + c</c> over its UTF-16 code units (the same
        /// formula Java documents for <c>String.hashCode()</c>).
        /// </summary>
        /// <param name="text">the string to hash; must not be null</param>
        /// <returns>the hash value, which may be negative</returns>
        /// <exception cref="System.NullReferenceException">if <paramref name="text"/> is null</exception>
        private static int StableOperationTypeHash(string text)
        {
            int hash = 0;

            // Wrap-around on overflow is intended, so disable overflow checking explicitly.
            unchecked
            {
                foreach (char c in text)
                {
                    hash = (31 * hash) + c;
                }
            }
            return hash;
        }
示例#2
0
        /// <summary>
        /// Reads the reducer output found under the configured output path and
        /// reports on it, grouped by operation type.
        /// </summary>
        /// <remarks>
        /// Every entry listed under the output path whose name starts with "part"
        /// is opened and read line by line. Each line must split on the first tab
        /// into exactly two pieces, which are parsed into an <c>OperationOutput</c>.
        /// Entries are grouped by operation type in a sorted dictionary; entries
        /// whose operation type is null are collected separately and reported
        /// first. Each report is handed to a <c>ReportWriter</c> together with a
        /// writer on the configured result file, or <c>null</c> when no result
        /// file is configured.
        /// </remarks>
        /// <param name="cfg">the config specifying the files and output</param>
        /// <exception cref="System.Exception">if files can not be opened/closed/read or invalid format
        ///     </exception>
        private void WriteReport(ConfigExtractor cfg)
        {
            Path dn = cfg.GetOutputPath();

            Log.Info("Writing report using contents of " + dn);
            FileSystem fs = dn.GetFileSystem(cfg.GetConfig());

            FileStatus[]   reduceFiles  = fs.ListStatus(dn);
            BufferedReader fileReader   = null;
            PrintWriter    reportWriter = null;

            try
            {
                IList<OperationOutput> noOperations = new AList<OperationOutput>();
                IDictionary<string, IList<OperationOutput>> splitTypes =
                    new SortedDictionary<string, IList<OperationOutput>>();

                foreach (FileStatus fn in reduceFiles)
                {
                    // Only reducer output files are of interest; an ordinal comparison
                    // keeps the prefix test independent of the current culture.
                    if (!fn.GetPath().GetName().StartsWith("part", System.StringComparison.Ordinal))
                    {
                        continue;
                    }
                    fileReader = new BufferedReader(
                        new InputStreamReader(new DataInputStream(fs.Open(fn.GetPath()))));
                    string line;
                    while ((line = fileReader.ReadLine()) != null)
                    {
                        // Split into at most two pieces: the key and the value.
                        string[] pieces = line.Split("\t", 2);
                        if (pieces.Length != 2)
                        {
                            throw new IOException("Unparseable line " + line);
                        }

                        OperationOutput data = new OperationOutput(pieces[0], pieces[1]);
                        string          op   = data.GetOperationType();
                        if (op == null)
                        {
                            noOperations.AddItem(data);
                            continue;
                        }

                        // The dictionary indexer throws KeyNotFoundException for a key
                        // that is not present, so probe with TryGetValue and create the
                        // bucket the first time an operation type is seen.
                        IList<OperationOutput> opList;
                        if (!splitTypes.TryGetValue(op, out opList))
                        {
                            opList = new AList<OperationOutput>();
                            splitTypes[op] = opList;
                        }
                        opList.AddItem(data);
                    }
                    // Clear the reference after a successful close so the finally
                    // block does not close the same reader a second time.
                    fileReader.Close();
                    fileReader = null;
                }

                FilePath resFile = null;
                if (cfg.GetResultFile() != null)
                {
                    resFile = new FilePath(cfg.GetResultFile());
                }
                if (resFile != null)
                {
                    Log.Info("Report results being placed to logging output and to file "
                             + resFile.GetCanonicalPath());
                    reportWriter = new PrintWriter(new FileOutputStream(resFile));
                }
                else
                {
                    Log.Info("Report results being placed to logging output");
                }

                ReportWriter reporter = new ReportWriter();
                if (!noOperations.IsEmpty())
                {
                    reporter.BasicReport(noOperations, reportWriter);
                }
                // Enumerating the sorted dictionary yields the operation types in key order.
                foreach (KeyValuePair<string, IList<OperationOutput>> entry in splitTypes)
                {
                    reporter.OpReport(entry.Key, entry.Value, reportWriter);
                }
            }
            finally
            {
                // Nested so that a failure while closing the reader cannot prevent
                // the report writer from being closed.
                try
                {
                    if (fileReader != null)
                    {
                        fileReader.Close();
                    }
                }
                finally
                {
                    if (reportWriter != null)
                    {
                        reportWriter.Close();
                    }
                }
            }
        }