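public static Dictionary<AST.Address, string> SaveOutputs(AST.Address[] formula_nodes, DAG dag)
| Parameter     | Type          |
| formula_nodes | AST.Address[] |
| dag           | DAG           |
| Returns       | System.Collections.Generic.Dictionary<AST.Address, string> |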
/// <summary>
/// For every input cell, injects each of <paramref name="k"/> generated error strings in turn,
/// records the resulting formula outputs, and keeps the error string that produced the
/// largest total error relative to <paramref name="correct_outputs"/>.
/// </summary>
/// <param name="terminal_formula_nodes">Formula cells whose outputs are saved after each injection.</param>
/// <param name="inputs">Input cells (address -> original value) to perturb one at a time.</param>
/// <param name="k">Number of error strings requested from the generator, and tried, per input.</param>
/// <param name="correct_outputs">Outputs of the unmodified workbook, used as the error baseline.</param>
/// <param name="app">Excel application used to inject values.</param>
/// <param name="wb">Workbook that values are injected into. It is modified during the call.</param>
/// <param name="classification_file">Path handed to <c>Classification.Deserialize</c>.</param>
/// <param name="dag">Dependence graph passed through to <c>Utility.SaveOutputs</c>.</param>
/// <returns>
/// A dictionary mapping each input address to the error string with the largest observed
/// total error and that error. The entry is <c>("", 0.0)</c> when no string produced an
/// error greater than zero.
/// </returns>
public Dictionary<AST.Address, Tuple<string, double>> TopOfKErrors(AST.Address[] terminal_formula_nodes, CellDict inputs, int k, CellDict correct_outputs, Excel.Application app, Excel.Workbook wb, string classification_file, DAG dag)
{
    var eg = new ErrorGenerator();

    // Load in the classification's dictionaries
    var c = Classification.Deserialize(classification_file);

    var max_error_produced_dictionary = new Dictionary<AST.Address, Tuple<string, double>>();

    foreach (KeyValuePair<AST.Address, string> pair in inputs)
    {
        AST.Address addr = pair.Key;
        string orig_value = pair.Value;

        double max_error_produced = 0.0;
        string max_error_string = "";

        // get k strings, in parallel
        string[] errorstrings = eg.GenerateErrorStrings(orig_value, c, k);

        for (int i = 0; i < k; i++)
        {
            CellDict cd = new CellDict();
            CellDict incorrect_outputs;

            try
            {
                // inject the typo
                cd.Add(addr, errorstrings[i]);
                Utility.InjectValues(app, wb, cd);

                // save function outputs
                incorrect_outputs = Utility.SaveOutputs(terminal_formula_nodes, dag);
            }
            finally
            {
                // Remove the typo that was introduced. Doing this in a finally block
                // guarantees the workbook is not left holding an injected error when
                // injection or output collection throws.
                cd.Clear();
                cd.Add(addr, orig_value);
                Utility.InjectValues(app, wb, cd);
            }

            double total_error = Utility.CalculateTotalError(correct_outputs, incorrect_outputs);

            // keep track of the largest observed max error
            if (total_error > max_error_produced)
            {
                max_error_produced = total_error;
                max_error_string = errorstrings[i];
            }
        }

        // Add an entry for this input with the worst error string and its error
        max_error_produced_dictionary.Add(addr, new Tuple<string, double>(max_error_string, max_error_produced));
    }

    return max_error_produced_dictionary;
}
/// <summary>
/// Simulates a user who repeatedly asks the selected analysis for a suspicious cell,
/// inspects it, and corrects it when it is one of the injected errors. The loop ends when
/// the analysis flags nothing or when a count-based cutoff equals the number of cells
/// already inspected. Remaining injected errors are then recorded as false negatives.
/// </summary>
/// <remarks>
/// MODIFIES WORKBOOK: every true positive is overwritten with its original value through
/// the cell's COM object. One <c>LogEntry</c> per inspected cell is written to
/// <paramref name="logfile"/> and appended to <c>_error_log</c>; false negatives are
/// appended to <c>_error_log</c> only.
/// </remarks>
/// <param name="nboots">Forwarded to the CheckCell step.</param>
/// <param name="significance">Forwarded to the CheckCell step and recorded in log entries.</param>
/// <param name="ck">Cutoff; its threshold stops the loop when it is count-based.</param>
/// <param name="dag">Dependence graph used to enumerate the formula outputs.</param>
/// <param name="original_inputs">Address -> value before errors were injected.</param>
/// <param name="errord">Address -> injected erroneous value.</param>
/// <param name="correct_outputs">Formula outputs of the error-free workbook.</param>
/// <param name="wb">Workbook under test; its name is recorded in log entries.</param>
/// <param name="app">Excel application used to reach cells and run analyses.</param>
/// <param name="analysis_type">Selects which simulation step flags cells.</param>
/// <param name="weighted">Forwarded to the CheckCell step.</param>
/// <param name="all_outputs">Passed to <c>dag.terminalFormulaNodes</c> and the CheckCell step.</param>
/// <param name="max_duration_in_ms">Forwarded to the simulation steps.</param>
/// <param name="sw">Stopwatch forwarded to the simulation steps.</param>
/// <param name="logfile">File that per-inspection log entries are written to.</param>
/// <param name="pb">Progress bar forwarded to the CheckCell step.</param>
/// <returns>True/false positives, false negatives, precision terms and max errors.</returns>
private UserResults SimulateUser(int nboots,
                                 double significance,
                                 CutoffKind ck,
                                 DAG dag,
                                 CellDict original_inputs,
                                 CellDict errord,
                                 CellDict correct_outputs,
                                 Excel.Workbook wb,
                                 Excel.Application app,
                                 AnalysisType analysis_type,
                                 bool weighted,
                                 bool all_outputs,
                                 long max_duration_in_ms,
                                 Stopwatch sw,
                                 String logfile,
                                 ProgBar pb
                                )
{
    // init user results data structure
    var o = new UserResults();
    HashSet<AST.Address> known_good = new HashSet<AST.Address>();

    // initialize procedure
    var max_errors = new ErrorDict();
    var incorrect_outputs = Utility.SaveOutputs(dag.terminalFormulaNodes(all_outputs), dag);
    var errors_found = 0;
    Utility.UpdatePerFunctionMaxError(correct_outputs, incorrect_outputs, max_errors);

    // the corrected state of the spreadsheet
    // NOTE(review): this starts as a copy of correct_outputs rather than incorrect_outputs,
    // so output error magnitudes logged before the first correction are computed against
    // the error-free outputs. Confirm that this is intended.
    CellDict partially_corrected_outputs = correct_outputs.ToDictionary(p => p.Key, p => p.Value);

    // remove errors loop
    var cells_inspected = 0;
    List<KeyValuePair<AST.Address, int>> filtered_high_scores = null;
    bool correction_made = true;
    while (true)
    {
        Console.Write(".");

        AST.Address flagged_cell = null;

        // choose the appropriate test
        if (analysis_type == AnalysisType.CheckCell5 ||
            analysis_type == AnalysisType.CheckCell10)
        {
            flagged_cell = SimulationStep.CheckCell_Step(o, significance, ck, nboots, dag, app, weighted, all_outputs, correction_made, known_good, ref filtered_high_scores, max_duration_in_ms, sw, pb);
        }
        else if (analysis_type == AnalysisType.NormalPerRange)
        {
            flagged_cell = SimulationStep.NormalPerRange_Step(dag, wb, known_good, max_duration_in_ms, sw);
        }
        else if (analysis_type == AnalysisType.NormalAllInputs)
        {
            flagged_cell = SimulationStep.NormalAllOutputs_Step(dag, app, wb, known_good, max_duration_in_ms, sw);
        }

        // stop if the test no longer returns anything or if
        // the test is simply done inspecting based on a fixed threshold
        if (flagged_cell == null || (ck.isCountBased && ck.Threshold == cells_inspected))
        {
            break;
        }

        // a cell was flagged; cells_inspected only grows when something is actually flagged
        cells_inspected += 1;

        // check to see if the flagged value is actually an error
        bool is_true_error = errord.ContainsKey(flagged_cell);
        if (is_true_error)
        {
            correction_made = true;
            errors_found += 1;
            // P(k) * rel(k)
            o.PrecRel_at_k.Add(errors_found / (double)cells_inspected);
            o.true_positives.Add(flagged_cell);

            // correct flagged cell
            flagged_cell.GetCOMObject(app).Value2 = original_inputs[flagged_cell];

            // Re-read the outputs BEFORE measuring error, so the figures below describe
            // the workbook after this correction and not the state left by the previous one.
            partially_corrected_outputs = Utility.SaveOutputs(dag.terminalFormulaNodes(all_outputs), dag);

            Utility.UpdatePerFunctionMaxError(correct_outputs, partially_corrected_outputs, max_errors);

            // compute total error after applying this correction
            var current_total_error = Utility.CalculateTotalError(correct_outputs, partially_corrected_outputs);
            o.current_total_error.Add(current_total_error);
        }
        else
        {
            correction_made = false;
            // numerator is 0 here because rel(k) = 0 when no error was found
            o.PrecRel_at_k.Add(0.0);
            o.false_positives.Add(flagged_cell);
        }

        // mark it as known good -- at this point the cell has been
        // 'inspected' regardless of whether it was an error
        // It was either corrected or marked as OK
        known_good.Add(flagged_cell);

        // compute output error magnitudes
        var output_error_magnitude = Utility.MeanErrorMagnitude(partially_corrected_outputs, correct_outputs);

        // compute input error magnitude (both stay 0 for a false positive)
        double num_input_error_magnitude = 0;
        double str_input_error_magnitude = 0;
        if (is_true_error)
        {
            ComputeInputErrorMagnitudes(errord[flagged_cell], original_inputs[flagged_cell], out num_input_error_magnitude, out str_input_error_magnitude);
        }

        // write error log
        var logentry = new LogEntry(analysis_type,
                                    wb.Name,
                                    flagged_cell,
                                    original_inputs[flagged_cell],
                                    is_true_error ? errord[flagged_cell] : original_inputs[flagged_cell],
                                    output_error_magnitude,
                                    num_input_error_magnitude,
                                    str_input_error_magnitude,
                                    true,
                                    correction_made,
                                    significance,
                                    ck.Threshold);
        logentry.WriteLog(logfile);
        _error_log.Add(logentry);
    }

    // find all of the false negatives
    o.false_negatives = Utility.GetFalseNegatives(o.true_positives, o.false_positives, errord);
    o.max_errors = max_errors;

    var last_out_err_mag = Utility.MeanErrorMagnitude(partially_corrected_outputs, correct_outputs);

    // write out all false negative information
    foreach (AST.Address fn in o.false_negatives)
    {
        double num_input_error_magnitude;
        double str_input_error_magnitude;
        ComputeInputErrorMagnitudes(errord[fn], original_inputs[fn], out num_input_error_magnitude, out str_input_error_magnitude);

        // write error log
        _error_log.Add(new LogEntry(analysis_type,
                                    wb.Name,
                                    fn,
                                    original_inputs[fn],
                                    errord[fn],
                                    last_out_err_mag,
                                    num_input_error_magnitude,
                                    str_input_error_magnitude,
                                    false,
                                    true,
                                    significance,
                                    ck.Threshold));
    }

    return o;
}

/// <summary>
/// Computes the input error magnitude between an erroneous value and the original value.
/// When <c>Utility.BothNumbers</c> accepts the pair, the numerical magnitude is set and the
/// string magnitude is 0; otherwise the string magnitude is set and the numerical one is 0.
/// </summary>
private static void ComputeInputErrorMagnitudes(string erroneous_value, string original_value, out double num_input_error_magnitude, out double str_input_error_magnitude)
{
    if (Utility.BothNumbers(erroneous_value, original_value))
    {
        num_input_error_magnitude = Utility.NumericalMagnitudeChange(Double.Parse(erroneous_value), Double.Parse(original_value));
        str_input_error_magnitude = 0;
    }
    else
    {
        num_input_error_magnitude = 0;
        str_input_error_magnitude = Utility.StringMagnitudeChange(erroneous_value, original_value);
    }
}
/// <summary>
/// Runs one simulation: injects the errors held in <c>_errors</c>, lets the simulated user
/// inspect and correct cells, computes the error, effort and average-precision statistics
/// into this object's fields, and finally re-injects the original input values.
/// </summary>
/// <returns>The number of cells inspected (true positives plus false positives).</returns>
public int Run(int nboots,                          // bootstrap count
               string xlfile,                       // workbook name
               double significance,                 // significance threshold of the test
               CutoffKind ck,                       // threshold function to apply
               Excel.Application app,               // Excel application handle
               Classification c,                    // data from which errors are generated
               Random r,                            // random number generator
               AnalysisType analysisType,           // which analysis to run
               bool weighted,                       // whether weighting is applied
               bool all_outputs,                    // when false, only terminal outputs are considered
               DAG dag,
               Excel.Workbook wb,
               AST.Address[] terminal_formula_cells,
               AST.Range[] terminal_input_vectors,
               CellDict original_inputs,
               CellDict correct_outputs,
               long max_duration_in_ms,
               String logfile,                      // output log filename
               ProgBar pb
              )
{
    // remember the configuration of this run
    _wb_name = xlfile;
    _wb_path = wb.Path;
    _analysis_type = analysisType;
    _significance = significance;
    _all_outputs = all_outputs;
    _weighted = weighted;

    // put the errors from _errors into the workbook and capture the resulting outputs
    Utility.InjectValues(app, wb, _errors);
    CellDict outputs_with_errors = Utility.SaveOutputs(terminal_formula_cells, dag);

    // time the error-removal phase; SimulateUser MODIFIES WORKBOOK
    Stopwatch timer = Stopwatch.StartNew();
    _user = SimulateUser(nboots, significance, ck, dag, original_inputs, _errors, correct_outputs, wb, app, analysisType, weighted, all_outputs, max_duration_in_ms, timer, logfile, pb);
    timer.Stop();
    _analysis_time = timer.Elapsed.TotalSeconds;

    // outputs as they stand once the simulated user is done
    var outputs_after_simulation = Utility.SaveOutputs(terminal_formula_cells, dag);

    // total relative error remaining after the simulation
    _error = Utility.CalculateNormalizedError(correct_outputs, outputs_after_simulation, _user.max_errors);
    _total_relative_error = Utility.TotalRelativeError(_error);

    // total relative error at the start (normalized by max_errors)
    ErrorDict initial_error = Utility.CalculateNormalizedError(correct_outputs, outputs_with_errors, _user.max_errors);
    _initial_total_relative_error = Utility.TotalRelativeError(initial_error);

    // effort: inspected cells relative to all cells in the graph
    // NOTE(review): these ratios are floating-point divisions, so a zero denominator
    // (no cells, or no injected errors) yields NaN/Infinity rather than throwing.
    _max_effort = dag.allCells().Length;
    _effort = _user.true_positives.Count + _user.false_positives.Count;
    _expended_effort = (double)_effort / (double)_max_effort;

    // average precision:
    //   AveP = (\sum_{k=1}^n (P(k) * rel(k))) / |total positives|
    // P(k) is the precision at threshold k; rel(k) is 1 when item k is a true positive, else 0
    var precision_sum = _user.PrecRel_at_k.Sum();
    _average_precision = precision_sum / (double)_errors.Count;

    // put the original values back
    Utility.InjectValues(app, wb, original_inputs);

    _tree_construct_time = dag.AnalysisMilliseconds / 1000.0;

    // results are now complete and safe to print
    _simulation_run = true;

    return _effort;
}