/// <summary>
/// Assigns a propagated weight to every terminal formula node in the DAG.
/// Weights are computed per-root via PropagateNodeWeight and written back
/// into the graph with setWeight.
/// </summary>
/// <param name="dag">The dependence graph whose node weights are updated in place.</param>
/// <exception cref="ContainsLoopException">Thrown when the graph is cyclic; propagation is undefined for loops.</exception>
private static void PropagateWeights(DAG dag)
{
    // Weight propagation only terminates on acyclic graphs.
    if (dag.containsLoop())
    {
        throw new ContainsLoopException();
    }

    // Terminal formulas are the roots of the dependence forest; the
    // 'false' argument selects the same subset of outputs as before.
    foreach (AST.Address root in dag.terminalFormulaNodes(false))
    {
        dag.setWeight(root, PropagateNodeWeight(root, dag));
    }
}
/// <summary>
/// Builds the dependence graph for a workbook and captures the baseline
/// state (original inputs and recomputed "correct" outputs) required to
/// run a simulation.
/// </summary>
/// <param name="app">Running Excel application instance.</param>
/// <param name="wbh">Workbook to analyze.</param>
/// <param name="pb">Progress bar; advanced once after graph construction.</param>
/// <param name="ignore_parse_errors">Forwarded to DAG construction.</param>
/// <returns>A PrepData bundle holding the graph, saved inputs/outputs, and terminal nodes.</returns>
/// <exception cref="DataDebugMethods.ContainsLoopException">Thrown when the workbook's dependence graph is cyclic.</exception>
/// <exception cref="NoRangeInputs">Thrown when no terminal input ranges exist.</exception>
/// <exception cref="NoFormulas">Thrown when no terminal formulas exist.</exception>
public static PrepData PrepSimulation(Excel.Application app, Excel.Workbook wbh, ProgBar pb, bool ignore_parse_errors)
{
    // Construct the dependence graph; cyclic workbooks cannot be analyzed.
    var graph = new DAG(wbh, app, ignore_parse_errors);
    if (graph.containsLoop())
    {
        throw new DataDebugMethods.ContainsLoopException();
    }
    pb.IncrementProgress();

    // Fetch terminal nodes exactly once; 'true' selects all outputs.
    var inputs = graph.terminalInputVectors();
    var outputs = graph.terminalFormulaNodes(true);
    if (inputs.Length == 0)
    {
        throw new NoRangeInputs();
    }
    if (outputs.Length == 0)
    {
        throw new NoFormulas();
    }

    // Snapshot the spreadsheet's original input values.
    CellDict savedInputs = UserSimulation.Utility.SaveInputs(graph);

    // Re-inject the inputs to force a recalculation before saving outputs;
    // otherwise Excel floating-point oddities could make us erroneously
    // conclude that the procedure did the wrong thing later.
    UserSimulation.Utility.InjectValues(app, wbh, savedInputs);

    // Snapshot the recomputed formula outputs as the correctness baseline.
    CellDict savedOutputs = UserSimulation.Utility.SaveOutputs(outputs, graph);

    return new PrepData()
    {
        dag = graph,
        original_inputs = savedInputs,
        correct_outputs = savedOutputs,
        terminal_input_nodes = inputs,
        terminal_formula_nodes = outputs
    };
}
// Runs the DataDebug analysis: propagates node weights, draws bootstrap
// resamples for every terminal input range, then hands everything to
// Inference to produce per-cell suspicion scores.
//   num_bootstraps: how many bootstrap samples to draw per input range
//   weighted / all_outputs / significance: forwarded to the analysis
// Returns the TreeScore produced by Inference.
public static TreeScore DataDebug(int num_bootstraps, DAG dag, Excel.Application app, bool weighted, bool all_outputs, long max_duration_in_ms, Stopwatch sw, double significance, ProgBar pb)
{
    // Weight propagation mutates the DAG's node weights in place.
    PropagateWeights(dag);

    // Only terminal formulas and terminal input vectors participate.
    var outputs = dag.terminalFormulaNodes(all_outputs);
    var inputs = dag.terminalInputVectors();

    // bootstraps[i][j] = the jth bootstrap of the ith input range
    var bootstraps = new InputSample[inputs.Length][];

    // RNG for sampling
    var rand = new Random();

    // Snapshot the initial state of all inputs and outputs.
    var initial_inputs = StoreInputs(inputs, dag);
    var initial_outputs = StoreOutputs(outputs, dag);

    // One progress tick per range while resampling, one more during inference.
    pb.setMax(inputs.Length * 2);

    // Draw bootstrap resamples for each terminal input range.
    for (var idx = 0; idx < inputs.Length; idx++)
    {
        var range = inputs[idx];
        bootstraps[idx] = Resample(num_bootstraps, initial_inputs[range], rand);
        pb.IncrementProgress();
    }

    return Inference(
        num_bootstraps,
        bootstraps,
        initial_inputs,
        initial_outputs,
        inputs,
        outputs,
        dag,
        weighted,
        significance,
        pb);
}
// Runs the DataDebug analysis: propagates node weights, draws bootstrap
// resamples for every terminal input range, then calls Inference to
// produce per-cell suspicion scores.
// num_bootstraps: the number of bootstrap samples to draw per input range
// NOTE(review): this appears to be byte-for-byte the same routine as the
// other DataDebug method visible in this file (identical signature); two
// identical signatures cannot coexist in one class — confirm whether this
// copy lives in a different class or is a duplication to be removed.
public static TreeScore DataDebug(int num_bootstraps, DAG dag, Excel.Application app, bool weighted, bool all_outputs, long max_duration_in_ms, Stopwatch sw, double significance, ProgBar pb)
{
    // this modifies the weights of each node, in place
    PropagateWeights(dag);

    // filter out non-terminal functions
    var output_fns = dag.terminalFormulaNodes(all_outputs);
    // filter out non-terminal inputs
    var input_rngs = dag.terminalInputVectors();

    // first idx: the index of the input range in the "input_rngs" array
    // second idx: the ith bootstrap
    var resamples = new InputSample[input_rngs.Length][];

    // RNG for sampling
    var rng = new Random();

    // we save initial inputs and outputs here
    var initial_inputs = StoreInputs(input_rngs, dag);
    var initial_outputs = StoreOutputs(output_fns, dag);

    // Set progress bar max: one tick per range while resampling,
    // one more per range during inference.
    pb.setMax(input_rngs.Length * 2);

    #region RESAMPLE
    // populate bootstrap array: for each input range, draw
    // num_bootstraps resamples of its initial contents
    for (int i = 0; i < input_rngs.Length; i++)
    {
        // this input range
        var t = input_rngs[i];
        // resample
        resamples[i] = Resample(num_bootstraps, initial_inputs[t], rng);
        // update progress bar
        pb.IncrementProgress();
    }
    #endregion RESAMPLE

    #region INFERENCE
    return(Inference(
        num_bootstraps,
        resamples,
        initial_inputs,
        initial_outputs,
        input_rngs,
        output_fns,
        dag,
        weighted,
        significance,
        pb));
    #endregion INFERENCE
}
// Simulates a user repairing a spreadsheet: repeatedly runs the chosen
// analysis, inspects each flagged cell, corrects it if it is a true error,
// and records precision/recall and error-magnitude statistics until the
// analysis stops flagging cells (or a count-based cutoff is reached).
//   nboots:           bootstrap count forwarded to the CheckCell step
//   significance:     significance level forwarded to the analysis
//   ck:               cutoff policy; may terminate inspection by count
//   original_inputs:  pristine input values, used to correct true errors
//   errord:           the injected errors (cell -> erroneous value)
//   correct_outputs:  formula outputs of the error-free spreadsheet
//   analysis_type:    which flagging strategy to run each iteration
// Returns a UserResults with true/false positives/negatives and error stats.
private UserResults SimulateUser(int nboots, double significance, CutoffKind ck, DAG dag, CellDict original_inputs, CellDict errord, CellDict correct_outputs, Excel.Workbook wb, Excel.Application app, AnalysisType analysis_type, bool weighted, bool all_outputs, long max_duration_in_ms, Stopwatch sw, String logfile, ProgBar pb )
{
    // init user results data structure
    var o = new UserResults();
    HashSet<AST.Address> known_good = new HashSet<AST.Address>();

    // initialize procedure state
    var errors_remain = true;
    var max_errors = new ErrorDict();
    // outputs as computed with the injected errors still in place
    var incorrect_outputs = Utility.SaveOutputs(dag.terminalFormulaNodes(all_outputs), dag);
    var errors_found = 0;
    // NOTE(review): number_of_true_errors is never read in the visible
    // body — looks like a dead local; confirm and remove if so.
    var number_of_true_errors = errord.Count;
    Utility.UpdatePerFunctionMaxError(correct_outputs, incorrect_outputs, max_errors);

    // the corrected state of the spreadsheet: starts as a copy of the
    // correct outputs and is refreshed after every successful correction
    CellDict partially_corrected_outputs = correct_outputs.ToDictionary(p => p.Key, p => p.Value);

    // remove-errors loop
    var cells_inspected = 0;
    List<KeyValuePair<AST.Address, int>> filtered_high_scores = null;
    bool correction_made = true;
    while (errors_remain)
    {
        // progress heartbeat on the console
        Console.Write(".");

        AST.Address flagged_cell = null;

        // choose the appropriate test for this iteration
        if (analysis_type == AnalysisType.CheckCell5 || analysis_type == AnalysisType.CheckCell10 )
        {
            flagged_cell = SimulationStep.CheckCell_Step(o, significance, ck, nboots, dag, app, weighted, all_outputs, correction_made, known_good, ref filtered_high_scores, max_duration_in_ms, sw, pb);
        }
        else if (analysis_type == AnalysisType.NormalPerRange)
        {
            flagged_cell = SimulationStep.NormalPerRange_Step(dag, wb, known_good, max_duration_in_ms, sw);
        }
        else if (analysis_type == AnalysisType.NormalAllInputs)
        {
            flagged_cell = SimulationStep.NormalAllOutputs_Step(dag, app, wb, known_good, max_duration_in_ms, sw);
        }

        // stop if the test no longer returns anything, or if the cutoff is
        // count-based and we have inspected exactly that many cells
        if (flagged_cell == null || (ck.isCountBased && ck.Threshold == cells_inspected))
        {
            errors_remain = false;
        }
        else // a cell was flagged
        {
            // cells_inspected should only be incremented when a cell is
            // actually flagged; if nothing is flagged, nothing is inspected,
            // so cells_inspected doesn't increase
            cells_inspected += 1;

            // check to see if the flagged value is actually an error
            if (errord.ContainsKey(flagged_cell))
            {
                correction_made = true;
                errors_found += 1;
                // precision at k: P(k) * rel(k)
                o.PrecRel_at_k.Add(errors_found / (double)cells_inspected);
                o.true_positives.Add(flagged_cell);

                // correct flagged cell by restoring its original value
                flagged_cell.GetCOMObject(app).Value2 = original_inputs[flagged_cell];
                Utility.UpdatePerFunctionMaxError(correct_outputs, partially_corrected_outputs, max_errors);

                // compute total error after applying this correction
                var current_total_error = Utility.CalculateTotalError(correct_outputs, partially_corrected_outputs);
                o.current_total_error.Add(current_total_error);

                // re-save outputs now that the workbook has changed
                partially_corrected_outputs = Utility.SaveOutputs(dag.terminalFormulaNodes(all_outputs), dag);
            }
            else
            {
                correction_made = false;
                // numerator is 0 here because rel(k) = 0 when no error was found
                o.PrecRel_at_k.Add(0.0);
                o.false_positives.Add(flagged_cell);
            }

            // mark it as known good -- at this point the cell has been
            // 'inspected' regardless of whether it was an error;
            // it was either corrected or marked as OK
            known_good.Add(flagged_cell);

            // compute output error magnitude after this inspection
            var output_error_magnitude = Utility.MeanErrorMagnitude(partially_corrected_outputs, correct_outputs);

            // compute input error magnitude: numeric when both the erroneous
            // and original values parse as numbers, string-based otherwise
            double num_input_error_magnitude;
            double str_input_error_magnitude;
            if (errord.ContainsKey(flagged_cell))
            {
                if (Utility.BothNumbers(errord[flagged_cell], original_inputs[flagged_cell]))
                {
                    num_input_error_magnitude = Utility.NumericalMagnitudeChange(Double.Parse(errord[flagged_cell]), Double.Parse(original_inputs[flagged_cell]));
                    str_input_error_magnitude = 0;
                }
                else
                {
                    num_input_error_magnitude = 0;
                    str_input_error_magnitude = Utility.StringMagnitudeChange(errord[flagged_cell], original_inputs[flagged_cell]);
                }
            }
            else
            {
                num_input_error_magnitude = 0;
                str_input_error_magnitude = 0;
            }

            // write error log entry for this inspection
            var logentry = new LogEntry(analysis_type,
                wb.Name,
                flagged_cell,
                original_inputs[flagged_cell],
                errord.ContainsKey(flagged_cell) ? errord[flagged_cell] : original_inputs[flagged_cell],
                output_error_magnitude,
                num_input_error_magnitude,
                str_input_error_magnitude,
                true,
                correction_made,
                significance,
                ck.Threshold);
            logentry.WriteLog(logfile);
            _error_log.Add(logentry);
        }
    }

    // find all of the false negatives: injected errors that were never flagged
    o.false_negatives = Utility.GetFalseNegatives(o.true_positives, o.false_positives, errord);
    o.max_errors = max_errors;
    var last_out_err_mag = Utility.MeanErrorMagnitude(partially_corrected_outputs, correct_outputs);

    // write out all false negative information
    foreach (AST.Address fn in o.false_negatives)
    {
        double num_input_error_magnitude;
        double str_input_error_magnitude;
        if (Utility.BothNumbers(errord[fn], original_inputs[fn]))
        {
            num_input_error_magnitude = Utility.NumericalMagnitudeChange(Double.Parse(errord[fn]), Double.Parse(original_inputs[fn]));
            str_input_error_magnitude = 0;
        }
        else
        {
            num_input_error_magnitude = 0;
            str_input_error_magnitude = Utility.StringMagnitudeChange(errord[fn], original_inputs[fn]);
        }

        // write error log
        _error_log.Add(new LogEntry(analysis_type,
            wb.Name,
            fn,
            original_inputs[fn],
            errord[fn],
            last_out_err_mag,
            num_input_error_magnitude,
            str_input_error_magnitude,
            false,
            true,
            significance,
            ck.Threshold));
    }
    return o;
}