/// <summary>
/// Builds the dependency graph for a workbook and captures the state that a
/// simulation needs before it starts: the terminal input and formula nodes,
/// the saved input values, and the saved formula outputs.
/// </summary>
/// <param name="app">Excel application handed to the graph constructor and to value injection.</param>
/// <param name="wbh">Workbook the graph is built from.</param>
/// <param name="pb">Progress bar; incremented once after the loop check passes.</param>
/// <param name="ignore_parse_errors">Forwarded unchanged to the <c>DAG</c> constructor.</param>
/// <returns>A <c>PrepData</c> carrying the graph, saved inputs, saved outputs and both terminal node sets.</returns>
/// <exception cref="DataDebugMethods.ContainsLoopException">The graph reports that it contains a loop.</exception>
/// <exception cref="NoRangeInputs">The graph has no terminal input vectors.</exception>
/// <exception cref="NoFormulas">The graph has no terminal formula nodes.</exception>
public static PrepData PrepSimulation(Excel.Application app, Excel.Workbook wbh, ProgBar pb, bool ignore_parse_errors)
{
    // Construct the dependency graph and refuse to continue if it has a loop.
    var graph = new DAG(wbh, app, ignore_parse_errors);
    if (graph.containsLoop())
    {
        throw new DataDebugMethods.ContainsLoopException();
    }
    pb.IncrementProgress();

    // Query both terminal node sets exactly once, before validating either.
    var input_vectors = graph.terminalInputVectors();
    // The boolean argument selects whether all outputs are used.
    var formula_nodes = graph.terminalFormulaNodes(true);

    if (input_vectors.Length == 0)
    {
        throw new NoRangeInputs();
    }
    if (formula_nodes.Length == 0)
    {
        throw new NoFormulas();
    }

    // Snapshot the spreadsheet's current inputs.
    CellDict saved_inputs = UserSimulation.Utility.SaveInputs(graph);

    // Re-inject the saved inputs to force a recalculation before reading the
    // outputs; otherwise Excel floating-point oddities alone could make a
    // later comparison look like the procedure did the wrong thing.
    UserSimulation.Utility.InjectValues(app, wbh, saved_inputs);

    // Snapshot the formula outputs after the recalculation.
    CellDict baseline_outputs = UserSimulation.Utility.SaveOutputs(formula_nodes, graph);

    var prepared = new PrepData();
    prepared.dag = graph;
    prepared.original_inputs = saved_inputs;
    prepared.correct_outputs = baseline_outputs;
    prepared.terminal_input_nodes = input_vectors;
    prepared.terminal_formula_nodes = formula_nodes;
    return prepared;
}
/// <summary>
/// Fits a normal distribution over all terminal input vectors of the graph and
/// returns the first reported error cell that is not already known to be good.
/// </summary>
/// <param name="dag">Dependency graph whose terminal input vectors are analyzed.</param>
/// <param name="app">Excel application passed to the distribution constructor.</param>
/// <param name="wb">Workbook used to convert the flagged COM object into an address.</param>
/// <param name="known_good">Addresses that must not be flagged.</param>
/// <param name="max_duration_in_ms">Time budget compared against <paramref name="sw"/>.</param>
/// <param name="sw">Stopwatch measuring elapsed time; not started or stopped here.</param>
/// <returns>The address of the first error not in <paramref name="known_good"/>, or <c>null</c> if there is none.</returns>
/// <exception cref="TimeoutException">The stopwatch exceeds the time budget while scanning errors.</exception>
public static AST.Address NormalAllOutputs_Step(DAG dag, Excel.Application app, Excel.Workbook wb, HashSet<AST.Address> known_good, long max_duration_in_ms, Stopwatch sw)
{
    // One distribution over the entire set of inputs.
    var distribution = new DataDebugMethods.NormalDistribution(dag.terminalInputVectors(), app);

    // Nothing reported: nothing to flag.
    if (distribution.getErrorsCount() <= 0)
    {
        return null;
    }

    // Walk the reported errors in position order and stop at the first one
    // the caller has not already marked as good.
    for (int position = 0; position < distribution.getErrorsCount(); position++)
    {
        // The time budget is checked before each candidate is examined.
        if (sw.ElapsedMilliseconds > max_duration_in_ms)
        {
            throw new TimeoutException("Timeout exception in NormalAllOutputs_Step.");
        }

        var com_cell = distribution.getErrorAtPosition(position);
        AST.Address candidate = AST.Address.AddressFromCOMObject(com_cell, wb);
        if (!known_good.Contains(candidate))
        {
            return candidate;
        }
    }

    // Every reported error was already known to be good.
    return null;
}
/// <summary>
/// Draws bootstrap resamples for every terminal input range of the graph and
/// hands them to <c>Inference</c> to produce the scores.
/// </summary>
/// <param name="num_bootstraps">Number of bootstrap samples to draw per input range.</param>
/// <param name="dag">Dependency graph; its node weights are updated by <c>PropagateWeights</c>.</param>
/// <param name="app">Not used in this method body.</param>
/// <param name="weighted">Forwarded to <c>Inference</c>.</param>
/// <param name="all_outputs">Forwarded to <c>dag.terminalFormulaNodes</c>.</param>
/// <param name="max_duration_in_ms">Not used in this method body.</param>
/// <param name="sw">Not used in this method body.</param>
/// <param name="significance">Forwarded to <c>Inference</c>.</param>
/// <param name="pb">Progress bar; its maximum is set here and it advances once per input range.</param>
/// <returns>The value returned by <c>Inference</c>.</returns>
public static TreeScore DataDebug(int num_bootstraps, DAG dag, Excel.Application app, bool weighted, bool all_outputs, long max_duration_in_ms, Stopwatch sw, double significance, ProgBar pb)
{
    // Updates the weight of each node in the graph.
    PropagateWeights(dag);

    // Terminal formulas (outputs) and terminal input ranges only.
    var formulas = dag.terminalFormulaNodes(all_outputs);
    var ranges = dag.terminalInputVectors();

    // bootstraps[i][j] is the j-th resample of the i-th input range.
    var bootstraps = new InputSample[ranges.Length][];

    // Single random source shared by all resampling calls.
    var random = new Random();

    // Remember the starting inputs and outputs.
    var saved_inputs = StoreInputs(ranges, dag);
    var saved_outputs = StoreOutputs(formulas, dag);

    // Two progress steps are budgeted per input range; this loop uses one of
    // them (presumably Inference consumes the other -- TODO confirm).
    pb.setMax(ranges.Length * 2);

    for (int r = 0; r < ranges.Length; r++)
    {
        bootstraps[r] = Resample(num_bootstraps, saved_inputs[ranges[r]], random);
        pb.IncrementProgress();
    }

    return Inference(
        num_bootstraps,
        bootstraps,
        saved_inputs,
        saved_outputs,
        ranges,
        formulas,
        dag,
        weighted,
        significance,
        pb);
}
/// <summary>
/// Resamples each terminal input range of the dependency graph
/// <paramref name="num_bootstraps"/> times and passes the resamples, together
/// with the stored initial inputs and outputs, to <c>Inference</c>.
/// </summary>
/// <param name="num_bootstraps">How many bootstrap samples to take for each input range.</param>
/// <param name="dag">Dependency graph to analyze; <c>PropagateWeights</c> is applied to it first.</param>
/// <param name="app">Accepted for interface compatibility; not read in this method body.</param>
/// <param name="weighted">Passed through to <c>Inference</c>.</param>
/// <param name="all_outputs">Passed through to <c>dag.terminalFormulaNodes</c>.</param>
/// <param name="max_duration_in_ms">Accepted for interface compatibility; not read in this method body.</param>
/// <param name="sw">Accepted for interface compatibility; not read in this method body.</param>
/// <param name="significance">Passed through to <c>Inference</c>.</param>
/// <param name="pb">Progress bar that is sized and advanced during resampling.</param>
/// <returns>Whatever <c>Inference</c> returns for the collected resamples.</returns>
public static TreeScore DataDebug(int num_bootstraps, DAG dag, Excel.Application app, bool weighted, bool all_outputs, long max_duration_in_ms, Stopwatch sw, double significance, ProgBar pb)
{
    // Weight propagation mutates the nodes of the graph, so it runs first.
    PropagateWeights(dag);

    // Keep only terminal functions and terminal inputs.
    var output_nodes = dag.terminalFormulaNodes(all_outputs);
    var input_vectors = dag.terminalInputVectors();

    // Outer index: position of the input range; inner index: bootstrap number.
    var samples_per_input = new InputSample[input_vectors.Length][];

    // Sampling RNG, created once for the whole run.
    var sampler = new Random();

    // Capture the initial spreadsheet inputs and outputs.
    var original_inputs = StoreInputs(input_vectors, dag);
    var original_outputs = StoreOutputs(output_nodes, dag);

    // The progress bar is sized at twice the number of input ranges.
    pb.setMax(input_vectors.Length * 2);

    #region RESAMPLE
    // Fill the bootstrap table, one row per input range.
    for (int index = 0; index < input_vectors.Length; index++)
    {
        var input_vector = input_vectors[index];
        var initial_values = original_inputs[input_vector];
        samples_per_input[index] = Resample(num_bootstraps, initial_values, sampler);

        // One progress tick per resampled range.
        pb.IncrementProgress();
    }
    #endregion RESAMPLE

    #region INFERENCE
    return Inference(
        num_bootstraps,
        samples_per_input,
        original_inputs,
        original_outputs,
        input_vectors,
        output_nodes,
        dag,
        weighted,
        significance,
        pb);
    #endregion INFERENCE
}
/// <summary>
/// Builds the dependency graph for the active workbook, scores its cells with
/// <c>Analysis.DataDebug</c>, and stores the cells that should be flagged in
/// <c>_flaggable</c>.
/// </summary>
/// <remarks>
/// Screen updating is switched off for the duration of the analysis and is
/// always switched back on, including when an exception (for example a parse
/// error while building the graph) propagates to the caller.
/// </remarks>
/// <param name="max_duration_in_ms">Time budget in milliseconds, forwarded to <c>Analysis.DataDebug</c>.</param>
public void Analyze(long max_duration_in_ms)
{
    var sw = new System.Diagnostics.Stopwatch();
    sw.Start();

    using (var pb = new ProgBar())
    {
        // Disable screen updating during analysis to speed things up
        _app.ScreenUpdating = false;

        try
        {
            // Build dependency graph (modifies data). Exceptions thrown here
            // (e.g. ExcelParserUtility.ParseException) propagate unchanged with
            // their original stack trace; the finally block cleans up the UI.
            _dag = new DAG(_app.ActiveWorkbook, _app, IGNORE_PARSE_ERRORS);
            // The result is not needed; the call is kept in case it has side
            // effects on the graph -- TODO confirm and remove if it is pure.
            _dag.numberOfInputCells();

            if (_dag.terminalInputVectors().Length == 0)
            {
                System.Windows.Forms.MessageBox.Show("This spreadsheet contains no vector-input functions.");
                _flaggable = new KeyValuePair<AST.Address, int>[0];
                return;
            }

            // Get bootstraps, highest score first
            var scores = Analysis.DataDebug(NBOOTS,
                                            _dag,
                                            _app,
                                            weighted: USE_WEIGHTS,
                                            all_outputs: CONSIDER_ALL_OUTPUTS,
                                            max_duration_in_ms: max_duration_in_ms,
                                            sw: sw,
                                            significance: _tool_significance,
                                            pb: pb)
                                 .OrderByDescending(pair => pair.Value)
                                 .ToArray();

            if (_debug_mode)
            {
                var score_str = String.Join("\n", scores.Take(10).Select(score => score.Key.A1FullyQualified() + " -> " + score.Value.ToString()));
                System.Windows.Forms.MessageBox.Show(score_str);
                // Clipboard.SetText rejects an empty string, which is what we
                // get when there are no scores at all.
                if (!String.IsNullOrEmpty(score_str))
                {
                    System.Windows.Forms.Clipboard.SetText(score_str);
                }
            }

            if (scores.Length == 0)
            {
                // Nothing was scored, so nothing can be flagged.
                _flaggable = scores;
                return;
            }

            // Calculate the cutoff index. The rounded product can be 0 (few
            // scores or a small significance) or larger than scores.Length
            // (significance above 1), either of which would index outside the
            // array, so clamp it to a valid position.
            int raw_thresh = scores.Length - Convert.ToInt32(scores.Length * _tool_significance);
            int thresh = Math.Max(0, Math.Min(raw_thresh, scores.Length - 1));
            var cutoff = scores[thresh].Value;

            // filter out cells that are...
            _flaggable = scores.Where(pair => pair.Value >= cutoff)            // below threshold
                               .Where(pair => !_known_good.Contains(pair.Key)) // known to be good
                               .Where(pair => pair.Value != 0)                 // score == 0
                               .ToArray();
        }
        finally
        {
            // Re-enable screen updating on every exit path, not just success.
            _app.ScreenUpdating = true;
            sw.Stop();
        }
    }
}