// Builds the dependence graph for the workbook and captures everything the
// simulations need up front: the terminal input ranges, the terminal formula
// nodes, the original input values and the corresponding formula outputs.
//
// app:                 the running Excel application
// wbh:                 the workbook to analyze
// pb:                  progress bar; ticked once after the graph is built
// ignore_parse_errors: passed through to the DAG constructor
//
// Throws ContainsLoopException if the graph contains a loop, NoRangeInputs if
// there are no terminal input vectors, and NoFormulas if there are no
// terminal formula nodes.
public static PrepData PrepSimulation(Excel.Application app, Excel.Workbook wbh, ProgBar pb, bool ignore_parse_errors)
{
    // construct the dependence graph and refuse to go on if it has a loop
    var graph = new DAG(wbh, app, ignore_parse_errors);
    bool has_loop = graph.containsLoop();
    if (has_loop)
    {
        throw new DataDebugMethods.ContainsLoopException();
    }

    pb.IncrementProgress();

    // fetch terminal inputs and terminal formulas exactly once;
    // 'true' asks for all outputs
    var input_nodes = graph.terminalInputVectors();
    var formula_nodes = graph.terminalFormulaNodes(true);

    // nothing to perturb
    if (input_nodes.Length == 0)
    {
        throw new NoRangeInputs();
    }

    // nothing to observe
    if (formula_nodes.Length == 0)
    {
        throw new NoFormulas();
    }

    // snapshot of the spreadsheet's input values
    CellDict saved_inputs = UserSimulation.Utility.SaveInputs(graph);

    // Write the saved inputs back so that Excel recalculates before the
    // outputs are recorded; otherwise floating-point oddities in Excel
    // could make a later comparison look like a wrong result.
    UserSimulation.Utility.InjectValues(app, wbh, saved_inputs);

    // snapshot of the formula outputs after the recalculation
    CellDict saved_outputs = UserSimulation.Utility.SaveOutputs(formula_nodes, graph);

    // bundle everything for the caller
    var prepared = new PrepData();
    prepared.dag = graph;
    prepared.original_inputs = saved_inputs;
    prepared.correct_outputs = saved_outputs;
    prepared.terminal_input_nodes = input_nodes;
    prepared.terminal_formula_nodes = formula_nodes;
    return prepared;
}
// Runs the currently enabled batch analyses against the prepared workbook
// data and appends each analysis' results, formatted as CSV, to outfile.
// If outfile does not exist yet, the CSV header row is written first.
//
// Four progress ticks are issued in total: one per analysis slot. Two of the
// slots (CheckCell5 and NormalPerRange) are disabled at the moment, but
// their progress ticks are still issued.
//
// nboots:             number of bootstraps
// significance:       significance value handed to every analysis
// threshold:          cutoff used for the normal-distribution analysis
// c, r:               classification data and random number generator
// outfile, logfile:   CSV results file and log file
// max_duration_in_ms: maximum duration of a simulation
// prepdata:           graph, terminal nodes and saved values from PrepSimulation
// errors:             pre-generated errors
public static void RunSimulation(Excel.Application app, Excel.Workbook wbh, int nboots, double significance, double threshold, UserSimulation.Classification c, Random r, String outfile, long max_duration_in_ms, String logfile, ProgBar pb, PrepData prepdata, CellDict errors)
{
    // a results file that does not exist yet starts with the header row
    bool outfile_exists = System.IO.File.Exists(outfile);
    if (!outfile_exists)
    {
        System.IO.File.AppendAllText(outfile, Simulation.HeaderRowForCSV());
    }

    // Slot 1 (disabled): CheckCell5 -- weighted, all outputs,
    // QuantileCutoff(0.05). Only the progress tick remains.
    pb.IncrementProgress();

    // Slot 2: CheckCell10 -- weighted, all outputs, QuantileCutoff(0.10)
    var checkcell10_sim = new UserSimulation.Simulation();
    checkcell10_sim.RunFromBatch(
        nboots,                                     // number of bootstraps
        wbh.FullName,                               // Excel filename
        significance,                               // significance threshold
        app,                                        // Excel.Application
        new QuantileCutoff(0.10),                   // max % extreme values to flag
        c,                                          // classification data
        r,                                          // random number generator
        UserSimulation.AnalysisType.CheckCell10,    // analysis type
        true,                                       // weighted analysis
        true,                                       // use all outputs for analysis
        prepdata.dag,                               // dependence graph
        wbh,                                        // Excel.Workbook
        errors,                                     // pre-generated errors
        prepdata.terminal_input_nodes,              // input range nodes
        prepdata.terminal_formula_nodes,            // output nodes
        prepdata.original_inputs,                   // original input values
        prepdata.correct_outputs,                   // original output values
        max_duration_in_ms,                         // max duration of simulation
        logfile);
    var checkcell10_csv = checkcell10_sim.FormatResultsAsCSV();
    System.IO.File.AppendAllText(outfile, checkcell10_csv);
    pb.IncrementProgress();

    // Slot 3: Normal, all inputs -- NormalCutoff(threshold).
    // The original author marked nboots and both boolean flags as
    // irrelevant for this analysis type.
    var normal_all_sim = new UserSimulation.Simulation();
    normal_all_sim.RunFromBatch(
        nboots,                                         // marked irrelevant here
        wbh.FullName,                                   // Excel filename
        significance,                                   // significance value
        app,                                            // Excel.Application
        new NormalCutoff(threshold),                    // cutoff built from threshold
        c,                                              // classification data
        r,                                              // random number generator
        UserSimulation.AnalysisType.NormalAllInputs,    // analysis type
        true,                                           // marked irrelevant here
        true,                                           // marked irrelevant here
        prepdata.dag,                                   // dependence graph
        wbh,                                            // Excel.Workbook
        errors,                                         // pre-generated errors
        prepdata.terminal_input_nodes,                  // input range nodes
        prepdata.terminal_formula_nodes,                // output nodes
        prepdata.original_inputs,                       // original input values
        prepdata.correct_outputs,                       // original output values
        max_duration_in_ms,                             // max duration of simulation
        logfile);
    var normal_all_csv = normal_all_sim.FormatResultsAsCSV();
    System.IO.File.AppendAllText(outfile, normal_all_csv);
    pb.IncrementProgress();

    // Slot 4 (disabled): Normal, range inputs -- NormalPerRange with
    // NormalCutoff(threshold). Only the progress tick remains.
    pb.IncrementProgress();
}
// Entry point of the bootstrap analysis: propagates node weights through the
// graph, draws the bootstrap resamples for every terminal input range, and
// hands everything to Inference, whose result is returned.
//
// num_bootstraps: the number of bootstrap samples to draw per input range
// dag:            the dependence graph; its node weights are modified
// weighted:       passed through to Inference
// all_outputs:    passed to dag.terminalFormulaNodes to select the outputs
// significance:   passed through to Inference
// pb:             progress bar; its maximum is set to twice the number of
//                 input ranges and it is ticked once per resampled range
//
// app, max_duration_in_ms and sw are not used by this method.
public static TreeScore DataDebug(int num_bootstraps, DAG dag, Excel.Application app, bool weighted, bool all_outputs, long max_duration_in_ms, Stopwatch sw, double significance, ProgBar pb)
{
    // side effect: updates the weight of each node in the graph
    PropagateWeights(dag);

    // terminal functions and terminal inputs only
    var terminal_outputs = dag.terminalFormulaNodes(all_outputs);
    var terminal_inputs = dag.terminalInputVectors();

    // bootstraps[i][b] is the b-th resample of terminal_inputs[i]
    var bootstraps = new InputSample[terminal_inputs.Length][];

    // remember the spreadsheet's starting inputs and outputs
    var saved_inputs = StoreInputs(terminal_inputs, dag);
    var saved_outputs = StoreOutputs(terminal_outputs, dag);

    // progress bar maximum
    pb.setMax(terminal_inputs.Length * 2);

    // random source for the resampling
    var sampler = new Random();

    // draw the resamples, one input range at a time
    int slot = 0;
    foreach (var input_range in terminal_inputs)
    {
        bootstraps[slot] = Resample(num_bootstraps, saved_inputs[input_range], sampler);
        slot++;

        pb.IncrementProgress();
    }

    // run the inference step on the resamples
    TreeScore result = Inference(
        num_bootstraps,
        bootstraps,
        saved_inputs,
        saved_outputs,
        terminal_inputs,
        terminal_outputs,
        dag,
        weighted,
        significance,
        pb);
    return result;
}
// For every input range: replays each bootstrap resample through Excel on
// the calling thread, records all outputs, then queues a DataDebugJob on the
// thread pool for the hypothesis test. Once every job has signalled
// completion, the per-range results are merged and returned.
//
// num_bootstraps:  number of bootstrap resamples per input range
// resamples:       resamples[i][b] is the b-th resample of input_arr[i]
// initial_inputs:  original contents of each input range; used as the
//                  baseline for replacement and to restore the range
// initial_outputs: original output values; handed to each job
// input_arr:       the input ranges to perturb
// output_arr:      the outputs whose values are recorded per resample
// dag:             dependence graph; used to find the COM object of a range
// weighted:        passed to each DataDebugJob
// significance:    passed to each DataDebugJob
// pb:              progress bar; ticked once per queued input range
//
// Out-of-memory handling: when processing a range runs out of memory, the
// SAME range is retried, either after one of the still-running jobs has
// finished, or, if no job is running, after a forced garbage collection.
// An OutOfMemoryException that occurs after that forced collection is
// rethrown to the caller with its original stack trace.
public static TreeScore Inference(
    int num_bootstraps,
    InputSample[][] resamples,
    Dictionary<AST.Range, InputSample> initial_inputs,
    Dictionary<AST.Address, string> initial_outputs,
    AST.Range[] input_arr,
    AST.Address[] output_arr,
    DAG dag,
    bool weighted,
    double significance,
    ProgBar pb)
{
    // WaitHandle.WaitAny accepts at most 64 handles, 63 on an STA thread
    const int MAX_WAIT_HANDLES = 63;

    // completion signal of each job
    var mres = new ManualResetEvent[input_arr.Length];

    // job storage
    var ddjs = new DataDebugJob[input_arr.Length];

    // set once a forced GC has been tried; the next OOM is then fatal
    bool last_try = false;

    // score storage
    var scores = new TreeScore();

    // i only advances after range i has been queued successfully, so a
    // failed attempt is retried for the same range instead of skipped
    for (int i = 0; i < input_arr.Length; )
    {
        try
        {
            #region BOOTSTRAP
            // bootstrapping is done in the parent STA thread because
            // the .NET threading model prohibits thread pools (which
            // are MTA) from accessing STA COM objects directly.

            // bootstrap storage for each output (f), for each resample (b)
            FunctionOutput<string>[][] bs = new FunctionOutput<string>[initial_outputs.Count][];
            for (int f = 0; f < initial_outputs.Count; f++)
            {
                bs[f] = new FunctionOutput<string>[num_bootstraps];
            }

            // memoization table for input range i
            var memo = new BootMemo();

            // the input range and its COM object
            var input = input_arr[i];
            var com = dag.getCOMRefForRange(input).Range;

            // for each resample: look the outputs up in the memo table, or
            // else replace the range's values, compute and store the outputs
            for (var b = 0; b < num_bootstraps; b++)
            {
                FunctionOutput<string>[] fos = memo.FastReplace(com, dag, initial_inputs[input], resamples[i][b], output_arr, false);
                for (var f = 0; f < output_arr.Length; f++)
                {
                    bs[f][b] = fos[f];
                }
            }

            // restore the original inputs; faster to do once, after bootstrapping is done
            BootMemo.ReplaceExcelRange(com, initial_inputs[input]);

            // TODO: restore formulas if it turns out that they were overwritten
            //       this should never be the case
            #endregion BOOTSTRAP

            #region HYPOTHESIS_TEST
            // completion signal handed to the job; waited on further below
            mres[i] = new ManualResetEvent(false);

            // set up job
            ddjs[i] = new DataDebugJob(
                dag,
                bs,
                initial_outputs,
                input_arr[i],
                output_arr,
                weighted,
                significance,
                mres[i]
                );

            // hand job to thread pool
            ThreadPool.QueueUserWorkItem(ddjs[i].threadPoolCallback, i);
            #endregion HYPOTHESIS_TEST
        }
        catch (System.OutOfMemoryException)
        {
            if (last_try)
            {
                // a forced GC did not help: we just don't have enough
                // memory; 'throw;' keeps the original stack trace
                throw;
            }

            // Jobs 0..i-1 have all been queued, so their handles are
            // non-null. Keep only those that have not signalled yet, capped
            // at what WaitAny supports.
            var running = mres.Take(i)
                              .Where(mre => !mre.WaitOne(0))
                              .Take(MAX_WAIT_HANDLES)
                              .ToArray();

            if (running.Length == 0)
            {
                // no job is left that could free memory by finishing;
                // force a collection and make one last attempt
                GC.Collect();
                last_try = true;
            }
            else
            {
                // block until one of the running jobs completes
                WaitHandle.WaitAny(running);
            }

            // retry the same input range
            continue;
        }

        // range i is queued: update progress bar and move on
        pb.IncrementProgress();
        i++;
    }

    // Do not proceed until all hypothesis tests are done.
    // WaitHandle.WaitAll cannot be called on an STA thread which
    // is why we call WaitOne in a loop.
    // Merge scores as data becomes available.
    for (int i = 0; i < input_arr.Length; i++)
    {
        mres[i].WaitOne();
        scores = DictAdd(scores, ddjs[i].Result);
    }

    return scores;
}
// For every input range: replays each bootstrap resample through Excel on
// the calling thread, records all outputs, then queues a DataDebugJob on the
// thread pool for the hypothesis test. Once every job has signalled
// completion, the per-range results are merged and returned.
//
// num_bootstraps:  number of bootstrap resamples per input range
// resamples:       resamples[i][b] is the b-th resample of input_arr[i]
// initial_inputs:  original contents of each input range; used as the
//                  baseline for replacement and to restore the range
// initial_outputs: original output values; handed to each job
// input_arr:       the input ranges to perturb
// output_arr:      the outputs whose values are recorded per resample
// dag:             dependence graph; used to find the COM object of a range
// weighted:        passed to each DataDebugJob
// significance:    passed to each DataDebugJob
// pb:              progress bar; ticked once per queued input range
//
// Out-of-memory handling: when processing a range runs out of memory, the
// SAME range is retried, either after one of the still-running jobs has
// finished, or, if no job is running, after a forced garbage collection.
// An OutOfMemoryException that occurs after that forced collection is
// rethrown to the caller with its original stack trace.
public static TreeScore Inference(
    int num_bootstraps,
    InputSample[][] resamples,
    Dictionary<AST.Range, InputSample> initial_inputs,
    Dictionary<AST.Address, string> initial_outputs,
    AST.Range[] input_arr,
    AST.Address[] output_arr,
    DAG dag,
    bool weighted,
    double significance,
    ProgBar pb)
{
    // WaitHandle.WaitAny accepts at most 64 handles, 63 on an STA thread
    const int MAX_WAIT_HANDLES = 63;

    // completion signal of each job
    var mres = new ManualResetEvent[input_arr.Length];

    // job storage
    var ddjs = new DataDebugJob[input_arr.Length];

    // set once a forced GC has been tried; the next OOM is then fatal
    bool last_try = false;

    // score storage
    var scores = new TreeScore();

    // i only advances after range i has been queued successfully, so a
    // failed attempt is retried for the same range instead of skipped
    for (int i = 0; i < input_arr.Length; )
    {
        try
        {
            #region BOOTSTRAP
            // bootstrapping is done in the parent STA thread because
            // the .NET threading model prohibits thread pools (which
            // are MTA) from accessing STA COM objects directly.

            // bootstrap storage for each output (f), for each resample (b)
            FunctionOutput<string>[][] bs = new FunctionOutput<string>[initial_outputs.Count][];
            for (int f = 0; f < initial_outputs.Count; f++)
            {
                bs[f] = new FunctionOutput<string>[num_bootstraps];
            }

            // memoization table for input range i
            var memo = new BootMemo();

            // the input range and its COM object
            var input = input_arr[i];
            var com = dag.getCOMRefForRange(input).Range;

            // for each resample: look the outputs up in the memo table, or
            // else replace the range's values, compute and store the outputs
            for (var b = 0; b < num_bootstraps; b++)
            {
                FunctionOutput<string>[] fos = memo.FastReplace(com, dag, initial_inputs[input], resamples[i][b], output_arr, false);
                for (var f = 0; f < output_arr.Length; f++)
                {
                    bs[f][b] = fos[f];
                }
            }

            // restore the original inputs; faster to do once, after bootstrapping is done
            BootMemo.ReplaceExcelRange(com, initial_inputs[input]);

            // TODO: restore formulas if it turns out that they were overwritten
            //       this should never be the case
            #endregion BOOTSTRAP

            #region HYPOTHESIS_TEST
            // completion signal handed to the job; waited on further below
            mres[i] = new ManualResetEvent(false);

            // set up job
            ddjs[i] = new DataDebugJob(
                dag,
                bs,
                initial_outputs,
                input_arr[i],
                output_arr,
                weighted,
                significance,
                mres[i]
                );

            // hand job to thread pool
            ThreadPool.QueueUserWorkItem(ddjs[i].threadPoolCallback, i);
            #endregion HYPOTHESIS_TEST
        }
        catch (System.OutOfMemoryException)
        {
            if (last_try)
            {
                // a forced GC did not help: we just don't have enough
                // memory; 'throw;' keeps the original stack trace
                throw;
            }

            // Jobs 0..i-1 have all been queued, so their handles are
            // non-null. Keep only those that have not signalled yet, capped
            // at what WaitAny supports.
            var running = mres.Take(i)
                              .Where(mre => !mre.WaitOne(0))
                              .Take(MAX_WAIT_HANDLES)
                              .ToArray();

            if (running.Length == 0)
            {
                // no job is left that could free memory by finishing;
                // force a collection and make one last attempt
                GC.Collect();
                last_try = true;
            }
            else
            {
                // block until one of the running jobs completes
                WaitHandle.WaitAny(running);
            }

            // retry the same input range
            continue;
        }

        // range i is queued: update progress bar and move on
        pb.IncrementProgress();
        i++;
    }

    // Do not proceed until all hypothesis tests are done.
    // WaitHandle.WaitAll cannot be called on an STA thread which
    // is why we call WaitOne in a loop.
    // Merge scores as data becomes available.
    for (int i = 0; i < input_arr.Length; i++)
    {
        mres[i].WaitOne();
        scores = DictAdd(scores, ddjs[i].Result);
    }

    return scores;
}
// Entry point of the bootstrap analysis: propagates node weights through the
// graph, draws the bootstrap resamples for every terminal input range, and
// hands everything to Inference, whose result is returned.
//
// num_bootstraps: the number of bootstrap samples to draw per input range
// dag:            the dependence graph; its node weights are modified
// weighted:       passed through to Inference
// all_outputs:    passed to dag.terminalFormulaNodes to select the outputs
// significance:   passed through to Inference
// pb:             progress bar; its maximum is set to twice the number of
//                 input ranges and it is ticked once per resampled range
//
// app, max_duration_in_ms and sw are not used by this method.
public static TreeScore DataDebug(int num_bootstraps, DAG dag, Excel.Application app, bool weighted, bool all_outputs, long max_duration_in_ms, Stopwatch sw, double significance, ProgBar pb)
{
    // side effect: updates the weight of each node in the graph
    PropagateWeights(dag);

    // terminal functions and terminal inputs only
    var terminal_outputs = dag.terminalFormulaNodes(all_outputs);
    var terminal_inputs = dag.terminalInputVectors();

    // bootstraps[i][b] is the b-th resample of terminal_inputs[i]
    var bootstraps = new InputSample[terminal_inputs.Length][];

    // remember the spreadsheet's starting inputs and outputs
    var saved_inputs = StoreInputs(terminal_inputs, dag);
    var saved_outputs = StoreOutputs(terminal_outputs, dag);

    // progress bar maximum
    pb.setMax(terminal_inputs.Length * 2);

    // random source for the resampling
    var sampler = new Random();

    // draw the resamples, one input range at a time
    int slot = 0;
    foreach (var input_range in terminal_inputs)
    {
        bootstraps[slot] = Resample(num_bootstraps, saved_inputs[input_range], sampler);
        slot++;

        pb.IncrementProgress();
    }

    // run the inference step on the resamples
    TreeScore result = Inference(
        num_bootstraps,
        bootstraps,
        saved_inputs,
        saved_outputs,
        terminal_inputs,
        terminal_outputs,
        dag,
        weighted,
        significance,
        pb);
    return result;
}