/// <summary>
/// Returns the function outputs produced by <paramref name="sample"/>, using the
/// memo table <c>_d</c> when the sample has been evaluated before.
/// </summary>
/// <param name="com">Excel range that is overwritten with the sample on a memo miss.</param>
/// <param name="dag">Graph used to read the value at each output address.</param>
/// <param name="original">Values written back to <paramref name="com"/> when <paramref name="replace_original"/> is true.</param>
/// <param name="sample">The resampled input; also the memo key.</param>
/// <param name="outputs">Addresses whose values are captured, in order.</param>
/// <param name="replace_original">When true, the original values are restored after a memo miss.</param>
/// <returns>One <c>FunctionOutput</c> per entry of <paramref name="outputs"/>.</returns>
public FunctionOutput<string>[] FastReplace(Excel.Range com, DAG dag, InputSample original, InputSample sample, AST.Address[] outputs, bool replace_original)
{
    // memo hit: the workbook is not touched at all
    FunctionOutput<string>[] results;
    if (_d.TryGetValue(sample, out results))
    {
        return results;
    }

    // memo miss: push the sample into the workbook
    ReplaceExcelRange(com, sample);

    // capture every output, in the order given by the caller
    results = new FunctionOutput<string>[outputs.Length];
    int slot = 0;
    foreach (AST.Address address in outputs)
    {
        results[slot] = new FunctionOutput<string>(dag.readCOMValueAtAddress(address), sample.GetExcludes());
        slot++;
    }

    // remember the outputs for this sample
    _d.Add(sample, results);

    // optionally put the original values back
    if (replace_original)
    {
        ReplaceExcelRange(com, original);
    }

    return results;
}
/// <summary>
/// Memoized evaluation of one resampled input: looks <paramref name="sample"/> up in
/// <c>_d</c>; if absent, writes it into <paramref name="com"/>, reads all
/// <paramref name="outputs"/> via <paramref name="dag"/>, and stores the result in <c>_d</c>.
/// </summary>
/// <param name="com">Excel range receiving the sample values on a miss.</param>
/// <param name="dag">Graph used to read each output address.</param>
/// <param name="original">Values restored into <paramref name="com"/> when <paramref name="replace_original"/> is set.</param>
/// <param name="sample">Resampled input and memo key.</param>
/// <param name="outputs">Output addresses to read, in order.</param>
/// <param name="replace_original">Restore <paramref name="original"/> after computing a miss.</param>
/// <returns>The cached or freshly computed outputs, one per output address.</returns>
public FunctionOutput<string>[] FastReplace(Excel.Range com, DAG dag, InputSample original, InputSample sample, AST.Address[] outputs, bool replace_original)
{
    FunctionOutput<string>[] memoized;
    bool hit = _d.TryGetValue(sample, out memoized);

    if (!hit)
    {
        // write the sample into the workbook
        ReplaceExcelRange(com, sample);

        // read back every output value
        var fresh = new FunctionOutput<string>[outputs.Length];
        int pos = 0;
        while (pos < fresh.Length)
        {
            var cellValue = dag.readCOMValueAtAddress(outputs[pos]);
            fresh[pos] = new FunctionOutput<string>(cellValue, sample.GetExcludes());
            pos++;
        }

        // cache under this sample
        _d.Add(sample, fresh);

        // restore the original input if the caller asked for it
        if (replace_original)
        {
            ReplaceExcelRange(com, original);
        }

        memoized = fresh;
    }

    return memoized;
}
/// <summary>
/// Assigns the array returned by <c>input.GetInputArray()</c> to <c>com.Value2</c>,
/// retrying for as long as the assignment fails with a COM error.
/// </summary>
/// <remarks>
/// Only <see cref="System.Runtime.InteropServices.COMException"/> is retried.
/// Any other exception (for example a null argument or an out-of-memory condition)
/// now propagates to the caller instead of being swallowed, which previously
/// turned such failures into an endless loop.
/// </remarks>
/// <param name="com">The Excel range to overwrite.</param>
/// <param name="input">The sample whose values are written.</param>
public static void ReplaceExcelRange(Range com, InputSample input)
{
    // Build the array once; a failure here is not a COM hiccup and must surface.
    var values = input.GetInputArray();

    while (true)
    {
        try
        {
            com.Value2 = values;
            return;
        }
        catch (System.Runtime.InteropServices.COMException)
        {
            // Presumably Excel rejected the call because it was busy; try again.
            // NOTE(review): the retry is still unbounded, so a permanent COM
            // failure (e.g. a protected sheet) would loop forever — confirm
            // whether a retry limit is wanted.
        }
    }
}
/// <summary>
/// Snapshots the current contents of every input range into an <c>InputSample</c>
/// and writes that snapshot straight back into the workbook.
/// </summary>
/// <param name="inputs">The input ranges to capture.</param>
/// <param name="dag">Graph used to obtain the COM reference for each range.</param>
/// <returns>A dictionary from each input range to its captured values.</returns>
private static Dictionary<AST.Range, InputSample> StoreInputs(AST.Range[] inputs, DAG dag)
{
    var saved = new Dictionary<AST.Range, InputSample>();

    for (int idx = 0; idx < inputs.Length; idx++)
    {
        AST.Range range = inputs[idx];
        var comRef = dag.getCOMRefForRange(range);

        // copy the whole COM array into the sample in a single call
        var snapshot = new InputSample(comRef.Height, comRef.Width);
        snapshot.AddArray(comRef.Range.Value2);

        saved.Add(range, snapshot);

        // write the snapshot back so Excel recalculates its outputs
        // the same way it will for the bootstrap samples
        BootMemo.ReplaceExcelRange(comRef.Range, snapshot);
    }

    return saved;
}
/// <summary>
/// Draws <paramref name="num_bootstraps"/> bootstrap samples from
/// <paramref name="orig_vals"/>. Each sample has the same number of values as the
/// original and is filled by sampling with replacement.
/// </summary>
/// <param name="num_bootstraps">Number of resamples to produce.</param>
/// <param name="orig_vals">The original input values to draw from.</param>
/// <param name="rng">Random source used to pick indices.</param>
/// <returns>An array of <paramref name="num_bootstraps"/> resampled inputs.</returns>
public static InputSample[] Resample(int num_bootstraps, InputSample orig_vals, Random rng)
{
    // the resampled values go here
    var ss = new InputSample[num_bootstraps];

    for (var i = 0; i < num_bootstraps; i++)
    {
        // the population size is fixed while one bootstrap is drawn,
        // so read it once instead of on every draw
        int population = orig_vals.Length();

        var s = new InputSample(orig_vals.Rows(), orig_vals.Columns());

        // inc_count[k] = how many times original index k was drawn
        var inc_count = new int[population];

        // draw as many values as the original holds, with replacement
        for (var j = 0; j < population; j++)
        {
            // Random.Next(0, population) is always < population, so no
            // range check is needed before indexing
            int input_idx = rng.Next(0, population);
            inc_count[input_idx] += 1;

            string value = orig_vals.GetInput(input_idx);
            s.Add(value);
        }

        // hand the per-index draw counts to the sample
        // (presumably a count of zero marks an excluded index — confirm in InputSample)
        s.SetIncludes(inc_count);

        ss[i] = s;
    }

    return ss;
}
/// <summary>
/// Two samples are equal when their include vectors contain the same elements
/// in the same order.
/// </summary>
/// <param name="obj">The object to compare with.</param>
/// <returns>
/// False when <paramref name="obj"/> is null or not an <c>InputSample</c>;
/// otherwise the result of comparing the include vectors.
/// </returns>
public override bool Equals(object obj)
{
    // Equals must not throw: a null or foreign object is simply "not equal"
    // (the unconditional cast used to raise NullReferenceException / InvalidCastException).
    if (!(obj is InputSample))
    {
        return false;
    }

    InputSample other = (InputSample)obj;
    return _includes.SequenceEqual(other.GetIncludes());
}
/// <summary>
/// Entry point of the analysis: propagates weights through the graph, captures the
/// current inputs and outputs, resamples every terminal input range, and hands
/// everything to <c>Inference</c>.
/// </summary>
/// <param name="num_bootstraps">The number of bootstrap samples to draw per input range.</param>
/// <param name="dag">Dependence graph of the workbook.</param>
/// <param name="app">Not referenced in this method body.</param>
/// <param name="weighted">Forwarded to <c>Inference</c>.</param>
/// <param name="all_outputs">Forwarded to <c>dag.terminalFormulaNodes</c>.</param>
/// <param name="max_duration_in_ms">Not referenced in this method body.</param>
/// <param name="sw">Not referenced in this method body.</param>
/// <param name="significance">Forwarded to <c>Inference</c>.</param>
/// <param name="pb">Progress bar; its maximum is set to twice the number of input ranges.</param>
/// <returns>The scores computed by <c>Inference</c>.</returns>
public static TreeScore DataDebug(int num_bootstraps, DAG dag, Excel.Application app, bool weighted, bool all_outputs, long max_duration_in_ms, Stopwatch sw, double significance, ProgBar pb)
{
    // this modifies the weights of each node
    PropagateWeights(dag);

    // terminal formulas (outputs) and terminal input vectors
    var outputAddrs = dag.terminalFormulaNodes(all_outputs);
    var inputRanges = dag.terminalInputVectors();

    // bootstraps[r][b] is the b-th resample of the r-th input range
    var bootstraps = new InputSample[inputRanges.Length][];

    // random source shared by all resampling below
    var random = new Random();

    // capture the workbook state before anything is perturbed
    var savedInputs = StoreInputs(inputRanges, dag);
    var savedOutputs = StoreOutputs(outputAddrs, dag);

    // one progress tick per range here, the rest are consumed by Inference
    pb.setMax(inputRanges.Length * 2);

    // resample every input range
    int r = 0;
    while (r < inputRanges.Length)
    {
        bootstraps[r] = Resample(num_bootstraps, savedInputs[inputRanges[r]], random);
        pb.IncrementProgress();
        r++;
    }

    // run the hypothesis tests
    var result = Inference(
        num_bootstraps,
        bootstraps,
        savedInputs,
        savedOutputs,
        inputRanges,
        outputAddrs,
        dag,
        weighted,
        significance,
        pb);

    return result;
}
/// <summary>
/// Captures the present values of each input range and immediately rewrites the
/// range with those same values.
/// </summary>
/// <param name="inputs">Input ranges to capture.</param>
/// <param name="dag">Graph that supplies the COM reference for a range.</param>
/// <returns>The captured values, keyed by input range.</returns>
private static Dictionary<AST.Range, InputSample> StoreInputs(AST.Range[] inputs, DAG dag)
{
    var stored = new Dictionary<AST.Range, InputSample>();

    foreach (AST.Range rng in inputs)
    {
        var reference = dag.getCOMRefForRange(rng);

        // the whole COM array is copied into the sample in one call
        var current = new InputSample(reference.Height, reference.Width);
        current.AddArray(reference.Range.Value2);

        // remember the values for this range
        stored.Add(rng, current);

        // rewrite the range so Excel recalculates exactly as it
        // will when the bootstrap samples are written
        BootMemo.ReplaceExcelRange(reference.Range, current);
    }

    return stored;
}
/// <summary>
/// Produces <paramref name="num_bootstraps"/> resamples of <paramref name="orig_vals"/>,
/// each built by drawing as many values as the original holds, with replacement.
/// </summary>
/// <param name="num_bootstraps">How many resamples to create.</param>
/// <param name="orig_vals">The values to draw from.</param>
/// <param name="rng">Random source used to choose indices.</param>
/// <returns>The resamples, in the order they were drawn.</returns>
public static InputSample[] Resample(int num_bootstraps, InputSample orig_vals, Random rng)
{
    var ss = new InputSample[num_bootstraps];

    for (var i = 0; i < num_bootstraps; i++)
    {
        // read the population size once per bootstrap rather than on every draw
        int population = orig_vals.Length();

        var s = new InputSample(orig_vals.Rows(), orig_vals.Columns());

        // per-index counter of how often each original value was picked
        var inc_count = new int[population];

        for (var j = 0; j < population; j++)
        {
            // Random.Next's upper bound is exclusive, so input_idx is always
            // a valid index; the former post-hoc range check could never fire
            int input_idx = rng.Next(0, population);
            inc_count[input_idx] += 1;

            string value = orig_vals.GetInput(input_idx);
            s.Add(value);
        }

        // pass the draw counts to the sample
        // (presumably indices with a zero count are treated as excluded — confirm in InputSample)
        s.SetIncludes(inc_count);

        ss[i] = s;
    }

    return ss;
}
/// <summary>
/// For every input range: writes each bootstrap sample into the workbook, records
/// all function outputs, restores the original input, and queues a hypothesis-test
/// job on the thread pool. Finally waits for every job and merges the results.
/// </summary>
/// <param name="num_bootstraps">Number of bootstrap samples per input range.</param>
/// <param name="resamples">resamples[i][b] is the b-th resample of input range i.</param>
/// <param name="initial_inputs">Original values of each input range.</param>
/// <param name="initial_outputs">Original value of each output address.</param>
/// <param name="input_arr">The input ranges, indexed consistently with <paramref name="resamples"/>.</param>
/// <param name="output_arr">The output addresses to read after each replacement.</param>
/// <param name="dag">Dependence graph of the workbook.</param>
/// <param name="weighted">Forwarded to each <c>DataDebugJob</c>.</param>
/// <param name="significance">Forwarded to each <c>DataDebugJob</c>.</param>
/// <param name="pb">Progress bar, incremented once per processed input range.</param>
/// <returns>The merged scores of all jobs.</returns>
/// <exception cref="System.OutOfMemoryException">
/// Rethrown when an input range still cannot be processed after all queued jobs
/// have finished and a garbage collection has been forced.
/// </exception>
public static TreeScore Inference(
    int num_bootstraps,
    InputSample[][] resamples,
    Dictionary<AST.Range, InputSample> initial_inputs,
    Dictionary<AST.Address, string> initial_outputs,
    AST.Range[] input_arr,
    AST.Address[] output_arr,
    DAG dag,
    bool weighted,
    double significance,
    ProgBar pb)
{
    // WaitHandle.WaitAny accepts at most 64 handles, 63 on an STA thread
    const int max_wait_handles = 63;

    // init thread event notification array
    var mres = new ManualResetEvent[input_arr.Length];

    // init job storage
    var ddjs = new DataDebugJob[input_arr.Length];

    // init started jobs count
    var sjobs = 0;

    // init completed jobs count
    var cjobs = 0;

    // last-ditch effort flag
    bool last_try = false;

    // init score storage
    var scores = new TreeScore();

    for (int i = 0; i < input_arr.Length; i++)
    {
        // true once the job for input i has actually been handed to the pool
        bool queued = false;

        try
        {
            #region BOOTSTRAP
            // bootstrapping is done in the parent STA thread because
            // the .NET threading model prohibits thread pools (which
            // are MTA) from accessing STA COM objects directly.

            // alloc bootstrap storage for each output (f), for each resample (b)
            FunctionOutput<string>[][] bs = new FunctionOutput<string>[initial_outputs.Count][];
            for (int f = 0; f < initial_outputs.Count; f++)
            {
                bs[f] = new FunctionOutput<string>[num_bootstraps];
            }

            // init memoization table for input vector i
            var memo = new BootMemo();

            // fetch the input range
            var input = input_arr[i];

            // fetch the input range COM object
            var com = dag.getCOMRefForRange(input).Range;

            // replace the values of the COM object with the b-th bootstrap and
            // save all function outputs; the memo table skips repeated samples
            for (var b = 0; b < num_bootstraps; b++)
            {
                FunctionOutput<string>[] fos = memo.FastReplace(com, dag, initial_inputs[input], resamples[i][b], output_arr, false);
                for (var f = 0; f < output_arr.Length; f++)
                {
                    bs[f][b] = fos[f];
                }
            }

            // restore the original inputs; faster to do once, after bootstrapping is done
            BootMemo.ReplaceExcelRange(com, initial_inputs[input]);

            // TODO: restore formulas if it turns out that they were overwritten
            // this should never be the case
            #endregion BOOTSTRAP

            #region HYPOTHESIS_TEST
            // completion signal for this job
            mres[i] = new ManualResetEvent(false);

            // set up job
            ddjs[i] = new DataDebugJob(
                dag,
                bs,
                initial_outputs,
                input_arr[i],
                output_arr,
                weighted,
                significance,
                mres[i]
            );

            // hand job to thread pool; count it only once it is really queued
            ThreadPool.QueueUserWorkItem(ddjs[i].threadPoolCallback, i);
            queued = true;
            sjobs++;
            #endregion HYPOTHESIS_TEST

            // this input made it through, so the next failure gets a fresh
            // chance to wait for running jobs before giving up
            last_try = false;

            // update progress bar
            pb.IncrementProgress();
        }
        catch (System.OutOfMemoryException)
        {
            if (last_try)
            {
                // we just don't have enough memory; rethrow without
                // resetting the stack trace
                // NOTE(review): if the failure happened mid-bootstrap the range
                // still holds a resampled value at this point — confirm whether
                // the caller restores it.
                throw;
            }

            if (!queued)
            {
                // drop any half-built state so it is neither counted nor waited on
                mres[i] = null;
                ddjs[i] = null;
            }

            // slots of inputs that have not been processed yet are still null
            cjobs = mres.Count(mre => mre != null && mre.WaitOne(0));

            if (sjobs - cjobs == 0)
            {
                // nothing is running that could free memory for us:
                // force a collection and make one final attempt
                GC.Collect();
                last_try = true;
            }
            else
            {
                // wait for one of the still-running jobs to complete
                var pending = mres
                    .Where(mre => mre != null && !mre.WaitOne(0))
                    .Take(max_wait_handles)
                    .ToArray();
                if (pending.Length > 0)
                {
                    WaitHandle.WaitAny(pending);
                }
            }

            if (!queued)
            {
                // retry the same input on the next pass of the loop
                i--;
            }
        }
    }

    // Do not proceed until all hypothesis tests are done.
    // WaitHandle.WaitAll cannot be called on an STA thread which
    // is why we call WaitOne in a loop.
    // Merge scores as data becomes available.
    for (int i = 0; i < input_arr.Length; i++)
    {
        mres[i].WaitOne();
        scores = DictAdd(scores, ddjs[i].Result);
    }

    return scores;
}
/// <summary>
/// Runs the full analysis: weight propagation, capture of the current inputs and
/// outputs, bootstrap resampling of each terminal input range, then <c>Inference</c>.
/// </summary>
/// <param name="num_bootstraps">The number of bootstrap samples to get for each input range.</param>
/// <param name="dag">Dependence graph of the workbook.</param>
/// <param name="app">Not referenced in this method body.</param>
/// <param name="weighted">Passed through to <c>Inference</c>.</param>
/// <param name="all_outputs">Passed to <c>dag.terminalFormulaNodes</c>.</param>
/// <param name="max_duration_in_ms">Not referenced in this method body.</param>
/// <param name="sw">Not referenced in this method body.</param>
/// <param name="significance">Passed through to <c>Inference</c>.</param>
/// <param name="pb">Progress bar; its maximum is set to twice the number of input ranges.</param>
/// <returns>Whatever <c>Inference</c> returns for the collected data.</returns>
public static TreeScore DataDebug(int num_bootstraps, DAG dag, Excel.Application app, bool weighted, bool all_outputs, long max_duration_in_ms, Stopwatch sw, double significance, ProgBar pb)
{
    // this modifies the weights of each node
    PropagateWeights(dag);

    // keep only terminal functions and terminal inputs
    var terminalOutputs = dag.terminalFormulaNodes(all_outputs);
    var terminalInputs = dag.terminalInputVectors();

    // first index: position of the range in terminalInputs
    // second index: the bootstrap number
    var samplesPerInput = new InputSample[terminalInputs.Length][];

    // RNG for sampling
    var sampler = new Random();

    // initial inputs and outputs are saved before resampling
    var originalInputs = StoreInputs(terminalInputs, dag);
    var originalOutputs = StoreOutputs(terminalOutputs, dag);

    // Set progress bar max
    pb.setMax(terminalInputs.Length * 2);

    // populate the bootstrap array, one input range at a time
    for (int rangeIdx = 0; rangeIdx < terminalInputs.Length; rangeIdx++)
    {
        var range = terminalInputs[rangeIdx];
        var original = originalInputs[range];

        samplesPerInput[rangeIdx] = Resample(num_bootstraps, original, sampler);

        pb.IncrementProgress();
    }

    // hypothesis testing
    return Inference(
        num_bootstraps,
        samplesPerInput,
        originalInputs,
        originalOutputs,
        terminalInputs,
        terminalOutputs,
        dag,
        weighted,
        significance,
        pb);
}