Beispiel #1
0
        /// <summary>
        /// Returns the function outputs recorded for <paramref name="sample"/> in the memo
        /// table. On a miss, the sample is written into <paramref name="com"/>, every address
        /// in <paramref name="outputs"/> is read back, and the result is cached under the sample.
        /// </summary>
        /// <param name="com">Excel range that receives the sample's values.</param>
        /// <param name="dag">Graph used to read the value at each output address.</param>
        /// <param name="original">Values written back to <paramref name="com"/> when <paramref name="replace_original"/> is true.</param>
        /// <param name="sample">Resampled input; also used as the memo-table key.</param>
        /// <param name="outputs">Addresses whose values are captured.</param>
        /// <param name="replace_original">Whether to restore <paramref name="original"/> after a cache miss.</param>
        /// <returns>One output per entry of <paramref name="outputs"/>, in the same order.</returns>
        public FunctionOutput <string>[] FastReplace(Excel.Range com, DAG dag, InputSample original, InputSample sample, AST.Address[] outputs, bool replace_original)
        {
            FunctionOutput<string>[] results;

            // cache hit: the spreadsheet is not touched at all
            if (_d.TryGetValue(sample, out results))
            {
                return results;
            }

            // cache miss: push the sample into the spreadsheet
            ReplaceExcelRange(com, sample);

            // read back each output address, pairing it with the sample's excludes
            int count = outputs.Length;
            results = new FunctionOutput<string>[count];
            int idx = 0;
            while (idx < count)
            {
                results[idx] = new FunctionOutput<string>(dag.readCOMValueAtAddress(outputs[idx]), sample.GetExcludes());
                idx++;
            }

            // remember the outputs for the next identical sample
            _d.Add(sample, results);

            // optionally put the original values back
            if (replace_original)
            {
                ReplaceExcelRange(com, original);
            }

            return results;
        }
Beispiel #2
0
        /// <summary>
        /// Returns the function outputs recorded for <paramref name="sample"/> in the memo
        /// table. On a miss, the sample is written into <paramref name="com"/>, every address
        /// in <paramref name="outputs"/> is read back, and the result is cached under the sample.
        /// </summary>
        /// <param name="com">Excel range that receives the sample's values.</param>
        /// <param name="dag">Graph used to read the value at each output address.</param>
        /// <param name="original">Values written back to <paramref name="com"/> when <paramref name="replace_original"/> is true.</param>
        /// <param name="sample">Resampled input; also used as the memo-table key.</param>
        /// <param name="outputs">Addresses whose values are captured.</param>
        /// <param name="replace_original">Whether to restore <paramref name="original"/> after a cache miss.</param>
        /// <returns>One output per entry of <paramref name="outputs"/>, in the same order.</returns>
        public FunctionOutput<string>[] FastReplace(Excel.Range com, DAG dag, InputSample original, InputSample sample, AST.Address[] outputs, bool replace_original)
        {
            FunctionOutput<string>[] memoized;

            // a previously seen sample is answered straight from the memo table
            if (_d.TryGetValue(sample, out memoized))
            {
                return memoized;
            }

            // unseen sample: write it into the spreadsheet
            ReplaceExcelRange(com, sample);

            // collect the value at every output address
            int n_outputs = outputs.Length;
            memoized = new FunctionOutput<string>[n_outputs];
            int pos = 0;
            while (pos < n_outputs)
            {
                memoized[pos] = new FunctionOutput<string>(dag.readCOMValueAtAddress(outputs[pos]), sample.GetExcludes());
                pos++;
            }

            // store the outputs under this sample
            _d.Add(sample, memoized);

            // restore the original values only when asked to
            if (replace_original)
            {
                ReplaceExcelRange(com, original);
            }

            return memoized;
        }
Beispiel #3
0
        /// <summary>
        /// Writes the values of <paramref name="input"/> into <paramref name="com"/>,
        /// retrying until the assignment succeeds.
        /// </summary>
        /// <param name="com">Range whose <c>Value2</c> is overwritten.</param>
        /// <param name="input">Sample providing the array to write.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="com"/> or <paramref name="input"/> is null. Without this check the
        /// resulting NullReferenceException would be swallowed by the retry loop forever.
        /// </exception>
        /// <remarks>
        /// Any exception raised by the assignment is treated as transient and retried without
        /// limit, so a permanent failure never returns.
        /// NOTE(review): presumably the retry exists because Excel can reject calls while busy;
        /// confirm, and consider narrowing the catch to the specific COM error codes.
        /// </remarks>
        public static void ReplaceExcelRange(Range com, InputSample input)
        {
            if (com == null)
            {
                throw new ArgumentNullException("com");
            }
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            // pause between failed attempts instead of spinning at full speed
            const int retry_delay_ms = 10;

            while (true)
            {
                try
                {
                    com.Value2 = input.GetInputArray();
                    return;
                }
                catch (Exception)
                {
                    // deliberate best-effort: back off briefly, then try again
                    System.Threading.Thread.Sleep(retry_delay_ms);
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Writes the values of <paramref name="input"/> into <paramref name="com"/>,
        /// retrying until the assignment succeeds.
        /// </summary>
        /// <param name="com">Range whose <c>Value2</c> is overwritten.</param>
        /// <param name="input">Sample providing the array to write.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="com"/> or <paramref name="input"/> is null. Without this check the
        /// resulting NullReferenceException would be swallowed by the retry loop forever.
        /// </exception>
        /// <remarks>
        /// Any exception raised by the assignment is treated as transient and retried without
        /// limit, so a permanent failure never returns.
        /// NOTE(review): presumably the retry exists because Excel can reject calls while busy;
        /// confirm, and consider narrowing the catch to the specific COM error codes.
        /// </remarks>
        public static void ReplaceExcelRange(Range com, InputSample input)
        {
            if (com == null)
            {
                throw new ArgumentNullException("com");
            }
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            // pause between failed attempts instead of spinning at full speed
            const int retry_delay_ms = 10;

            bool done = false;
            while (!done)
            {
                try
                {
                    com.Value2 = input.GetInputArray();
                    done = true;
                }
                catch (Exception)
                {
                    // deliberate best-effort: back off briefly, then try again
                    System.Threading.Thread.Sleep(retry_delay_ms);
                }
            }
        }
Beispiel #5
0
        /// <summary>
        /// Captures the current contents of every input range and returns them keyed by range.
        /// </summary>
        /// <param name="inputs">Input ranges to snapshot.</param>
        /// <param name="dag">Graph used to resolve each range to its COM reference.</param>
        /// <returns>A dictionary mapping each input range to the sample holding its values.</returns>
        private static Dictionary <AST.Range, InputSample> StoreInputs(AST.Range[] inputs, DAG dag)
        {
            var saved = new Dictionary<AST.Range, InputSample>();

            for (int i = 0; i < inputs.Length; i++)
            {
                AST.Range range = inputs[i];
                var com_ref = dag.getCOMRefForRange(range);

                // copy the whole COM array into a sample of matching dimensions
                var snapshot = new InputSample(com_ref.Height, com_ref.Width);
                snapshot.AddArray(com_ref.Range.Value2);

                saved.Add(range, snapshot);

                // write the same values straight back so Excel recalculates its outputs
                // the same way it will for the bootstraps
                BootMemo.ReplaceExcelRange(com_ref.Range, snapshot);
            }

            return saved;
        }
Beispiel #6
0
        /// <summary>
        /// Draws <paramref name="num_bootstraps"/> bootstrap samples from
        /// <paramref name="orig_vals"/>, sampling with replacement.
        /// </summary>
        /// <param name="num_bootstraps">Number of resampled inputs to produce.</param>
        /// <param name="orig_vals">Original input values to draw from.</param>
        /// <param name="rng">Random source used to pick indices.</param>
        /// <returns>
        /// An array of <paramref name="num_bootstraps"/> samples, each with the same row and
        /// column counts as <paramref name="orig_vals"/> and as many values as it has.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="rng"/> produced an index at or beyond the number of original values.
        /// </exception>
        public static InputSample[] Resample(int num_bootstraps, InputSample orig_vals, Random rng)
        {
            // the resampled values go here
            var ss = new InputSample[num_bootstraps];

            // sample with replacement to get num_bootstraps bootstrapped samples
            for (var i = 0; i < num_bootstraps; i++)
            {
                var s = new InputSample(orig_vals.Rows(), orig_vals.Columns());

                // number of original values, read once per bootstrap
                // (assumes Length() is stable while orig_vals is only read - TODO confirm)
                int n = orig_vals.Length();

                // how many times each original index was drawn for this sample
                var inc_count = new int[n];

                // randomly sample n values, with replacement
                for (var j = 0; j < n; j++)
                {
                    // randomly select an index into the original values
                    int input_idx = rng.Next(0, n);

                    // validate before the index is used; the check previously sat after the
                    // array write and therefore could never fire
                    if (input_idx >= n)
                    {
                        throw new InvalidOperationException("input_idx >= orig_vals.Length()");
                    }

                    inc_count[input_idx] += 1;
                    string value = orig_vals.GetInput(input_idx);
                    s.Add(value);
                }

                // record the per-index draw counts; a count of zero means that index
                // does not appear in this resample
                s.SetIncludes(inc_count);

                // add the new InputSample to the output array
                ss[i] = s;
            }

            return ss;
        }
Beispiel #7
0
        /// <summary>
        /// Two samples are equal when their include vectors contain the same sequence of values.
        /// </summary>
        /// <param name="obj">Object to compare against.</param>
        /// <returns>
        /// False when <paramref name="obj"/> is null or not an <c>InputSample</c>; otherwise
        /// whether the two include vectors are element-wise equal.
        /// </returns>
        /// <remarks>
        /// NOTE(review): GetHashCode is not visible from here; it must be derived from the
        /// include vector as well for dictionary lookups to agree with this method.
        /// </remarks>
        public override bool Equals(object obj)
        {
            // Equals must answer false, not throw, for null or for a different type
            if (!(obj is InputSample))
            {
                return false;
            }

            InputSample other = (InputSample)obj;

            return _includes.SequenceEqual(other.GetIncludes());
        }
Beispiel #8
0
        /// <summary>
        /// Entry point of the analysis: propagates weights through the graph, snapshots the
        /// terminal inputs and outputs, draws bootstrap resamples for each input range, and
        /// hands everything to <c>Inference</c>.
        /// </summary>
        /// <param name="num_bootstraps">Number of bootstrap samples to draw per input range.</param>
        /// <param name="dag">Dependence graph of the workbook.</param>
        /// <param name="app">Not used by this method.</param>
        /// <param name="weighted">Forwarded to <c>Inference</c>.</param>
        /// <param name="all_outputs">Forwarded to <c>dag.terminalFormulaNodes</c>.</param>
        /// <param name="max_duration_in_ms">Not used by this method.</param>
        /// <param name="sw">Not used by this method.</param>
        /// <param name="significance">Forwarded to <c>Inference</c>.</param>
        /// <param name="pb">Progress bar; its maximum is set to twice the number of input ranges.</param>
        /// <returns>The scores produced by <c>Inference</c>.</returns>
        public static TreeScore DataDebug(int num_bootstraps,
            DAG dag,
            Excel.Application app,
            bool weighted,
            bool all_outputs,
            long max_duration_in_ms,
            Stopwatch sw,
            double significance,
            ProgBar pb)
        {
            // node weights are updated in place
            PropagateWeights(dag);

            // terminal formulas and terminal input vectors only
            var terminal_outputs = dag.terminalFormulaNodes(all_outputs);
            var terminal_inputs = dag.terminalInputVectors();
            int n_inputs = terminal_inputs.Length;

            // bootstraps[k][b] is the b-th resample of the k-th input range
            var bootstraps = new InputSample[n_inputs][];

            // random source shared by all resampling
            var random = new Random();

            // snapshot the spreadsheet before anything is perturbed
            var saved_inputs = StoreInputs(terminal_inputs, dag);
            var saved_outputs = StoreOutputs(terminal_outputs, dag);

            // one progress step per input range here; the other half of the maximum is
            // left for the inference stage
            pb.setMax(n_inputs * 2);

            // draw the resamples for every input range
            for (int k = 0; k < n_inputs; ++k)
            {
                var range = terminal_inputs[k];
                bootstraps[k] = Resample(num_bootstraps, saved_inputs[range], random);
                pb.IncrementProgress();
            }

            // run the bootstraps and the hypothesis tests
            TreeScore result = Inference(
                num_bootstraps,
                bootstraps,
                saved_inputs,
                saved_outputs,
                terminal_inputs,
                terminal_outputs,
                dag,
                weighted,
                significance,
                pb);
            return result;
        }
Beispiel #9
0
        /// <summary>
        /// Captures the current contents of every input range and returns them keyed by range.
        /// </summary>
        /// <param name="inputs">Input ranges to snapshot.</param>
        /// <param name="dag">Graph used to resolve each range to its COM reference.</param>
        /// <returns>A dictionary mapping each input range to the sample holding its values.</returns>
        private static Dictionary<AST.Range, InputSample> StoreInputs(AST.Range[] inputs, DAG dag)
        {
            var snapshots = new Dictionary<AST.Range, InputSample>();

            for (int idx = 0; idx < inputs.Length; idx++)
            {
                AST.Range current = inputs[idx];
                var range_ref = dag.getCOMRefForRange(current);

                // pull the complete COM array into a sample of the same dimensions
                var stored = new InputSample(range_ref.Height, range_ref.Width);
                stored.AddArray(range_ref.Range.Value2);

                snapshots.Add(current, stored);

                // rewrite the identical values so Excel recalculates its outputs
                // the same way it will for the bootstraps
                BootMemo.ReplaceExcelRange(range_ref.Range, stored);
            }

            return snapshots;
        }
Beispiel #10
0
        /// <summary>
        /// Draws <paramref name="num_bootstraps"/> bootstrap samples from
        /// <paramref name="orig_vals"/>, sampling with replacement.
        /// </summary>
        /// <param name="num_bootstraps">Number of resampled inputs to produce.</param>
        /// <param name="orig_vals">Original input values to draw from.</param>
        /// <param name="rng">Random source used to pick indices.</param>
        /// <returns>
        /// An array of <paramref name="num_bootstraps"/> samples, each with the same row and
        /// column counts as <paramref name="orig_vals"/> and as many values as it has.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="rng"/> produced an index at or beyond the number of original values.
        /// </exception>
        public static InputSample[] Resample(int num_bootstraps, InputSample orig_vals, Random rng)
        {
            // the resampled values go here
            var ss = new InputSample[num_bootstraps];

            // sample with replacement to get num_bootstraps bootstrapped samples
            for (var i = 0; i < num_bootstraps; i++)
            {
                var s = new InputSample(orig_vals.Rows(), orig_vals.Columns());

                // number of original values, read once per bootstrap
                // (assumes Length() is stable while orig_vals is only read - TODO confirm)
                int n = orig_vals.Length();

                // how many times each original index was drawn for this sample
                var inc_count = new int[n];

                // randomly sample n values, with replacement
                for (var j = 0; j < n; j++)
                {
                    // randomly select an index into the original values
                    int input_idx = rng.Next(0, n);

                    // validate before the index is used; the check previously sat after the
                    // array write and therefore could never fire
                    if (input_idx >= n)
                    {
                        throw new InvalidOperationException("input_idx >= orig_vals.Length()");
                    }

                    inc_count[input_idx] += 1;
                    string value = orig_vals.GetInput(input_idx);
                    s.Add(value);
                }

                // record the per-index draw counts; a count of zero means that index
                // does not appear in this resample
                s.SetIncludes(inc_count);

                // add the new InputSample to the output array
                ss[i] = s;
            }

            return ss;
        }
Beispiel #11
0
        /// <summary>
        /// For each input range, replays every bootstrap resample through the spreadsheet,
        /// records the resulting function outputs, queues a hypothesis-test job on the thread
        /// pool, and finally merges the results of all jobs.
        /// </summary>
        /// <param name="num_bootstraps">Number of resamples per input range.</param>
        /// <param name="resamples">First index: input range; second index: bootstrap number.</param>
        /// <param name="initial_inputs">Original values of each input range, restored after bootstrapping.</param>
        /// <param name="initial_outputs">Original output values; its count sizes the per-output storage.</param>
        /// <param name="input_arr">Input ranges to perturb, in the same order as <paramref name="resamples"/>.</param>
        /// <param name="output_arr">Addresses whose values are captured for every resample.</param>
        /// <param name="dag">Graph used to resolve ranges and read values.</param>
        /// <param name="weighted">Forwarded to each job.</param>
        /// <param name="significance">Forwarded to each job.</param>
        /// <param name="pb">Progress bar, advanced once per input range.</param>
        /// <returns>The merged scores of all jobs.</returns>
        /// <exception cref="System.OutOfMemoryException">
        /// Memory is still insufficient after all queued jobs have finished and a garbage
        /// collection has been forced.
        /// </exception>
        /// <remarks>
        /// NOTE(review): storage is sized by <c>initial_outputs.Count</c> but filled using
        /// <c>output_arr.Length</c>; this assumes the two are equal - confirm against the caller.
        /// </remarks>
        public static TreeScore Inference(
            int num_bootstraps,
            InputSample[][] resamples,
            Dictionary<AST.Range, InputSample> initial_inputs,
            Dictionary<AST.Address, string> initial_outputs,
            AST.Range[] input_arr,
            AST.Address[] output_arr,
            DAG dag,
            bool weighted,
            double significance,
            ProgBar pb)
        {
            // WaitHandle.WaitAny rejects arrays holding more handles than this
            const int max_wait_handles = 64;

            // init thread event notification array
            var mres = new ManualResetEvent[input_arr.Length];

            // init job storage
            var ddjs = new DataDebugJob[input_arr.Length];

            // last-ditch effort flag: true once a GC has been forced for the current input
            bool last_try = false;

            // init score storage
            var scores = new TreeScore();

            for (int i = 0; i < input_arr.Length; i++)
            {
                // keep retrying input i until its job is queued; when this loop is entered,
                // jobs 0..i-1 have all been queued, so mres[0..i-1] are non-null
                bool queued = false;
                while (!queued)
                {
                    try
                    {
                        #region BOOTSTRAP
                        // bootstrapping is done in the parent STA thread because
                        // the .NET threading model prohibits thread pools (which
                        // are MTA) from accessing STA COM objects directly.

                        // alloc bootstrap storage for each output (f), for each resample (b)
                        FunctionOutput<string>[][] bs = new FunctionOutput<string>[initial_outputs.Count][];
                        for (int f = 0; f < initial_outputs.Count; f++)
                        {
                            bs[f] = new FunctionOutput<string>[num_bootstraps];
                        }

                        // init memoization table for input vector i
                        var memo = new BootMemo();

                        // fetch the input range and its original values
                        var input = input_arr[i];
                        var original = initial_inputs[input];

                        // fetch the input range COM object
                        var com = dag.getCOMRefForRange(input).Range;

                        try
                        {
                            // replace the values of the COM object with the bth bootstrap
                            // and save all function outputs
                            for (var b = 0; b < num_bootstraps; b++)
                            {
                                // lookup outputs from memo table; otherwise do replacement, compute outputs, store them in table, and return them
                                FunctionOutput<string>[] fos = memo.FastReplace(com, dag, original, resamples[i][b], output_arr, false);
                                for (var f = 0; f < output_arr.Length; f++)
                                {
                                    bs[f][b] = fos[f];
                                }
                            }
                        }
                        finally
                        {
                            // restore the original inputs once, after bootstrapping; done in a
                            // finally block so a failure does not leave the range perturbed
                            BootMemo.ReplaceExcelRange(com, original);
                        }

                        // TODO: restore formulas if it turns out that they were overwritten
                        //       this should never be the case
                        #endregion BOOTSTRAP

                        #region HYPOTHESIS_TEST
                        // completion event for this job
                        mres[i] = new ManualResetEvent(false);

                        // set up job
                        ddjs[i] = new DataDebugJob(
                                    dag,
                                    bs,
                                    initial_outputs,
                                    input,
                                    output_arr,
                                    weighted,
                                    significance,
                                    mres[i]
                                    );

                        // hand job to thread pool
                        ThreadPool.QueueUserWorkItem(ddjs[i].threadPoolCallback, i);
                        #endregion HYPOTHESIS_TEST

                        queued = true;

                        // the next input gets its own last-ditch attempt
                        last_try = false;
                    }
                    catch (System.OutOfMemoryException)
                    {
                        if (last_try)
                        {
                            // a forced GC did not help; we just don't have enough memory
                            throw;
                        }

                        // collect the events of earlier jobs that are still running
                        var pending = new List<WaitHandle>();
                        for (int j = 0; j < i && pending.Count < max_wait_handles; j++)
                        {
                            if (!mres[j].WaitOne(0))
                            {
                                pending.Add(mres[j]);
                            }
                        }

                        if (pending.Count == 0)
                        {
                            // no more jobs are running, yet we still can't allocate:
                            // invoke the GC and try one last time
                            GC.Collect();
                            last_try = true;
                        }
                        else
                        {
                            // wait for any running job to complete, then try again
                            WaitHandle.WaitAny(pending.ToArray());
                        }
                    }
                }

                // update progress bar
                pb.IncrementProgress();
            }

            // Do not proceed until all hypothesis tests are done.
            // WaitHandle.WaitAll cannot be called on an STA thread which
            // is why we call WaitOne in a loop.
            // Merge scores as data becomes available.
            for (int i = 0; i < input_arr.Length; i++)
            {
                mres[i].WaitOne();
                scores = DictAdd(scores, ddjs[i].Result);
            }

            return scores;
        }
Beispiel #12
0
        /// <summary>
        /// Entry point of the analysis: propagates weights through the graph, snapshots the
        /// terminal inputs and outputs, draws bootstrap resamples for each input range, and
        /// hands everything to <c>Inference</c>.
        /// </summary>
        /// <param name="num_bootstraps">Number of bootstrap samples to draw per input range.</param>
        /// <param name="dag">Dependence graph of the workbook.</param>
        /// <param name="app">Not used by this method.</param>
        /// <param name="weighted">Forwarded to <c>Inference</c>.</param>
        /// <param name="all_outputs">Forwarded to <c>dag.terminalFormulaNodes</c>.</param>
        /// <param name="max_duration_in_ms">Not used by this method.</param>
        /// <param name="sw">Not used by this method.</param>
        /// <param name="significance">Forwarded to <c>Inference</c>.</param>
        /// <param name="pb">Progress bar; its maximum is set to twice the number of input ranges.</param>
        /// <returns>The scores produced by <c>Inference</c>.</returns>
        public static TreeScore DataDebug(int num_bootstraps,
                                          DAG dag,
                                          Excel.Application app,
                                          bool weighted,
                                          bool all_outputs,
                                          long max_duration_in_ms,
                                          Stopwatch sw,
                                          double significance,
                                          ProgBar pb)
        {
            // node weights are updated in place
            PropagateWeights(dag);

            // terminal formulas and terminal input vectors only
            var formula_nodes = dag.terminalFormulaNodes(all_outputs);
            var input_vectors = dag.terminalInputVectors();
            int vector_count = input_vectors.Length;

            // per_input[k][b] is the b-th resample of the k-th input range
            var per_input = new InputSample[vector_count][];

            // random source shared by all resampling
            var sampler = new Random();

            // snapshot the spreadsheet before anything is perturbed
            var original_inputs  = StoreInputs(input_vectors, dag);
            var original_outputs = StoreOutputs(formula_nodes, dag);

            // one progress step per input range here; the other half of the maximum is
            // left for the inference stage
            pb.setMax(vector_count * 2);

            // draw the resamples for every input range
            for (int k = 0; k < vector_count; ++k)
            {
                var vector = input_vectors[k];
                per_input[k] = Resample(num_bootstraps, original_inputs[vector], sampler);
                pb.IncrementProgress();
            }

            // run the bootstraps and the hypothesis tests
            TreeScore outcome = Inference(
                num_bootstraps,
                per_input,
                original_inputs,
                original_outputs,
                input_vectors,
                formula_nodes,
                dag,
                weighted,
                significance,
                pb);
            return outcome;
        }