SaveOutputs() 공개 정적인 메소드

public static SaveOutputs ( AST.Address[] formula_nodes, DAG dag ) : System.Collections.Generic.Dictionary<AST.Address, string>
formula_nodes AST.Address[]
dag DataDebugMethods.DAG
리턴 System.Collections.Generic.Dictionary<AST.Address, string>
예제 #1
0
        /// <summary>
        /// For each input cell, injects generated error strings one at a time, measures the
        /// total output error each one causes, and records the string with the largest error.
        /// </summary>
        /// <param name="terminal_formula_nodes">Formula addresses passed to <c>Utility.SaveOutputs</c> after each injection.</param>
        /// <param name="inputs">Input addresses mapped to their original values.</param>
        /// <param name="k">Number of error strings requested from the generator per input.</param>
        /// <param name="correct_outputs">Baseline outputs passed to <c>Utility.CalculateTotalError</c>.</param>
        /// <param name="app">Excel application handed to <c>Utility.InjectValues</c>.</param>
        /// <param name="wb">Workbook handed to <c>Utility.InjectValues</c>; it is modified and then restored for every trial.</param>
        /// <param name="classification_file">Argument passed to <c>Classification.Deserialize</c>.</param>
        /// <param name="dag">Dependence graph passed to <c>Utility.SaveOutputs</c>.</param>
        /// <returns>
        /// A dictionary keyed by input address. Each value holds the error string that produced
        /// the largest total error and that error. If no string produced an error greater
        /// than zero, the entry is <c>("", 0.0)</c>.
        /// </returns>
        public Dictionary <AST.Address, Tuple <string, double> > TopOfKErrors(AST.Address[] terminal_formula_nodes, CellDict inputs, int k, CellDict correct_outputs, Excel.Application app, Excel.Workbook wb, string classification_file, DAG dag)
        {
            var eg = new ErrorGenerator();
            // load the classification data used to generate error strings
            var c  = Classification.Deserialize(classification_file);
            var max_error_produced_dictionary = new Dictionary <AST.Address, Tuple <string, double> >();

            foreach (KeyValuePair <AST.Address, string> pair in inputs)
            {
                AST.Address addr       = pair.Key;
                string      orig_value = pair.Value;

                // best (largest) error seen so far for this input
                double max_error_produced = 0.0;
                string max_error_string   = "";

                // ask the generator for k candidate error strings
                string[] errorstrings = eg.GenerateErrorStrings(orig_value, c, k);

                // bound by the array length as well as k so a short result
                // cannot cause an out-of-range access
                for (int i = 0; i < k && i < errorstrings.Length; i++)
                {
                    CellDict cd = new CellDict();
                    cd.Add(addr, errorstrings[i]);

                    CellDict incorrect_outputs;
                    try
                    {
                        // inject the typo
                        Utility.InjectValues(app, wb, cd);

                        // save function outputs
                        incorrect_outputs = Utility.SaveOutputs(terminal_formula_nodes, dag);
                    }
                    finally
                    {
                        // always remove the typo that was introduced, even if reading
                        // the outputs failed, so the workbook is not left corrupted
                        cd.Clear();
                        cd.Add(addr, orig_value);
                        Utility.InjectValues(app, wb, cd);
                    }

                    double total_error = Utility.CalculateTotalError(correct_outputs, incorrect_outputs);

                    // keep track of the largest observed error
                    if (total_error > max_error_produced)
                    {
                        max_error_produced = total_error;
                        max_error_string   = errorstrings[i];
                    }
                }

                // add the entry for this input with the worst error string found
                max_error_produced_dictionary.Add(addr, new Tuple <string, double>(max_error_string, max_error_produced));
            }
            return(max_error_produced_dictionary);
        }
예제 #2
0
        /// <summary>
        /// Simulates a user who repeatedly inspects the cell flagged by the selected analysis.
        /// A flagged cell found in <paramref name="errord"/> is reset to its original value;
        /// any other flagged cell is counted as a false positive. The loop ends when the
        /// analysis flags nothing, or when a count-based cutoff equals the number of cells
        /// inspected. MODIFIES THE WORKBOOK.
        /// </summary>
        /// <param name="nboots">Passed through to <c>SimulationStep.CheckCell_Step</c>.</param>
        /// <param name="significance">Passed to <c>CheckCell_Step</c> and recorded in every log entry.</param>
        /// <param name="ck">Cutoff; its <c>isCountBased</c> and <c>Threshold</c> members decide early termination.</param>
        /// <param name="dag">Dependence graph used by the analysis steps and when saving outputs.</param>
        /// <param name="original_inputs">Original value for each input address.</param>
        /// <param name="errord">Injected erroneous value for each address that holds an error.</param>
        /// <param name="correct_outputs">Baseline outputs that all error measurements are compared against.</param>
        /// <param name="wb">Workbook under test.</param>
        /// <param name="app">Excel application used to reach cells and by the analysis steps.</param>
        /// <param name="analysis_type">Selects which <c>SimulationStep</c> method flags the next cell.</param>
        /// <param name="weighted">Passed through to <c>CheckCell_Step</c>.</param>
        /// <param name="all_outputs">Passed to <c>dag.terminalFormulaNodes</c> and <c>CheckCell_Step</c>.</param>
        /// <param name="max_duration_in_ms">Passed through to the analysis steps.</param>
        /// <param name="sw">Stopwatch passed through to the analysis steps.</param>
        /// <param name="logfile">Passed to <c>LogEntry.WriteLog</c> for every inspected cell.</param>
        /// <param name="pb">Progress bar passed through to <c>CheckCell_Step</c>.</param>
        /// <returns>
        /// A <c>UserResults</c> holding the true positives, false positives, false negatives,
        /// precision-at-k terms, total error after each correction, and per-function max errors.
        /// </returns>
        private UserResults SimulateUser(int nboots,
                                         double significance,
                                         CutoffKind ck,
                                         DAG dag,
                                         CellDict original_inputs,
                                         CellDict errord,
                                         CellDict correct_outputs,
                                         Excel.Workbook wb,
                                         Excel.Application app,
                                         AnalysisType analysis_type,
                                         bool weighted,
                                         bool all_outputs,
                                         long max_duration_in_ms,
                                         Stopwatch sw,
                                         String logfile,
                                         ProgBar pb
                                         )
        {
            // init user results data structure
            var o = new UserResults();
            HashSet <AST.Address> known_good = new HashSet <AST.Address>();

            // initialize procedure
            var errors_remain     = true;
            var max_errors        = new ErrorDict();
            var incorrect_outputs = Utility.SaveOutputs(dag.terminalFormulaNodes(all_outputs), dag);
            var errors_found      = 0;

            Utility.UpdatePerFunctionMaxError(correct_outputs, incorrect_outputs, max_errors);

            // Outputs as of the most recent correction.
            // NOTE(review): this starts as a copy of correct_outputs, so error magnitudes
            // logged before the first correction are computed against a copy of the
            // baseline itself -- confirm whether incorrect_outputs was intended here.
            CellDict partially_corrected_outputs = correct_outputs.ToDictionary(p => p.Key, p => p.Value);

            // remove errors loop
            var cells_inspected = 0;
            List <KeyValuePair <AST.Address, int> > filtered_high_scores = null;
            bool correction_made = true;

            while (errors_remain)
            {
                Console.Write(".");

                AST.Address flagged_cell = null;

                // choose the appropriate test; an unrecognized analysis type leaves
                // flagged_cell null, which ends the loop below
                if (analysis_type == AnalysisType.CheckCell5 ||
                    analysis_type == AnalysisType.CheckCell10
                    )
                {
                    flagged_cell = SimulationStep.CheckCell_Step(o,
                                                                 significance,
                                                                 ck,
                                                                 nboots,
                                                                 dag,
                                                                 app,
                                                                 weighted,
                                                                 all_outputs,
                                                                 correction_made,
                                                                 known_good,
                                                                 ref filtered_high_scores,
                                                                 max_duration_in_ms,
                                                                 sw,
                                                                 pb);
                }
                else if (analysis_type == AnalysisType.NormalPerRange)
                {
                    flagged_cell = SimulationStep.NormalPerRange_Step(dag, wb, known_good, max_duration_in_ms, sw);
                }
                else if (analysis_type == AnalysisType.NormalAllInputs)
                {
                    flagged_cell = SimulationStep.NormalAllOutputs_Step(dag, app, wb, known_good, max_duration_in_ms, sw);
                }

                // stop if the test no longer returns anything or if
                // the test is simply done inspecting based on a fixed threshold
                if (flagged_cell == null || (ck.isCountBased && ck.Threshold == cells_inspected))
                {
                    errors_remain = false;
                }
                else    // a cell was flagged
                {
                    // cells_inspected is only incremented when a cell is actually flagged;
                    // if nothing is flagged, nothing is inspected
                    cells_inspected += 1;

                    // single lookup: is the flagged value actually an injected error?
                    string injected_value;
                    bool   is_error = errord.TryGetValue(flagged_cell, out injected_value);

                    if (is_error)
                    {
                        correction_made = true;
                        errors_found   += 1;
                        // P(k) * rel(k)
                        o.PrecRel_at_k.Add(errors_found / (double)cells_inspected);
                        o.true_positives.Add(flagged_cell);

                        // correct flagged cell
                        flagged_cell.GetCOMObject(app).Value2 = original_inputs[flagged_cell];

                        // re-read the outputs BEFORE measuring, so the figures below
                        // describe the workbook after this correction, not the state
                        // saved by the previous one
                        partially_corrected_outputs = Utility.SaveOutputs(dag.terminalFormulaNodes(all_outputs), dag);

                        Utility.UpdatePerFunctionMaxError(correct_outputs, partially_corrected_outputs, max_errors);

                        // compute total error after applying this correction
                        var current_total_error = Utility.CalculateTotalError(correct_outputs, partially_corrected_outputs);
                        o.current_total_error.Add(current_total_error);
                    }
                    else
                    {
                        correction_made = false;
                        // numerator is 0 here because rel(k) = 0 when no error was found
                        o.PrecRel_at_k.Add(0.0);
                        o.false_positives.Add(flagged_cell);
                    }

                    // mark it as known good -- at this point the cell has been
                    //      'inspected' regardless of whether it was an error
                    //      It was either corrected or marked as OK
                    known_good.Add(flagged_cell);

                    // compute output error magnitudes
                    var output_error_magnitude = Utility.MeanErrorMagnitude(partially_corrected_outputs, correct_outputs);

                    // compute input error magnitudes; both are zero for a false positive
                    string original_value = original_inputs[flagged_cell];
                    Tuple <double, double> magnitudes = is_error
                        ? InputErrorMagnitudes(injected_value, original_value)
                        : new Tuple <double, double>(0, 0);

                    // write error log; a false positive logs the original value in
                    // the erroneous-value slot as well
                    var logentry = new LogEntry(analysis_type,
                                                wb.Name,
                                                flagged_cell,
                                                original_value,
                                                is_error ? injected_value : original_value,
                                                output_error_magnitude,
                                                magnitudes.Item1,
                                                magnitudes.Item2,
                                                true,
                                                correction_made,
                                                significance,
                                                ck.Threshold);
                    logentry.WriteLog(logfile);
                    _error_log.Add(logentry);
                }
            }

            // find all of the false negatives
            o.false_negatives = Utility.GetFalseNegatives(o.true_positives, o.false_positives, errord);
            o.max_errors      = max_errors;

            var last_out_err_mag = Utility.MeanErrorMagnitude(partially_corrected_outputs, correct_outputs);

            // record all false negative information
            // NOTE(review): unlike inspected cells, these entries are only added to
            // _error_log and never written to logfile -- confirm that is intended.
            foreach (AST.Address fn in o.false_negatives)
            {
                string fn_error    = errord[fn];
                string fn_original = original_inputs[fn];
                Tuple <double, double> fn_magnitudes = InputErrorMagnitudes(fn_error, fn_original);

                _error_log.Add(new LogEntry(analysis_type,
                                            wb.Name,
                                            fn,
                                            fn_original,
                                            fn_error,
                                            last_out_err_mag,
                                            fn_magnitudes.Item1,
                                            fn_magnitudes.Item2,
                                            false,
                                            true,
                                            significance,
                                            ck.Threshold));
            }
            return(o);
        }

        // Returns (numeric magnitude, string magnitude) for the change between an erroneous
        // value and the original. Only one of the two is computed; the other is 0.
        private static Tuple <double, double> InputErrorMagnitudes(string erroneous_value, string original_value)
        {
            if (Utility.BothNumbers(erroneous_value, original_value))
            {
                double numeric_change = Utility.NumericalMagnitudeChange(Double.Parse(erroneous_value), Double.Parse(original_value));
                return new Tuple <double, double>(numeric_change, 0);
            }

            double string_change = Utility.StringMagnitudeChange(erroneous_value, original_value);
            return new Tuple <double, double>(0, string_change);
        }
예제 #3
0
        /// <summary>
        /// Runs one simulation: injects the errors held in <c>_errors</c>, lets
        /// <c>SimulateUser</c> remove them, computes error/effort/precision statistics into
        /// this object's fields, and restores the workbook to <paramref name="original_inputs"/>.
        /// </summary>
        /// <remarks>
        /// The parameters <paramref name="c"/>, <paramref name="r"/> and
        /// <paramref name="terminal_input_vectors"/> are not referenced by this method body.
        /// </remarks>
        /// <returns>The number of cells inspected (true positives plus false positives).</returns>
        public int Run(int nboots,                 // number of bootstraps
                       string xlfile,              // name of the workbook
                       double significance,        // significance threshold for test
                       CutoffKind ck,              // kind of threshold function to use
                       Excel.Application app,      // reference to Excel app
                       Classification c,           // data from which to generate errors
                       Random r,                   // a random number generator
                       AnalysisType analysisType,  // the type of analysis to run
                       bool weighted,              // should we weigh things?
                       bool all_outputs,           // if !all_outputs, we only consider terminal outputs
                       DAG dag,
                       Excel.Workbook wb,
                       AST.Address[] terminal_formula_cells,
                       AST.Range[] terminal_input_vectors,
                       CellDict original_inputs,
                       CellDict correct_outputs,
                       long max_duration_in_ms,
                       String logfile,               //filename for the output log
                       ProgBar pb
                       )
        {
            // record the configuration of this run
            _wb_name       = xlfile;
            _wb_path       = wb.Path;
            _analysis_type = analysisType;
            _significance  = significance;
            _all_outputs   = all_outputs;
            _weighted      = weighted;

            try
            {
                // inject the errors from _errors
                Utility.InjectValues(app, wb, _errors);

                // save function outputs
                CellDict incorrect_outputs = Utility.SaveOutputs(terminal_formula_cells, dag);

                // time the removal of errors
                Stopwatch sw = new Stopwatch();

                sw.Start();

                // remove errors until none remain; MODIFIES WORKBOOK
                _user = SimulateUser(nboots, significance, ck, dag, original_inputs, _errors, correct_outputs, wb, app, analysisType, weighted, all_outputs, max_duration_in_ms, sw, logfile, pb);

                sw.Stop();
                TimeSpan elapsed = sw.Elapsed;

                _analysis_time = elapsed.TotalSeconds;

                // save partially-corrected outputs
                var partially_corrected_outputs = Utility.SaveOutputs(terminal_formula_cells, dag);

                // compute total relative error
                _error = Utility.CalculateNormalizedError(correct_outputs, partially_corrected_outputs, _user.max_errors);
                _total_relative_error = Utility.TotalRelativeError(_error);

                // compute starting total relative error (normalized by max_errors)
                ErrorDict starting_error = Utility.CalculateNormalizedError(correct_outputs, incorrect_outputs, _user.max_errors);

                _initial_total_relative_error = Utility.TotalRelativeError(starting_error);

                // effort
                _max_effort      = dag.allCells().Length;
                _effort          = (_user.true_positives.Count + _user.false_positives.Count);
                _expended_effort = (double)_effort / (double)_max_effort;

                // compute average precision
                // AveP = (\sum_{k=1}^n (P(k) * rel(k))) / |total positives|
                // where P(k) is the precision at threshold k,
                // rel(k) = \{ 1 if item at k is a true positive, 0 otherwise
                // NOTE(review): the divisor is _errors.Count with no zero check --
                // confirm what callers expect when no errors were injected.
                _average_precision = _user.PrecRel_at_k.Sum() / (double)_errors.Count;
            }
            finally
            {
                // restore original values even when the simulation throws, so a
                // failed run does not leave injected errors in the workbook
                Utility.InjectValues(app, wb, original_inputs);
            }

            _tree_construct_time = dag.AnalysisMilliseconds / 1000.0;
            // flag that we're done; safe to print output results
            _simulation_run = true;

            // return the number of cells inspected
            return(_effort);
        }