/// <summary>
/// Runs a fixed number of single-error trials. Each trial picks a random data input whose
/// original value parses as a double, replaces it with the string returned by
/// <c>ErrorGenerator.GenerateSubtleErrorString</c>, and passes that one-entry error
/// dictionary to <c>RunSimulation</c>.
/// </summary>
/// <remarks>
/// All parameters other than <paramref name="pb"/> and <paramref name="ignore_parse_errors"/>
/// are forwarded unchanged to <c>Prep.PrepSimulation</c> and/or <c>RunSimulation</c>.
/// </remarks>
/// <returns>
/// <c>true</c> when every trial ran; <c>false</c> as soon as a trial exhausts the inputs
/// without finding a value that parses as a double.
/// </returns>
public static bool RunSubletyExperiment(Excel.Application app, Excel.Workbook wbh, int nboots, double significance, double threshold, UserSimulation.Classification c, Random r, String outfile, long max_duration_in_ms, String logfile, ProgBar pb, bool ignore_parse_errors)
{
    // number of single-error trials to run
    const int NumTrials = 100;

    pb.setMax(5);

    // record initial state of spreadsheet
    var prepdata = Prep.PrepSimulation(app, wbh, pb, ignore_parse_errors);

    // init error generator
    var eg = new ErrorGenerator();

    // get inputs as an array of addresses to facilitate random selection
    // DATA INPUTS ONLY
    AST.Address[] inputs = prepdata.dag.terminalInputCells();

    for (int i = 0; i < NumTrials; i++)
    {
        // Randomly choose a *numeric* input: shuffle once, then walk the shuffled list by
        // index. (Walking by index avoids re-copying the remaining candidates after every
        // rejected address.)
        // NOTE(review): Shuffle() is called without the supplied Random `r`; whether the
        // shuffle is reproducible cannot be determined from this method -- confirm.
        var candidates = inputs.Shuffle().ToList();
        int next = 0;
        AST.Address rand_addr;
        double input_value;
        do
        {
            // if there are no candidates left, fail
            if (next >= candidates.Count)
            {
                return false;
            }
            rand_addr = candidates[next++];

            // the loop condition fetches the original value and tries parsing it
        } while (!Double.TryParse(prepdata.original_inputs[rand_addr], out input_value));

        // perturb it
        String erroneous_input = eg.GenerateSubtleErrorString(input_value, c);

        // create an error dictionary with this one perturbed value
        var errors = new CellDict();
        errors.Add(rand_addr, erroneous_input);

        // run simulations; simulation code does insertion of errors and restore of originals
        RunSimulation(app, wbh, nboots, significance, threshold, c, r, outfile, max_duration_in_ms, logfile, pb, prepdata, errors);
    }

    return true;
}
/// <summary>
/// For every input cell, injects up to <paramref name="k"/> generated error strings one at a
/// time, measures the total output error each one causes, and remembers the string with the
/// largest error. Returns the worst error string for the top 5% of inputs (rounded up),
/// ranked by that largest error.
/// </summary>
/// <param name="output_nodes">Addresses passed to <c>SaveOutputs</c> after each injection.</param>
/// <param name="inputs">Input addresses mapped to their original values.</param>
/// <param name="k">Number of alternatives to consider per input.</param>
/// <param name="correct_outputs">Baseline outputs that each perturbed result is compared against.</param>
/// <param name="app">Passed through to <c>InjectValues</c>.</param>
/// <param name="wb">Passed through to <c>InjectValues</c>.</param>
/// <param name="c">Classification handed to the error generator.</param>
/// <param name="dag">Passed through to <c>SaveOutputs</c>.</param>
/// <returns>Input address mapped to the error string that produced its largest total error.</returns>
public static CellDict GenImportantErrors(AST.Address[] output_nodes,
                                          CellDict inputs,
                                          int k,         // number of alternatives to consider
                                          CellDict correct_outputs,
                                          Excel.Application app,
                                          Excel.Workbook wb,
                                          Classification c,
                                          DAG dag)
{
    var eg = new ErrorGenerator();
    var max_error_produced_dictionary = new Dictionary<AST.Address, Tuple<string, double>>();

    foreach (KeyValuePair<AST.Address, string> pair in inputs)
    {
        AST.Address addr = pair.Key;
        string orig_value = pair.Value;

        // An input whose candidates all yield zero error keeps ("", 0.0).
        double max_error_produced = 0.0;
        string max_error_string = "";

        // get k strings
        string[] errorstrings = eg.GenerateErrorStrings(orig_value, c, k);

        for (int i = 0; i < k; i++)
        {
            CellDict incorrect_outputs;
            try
            {
                // inject the typo
                var typo = new CellDict();
                typo.Add(addr, errorstrings[i]);
                InjectValues(app, wb, typo);

                // save function outputs
                incorrect_outputs = SaveOutputs(output_nodes, dag);
            }
            finally
            {
                // Remove the typo that was introduced. Doing this in `finally` keeps the
                // workbook from being left perturbed if injection or output capture throws.
                var restore = new CellDict();
                restore.Add(addr, orig_value);
                InjectValues(app, wb, restore);
            }

            double total_error = Utility.CalculateTotalError(correct_outputs, incorrect_outputs);

            // keep track of the largest observed max error
            if (total_error > max_error_produced)
            {
                max_error_produced = total_error;
                max_error_string = errorstrings[i];
            }
        }

        // add entry for this address with its max_error_produced
        max_error_produced_dictionary.Add(addr, new Tuple<string, double>(max_error_string, max_error_produced));
    }

    // sort by max_error_produced (largest first) and keep the top 5%, rounded up
    int keep = (int)Math.Ceiling(0.05 * inputs.Count);
    return max_error_produced_dictionary
        .OrderByDescending(entry => entry.Value.Item2)
        .Take(keep)
        .ToDictionary(entry => entry.Key, entry => entry.Value.Item1);
}
/// <summary>
/// For every input cell, injects up to <paramref name="k"/> generated error strings one at a
/// time, measures the total output error each one causes, and records the string with the
/// largest error together with that error.
/// </summary>
/// <param name="terminal_formula_nodes">Addresses passed to <c>Utility.SaveOutputs</c> after each injection.</param>
/// <param name="inputs">Input addresses mapped to their original values.</param>
/// <param name="k">Number of error strings to try per input.</param>
/// <param name="correct_outputs">Baseline outputs that each perturbed result is compared against.</param>
/// <param name="app">Passed through to <c>Utility.InjectValues</c>.</param>
/// <param name="wb">Passed through to <c>Utility.InjectValues</c>.</param>
/// <param name="classification_file">Argument given to <c>Classification.Deserialize</c>.</param>
/// <param name="dag">Passed through to <c>Utility.SaveOutputs</c>.</param>
/// <returns>
/// Every input address mapped to (worst error string, its total error); an input whose
/// candidates all produce zero error maps to ("", 0.0).
/// </returns>
public Dictionary<AST.Address, Tuple<string, double>> TopOfKErrors(AST.Address[] terminal_formula_nodes, CellDict inputs, int k, CellDict correct_outputs, Excel.Application app, Excel.Workbook wb, string classification_file, DAG dag)
{
    var eg = new ErrorGenerator();
    var c = Classification.Deserialize(classification_file);
    var max_error_produced_dictionary = new Dictionary<AST.Address, Tuple<string, double>>();

    foreach (KeyValuePair<AST.Address, string> pair in inputs)
    {
        AST.Address addr = pair.Key;
        string orig_value = pair.Value;

        double max_error_produced = 0.0;
        string max_error_string = "";

        // get k strings
        string[] errorstrings = eg.GenerateErrorStrings(orig_value, c, k);

        for (int i = 0; i < k; i++)
        {
            CellDict incorrect_outputs;
            try
            {
                // inject the typo
                var typo = new CellDict();
                typo.Add(addr, errorstrings[i]);
                Utility.InjectValues(app, wb, typo);

                // save function outputs
                incorrect_outputs = Utility.SaveOutputs(terminal_formula_nodes, dag);
            }
            finally
            {
                // Remove the typo that was introduced. Doing this in `finally` keeps the
                // workbook from being left perturbed if injection or output capture throws.
                var restore = new CellDict();
                restore.Add(addr, orig_value);
                Utility.InjectValues(app, wb, restore);
            }

            double total_error = Utility.CalculateTotalError(correct_outputs, incorrect_outputs);

            // keep track of the largest observed max error
            if (total_error > max_error_produced)
            {
                max_error_produced = total_error;
                max_error_string = errorstrings[i];
            }
        }

        // add entry for this address with its max_error_produced
        max_error_produced_dictionary.Add(addr, new Tuple<string, double>(max_error_string, max_error_produced));
    }

    return max_error_produced_dictionary;
}
/// <summary>
/// Runs a fixed number of single-error trials. Each trial picks a random data input using
/// <paramref name="r"/>, replaces its original value with the string returned by
/// <c>ErrorGenerator.GenerateErrorString</c>, and passes that one-entry error dictionary to
/// <c>RunSimulation</c>.
/// </summary>
/// <remarks>
/// Returns without running any trial when the workbook has no terminal input cells.
/// All parameters other than <paramref name="pb"/> and <paramref name="ignore_parse_errors"/>
/// are forwarded unchanged to <c>Prep.PrepSimulation</c> and/or <c>RunSimulation</c>.
/// </remarks>
/// <exception cref="Exception">
/// An input address reported by the DAG is absent from <c>prepdata.original_inputs</c>.
/// </exception>
public static void RunProportionExperiment(Excel.Application app, Excel.Workbook wbh, int nboots, double significance, double threshold, UserSimulation.Classification c, Random r, String outfile, long max_duration_in_ms, String logfile, ProgBar pb, bool ignore_parse_errors)
{
    // number of single-error trials to run
    const int NumTrials = 100;

    pb.setMax(5);

    // record initial state of spreadsheet
    var prepdata = Prep.PrepSimulation(app, wbh, pb, ignore_parse_errors);

    // init error generator
    var eg = new ErrorGenerator();

    // get inputs as an array of addresses to facilitate random selection
    // DATA INPUTS ONLY
    AST.Address[] inputs = prepdata.dag.terminalInputCells();

    // Nothing to perturb: with an empty array, r.Next(0) yields 0 and inputs[0] would throw
    // IndexOutOfRangeException, so stop before the trial loop.
    if (inputs.Length == 0)
    {
        return;
    }

    // sanity check: all of the inputs should also be in prepdata.original_inputs
    foreach (AST.Address addr in inputs)
    {
        if (!prepdata.original_inputs.ContainsKey(addr))
        {
            throw new Exception("Missing address!");
        }
    }

    for (int i = 0; i < NumTrials; i++)
    {
        // randomly choose an input address
        AST.Address rand_addr = inputs[r.Next(inputs.Length)];

        // get the value
        String input_value = prepdata.original_inputs[rand_addr];

        // perturb it
        String erroneous_input = eg.GenerateErrorString(input_value, c);

        // create an error dictionary with this one perturbed value
        var errors = new CellDict();
        errors.Add(rand_addr, erroneous_input);

        // run simulations; simulation code does insertion of errors and restore of originals
        RunSimulation(app, wbh, nboots, significance, threshold, c, r, outfile, max_duration_in_ms, logfile, pb, prepdata, errors);
    }
}
/// <summary>
/// For every input cell, injects up to <paramref name="k"/> generated error strings one at a
/// time, measures the total output error each one causes, and remembers the string with the
/// largest error. Returns the worst error string for the top 5% of inputs (rounded up),
/// ranked by that largest error.
/// </summary>
/// <param name="output_nodes">Addresses passed to <c>SaveOutputs</c> after each injection.</param>
/// <param name="inputs">Input addresses mapped to their original values.</param>
/// <param name="k">Number of alternatives to consider per input.</param>
/// <param name="correct_outputs">Baseline outputs that each perturbed result is compared against.</param>
/// <param name="app">Passed through to <c>InjectValues</c>.</param>
/// <param name="wb">Passed through to <c>InjectValues</c>.</param>
/// <param name="c">Classification handed to the error generator.</param>
/// <param name="dag">Passed through to <c>SaveOutputs</c>.</param>
/// <returns>Input address mapped to the error string that produced its largest total error.</returns>
public static CellDict GenImportantErrors(AST.Address[] output_nodes,
                                          CellDict inputs,
                                          int k,         // number of alternatives to consider
                                          CellDict correct_outputs,
                                          Excel.Application app,
                                          Excel.Workbook wb,
                                          Classification c,
                                          DAG dag)
{
    var eg = new ErrorGenerator();
    var max_error_produced_dictionary = new Dictionary<AST.Address, Tuple<string, double>>();

    foreach (KeyValuePair<AST.Address, string> pair in inputs)
    {
        AST.Address addr = pair.Key;
        string orig_value = pair.Value;

        // An input whose candidates all yield zero error keeps ("", 0.0).
        double max_error_produced = 0.0;
        string max_error_string = "";

        // get k strings
        string[] errorstrings = eg.GenerateErrorStrings(orig_value, c, k);

        for (int i = 0; i < k; i++)
        {
            CellDict incorrect_outputs;
            try
            {
                // inject the typo
                var typo = new CellDict();
                typo.Add(addr, errorstrings[i]);
                InjectValues(app, wb, typo);

                // save function outputs
                incorrect_outputs = SaveOutputs(output_nodes, dag);
            }
            finally
            {
                // Remove the typo that was introduced. Doing this in `finally` keeps the
                // workbook from being left perturbed if injection or output capture throws.
                var restore = new CellDict();
                restore.Add(addr, orig_value);
                InjectValues(app, wb, restore);
            }

            double total_error = Utility.CalculateTotalError(correct_outputs, incorrect_outputs);

            // keep track of the largest observed max error
            if (total_error > max_error_produced)
            {
                max_error_produced = total_error;
                max_error_string = errorstrings[i];
            }
        }

        // add entry for this address with its max_error_produced
        max_error_produced_dictionary.Add(addr, new Tuple<string, double>(max_error_string, max_error_produced));
    }

    // sort by max_error_produced (largest first) and keep the top 5%, rounded up
    int keep = (int)Math.Ceiling(0.05 * inputs.Count);
    return max_error_produced_dictionary
        .OrderByDescending(entry => entry.Value.Item2)
        .Take(keep)
        .ToDictionary(entry => entry.Key, entry => entry.Value.Item1);
}
/// <summary>
/// Runs a fixed number of single-error trials. Each trial picks a random data input whose
/// original value parses as a double, replaces it with the string returned by
/// <c>ErrorGenerator.GenerateSubtleErrorString</c>, and passes that one-entry error
/// dictionary to <c>RunSimulation</c>.
/// </summary>
/// <remarks>
/// All parameters other than <paramref name="pb"/> and <paramref name="ignore_parse_errors"/>
/// are forwarded unchanged to <c>Prep.PrepSimulation</c> and/or <c>RunSimulation</c>.
/// </remarks>
/// <returns>
/// <c>true</c> when every trial ran; <c>false</c> as soon as a trial exhausts the inputs
/// without finding a value that parses as a double.
/// </returns>
public static bool RunSubletyExperiment(Excel.Application app, Excel.Workbook wbh, int nboots, double significance, double threshold, UserSimulation.Classification c, Random r, String outfile, long max_duration_in_ms, String logfile, ProgBar pb, bool ignore_parse_errors)
{
    // number of single-error trials to run
    const int NumTrials = 100;

    pb.setMax(5);

    // record initial state of spreadsheet
    var prepdata = Prep.PrepSimulation(app, wbh, pb, ignore_parse_errors);

    // init error generator
    var eg = new ErrorGenerator();

    // get inputs as an array of addresses to facilitate random selection
    // DATA INPUTS ONLY
    AST.Address[] inputs = prepdata.dag.terminalInputCells();

    for (int i = 0; i < NumTrials; i++)
    {
        // Randomly choose a *numeric* input: shuffle once, then walk the shuffled list by
        // index. (Walking by index avoids re-copying the remaining candidates after every
        // rejected address.)
        // NOTE(review): Shuffle() is called without the supplied Random `r`; whether the
        // shuffle is reproducible cannot be determined from this method -- confirm.
        var candidates = inputs.Shuffle().ToList();
        int next = 0;
        AST.Address rand_addr;
        double input_value;
        do
        {
            // if there are no candidates left, fail
            if (next >= candidates.Count)
            {
                return false;
            }
            rand_addr = candidates[next++];

            // the loop condition fetches the original value and tries parsing it
        } while (!Double.TryParse(prepdata.original_inputs[rand_addr], out input_value));

        // perturb it
        String erroneous_input = eg.GenerateSubtleErrorString(input_value, c);

        // create an error dictionary with this one perturbed value
        var errors = new CellDict();
        errors.Add(rand_addr, erroneous_input);

        // run simulations; simulation code does insertion of errors and restore of originals
        RunSimulation(app, wbh, nboots, significance, threshold, c, r, outfile, max_duration_in_ms, logfile, pb, prepdata, errors);
    }

    return true;
}
/// <summary>
/// For every input cell, injects up to <paramref name="k"/> generated error strings one at a
/// time, measures the total output error each one causes, and records the string with the
/// largest error together with that error.
/// </summary>
/// <param name="terminal_formula_nodes">Addresses passed to <c>Utility.SaveOutputs</c> after each injection.</param>
/// <param name="inputs">Input addresses mapped to their original values.</param>
/// <param name="k">Number of error strings to try per input.</param>
/// <param name="correct_outputs">Baseline outputs that each perturbed result is compared against.</param>
/// <param name="app">Passed through to <c>Utility.InjectValues</c>.</param>
/// <param name="wb">Passed through to <c>Utility.InjectValues</c>.</param>
/// <param name="classification_file">Argument given to <c>Classification.Deserialize</c>.</param>
/// <param name="dag">Passed through to <c>Utility.SaveOutputs</c>.</param>
/// <returns>
/// Every input address mapped to (worst error string, its total error); an input whose
/// candidates all produce zero error maps to ("", 0.0).
/// </returns>
public Dictionary<AST.Address, Tuple<string, double>> TopOfKErrors(AST.Address[] terminal_formula_nodes, CellDict inputs, int k, CellDict correct_outputs, Excel.Application app, Excel.Workbook wb, string classification_file, DAG dag)
{
    var eg = new ErrorGenerator();
    var c = Classification.Deserialize(classification_file);
    var max_error_produced_dictionary = new Dictionary<AST.Address, Tuple<string, double>>();

    foreach (KeyValuePair<AST.Address, string> pair in inputs)
    {
        AST.Address addr = pair.Key;
        string orig_value = pair.Value;

        double max_error_produced = 0.0;
        string max_error_string = "";

        // get k strings
        string[] errorstrings = eg.GenerateErrorStrings(orig_value, c, k);

        for (int i = 0; i < k; i++)
        {
            CellDict incorrect_outputs;
            try
            {
                // inject the typo
                var typo = new CellDict();
                typo.Add(addr, errorstrings[i]);
                Utility.InjectValues(app, wb, typo);

                // save function outputs
                incorrect_outputs = Utility.SaveOutputs(terminal_formula_nodes, dag);
            }
            finally
            {
                // Remove the typo that was introduced. Doing this in `finally` keeps the
                // workbook from being left perturbed if injection or output capture throws.
                var restore = new CellDict();
                restore.Add(addr, orig_value);
                Utility.InjectValues(app, wb, restore);
            }

            double total_error = Utility.CalculateTotalError(correct_outputs, incorrect_outputs);

            // keep track of the largest observed max error
            if (total_error > max_error_produced)
            {
                max_error_produced = total_error;
                max_error_string = errorstrings[i];
            }
        }

        // add entry for this address with its max_error_produced
        max_error_produced_dictionary.Add(addr, new Tuple<string, double>(max_error_string, max_error_produced));
    }

    return max_error_produced_dictionary;
}