Ejemplo n.º 1
 // Builds a Classification by feeding every (original, entered) string pair
 // from the parsed data into Classification.ProcessTypos, printing a
 // carriage-return-refreshed progress percentage to the console as it goes.
 //
 // data    - parsed input; supplies NumInputs (progress denominator) and StringPairs.
 // serfile - not used by this method.
 //
 // Returns the populated Classification.
 public static Classification Classify(MTurkParser.Data data, string serfile)
 {
     var inputCount = data.NumInputs;
     var classification = new Classification();
     var pairs = data.StringPairs.ToArray();

     int processed = 0;
     foreach (var pair in pairs)
     {
         // Progress is reported before the pair is processed, relative to
         // NumInputs (which is read independently of the pair count).
         double fractionDone = System.Convert.ToDouble(processed) / System.Convert.ToDouble(inputCount);
         Console.Write("\r{0:P} strings classified", fractionDone);

         classification.ProcessTypos(pair.Item1, pair.Item2);
         processed++;
     }

     return classification;
 }
Ejemplo n.º 2
        // For every input cell, tries k generated error strings and records the one
        // that produced the largest total output error, keyed by the cell's address.
        //
        // terminal_formula_nodes - addresses passed to Utility.SaveOutputs to capture outputs.
        // inputs                 - address -> original value for each input cell to perturb.
        // k                      - number of error strings generated and tried per input.
        // correct_outputs        - baseline outputs passed to Utility.CalculateTotalError.
        // app, wb                - Excel application/workbook handed to Utility.InjectValues.
        // classification_file    - path given to Classification.Deserialize.
        // dag                    - passed through to Utility.SaveOutputs.
        //
        // NOTE(review): this excerpt ends before any return statement; presumably the
        // method returns max_error_produced_dictionary -- confirm against the full file.
        public Dictionary <AST.Address, Tuple <string, double> > TopOfKErrors(AST.Address[] terminal_formula_nodes, CellDict inputs, int k, CellDict correct_outputs, Excel.Application app, Excel.Workbook wb, string classification_file, DAG dag)
            var eg = new ErrorGenerator();
            var c  = Classification.Deserialize(classification_file);
            var max_error_produced_dictionary = new Dictionary <AST.Address, Tuple <string, double> >();

            foreach (KeyValuePair <AST.Address, string> pair in inputs)
                AST.Address addr       = pair.Key;
                string      orig_value = pair.Value;

                // Worst error seen so far for this input, and the string that caused it.
                // If no candidate yields an error above 0.0, the entry stays ("", 0.0).
                double max_error_produced = 0.0;
                string max_error_string   = "";

                // get k candidate error strings for this input's original value
                // (assumes the returned array has at least k elements -- it is indexed 0..k-1 below)
                string[] errorstrings = eg.GenerateErrorStrings(orig_value, c, k);

                for (int i = 0; i < k; i++)
                    CellDict cd = new CellDict();
                    cd.Add(addr, errorstrings[i]);
                    // inject the typo into the workbook
                    Utility.InjectValues(app, wb, cd);

                    // capture the outputs while the typo is in place
                    CellDict incorrect_outputs = Utility.SaveOutputs(terminal_formula_nodes, dag);

                    // remove the typo that was introduced by writing the original value back
                    // NOTE(review): cd already contains addr from the Add above. If CellDict's
                    // Add rejects duplicate keys (as Dictionary<TKey,TValue>.Add does), this
                    // call throws before the original value is restored -- confirm, and
                    // consider clearing cd or using a fresh CellDict here.
                    cd.Add(addr, orig_value);
                    Utility.InjectValues(app, wb, cd);

                    double total_error = Utility.CalculateTotalError(correct_outputs, incorrect_outputs);

                    // keep track of the largest observed error (strictly greater, so the
                    // first string reaching a given maximum wins ties)
                    if (total_error > max_error_produced)
                        max_error_produced = total_error;
                        max_error_string   = errorstrings[i];
                // Add an entry for this input address with its worst error string and magnitude
                max_error_produced_dictionary.Add(addr, new Tuple <string, double>(max_error_string, max_error_produced));
Ejemplo n.º 5
        // Repeatedly generates an error string for a numeric input until the measured
        // magnitude change of a numerically-parseable error drops below zero.
        // This method only works for functions with numerical inputs.
        //
        // input - the original numeric value; converted to a string before an error is generated.
        // c     - classification passed through to GenerateErrorString.
        //
        // NOTE(review): the opening "do {" of the loop and the return statement are not
        // visible in this excerpt; presumably the method returns errstr -- confirm.
        public string GenerateSubtleErrorString(double input, Classification c)
            string errstr;
            // Starts non-negative so the loop condition below holds until a numeric
            // error actually updates it.
            double errmag = 100;

                // generate an error
                errstr = GenerateErrorString(Convert.ToString(input), c);
                double errval;
                // errmag is only updated when the error string still parses as a double;
                // non-numeric errors leave the previous errmag in place.
                if (Double.TryParse(errstr, out errval))
                    // it's a numerical error
                    // get the magnitude of the error
                    errmag = Utility.NumericalMagnitudeChange(errval, input);
            // keep retrying while the magnitude change is zero or positive
            } while (errmag >= 0);
Ejemplo n.º 6
        // Generates the distribution of strings for a particular character given a classification.
        // Selects entries from the classification's typo dictionary, then divides each
        // selected entry's value by the sum of all selected values, keyed by the entry
        // key's Item2 (the string).
        //
        // c              - the (optional) character to build a distribution for.
        // classification - source of the typo dictionary via GetTypoDict().
        //
        // Returns a dictionary from string to its share of the selected total.
        private Dictionary <string, double> GenerateDistributionForChar(OptChar c, Classification classification)
            var typo_dict = classification.GetTypoDict();
            // NOTE(review): the body of this filter predicate is truncated in this excerpt;
            // only the IsNone checks on the entry's Item1 and on c are visible. Presumably it
            // keeps entries whose Item1 matches c -- confirm against the full file.
            var kvps      = typo_dict.Where(pair => {
                if (OptChar.get_IsNone(pair.Key.Item1))
                    if (OptChar.get_IsNone(c))
            // total of the selected entries' values; used as the normalizing denominator
            var sum          = kvps.Select(pair => pair.Value).Sum();
            // NOTE(review): kvps is enumerated once for the sum and again here; if Where is
            // the lazy LINQ operator the predicate runs twice per entry -- confirm.
            var distribution = kvps.Select(pair => new KeyValuePair <string, double>(pair.Key.Item2, (double)pair.Value / sum));

            return(distribution.ToDictionary(pair => pair.Key, pair => pair.Value));
Ejemplo n.º 7
        // Gets the distribution of strings for a particular character.
        // DOES NOT use previously generated distributions; generates the distribution every time
        // by calling GenerateDistributionForChar.
        //
        // c              - the (optional) character to look up.
        // classification - passed through to GenerateDistributionForChar.
        //
        // NOTE(review): the return statement is not visible in this excerpt; presumably the
        // method returns the local "distribution" -- confirm against the full file.
        private Dictionary <string, double> GetDistributionOfStringsForChar(OptChar c, Classification classification)
            OptChar key = c;
            Dictionary <string, double> distribution;

            // Generate the probability distribution based on the classification, which contains counts of observations
            distribution = GenerateDistributionForChar(key, classification);
            // If the generated distribution is empty (no information about this character),
            // fall back to the character itself with probability 1.0
            // NOTE(review): c.Value is read here without an IsNone check -- confirm c cannot
            // be None on this path.
            if (distribution.Count == 0)
                distribution.Add("" + c.Value, 1.0);
        // For every input cell, tries k generated error strings, measures the total
        // output error each one causes, and returns the worst error string for the
        // inputs that rank in the top 5% (rounded up) by that error.
        //
        // output_nodes    - addresses passed to SaveOutputs to capture outputs.
        // inputs          - address -> original value for each input cell to perturb.
        // k               - number of alternative error strings to consider per input.
        // correct_outputs - baseline outputs passed to Utility.CalculateTotalError.
        // app, wb         - Excel application/workbook handed to InjectValues.
        // c               - classification used to generate the error strings.
        // dag             - passed through to SaveOutputs.
        //
        // Returns address -> worst error string for the highest-impact inputs.
        // An input for which no candidate produced an error above 0.0 is recorded
        // with the empty string.
        public static CellDict GenImportantErrors(AST.Address[] output_nodes,
            CellDict inputs,
            int k,         // number of alternatives to consider
            CellDict correct_outputs,
            Excel.Application app,
            Excel.Workbook wb,
            Classification c,
            DAG dag)
        {
            // Fraction of the inputs (ranked by worst error) that is returned.
            const double top_fraction = 0.05;

            var eg = new ErrorGenerator();
            var max_error_produced_dictionary = new Dictionary<AST.Address, Tuple<string, double>>();

            foreach (KeyValuePair<AST.Address, string> pair in inputs)
            {
                AST.Address addr = pair.Key;
                string orig_value = pair.Value;

                // Worst error seen so far for this input, and the string that caused it.
                double max_error_produced = 0.0;
                string max_error_string = "";

                // get k strings
                string[] errorstrings = eg.GenerateErrorStrings(orig_value, c, k);

                for (int i = 0; i < k; i++)
                {
                    // inject the typo
                    CellDict typo = new CellDict();
                    typo.Add(addr, errorstrings[i]);
                    InjectValues(app, wb, typo);

                    // save function outputs while the typo is in place
                    CellDict incorrect_outputs = SaveOutputs(output_nodes, dag);

                    // Remove the typo that was introduced. A separate dictionary is
                    // required: adding addr a second time to the dictionary that
                    // already holds the typo throws ArgumentException (duplicate key)
                    // and would leave the typo in the workbook.
                    CellDict restore = new CellDict();
                    restore.Add(addr, orig_value);
                    InjectValues(app, wb, restore);

                    double total_error = Utility.CalculateTotalError(correct_outputs, incorrect_outputs);

                    // keep track of the largest observed max error (first one wins ties)
                    if (total_error > max_error_produced)
                    {
                        max_error_produced = total_error;
                        max_error_string = errorstrings[i];
                    }
                }

                // Add an entry for this input address with its worst error string and magnitude
                max_error_produced_dictionary.Add(addr, new Tuple<string, double>(max_error_string, max_error_produced));
            }

            // sort by max_error_produced, largest first
            var maxen = max_error_produced_dictionary.OrderByDescending(pair => pair.Value.Item2).Select(pair => new Tuple<AST.Address, string>(pair.Key, pair.Value.Item1)).ToList();

            return maxen.Take((int)Math.Ceiling(top_fraction * inputs.Count)).ToDictionary(tup => tup.Item1, tup => tup.Item2);
        }
Ejemplo n.º 22
 // Produces k error strings for the same original string by calling
 // GenerateErrorString once per slot.
 //
 // orig - the original (correct) string to derive errors from.
 // c    - classification passed through to GenerateErrorString.
 // k    - number of error strings to generate.
 //
 // Returns an array of length k holding the generated strings in call order.
 public string[] GenerateErrorStrings(string orig, Classification c, int k)
 {
     string[] generated = new string[k];

     int slot = 0;
     while (slot < k)
     {
         generated[slot] = GenerateErrorString(orig, c);
         slot++;
     }

     return generated;
 }