GetTypoDict() public method

public GetTypoDict ( ) : Dictionary<Tuple<OptChar, string>, int>
return Dictionary<Tuple<OptChar, string>, int>
Esempio n. 1
0
        /// <summary>
        /// Generates the distribution of strings for a particular character given a classification.
        /// </summary>
        /// <param name="c">The character whose typo entries are selected.</param>
        /// <param name="classification">Supplies the typo dictionary of (character, string) counts.</param>
        /// <returns>
        /// A map from each string recorded for <paramref name="c"/> to its count divided by the
        /// total count of all entries recorded for <paramref name="c"/>.
        /// </returns>
        private Dictionary <string, double> GenerateDistributionForChar(OptChar c, Classification classification)
        {
            // Keep only the entries recorded for c. An entry whose character is None
            // matches only when c is None as well; otherwise fall back to Equals.
            var matches = classification.GetTypoDict()
                .Where(entry => OptChar.get_IsNone(entry.Key.Item1)
                    ? OptChar.get_IsNone(c)
                    : entry.Key.Item1.Equals(c))
                .ToArray();

            // Normalising constant: total count over the selected entries.
            var total = matches.Sum(entry => entry.Value);

            return matches.ToDictionary(
                entry => entry.Key.Item2,
                entry => (double)entry.Value / total);
        }
Esempio n. 2
0
        /// <summary>
        /// Builds an erroneous variant of <paramref name="input"/> by running the transposition
        /// and typo algorithms with one randomly chosen error position forced.
        /// </summary>
        /// <param name="input">The string to corrupt.</param>
        /// <param name="c">Source of the typo and transposition count dictionaries.</param>
        /// <returns>
        /// A string that differs from <paramref name="input"/>. <paramref name="input"/> itself is
        /// returned only when the randomly selected error kind has zero probability at every position.
        /// </returns>
        public string GenerateErrorString(string input, Classification c)
        {
            // typo counts and transposition counts
            var td = c.GetTypoDict();
            var trd = c.GetTranspositionDict();

            // the input as OptChars, and a copy with leading and trailing 'empty characters'
            var ochars = StringToOptCharArray(input);
            var padded = AddLeadingTrailingSpace(ochars);

            // marginal probability of NOT making a typo, one per padded character:
            // count(char typed as itself) / count(all entries for that char)
            double[] prsCharNotTypo = padded.Select(oc =>
            {
                int selfCount;
                if (!td.TryGetValue(new Tuple<OptChar, string>(oc, OptCharToString(oc)), out selfCount))
                {
                    selfCount = 0;
                }

                // funny case to handle the fact that FSharpOption.None == null
                int charTotal = td
                    .Where(kvp => kvp.Key.Item1 == null ? oc == null : kvp.Key.Item1.Equals(oc))
                    .Aggregate(0, (acc, kvp) => acc + kvp.Value);

                // a character with no recorded entries is treated as never mistyped
                return charTotal == 0 ? 1.0 : (double)selfCount / charTotal;
            }).ToArray();

            // probability of making at least one typo (might need log-probs here)
            double prTypo = 1.0 - prsCharNotTypo.Aggregate(1.0, (acc, pr) => acc * pr);

            // marginal probability of NOT making a transposition, one per input position;
            // the padding characters are not considered here, and a string of length <= 1
            // gets probability exactly 1
            double[] prsPosNotTrans;
            if (ochars.Length > 1)
            {
                // the count stored under key 0 is the numerator at every position
                int stayCount;
                if (!trd.TryGetValue(0, out stayCount))
                {
                    stayCount = 0;
                }

                prsPosNotTrans = ochars.Select((oc, idx) =>
                {
                    // only keys within [-idx, input.Length - idx) are counted for this position
                    int reachable = trd
                        .Where(kvp => kvp.Key >= -idx && kvp.Key < input.Length - idx)
                        .Select(kvp => kvp.Value)
                        .Sum();
                    return reachable == 0 ? 1.0 : (double)stayCount / reachable;
                }).ToArray();
            }
            else
            {
                prsPosNotTrans = new [] { 1.0 };
            }

            // probability of having at least one transposition
            double prTrans = 1.0 - prsPosNotTrans.Aggregate(1.0, (acc, pr) => acc * pr);

            // relative probability of a typo vs a transposition; this is NaN when both
            // probabilities are zero, in which case the comparison below is always false
            double relPrTypo = prTypo / (prTypo + prTrans);

            // start from the original input in case typos/transpositions prove to be impossible
            string output = input;

            // retry until the result differs from the input
            do
            {
                // flip a coin to decide whether the guaranteed error is a typo or a transposition
                bool guaranteeTypo = r.NextDouble() < relPrTypo;

                // per-position probability of the chosen kind of error
                double[] prsMistype = (guaranteeTypo ? prsCharNotTypo : prsPosNotTrans)
                    .Select(pr => 1.0 - pr)
                    .ToArray();

                // if the chosen kind of error is impossible everywhere we just can't produce one
                if (prsMistype.Sum() == 0)
                {
                    break;
                }

                // index of the guaranteed error
                var i = MultinomialSample(prsMistype);

                if (guaranteeTypo)
                {
                    // -1 means no transposition is guaranteed; the typo is guaranteed at i
                    OptChar[] transposed = AddLeadingTrailingSpace(Transposize(ochars, trd, -1));
                    output = Typoize(transposed, td, i);
                }
                else
                {
                    // the transposition is guaranteed at i; -1 means no typo is guaranteed
                    OptChar[] transposed = AddLeadingTrailingSpace(Transposize(ochars, trd, i));
                    output = Typoize(transposed, td, -1);
                }
            } while (input == output);

            return output;
        }
Esempio n. 3
0
        /// <summary>
        /// Builds an erroneous variant of <paramref name="input"/> by running the transposition
        /// and typo algorithms with one randomly chosen error position forced.
        /// </summary>
        /// <param name="input">The string to corrupt.</param>
        /// <param name="c">Source of the typo and transposition count dictionaries.</param>
        /// <returns>
        /// A string that differs from <paramref name="input"/>. <paramref name="input"/> itself is
        /// returned only when the randomly selected error kind has zero probability at every position.
        /// </returns>
        public string GenerateErrorString(string input, Classification c)
        {
            // typo counts and transposition counts
            var td = c.GetTypoDict();
            var trd = c.GetTranspositionDict();

            // the input as OptChars, and a copy with leading and trailing 'empty characters'
            var ochars = StringToOptCharArray(input);
            var padded = AddLeadingTrailingSpace(ochars);

            // marginal probability of NOT making a typo, one per padded character:
            // count(char typed as itself) / count(all entries for that char)
            double[] prsCharNotTypo = padded.Select(oc =>
            {
                int selfCount;
                if (!td.TryGetValue(new Tuple <OptChar, string>(oc, OptCharToString(oc)), out selfCount))
                {
                    selfCount = 0;
                }

                // funny case to handle the fact that FSharpOption.None == null
                int charTotal = td
                    .Where(kvp => kvp.Key.Item1 == null ? oc == null : kvp.Key.Item1.Equals(oc))
                    .Aggregate(0, (acc, kvp) => acc + kvp.Value);

                // a character with no recorded entries is treated as never mistyped
                return(charTotal == 0 ? 1.0 : (double)selfCount / charTotal);
            }).ToArray();

            // probability of making at least one typo (might need log-probs here)
            double prTypo = 1.0 - prsCharNotTypo.Aggregate(1.0, (acc, pr) => acc * pr);

            // marginal probability of NOT making a transposition, one per input position;
            // the padding characters are not considered here, and a string of length <= 1
            // gets probability exactly 1
            double[] prsPosNotTrans;
            if (ochars.Length > 1)
            {
                // the count stored under key 0 is the numerator at every position
                int stayCount;
                if (!trd.TryGetValue(0, out stayCount))
                {
                    stayCount = 0;
                }

                prsPosNotTrans = ochars.Select((oc, idx) =>
                {
                    // only keys within [-idx, input.Length - idx) are counted for this position
                    int reachable = trd
                        .Where(kvp => kvp.Key >= -idx && kvp.Key < input.Length - idx)
                        .Select(kvp => kvp.Value)
                        .Sum();
                    return(reachable == 0 ? 1.0 : (double)stayCount / reachable);
                }).ToArray();
            }
            else
            {
                prsPosNotTrans = new [] { 1.0 };
            }

            // probability of having at least one transposition
            double prTrans = 1.0 - prsPosNotTrans.Aggregate(1.0, (acc, pr) => acc * pr);

            // relative probability of a typo vs a transposition; this is NaN when both
            // probabilities are zero, in which case the comparison below is always false
            double relPrTypo = prTypo / (prTypo + prTrans);

            // start from the original input in case typos/transpositions prove to be impossible
            string output = input;

            // retry until the result differs from the input
            do
            {
                // flip a coin to decide whether the guaranteed error is a typo or a transposition
                bool guaranteeTypo = r.NextDouble() < relPrTypo;

                // per-position probability of the chosen kind of error
                double[] prsMistype = (guaranteeTypo ? prsCharNotTypo : prsPosNotTrans)
                    .Select(pr => 1.0 - pr)
                    .ToArray();

                // if the chosen kind of error is impossible everywhere we just can't produce one
                if (prsMistype.Sum() == 0)
                {
                    break;
                }

                // index of the guaranteed error
                var i = MultinomialSample(prsMistype);

                if (guaranteeTypo)
                {
                    // -1 means no transposition is guaranteed; the typo is guaranteed at i
                    OptChar[] transposed = AddLeadingTrailingSpace(Transposize(ochars, trd, -1));
                    output = Typoize(transposed, td, i);
                }
                else
                {
                    // the transposition is guaranteed at i; -1 means no typo is guaranteed
                    OptChar[] transposed = AddLeadingTrailingSpace(Transposize(ochars, trd, i));
                    output = Typoize(transposed, td, -1);
                }
            } while (input == output);

            return(output);
        }
Esempio n. 4
0
 /// <summary>
 /// Generates the distribution of strings for a particular character given a classification.
 /// </summary>
 /// <param name="c">The character whose typo entries are selected.</param>
 /// <param name="classification">Supplies the typo dictionary of (character, string) counts.</param>
 /// <returns>
 /// A map from each string recorded for <paramref name="c"/> to its count divided by the
 /// total count of all entries recorded for <paramref name="c"/>.
 /// </returns>
 private Dictionary<string, double> GenerateDistributionForChar(OptChar c, Classification classification)
 {
     // Keep only the entries recorded for c. An entry whose character is None
     // matches only when c is None as well; otherwise fall back to Equals.
     var matches = classification.GetTypoDict()
         .Where(entry => OptChar.get_IsNone(entry.Key.Item1)
             ? OptChar.get_IsNone(c)
             : entry.Key.Item1.Equals(c))
         .ToArray();

     // Normalising constant: total count over the selected entries.
     var total = matches.Sum(entry => entry.Value);

     return matches.ToDictionary(
         entry => entry.Key.Item2,
         entry => (double) entry.Value / total);
 }