Beispiel #1
0
        /// <summary>
        /// Calculates the back-off weight of <paramref name="context"/> and writes it,
        /// together with the prob2log value of the context's own probability, as a
        /// single line to <paramref name="sw"/>.
        /// </summary>
        /// <param name="context">Text written as the first column of the output line.</param>
        /// <param name="vec_ngram">Entries grouped under the context. The entry whose word is
        /// empty supplies the probability that gets printed; every other entry has its
        /// prob / lower values subtracted from the remaining mass.</param>
        /// <param name="sw">Writer receiving the line "context\tlogprob bow".</param>
        static void output(string context, List <Ngram> vec_ngram, StreamWriter sw)
        {
            // Remaining mass starts at 1 and shrinks by each non-empty-word entry.
            double probMass  = 1.0;   // numerator of the back-off weight
            double lowerMass = 1.0;   // denominator of the back-off weight
            int    selfPos   = -1;    // position of the empty-word entry (last one wins)

            int pos = 0;
            foreach (Ngram entry in vec_ngram)
            {
                if (entry.word != "")
                {
                    probMass  -= entry.prob;
                    lowerMass -= entry.lower;
                }
                else
                {
                    selfPos = pos;
                }
                pos++;
            }

            // Without an empty-word entry there is nothing to print for this context.
            if (selfPos < 0)
            {
                return;
            }

            // Following SRILM: tiny negative values are rounding noise, clamp them to zero.
            if (probMass < 0.0 && probMass > -LM_EPSILON)
            {
                probMass = 0.0;
            }
            if (lowerMass < 0.0 && lowerMass > -LM_EPSILON)
            {
                lowerMass = 0.0;
            }

            double bow;

            if (lowerMass == 0.0 && probMass > LM_EPSILON)
            {
                // No denominator mass left: the numerator is treated as zero too,
                // and a 0/0 ratio is defined to give bow = 0.
                bow = 0.0;
            }
            else if (probMass < 0.0)
            {
                // Clearly negative numerator: no valid weight exists.
                bow = LM_LOG_ZERO;
            }
            else if (lowerMass <= 0.0)
            {
                if (probMass > LM_EPSILON)
                {
                    // Mass left in the numerator but none to distribute it over.
                    bow = LM_LOG_ZERO;
                }
                else
                {
                    // Both sides are effectively zero: bow = 0.
                    bow = 0.0;
                }
            }
            else
            {
                // Regular case: the denominator is strictly positive here.
                bow = prob2log(probMass) - prob2log(lowerMass);
            }

            sw.WriteLine("{0}\t{1} {2}", context, prob2log(vec_ngram[selfPos].prob), bow);
        }
Beispiel #2
0
        /// <summary>
        /// Entry point: reads n-gram records from the input file, groups them by
        /// context in sorted key order, and emits one line per context through
        /// <c>output</c>.
        /// </summary>
        /// <remarks>
        /// Each non-blank input line must look like <c>ngram&lt;TAB&gt;prob lower</c>.
        /// Every record is stored under its full n-gram with an empty word. Records
        /// with more than one token are additionally stored under their history
        /// (all tokens but the last), with the last token as the word.
        /// NOTE(review): numbers are parsed with the current culture, as before;
        /// confirm that this matches the producer of the input file.
        /// </remarks>
        /// <param name="args">Input file path followed by output file path.</param>
        /// <exception cref="FormatException">A non-blank line lacks the tab separator,
        /// lacks the two space-separated values, or holds a value that is not a number.</exception>
        static void Main(string[] args)
        {
            if (args.Length != 2)
            {
                Console.WriteLine("common_bow.exe [input file] [output file]");
                return;
            }

            SortedDictionary <string, List <Ngram> > kv = new SortedDictionary <string, List <Ngram> >();

            // Both streams are opened up front, as before, so a bad output path fails
            // before any work is done; 'using' guarantees they are flushed and closed
            // even when a malformed line makes parsing throw.
            using (StreamReader sr = new StreamReader(args[0]))
            using (StreamWriter sw = new StreamWriter(args[1]))
            {
                string strLine;
                int    lineNo = 0;

                while ((strLine = sr.ReadLine()) != null)
                {
                    lineNo++;

                    // Tolerate blank lines (e.g. a trailing newline at the end of the file).
                    if (strLine.Trim().Length == 0)
                    {
                        continue;
                    }

                    //For unigram, just duplicate input;
                    //For others, duplicate input, and split ngram into history\tword
                    //e.g. from h w\tvalue to h\tw\tvalue
                    string[] items = strLine.Split(new char[] { '\t' }, 2);
                    string[] strV  = (items.Length == 2) ? items[1].Split(' ') : null;
                    if (strV == null || strV.Length < 2)
                    {
                        throw new FormatException(String.Format(
                            "Line {0}: expected \"ngram<TAB>prob lower\" but found \"{1}\"",
                            lineNo, strLine));
                    }

                    string key   = items[0];
                    double prob  = double.Parse(strV[0]);
                    double lower = double.Parse(strV[1]);

                    // Entry for the n-gram itself (empty word).
                    Ngram v = new Ngram();
                    v.word  = "";
                    v.prob  = prob;
                    v.lower = lower;

                    List <Ngram> bucket;
                    if (kv.TryGetValue(key, out bucket) == false)
                    {
                        bucket = new List <Ngram>();
                        kv.Add(key, bucket);
                    }
                    bucket.Add(v);

                    string[] keys = key.Split(' ');
                    if (keys.Length > 1)
                    {
                        // Entry under the history, keyed by the last token.
                        string strKeys = String.Join(" ", keys, 0, keys.Length - 1).Trim();
                        v       = new Ngram();
                        v.word  = keys[keys.Length - 1];
                        v.prob  = prob;
                        v.lower = lower;

                        if (kv.TryGetValue(strKeys, out bucket) == false)
                        {
                            bucket = new List <Ngram>();
                            kv.Add(strKeys, bucket);
                        }
                        bucket.Add(v);
                    }
                }

                foreach (KeyValuePair <string, List <Ngram> > pair in kv)
                {
                    output(pair.Key, pair.Value, sw);
                }
            }
        }