Exemple #1
0
 /// <summary>
 /// Convenience overload: forwards every argument to the six-parameter
 /// <c>Viterbi</c> with <c>splitRare</c> set to <c>false</c>.
 /// </summary>
 public static List<string> Viterbi(
     List<string> str,
     Trigram q,
     Bigram e,
     Unigram freqList,
     HashSet<string> tags)
 {
     // Rare-word splitting is off by default.
     const bool splitRare = false;

     List<string> tagged = Viterbi(str, q, e, freqList, tags, splitRare);
     return tagged;
 }
Exemple #2
0
 /// <summary>
 /// Records one occurrence of <paramref name="w1"/> in the overall unigram and in
 /// the unigram stored under <paramref name="word"/>, creating that entry the
 /// first time <paramref name="word"/> is seen, then increments <c>count</c>.
 /// </summary>
 /// <param name="word">Key of the per-key unigram in <c>freqList</c>.</param>
 /// <param name="w1">Value passed to <c>Unigram.AddWord</c>.</param>
 public void AddWord(string word, string w1)
 {
     unigram.AddWord(w1);

     // Explicit lookup instead of catching the indexer's exception: a missing
     // key is the normal first-occurrence case, and a blanket catch would also
     // hide genuine failures raised by Unigram.AddWord.
     Unigram bucket;
     if (freqList.TryGetValue(word, out bucket))
     {
         bucket.AddWord(w1);
     }
     else
     {
         bucket = new Unigram();
         bucket.AddWord(w1);
         freqList.Add(word, bucket);
     }

     count++;
 }
Exemple #3
0
 /// <summary>
 /// Creates an empty instance: zero <c>count</c>, a fresh overall
 /// <c>Unigram</c>, and an empty key-to-<c>Unigram</c> dictionary.
 /// </summary>
 public Bigram()
 {
     // Nothing has been counted yet.
     count = 0;

     // Overall unigram and the per-key unigram table both start empty.
     unigram = new Unigram();
     freqList = new Dictionary<string, Unigram>();
 }
Exemple #4
0
        /// <summary>
        /// Assigns one tag to every token of <paramref name="str"/>, working left to right.
        /// For each position a table of probabilities keyed by "previousTag:tag" is built from
        /// the table of the previous position, and the tag appended to the result is the last
        /// candidate whose probability is at least the running maximum for that position.
        /// </summary>
        /// <param name="str">Tokens of the sentence to tag. May be empty or hold a single token.</param>
        /// <param name="q">Trigram model, queried as <c>q.Qml(v, w, u)</c> (transition term).</param>
        /// <param name="e">Bigram model, queried as <c>e.Qml(word, v)</c> (emission term).</param>
        /// <param name="freqList">Vocabulary; <c>freqList.Contains(word)</c> decides whether a token is known.</param>
        /// <param name="tags">Candidate tags tried for known tokens.</param>
        /// <param name="splitRare">
        /// When true, an unknown token is tagged with <c>ProperRare(token)</c>; otherwise with "_RARE_".
        /// </param>
        /// <returns>
        /// One entry per token of <paramref name="str"/>. For a known token the entry stays the
        /// placeholder "STOP" when no candidate ever replaces it (for example when
        /// <paramref name="tags"/> is empty).
        /// </returns>
        public static List<string> Viterbi(
           List<string> str,
           Trigram q,
           Bigram e,
           Unigram freqList,
           HashSet<string> tags,
           bool splitRare)
        {
            List<string> y = new List<string>();

            // An empty sentence has nothing to tag; previously str[0] threw here.
            if (str.Count == 0)
                return y;

            Dictionary<string, double> current = new Dictionary<string, double>();
            Dictionary<string, double> previous = new Dictionary<string, double>();
            string key;
            double prob = 0;
            double max = 0;

            // ---- Position 0: both history tags are the start symbol "*". ----
            if (freqList.Contains(str[0]))
            {
                // Placeholder that is replaced by the best candidate below.
                y.Add("STOP");

                foreach (string v in tags)
                {
                    prob = q.Qml(v, "*", "*") * e.Qml(str[0], v);
                    current.Add("*:" + v, prob);

                    // ">=" means that on ties (including all-zero scores) the last tag iterated wins.
                    if (prob >= max)
                    {
                        max = prob;
                        y.RemoveAt(y.Count - 1);
                        y.Add(v);
                    }
                }
            }
            else
            {
                string v = (splitRare) ? ProperRare(str[0]) : "_RARE_";
                y.Add(v);

                // NOTE(review): the table entry always uses the literal "_RARE_", even when
                // splitRare produced a different tag v, and no emission term is applied here.
                // Confirm this is intended.
                prob = Math.Max(q.Qml("_RARE_", "*", "*"), 0);
                current.Add("*:" + "_RARE_", prob);
            }

            // A single-token sentence is fully tagged at this point; previously str[1] threw here.
            if (str.Count == 1)
                return y;

            // ---- Position 1: history is ("*", u). ----
            previous = current;
            current = new Dictionary<string, double>();
            max = 0;
            if (freqList.Contains(str[1]))
            {
                y.Add("STOP");

                foreach (string u in tags)
                {
                    foreach (string v in tags)
                    {
                        try
                        {
                            key = "*:" + u;
                            prob = previous[key] * q.Qml(v, "*", u) * e.Qml(str[1], v);
                            current.Add(string.Format("{0}:{1}", u, v), prob);
                            if (prob >= max)
                            {
                                max = prob;
                                y.RemoveAt(y.Count - 1);
                                y.Add(v);
                            }
                        }
                        catch (Exception)
                        {
                            // A history with no entry in the previous table (or any failure while
                            // scoring it) simply contributes no candidate.
                            continue;
                        }
                    }
                }
            }
            else
            {
                string v = (splitRare) ? ProperRare(str[1]) : "_RARE_";
                y.Add(v);

                foreach (string u in tags)
                {
                    try
                    {
                        key = "*:" + u;
                        prob = previous[key] * q.Qml(v, "*", u) * e.Qml(str[1], v);
                        current.Add(string.Format("{0}:{1}", u, v), prob);
                        if (prob >= max)
                        {
                            max = prob;
                            // Re-inserts the same v that is already last, so y is unchanged.
                            y.RemoveAt(y.Count - 1);
                            y.Add(v);
                        }
                    }
                    catch (Exception)
                    {
                        continue;
                    }
                }
            }

            // ---- Positions 2..n-1: history is (w, u). ----
            for (int i = 2; i < str.Count; i++)
            {
                previous = current;
                current = new Dictionary<string, double>();
                max = 0;

                if (freqList.Contains(str[i]))
                {
                    y.Add("STOP");

                    foreach (string v in tags)
                    {
                        foreach (string u in tags)
                        {
                            foreach (string w in tags)
                            {
                                key = w + ":" + u;

                                try
                                {
                                    prob = previous[key] * q.Qml(v, w, u) * e.Qml(str[i], v);
                                }
                                catch (Exception)
                                {
                                    // Missing history (or a failed model query) scores zero.
                                    prob = 0;
                                }

                                try
                                {
                                    // Only positive scores are stored in the table.
                                    if (prob > 0)
                                        current.Add(string.Format("{0}:{1}", u, v), prob);
                                }
                                catch (Exception)
                                {
                                    // "u:v" was already stored for another w: keep the larger score.
                                    if (prob > current[string.Format("{0}:{1}", u, v)])
                                    {
                                        current.Remove(string.Format("{0}:{1}", u, v));
                                        current.Add(string.Format("{0}:{1}", u, v), prob);
                                    }
                                }

                                if (prob >= max)
                                {
                                    max = prob;
                                    y.RemoveAt(y.Count - 1);
                                    y.Add(v);
                                }
                            }
                        }
                    }
                }
                else
                {
                    string v = (splitRare) ? ProperRare(str[i]) : "_RARE_";
                    y.Add(v);

                    foreach (string u in tags)
                    {
                        foreach (string w in tags)
                        {
                            key = w + ":" + u;

                            try
                            {
                                // NOTE(review): unlike the position-1 rare branch, no e.Qml
                                // emission term is multiplied in here. Confirm this is intended.
                                prob = previous[key] * q.Qml(v, w, u);
                            }
                            catch (Exception)
                            {
                                prob = 0;
                            }

                            try
                            {
                                if (prob > 0)
                                    current.Add(string.Format("{0}:{1}", u, v), prob);
                            }
                            catch (Exception)
                            {
                                // "u:v" was already stored for another w: keep the larger score.
                                if (prob > current[string.Format("{0}:{1}", u, v)])
                                {
                                    current.Remove(string.Format("{0}:{1}", u, v));
                                    current.Add(string.Format("{0}:{1}", u, v), prob);
                                }
                            }

                            if (prob >= max)
                            {
                                max = prob;
                                // Re-inserts the same v that is already last, so y is unchanged.
                                y.RemoveAt(y.Count - 1);
                                y.Add(v);
                            }
                        }
                    }
                }
            }

            return y;
        }
Exemple #5
0
        /// <summary>
        /// Makes the model available in the static fields <c>transition</c>, <c>emission</c>,
        /// <c>freqList</c> and <c>tags</c>: deserializes it when every serialized file exists,
        /// otherwise parses the training data and serializes the result for the next run.
        /// </summary>
        public static void Train()
        {
            // The load path reads all four files, so all four must exist. The unigram file
            // was previously not checked, so a missing one failed during deserialization
            // instead of triggering a retrain.
            bool modelOnDisk = File.Exists(trigramPath)
                && File.Exists(bigramPath)
                && File.Exists(unigramPath)
                && File.Exists(tagsPath);

            if (modelOnDisk)
            {
                transition = (Trigram)DeserializeModel(trigramPath);
                emission = (Bigram)DeserializeModel(bigramPath);
                freqList = (Unigram)DeserializeModel(unigramPath);
                tags = DeserializeTags(tagsPath);
                return;
            }

            // NOTE(review): the three-argument ReplaceTrainingFile writes "tmpSP.train", so this
            // "tmp.train" check presumably refers to a file produced elsewhere. Confirm which
            // preprocessed file is meant to be reused here.
            if (File.Exists("tmp.train"))
            {
                ParseTrainingData("tmp.train");
            }
            else
            {
                ParseTrainingData(ReplaceTrainingFile(trainingPath, BuildFreqList(trainingPath), true));
            }

            SerializeModel(trigramPath, transition);
            SerializeModel(bigramPath, emission);
            SerializeModel(unigramPath, freqList);
            SerializeTags(tagsPath);
        }
Exemple #6
0
        /// <summary>
        /// Copies the training file at <paramref name="path"/> to "tmpSP.train", rewriting every
        /// non-empty line whose first space-separated token has a <paramref name="freqList"/>
        /// value below 5. Such a line becomes the token followed by either
        /// <c>SplitRareTag(token)</c> or " _RARE_". All other lines are copied unchanged.
        /// </summary>
        /// <param name="path">Training file to read.</param>
        /// <param name="freqList">Per-token values, read through its indexer.</param>
        /// <param name="splitRares">
        /// When true the replacement suffix comes from <c>SplitRareTag</c>; otherwise it is " _RARE_".
        /// </param>
        /// <returns>The path of the file that was written ("tmpSP.train").</returns>
        static string ReplaceTrainingFile(string path, Unigram freqList, bool splitRares)
        {
            // Tokens whose freqList value is below this are rewritten.
            const int rareThreshold = 5;

            string outpath = "tmpSP.train";

            // "using" closes both streams even if reading, writing or a lookup throws;
            // the manual Close() calls leaked the handles in that case.
            using (StreamReader sr = new StreamReader(path))
            using (StreamWriter sw = new StreamWriter(outpath))
            {
                string line;

                while ((line = sr.ReadLine()) != null)
                {
                    // Empty lines are copied through as-is.
                    if (line == "")
                    {
                        sw.WriteLine(line);
                        continue;
                    }

                    string word = line.Split(' ')[0];

                    if (freqList[word] < rareThreshold)
                        sw.WriteLine(word + ((splitRares) ? SplitRareTag(word) : " _RARE_"));
                    else
                        sw.WriteLine(line);
                }
            }

            return outpath;
        }
Exemple #7
0
 /// <summary>
 /// Convenience overload: forwards to the three-parameter
 /// <c>ReplaceTrainingFile</c> with <c>splitRares</c> set to <c>false</c>.
 /// </summary>
 static string ReplaceTrainingFile(string path, Unigram freqList)
 {
     // Rare-tag splitting is off by default.
     const bool splitRares = false;

     string rewrittenPath = ReplaceTrainingFile(path, freqList, splitRares);
     return rewrittenPath;
 }