Esempio n. 1
0
        /// <summary>
        /// Tags every word independently with the candidate tag that has the highest
        /// emission score.
        /// </summary>
        /// <param name="sentence">Words to tag, in order.</param>
        /// <param name="tags">Candidate tags scored for each word.</param>
        /// <param name="emissionProb">Model queried through <c>Qml(word, tag)</c>.</param>
        /// <returns>
        /// One tag per word. When several tags share the best score the one enumerated last
        /// wins; "STOP" is emitted when no tag reaches a score of at least 0 (for example when
        /// <paramref name="tags"/> is empty).
        /// </returns>
        public static List<string> EMTagger(List<string> sentence, HashSet<string> tags, Bigram emissionProb)
        {
            List<string> tagged = new List<string>();

            foreach (var word in sentence)
            {
                // Fallback label and the score a candidate has to reach to replace it.
                string winner = "STOP";
                double winnerScore = 0;

                foreach (var candidate in tags)
                {
                    double score = emissionProb.Qml(word, candidate);

                    // ">=" on purpose: a later tag with an equal score replaces an earlier one.
                    if (score >= winnerScore)
                    {
                        winnerScore = score;
                        winner = candidate;
                    }
                }

                tagged.Add(winner);
            }

            return tagged;
        }
Esempio n. 2
0
 /// <summary>
 /// Convenience overload: forwards to the six-argument <c>Viterbi</c> with
 /// <c>splitRare</c> set to <c>false</c>.
 /// </summary>
 public static List<string> Viterbi(
     List<string> str,
     Trigram q,
     Bigram e,
     Unigram freqList,
     HashSet<string> tags)
 {
     const bool splitRare = false;

     List<string> tagged = Viterbi(str, q, e, freqList, tags, splitRare);
     return tagged;
 }
Esempio n. 3
0
 /// <summary>
 /// Records one observation of <paramref name="word"/> in the context
 /// (<paramref name="w1"/>, <paramref name="w2"/>).
 /// </summary>
 /// <remarks>
 /// The context pair is added to the shared <c>bigram</c> table and to the per-word
 /// <c>Bigram</c> stored in <c>freqList</c> (created on first use), then <c>count</c> is
 /// incremented. Exceptions thrown by <c>Bigram.AddWord</c> are no longer swallowed.
 /// </remarks>
 /// <param name="word">Key under which the context pair is stored in <c>freqList</c>.</param>
 /// <param name="w1">First element of the context pair.</param>
 /// <param name="w2">Second element of the context pair.</param>
 public void AddWord(string word, string w1, string w2)
 {
     bigram.AddWord(w1, w2);

     // Look the entry up explicitly instead of relying on a caught exception to detect a
     // missing key: first-time words are the common case while building the model.
     Bigram perWord;
     if (!freqList.TryGetValue(word, out perWord))
     {
         perWord = new Bigram();
         freqList.Add(word, perWord);
     }

     perWord.AddWord(w1, w2);

     count++;
 }
Esempio n. 4
0
 /// <summary>
 /// Creates an empty instance: an empty per-word table, a fresh <c>Bigram</c>, and a
 /// count of zero.
 /// </summary>
 public Trigram()
 {
     this.freqList = new Dictionary<string, Bigram>();
     this.bigram = new Bigram();
     this.count = 0;
 }
Esempio n. 5
0
        /// <summary>
        /// Tags a sentence one position at a time from trigram transition scores
        /// (<paramref name="q"/>) and emission scores (<paramref name="e"/>).
        /// </summary>
        /// <remarks>
        /// Scores for a position are kept in a dictionary keyed by "previousTag:tag". No
        /// back-pointers are stored: the tag reported for a position is the one whose score was
        /// the last to reach the running maximum for that position. "STOP" is a placeholder
        /// that stays in the result only when no candidate replaced it.
        /// A word for which <c>freqList.Contains</c> returns false is tagged "_RARE_", or with
        /// the result of <c>ProperRare</c> when <paramref name="splitRare"/> is true.
        /// </remarks>
        /// <param name="str">Words of the sentence, in order.</param>
        /// <param name="q">Transition model, queried as <c>Qml(tag, tagTwoBack, tagOneBack)</c>.</param>
        /// <param name="e">Emission model, queried as <c>Qml(word, tag)</c>.</param>
        /// <param name="freqList">Vocabulary; only <c>Contains(word)</c> is used here.</param>
        /// <param name="tags">Candidate tags.</param>
        /// <param name="splitRare">When true, unknown words are labelled via <c>ProperRare</c>.</param>
        /// <returns>One tag per word; an empty list for an empty sentence.</returns>
        public static List<string> Viterbi(
           List<string> str,
           Trigram q,
           Bigram e,
           Unigram freqList,
           HashSet<string> tags,
           bool splitRare)
        {
            List<string> y = new List<string>();

            // An empty sentence has nothing to tag; indexing str[0] below would throw.
            if (str.Count == 0)
            {
                return y;
            }

            Dictionary<string, double> current = new Dictionary<string, double>();
            Dictionary<string, double> previous;
            string key;
            double prob = 0;
            double max = 0;

            // ---- Position 0: both preceding tags are the start symbol "*". ----
            if (freqList.Contains(str[0]))
            {
                y.Add("STOP");

                foreach (string v in tags)
                {
                    prob = q.Qml(v, "*", "*") * e.Qml(str[0], v);
                    current.Add("*:" + v, prob);

                    // ">=": a later tag with an equal score replaces an earlier one.
                    if (prob >= max)
                    {
                        max = prob;
                        y[y.Count - 1] = v;
                    }
                }
            }
            else
            {
                string v = (splitRare) ? ProperRare(str[0]) : "_RARE_";
                y.Add(v);

                // NOTE(review): the score is stored under "_RARE_" even when splitRare picked a
                // different label for the output - confirm this is intended.
                prob = Math.Max(q.Qml("_RARE_", "*", "*"), 0);
                current.Add("*:" + "_RARE_", prob);
            }

            // A one-word sentence is finished; indexing str[1] below would throw.
            if (str.Count == 1)
            {
                return y;
            }

            // ---- Position 1: only the tag two positions back is the start symbol. ----
            previous = current;
            current = new Dictionary<string, double>();
            max = 0;
            if (freqList.Contains(str[1]))
            {
                y.Add("STOP");

                foreach (string u in tags)
                {
                    foreach (string v in tags)
                    {
                        try
                        {
                            key = "*:" + u;
                            prob = previous[key] * q.Qml(v, "*", u) * e.Qml(str[1], v);
                            current.Add(string.Format("{0}:{1}", u, v), prob);
                            if (prob >= max)
                            {
                                max = prob;
                                y[y.Count - 1] = v;
                            }
                        }
                        catch (Exception)
                        {
                            // Best effort: a (u, v) pair that cannot be scored (for example no
                            // entry for "*:u" in previous) is skipped.
                            continue;
                        }
                    }
                }
            }
            else
            {
                string v = (splitRare) ? ProperRare(str[1]) : "_RARE_";
                y.Add(v);

                // The output tag is already fixed for an unknown word; only scores are recorded.
                // NOTE(review): this branch multiplies by e.Qml(str[1], v) while the unknown-word
                // branch of the main loop below uses no emission term - confirm which is intended.
                foreach (string u in tags)
                {
                    try
                    {
                        key = "*:" + u;
                        prob = previous[key] * q.Qml(v, "*", u) * e.Qml(str[1], v);
                        current.Add(string.Format("{0}:{1}", u, v), prob);
                    }
                    catch (Exception)
                    {
                        // Best effort: skip predecessors that cannot be scored.
                        continue;
                    }
                }
            }

            // ---- Positions 2..n-1: full trigram context (w, u) -> v. ----
            for (int i = 2; i < str.Count; i++)
            {
                previous = current;
                current = new Dictionary<string, double>();
                max = 0;

                if (freqList.Contains(str[i]))
                {
                    y.Add("STOP");

                    foreach (string v in tags)
                    {
                        foreach (string u in tags)
                        {
                            foreach (string w in tags)
                            {
                                key = w + ":" + u;

                                // A context with no stored score contributes 0. Checked with
                                // TryGetValue so the common "missing" case does not throw.
                                double prior;
                                if (previous.TryGetValue(key, out prior))
                                {
                                    try
                                    {
                                        prob = prior * q.Qml(v, w, u) * e.Qml(str[i], v);
                                    }
                                    catch (Exception)
                                    {
                                        // Qml is defined elsewhere; a failure there is treated
                                        // as probability 0, as before.
                                        prob = 0;
                                    }
                                }
                                else
                                {
                                    prob = 0;
                                }

                                // Keep, per "u:v" cell, the best positive score over all w.
                                if (prob > 0)
                                {
                                    string cell = string.Format("{0}:{1}", u, v);
                                    double stored;
                                    if (!current.TryGetValue(cell, out stored) || prob > stored)
                                    {
                                        current[cell] = prob;
                                    }
                                }

                                if (prob >= max)
                                {
                                    max = prob;
                                    y[y.Count - 1] = v;
                                }
                            }
                        }
                    }
                }
                else
                {
                    string v = (splitRare) ? ProperRare(str[i]) : "_RARE_";
                    y.Add(v);

                    // The output tag is already fixed for an unknown word; only scores are recorded.
                    foreach (string u in tags)
                    {
                        foreach (string w in tags)
                        {
                            key = w + ":" + u;

                            double prior;
                            if (previous.TryGetValue(key, out prior))
                            {
                                try
                                {
                                    prob = prior * q.Qml(v, w, u);
                                }
                                catch (Exception)
                                {
                                    // Same best-effort rule as above: unscorable means 0.
                                    prob = 0;
                                }
                            }
                            else
                            {
                                prob = 0;
                            }

                            // Keep, per "u:v" cell, the best positive score over all w.
                            if (prob > 0)
                            {
                                string cell = string.Format("{0}:{1}", u, v);
                                double stored;
                                if (!current.TryGetValue(cell, out stored) || prob > stored)
                                {
                                    current[cell] = prob;
                                }
                            }
                        }
                    }
                }
            }

            return y;
        }
Esempio n. 6
0
File: HMM.cs Progetto: kicasta/NLP
        /// <summary>
        /// Makes the model available: loads it from the serialized files when all of them
        /// exist, otherwise builds it from the training data and serializes it.
        /// </summary>
        /// <remarks>
        /// When building, training data is parsed from "tmp.train" if that file exists;
        /// otherwise from the file produced by <c>ReplaceTrainingFile</c> for
        /// <c>trainingPath</c>. The cache check includes <c>unigramPath</c> because the load
        /// branch reads it; with that file missing the model is rebuilt instead of failing
        /// during deserialization.
        /// </remarks>
        public static void Train()
        {
            bool modelIsCached = File.Exists(trigramPath)
                && File.Exists(bigramPath)
                && File.Exists(unigramPath)
                && File.Exists(tagsPath);

            if (modelIsCached)
            {
                transition = (Trigram)DeserializeModel(trigramPath);
                emission = (Bigram)DeserializeModel(bigramPath);
                freqList = (Unigram)DeserializeModel(unigramPath);
                tags = DeserializeTags(tagsPath);
                return;
            }

            if (File.Exists("tmp.train"))
            {
                ParseTrainingData("tmp.train");
            }
            else
            {
                ParseTrainingData(ReplaceTrainingFile(trainingPath, BuildFreqList(trainingPath), true));
            }

            SerializeModel(trigramPath, transition);
            SerializeModel(bigramPath, emission);
            SerializeModel(unigramPath, freqList);
            SerializeTags(tagsPath);
        }