Ejemplo n.º 1
0
        /// <summary>
        /// Builds the observation (emission) model from a list of tagged words: for each
        /// (word, part of speech) pair seen in <paramref name="words"/>, the number of times
        /// the pair occurs divided by the number of times that part of speech occurs.
        /// </summary>
        /// <param name="words">The tagged words to count. Every element is counted.</param>
        /// <returns>
        /// A map from each observed (word, part of speech) pair to its relative frequency
        /// within that part of speech. Empty when <paramref name="words"/> is empty.
        /// </returns>
        static Dictionary <ObservationFromState, double> createObservationModel(List <Word> words)
        {
            Dictionary <ObservationFromState, int> wordCount = new Dictionary <ObservationFromState, int>(); //occurrences of each (word, part of speech) pair
            Dictionary <WordType, int>             typeCount = new Dictionary <WordType, int>();             //occurrences of each part of speech

            // Every word contributes to the counts. The previous index loop stopped at
            // Count - 1 and silently left the final word out of the model.
            foreach (Word word in words)
            {
                // TryGetValue leaves 'seen' at 0 when the key is missing, so a single
                // lookup covers both the "first occurrence" and "increment" cases.
                int seen;
                typeCount.TryGetValue(word.PartOfSpeech, out seen);
                typeCount[word.PartOfSpeech] = seen + 1;

                ObservationFromState observation = new ObservationFromState(word.Content, word.PartOfSpeech);
                wordCount.TryGetValue(observation, out seen);
                wordCount[observation] = seen + 1;
            }

            // Normalise each pair count by the total count of its part of speech.
            Dictionary <ObservationFromState, double> model = new Dictionary <ObservationFromState, double>();
            foreach (KeyValuePair <ObservationFromState, int> pair in wordCount)
            {
                model.Add(pair.Key, (double)pair.Value / (double)typeCount[pair.Key.state]);
            }
            return(model);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Writes the observation model to the file "observer.csv" as a comma-separated matrix:
        /// one column per entry of <paramref name="myDictionary"/> and one row per
        /// <c>WordType</c> enum value. Cells with no entry in <paramref name="model"/> are left empty.
        /// </summary>
        /// <param name="model">Probability for each (word, part of speech) pair.</param>
        /// <param name="myDictionary">The words that form the columns, in output order.</param>
        public static void printObservationMatrix(Dictionary <ObservationFromState, double> model, List <string> myDictionary)
        {
            WordType[] types = (WordType[])Enum.GetValues(typeof(WordType));

            // Single lookup key, re-pointed at each (row, column) cell below.
            ObservationFromState obsv = new ObservationFromState("", WordType.Undefined);

            StringBuilder output = new StringBuilder();

            //Header
            output.Append("...,");
            foreach (string word in myDictionary)
            {
                output.Append(word.escape() + ",");
            }
            output.AppendLine();

            //One row per part of speech
            foreach (WordType from in types)
            {
                obsv.state = from;
                output.Append(from.ToString() + ",");
                foreach (string word in myDictionary)
                {
                    obsv.observation = word;

                    double probability;
                    if (model.TryGetValue(obsv, out probability))
                    {
                        // Format with the invariant culture: a culture whose decimal separator
                        // is ',' would otherwise split one value across two CSV columns.
                        output.Append(probability.ToString(System.Globalization.CultureInfo.InvariantCulture));
                    }
                    output.Append(",");
                }

                output.AppendLine();
            }
            System.IO.File.WriteAllText("observer.csv", output.ToString());
        }
Ejemplo n.º 3
0
        //Φ[t](i) = argmax[j](δ[t-1](j) * a[ji])
        //P(X at time t) = max[for each i = prevstate] (P(i at time t - 1) * P(X|i) * P(observation at time t|X))
        // A = transition matrix
        // B = emission matrix, B[ij] = prob of observing o[j] from state s[i]

        /*N=length(O); # number of observation categories
         * K=length(S); # number of hidden states
         * T=length(Y); # length of observation series
         */

        /// <summary>
        /// Runs the Viterbi algorithm, giving the most likely parts of speech for a sentence.
        /// Any probability missing from the supplied tables is replaced by EPSILON.
        /// </summary>
        /// <param name="sentence">The sentence, one word per element.</param>
        /// <param name="transitionProbs">The probability of transitioning from one part of speech to another (probability: 0-1)</param>
        /// <param name="observationProbs">The probability of observing a word given a part of speech: (probability: 0-1) (i.e. if the part of speech is "Noun", what is the probability that word is "Dog" or "Cat" ? )</param>
        /// <param name="initialProbs">The probability that a part of speech appears at the beginning of a sentence (probability: 0-1)</param>
        /// <returns>
        /// Returns the 1-based (NOT 0-Based) array of the parts of speech: element t is the tag of
        /// sentence[t - 1] and element 0 is unused. For an empty sentence the array holds only
        /// that unused element.
        /// </returns>
        public static WordType[] DoViterbi(List <string> sentence, Dictionary <StateTransition, double> transitionProbs, Dictionary <ObservationFromState, double> observationProbs, Dictionary <WordType, double> initialProbs)
        {
            const int NUM_POSSIBLE_STATES_K = (int)WordType.COUNT;

            int WORKING_LEN_T = sentence.Count;

            int[]      Z = new int[WORKING_LEN_T + 1];      // best state index per time step (1-based)
            WordType[] X = new WordType[WORKING_LEN_T + 1]; // same path expressed as WordType (1-based)

            // Nothing to tag; without this guard the initialisation below fails on sentence[0].
            if (WORKING_LEN_T == 0)
            {
                return(X);
            }

            //1-based: T1[s, t] = probability of the best path ending in state s at time t,
            //         T2[s, t] = predecessor state on that path.
            PrettyTable <double> T1 = new PrettyTable <double>(NUM_POSSIBLE_STATES_K + 1, WORKING_LEN_T + 1);
            PrettyTable <int>    T2 = new PrettyTable <int>(NUM_POSSIBLE_STATES_K + 1, WORKING_LEN_T + 1);

            // Initialisation: start probability times emission of the first word.
            for (int s = 1; s <= NUM_POSSIBLE_STATES_K; s++)
            {
                WordType             state            = (WordType)s;
                ObservationFromState firstObservation = new ObservationFromState(sentence[0], state);
                T1[s, 1] = ProbabilityOrEpsilon(initialProbs, state)
                           * ProbabilityOrEpsilon(observationProbs, firstObservation);
                T2[s, 1] = 0;
            }

            // Recursion over the remaining words.
            for (int t = 2; t <= WORKING_LEN_T; t++)
            {
                foreach (WordType sj in Enum.GetValues(typeof(WordType)))
                {
                    int j = (int)sj;

                    // The transition must end in the candidate state sj. The earlier code used the
                    // time index (i - 1) as the target state, which looked up unrelated transitions.
                    StateTransition stateTrans = new StateTransition(WordType.Undefined, sj);

                    // The emission term does not depend on the predecessor k, so it is looked up
                    // once and applied after the maximisation; for a non-negative factor this
                    // yields the same maximum and the same arg-max as multiplying inside the loop.
                    double emission = ProbabilityOrEpsilon(observationProbs, new ObservationFromState(sentence[t - 1], sj));

                    int    bestPrev  = int.MinValue;
                    double bestReach = double.MinValue;
                    for (int k = 1; k <= NUM_POSSIBLE_STATES_K; k++)
                    {
                        stateTrans.from = (WordType)k;
                        double reach = T1[k, t - 1] * ProbabilityOrEpsilon(transitionProbs, stateTrans);
                        if (reach > bestReach)
                        {
                            bestReach = reach;
                            bestPrev  = k;
                        }
                    }

                    T1[j, t] = bestReach * emission;
                    T2[j, t] = bestPrev;
                }
            }

            // Termination: pick the most probable final state.
            double bestFinal = double.MinValue;
            for (int k = 1; k <= NUM_POSSIBLE_STATES_K; k++)
            {
                if (T1[k, WORKING_LEN_T] > bestFinal)
                {
                    bestFinal        = T1[k, WORKING_LEN_T];
                    Z[WORKING_LEN_T] = k;
                }
            }
            X[WORKING_LEN_T] = (WordType)Z[WORKING_LEN_T];

            // Backtrack through the stored predecessors.
            for (int t = WORKING_LEN_T; t >= 2; t--)
            {
                Z[t - 1] = T2[Z[t], t];
                X[t - 1] = (WordType)Z[t - 1];
            }

            return(X);
        }

        // Returns the probability stored for 'key', or EPSILON when the table has no entry for it.
        private static double ProbabilityOrEpsilon <TKey>(Dictionary <TKey, double> table, TKey key)
        {
            double probability;
            return(table.TryGetValue(key, out probability) ? probability : EPSILON);
        }