示例#1
0
        public void Learn_HMMWithRandomDistributedProbs_LearnsAConsistentModelBasedOnData()
        {
            // Trains a fully connected four-node model on shuffled symbol sequences and
            // checks that, after learning, every node's transition probabilities and
            // emission probabilities each still sum to one.

            // Arrange
            // Fixed seed: the shuffled observations are the same on every run, so a
            // failing run can be reproduced instead of varying with the default seed.
            const int SEED = 42;
            const int NODE_COUNT = 4;
            const int LEARN_PASSES = 5;

            int[] symbols = Enumerable.Range(1, 42).ToArray();
            List<int[]> obs = new List<int[]>();
            Random rnd = new Random(SEED);

            for (int i = 0; i < 10; i++)
            {
                obs.Add(symbols.OrderBy(x => rnd.Next()).ToArray());
            }

            HMMGraph hmm = new HMMGraph(symbols.Length);
            for (int i = 0; i < NODE_COUNT; i++)
            {
                hmm.AddNode(new Node());
            }

            // Every node gets the same weight for every transition and symbol;
            // Normalize() is then called to rescale them.
            foreach (Node n in hmm.Nodes)
            {
                foreach (Node m in hmm.Nodes)
                {
                    n.SetTransition(m, 0.5);
                }

                foreach (int i in symbols)
                {
                    n.SetEmission(i, 0.5);
                }
            }

            hmm.Normalize();
            BaumWelch bw = new BaumWelch(obs.Count, hmm);

            // Act
            for (int i = 0; i < LEARN_PASSES; i++)
            {
                bw.Learn(hmm, obs.ToArray());
            }

            // Assert
            const double PRECISION = .00000000001;
            foreach (Node n in hmm.Nodes)
            {
                // check transitions
                double transitionSum = 0;
                foreach (double probability in n.Transitions.Values)
                {
                    transitionSum += probability;
                }
                Assert.IsTrue(1.0 - PRECISION < transitionSum && transitionSum < 1.0 + PRECISION,
                    "Transition probabilities of a node do not sum to 1.");

                // check emissions
                double emissionSum = 0;
                foreach (double probability in n.Emissions.Values)
                {
                    emissionSum += probability;
                }
                Assert.IsTrue(1.0 - PRECISION < emissionSum && emissionSum < 1.0 + PRECISION,
                    "Emission probabilities of a node do not sum to 1.");
            }
        }
示例#2
0
		public void LearnTest_validInput_ModelDescribingTheData ()
		{
			// Smoke test: runs Learn on a four-node graph with four shuffled
			// sequences and only checks that a graph comes back. The test is
			// then marked inconclusive, so it never reports a pass.

			// Arrange
			HMMGraph model = new HMMGraph(NUMBER_OF_SYMBOLS_IN_HMMGRAPH);
			List<int[]> sequences = new List<int[]>();
			Random shuffler = new Random();

			int sequenceIndex = 0;
			while (sequenceIndex < 4)
			{
				// Each sequence is a random permutation of the numbers 1..49.
				int[] permutation = Enumerable.Range(1,49).OrderBy(x => shuffler.Next()).ToArray();
				sequences.Add(permutation);
				sequenceIndex++;
			}

			for (int added = 0; added < 4; added++)
			{
				model.AddNode(new Node());
			}

			BaumWelch learner = new BaumWelch();

			// Act
			HMMGraph learned = learner.Learn(model, sequences.ToArray());

			// Assert
			Assert.IsNotNull(learned);
			Assert.Inconclusive();
		}
        /// <summary>
        /// Adds a new node to <paramref name="graph"/> that copies the emissions and
        /// initial probability of <paramref name="qPrime"/>, wires it to and from every
        /// node with random transition weights, and normalizes the graph.
        /// </summary>
        /// <param name="qPrime">Node whose emissions and initial probability are copied.</param>
        /// <param name="graph">Graph that receives the new node; it is modified in place.</param>
        /// <returns>The same <paramref name="graph"/> instance.</returns>
        private HMMGraph Splitstate(Node qPrime, HMMGraph graph) {
            Random rng = new Random();
            Node added = new Node();

            // Random outgoing weights from the new node to every existing node.
            foreach (Node target in graph.Nodes) {
                added.SetTransition(target, rng.NextDouble());
            }

            // Emission weights are taken over unchanged from qPrime.
            foreach (KeyValuePair<int, double> emission in qPrime.Emissions) {
                added.SetEmission(emission.Key, emission.Value);
            }

            added.InitialProbability = qPrime.InitialProbability;

            // Random incoming weights from every existing node to the new node.
            foreach (Node source in graph.Nodes) {
                source.Transitions[added] = rng.NextDouble();
            }

            // Self-loop is set last, before the node joins the graph.
            added.SetTransition(added, rng.NextDouble());

            graph.AddNode(added);
            graph.Normalize();
            return graph;
        }
示例#4
0
        /// <summary>
        /// Builds a graph with random, normalized initial, emission and transition
        /// probabilities.
        /// </summary>
        /// <param name="numberOfSymbols">Number of symbols; every node gets an emission entry for 0..numberOfSymbols-1.</param>
        /// <param name="numberOfStates">Number of nodes to create.</param>
        /// <param name="outDegree">
        /// Desired number of outgoing transitions per node. Even-indexed nodes use the
        /// ceiling and odd-indexed nodes the floor. The value is capped at
        /// <paramref name="numberOfStates"/> because a node can hold only one
        /// transition per target node.
        /// </param>
        /// <returns>The populated graph.</returns>
        static HMMGraph CreateGraph(int numberOfSymbols, int numberOfStates, double outDegree)
        {
            HMMGraph graph = new HMMGraph(numberOfSymbols);

            double initialProbabilitySum = 0.0;

            for (int i = 0; i < numberOfStates; i++)
            {
                Node node = new Node();

                node.InitialProbability = random.NextDouble();
                initialProbabilitySum += node.InitialProbability;

                node.Emissions = new Dictionary<int, double>();

                for (int j = 0; j < numberOfSymbols; j++)
                {
                    node.Emissions.Add(j, random.NextDouble());
                }

                // Rescale so the emission weights of this node sum to one.
                double emissionSum = node.Emissions.Values.Sum();

                for (int j = 0; j < numberOfSymbols; j++)
                {
                    node.Emissions[j] /= emissionSum;
                }

                graph.AddNode(node);
            }

            for (int i = 0; i < numberOfStates; i++)
            {
                Node current = graph.Nodes[i];

                current.InitialProbability /= initialProbabilitySum;

                current.Transitions = new Dictionary<Node, double>();

                int requestedOutDeg = (int)(((i % 2) == 1) ? Math.Floor(outDegree) : Math.Ceiling(outDegree));

                // Targets are chosen as (i + j) % numberOfStates, which repeats once j
                // reaches numberOfStates; without the cap Dictionary.Add would throw
                // on the duplicate key.
                int outDeg = Math.Min(requestedOutDeg, numberOfStates);

                for (int j = 0; j < outDeg; j++)
                {
                    current.Transitions.Add(graph.Nodes[((i + j) % numberOfStates)], random.NextDouble());
                }

                // Rescale so the outgoing weights of this node sum to one.
                double transitionSum = current.Transitions.Values.Sum();

                for (int j = 0; j < outDeg; j++)
                {
                    current.Transitions[graph.Nodes[((i + j) % numberOfStates)]] /= transitionSum;
                }
            }

            return graph;
        }
 /// <summary>
 /// Constructs a HMMGraph from a HMM: one node per state, with a transition
 /// entry for every state pair and an emission entry for every symbol.
 /// </summary>
 /// <param name="hmm">Model to read states, symbols and probabilities from.</param>
 /// <returns>The newly built graph.</returns>
 public static HMMGraph HMM2Graph(HiddenMarkovModel hmm){
     HMMGraph result = new HMMGraph(hmm.Symbols);
     Node[] states = new Node[hmm.States];

     // All nodes must exist before any transition between them can be set.
     for (int s = 0; s < hmm.States; s++) {
         Node created = new Node();
         states[s] = created;
         result.AddNode(created);
     }

     for (int src = 0; src < hmm.States; src++) {
         Node source = states[src];
         source.InitialProbability = hmm.Probabilities[src];

         for (int dst = 0; dst < hmm.States; dst++) {
             source.SetTransition(states[dst], hmm.Transitions[src, dst]);
         }

         for (int symbol = 0; symbol < hmm.Symbols; symbol++) {
             source.SetEmission(symbol, hmm.Emissions[src, symbol]);
         }
     }

     return result;
 }
        /// <summary>
        /// Builds a HiddenMarkovModel from the initial, transition and emission
        /// probabilities stored on the nodes of a HMMGraph. Probabilities are copied
        /// as they are, so call Normalize on the graph first if necessary.
        /// </summary>
        /// <param name="g">Graph to convert; node order in g.Nodes defines the state indices.</param>
        /// <returns>A model built from the copied probability tables.</returns>
        public static HiddenMarkovModel Graph2HMM(HMMGraph g) {
            double[] pi = new double[g.Nodes.Count];
            double[,] a = new double[g.NumNodes, g.NumNodes];
            double[,] b = new double[g.NumNodes, g.NumSymbols];

            for (int row = 0; row < g.NumNodes; row++) {
                pi[row] = g.Nodes[row].InitialProbability;
            }

            for (int row = 0; row < g.NumNodes; row++) {
                Node state = g.Nodes[row];

                // Transitions not present on the node keep the array default of 0.
                foreach (KeyValuePair<Node, double> edge in state.Transitions) {
                    int column = g.Nodes.IndexOf(edge.Key);
                    a[row, column] = edge.Value;
                }

                // Emission keys are used directly as symbol indices.
                foreach (KeyValuePair<int, double> emission in state.Emissions) {
                    b[row, emission.Key] = emission.Value;
                }
            }

            return new HiddenMarkovModel(a, b, pi);
        }
		/// <summary>
		/// Runs one re-estimation pass per observation sequence on the given graph.
		/// For each sequence the internal state is re-initialized for its length,
		/// initial, transition and emission probabilities are re-estimated in that
		/// order, and the graph is normalized.
		/// </summary>
		/// <param name="hmm">Graph to train; it becomes the instance's current graph and is modified in place.</param>
		/// <param name="Observations">Observation sequences, processed in array order.</param>
		/// <returns>The trained graph (the same instance that was passed in).</returns>
		public HMMGraph Learn(HMMGraph hmm, int[] [] Observations) {

			// Setup algo
			graph = hmm;

			for (int s = 0; s < Observations.Length; s++) {
				int[] sequence = Observations[s];

				ReInitialize(sequence.Length);

				reestimateInitialProbs(sequence);
				reestimateTransitions(sequence);
				reestimateEmissions(sequence);

				graph.Normalize();
			}

			return graph;
		}
        /// <summary>
        /// Returns the node with the largest summed gamma over the training data.
        /// </summary>
        /// <param name="graph">Graph whose nodes are scored.</param>
        /// <param name="combinedTrainData">Single observation sequence used for scoring.</param>
        /// <returns>
        /// The highest-scoring node; the first node of the graph when no node scores
        /// above zero.
        /// </returns>
        private static Node FindQPrime(HMMGraph graph, int[] combinedTrainData) {
            BaumWelch estimator = new BaumWelch(combinedTrainData.Length, graph);
            estimator.PreCompute(graph, combinedTrainData);

            Node winner = graph.Nodes[0];
            double highest = 0.0;

            foreach (Node candidate in graph.Nodes) {

                // relative (unscaled)
                double gamma = estimator.ComputeGamma(candidate, graph, combinedTrainData);

                // Strictly greater: on a tie the earlier node stays selected.
                if (gamma > highest) {
                    highest = gamma;
                    winner = candidate;
                }
            }

            return winner;
        }
        /// <summary>
        /// Fills the backward table for every time step and node of
        /// <paramref name="G"/>, dividing each entry by the coefficient c[t].
        /// </summary>
        /// <remarks>
        /// Each value is obtained through ComputeBackward before it is scaled.
        /// Scaling an entry that was never computed would divide the table's
        /// placeholder value and leave it looking assigned to later ComputeBackward
        /// calls. NOTE(review): c[t] is expected to have been filled by
        /// PreComputeForward beforehand; confirm no caller runs this on its own.
        /// </remarks>
        /// <param name="G">Graph whose nodes index the columns of the backward table.</param>
        /// <param name="O">Observation sequence.</param>
        private void PreComputeBackward(HMMGraph G, int[] O) {

            // Walk from the last time step to the first: ComputeBackward at t only
            // reads entries at t + 1, which are already computed and scaled.
            for (int t = O.Length - 1; t >= 0; t--) {

                foreach (Node n in G.Nodes) {
                    double unscaled = ComputeBackward(n, G, t, O);

                    // scaling
                    backward[t, G.Nodes.IndexOf(n)] = unscaled / c[t];
                }
            }
        }
示例#10
0
        /// <summary>
        /// Fills the forward table for every time step of <paramref name="O"/>.
        /// For each step the forward values of all nodes are added onto c[t], and
        /// every forward entry of that step is then divided by c[t].
        /// </summary>
        /// <param name="G">Graph whose nodes index the columns of the forward table.</param>
        /// <param name="O">Observation sequence.</param>
        private void PreComputeForward(HMMGraph G, int[] O) {

            int t = 0;

            // do/while: step 0 (the base step) is always processed, later steps
            // (the induction) only while t is inside the sequence.
            do {
                // calc coefficient
                foreach (Node node in G.Nodes) {
                    c[t] += ComputeForward(node, G, t, O);
                }

                // Scale
                foreach (Node node in G.Nodes) {
                    int column = G.Nodes.IndexOf(node);
                    forward[t, column] /= c[t];
                }

                t++;
            } while (t < O.Length);
        }
示例#11
0
        /// <summary>
        /// Runs the forward precomputation followed by the backward precomputation
        /// for the observation sequence <paramref name="O"/>.
        /// </summary>
        /// <param name="G">Graph passed through to both passes.</param>
        /// <param name="O">Observation sequence passed through to both passes.</param>
        public void PreCompute(HMMGraph G, int[] O) {

            // Forward runs first: it accumulates the c[t] coefficients that the
            // backward pass divides by.
            PreComputeForward(G,O);
            PreComputeBackward(G,O);
        }
示例#12
0
		/// <summary>
		/// Sums, over every node of <paramref name="G"/>, the node's initial
		/// probability multiplied by its backward value at time 0.
		/// </summary>
		/// <remarks>
		/// The contribution of every node is accumulated; returning from inside the
		/// loop would report only the first node's term.
		/// NOTE(review): the textbook sequence likelihood also multiplies each term
		/// by the emission probability of O[0]; this sum leaves that factor out.
		/// Confirm whether callers rely on that.
		/// </remarks>
		/// <param name="G">Graph whose nodes are summed over.</param>
		/// <param name="O">Observation sequence.</param>
		/// <returns>The accumulated sum; 0 when the graph has no nodes.</returns>
		private double ComputeLikelihood(HMMGraph G, int[] O) {

			double likelihood = 0;

			foreach (Node n in G.Nodes) {

				double bwd = ComputeBackward(n, G, 0, O);
				double init = n.InitialProbability;

				likelihood += init * bwd;
			}

			return likelihood;
		}
示例#13
0
        // Returns a relative (unscaled) gamma value for node n at time t:
        // forward(n, t) * backward(n, t) divided by ComputeLikelihood(G, O).
        private double ComputeGamma(Node n, HMMGraph G, int t, int[] O) {

            // Operands are evaluated left to right: forward, backward, then likelihood.
            double numerator = ComputeForward(n, G, t, O) * ComputeBackward(n, G, t, O);

            return numerator / ComputeLikelihood(G, O);
        }
示例#14
0
		/// <summary>
		/// Adds up the per-time-step gamma of node <paramref name="n"/> over the whole
		/// observation sequence.
		/// </summary>
		/// <param name="n">Node to score.</param>
		/// <param name="G">Graph the node belongs to.</param>
		/// <param name="O">Observation sequence.</param>
		/// <returns>The sum of gamma over t = 0 .. O.Length - 1; 0 for an empty sequence.</returns>
		public double ComputeGamma(Node n, HMMGraph G, int[] O) {

			double total = 0;
			int step = 0;

			while (step < O.Length) {
				total += ComputeGamma(n, G, step, O);
				step++;
			}

			return total;
		}
示例#15
0
		/// <summary>
		/// Computes the product forward(na, t) * transition(na -> nb) *
		/// emission(nb, O[t + 1]) * backward(nb, t + 1).
		/// </summary>
		/// <remarks>
		/// A transition or emission that has no entry on the node is replaced by
		/// MINIMUM_PROB.
		/// </remarks>
		private double ComputeKsi(Node na, Node nb, HMMGraph G, 
				int t, int[] O) {

			double fwd = ComputeForward(na, G, t, O);

			double transition;
			if (!na.Transitions.TryGetValue(nb, out transition)) {
				transition = MINIMUM_PROB;
			}

			double emission;
			if (!nb.Emissions.TryGetValue(O[t + 1], out emission)) {
				emission = MINIMUM_PROB;
			}

			return fwd * transition * emission * ComputeBackward(nb, G, t + 1, O);
		}
        /// <summary>
        /// Redistributes the transitions that point at <paramref name="qPrime"/>
        /// between <paramref name="q1"/> and <paramref name="q2"/>.
        /// </summary>
        /// <remarks>
        /// Nodes are visited in graph order. The first half of the nodes that have a
        /// transition to qPrime (count / 2, rounded down) redirect it to q1, the rest
        /// to q2. Each redirected transition keeps its probability, and the original
        /// transition to qPrime is set to 0.
        /// </remarks>
        /// <param name="qPrime">Node whose incoming transitions are reassigned.</param>
        /// <param name="q1">Receiver of the first half of the incoming transitions.</param>
        /// <param name="q2">Receiver of the remaining incoming transitions.</param>
        /// <param name="graph">Graph whose nodes are searched for transitions to qPrime.</param>
        private void AssignIncomingTransitions(Node qPrime, Node q1, Node q2, HMMGraph graph) {

            // Collect the sources first so the size of each half is known up front.
            List<Node> sources = new List<Node>();

            foreach (Node n in graph.Nodes) {

                if (n.Transitions.ContainsKey(qPrime)) {
                    sources.Add(n);
                }
            }

            int q1Share = sources.Count / 2;

            for (int i = 0; i < sources.Count; i++) {

                Node source = sources[i];

                double prob = source.Transitions[qPrime];
                source.SetTransition(qPrime, 0);

                // The position in the list decides the receiver; a counter that never
                // advances would send every transition to the same node.
                if (i < q1Share) {

                    source.SetTransition(q1, prob);
                }
                else {

                    source.SetTransition(q2, prob);
                }
            }
        }
示例#17
0
        /// <summary>
        /// Returns the backward value of node <paramref name="n"/> at time
        /// <paramref name="t"/>, computing and caching it in the backward table when
        /// the table entry still holds UNASSIGNED.
        /// </summary>
        /// <remarks>
        /// At the last time step the value is 1.0. Otherwise it is the sum over all
        /// nodes ni of transition(n -> ni) * emission(ni, O[t + 1]) * backward(ni, t + 1),
        /// where a missing transition or emission entry counts as MINIMUM_PROB.
        /// The table column is looked up in the instance's graph, while the
        /// summation runs over the nodes of <paramref name="G"/>.
        /// </remarks>
        private double ComputeBackward(Node n, HMMGraph G, int t, int[] O) {

            int column = graph.Nodes.IndexOf(n);

            // Already cached: hand back the stored value untouched.
            if (backward[t, column] != UNASSIGNED) {
                return backward[t, column];
            }

            if (t == O.Length - 1) {
                backward[t, column] = 1.0;
                return backward[t, column];
            }

            double total = 0;

            foreach (Node next in G.Nodes) {

                double transition;
                if (!n.Transitions.TryGetValue(next, out transition)) {
                    transition = MINIMUM_PROB;
                }

                double emission;
                if (!next.Emissions.TryGetValue(O[t + 1], out emission)) {
                    emission = MINIMUM_PROB;
                }

                double later = ComputeBackward(next, G, t + 1, O);

                total += transition * emission * later;
            }

            backward[t, column] = total;
            return backward[t, column];
        }
示例#18
0
        /// <summary>
        /// Returns the forward value of node <paramref name="n"/> at time
        /// <paramref name="t"/>, computing and caching it in the forward table when
        /// the table entry still holds UNASSIGNED.
        /// </summary>
        /// <remarks>
        /// At t = 0 the value is the node's initial probability times its emission
        /// probability for O[0]. Otherwise it is the sum over all nodes ni of
        /// forward(ni, t - 1) * transition(ni -> n), times the emission probability
        /// of n for O[t]. A missing transition or emission entry counts as
        /// MINIMUM_PROB. The table column is looked up in the instance's graph,
        /// while the summation runs over the nodes of <paramref name="G"/>.
        /// </remarks>
        private double ComputeForward(Node n, HMMGraph G, int t, int[] O) {

            int column = graph.Nodes.IndexOf(n);

            // Already cached: hand back the stored value untouched.
            if (forward[t, column] != UNASSIGNED) {
                return forward[t, column];
            }

            // Weight that is multiplied by the emission probability below:
            // the initial probability at t = 0, the summed predecessors otherwise.
            double incoming;

            if (t == 0) {
                incoming = n.InitialProbability;
            }
            else {
                incoming = 0;

                foreach (Node previous in G.Nodes) {

                    double earlier = ComputeForward(previous, G, t - 1, O);

                    double transition;
                    if (!previous.Transitions.TryGetValue(n, out transition)) {
                        transition = MINIMUM_PROB;
                    }

                    incoming += earlier * transition;
                }
            }

            double emission;
            if (!n.Emissions.TryGetValue(O[t], out emission)) {
                emission = MINIMUM_PROB;
            }

            forward[t, column] = incoming * emission;
            return forward[t, column];
        }
        /// <summary>
        /// Re-applies every stored transition and emission value of every node through
        /// SetTransition / SetEmission.
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably the setters drop or clamp entries (e.g. zero
        /// probabilities), which would be the point of re-setting unchanged values;
        /// confirm against Node.
        /// </remarks>
        /// <param name="G">Graph whose nodes are processed in place.</param>
        private void CleanGraph(HMMGraph G) {

            foreach (Node node in G.Nodes) {

                // Keys are snapshotted so the setters may modify the dictionaries.
                foreach (Node target in node.Transitions.Keys.ToArray()) {
                    node.SetTransition(target, node.Transitions[target]);
                }

                foreach (int symbol in node.Emissions.Keys.ToArray()) {
                    node.SetEmission(symbol, node.Emissions[symbol]);
                }
            }
        }
示例#20
0
        //// default cTor.
        //public BaumWelch() : this(0, null) {

        //    throw new Exception("Need parameters");
        //}

        // Master cTor.
        /// <summary>
        /// Creates an instance bound to graph <paramref name="G"/> and initializes it
        /// for observation sequences of length <paramref name="obslength"/>.
        /// </summary>
        /// <param name="obslength">
        /// Length passed to ReInitialize. NOTE(review): presumably the number of time
        /// steps the forward/backward tables are sized for; confirm in ReInitialize.
        /// </param>
        /// <param name="G">Graph stored as the instance's current graph.</param>
        public BaumWelch(int obslength, HMMGraph G)
        {
            // The graph is assigned before ReInitialize runs.
            graph = G;
            ReInitialize(obslength);
        }
        /// <summary>
        /// Builds a sparse model from a graph: copies the initial distribution and the
        /// transition and emission probabilities, and records for every state its
        /// outgoing targets, incoming sources and emitted symbols, and for every
        /// symbol the states that emit it.
        /// </summary>
        /// <param name="graph">Graph to convert; node order in graph.Nodes defines the state indices.</param>
        /// <returns>The populated model.</returns>
        public static SparseHiddenMarkovModel FromGraph(HMMGraph graph)
        {
            SparseHiddenMarkovModel model = new SparseHiddenMarkovModel(graph.NumNodes, graph.NumSymbols);

            model.initialDistribution = graph.Nodes.Select(n => n.InitialProbability).ToArray();
            model.transitionProbabilities = new double[graph.NumNodes, graph.NumNodes];
            model.emissionProbabilities = new double[graph.NumNodes, graph.NumSymbols];

            // sourcesOf[k]: states with a transition into state k, in ascending order.
            List<int>[] sourcesOf = Enumerable.Range(0, graph.NumNodes).Select(_ => new List<int>()).ToArray();

            for (int state = 0; state < graph.NumNodes; state++)
            {
                Node current = graph.Nodes[state];
                List<int> targets = new List<int>();

                foreach (KeyValuePair<Node, double> edge in current.Transitions)
                {
                    int target = graph.Nodes.IndexOf(edge.Key);

                    targets.Add(target);
                    model.transitionProbabilities[state, target] = edge.Value;
                    sourcesOf[target].Add(state);
                }

                model.transitionsOut[state] = targets.ToArray();
            }

            for (int state = 0; state < graph.NumNodes; state++)
            {
                model.transitionsIn[state] = sourcesOf[state].ToArray();
            }

            // emittersOf[s]: states that have an emission entry for symbol s.
            List<int>[] emittersOf = Enumerable.Range(0, graph.NumSymbols).Select(_ => new List<int>()).ToArray();

            for (int state = 0; state < graph.NumNodes; state++)
            {
                Node current = graph.Nodes[state];

                foreach (KeyValuePair<int, double> emission in current.Emissions)
                {
                    model.emissionProbabilities[state, emission.Key] = emission.Value;
                    emittersOf[emission.Key].Add(state);
                }

                model.emissions[state] = current.Emissions.Keys.ToArray();
            }

            for (int symbol = 0; symbol < graph.NumSymbols; symbol++)
            {
                model.emittents[symbol] = emittersOf[symbol].ToArray();
            }

            return model;
        }
        /// <summary>
        /// Builds a graph from this model: one node per state carrying its initial
        /// probability, the emissions listed for it, and the transitions listed as
        /// outgoing for it.
        /// </summary>
        /// <returns>A new graph with NumberOfStates nodes.</returns>
        public HMMGraph ToGraph()
        {
            HMMGraph result = new HMMGraph(NumberOfSymbols);

            // First pass: nodes with initial probability and emissions. Transitions
            // need every node to exist, so they are filled in afterwards.
            for (int s = 0; s < NumberOfStates; s++)
            {
                int state = s;

                result.AddNode(new Node
                {
                    InitialProbability = initialDistribution[state],
                    Emissions = emissions[state].ToDictionary(
                        symbol => symbol,
                        symbol => emissionProbabilities[state, symbol])
                });
            }

            // Second pass: only the targets listed in transitionsOut get an entry.
            for (int s = 0; s < NumberOfStates; s++)
            {
                int state = s;
                Node source = result.Nodes[state];

                source.Transitions = transitionsOut[state].ToDictionary(
                    target => result.Nodes[target],
                    target => transitionProbabilities[state, target]);
            }

            return result;
        }
        /// <summary>
        /// Learns a sparse HMM by starting from a fully connected graph of
        /// MINIMUM_STATES nodes and repeatedly selecting a node with FindQPrime,
        /// splitting it when the split condition holds, and retraining the model.
        /// </summary>
        /// <remarks>
        /// A "States, Likelihood" header is written to a new CSV file named from
        /// intermediateOutputFileName and the run counter; OutputIntermediate is
        /// called after every iteration. The file is closed even when an iteration
        /// throws. The loop ends when the model reaches maximum_states or the
        /// iteration counter reaches maximum_iterations.
        /// </remarks>
        /// <param name="trainingData">Sequences used for node selection and for retraining.</param>
        /// <param name="validationData">Passed to OutputIntermediate after each iteration.</param>
        /// <param name="testData">Not used by this method.</param>
        public override void Learn(SequenceData trainingData,
                SequenceData validationData, SequenceData testData) 
        {
            intermediateOutputFile = new System.IO.StreamWriter(intermediateOutputFileName + (run++) + ".csv");

            try
            {
                intermediateOutputFile.WriteLine("States, Likelihood");

                // Initialize graph
                HMMGraph graph = new HMMGraph(trainingData.NumSymbols);

                for (int i = 0; i < MINIMUM_STATES; i++) {

                    graph.AddNode(new Node());
                }

                // Equal weight for every transition and symbol; Normalize rescales them.
                foreach (Node n in graph.Nodes) {
                    foreach (Node m in graph.Nodes) {
                        n.SetTransition(m, 0.5);
                    }

                    for (int i = 0; i < trainingData.NumSymbols; i++) {
                        n.SetEmission(i, 0.5);
                    }
                }
                graph.Normalize();

                this.hmm = SparseHiddenMarkovModel.FromGraph(graph);

                CleanGraph(graph);

                // All training sequences concatenated into one, used for node selection.
                List<int> cList = new List<int>();
                foreach (int[] a in trainingData.GetAll()) {

                    cList.AddRange(a);
                }
                int[] combinedTrainData = cList.ToArray();

                // Run iterations.
                int iteration = 1;
                int stuckAt = 1;
                int stuckFor = 1;

                while(hmm.NumberOfStates < maximum_states
                      && iteration < maximum_iterations) {

                    Console.WriteLine("* Iteration {0} of {1} Model contains {2} states",iteration,maximum_iterations,hmm.NumberOfStates);

                    graph = hmm.ToGraph();

                    Node qPrime = FindQPrime(graph, combinedTrainData);

                    // check to see if the algorithm is stuck:
                    // count consecutive iterations with an unchanged state count
                    if (stuckAt == hmm.NumberOfStates) {
                        stuckFor++;
                    }
                    else {
                        stuckAt = hmm.NumberOfStates;
                        stuckFor = 1;
                    }

                    bool isStuck = stuckFor > MAX_STUCK;

                    if (isUniform(qPrime.Transitions.Values.ToArray(),TRANSITION_UNIFORMITY_THRESHOLD) 
                        || isUniform(qPrime.Emissions.Values.ToArray(),EMISSION_UNIFORMITY_THRESHOLD)
                        || isStuck) 
                    {

                        if (isStuck) {
                            Console.WriteLine("Algorithm is stuck: FORCING SPLIT");
                        }
                        graph = Splitstate(qPrime, graph);
                    }

                    hmm = SparseHiddenMarkovModel.FromGraph(graph);
                    hmm.Learn(trainingData.GetAll(), THRESHOLD, BW_ITERATIONS);
                    OutputIntermediate(validationData);
                    iteration++;
                }

                // NOTE(review): this rebuilds hmm from the graph as it was before the
                // last hmm.Learn call, so the parameters fitted in the final iteration
                // are not carried over. Confirm this is intended.
                hmm = SparseHiddenMarkovModel.FromGraph(graph);
            }
            finally
            {
                // Release the file handle on every exit path.
                intermediateOutputFile.Close();
            }
        }