/// <summary>
/// Adds one new node to <paramref name="graph"/> that copies the emissions and
/// initial probability of <paramref name="qPrime"/>, wires it to the existing
/// nodes with random transition weights, and normalizes the graph.
/// </summary>
/// <param name="qPrime">Node whose emissions and initial probability are copied.</param>
/// <param name="graph">Graph that receives the new node; it is modified in place.</param>
/// <returns>The same <paramref name="graph"/> instance that was passed in.</returns>
private HMMGraph Splitstate(Node qPrime, HMMGraph graph) {
            Random rng = new Random();
            Node splitNode = new Node();

            // Outgoing edges: new node -> every node currently in the graph.
            foreach (Node target in graph.Nodes) {
                splitNode.SetTransition(target, rng.NextDouble());
            }

            // The new node emits exactly what qPrime emits.
            var sourceEmissions = qPrime.Emissions;
            foreach (int sym in sourceEmissions.Keys) {
                splitNode.SetEmission(sym, sourceEmissions[sym]);
            }

            splitNode.InitialProbability = qPrime.InitialProbability;

            // Incoming edges: every node currently in the graph -> new node.
            foreach (Node source in graph.Nodes) {
                source.Transitions[splitNode] = rng.NextDouble();
            }

            // The new node is not part of graph.Nodes yet, so its self-loop
            // has to be set explicitly before it is added.
            splitNode.SetTransition(splitNode, rng.NextDouble());

            graph.AddNode(splitNode);
            graph.Normalize();

            return graph;
        }
// Example #2
// 0
        /// <summary>
        /// Trains a four-node <c>HMMGraph</c> with <c>BaumWelch</c> on shuffled
        /// observation sequences and checks that, for every node, the transition
        /// probabilities and the emission probabilities each still sum to 1.
        /// </summary>
        public void Learn_HMMWithRandomDistributedProbs_LearnsAConsistentModelBasedOnData()
        {
            // Arrange
            const int SEED = 42;             // fixed seed so a failure can be reproduced
            const int SEQUENCE_COUNT = 10;
            const int STATE_COUNT = 4;
            const int TRAINING_ROUNDS = 5;

            int[] symbols = Enumerable.Range(1, 42).ToArray();
            Random rnd = new Random(SEED);

            // Each observation sequence is a random permutation of the symbols.
            int[][] observations = new int[SEQUENCE_COUNT][];
            for (int i = 0; i < SEQUENCE_COUNT; i++)
            {
                observations[i] = symbols.OrderBy(x => rnd.Next()).ToArray();
            }

            HMMGraph hmm = new HMMGraph(symbols.Length);
            for (int i = 0; i < STATE_COUNT; i++)
            {
                hmm.AddNode(new Node());
            }

            // Same weight everywhere; Normalize() below turns the weights into distributions.
            foreach (Node n in hmm.Nodes)
            {
                foreach (Node m in hmm.Nodes)
                {
                    n.SetTransition(m, 0.5);
                }

                foreach (int symbol in symbols)
                {
                    n.SetEmission(symbol, 0.5);
                }
            }

            hmm.Normalize();
            BaumWelch bw = new BaumWelch(observations.Length, hmm);

            // Act
            for (int round = 0; round < TRAINING_ROUNDS; round++)
            {
                bw.Learn(hmm, observations);
            }

            // Assert
            const double PRECISION = .00000000001;
            foreach (Node n in hmm.Nodes)
            {
                double transitionSum = n.Transitions.Values.Sum();
                Assert.IsTrue(
                    1.0 - PRECISION < transitionSum && transitionSum < 1.0 + PRECISION,
                    "Transition probabilities of a node sum to " + transitionSum + " instead of 1.");

                double emissionSum = n.Emissions.Values.Sum();
                Assert.IsTrue(
                    1.0 - PRECISION < emissionSum && emissionSum < 1.0 + PRECISION,
                    "Emission probabilities of a node sum to " + emissionSum + " instead of 1.");
            }
        }
		/// <summary>
		/// Runs one re-estimation pass over <paramref name="hmm"/> for each
		/// observation sequence, normalizing the graph after every sequence.
		/// </summary>
		/// <param name="hmm">Graph to train; it is stored in <c>graph</c> and updated in place.</param>
		/// <param name="Observations">Observation sequences, processed in array order.</param>
		/// <returns>The graph held in <c>graph</c> after the last sequence has been processed.</returns>
		public HMMGraph Learn(HMMGraph hmm, int[] [] Observations) {

			// Setup algo: presumably the reestimate* helpers work on `graph` — confirm.
			graph = hmm;

			for (int index = 0; index < Observations.Length; index++) {

				int[] sequence = Observations[index];

				// Re-initialize for the length of this particular sequence.
				ReInitialize(sequence.Length);

				reestimateInitialProbs(sequence);
				reestimateTransitions(sequence);
				reestimateEmissions(sequence);

				graph.Normalize();
			}

			return graph;
		}
        /// <summary>
        /// Learns a model by starting from <c>MINIMUM_STATES</c> states and then
        /// repeatedly picking a state with <c>FindQPrime</c>, optionally splitting it
        /// with <c>Splitstate</c>, and retraining, until the model has
        /// <c>maximum_states</c> states or <c>maximum_iterations</c> is reached.
        /// </summary>
        /// <remarks>
        /// A new CSV file named <c>intermediateOutputFileName + run + ".csv"</c> is
        /// opened for the duration of the call (and <c>run</c> is incremented).
        /// The file is closed even when learning throws.
        /// </remarks>
        /// <param name="trainingData">Sequences used for training and for choosing the state to split.</param>
        /// <param name="validationData">Passed to <c>OutputIntermediate</c> after every iteration.</param>
        /// <param name="testData">Not used by this method.</param>
        public override void Learn(SequenceData trainingData,
                SequenceData validationData, SequenceData testData) 
        {
            intermediateOutputFile = new System.IO.StreamWriter(intermediateOutputFileName + (run++) + ".csv");

            try {
                intermediateOutputFile.WriteLine("States, Likelihood");

                // Initialize graph: every transition and emission gets the same
                // weight, and Normalize() is then applied.
                HMMGraph graph = new HMMGraph(trainingData.NumSymbols);

                for (int i = 0; i < MINIMUM_STATES; i++) {
                    graph.AddNode(new Node());
                }

                foreach (Node n in graph.Nodes) {
                    foreach (Node m in graph.Nodes) {
                        n.SetTransition(m, 0.5);
                    }

                    for (int i = 0; i < trainingData.NumSymbols; i++) {
                        n.SetEmission(i, 0.5);
                    }
                }
                graph.Normalize();

                this.hmm = SparseHiddenMarkovModel.FromGraph(graph);

                CleanGraph(graph);

                // Concatenate all training sequences into one long sequence;
                // it is only used to choose the state to split.
                List<int> cList = new List<int>();
                foreach (int[] a in trainingData.GetAll()) {
                    cList.AddRange(a);
                }
                int[] combinedTrainData = cList.ToArray();

                // Run iterations.
                int iteration = 1;
                int stuckAt = 1;   // state count seen in the previous iteration
                int stuckFor = 1;  // consecutive iterations with that state count

                while (hmm.NumberOfStates < maximum_states
                       && iteration < maximum_iterations) {

                    Console.WriteLine("* Iteration {0} of {1} Model contains {2} states",iteration,maximum_iterations,hmm.NumberOfStates);

                    graph = hmm.ToGraph();

                    Node qPrime = FindQPrime(graph, combinedTrainData);

                    // check to see if the algorithm is stuck
                    if (stuckAt == hmm.NumberOfStates) {
                        stuckFor++;
                    }
                    else {
                        stuckAt = hmm.NumberOfStates;
                        stuckFor = 1;
                    }

                    bool isStuck = stuckFor > MAX_STUCK;

                    if (isUniform(qPrime.Transitions.Values.ToArray(),TRANSITION_UNIFORMITY_THRESHOLD) 
                        || isUniform(qPrime.Emissions.Values.ToArray(),EMISSION_UNIFORMITY_THRESHOLD)
                        || isStuck) 
                    {
                        if (isStuck) {
                            Console.WriteLine("Algorithm is stuck: FORCING SPLIT");
                        }
                        graph = Splitstate(qPrime, graph);
                    }

                    hmm = SparseHiddenMarkovModel.FromGraph(graph);
                    hmm.Learn(trainingData.GetAll(), THRESHOLD, BW_ITERATIONS);
                    OutputIntermediate(validationData);
                    iteration++;
                }

                // NOTE(review): `graph` was captured before the last hmm.Learn(...) call,
                // so this looks like it replaces the freshly trained model with its
                // untrained counterpart — confirm this is intended.
                hmm = SparseHiddenMarkovModel.FromGraph(graph);
            }
            finally {
                // Release the file handle even if learning failed part-way.
                intermediateOutputFile.Close();
            }
        }