/// <summary>
/// Grows the graph by one state: adds a new node that copies qPrime's emission
/// distribution and initial probability, wires it to every existing node (and
/// itself) with random transition weights, then renormalizes the whole graph.
/// </summary>
/// <param name="qPrime">The state being split; its emissions/initial probability are copied.</param>
/// <param name="graph">The graph to extend; mutated in place and also returned.</param>
/// <returns>The same <paramref name="graph"/> instance, now containing the new node.</returns>
private HMMGraph Splitstate(Node qPrime, HMMGraph graph) {
    // NOTE(review): a fresh Random per call can repeat seeds on rapid
    // successive calls (pre-.NET 6 seeding) — consider a shared instance.
    Random rng = new Random();
    Node clone = new Node();

    // Random outgoing transitions from the clone to every existing node.
    foreach (Node target in graph.Nodes) {
        clone.SetTransition(target, rng.NextDouble());
    }

    // The clone inherits qPrime's emission distribution verbatim.
    foreach (int symbol in qPrime.Emissions.Keys) {
        clone.SetEmission(symbol, qPrime.Emissions[symbol]);
    }
    clone.InitialProbability = qPrime.InitialProbability;

    // Random incoming transitions from every existing node to the clone.
    foreach (Node source in graph.Nodes) {
        source.Transitions[clone] = rng.NextDouble();
    }

    // Random self-loop, then register the clone and renormalize all
    // transition/emission distributions so they sum to 1 again.
    clone.SetTransition(clone, rng.NextDouble());
    graph.AddNode(clone);
    graph.Normalize();
    return graph;
}
/// <summary>
/// After several Baum-Welch passes over random observation sequences, every
/// node's transition distribution and emission distribution must each still
/// sum to 1 (i.e. the learned model remains a valid probability model).
/// </summary>
public void Learn_HMMWithRandomDistributedProbs_LearnsAConsistentModelBasedOnData() {
    // Arrange: 10 random permutations of the symbols 1..42 as observations.
    int[] symbols = Enumerable.Range(1, 42).ToArray();
    List<int[]> obs = new List<int[]>();
    Random rnd = new Random();
    for (int i = 0; i < 10; i++) {
        obs.Add(symbols.OrderBy(x => rnd.Next()).ToArray());
    }

    // A 4-state graph with uniform (pre-normalization) weights everywhere.
    HMMGraph hmm = new HMMGraph(symbols.Length);
    hmm.AddNode(new Node());
    hmm.AddNode(new Node());
    hmm.AddNode(new Node());
    hmm.AddNode(new Node());
    foreach (Node n in hmm.Nodes) {
        foreach (Node m in hmm.Nodes) {
            n.SetTransition(m, 0.5);
        }
        foreach (int i in symbols) {
            n.SetEmission(i, 0.5);
        }
    }
    hmm.Normalize();
    BaumWelch bw = new BaumWelch(obs.Count, hmm);

    // Act: five full Baum-Welch learning passes.
    for (int i = 0; i < 5; i++) {
        bw.Learn(hmm, obs.ToArray());
    }

    // Assert: Assert.AreEqual with a delta (instead of a boolean range check)
    // so a failure reports the actual offending sum.
    const double PRECISION = .00000000001;
    foreach (Node n in hmm.Nodes) {
        double sum = 0;
        foreach (Node nb in n.Transitions.Keys) {
            sum += n.Transitions[nb];
        }
        Assert.AreEqual(1.0, sum, PRECISION, "Transition probabilities of a node must sum to 1.");

        sum = 0;
        foreach (int o in n.Emissions.Keys) {
            sum += n.Emissions[o];
        }
        Assert.AreEqual(1.0, sum, PRECISION, "Emission probabilities of a node must sum to 1.");
    }
}
/// <summary>
/// Runs one Baum-Welch re-estimation pass per observation sequence: for each
/// sequence the scratch tables are resized, the initial/transition/emission
/// probabilities are re-estimated, and the graph is renormalized.
/// </summary>
/// <param name="hmm">The model to train; stored in the instance's graph field and mutated.</param>
/// <param name="Observations">One int array per observation sequence.</param>
/// <returns>The trained graph (same instance as <paramref name="hmm"/>).</returns>
public HMMGraph Learn(HMMGraph hmm, int[][] Observations) {
    graph = hmm;
    foreach (int[] sequence in Observations) {
        // Resize forward/backward scratch structures to this sequence's length.
        ReInitialize(sequence.Length);
        reestimateInitialProbs(sequence);
        reestimateTransitions(sequence);
        reestimateEmissions(sequence);
        graph.Normalize();
    }
    return graph;
}
/// <summary>
/// Learns a sparse HMM by alternating Baum-Welch training with greedy state
/// splitting: each iteration the highest-gamma state (FindQPrime) is split
/// when its transition or emission distribution is near-uniform, or when the
/// state count has been stuck for more than MAX_STUCK iterations. Growth
/// stops at the state or iteration budget. Intermediate likelihoods are
/// appended to a per-run CSV file.
/// </summary>
/// <param name="trainingData">Sequences used for Baum-Welch re-estimation and state scoring.</param>
/// <param name="validationData">Sequences used for the intermediate likelihood log.</param>
/// <param name="testData">Unused here; present to satisfy the base-class contract.</param>
public override void Learn(SequenceData trainingData, SequenceData validationData, SequenceData testData) {
    intermediateOutputFile = new System.IO.StreamWriter(intermediateOutputFileName + (run++) + ".csv");
    try {
        intermediateOutputFile.WriteLine("States, Likelihood");

        // Initialize a fully connected graph of MINIMUM_STATES nodes with
        // uniform (pre-normalization) transition and emission weights.
        HMMGraph graph = new HMMGraph(trainingData.NumSymbols);
        for (int i = 0; i < MINIMUM_STATES; i++) {
            graph.AddNode(new Node());
        }
        foreach (Node n in graph.Nodes) {
            foreach (Node m in graph.Nodes) {
                n.SetTransition(m, 0.5);
            }
            for (int i = 0; i < trainingData.NumSymbols; i++) {
                n.SetEmission(i, 0.5);
            }
        }
        graph.Normalize();
        this.hmm = SparseHiddenMarkovModel.FromGraph(graph);
        CleanGraph(graph);

        // Concatenate all training sequences; FindQPrime scores states on
        // this combined stream.
        List<int> combined = new List<int>();
        foreach (int[] sequence in trainingData.GetAll()) {
            combined.AddRange(sequence);
        }
        int[] combinedTrainData = combined.ToArray();

        // Grow the model until the state or iteration budget is exhausted.
        int iteration = 1;
        int stuckAt = 1;   // state count seen on the previous iteration
        int stuckFor = 1;  // consecutive iterations at that state count
        while (hmm.NumberOfStates < maximum_states && iteration < maximum_iterations) {
            Console.WriteLine("* Iteration {0} of {1} Model contains {2} states", iteration, maximum_iterations, hmm.NumberOfStates);
            graph = hmm.ToGraph();
            Node qPrime = FindQPrime(graph, combinedTrainData);

            // Track how long the state count has been unchanged so a split
            // can be forced when learning stalls.
            if (stuckAt == hmm.NumberOfStates) {
                stuckFor++;
            }
            else {
                stuckAt = hmm.NumberOfStates;
                stuckFor = 1;
            }
            bool isStuck = stuckFor > MAX_STUCK;

            if (isUniform(qPrime.Transitions.Values.ToArray(), TRANSITION_UNIFORMITY_THRESHOLD)
                    || isUniform(qPrime.Emissions.Values.ToArray(), EMISSION_UNIFORMITY_THRESHOLD)
                    || isStuck) {
                if (isStuck) {
                    Console.WriteLine("Algorithm is stuck: FORCING SPLIT");
                }
                graph = Splitstate(qPrime, graph);
            }
            hmm = SparseHiddenMarkovModel.FromGraph(graph);
            hmm.Learn(trainingData.GetAll(), THRESHOLD, BW_ITERATIONS);
            OutputIntermediate(validationData);
            iteration++;
        }
        hmm = SparseHiddenMarkovModel.FromGraph(graph);
    }
    finally {
        // Guarantee the CSV is flushed and closed even if learning throws;
        // previously an exception anywhere above leaked the StreamWriter.
        intermediateOutputFile.Close();
    }
}