/// <summary>
/// Smoke test: running Baum-Welch on a 4-state HMM with random permutation
/// observation sequences should produce a non-null model.
/// Reported inconclusive because no quantitative check of the learned model is made yet.
/// </summary>
public void LearnTest_validInput_ModelDescribingTheData()
{
    // Arrange
    HMMGraph hmm = new HMMGraph(NUMBER_OF_SYMBOLS_IN_HMMGRAPH);
    List<int[]> obs = new List<int[]>();
    Random rnd = new Random();
    // Four random permutations of the symbols 1..49.
    // NOTE(review): confirm NUMBER_OF_SYMBOLS_IN_HMMGRAPH covers symbols up to 49,
    // otherwise the emission alphabet and the observation data disagree.
    for (int i = 0; i < 4; i++)
    {
        obs.Add(Enumerable.Range(1, 49).OrderBy(x => rnd.Next()).ToArray());
    }
    hmm.AddNode(new Node());
    hmm.AddNode(new Node());
    hmm.AddNode(new Node());
    hmm.AddNode(new Node());
    BaumWelch BW = new BaumWelch();

    // Act
    HMMGraph result = BW.Learn(hmm, obs.ToArray());

    // Assert
    Assert.IsNotNull(result);
    // Only non-null is checked; mark inconclusive until a real quality
    // criterion for the learned model is asserted.
    Assert.Inconclusive();
}
/// <summary>
/// After repeated Baum-Welch passes, every state's transition distribution
/// and emission distribution must still sum to 1 (within a tiny tolerance).
/// </summary>
public void Learn_HMMWithRandomDistributedProbs_LearnsAConsistentModelBasedOnData()
{
    // Arrange: ten random permutations of the symbols 1..42 as training data.
    int[] symbols = Enumerable.Range(1, 42).ToArray();
    List<int[]> observations = new List<int[]>();
    Random shuffler = new Random();
    for (int seq = 0; seq < 10; seq++)
    {
        observations.Add(symbols.OrderBy(x => shuffler.Next()).ToArray());
    }

    // A 4-state graph where every transition/emission weight starts at 0.5
    // and is then normalized into proper probability distributions.
    HMMGraph hmm = new HMMGraph(symbols.Length);
    for (int state = 0; state < 4; state++)
    {
        hmm.AddNode(new Node());
    }
    foreach (Node source in hmm.Nodes)
    {
        foreach (Node target in hmm.Nodes)
        {
            source.SetTransition(target, 0.5);
        }
        foreach (int symbol in symbols)
        {
            source.SetEmission(symbol, 0.5);
        }
    }
    hmm.Normalize();
    BaumWelch bw = new BaumWelch(observations.Count, hmm);

    // Act: five Baum-Welch passes over the same data.
    for (int pass = 0; pass < 5; pass++)
    {
        bw.Learn(hmm, observations.ToArray());
    }

    // Assert: transition and emission masses per state are both ~1.
    const double PRECISION = .00000000001;
    foreach (Node state in hmm.Nodes)
    {
        double transitionMass = state.Transitions.Values.Sum();
        Assert.IsTrue(1.0 - PRECISION < transitionMass && transitionMass < 1.0 + PRECISION);

        double emissionMass = state.Emissions.Values.Sum();
        Assert.IsTrue(1.0 - PRECISION < emissionMass && emissionMass < 1.0 + PRECISION);
    }
}
/// <summary>
/// Splits a state by adding a clone of <paramref name="qPrime"/> to the graph:
/// the clone copies qPrime's emissions and initial probability, while its
/// incoming and outgoing transitions are drawn at random. The graph is
/// re-normalized before being returned.
/// </summary>
private HMMGraph Splitstate(Node qPrime, HMMGraph graph)
{
    Random rng = new Random();
    Node clone = new Node();

    // Random outgoing transitions from the clone to every existing state.
    foreach (Node target in graph.Nodes)
    {
        clone.SetTransition(target, rng.NextDouble());
    }

    // Emissions and initial probability are copied verbatim from the split state.
    foreach (int symbol in qPrime.Emissions.Keys)
    {
        clone.SetEmission(symbol, qPrime.Emissions[symbol]);
    }
    clone.InitialProbability = qPrime.InitialProbability;

    // Random incoming transitions from every existing state to the clone.
    foreach (Node source in graph.Nodes)
    {
        source.Transitions[clone] = rng.NextDouble();
    }

    // Random self-loop for the clone.
    clone.SetTransition(clone, rng.NextDouble());

    graph.AddNode(clone);
    graph.Normalize();
    return graph;
}
/// <summary>
/// Builds a random HMM graph with the given number of states and symbols.
/// Each state receives a random (normalized) initial probability and emission
/// distribution. Out-degrees alternate: floor(outDegree) for odd state
/// indices, ceiling(outDegree) for even ones, with transitions targeting the
/// next states in cyclic order. Draws from the class-level random source.
/// </summary>
static HMMGraph CreateGraph(int numberOfSymbols, int numberOfStates, double outDegree)
{
    HMMGraph graph = new HMMGraph(numberOfSymbols);

    // First pass: create states with random initial/emission weights.
    double initialMass = 0.0;
    for (int state = 0; state < numberOfStates; state++)
    {
        Node node = new Node();
        node.InitialProbability = random.NextDouble();
        initialMass += node.InitialProbability;

        node.Emissions = new Dictionary<int, double>();
        for (int symbol = 0; symbol < numberOfSymbols; symbol++)
        {
            node.Emissions.Add(symbol, random.NextDouble());
        }
        // Normalize emissions into a probability distribution.
        double emissionMass = node.Emissions.Values.Sum();
        for (int symbol = 0; symbol < numberOfSymbols; symbol++)
        {
            node.Emissions[symbol] /= emissionMass;
        }

        graph.AddNode(node);
    }

    // Second pass: normalize initial probabilities and wire up transitions.
    for (int state = 0; state < numberOfStates; state++)
    {
        Node node = graph.Nodes[state];
        node.InitialProbability /= initialMass;

        node.Transitions = new Dictionary<Node, double>();
        int fanOut = (int)((state % 2 == 1) ? Math.Floor(outDegree) : Math.Ceiling(outDegree));
        for (int offset = 0; offset < fanOut; offset++)
        {
            node.Transitions.Add(graph.Nodes[(state + offset) % numberOfStates], random.NextDouble());
        }
        // Normalize transitions into a probability distribution.
        double transitionMass = node.Transitions.Values.Sum();
        for (int offset = 0; offset < fanOut; offset++)
        {
            node.Transitions[graph.Nodes[(state + offset) % numberOfStates]] /= transitionMass;
        }
    }

    return graph;
}
/// <summary>
/// Converts a HiddenMarkovModel into the graph representation: one node per
/// state carrying that state's initial probability, full transition row and
/// full emission row.
/// </summary>
public static HMMGraph HMM2Graph(HiddenMarkovModel hmm)
{
    HMMGraph graph = new HMMGraph(hmm.Symbols);

    // First pass: create one node per state so transitions can reference them.
    Node[] stateNodes = new Node[hmm.States];
    for (int state = 0; state < hmm.States; state++)
    {
        stateNodes[state] = new Node();
        graph.AddNode(stateNodes[state]);
    }

    // Second pass: copy initial, transition and emission probabilities.
    for (int state = 0; state < hmm.States; state++)
    {
        Node node = stateNodes[state];
        node.InitialProbability = hmm.Probabilities[state];
        for (int target = 0; target < hmm.States; target++)
        {
            node.SetTransition(stateNodes[target], hmm.Transitions[state, target]);
        }
        for (int symbol = 0; symbol < hmm.Symbols; symbol++)
        {
            node.SetEmission(symbol, hmm.Emissions[state, symbol]);
        }
    }

    return graph;
}
/// <summary>
/// Converts this model into its HMMGraph representation: each state becomes a
/// node with its initial probability, its emission map (over the symbols
/// listed in emissions[i]) and its outgoing transition map (over the targets
/// listed in transitionsOut[i]).
/// </summary>
public HMMGraph ToGraph()
{
    HMMGraph graph = new HMMGraph(NumberOfSymbols);

    // Create nodes first so transition targets exist before being referenced.
    for (int state = 0; state < NumberOfStates; state++)
    {
        Node node = new Node();
        node.InitialProbability = initialDistribution[state];
        node.Emissions = emissions[state].ToDictionary(
            symbol => symbol,
            symbol => emissionProbabilities[state, symbol]);
        graph.AddNode(node);
    }

    // Wire up transitions by index into the freshly created node list.
    for (int state = 0; state < NumberOfStates; state++)
    {
        graph.Nodes[state].Transitions = transitionsOut[state].ToDictionary(
            target => graph.Nodes[target],
            target => transitionProbabilities[state, target]);
    }

    return graph;
}
/// <summary>
/// Greedy state-splitting learner: starts from a small uniform model, then
/// repeatedly (1) picks a candidate state qPrime, (2) splits it when its
/// transition/emission distributions are near-uniform or when the search has
/// stalled, and (3) re-estimates parameters with Baum-Welch, until the model
/// reaches the state or iteration limit. Intermediate results are written to
/// a per-run CSV file.
/// </summary>
public override void Learn(SequenceData trainingData, SequenceData validationData, SequenceData testData)
{
    intermediateOutputFile = new System.IO.StreamWriter(intermediateOutputFileName + (run++) + ".csv");
    intermediateOutputFile.WriteLine("States, Likelihood");

    // Initialize a MINIMUM_STATES-node graph with uniform weights, then
    // normalize them into proper probability distributions.
    HMMGraph graph = new HMMGraph(trainingData.NumSymbols);
    for (int i = 0; i < MINIMUM_STATES; i++)
    {
        graph.AddNode(new Node());
    }
    foreach (Node n in graph.Nodes)
    {
        foreach (Node m in graph.Nodes)
        {
            n.SetTransition(m, 0.5);
        }
        for (int i = 0; i < trainingData.NumSymbols; i++)
        {
            n.SetEmission(i, 0.5);
        }
    }
    graph.Normalize();

    this.hmm = SparseHiddenMarkovModel.FromGraph(graph);
    CleanGraph(graph);

    // Concatenate all training sequences into one stream for FindQPrime.
    List<int> cList = new List<int>();
    foreach (int[] a in trainingData.GetAll())
    {
        cList.AddRange(a);
    }
    int[] combinedTrainData = cList.ToArray();

    // Grow the model until it hits the state or iteration limit.
    int iteration = 1;
    int stuckAt = 1;
    int stuckFor = 1;
    while (hmm.NumberOfStates < maximum_states && iteration < maximum_iterations)
    {
        Console.WriteLine("* Iteration {0} of {1} Model contains {2} states", iteration, maximum_iterations, hmm.NumberOfStates);
        graph = hmm.ToGraph();
        Node qPrime = FindQPrime(graph, combinedTrainData);

        // Track how many consecutive iterations the state count has not grown.
        if (stuckAt == hmm.NumberOfStates)
        {
            stuckFor++;
        }
        else
        {
            stuckAt = hmm.NumberOfStates;
            stuckFor = 1;
        }
        bool isStuck = stuckFor > MAX_STUCK;

        // Split when qPrime looks uninformative (near-uniform transitions or
        // emissions) or to force progress when the search has stalled.
        if (isUniform(qPrime.Transitions.Values.ToArray(), TRANSITION_UNIFORMITY_THRESHOLD)
            || isUniform(qPrime.Emissions.Values.ToArray(), EMISSION_UNIFORMITY_THRESHOLD)
            || isStuck)
        {
            if (isStuck)
            {
                Console.WriteLine("Algorithm is stuck: FORCING SPLIT");
            }
            graph = Splitstate(qPrime, graph);
        }

        hmm = SparseHiddenMarkovModel.FromGraph(graph);
        hmm.Learn(trainingData.GetAll(), THRESHOLD, BW_ITERATIONS);
        OutputIntermediate(validationData);
        iteration++;
    }

    // NOTE(review): this rebuilds hmm from the pre-Learn graph of the last
    // iteration, discarding the final Baum-Welch re-estimation — confirm this
    // is intentional before changing or removing it.
    hmm = SparseHiddenMarkovModel.FromGraph(graph);

    intermediateOutputFile.Close();
}