/// <summary>
/// Builds a sparse HMM from an explicit graph representation: copies the
/// initial distribution, the dense transition/emission probability matrices,
/// and the sparse adjacency index arrays (transitionsIn/Out, emissions,
/// emittents) that the sparse model uses to skip zero entries.
/// </summary>
/// <param name="graph">Source graph whose nodes define states and whose edges define transition/emission probabilities.</param>
/// <returns>A new <see cref="SparseHiddenMarkovModel"/> equivalent to <paramref name="graph"/>.</returns>
public static SparseHiddenMarkovModel FromGraph(HMMGraph graph) {
    SparseHiddenMarkovModel model = new SparseHiddenMarkovModel(graph.NumNodes, graph.NumSymbols);
    model.initialDistribution = graph.Nodes.Select(n => n.InitialProbability).ToArray();
    model.transitionProbabilities = new double[graph.NumNodes, graph.NumNodes];
    model.emissionProbabilities = new double[graph.NumNodes, graph.NumSymbols];

    // Map each node to its index once. The original code called
    // graph.Nodes.IndexOf(node) (an O(n) linear scan, using the same default
    // equality comparer as this dictionary) for every transition edge, which
    // made construction O(n^2) per node in the dense case.
    Dictionary<Node, int> nodeIndex = new Dictionary<Node, int>();
    for (int i = 0; i < graph.NumNodes; i++) {
        nodeIndex[graph.Nodes[i]] = i;
    }

    // Collect incoming-transition lists while walking the outgoing edges.
    List<int>[] inTrans = Enumerable.Range(0, graph.NumNodes).Select(_ => new List<int>()).ToArray();
    for (int i = 0; i < graph.NumNodes; i++) {
        List<int> indexes = new List<int>();
        foreach (Node node in graph.Nodes[i].Transitions.Keys) {
            int index = nodeIndex[node];
            indexes.Add(index);
            model.transitionProbabilities[i, index] = graph.Nodes[i].Transitions[node];
            inTrans[index].Add(i);
        }
        model.transitionsOut[i] = indexes.ToArray();
    }
    for (int i = 0; i < graph.NumNodes; i++) {
        model.transitionsIn[i] = inTrans[i].ToArray();
    }

    // Emissions: record which symbols each state emits, and invert that into
    // which states emit each symbol.
    List<int>[] symbolEmittents = Enumerable.Range(0, graph.NumSymbols).Select(_ => new List<int>()).ToArray();
    for (int i = 0; i < graph.NumNodes; i++) {
        foreach (int j in graph.Nodes[i].Emissions.Keys) {
            model.emissionProbabilities[i, j] = graph.Nodes[i].Emissions[j];
            symbolEmittents[j].Add(i);
        }
        model.emissions[i] = graph.Nodes[i].Emissions.Keys.ToArray();
    }
    for (int i = 0; i < graph.NumSymbols; i++) {
        model.emittents[i] = symbolEmittents[i].ToArray();
    }

    return model;
}
/// <summary>
/// Creates a fully-connected sparse HMM with the given dimensions. The
/// initial distribution, every transition-matrix row, and every
/// emission-matrix row are independent random probability distributions
/// (uniform random weights, normalized to sum to 1). Every state is wired
/// to every state and every symbol, so the sparse index arrays are dense.
/// </summary>
/// <param name="numberOfStates">Number of hidden states.</param>
/// <param name="numberOfSymbols">Size of the observation alphabet.</param>
/// <returns>A randomly initialized, fully-connected model.</returns>
public static SparseHiddenMarkovModel FromCompleteGraph(int numberOfStates, int numberOfSymbols) {
    SparseHiddenMarkovModel model = new SparseHiddenMarkovModel(numberOfStates, numberOfSymbols);

    // Random, normalized initial state distribution.
    model.initialDistribution = RandomDistribution(model.random, numberOfStates);

    // Complete graph: every state both reaches and is reached by every state.
    model.transitionProbabilities = new double[numberOfStates, numberOfStates];
    for (int i = 0; i < numberOfStates; i++) {
        model.transitionsIn[i] = Enumerable.Range(0, numberOfStates).ToArray();
        model.transitionsOut[i] = Enumerable.Range(0, numberOfStates).ToArray();
        double[] row = RandomDistribution(model.random, numberOfStates);
        for (int j = 0; j < numberOfStates; j++) {
            model.transitionProbabilities[i, j] = row[j];
        }
    }

    // Every state emits every symbol; each state's emission row is its own
    // random distribution over the alphabet.
    model.emissionProbabilities = new double[numberOfStates, numberOfSymbols];
    for (int i = 0; i < numberOfSymbols; i++) {
        model.emittents[i] = Enumerable.Range(0, numberOfStates).ToArray();
    }
    for (int i = 0; i < numberOfStates; i++) {
        model.emissions[i] = Enumerable.Range(0, numberOfSymbols).ToArray();
        double[] row = RandomDistribution(model.random, numberOfSymbols);
        for (int j = 0; j < numberOfSymbols; j++) {
            model.emissionProbabilities[i, j] = row[j];
        }
    }

    return model;
}

/// <summary>
/// Draws <paramref name="length"/> uniform random weights and normalizes
/// them to sum to 1. Extracted from the three identical
/// generate-accumulate-divide loops in <see cref="FromCompleteGraph"/>;
/// the sequence of <see cref="Random.NextDouble"/> calls is unchanged.
/// </summary>
private static double[] RandomDistribution(Random random, int length) {
    double[] values = new double[length];
    double weight = 0.0;
    for (int i = 0; i < length; i++) {
        values[i] = random.NextDouble();
        weight += values[i];
    }
    for (int i = 0; i < length; i++) {
        values[i] /= weight;
    }
    return values;
}
/// <summary>
/// Trains the model with a grow-by-splitting strategy: start from a small
/// uniform fully-connected graph, then repeatedly pick a candidate state
/// (<c>FindQPrime</c>), split it when its transition or emission
/// distribution is near-uniform (or when growth has stalled for more than
/// MAX_STUCK iterations), and refine with Baum-Welch. Per-iteration
/// likelihoods are appended to a CSV intermediate-output file.
/// </summary>
/// <param name="trainingData">Sequences used for Baum-Welch refinement and state selection.</param>
/// <param name="validationData">Sequences passed to <c>OutputIntermediate</c> for per-iteration reporting.</param>
/// <param name="testData">Unused by this implementation; kept for the base-class signature.</param>
public override void Learn(SequenceData trainingData, SequenceData validationData, SequenceData testData) {
    intermediateOutputFile = new System.IO.StreamWriter(intermediateOutputFileName + (run++) + ".csv");
    try {
        intermediateOutputFile.WriteLine("States, Likelihood");

        // Initialize a small, fully-connected graph with uniform weights
        // (Normalize turns the constant 0.5 entries into proper distributions).
        HMMGraph graph = new HMMGraph(trainingData.NumSymbols);
        for (int i = 0; i < MINIMUM_STATES; i++) {
            graph.AddNode(new Node());
        }
        foreach (Node n in graph.Nodes) {
            foreach (Node m in graph.Nodes) {
                n.SetTransition(m, 0.5);
            }
            for (int i = 0; i < trainingData.NumSymbols; i++) {
                n.SetEmission(i, 0.5);
            }
        }
        graph.Normalize();
        this.hmm = SparseHiddenMarkovModel.FromGraph(graph);
        CleanGraph(graph);

        // Flatten all training sequences into one stream for FindQPrime.
        List<int> cList = new List<int>();
        foreach (int[] a in trainingData.GetAll()) {
            cList.AddRange(a);
        }
        int[] combinedTrainData = cList.ToArray();

        // Grow the model until it hits the state or iteration budget.
        int iteration = 1;
        int stuckAt = 1;   // state count observed on the previous iteration
        int stuckFor = 1;  // consecutive iterations without growth
        while (hmm.NumberOfStates < maximum_states && iteration < maximum_iterations) {
            Console.WriteLine("* Iteration {0} of {1} Model contains {2} states", iteration, maximum_iterations, hmm.NumberOfStates);
            graph = hmm.ToGraph();
            Node qPrime = FindQPrime(graph, combinedTrainData);

            // Track how long the state count has been flat so a split can be
            // forced even when neither distribution looks uniform.
            if (stuckAt == hmm.NumberOfStates) {
                stuckFor++;
            } else {
                stuckAt = hmm.NumberOfStates;
                stuckFor = 1;
            }
            bool isStuck = stuckFor > MAX_STUCK;

            if (isUniform(qPrime.Transitions.Values.ToArray(), TRANSITION_UNIFORMITY_THRESHOLD)
                    || isUniform(qPrime.Emissions.Values.ToArray(), EMISSION_UNIFORMITY_THRESHOLD)
                    || isStuck) {
                if (isStuck) {
                    Console.WriteLine("Algorithm is stuck: FORCING SPLIT");
                }
                graph = Splitstate(qPrime, graph);
            }

            hmm = SparseHiddenMarkovModel.FromGraph(graph);
            hmm.Learn(trainingData.GetAll(), THRESHOLD, BW_ITERATIONS);
            OutputIntermediate(validationData);
            iteration++;
        }

        // NOTE(review): this rebuilds the model from the last pre-split/pre-BW
        // graph snapshot, discarding the Baum-Welch refinement done in the
        // final loop iteration. Preserved as-is from the original; confirm
        // this is intentional before changing it.
        hmm = SparseHiddenMarkovModel.FromGraph(graph);
    } finally {
        // Ensure the CSV writer is released even if learning throws.
        intermediateOutputFile.Close();
    }
}