/// <summary>
/// Returns the log likelihood that the model has generated the given sequences.
/// </summary>
/// <param name="hmm">The hidden Markov model to evaluate.</param>
/// <param name="evaluationData">The sequences to evaluate the model against.</param>
/// <returns>The summed log likelihood over all sequences.</returns>
public double LogLikelihood(SparseHiddenMarkovModel hmm, SequenceData evaluationData)
{
    double loglikelihood = 0;
    for (int i = 0; i < evaluationData.Count; i++)
        loglikelihood += Math.Log(hmm.Evaluate(evaluationData[i]));
    return loglikelihood;
}
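// A minimal alternative sketch, not part of the original code: elsewhere in this file the
// overload hmm.Evaluate(signal, true) is summed and reported as a log likelihood, so it
// appears to evaluate in log space. Assuming that is indeed its semantics, the variant below
// avoids taking Math.Log of a raw probability that may underflow to zero for long sequences.
// The method name is hypothetical.
public double LogLikelihoodLogSpace(SparseHiddenMarkovModel hmm, SequenceData evaluationData)
{
    double loglikelihood = 0;
    for (int i = 0; i < evaluationData.Count; i++)
        loglikelihood += hmm.Evaluate(evaluationData[i], true); // log-space evaluation (assumed overload semantics)
    return loglikelihood;
}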
public override void Learn(SequenceData trainingData, SequenceData validationData, SequenceData testData)
{
    HMMGraph graph;
    bestHMM = SparseHiddenMarkovModel.FromCompleteGraph(trainingData.NumSymbols, trainingData.NumSymbols);
    bestHMM.Learn(trainingData.GetNonempty(), baumwelchThreshold);

    while (bestHMM.NumberOfStates < maxStates)
    {
        WriteLine("Taking one more iteration");

        graph = bestHMM.ToGraph();

        // Score each hidden state by the average log probability of the Viterbi paths
        // and emissions it participates in on the validation data.
        Dictionary<int, double> nodePerformance = new Dictionary<int, double>();
        Dictionary<int, int> nodeOccurence = new Dictionary<int, int>();
        double hiddenStateSequenceProbability;

        foreach (int[] signal in validationData.GetNonempty())
        {
            int[] hiddenStateSequence = bestHMM.Viterby(signal, out hiddenStateSequenceProbability);

            for (int j = 0; j < hiddenStateSequence.Length; j++)
            {
                double score = Math.Log(hiddenStateSequenceProbability)
                             + Math.Log(bestHMM.EmissionProbability(hiddenStateSequence[j], signal[j]));

                if (nodePerformance.ContainsKey(hiddenStateSequence[j]))
                {
                    nodePerformance[hiddenStateSequence[j]] += score;
                    nodeOccurence[hiddenStateSequence[j]]++;
                }
                else
                {
                    nodePerformance.Add(hiddenStateSequence[j], score);
                    nodeOccurence.Add(hiddenStateSequence[j], 1);
                }
            }
        }

        foreach (int node in nodeOccurence.Keys)
        {
            nodePerformance[node] /= nodeOccurence[node];
        }

        // Split the worst-performing state to give the model more capacity where it is weakest.
        int weakPoint = nodePerformance.Keys.Aggregate((a, b) => ((nodePerformance[b] < nodePerformance[a]) ? b : a));

        SplitWorstPerformingNode(graph, weakPoint);

        bestHMM = SparseHiddenMarkovModel.FromGraph(graph);

        WriteLine("Running BaumWelch");
        bestHMM.Learn(trainingData.GetNonempty(), baumwelchThreshold); // run the Baum-Welch algorithm

        WriteLine("");
        WriteLine("Log Likelihood: " + LogLikelihood(bestHMM, trainingData));
    }
}
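// SplitWorstPerformingNode is defined elsewhere in the code base. Purely as an illustrative
// sketch (not the actual implementation), a split could clone the weak node's connectivity
// with random weights and let Baum-Welch re-estimate the probabilities afterwards.
// Assumptions: weakPoint indexes into graph.Nodes, Node.Transitions is a Dictionary<Node, double>,
// and numSymbols/rand are supplied by the caller. The method name is hypothetical.
private void SplitWorstPerformingNodeSketch(HMMGraph graph, int weakPoint, int numSymbols, Random rand)
{
    Node weakNode = graph.Nodes[weakPoint];
    Node clone = new Node();

    // Random emissions and initial probability; Baum-Welch will re-fit these.
    for (int s = 0; s < numSymbols; s++)
        clone.SetEmission(s, rand.NextDouble());
    clone.InitialProbability = rand.NextDouble();

    // Mirror the weak node's outgoing edges with random weights.
    foreach (Node target in weakNode.Transitions.Keys)
        clone.SetTransition(target, rand.NextDouble());

    // Mirror its incoming edges as well.
    foreach (Node source in graph.Nodes)
        if (source.Transitions.ContainsKey(weakNode))
            source.SetTransition(clone, rand.NextDouble());

    graph.AddNode(clone);
    graph.Normalize(); // restore proper probability distributions
}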
public override void Learn(SequenceData trainingData, SequenceData validationData, SequenceData testData)
{
    HMMGraph graph = new HMMGraph(trainingData.NumSymbols);

    // Add nodes and set random initial and emission probabilities.
    for (int i = 0; i < states; i++)
    {
        Node new_node = new Node();
        for (int s = 0; s < trainingData.NumSymbols; s++)
            new_node.SetEmission(s, ran.NextDouble());
        new_node.InitialProbability = ran.NextDouble();
        graph.AddNode(new_node);
    }

    // Add random transitions. Each node gets ceil(Log(n)) outgoing edges with random weights.
    int numberOfTransitions = (int)Math.Ceiling(Math.Log(states));
    foreach (Node node in graph.Nodes)
    {
        for (int i = 0; i < numberOfTransitions; i++)
        {
            // Keep drawing random targets until an unused one is found (intentionally empty loop body).
            Node target;
            while (node.Transitions.ContainsKey(target = graph.Nodes[ran.Next(states)]))
                ;

            node.SetTransition(target, ran.NextDouble());
        }
    }

    graph.Normalize();

    hmm = SparseHiddenMarkovModel.FromGraph(graph);
    hmm.Learn(trainingData.GetNonempty(), tolerance);
}
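// Illustrative only: the ceil(Log(n)) rule above keeps the random transition graph sparse.
// The tiny standalone program below (not part of the learner) prints how many outgoing edges
// per node that rule yields for a few model sizes; Math.Log is the natural logarithm.
using System;

static class SparsityIllustration
{
    static void Main()
    {
        foreach (int states in new[] { 2, 8, 32, 128, 512 })
        {
            int edgesPerNode = (int)Math.Ceiling(Math.Log(states)); // same rule as in Learn above
            Console.WriteLine("{0} states -> {1} outgoing edges per node ({2} edges in total)",
                states, edgesPerNode, states * edgesPerNode);
        }
    }
}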
public override void Learn(SequenceData trainingData, SequenceData validationData, SequenceData testData)
{
    hmm = SparseHiddenMarkovModel.FromCompleteGraph(1, trainingData.NumSymbols);

    double temperature = 2;
    double epsilon = 1.0;
    double likelihood = 0.0;
    double newLikelihood = Double.MinValue;

    double lastSparsity = hmm.TransitionSparsity;
    int stagnation = 1;

    do
    {
        // The first pass (temperature == 2) only runs Baum-Welch; edge cutting and
        // state splitting start once the temperature has been raised.
        if (temperature > 2)
        {
            HMMGraph graph = hmm.ToGraph();
            CutEdges(graph, epsilon);

            // The model has not yet been rebuilt from the cut graph here, so this
            // effectively tracks whether the sparsity changed since the last iteration.
            if (hmm.TransitionSparsity != lastSparsity)
            {
                lastSparsity = hmm.TransitionSparsity;
                stagnation = Math.Max(1, (stagnation - 1));
            }
            else
            {
                stagnation++;
            }

            int numberOfStatesToAdd = 1;

            foreach (int weakPoint in IdentifyWeakStates(validationData, numberOfStatesToAdd))
            {
                SplitState(graph, weakPoint);
                stagnation = 1;
            }

            if (numberOfStatesToAdd == 0)
            {
                stagnation *= 2;
            }

            hmm = SparseHiddenMarkovModel.FromGraph(graph);

            WriteLine(String.Format("Added {0} states", numberOfStatesToAdd));
        }

        temperature *= Math.Max(2, Math.Sqrt(hmm.NumberOfStates));
        epsilon = (1 / Math.Log(temperature));

        double bwThreshold = Math.Pow(Math.Max(threshold, (1 / (-Math.Min((-1), Math.Log(Math.Min((1 - threshold), (1 / temperature)) / (1 - threshold)))))), stagnation);

        WriteLine(String.Format("Running Baum-Welch with threshold {0}...", bwThreshold));

        hmm.Learn(trainingData.GetNonempty(), bwThreshold);

        likelihood = newLikelihood;

        newLikelihood = 0.0;
        foreach (int[] signal in validationData.GetNonempty())
        {
            newLikelihood += hmm.Evaluate(signal, true);
        }

        WriteLine(String.Empty);
        WriteLine(String.Format("Stagnation: {0}", stagnation));
        WriteLine(String.Format("Epsilon: {0}", epsilon));
        WriteLine(String.Format("Number of HMM States: {0}", hmm.NumberOfStates));
        WriteLine(String.Format("Transition Sparsity: {0}", hmm.TransitionSparsity));
        WriteLine(String.Format("Log Likelihood: {0}", newLikelihood));
        WriteLine(String.Empty);
    }
    while ((Math.Abs(newLikelihood - likelihood) * Math.Pow(epsilon, 2)) > threshold);
}
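// Illustrative only (standalone, not part of the learner): the loop above multiplies the
// temperature by max(2, sqrt(#states)) every pass and sets epsilon = 1 / ln(temperature),
// so the edge-cutting threshold shrinks as the model grows. Assuming the state count grows
// by one per pass after the first (one split per iteration, as in the code above), the decay
// of epsilon looks like this:
using System;

static class ScheduleIllustration
{
    static void Main()
    {
        double temperature = 2;
        int numberOfStates = 1;

        for (int pass = 1; pass <= 8; pass++)
        {
            temperature *= Math.Max(2, Math.Sqrt(numberOfStates)); // same update as in Learn above
            double epsilon = 1 / Math.Log(temperature);
            Console.WriteLine("pass {0}: states = {1}, temperature = {2:F1}, epsilon = {3:F3}",
                pass, numberOfStates, temperature, epsilon);
            numberOfStates++; // simplifying assumption: one state added per pass
        }
    }
}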