/// <summary>
/// Scores a learner against a test set: queries the learner's probability for
/// every test sequence, normalizes the resulting vector, and delegates to the
/// Evaluate(double[], double[]) overload for the final score.
/// </summary>
/// <param name="learner">Model whose CalculateProbability is queried per sequence.</param>
/// <param name="testSequences">Sequences to score.</param>
/// <param name="solutionData">Reference probabilities to compare against.</param>
/// <returns>The score produced by the Evaluate(double[], double[]) overload.</returns>
public static double Evaluate(Learner learner, SequenceData testSequences, double[] solutionData)
{
    int count = testSequences.Count;
    double[] probabilities = new double[count];
    for (int index = 0; index < count; index++)
    {
        probabilities[index] = learner.CalculateProbability(testSequences[index]);
    }
    double[] normalized = Normalize(probabilities);
    return Evaluate(normalized, solutionData);
}
/// <summary>
/// Trains a PadawanLearner on 10 random permutations of the symbols 0..40 and
/// asserts that the probability it assigns to one of the training sequences
/// falls in (0.9, 1.0).
/// </summary>
public void Learn_3statefullyconnectedgraph_aHMMfittedtothedata() {
    // Arrange
    int[] symbols = Enumerable.Range(0, 41).ToArray();
    List<int[]> obs = new List<int[]>();
    // FIX: seed the RNG. The original used an unseeded Random, making this unit
    // test non-deterministic — a failure could not be reproduced.
    Random rnd = new Random(42);
    for (int i = 0; i < 10; i++) {
        obs.Add(symbols.OrderBy(x => rnd.Next()).ToArray());
    }
    SequenceData sd = new SequenceData(0);
    sd.AddSequences(obs);
    sd.SaveAddedSequences();
    PadawanLearner pwl = new PadawanLearner();
    pwl.Initialise(null, 0);

    // Act
    pwl.Learn(sd, null, null);
    double result = pwl.CalculateProbability(obs[2]);

    // Assert
    Assert.IsTrue(0.9 < result && result < 1.0);
}
// Runs every learner num_runs times, timing each (currently disabled) training
// pass and recording its Pautomac score on the test set, then writes a summary
// of all runs through WriteResultOfRun.
private static void BenchmarkLearners(IEnumerable<Learner> learners, int num_runs, SequenceData trainData, SequenceData testData, double[] solutions, StreamWriter file) {
    // One score and one elapsed-time slot per learner per run.
    Dictionary<Learner, double[]> achieved_scores = new Dictionary<Learner, double[]>();
    Dictionary<Learner, double[]> elapsed_times = new Dictionary<Learner, double[]>();
    foreach (Learner learner in learners) {
        achieved_scores[learner] = new double[num_runs];
        elapsed_times[learner] = new double[num_runs];
    }
    for (int r = 0; r < num_runs; r++) {
        Console.Write("\nRun " + (r+1) + " / " + num_runs);
        //Split training data randomly into train and validation sets
        //Tuple<SequenceData, SequenceData> split = trainData.RandomSplit(0.6666666);
        //trainData = split.Item1;
        //SequenceData validationData = split.Item2;
        foreach (Learner learner in learners) {
            Console.WriteLine("\nEvaluating learner " + learner.Name());
            //Track how much time learning takes for the particular Learner
            Stopwatch sw = new Stopwatch();
            sw.Start();
            // NOTE(review): the Learn call below is commented out (together with
            // the validation split above that it depends on), so the stopwatch
            // measures essentially zero time and every learner is evaluated
            // untrained — confirm whether this was left disabled intentionally.
            //learner.Learn(trainData, validationData, testData);
            sw.Stop();
            //Save score achieved and time spent
            elapsed_times[learner][r] = sw.Elapsed.TotalSeconds;
            achieved_scores[learner][r] = PautomacEvaluator.Evaluate(learner, testData, solutions);
        }
    }
    WriteResultOfRun(learners, achieved_scores, elapsed_times, file);
}
/// <summary>
/// Loads the Pautomac training sequences, test sequences, and solution file for
/// the given dataset number from the local Data directory.
/// </summary>
/// <param name="dataset">Pautomac dataset number used to build the file names.</param>
public BWBenchmarker(int dataset)
{
    this.dataset = dataset;
    string sequencePrefix = String.Format(@"Data/{0}.pautomac", dataset);
    trainData = DataLoader.LoadSequences(sequencePrefix + ".train");
    testData = DataLoader.LoadSequences(sequencePrefix + ".test");
    solutionData = DataLoader.LoadSolutions(String.Format(@"Data/{0}.pautomac_solution.txt", dataset));
}
/// <summary>
/// Loads the Pautomac data files for the given dataset number and clamps the
/// requested set sizes: training is capped at two thirds of the loaded training
/// data, validation at whatever remains after the training set.
/// </summary>
/// <param name="number">Pautomac dataset number used to build the file names.</param>
/// <param name="trainingSetSize">Requested training-set size (upper bound).</param>
/// <param name="validationSetSize">Requested validation-set size (upper bound).</param>
public DataSet(int number, int trainingSetSize, int validationSetSize)
{
    Number = number;
    pautomacTrainingData = DataLoader.LoadSequences(String.Format(@"Data/{0}.pautomac.train", Number));
    TestData = DataLoader.LoadSequences(String.Format(@"Data/{0}.pautomac.test", Number));
    // At most two thirds of the available training data may be used for training.
    int trainingCap = (pautomacTrainingData.Count * 2) / 3;
    this.trainingSetSize = Math.Min(trainingSetSize, trainingCap);
    // Validation gets at most the sequences not claimed by the training set.
    int validationCap = pautomacTrainingData.Count - this.trainingSetSize;
    this.validationSetSize = Math.Min(validationSetSize, validationCap);
    SolutionData = DataLoader.LoadSolutions(String.Format(@"Data/{0}.pautomac_solution.txt", Number));
}
/// <summary>
/// Splits the stored sequences into two SequenceData parts after a seeded
/// shuffle: the first part receives the leading (Count * ratio) shuffled
/// sequences, the second part receives the rest.
/// </summary>
/// <param name="ratio">Fraction (0..1) of sequences that go into the first part.</param>
/// <param name="random_seed">Seed for the shuffle, making the split reproducible.</param>
/// <returns>Tuple of (first part, second part), both already saved.</returns>
public Tuple<SequenceData, SequenceData> RandomSplit(double ratio, int random_seed) {
    SequenceData part1 = new SequenceData(NumSymbols);
    SequenceData part2 = new SequenceData(NumSymbols);
    // ToList() already yields a fresh copy; the original's Select(e => e) was redundant.
    List<int[]> shuffled = sequence_list.ToList();
    Utilities.Shuffle(shuffled, random_seed);
    // Truncation matches the original behavior (e.g. ratio 0.6666666 of 15 -> 9).
    int size_part1 = (int)(shuffled.Count * ratio);
    for (int i = 0; i < shuffled.Count; i++) {
        if (i < size_part1)
            part1.AddSequence(shuffled[i]);
        else
            part2.AddSequence(shuffled[i]);
    }
    part1.SaveAddedSequences();
    part2.SaveAddedSequences();
    return new Tuple<SequenceData, SequenceData>(part1, part2);
}
/// <summary>
/// Parses a Pautomac-format sequence file: the header line's second token is the
/// alphabet size; every following line is "length sym sym ..." where the leading
/// length token is skipped.
/// </summary>
/// <param name="file">Path of the sequence file to read.</param>
/// <returns>A SequenceData holding all parsed sequences, already saved.</returns>
public static SequenceData LoadSequences(string file)
{
    string[] lines = System.IO.File.ReadAllLines(file);
    // Second token of the header line is the number of distinct symbols.
    int num_symbols = Int32.Parse(lines[0].Split(' ')[1]);
    SequenceData seqData = new SequenceData(num_symbols);
    foreach (string line in lines.Skip(1))
    {
        string[] tokens = line.Split(' ');
        // First token is the sequence length; the actual symbols follow it.
        int[] sequence = tokens.Skip(1).Select(Int32.Parse).ToArray();
        seqData.AddSequence(sequence);
    }
    seqData.SaveAddedSequences();
    return seqData;
}
/// <summary>
/// Splits the stored sequences into a training set and a validation set after a
/// seeded shuffle: the training set takes the first trainingDataSize shuffled
/// sequences, the validation set the last validationDataSize.
/// </summary>
/// <param name="trainingDataSize">Number of sequences for the training set.</param>
/// <param name="validationDataSize">Number of sequences for the validation set.</param>
/// <param name="randomSeed">Seed for the shuffle, making the split reproducible.</param>
/// <returns>Tuple of (training set, validation set), both already saved.</returns>
public Tuple<SequenceData, SequenceData> RandomSplit(int trainingDataSize, int validationDataSize, int randomSeed) {
    SequenceData trainingData = new SequenceData(NumSymbols);
    SequenceData validationData = new SequenceData(NumSymbols);
    List<int[]> shuffled = sequence_list.ToList();
    Utilities.Shuffle(shuffled, randomSeed);
    // BUG FIX: the original sampled from the unshuffled sequence_list after
    // shuffling the copy, so the seeded shuffle had no effect and the split was
    // not random. Both sets now come from the shuffled copy. (Also fixed the
    // "validaitonData" typo in the local name.)
    trainingData.AddSequences(shuffled.Take(trainingDataSize));
    validationData.AddSequences(shuffled.Skip(shuffled.Count - validationDataSize).Take(validationDataSize));
    trainingData.SaveAddedSequences();
    validationData.SaveAddedSequences();
    return new Tuple<SequenceData, SequenceData>(trainingData, validationData);
}
/// <summary>
/// Appends every sequence from another SequenceData, carries over its
/// empty-sequence count, and persists the additions via SaveAddedSequences.
/// </summary>
/// <param name="sequenceData">Source whose sequences are copied into this instance.</param>
public void AddSequences(SequenceData sequenceData)
{
    int total = sequenceData.Count;
    for (int index = 0; index < total; index++)
    {
        sequence_list.Add(sequenceData[index]);
    }
    emptySequences += sequenceData.emptySequences;
    SaveAddedSequences();
}
/// <summary>
/// Trains this learner. Implementations fit a model to trainingData; how (or
/// whether) validationData and testData are used is implementation-specific.
/// </summary>
/// <param name="trainingData">Sequences the model is fitted to.</param>
/// <param name="validationData">Held-out sequences, e.g. for model selection.</param>
/// <param name="testData">Test sequences. NOTE(review): passing test data into
/// Learn risks information leakage — confirm implementations use it only for
/// reporting, never for fitting.</param>
public abstract void Learn(SequenceData trainingData, SequenceData validationData, SequenceData testData);
/// <summary>
/// Appends one "states, likelihood" line for the current model to the
/// intermediate CSV output, evaluated over the validation sequences.
/// </summary>
/// <param name="valData">Validation sequences passed to hmm.Evaluate.</param>
private void OutputIntermediate(SequenceData valData) {
    // FIX: the original also concatenated all validation sequences into a
    // 'combined' list whose only consumer was commented out — that dead work
    // (and the commented-out line) is removed.
    intermediateOutputFile.WriteLine(hmm.NumberOfStates + ", " + this.hmm.Evaluate(valData.GetAll(), true));
}
// Greedy state-splitting learner: starting from a small fully-connected HMM, each
// iteration (1) finds the most responsible node qPrime, (2) splits it when its
// transition or emission distribution is near-uniform, or forcibly when progress
// has stalled for MAX_STUCK iterations, then (3) re-runs Baum-Welch and logs
// "states, likelihood" on the validation data.
public override void Learn(SequenceData trainingData, SequenceData validationData, SequenceData testData) {
    #region Junk
    // (An earlier single-pass split-and-relearn experiment lived here as
    // commented-out code: per-sequence gamma-argmax node selection, uniformity
    // checks, manual transition/emission reassignment to two new nodes, then one
    // BW pass. Superseded by the iterative loop below; removed for readability.)
    #endregion
    // Fresh CSV per call; 'run' is incremented so repeated calls get new files.
    intermediateOutputFile = new System.IO.StreamWriter(intermediateOutputFileName + (run++) + ".csv");
    intermediateOutputFile.WriteLine("States, Likelihood");

    // Initialize graph: MINIMUM_STATES nodes, fully connected, with all
    // transition and emission weights set to 0.5 and then normalized into
    // proper distributions (i.e. uniform).
    HMMGraph graph = new HMMGraph(trainingData.NumSymbols);
    for (int i = 0; i < MINIMUM_STATES; i++) {
        graph.AddNode(new Node());
    }
    foreach (Node n in graph.Nodes) {
        foreach (Node m in graph.Nodes) {
            n.SetTransition(m, 0.5);
        }
        for (int i = 0; i < trainingData.NumSymbols; i++) {
            n.SetEmission(i, 0.5);
        }
    }
    graph.Normalize();
    this.hmm = SparseHiddenMarkovModel.FromGraph(graph);
    // NOTE(review): CleanGraph runs after the HMM was built from the graph, so
    // its effect is not reflected in 'hmm' until the next FromGraph — confirm
    // this ordering is intended.
    CleanGraph(graph);
    // NOTE(review): 'rnd' appears unused in this method — candidate for removal.
    Random rnd = new Random();
    // Flatten all training sequences into one array for FindQPrime.
    List<int> cList = new List<int>();
    foreach (int[] a in trainingData.GetAll()) {
        cList.AddRange(a);
    }
    int[] combinedTrainData = cList.ToArray();

    // Run iterations until the model grows to maximum_states or the iteration
    // budget is exhausted.
    int iteration = 1;
    int stuckAt = 1;   // state count observed when we last made progress
    int stuckFor = 1;  // consecutive iterations at that state count
    while(hmm.NumberOfStates < maximum_states && iteration < maximum_iterations) {
        Console.WriteLine("* Iteration {0} of {1} Model contains {2} states",iteration,maximum_iterations,hmm.NumberOfStates);
        graph = hmm.ToGraph();
        Node qPrime = FindQPrime(graph, combinedTrainData);
        // check to see if the algorithm is stuck (state count unchanged)
        if (stuckAt == hmm.NumberOfStates) {
            stuckFor++;
        }
        else {
            stuckAt = hmm.NumberOfStates;
            stuckFor = 1;
        }
        bool isStuck = stuckFor > MAX_STUCK ? true : false;
        // Split qPrime when its distributions are near-uniform (it is not
        // specializing) or when we are stuck and must force growth.
        if (isUniform(qPrime.Transitions.Values.ToArray(),TRANSITION_UNIFORMITY_THRESHOLD)
            || isUniform(qPrime.Emissions.Values.ToArray(),EMISSION_UNIFORMITY_THRESHOLD)
            || isStuck) {
            if (isStuck) {
                Console.WriteLine("Algorithm is stuck: FORCING SPLIT");
            }
            graph = Splitstate(qPrime, graph);
        }
        hmm = SparseHiddenMarkovModel.FromGraph(graph);
        // Re-fit with Baum-Welch, then log states/likelihood on validation data.
        hmm.Learn(trainingData.GetAll(), THRESHOLD, BW_ITERATIONS);
        OutputIntermediate(validationData);
        iteration++;
    }
    // NOTE(review): rebuilding from 'graph' here appears to discard the
    // parameters learned by the final hmm.Learn call above (graph was captured
    // before that BW pass) — confirm this reset is intentional.
    hmm = SparseHiddenMarkovModel.FromGraph(graph);
    intermediateOutputFile.Close();
}