public static double Evaluate(Learner learner, SequenceData testSequences, double[] solutionData) {
     // Score a learner: ask it for a probability per test sequence,
     // normalize the guesses, and delegate to the array-based overload.
     int count = testSequences.Count;
     double[] predictions = new double[count];
     for (int index = 0; index < count; index++)
         predictions[index] = learner.CalculateProbability(testSequences[index]);
     return Evaluate(Normalize(predictions), solutionData);
 }
        public void Learn_3statefullyconnectedgraph_aHMMfittedtothedata()
        {
            // Arrange: build 10 random permutations of the symbols 0..40
            // and wrap them in a SequenceData container.
            int[] symbols = Enumerable.Range(0, 41).ToArray();
            Random rnd = new Random();

            List<int[]> obs = new List<int[]>();
            for (int run = 0; run < 10; run++)
                obs.Add(symbols.OrderBy(x => rnd.Next()).ToArray());

            SequenceData sd = new SequenceData(0);
            sd.AddSequences(obs);
            sd.SaveAddedSequences();

            PadawanLearner pwl = new PadawanLearner();
            pwl.Initialise(null, 0);

            // Act: fit the model, then query the probability of one of the
            // training sequences.
            pwl.Learn(sd, null, null);
            double result = pwl.CalculateProbability(obs[2]);

            // Assert: the fitted model should assign a high probability to
            // a sequence it was trained on.
            Assert.IsTrue(0.9 < result && result < 1.0);
        }
        private static void BenchmarkLearners(IEnumerable<Learner> learners, int num_runs, SequenceData trainData, SequenceData testData, double[] solutions, StreamWriter file) {
            // Per-learner result buffers: one score and one timing per run.
            Dictionary<Learner, double[]> achieved_scores = new Dictionary<Learner, double[]>();
            Dictionary<Learner, double[]> elapsed_times = new Dictionary<Learner, double[]>();
            foreach (Learner l in learners) {
                achieved_scores[l] = new double[num_runs];
                elapsed_times[l] = new double[num_runs];
            }

            for (int run = 0; run < num_runs; run++) {
                Console.Write("\nRun " + (run + 1) + " / " + num_runs);
                // (Splitting trainData into train/validation subsets is
                // currently disabled.)

                foreach (Learner l in learners) {
                    Console.WriteLine("\nEvaluating learner " + l.Name());

                    // Time the learning phase for this learner.
                    // NOTE: the Learn call is currently commented out, so the
                    // recorded time only covers the stopwatch overhead.
                    Stopwatch timer = Stopwatch.StartNew();
                    //l.Learn(trainData, validationData, testData);
                    timer.Stop();

                    // Record time spent and score achieved for this run.
                    elapsed_times[l][run] = timer.Elapsed.TotalSeconds;
                    achieved_scores[l][run] = PautomacEvaluator.Evaluate(l, testData, solutions);
                }
            }

            WriteResultOfRun(learners, achieved_scores, elapsed_times, file);
        }
        public BWBenchmarker(int dataset)
        {
            // Remember which PAutomaC dataset this benchmarker targets and
            // load its training/test sequences plus the published solutions.
            this.dataset = dataset;
            trainData = DataLoader.LoadSequences($"Data/{dataset}.pautomac.train");
            testData = DataLoader.LoadSequences($"Data/{dataset}.pautomac.test");
            solutionData = DataLoader.LoadSolutions($"Data/{dataset}.pautomac_solution.txt");
        }
		public DataSet(int number, int trainingSetSize, int validationSetSize)
        {
            Number = number;

            pautomacTrainingData = DataLoader.LoadSequences(String.Format(@"Data/{0}.pautomac.train", Number));
            TestData = DataLoader.LoadSequences(String.Format(@"Data/{0}.pautomac.test", Number));

            this.trainingSetSize = Math.Min(trainingSetSize, ((pautomacTrainingData.Count * 2) / 3));
			this.validationSetSize = Math.Min(validationSetSize, (pautomacTrainingData.Count - this.trainingSetSize));

            SolutionData = DataLoader.LoadSolutions(String.Format(@"Data/{0}.pautomac_solution.txt", Number));
        }
 public Tuple<SequenceData, SequenceData> RandomSplit(double ratio, int random_seed) {
     // Shuffle a copy of the stored sequences, then cut it so the first
     // part holds `ratio` of the sequences and the second part the rest.
     List<int[]> pool = sequence_list.ToList();
     Utilities.Shuffle(pool, random_seed);

     SequenceData first = new SequenceData(NumSymbols);
     SequenceData second = new SequenceData(NumSymbols);
     int cutoff = (int)(pool.Count * ratio);
     for (int i = 0; i < pool.Count; i++) {
         SequenceData target = i < cutoff ? first : second;
         target.AddSequence(pool[i]);
     }

     first.SaveAddedSequences();
     second.SaveAddedSequences();
     return new Tuple<SequenceData, SequenceData>(first, second);
 }
        public static SequenceData LoadSequences(string file) {
            // Parse a PAutomaC-format sequence file. The header line is
            // "<count> <num_symbols>"; every following line is
            // "<length> <sym_1> ... <sym_length>".
            string[] lines = System.IO.File.ReadAllLines(file);

            // Alphabet size is the second token of the header line; parse
            // with the invariant culture since this is machine-written data.
            int num_symbols = Int32.Parse(lines[0].Split(' ')[1], System.Globalization.CultureInfo.InvariantCulture);
            SequenceData seqData = new SequenceData(num_symbols);

            for (int i = 1; i < lines.Length; i++) {
                // RemoveEmptyEntries guards against double or trailing spaces,
                // which previously produced an empty token and made
                // Int32.Parse("") throw a FormatException.
                string[] tokens = lines[i].Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

                // Skip the first token on each line (the sequence length);
                // the remaining tokens are the symbols themselves.
                int[] currentSeq = tokens.Skip(1).Select(p => Int32.Parse(p, System.Globalization.CultureInfo.InvariantCulture)).ToArray();
                seqData.AddSequence(currentSeq);
            }
            seqData.SaveAddedSequences();
            return seqData;
        }
        public Tuple<SequenceData, SequenceData> RandomSplit(int trainingDataSize, int validationDataSize, int randomSeed)
        {
            // Shuffle a copy of the stored sequences, then take the first
            // trainingDataSize sequences for training and the last
            // validationDataSize sequences for validation.
            SequenceData trainingData = new SequenceData(NumSymbols);
            SequenceData validationData = new SequenceData(NumSymbols);

            List<int[]> shuffled = sequence_list.ToList();
            Utilities.Shuffle(shuffled, randomSeed);

            // BUG FIX: the split must be taken from the shuffled copy.
            // Previously both parts sampled the unshuffled sequence_list,
            // which silently made the shuffle (and randomSeed) a no-op.
            trainingData.AddSequences(shuffled.Take(trainingDataSize));
            validationData.AddSequences(shuffled.Skip(shuffled.Count - validationDataSize).Take(validationDataSize));

            trainingData.SaveAddedSequences();
            validationData.SaveAddedSequences();

            return new Tuple<SequenceData, SequenceData>(trainingData, validationData);
        }
 public void AddSequences(SequenceData sequenceData) {
     // Copy every sequence from the other container into this one, carry
     // over its empty-sequence count, and commit the additions.
     for (int index = 0; index < sequenceData.Count; index++) {
         sequence_list.Add(sequenceData[index]);
     }
     emptySequences += sequenceData.emptySequences;
     SaveAddedSequences();
 }
Example #10
0
 /// <summary>
 /// Fits this learner to <paramref name="trainingData"/>. Implementations may
 /// also consult <paramref name="validationData"/> and
 /// <paramref name="testData"/> (e.g. for intermediate evaluation).
 /// </summary>
 public abstract void Learn(SequenceData trainingData, SequenceData validationData, SequenceData testData);
        private void OutputIntermediate(SequenceData valData) {
            // Log the current model size and its likelihood on the validation
            // data as one CSV row (columns: "States, Likelihood").
            // The dead List<int> accumulation that only fed a commented-out
            // alternative metric has been removed — it did O(n) work per call
            // for no observable effect.
            intermediateOutputFile.WriteLine(hmm.NumberOfStates + ", " + this.hmm.Evaluate(valData.GetAll(), true));
        }
        public override void Learn(SequenceData trainingData,
                SequenceData validationData, SequenceData testData) 
        {
            // State-splitting HMM learner: start from a small fully connected
            // graph and repeatedly split the selected node (qPrime) until the
            // model reaches maximum_states or the iteration budget runs out.
            // (A ~55-line block of commented-out experimental code that
            // previously lived in "#region Junk" has been deleted.)

            intermediateOutputFile = new System.IO.StreamWriter(intermediateOutputFileName + (run++) + ".csv");
            intermediateOutputFile.WriteLine("States, Likelihood");

            // Initialize a fully connected graph; the 0.5 placeholders become
            // proper probability distributions after Normalize().
            HMMGraph graph = new HMMGraph(trainingData.NumSymbols);
            for (int i = 0; i < MINIMUM_STATES; i++) {
                graph.AddNode(new Node());
            }
            foreach (Node n in graph.Nodes) {
                foreach (Node m in graph.Nodes) {
                    n.SetTransition(m, 0.5);
                }
                for (int i = 0; i < trainingData.NumSymbols; i++) {
                    n.SetEmission(i, 0.5);
                }
            }
            graph.Normalize();

            this.hmm = SparseHiddenMarkovModel.FromGraph(graph);
            CleanGraph(graph);

            // Concatenate all training sequences into a single observation
            // stream for FindQPrime. (An unused Random local was removed.)
            List<int> cList = new List<int>();
            foreach (int[] a in trainingData.GetAll()) {
                cList.AddRange(a);
            }
            int[] combinedTrainData = cList.ToArray();

            // Main loop: pick a node, maybe split it, re-learn with
            // Baum-Welch, and log progress on the validation data.
            int iteration = 1;
            int stuckAt = 1;   // state count observed last iteration
            int stuckFor = 1;  // consecutive iterations at that state count

            while (hmm.NumberOfStates < maximum_states
                   && iteration < maximum_iterations) {

                Console.WriteLine("* Iteration {0} of {1} Model contains {2} states", iteration, maximum_iterations, hmm.NumberOfStates);

                graph = hmm.ToGraph();
                Node qPrime = FindQPrime(graph, combinedTrainData);

                // Stagnation check: if the state count has not changed for
                // more than MAX_STUCK iterations, force a split below.
                if (stuckAt == hmm.NumberOfStates) {
                    stuckFor++;
                }
                else {
                    stuckAt = hmm.NumberOfStates;
                    stuckFor = 1;
                }
                bool isStuck = stuckFor > MAX_STUCK;

                // Split qPrime when its transitions or emissions are too
                // uniform (no information gained by keeping it whole), or
                // when the algorithm is stuck.
                if (isUniform(qPrime.Transitions.Values.ToArray(), TRANSITION_UNIFORMITY_THRESHOLD)
                    || isUniform(qPrime.Emissions.Values.ToArray(), EMISSION_UNIFORMITY_THRESHOLD)
                    || isStuck)
                {
                    if (isStuck) {
                        Console.WriteLine("Algorithm is stuck: FORCING SPLIT");
                    }
                    graph = Splitstate(qPrime, graph);
                }

                // Rebuild the HMM from the (possibly split) graph and refine
                // it with Baum-Welch, then log an intermediate CSV row.
                hmm = SparseHiddenMarkovModel.FromGraph(graph);
                hmm.Learn(trainingData.GetAll(), THRESHOLD, BW_ITERATIONS);
                OutputIntermediate(validationData);
                iteration++;
            }

            // NOTE(review): `graph` was captured before the final hmm.Learn
            // call, so rebuilding from it here appears to discard the last
            // Baum-Welch refinement — confirm this is intended (kept as-is to
            // preserve existing behavior).
            hmm = SparseHiddenMarkovModel.FromGraph(graph);
            intermediateOutputFile.Close();
        }