Ejemplo n.º 1
0
        /// <summary>
        /// Parses the command line, loads the ARFF data set named by the parsed parameters,
        /// and runs the evaluation selected by <c>parameters.Evaluation</c>
        /// ("training", "static", "test", "random" or "cross"), printing results to the console.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments, passed to <c>ArgParser.Parse</c>.
        /// Example: <c>-L baseline -A data/iris.arff -E cross 10 -N</c>.
        /// </param>
        /// <exception cref="Exception">
        /// Thrown when the "random" training percentage is outside [0, 1] or the "cross"
        /// fold count is not positive.
        /// </exception>
        public void Run(string[] args)
        {
            var rand = Rand.Get();

            // Parse the command line arguments
            ArgParser.Parse(args);
            var parameters = Parameters.Get();

            // Load the model
            var learner = GetLearner(parameters, rand);

            // Load the ARFF file
            var data = new Matrix();

            data.LoadArff(parameters.Arff);

            // At least one column must remain as a feature.
            if (parameters.Outputs > data.Cols() - 1)
            {
                Console.WriteLine("Too many outputs: " + parameters.Outputs);
                Environment.Exit(0);
            }

            if (parameters.Normalize)
            {
                Console.WriteLine("Using normalized data\n");
                data.Normalize();
            }

            // Print some stats
            Console.WriteLine();
            Console.WriteLine("Dataset name: " + parameters.Arff);
            Console.WriteLine("Number of instances: " + data.Rows());
            Console.WriteLine("Number of attributes: " + data.Cols());
            Console.WriteLine("Learning algorithm: " + parameters.Learner);
            Console.WriteLine("Evaluation method: " + parameters.Evaluation);
            Console.WriteLine("Learning Rate: " + parameters.Rate);
            Console.WriteLine("Outputs: " + parameters.Outputs);
            Console.WriteLine("Snapshot File: " + parameters.SnapshotFileName);
            Console.WriteLine();

            if (parameters.Evaluation == "training")
            {
                // Train on the whole data set and report accuracy on that same data.
                Console.WriteLine("Calculating accuracy on training set...");
                var features = new VMatrix(data, 0, 0, data.Rows(), data.Cols() - parameters.Outputs);
                var labels   = new VMatrix();
                if (parameters.Outputs > 0)
                {
                    // The last Outputs columns are the labels.
                    labels = new VMatrix(data, 0, data.Cols() - parameters.Outputs, data.Rows(), parameters.Outputs);
                }
                var confusion = new Matrix();
                var startTime = DateTime.Now.Ticks;
                learner.VTrain(features, labels);
                var elapsedTime = new TimeSpan(DateTime.Now.Ticks - startTime);
                Console.WriteLine("Time to train (in seconds): " + elapsedTime.TotalSeconds);
                if (parameters.Outputs > 0)
                {
                    var accuracy = learner.VMeasureAccuracy(features, labels, confusion);
                    Console.WriteLine("Training set accuracy: " + accuracy);
                }

                if (parameters.Verbose)
                {
                    Console.WriteLine("\nConfusion matrix: (Row=target value, Col=predicted value)");
                    confusion.Print();
                    Console.WriteLine("\n");
                }
            }
            else if (parameters.Evaluation == "static")
            {
                // Train on the main data set, then evaluate on a separate ARFF file.
                var testData = new Matrix();
                testData.LoadArff(parameters.EvalExtra);
                if (parameters.Normalize)
                {
                    // BUG: this may normalize differently from the training data.
                    // It should reuse the training ranges for normalization.
                    testData.Normalize();
                }

                Console.WriteLine("Calculating accuracy on separate test set...");
                Console.WriteLine("Test set name: " + parameters.EvalExtra);
                Console.WriteLine("Number of test instances: " + testData.Rows());
                var features  = new VMatrix(data, 0, 0, data.Rows(), data.Cols() - parameters.Outputs);
                var labels    = new VMatrix(data, 0, data.Cols() - parameters.Outputs, data.Rows(), parameters.Outputs);
                var startTime = DateTime.Now.Ticks;
                learner.VTrain(features, labels);
                var elapsedTime = new TimeSpan(DateTime.Now.Ticks - startTime);
                Console.WriteLine("Time to train (in seconds): " + elapsedTime.TotalSeconds);
                var trainAccuracy = learner.VMeasureAccuracy(features, labels, null);
                Console.WriteLine("Training set accuracy: " + trainAccuracy);
                var testFeatures = new VMatrix(testData, 0, 0, testData.Rows(), testData.Cols() - parameters.Outputs);
                var testLabels   = new VMatrix(testData, 0, testData.Cols() - parameters.Outputs, testData.Rows(), parameters.Outputs);
                var confusion    = new Matrix();
                var testAccuracy = learner.VMeasureAccuracy(testFeatures, testLabels, confusion);
                Console.WriteLine("Test set accuracy: " + testAccuracy);
                if (parameters.Verbose)
                {
                    Console.WriteLine("\nConfusion matrix: (Row=target value, Col=predicted value)");
                    confusion.Print();
                    Console.WriteLine("\n");
                }
            }
            else if (parameters.Evaluation == "test")
            {
                // Evaluate only; no training call is made in this branch.
                // NOTE(review): presumably the learner was restored from the snapshot file - confirm in GetLearner.
                var testData = new Matrix();
                testData.LoadArff(parameters.EvalExtra);
                if (parameters.Normalize)
                {
                    // BUG: this may normalize differently from the training data.
                    // It should reuse the training ranges for normalization.
                    testData.Normalize();
                }

                Console.WriteLine("Calculating accuracy on separate test set...");
                Console.WriteLine("Test set name: " + parameters.EvalExtra);
                Console.WriteLine("Number of test instances: " + testData.Rows());
                var testFeatures = new VMatrix(testData, 0, 0, testData.Rows(), testData.Cols() - parameters.Outputs);
                var testLabels   = new VMatrix(testData, 0, testData.Cols() - parameters.Outputs, testData.Rows(), parameters.Outputs);
                var confusion    = new Matrix();
                var testAccuracy = learner.VMeasureAccuracy(testFeatures, testLabels, confusion);
                Console.WriteLine("Test set accuracy: " + testAccuracy);
                if (parameters.Verbose)
                {
                    Console.WriteLine("\nConfusion matrix: (Row=target value, Col=predicted value)");
                    confusion.Print();
                    Console.WriteLine("\n");
                }
            }
            else if (parameters.Evaluation == "random")
            {
                // Split the data into a training part and a hold-out test part.
                Console.WriteLine("Calculating accuracy on a random hold-out set...");
                var trainPercent = double.Parse(parameters.EvalExtra);
                if (trainPercent < 0 || trainPercent > 1)
                {
                    throw new Exception("Percentage for random evaluation must be between 0 and 1");
                }
                Console.WriteLine("Percentage used for training: " + trainPercent);
                Console.WriteLine("Percentage used for testing: " + (1 - trainPercent));
                var vData = new VMatrix(data, 0, 0, data.Rows(), data.Cols());
                if (!(learner is BPTT))
                {
                    // Row order is left untouched for BPTT learners.
                    vData.Shuffle(rand);
                }
                var trainSize     = (int)(trainPercent * vData.Rows());
                var trainFeatures = new VMatrix(vData, 0, 0, trainSize, vData.Cols() - parameters.Outputs);
                var trainLabels   = new VMatrix(vData, 0, vData.Cols() - parameters.Outputs, trainSize, parameters.Outputs);
                var testFeatures  = new VMatrix(vData, trainSize, 0, vData.Rows() - trainSize, vData.Cols() - parameters.Outputs);
                var testLabels    = new VMatrix(vData, trainSize, vData.Cols() - parameters.Outputs, vData.Rows() - trainSize, parameters.Outputs);
                var startTime     = DateTime.Now.Ticks;
                learner.VTrain(trainFeatures, trainLabels);
                var elapsedTime = new TimeSpan(DateTime.Now.Ticks - startTime);
                Console.WriteLine("Time to train (in seconds): " + elapsedTime.TotalSeconds);
                var trainAccuracy = learner.VMeasureAccuracy(trainFeatures, trainLabels, null);
                Console.WriteLine("Training set accuracy: " + trainAccuracy);
                var confusion    = new Matrix();
                var testAccuracy = learner.VMeasureAccuracy(testFeatures, testLabels, confusion);
                Console.WriteLine("Test set accuracy: " + testAccuracy);
                var testMSE = learner.VGetMSE(testFeatures, testLabels);
                Console.WriteLine("Test set MSE: " + testMSE);

                if (parameters.Verbose)
                {
                    Console.WriteLine("\nConfusion matrix: (Row=target value, Col=predicted value)");
                    confusion.Print();
                    Console.WriteLine("\n");
                }
            }
            else if (parameters.Evaluation == "cross")
            {
                Console.WriteLine("Calculating accuracy using cross-validation...");
                var folds = int.Parse(parameters.EvalExtra);
                if (folds <= 0)
                {
                    throw new Exception("Number of folds must be greater than 0");
                }
                Console.WriteLine("Number of folds: " + folds);
                var  reps        = 1;
                var  sumAccuracy = 0.0;
                long ticks       = 0;           // total training time over all reps and folds
                for (var j = 0; j < reps; j++)
                {
                    data.Shuffle(rand);
                    for (var i = 0; i < folds; i++)
                    {
                        // Rows [begin, end) form the test fold; everything else is training data.
                        var begin         = i * data.Rows() / folds;
                        var end           = (i + 1) * data.Rows() / folds;
                        var trainFeatures = new Matrix(data, 0, 0, begin, data.Cols() - parameters.Outputs);
                        var trainLabels   = new Matrix(data, 0, data.Cols() - parameters.Outputs, begin, parameters.Outputs);
                        var testFeatures  = new Matrix(data, begin, 0, end - begin, data.Cols() - parameters.Outputs);
                        var testLabels    = new Matrix(data, begin, data.Cols() - parameters.Outputs, end - begin, parameters.Outputs);
                        trainFeatures.Add(data, end, 0, data.Rows() - end);
                        trainLabels.Add(data, end, data.Cols() - parameters.Outputs, data.Rows() - end);
                        var startTime = DateTime.Now.Ticks;
                        learner.Train(trainFeatures, trainLabels);

                        // Accumulate (not overwrite) so the division by reps * folds below
                        // yields the average training time per fold.
                        ticks += DateTime.Now.Ticks - startTime;
                        var accuracy = learner.MeasureAccuracy(testFeatures, testLabels, null);
                        sumAccuracy += accuracy;
                        Console.WriteLine("Rep=" + j + ", Fold=" + i + ", Accuracy=" + accuracy);
                    }
                }
                ticks /= (reps * folds);
                var elapsedTime = new TimeSpan(ticks);
                Console.WriteLine("Average time to train (in seconds): " + elapsedTime.TotalSeconds);
                Console.WriteLine("Mean accuracy=" + (sumAccuracy / (reps * folds)));
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Performs one training pass over every row of <paramref name="features"/>: a forward
        /// pass through the layers (logistic activation), back-propagation of the output error,
        /// and a weight update with momentum after each row.
        /// </summary>
        /// <param name="epoch">Epoch number supplied by the caller; not read by this method.</param>
        /// <param name="features">Input rows; column n of a row is copied into input node n.</param>
        /// <param name="labels">Target rows; each output node reads the column given by its <c>labelCol</c>.</param>
        /// <returns>The accumulated squared output error divided by the number of rows.</returns>
        private double TrainEpoch(int epoch, VMatrix features, VMatrix labels)
        {
            double squaredErrorTotal = 0;

            Console.Write("TrainEpoch ");
            int progressColumn = Console.CursorLeft;

            unsafe
            {
                for (var row = 0; row < features.Rows(); row++)
                {
                    // Refresh the in-place row counter every 100 rows and on the final row.
                    if ((row % 100 == 0) || (row == features.Rows() - 1))
                    {
                        Console.SetCursorPosition(progressColumn, Console.CursorTop);
                        Console.Write(row);
                    }

                    // Forward pass: compute every node's output, layer by layer.
                    for (var layer = 0; layer < m_lCount.Length; layer++)
                    {
                        for (var n = 0; n < m_lCount[layer]; n++)
                        {
                            fixed (Node* current = &m_nodes[m_lBegIdx[layer] + n])
                            {
                                current->net = 0;
                                if (layer != 0)
                                {
                                    // Weighted sum of the previous layer's outputs.
                                    var inputCount         = current->wEndIdx - current->wBegIdx;
                                    var previousLayerStart = m_lBegIdx[layer - 1];
                                    for (var i = 0; i < inputCount; i++)
                                    {
                                        current->net += m_weights[current->wBegIdx + i] * m_nodes[previousLayerStart + i].output;
                                    }

                                    // The bias weight is stored at wEndIdx.
                                    current->net += m_weights[current->wEndIdx];

                                    current->output = 1.0 / (1.0 + Math.Exp(-current->net));
                                }
                                else
                                {
                                    // Input nodes simply carry the feature value.
                                    current->output = features.Get(row, n);
                                }
                            }
                        }
                    }

                    // Backward pass: compute node errors and weight deltas from the output layer down.
                    var lastLayer = m_lCount.Length - 1;
                    for (var layer = lastLayer; layer > 0; layer--)
                    {
                        for (var n = 0; n < m_lCount[layer]; n++)
                        {
                            fixed (Node* current = &m_nodes[m_lBegIdx[layer] + n])
                            {
                                // Derivative of the logistic function at this node's output.
                                double derivative = current->output * (1.0 - current->output);

                                if (layer != lastLayer)
                                {
                                    // Hidden node: sum the next layer's errors, weighted by the
                                    // connection from this node (offset n in each next-layer node).
                                    double weightedError  = 0;
                                    var    nextLayerStart = m_lBegIdx[layer + 1];
                                    for (var t = 0; t < m_lCount[layer + 1]; t++)
                                    {
                                        weightedError += m_nodes[nextLayerStart + t].error * m_weights[m_nodes[nextLayerStart + t].wBegIdx + n];
                                    }
                                    current->error = weightedError * derivative;
                                }
                                else
                                {
                                    // Output node: nominal labels are mapped to 0.9 / 0.1 targets.
                                    double target = labels.Get(row, current->labelCol);
                                    if (current->isContinuous == 0)
                                    {
                                        target = (target == current->labelVal) ? 0.9 : 0.1;
                                    }

                                    var error = target - current->output;
                                    current->error     = error * derivative;
                                    squaredErrorTotal += error * error;
                                }

                                // Deltas for the incoming weights: learning-rate term plus momentum.
                                var weightCount   = current->wEndIdx - current->wBegIdx;
                                var feedingLayer  = m_lBegIdx[layer - 1];
                                for (var i = 0; i < weightCount; i++)
                                {
                                    var    slot  = current->wBegIdx + i;
                                    double delta = m_rate * current->error * m_nodes[feedingLayer + i].output;
                                    delta += m_momentum * m_deltas[slot];
                                    m_deltas[slot] = delta;
                                }

                                // Delta for the bias weight.
                                double biasDelta = m_rate * current->error;
                                biasDelta += m_momentum * m_deltas[current->wEndIdx];
                                m_deltas[current->wEndIdx] = biasDelta;
                            }
                        }
                    }

                    // Apply all deltas only after the whole backward pass has read the old weights.
                    for (var i = 0; i < m_weights.Length; i++)
                    {
                        m_weights[i] += m_deltas[i];
                    }
                }
            }

            Console.WriteLine();

            return squaredErrorTotal / features.Rows();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs a forward pass for every row of <paramref name="features"/> and accumulates the
        /// squared error of each output node against <paramref name="labels"/>.
        /// Writes a progress counter to the console while running.
        /// </summary>
        /// <param name="features">Input rows; column n of a row is copied into input node n.</param>
        /// <param name="labels">Target rows; each output node reads the column given by its <c>labelCol</c>.</param>
        /// <returns>The accumulated squared output error divided by the number of rows.</returns>
        public override double VGetMSE(VMatrix features, VMatrix labels)
        {
            double squaredErrorTotal = 0;

            Console.Write("VGetMSE ");
            int progressColumn = Console.CursorLeft;

            for (var row = 0; row < features.Rows(); row++)
            {
                // Refresh the in-place row counter every 10 rows and on the final row.
                if ((row % 10 == 0) || (row == features.Rows() - 1))
                {
                    Console.SetCursorPosition(progressColumn, Console.CursorTop);
                    Console.Write(row);
                }

                // Forward pass: compute every node's output, layer by layer.
                for (var layer = 0; layer < m_lCount.Length; layer++)
                {
                    for (var n = 0; n < m_lCount[layer]; n++)
                    {
                        var nodeIdx = m_lBegIdx[layer] + n;
                        m_nodes[nodeIdx].net = 0;

                        if (layer != 0)
                        {
                            // Weighted sum of the previous layer's outputs.
                            var firstWeight        = m_nodes[nodeIdx].wBegIdx;
                            var biasWeight         = m_nodes[nodeIdx].wEndIdx;
                            var previousLayerStart = m_lBegIdx[layer - 1];
                            for (var i = 0; i < biasWeight - firstWeight; i++)
                            {
                                m_nodes[nodeIdx].net += m_weights[firstWeight + i] * m_nodes[previousLayerStart + i].output;
                            }

                            // The bias weight is stored at wEndIdx.
                            m_nodes[nodeIdx].net += m_weights[biasWeight];

                            m_nodes[nodeIdx].output = 1.0 / (1.0 + Math.Exp(-m_nodes[nodeIdx].net));
                        }
                        else
                        {
                            // Input nodes simply carry the feature value.
                            m_nodes[nodeIdx].output = features.Get(row, n);
                        }
                    }
                }

                // Accumulate the squared error of every output-layer node.
                var lastLayer = m_lCount.Length - 1;
                for (var n = 0; n < m_lCount[lastLayer]; n++)
                {
                    var    outIdx = m_lBegIdx[lastLayer] + n;
                    double target = labels.Get(row, m_nodes[outIdx].labelCol);

                    // Nominal labels are mapped to 0.9 / 0.1 targets.
                    if (m_nodes[outIdx].isContinuous == 0)
                    {
                        target = (target == m_nodes[outIdx].labelVal) ? 0.9 : 0.1;
                    }

                    var error = target - m_nodes[outIdx].output;
                    squaredErrorTotal += error * error;
                }
            }

            Console.WriteLine();

            return squaredErrorTotal / features.Rows();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds the network (input, hidden and output nodes with small random weights) and
        /// trains it on the GPU one epoch at a time. The first 75% of the rows are used for
        /// training and the remaining rows as a validation set that is evaluated after every epoch.
        /// </summary>
        /// <remarks>
        /// Training stops when the validation MSE is exactly 0, when more than 10000 epochs have
        /// run, or when 20 epochs have been counted since the validation MSE last improved by
        /// 1% or more. If the best epoch is not the final one, the best weights are restored.
        /// When <c>OutputFileName</c> is set, progress and weights are appended to that file.
        /// </remarks>
        /// <param name="features">Feature rows; the column count determines the number of input nodes.</param>
        /// <param name="labels">Label rows; the columns determine the output nodes.</param>
        /// <param name="colMin">Not used by this method.</param>
        /// <param name="colMax">Not used by this method.</param>
        public override void VTrain(VMatrix features, VMatrix labels, double[] colMin, double[] colMax)
        {
            // Default topology: a single hidden layer with twice as many nodes as there are features.
            if ((m_lCount == null) || (m_lCount.Length < 3))
            {
                m_lCount = new int[3] {
                    0, features.Cols() * 2, 0
                };
            }

            List <Node> nodes = new List <Node>();

            // add the input nodes (they own no weights)
            m_lCount[0] = features.Cols();
            for (var n = 0; n < m_lCount[0]; n++)
            {
                nodes.Add(new Node(-1, -1, 0, 0, 0));
            }

            // Each node owns one weight per node in the previous layer, plus one bias weight.
            int numWeights = m_lCount[0] + 1;
            int wBegIdx    = 0;

            // add the nodes for the hidden layers
            for (var layer = 1; layer < m_lCount.Length - 1; layer++)
            {
                for (var n = 0; n < m_lCount[layer]; n++)
                {
                    nodes.Add(new Node(wBegIdx, wBegIdx + numWeights - 1, 0, 0, 0));
                    wBegIdx += numWeights;
                }

                numWeights = m_lCount[layer] + 1;
            }

            // figure out how many outputs we need:
            // one per continuous label column, one per value of a nominal label column
            int oCount = 0;

            for (var col = 0; col < labels.Cols(); col++)
            {
                var labelValueCount = labels.ValueCount(col);

                if (labelValueCount < 2)
                {
                    // continuous
                    oCount++;
                }
                else
                {
                    oCount += labelValueCount;
                }
            }

            // update the layer arrays (m_lBegIdx[i] is the index of layer i's first node)
            m_lCount[m_lCount.Length - 1] = oCount;
            m_lBegIdx = new int[m_lCount.Length];
            for (var i = 0; i < m_lCount.Length; i++)
            {
                if (i == 0)
                {
                    m_lBegIdx[i] = 0;
                }
                else
                {
                    m_lBegIdx[i] = m_lBegIdx[i - 1] + m_lCount[i - 1];
                }
            }

            // add the output nodes
            // NOTE(review): the last three Node arguments are presumably
            // (isContinuous, labelCol, labelVal) - confirm against the Node constructor.
            for (var col = 0; col < labels.Cols(); col++)
            {
                var labelValueCount = labels.ValueCount(col);

                if (labelValueCount < 2)
                {
                    // continuous
                    nodes.Add(new Node(wBegIdx, wBegIdx + numWeights - 1, 1, col, -1));
                    wBegIdx += numWeights;
                }
                else
                {
                    for (var n = 0; n < labelValueCount; n++)
                    {
                        nodes.Add(new Node(wBegIdx, wBegIdx + numWeights - 1, 0, col, n));
                        wBegIdx += numWeights;
                    }
                }
            }

            m_nodes = nodes.ToArray();

            // create the weights, initialised from 0.1 - (NextDouble() * 0.2)
            m_weights     = new double[wBegIdx];
            m_bestWeights = new double[wBegIdx];
            m_deltas      = new double[wBegIdx];
            for (var i = 0; i < wBegIdx; i++)
            {
                m_weights[i]     = 0.1 - (m_rand.NextDouble() * 0.2);
                m_bestWeights[i] = m_weights[i];
                m_deltas[i]      = 0;
            }

            if (!string.IsNullOrEmpty(OutputFileName))
            {
                m_outputFile = File.AppendText(OutputFileName);
            }

            // The finally block guarantees the log file is closed even if training throws.
            try
            {
                // The first 75% of the rows are copied into plain arrays for the GPU.
                int trainSize = (int)(0.75 * features.Rows());

                double[,] trainFeatures = new double[trainSize, features.Cols()];
                for (int r = 0; r < trainSize; r++)
                {
                    for (int c = 0; c < features.Cols(); c++)
                    {
                        trainFeatures[r, c] = features.Get(r, c);
                    }
                }

                double[,] trainLabels = new double[trainSize, labels.Cols()];
                for (int r = 0; r < trainSize; r++)
                {
                    for (int c = 0; c < labels.Cols(); c++)
                    {
                        trainLabels[r, c] = labels.Get(r, c);
                    }
                }

                // Row index array that is shuffled each epoch.
                int[] fIdx = new int[trainSize];
                for (int i = 0; i < fIdx.Length; i++)
                {
                    fIdx[i] = i;
                }

                // The remaining rows form the validation set.
                VMatrix validationFeatures = new VMatrix(features, trainSize, 0, features.Rows() - trainSize, features.Cols());
                VMatrix validationLabels   = new VMatrix(labels, trainSize, 0, labels.Rows() - trainSize, labels.Cols());

                int    epoch        = 0;                            // current epoch number
                int    bestEpoch    = 0;                            // epoch number of best MSE
                int    eCount       = 0;                            // number of epochs since the best MSE
                bool   checkDone    = false;                        // if true, check to see if we're done
                double bestMSE      = double.MaxValue;              // best validation MSE so far
                double bestAccuracy = double.MaxValue;              // validation accuracy at the best MSE so far

                Console.WriteLine("Epoch\tMSE (validation)\taccuracy (validation)");
                if (m_outputFile != null)
                {
                    m_outputFile.Write("Layers: ");
                    for (var l = 0; l < m_lCount.Length - 1; l++)
                    {
                        m_outputFile.Write(m_lCount[l]);
                        m_outputFile.Write('x');
                    }
                    m_outputFile.WriteLine(m_lCount[m_lCount.Length - 1]);
                    m_outputFile.WriteLine("Momentum: " + m_momentum);
                    m_outputFile.WriteLine();
                    m_outputFile.WriteLine("Weights");
                    PrintWeights();
                    m_outputFile.WriteLine("Epoch\tMSE (validation)\taccuracy (validation)");
                }

                CudafyModule km = CudafyTranslator.Cudafy();

                GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

                gpu.LoadModule(km);

                while (true)
                {
                    // Advance the epoch counter. Without this the 10000-epoch limit, the
                    // "epoch == 1" case and the best-weights restore below can never trigger.
                    epoch++;

                    // shuffle the training set
                    Shuffle(ref fIdx, m_rand);

                    // Everything is re-uploaded each epoch because FreeAll() below releases it all.
                    double[,] g_trainFeatures = gpu.CopyToDevice(trainFeatures);
                    double[,] g_trainLabels   = gpu.CopyToDevice(trainLabels);
                    int[]    g_fIdx    = gpu.CopyToDevice(fIdx);
                    int[]    g_lCount  = gpu.CopyToDevice(m_lCount);
                    int[]    g_lBegIdx = gpu.CopyToDevice(m_lBegIdx);
                    Node[]   g_nodes   = gpu.CopyToDevice(m_nodes);
                    double[] g_weights = gpu.CopyToDevice(m_weights);
                    double[] g_deltas  = gpu.CopyToDevice(m_deltas);

                    // Launch (trainSize / 256) blocks of 256 threads each.
                    // NOTE(review): the integer division truncates, so a trainSize below 256
                    // requests zero blocks and a remainder is not covered by its own thread;
                    // whether that matters depends on the kernel, which is not visible here.
                    gpu.Launch(trainSize / 256, 256).TrainEpoch(g_trainFeatures, g_trainLabels, g_fIdx, g_lCount, g_lBegIdx, g_nodes, g_weights, g_deltas, m_rate, m_momentum);

                    // copy the arrays back from the GPU to the CPU
                    gpu.CopyFromDevice(g_weights, m_weights);
                    gpu.CopyFromDevice(g_deltas, m_deltas);
                    gpu.CopyFromDevice(g_fIdx, fIdx);

                    // free the memory allocated on the GPU
                    gpu.FreeAll();

                    // check the MSE after this epoch
                    double mse = VGetMSE(validationFeatures, validationLabels);

                    // check the validation accuracy after this epoch
                    double accuracy = VMeasureAccuracy(validationFeatures, validationLabels, null);

                    string progressLine = string.Format("{0}-{1}\t{2}\t{3}", epoch, eCount, mse, accuracy);
                    Console.WriteLine(progressLine);
                    if (m_outputFile != null)
                    {
                        m_outputFile.WriteLine(progressLine);
                        m_outputFile.Flush();
                    }

                    if ((mse == 0.0) || (epoch > 10000))
                    {
                        break;
                    }
                    else if ((epoch == 1) || (mse < bestMSE))
                    {
                        if (epoch == 1)
                        {
                            // save the initial MSE
                            bestMSE = mse;
                        }
                        else if ((mse / bestMSE) > 0.99)
                        {
                            // Improved, but by less than 1%: start counting towards the stop limit.
                            if (!checkDone)
                            {
                                checkDone = true;
                                eCount    = 0;
                            }
                        }
                        else
                        {
                            // Improved by 1% or more: reset the stop counter.
                            checkDone = false;
                            eCount    = 0;
                        }

                        // save the best for later
                        bestMSE      = mse;
                        bestAccuracy = accuracy;
                        bestEpoch    = epoch;
                        SaveBestWeights();
                    }
                    else if (!checkDone)
                    {
                        // No improvement: start counting towards the stop limit.
                        checkDone = true;
                        eCount    = 0;
                    }

                    if (checkDone)
                    {
                        // check to see if we're done
                        eCount++;
                        if (eCount >= 20)
                        {
                            break;
                        }
                    }
                }

                if (m_outputFile != null)
                {
                    m_outputFile.WriteLine();
                    m_outputFile.WriteLine("Weights");
                    PrintWeights();
                }

                // Fall back to the best weights when the final epoch was not the best one.
                if ((bestEpoch > 0) && (bestEpoch != epoch))
                {
                    RestoreBestWeights();
                    if (m_outputFile != null)
                    {
                        m_outputFile.WriteLine();
                        m_outputFile.WriteLine(string.Format("Best Weights (from Epoch {0}, valMSE={1}, valAcc={2})", bestEpoch, bestMSE, bestAccuracy));
                        PrintWeights();
                    }
                }
            }
            finally
            {
                if (m_outputFile != null)
                {
                    m_outputFile.Close();
                }
            }
        }