/// <summary>
/// Builds a view over a rectangular window of another <see cref="VMatrix"/>.
/// The new view reuses the source's <c>m_matrix</c> reference and copies
/// <paramref name="rowCount"/> entries of the source's <c>m_rowData</c>,
/// starting at <paramref name="rowStart"/>.
/// </summary>
/// <param name="matrix">The view the window is taken from.</param>
/// <param name="rowStart">Index into the source's row data of the first row to include.</param>
/// <param name="colStart">First column, relative to the source view's own column offset.</param>
/// <param name="rowCount">Number of rows in the new view.</param>
/// <param name="colCount">Number of columns in the new view.</param>
public VMatrix(VMatrix matrix, int rowStart, int colStart, int rowCount, int colCount)
{
    m_matrix = matrix.m_matrix;
    m_rowCount = rowCount;

    // Column offsets accumulate so that a view of a view still addresses
    // the right column of the shared m_matrix.
    m_colStart = matrix.m_colStart + colStart;
    m_colCount = colCount;

    m_rowData = new int[rowCount];
    for (int target = 0, source = rowStart; target < rowCount; target++, source++)
    {
        m_rowData[target] = matrix.m_rowData[source];
    }
}
/// <summary>
/// Randomly permutes the row order of this view. When a buddy matrix is
/// supplied, the very same sequence of swaps is applied to its row data so
/// the two stay aligned row-for-row.
/// </summary>
/// <param name="rand">Source of random indices.</param>
/// <param name="buddy">Optional companion matrix to shuffle in lock-step; may be null.</param>
public void Shuffle(Random rand, VMatrix buddy)
{
    var remaining = Rows();
    while (remaining > 0)
    {
        // Pick one of the not-yet-placed rows and move it to the end of
        // the unplaced range.
        var pick = rand.Next(remaining);
        var last = remaining - 1;

        var held = m_rowData[last];
        m_rowData[last] = m_rowData[pick];
        m_rowData[pick] = held;

        if (buddy != null)
        {
            var buddyRows = buddy.RowData();
            var buddyHeld = buddyRows[last];
            buddyRows[last] = buddyRows[pick];
            buddyRows[pick] = buddyHeld;
        }

        remaining--;
    }
}
/// <summary>
/// Command-line driver: parses the arguments, loads the ARFF data set,
/// optionally normalizes it, prints a summary, and then evaluates the
/// selected learner using the method named by <c>parameters.Evaluation</c>:
/// "training", "static", "test", "random" or "cross". Any other value
/// prints the summary and does nothing else.
/// </summary>
/// <param name="args">
/// Raw command-line arguments, handed to <c>ArgParser.Parse</c>.
/// Example: -L baseline -A data/iris.arff -E cross 10 -N
/// </param>
/// <exception cref="Exception">
/// Thrown when the "random" percentage is outside [0, 1] or the "cross"
/// fold count is not positive.
/// </exception>
public void Run(string[] args)
{
    var rand = Rand.Get();

    // Parse the command line arguments
    ArgParser.Parse(args);
    var parameters = Parameters.Get();

    // Load the model
    var learner = GetLearner(parameters, rand);

    // Load the ARFF file
    var data = new Matrix();
    data.LoadArff(parameters.Arff);

    if (parameters.Outputs > data.Cols() - 1)
    {
        Console.WriteLine("Too many outputs: " + parameters.Outputs);
        // NOTE(review): this is an error path but exits with status 0; kept
        // as-is because callers/scripts may depend on the exit code.
        Environment.Exit(0);
    }

    if (parameters.Normalize)
    {
        Console.WriteLine("Using normalized data\n");
        data.Normalize();
    }

    // Print some stats
    Console.WriteLine();
    Console.WriteLine("Dataset name: " + parameters.Arff);
    Console.WriteLine("Number of instances: " + data.Rows());
    Console.WriteLine("Number of attributes: " + data.Cols());
    Console.WriteLine("Learning algorithm: " + parameters.Learner);
    Console.WriteLine("Evaluation method: " + parameters.Evaluation);
    Console.WriteLine("Learning Rate: " + parameters.Rate);
    Console.WriteLine("Outputs: " + parameters.Outputs);
    Console.WriteLine("Snapshot File: " + parameters.SnapshotFileName);
    Console.WriteLine();

    if (parameters.Evaluation == "training")
    {
        // Train on the full data set and report accuracy on that same set.
        Console.WriteLine("Calculating accuracy on training set...");
        var features = new VMatrix(data, 0, 0, data.Rows(), data.Cols() - parameters.Outputs);
        var labels = new VMatrix();
        if (parameters.Outputs > 0)
        {
            labels = new VMatrix(data, 0, data.Cols() - parameters.Outputs, data.Rows(), parameters.Outputs);
        }
        var confusion = new Matrix();
        var startTime = DateTime.Now.Ticks;
        learner.VTrain(features, labels);
        var elapsedTime = new TimeSpan(DateTime.Now.Ticks - startTime);
        Console.WriteLine("Time to train (in seconds): " + elapsedTime.TotalSeconds);
        if (parameters.Outputs > 0)
        {
            var accuracy = learner.VMeasureAccuracy(features, labels, confusion);
            Console.WriteLine("Training set accuracy: " + accuracy);
        }

        if (parameters.Verbose)
        {
            Console.WriteLine("\nConfusion matrix: (Row=target value, Col=predicted value)");
            confusion.Print();
            Console.WriteLine("\n");
        }
    }
    else if (parameters.Evaluation == "static")
    {
        // Train on the main data set, then test on a separate file.
        var testData = new Matrix();
        testData.LoadArff(parameters.EvalExtra);
        if (parameters.Normalize)
        {
            // BUG! This may normalize differently from the training data.
            // It should use the same ranges for normalization!
            testData.Normalize();
        }

        Console.WriteLine("Calculating accuracy on separate test set...");
        Console.WriteLine("Test set name: " + parameters.EvalExtra);
        Console.WriteLine("Number of test instances: " + testData.Rows());
        var features = new VMatrix(data, 0, 0, data.Rows(), data.Cols() - parameters.Outputs);
        var labels = new VMatrix(data, 0, data.Cols() - parameters.Outputs, data.Rows(), parameters.Outputs);
        var startTime = DateTime.Now.Ticks;
        learner.VTrain(features, labels);
        var elapsedTime = new TimeSpan(DateTime.Now.Ticks - startTime);
        Console.WriteLine("Time to train (in seconds): " + elapsedTime.TotalSeconds);
        var trainAccuracy = learner.VMeasureAccuracy(features, labels, null);
        Console.WriteLine("Training set accuracy: " + trainAccuracy);
        var testFeatures = new VMatrix(testData, 0, 0, testData.Rows(), testData.Cols() - parameters.Outputs);
        var testLabels = new VMatrix(testData, 0, testData.Cols() - parameters.Outputs, testData.Rows(), parameters.Outputs);
        var confusion = new Matrix();
        var testAccuracy = learner.VMeasureAccuracy(testFeatures, testLabels, confusion);
        Console.WriteLine("Test set accuracy: " + testAccuracy);
        if (parameters.Verbose)
        {
            Console.WriteLine("\nConfusion matrix: (Row=target value, Col=predicted value)");
            confusion.Print();
            Console.WriteLine("\n");
        }
    }
    else if (parameters.Evaluation == "test")
    {
        // Measure accuracy on a separate file; this branch does not call
        // VTrain, so the learner is used in whatever state GetLearner
        // returned it.
        var testData = new Matrix();
        testData.LoadArff(parameters.EvalExtra);
        if (parameters.Normalize)
        {
            // BUG! This may normalize differently from the training data.
            // It should use the same ranges for normalization!
            testData.Normalize();
        }

        Console.WriteLine("Calculating accuracy on separate test set...");
        Console.WriteLine("Test set name: " + parameters.EvalExtra);
        Console.WriteLine("Number of test instances: " + testData.Rows());
        var testFeatures = new VMatrix(testData, 0, 0, testData.Rows(), testData.Cols() - parameters.Outputs);
        var testLabels = new VMatrix(testData, 0, testData.Cols() - parameters.Outputs, testData.Rows(), parameters.Outputs);
        var confusion = new Matrix();
        var testAccuracy = learner.VMeasureAccuracy(testFeatures, testLabels, confusion);
        Console.WriteLine("Test set accuracy: " + testAccuracy);
        if (parameters.Verbose)
        {
            Console.WriteLine("\nConfusion matrix: (Row=target value, Col=predicted value)");
            confusion.Print();
            Console.WriteLine("\n");
        }
    }
    else if (parameters.Evaluation == "random")
    {
        // Hold out the tail of the (optionally shuffled) data for testing.
        Console.WriteLine("Calculating accuracy on a random hold-out set...");
        var trainPercent = double.Parse(parameters.EvalExtra);
        if (trainPercent < 0 || trainPercent > 1)
        {
            throw new Exception("Percentage for random evaluation must be between 0 and 1");
        }
        Console.WriteLine("Percentage used for training: " + trainPercent);
        Console.WriteLine("Percentage used for testing: " + (1 - trainPercent));
        var vData = new VMatrix(data, 0, 0, data.Rows(), data.Cols());
        // Row order is left untouched for BPTT learners.
        if (!(learner is BPTT))
        {
            vData.Shuffle(rand);
        }
        var trainSize = (int)(trainPercent * vData.Rows());
        var trainFeatures = new VMatrix(vData, 0, 0, trainSize, vData.Cols() - parameters.Outputs);
        var trainLabels = new VMatrix(vData, 0, vData.Cols() - parameters.Outputs, trainSize, parameters.Outputs);
        var testFeatures = new VMatrix(vData, trainSize, 0, vData.Rows() - trainSize, vData.Cols() - parameters.Outputs);
        var testLabels = new VMatrix(vData, trainSize, vData.Cols() - parameters.Outputs, vData.Rows() - trainSize, parameters.Outputs);
        var startTime = DateTime.Now.Ticks;
        learner.VTrain(trainFeatures, trainLabels);
        var elapsedTime = new TimeSpan(DateTime.Now.Ticks - startTime);
        Console.WriteLine("Time to train (in seconds): " + elapsedTime.TotalSeconds);
        var trainAccuracy = learner.VMeasureAccuracy(trainFeatures, trainLabels, null);
        Console.WriteLine("Training set accuracy: " + trainAccuracy);
        var confusion = new Matrix();
        var testAccuracy = learner.VMeasureAccuracy(testFeatures, testLabels, confusion);
        Console.WriteLine("Test set accuracy: " + testAccuracy);
        var testMSE = learner.VGetMSE(testFeatures, testLabels);
        Console.WriteLine("Test set MSE: " + testMSE);

        if (parameters.Verbose)
        {
            Console.WriteLine("\nConfusion matrix: (Row=target value, Col=predicted value)");
            confusion.Print();
            Console.WriteLine("\n");
        }
    }
    else if (parameters.Evaluation == "cross")
    {
        Console.WriteLine("Calculating accuracy using cross-validation...");
        var folds = int.Parse(parameters.EvalExtra);
        if (folds <= 0)
        {
            throw new Exception("Number of folds must be greater than 0");
        }
        Console.WriteLine("Number of folds: " + folds);
        var reps = 1;
        var sumAccuracy = 0.0;
        long ticks = 0; // total training time over all reps and folds
        for (var j = 0; j < reps; j++)
        {
            data.Shuffle(rand);
            for (var i = 0; i < folds; i++)
            {
                // Rows [begin, end) form the test fold; everything before
                // and after it is the training set.
                var begin = i * data.Rows() / folds;
                var end = (i + 1) * data.Rows() / folds;
                var trainFeatures = new Matrix(data, 0, 0, begin, data.Cols() - parameters.Outputs);
                var trainLabels = new Matrix(data, 0, data.Cols() - parameters.Outputs, begin, parameters.Outputs);
                var testFeatures = new Matrix(data, begin, 0, end - begin, data.Cols() - parameters.Outputs);
                var testLabels = new Matrix(data, begin, data.Cols() - parameters.Outputs, end - begin, parameters.Outputs);
                trainFeatures.Add(data, end, 0, data.Rows() - end);
                trainLabels.Add(data, end, data.Cols() - parameters.Outputs, data.Rows() - end);
                var startTime = DateTime.Now.Ticks;
                learner.Train(trainFeatures, trainLabels);
                // Accumulate (not overwrite) so the division below yields a
                // true per-fold average rather than lastFold / foldCount.
                ticks += DateTime.Now.Ticks - startTime;
                var accuracy = learner.MeasureAccuracy(testFeatures, testLabels, null);
                sumAccuracy += accuracy;
                Console.WriteLine("Rep=" + j + ", Fold=" + i + ", Accuracy=" + accuracy);
            }
        }
        ticks /= (reps * folds);
        var elapsedTime = new TimeSpan(ticks);
        Console.WriteLine("Average time to train (in seconds): " + elapsedTime.TotalSeconds);
        Console.WriteLine("Mean accuracy=" + (sumAccuracy / (reps * folds)));
    }
}
/// <summary>
/// Runs every row of <paramref name="features"/> through the network
/// (forward pass only) and returns the summed squared output-layer error
/// divided by the number of rows. Progress is written to the console.
/// </summary>
/// <param name="features">Input rows; column n feeds input node n.</param>
/// <param name="labels">Target values, read via each output node's labelCol.</param>
/// <returns>Sum of squared output errors divided by <c>features.Rows()</c>.</returns>
public override double VGetMSE(VMatrix features, VMatrix labels)
{
    double squaredErrorSum = 0;

    Console.Write("VGetMSE ");
    int progressColumn = Console.CursorLeft;
    var rowCount = features.Rows();

    unsafe
    {
        for (var row = 0; row < rowCount; row++)
        {
            // Refresh the in-place row counter every 10th row and on the last row.
            var isLastRow = row == (rowCount - 1);
            if (isLastRow || (row % 10) == 0)
            {
                Console.SetCursorPosition(progressColumn, Console.CursorTop);
                Console.Write(row);
            }

            // Forward pass, layer by layer.
            for (var layer = 0; layer < m_lCount.Length; layer++)
            {
                for (var n = 0; n < m_lCount[layer]; n++)
                {
                    fixed (Node* node = &m_nodes[m_lBegIdx[layer] + n])
                    {
                        node->net = 0;

                        if (layer == 0)
                        {
                            // Input nodes simply carry the feature value.
                            node->output = features.Get(row, n);
                            continue;
                        }

                        // Weighted sum of the previous layer's outputs ...
                        var inputCount = node->wEndIdx - node->wBegIdx;
                        for (var w = 0; w < inputCount; w++)
                        {
                            node->net += m_weights[node->wBegIdx + w] * m_nodes[m_lBegIdx[layer - 1] + w].output;
                        }

                        // ... plus the bias weight stored at wEndIdx,
                        // squashed by the logistic function.
                        node->net += m_weights[node->wEndIdx];
                        node->output = 1.0 / (1.0 + Math.Exp(-node->net));
                    }
                }
            }

            // Accumulate the squared error over the output layer.
            var outputLayer = m_lCount.Length - 1;
            for (var n = 0; n < m_lCount[outputLayer]; n++)
            {
                fixed (Node* node = &m_nodes[m_lBegIdx[outputLayer] + n])
                {
                    double target = labels.Get(row, node->labelCol);
                    if (node->isContinuous == 0)
                    {
                        // Nominal output: 0.9 for the node matching the
                        // label value, 0.1 for all others.
                        target = (target == node->labelVal) ? 0.9 : 0.1;
                    }

                    var error = target - node->output;
                    squaredErrorSum += error * error;
                }
            }
        }
    }

    Console.WriteLine();

    return squaredErrorSum / rowCount;
}
/// <summary>
/// Performs one pass over all rows of <paramref name="features"/>. For each
/// row it runs a forward pass, computes node errors and momentum-smoothed
/// weight deltas from the output layer back to the first hidden layer, and
/// then applies the deltas to every weight. Progress is written to the
/// console.
/// </summary>
/// <param name="epoch">Epoch number; not read by this method.</param>
/// <param name="features">Input rows; column n feeds input node n.</param>
/// <param name="labels">Target values, read via each output node's labelCol.</param>
/// <returns>Sum of squared output errors divided by <c>features.Rows()</c>.</returns>
private double TrainEpoch(int epoch, VMatrix features, VMatrix labels)
{
    double squaredErrorSum = 0;

    Console.Write("TrainEpoch ");
    int progressColumn = Console.CursorLeft;
    var rowCount = features.Rows();

    unsafe
    {
        for (var row = 0; row < rowCount; row++)
        {
            // Refresh the in-place row counter every 100th row and on the last row.
            var isLastRow = row == (rowCount - 1);
            if (isLastRow || (row % 100) == 0)
            {
                Console.SetCursorPosition(progressColumn, Console.CursorTop);
                Console.Write(row);
            }

            // ---- forward pass ----
            for (var layer = 0; layer < m_lCount.Length; layer++)
            {
                for (var n = 0; n < m_lCount[layer]; n++)
                {
                    fixed (Node* node = &m_nodes[m_lBegIdx[layer] + n])
                    {
                        node->net = 0;

                        if (layer == 0)
                        {
                            // Input nodes simply carry the feature value.
                            node->output = features.Get(row, n);
                            continue;
                        }

                        var inputCount = node->wEndIdx - node->wBegIdx;
                        for (var w = 0; w < inputCount; w++)
                        {
                            node->net += m_weights[node->wBegIdx + w] * m_nodes[m_lBegIdx[layer - 1] + w].output;
                        }

                        // Bias weight lives at wEndIdx.
                        node->net += m_weights[node->wEndIdx];
                        node->output = 1.0 / (1.0 + Math.Exp(-node->net));
                    }
                }
            }

            // ---- backward pass: errors and weight deltas ----
            var outputLayer = m_lCount.Length - 1;
            for (var layer = outputLayer; layer > 0; layer--)
            {
                for (var n = 0; n < m_lCount[layer]; n++)
                {
                    fixed (Node* node = &m_nodes[m_lBegIdx[layer] + n])
                    {
                        // Derivative of the logistic function at this node's output.
                        double slope = node->output * (1.0 - node->output);

                        if (layer == outputLayer)
                        {
                            double target = labels.Get(row, node->labelCol);
                            if (node->isContinuous == 0)
                            {
                                // Nominal output: 0.9 for the matching
                                // label value, 0.1 otherwise.
                                target = (target == node->labelVal) ? 0.9 : 0.1;
                            }

                            var error = target - node->output;
                            node->error = error * slope;
                            squaredErrorSum += error * error;
                        }
                        else
                        {
                            // Hidden node: gather the errors of the next
                            // layer through the (not yet updated) weights
                            // that connect this node to it.
                            double downstream = 0;
                            for (var tn = 0; tn < m_lCount[layer + 1]; tn++)
                            {
                                var targetIdx = m_lBegIdx[layer + 1] + tn;
                                downstream += m_nodes[targetIdx].error * m_weights[m_nodes[targetIdx].wBegIdx + n];
                            }
                            node->error = downstream * slope;
                        }

                        // Deltas for the incoming weights, with momentum.
                        var inputCount = node->wEndIdx - node->wBegIdx;
                        for (var w = 0; w < inputCount; w++)
                        {
                            var slot = node->wBegIdx + w;
                            double delta = m_rate * node->error * m_nodes[m_lBegIdx[layer - 1] + w].output;
                            delta += m_momentum * m_deltas[slot];
                            m_deltas[slot] = delta;
                        }

                        // Delta for the bias weight.
                        double biasDelta = m_rate * node->error;
                        biasDelta += m_momentum * m_deltas[node->wEndIdx];
                        m_deltas[node->wEndIdx] = biasDelta;
                    }
                }
            }

            // ---- apply the deltas ----
            for (var w = 0; w < m_weights.Length; w++)
            {
                m_weights[w] += m_deltas[w];
            }
        }
    }

    Console.WriteLine();

    return squaredErrorSum / rowCount;
}
/// <summary>
/// Builds the network topology from the feature/label columns, initializes
/// the weights to random values in roughly [-0.1, 0.1], and trains on the
/// GPU until a stopping condition is met. The first 75% of the rows (in
/// their given order) are used for training and the remainder for
/// validation. After training, the weights saved at the best validation
/// MSE are restored if the final epoch was not the best one.
/// </summary>
/// <param name="features">Input rows; one input node is created per column.</param>
/// <param name="labels">
/// Target columns. A column whose <c>ValueCount</c> is below 2 gets a single
/// continuous output node; otherwise one output node per label value.
/// </param>
/// <param name="colMin">Not read by this method.</param>
/// <param name="colMax">Not read by this method.</param>
/// <remarks>
/// Training stops when the validation MSE is exactly 0, when more than
/// 10000 epochs have run, or when 20 epochs have been counted without a
/// validation-MSE improvement of more than 1%.
/// </remarks>
public override void VTrain(VMatrix features, VMatrix labels, double[] colMin, double[] colMax)
{
    // Default topology: a single hidden layer twice as wide as the input.
    if ((m_lCount == null) || (m_lCount.Length < 3))
    {
        m_lCount = new int[3] { 0, features.Cols() * 2, 0 };
    }

    List<Node> nodes = new List<Node>();

    // add the input nodes
    m_lCount[0] = features.Cols();
    for (var n = 0; n < m_lCount[0]; n++)
    {
        nodes.Add(new Node(-1, -1, 0, 0, 0));
    }

    // Each non-input node owns numWeights consecutive weight slots: one
    // per node of the previous layer plus a trailing bias slot.
    int numWeights = m_lCount[0] + 1;
    int wBegIdx = 0;

    // add the nodes for the hidden layers
    for (var layer = 1; layer < m_lCount.Length - 1; layer++)
    {
        for (var n = 0; n < m_lCount[layer]; n++)
        {
            nodes.Add(new Node(wBegIdx, wBegIdx + numWeights - 1, 0, 0, 0));
            wBegIdx += numWeights;
        }

        numWeights = m_lCount[layer] + 1;
    }

    // figure out how many outputs we need
    int oCount = 0;
    for (var col = 0; col < labels.Cols(); col++)
    {
        var labelValueCount = labels.ValueCount(col);

        if (labelValueCount < 2)
        {
            // continuous
            oCount++;
        }
        else
        {
            oCount += labelValueCount;
        }
    }

    // update the layer arrays
    m_lCount[m_lCount.Length - 1] = oCount;
    m_lBegIdx = new int[m_lCount.Length];
    for (var i = 0; i < m_lCount.Length; i++)
    {
        if (i == 0)
        {
            m_lBegIdx[i] = 0;
        }
        else
        {
            m_lBegIdx[i] = m_lBegIdx[i - 1] + m_lCount[i - 1];
        }
    }

    // add the output nodes
    for (var col = 0; col < labels.Cols(); col++)
    {
        var labelValueCount = labels.ValueCount(col);

        if (labelValueCount < 2)
        {
            // continuous
            nodes.Add(new Node(wBegIdx, wBegIdx + numWeights - 1, 1, col, -1));
            wBegIdx += numWeights;
        }
        else
        {
            for (var n = 0; n < labelValueCount; n++)
            {
                nodes.Add(new Node(wBegIdx, wBegIdx + numWeights - 1, 0, col, n));
                wBegIdx += numWeights;
            }
        }
    }

    m_nodes = nodes.ToArray();

    // create the weights
    m_weights = new double[wBegIdx];
    m_bestWeights = new double[wBegIdx];
    m_deltas = new double[wBegIdx];
    for (var i = 0; i < wBegIdx; i++)
    {
        m_weights[i] = (double)(0.1 - (m_rand.NextDouble() * 0.2));
        m_bestWeights[i] = m_weights[i];
        m_deltas[i] = 0;
    }

    if (!string.IsNullOrEmpty(OutputFileName))
    {
        m_outputFile = File.AppendText(OutputFileName);
    }

    // try/finally so the log file is closed even if training throws.
    try
    {
        // Copy the first 75% of the rows into dense arrays for the GPU.
        int trainSize = (int)(0.75 * features.Rows());
        double[,] trainFeatures = new double[trainSize, features.Cols()];
        for (int r = 0; r < trainSize; r++)
        {
            for (int c = 0; c < features.Cols(); c++)
            {
                trainFeatures[r, c] = features.Get(r, c);
            }
        }

        double[,] trainLabels = new double[trainSize, labels.Cols()];
        for (int r = 0; r < trainSize; r++)
        {
            for (int c = 0; c < labels.Cols(); c++)
            {
                trainLabels[r, c] = labels.Get(r, c);
            }
        }

        // Row-index table that is shuffled each epoch instead of the data.
        int[] fIdx = new int[trainSize];
        for (int i = 0; i < fIdx.Length; i++)
        {
            fIdx[i] = i;
        }

        // The remaining rows form the validation set.
        VMatrix validationFeatures = new VMatrix(features, trainSize, 0, features.Rows() - trainSize, features.Cols());
        VMatrix validationLabels = new VMatrix(labels, trainSize, 0, labels.Rows() - trainSize, labels.Cols());

        int epoch = 0;                          // current epoch number
        int bestEpoch = 0;                      // epoch number of best MSE
        int eCount = 0;                         // number of epochs since the best MSE
        bool checkDone = false;                 // if true, check to see if we're done
        double bestMSE = double.MaxValue;       // best validation MSE so far
        double bestAccuracy = double.MaxValue;  // validation accuracy at the best MSE

        Console.WriteLine("Epoch\tMSE (validation)\taccuracy (validation)");
        if (m_outputFile != null)
        {
            m_outputFile.Write("Layers: ");
            for (var l = 0; l < m_lCount.Length - 1; l++)
            {
                m_outputFile.Write(m_lCount[l]);
                m_outputFile.Write('x');
            }
            m_outputFile.WriteLine(m_lCount[m_lCount.Length - 1]);
            m_outputFile.WriteLine("Momentum: " + m_momentum);
            m_outputFile.WriteLine();
            m_outputFile.WriteLine("Weights");
            PrintWeights();
            m_outputFile.WriteLine("Epoch\tMSE (validation)\taccuracy (validation)");
        }

        CudafyModule km = CudafyTranslator.Cudafy();

        GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
        gpu.LoadModule(km);

        for (; ;)
        {
            // Advance the epoch counter. Without this the epoch cap below,
            // the "epoch == 1" branch and the best-weights restore after
            // the loop (which needs bestEpoch > 0) could never trigger.
            epoch++;

            // shuffle the training set
            Shuffle(ref fIdx, m_rand);

            double[,] g_trainFeatures = gpu.CopyToDevice(trainFeatures);
            double[,] g_trainLabels = gpu.CopyToDevice(trainLabels);
            int[] g_fIdx = gpu.CopyToDevice(fIdx);
            int[] g_lCount = gpu.CopyToDevice(m_lCount);
            int[] g_lBegIdx = gpu.CopyToDevice(m_lBegIdx);
            Node[] g_nodes = gpu.CopyToDevice(m_nodes);
            double[] g_weights = gpu.CopyToDevice(m_weights);
            double[] g_deltas = gpu.CopyToDevice(m_deltas);

            // Launch trainSize / 256 blocks of 256 threads each.
            // NOTE(review): the integer division truncates, so the grid is
            // empty when trainSize < 256 and the launch covers fewer than
            // trainSize threads otherwise. Whether that skips rows depends
            // on the kernel, which is not visible here - confirm before
            // rounding the grid size up.
            gpu.Launch(trainSize / 256, 256).TrainEpoch(g_trainFeatures, g_trainLabels, g_fIdx, g_lCount, g_lBegIdx, g_nodes, g_weights, g_deltas, m_rate, m_momentum);

            // copy the arrays back from the GPU to the CPU
            gpu.CopyFromDevice(g_weights, m_weights);
            gpu.CopyFromDevice(g_deltas, m_deltas);
            gpu.CopyFromDevice(g_fIdx, fIdx);

            // free the memory allocated on the GPU
            gpu.FreeAll();

            // check the MSE after this epoch
            double mse = VGetMSE(validationFeatures, validationLabels);

            // check the validation accuracy after this epoch
            double accuracy = VMeasureAccuracy(validationFeatures, validationLabels, null);

            Console.WriteLine(string.Format("{0}-{1}\t{2}\t{3}", epoch, eCount, mse, accuracy));
            if (m_outputFile != null)
            {
                m_outputFile.WriteLine(string.Format("{0}-{1}\t{2}\t{3}", epoch, eCount, mse, accuracy));
                m_outputFile.Flush();
            }

            if ((mse == 0.0) || (epoch > 10000))
            {
                break;
            }
            else if ((epoch == 1) || (mse < bestMSE))
            {
                if (epoch == 1)
                {
                    // save the initial MSE
                    bestMSE = mse;
                }
                else if ((mse / bestMSE) > 0.99)
                {
                    // Improved, but by less than 1%: start (or keep)
                    // counting towards the stop threshold.
                    if (!checkDone)
                    {
                        checkDone = true;
                        eCount = 0;
                    }
                }
                else
                {
                    // Meaningful improvement: reset the stop counter.
                    checkDone = false;
                    eCount = 0;
                }

                // save the best for later
                bestMSE = mse;
                bestAccuracy = accuracy;
                bestEpoch = epoch;
                SaveBestWeights();
            }
            else if (!checkDone)
            {
                checkDone = true;
                eCount = 0;
            }

            if (checkDone)
            {
                // check to see if we're done
                eCount++;
                if (eCount >= 20)
                {
                    break;
                }
            }
        }

        if (m_outputFile != null)
        {
            m_outputFile.WriteLine();
            m_outputFile.WriteLine("Weights");
            PrintWeights();
        }

        // Fall back to the best weights unless the last epoch was the best.
        if ((bestEpoch > 0) && (bestEpoch != epoch))
        {
            RestoreBestWeights();
            if (m_outputFile != null)
            {
                m_outputFile.WriteLine();
                m_outputFile.WriteLine(string.Format("Best Weights (from Epoch {0}, valMSE={1}, valAcc={2})", bestEpoch, bestMSE, bestAccuracy));
                PrintWeights();
            }
        }
    }
    finally
    {
        if (m_outputFile != null)
        {
            m_outputFile.Close();
        }
    }
}