// Constructor
public ClassifierPerformance(ClassificationData classificationData)
{
    this.data = classificationData;
    numSamples = data.Samples.Count;

    // Collect the distinct categories present in the data
    foreach (ISample sample in data.Samples)
    {
        if (!categories.Contains(sample.Category))
        {
            categories.Add(sample.Category);
        }
    }
    this.numCategories = categories.Count;
    if (numCategories == 0)
    {
        throw new ArgumentException("The classification data contains no categories");
    }

    // Initialize the results (confusion) matrix
    results = new int[numCategories, numCategories];
    for (int i = 0; i < numCategories; i++)
    {
        for (int j = 0; j < numCategories; j++)
        {
            results[i, j] = 0;
        }
    }
}
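The constructor only allocates the results matrix; it is filled one prediction at a time by the Add method that Test calls further below. Add itself is not part of this listing, so the following is a minimal sketch consistent with the fields above, assuming rows index the predicted category and columns the actual one:

// Hypothetical sketch; the actual Add method is not shown in this listing.
// Assumes rows index the predicted category and columns the actual one.
public void Add(string predictedCategory, string actualCategory)
{
    int row = categories.IndexOf(predictedCategory);
    int col = categories.IndexOf(actualCategory);
    if (row < 0 || col < 0)
    {
        throw new ArgumentException("Unknown category");
    }
    results[row, col]++;
}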
// Training algorithm: backpropagation
public override bool Train(ClassificationData trainingData)
{
    // Initialize the network: set random weights for the hidden layer
    for (int i = 0; i < hiddenLayer.Count; i++)
    {
        hiddenLayer[i].LoadRandomWeights();
    }

    // Set random weights for the second layer
    secondLayer.LoadRandomWeights();

    // Go through all the samples until the max number of repetitions
    int repetitions = 0;
    while (repetitions < maxRepetitions)
    {
        // Go through all samples
        for (int i = 0; i < trainingData.Samples.Count; i++)
        {
            // Compute the predicted result and the error signal
            double error;
            string result = Classify(trainingData.Samples[i]);
            string expected = trainingData.Samples[i].Category;
            if ((result == "good") && (expected == "bad"))
            {
                error = -1.0;
            }
            else if ((result == "bad") && (expected == "good"))
            {
                error = 1.0;
            }
            else
            {
                // No error; go on to the next sample
                continue;
            }

            // Backpropagate through the second layer
            secondLayer.BackPropagate(learningRate, error);

            // Backpropagate through the hidden (first) layer
            for (int j = 0; j < hiddenLayer.Count; j++)
            {
                FirstLayerNeuron neuron = (FirstLayerNeuron)hiddenLayer[j];
                neuron.BackPropagate(learningRate, error, secondLayer);
            }
        }

        // Next repetition
        repetitions++;
    }

    // Mark the classifier as trained
    isTrained = true;
    return true;
}
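The training loop delegates the weight updates to BackPropagate methods that are not shown in this listing. As a rough sketch of what the second-layer update might look like, assuming a simple perceptron-style rule and assumed field names weights and lastInputs:

// Hypothetical sketch of SecondLayerNeuron.BackPropagate; the field names
// weights and lastInputs are assumptions, not taken from the listings.
public void BackPropagate(double learningRate, double error)
{
    for (int i = 0; i < weights.Count; i++)
    {
        // Nudge each weight in the direction indicated by the signed error
        weights[i] += learningRate * error * lastInputs[i];
    }
}

The hidden-layer overload presumably scales the error by each neuron's contribution to the second layer, which would explain the extra secondLayer argument it receives.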
// Test the classifier
public override ClassifierPerformance Test(ClassificationData testData)
{
    ClassifierPerformance perf = new ClassifierPerformance(testData);
    foreach (ISample sample in testData.Samples)
    {
        // Obtain the predicted category
        string predictedCategory = Classify(sample);

        // Compare with the actual category
        string actualCategory = sample.Category;
        perf.Add(predictedCategory, actualCategory);
    }
    return perf;
}
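Taken together, Train and Test suggest the following call sequence. NeuralNetClassifier is a placeholder name here, since the concrete class that declares these overrides is not shown:

// Hypothetical usage sketch; NeuralNetClassifier is an assumed name
NeuralNetClassifier classifier = new NeuralNetClassifier();
if (classifier.Train(trainData))
{
    // The returned performance object holds the filled confusion matrix
    ClassifierPerformance perf = classifier.Test(testData);
}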
// Extract data with the given generic criteria
public void Extract(ISampleSelector sampleSelector, IFeatureSelector featureSelector)
{
    // TODO: Change to Q or Y data
    // Get the table of financial data
    Table<QLSample> qlSamples = dc.GetTable<QLSample>();

    // Copy the data to the list of training cases
    data = new ClassificationData();
    foreach (QLSample sample in qlSamples)
    {
        // Apply the sample selection criteria
        if (sampleSelector.Select(sample))
        {
            // Apply the feature selection criteria
            data.Samples.Add(featureSelector.Select(sample));
        }
    }
}
/*
 * Operations that affect all the samples as a whole
 */

// Normalize the data by range:
// each feature will get new values in the range [lower, upper]
public ClassificationData NormalizeToRange(double lower, double upper)
{
    // Check the boundaries of the target interval
    if (upper <= lower)
    {
        throw new ArgumentException("The upper bound must be greater than the lower bound");
    }

    // Initialize the ranges
    int numFeatures = samples[0].FeatureVector.Count;
    double[] minFeatureValues = new double[numFeatures];
    double[] maxFeatureValues = new double[numFeatures];
    for (int i = 0; i < numFeatures; i++)
    {
        minFeatureValues[i] = double.MaxValue;
        maxFeatureValues[i] = double.MinValue;
    }

    // First calculate the range of each feature
    foreach (ISample sample in samples)
    {
        for (int iFeature = 0; iFeature < numFeatures; iFeature++)
        {
            // Update the min
            if (sample.FeatureVector[iFeature] < minFeatureValues[iFeature])
            {
                minFeatureValues[iFeature] = sample.FeatureVector[iFeature];
            }

            // Update the max
            if (sample.FeatureVector[iFeature] > maxFeatureValues[iFeature])
            {
                maxFeatureValues[iFeature] = sample.FeatureVector[iFeature];
            }
        }
    }

    // Prevent division by zero: if max == min, every sample has the same
    // value for that feature, so bumping max to min + 1 maps all the
    // transformed values to the lower bound
    for (int i = 0; i < numFeatures; i++)
    {
        if (maxFeatureValues[i] == minFeatureValues[i])
        {
            maxFeatureValues[i]++;
        }
    }

    // Create the new samples
    ClassificationData result = new ClassificationData();
    for (int i = 0; i < samples.Count; i++)
    {
        SampleBasic sample = new SampleBasic();
        sample.Description = samples[i].Description;
        sample.FeatureVector = new List<double>(numFeatures);
        sample.Classify(samples[i].Category);
        result.Samples.Add(sample);
    }

    // Obtain the new feature values by a linear transformation:
    // feature = (original - min) / (max - min) goes from 0 to 1
    // feature = feature * (upper - lower) + lower goes from lower to upper
    double featureValue;
    for (int iSample = 0; iSample < samples.Count; iSample++)
    {
        for (int iFeature = 0; iFeature < numFeatures; iFeature++)
        {
            featureValue = samples[iSample].FeatureVector[iFeature];

            // Transform to [0, 1]
            featureValue = (featureValue - minFeatureValues[iFeature]) /
                (maxFeatureValues[iFeature] - minFeatureValues[iFeature]);

            // Transform to [lower, upper]
            featureValue = featureValue * (upper - lower) + lower;

            // The new feature vectors are empty and filled in feature order,
            // so a plain Add appends each value at the right index
            result.Samples[iSample].FeatureVector.Add(featureValue);
        }
    }
    return result;
}
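To make the two-step transform concrete, a quick check with assumed numbers: a feature observed between 10 and 50, normalized to [-1, 1], maps the value 30 to the midpoint:

// Worked example with assumed values: min = 10, max = 50, target [-1, 1]
double value = 30.0;
double unit = (value - 10.0) / (50.0 - 10.0);    // (30 - 10) / 40 = 0.5
double scaled = unit * (1.0 - (-1.0)) + (-1.0);  // 0.5 * 2 - 1 = 0.0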
// Split the data randomly between train and test: x% train, (100 - x)% test
private void LoadRandomSplit()
{
    ClassificationData allData = new ClassificationData();

    // Get all the data
    if (useQData)
    {
        Table<QLSample> qlData = dc.GetTable<QLSample>();
        foreach (QLSample sample in qlData)
        {
            allData.Samples.Add(sample);
        }
    }
    if (useYData)
    {
        Table<YLSample> ylData = dc.GetTable<YLSample>();
        foreach (YLSample sample in ylData)
        {
            allData.Samples.Add(sample);
        }
    }

    // Shuffle the data and split
    allData.Shuffle();
    List<ClassificationData> splits = allData.Split(randomSplitPercentage);
    trainData = splits[0];
    testData = splits[1];
}
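Shuffle and Split are not shown in the listings. Under the obvious assumptions (a Fisher-Yates shuffle over Samples, and an integer percentage that cuts the shuffled list in order), minimal sketches might look like this:

// Hypothetical sketches; the real Shuffle and Split are not shown here
public void Shuffle()
{
    Random rng = new Random();
    for (int i = Samples.Count - 1; i > 0; i--)
    {
        // Fisher-Yates: swap each element with a randomly chosen earlier one
        int j = rng.Next(i + 1);
        ISample temp = Samples[i];
        Samples[i] = Samples[j];
        Samples[j] = temp;
    }
}

public List<ClassificationData> Split(int percentage)
{
    // Assumes the list is already shuffled; the first cut goes to training
    int cut = Samples.Count * percentage / 100;
    ClassificationData first = new ClassificationData();
    ClassificationData second = new ClassificationData();
    for (int i = 0; i < Samples.Count; i++)
    {
        if (i < cut)
        {
            first.Samples.Add(Samples[i]);
        }
        else
        {
            second.Samples.Add(Samples[i]);
        }
    }
    return new List<ClassificationData> { first, second };
}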
// Executes the net with the data provided and shows the results
public void Analyze(object sender, EventArgs e)
{
    if (!HasAnalysisData)
    {
        MessageBox.Show("Please load analysis data first");
        return;
    }
    if (!HasTrainedClassifier)
    {
        MessageBox.Show("Please train the classifier first");
        return;
    }

    // Normalize the analysis data
    analysisData = analysisData.NormalizeToRange(-1, 1);

    // Analyze the data
    positiveResultData.Samples.Clear();
    negativeResultData.Samples.Clear();
    foreach (ISample sample in analysisData.Samples)
    {
        bool isGoodInvestment = classifier.Classify(sample, "good");
        if (isGoodInvestment)
        {
            positiveResultData.Samples.Add(sample);
        }
        else
        {
            negativeResultData.Samples.Add(sample);
        }
    }

    // Show results
    form.OverweightList.Items.Clear();
    foreach (string description in PositiveResultDataDescriptions)
    {
        form.OverweightList.Items.Add(description);
    }
    form.UnderweightList.Items.Clear();
    foreach (string description in NegativeResultDataDescriptions)
    {
        form.UnderweightList.Items.Add(description);
    }
}
// Load the data to run the net against
public void LoadAnalysisData(object sender, EventArgs e)
{
    // The training data should be loaded first
    if (!HasTrainingData)
    {
        MessageBox.Show("Please load training data first");
        return;
    }

    // Show the loading analysis data window
    LoadAnalysisDataForm loadForm = new LoadAnalysisDataForm(dc);
    if (loadForm.ShowDialog() == DialogResult.OK)
    {
        // Get the data
        analysisData = loadForm.AnalysisData;

        // Show the analysis data on the screen
        form.AnalysisList.Items.Clear();
        foreach (string description in AnalysisDataDescriptions)
        {
            form.AnalysisList.Items.Add(description);
        }
    }
    else
    {
        MessageBox.Show("Please load the data to analyze");
        return;
    }
}
// Trains the neural net using the training data
public void Train(object sender, EventArgs e)
{
    if (!HasTrainingData)
    {
        MessageBox.Show("Please load training data first");
        return;
    }
    if (!HasClassifierAvaliable)
    {
        MessageBox.Show("Please set the training parameters first");
        return;
    }

    // Normalize the training data
    trainData = trainData.NormalizeToRange(-1, 1);

    // Train the classifier
    bool classifierTrained = classifier.Train(trainData);
    if (!classifierTrained)
    {
        MessageBox.Show("Unable to train");
        return;
    }

    // Test the classifier and show the results
    ClassifierPerformance perf;
    if (HasTestData)
    {
        // Normalize the test data
        testData = testData.NormalizeToRange(-1, 1);

        // Test the classifier with the test data
        perf = classifier.Test(testData);
    }
    else
    {
        // Otherwise use the same training data
        perf = classifier.Test(trainData);
    }

    // Show the results
    TestResultsForm testForm = new TestResultsForm(perf);
    testForm.ShowDialog();
}
// Load training data
public void LoadTrainingData(object sender, EventArgs e)
{
    // Initialize the data context
    InitializeDataContext();
    if (dc == null)
    {
        MessageBox.Show("Database file not found! Please load the database first");
        return;
    }

    // Show the loading training data window
    LoadTrainTestDataForm loadForm = new LoadTrainTestDataForm(dc);
    if (loadForm.ShowDialog() == DialogResult.OK)
    {
        // Get the data
        trainData = loadForm.TrainData;
        testData = loadForm.TestData;

        // Show the data on the screen
        form.TrainingList.Items.Clear();
        foreach (string description in TrainingDataDescriptions)
        {
            form.TrainingList.Items.Add(description);
        }
    }
    else
    {
        MessageBox.Show("Please load the training data");
        return;
    }
}