/// <summary>
/// End-to-end classification test: builds a backwards-elimination KNN model on a
/// shuffled 80/20 split of the iris data and asserts at least 95% accuracy on the
/// held-out rows.
/// </summary>
public void Test_ClassificationWith_BackwardsEliminationKnnModel()
{
    // Given
    // NOTE(review): the original code created `new Random(55)` but never used it —
    // Shuffle() runs unseeded, so the train/test split (and thus the accuracy) is
    // non-deterministic. If Shuffle has a seeded overload, pass a Random(55)
    // through it to make this test repeatable — TODO confirm the Shuffle signature.
    var data = TestDataBuilder.ReadIrisData();
    var trainingDataPercentage = 0.8;
    int trainingDataCount = (int)(data.RowCount * trainingDataPercentage);

    var shuffledIndices = data.RowIndices.Shuffle();
    var trainingIndices = shuffledIndices.Take(trainingDataCount).ToList();
    // Skip() already yields every remaining element; the original trailing
    // Take(shuffledIndices.Count - trainingDataCount) was redundant.
    var testIndices = shuffledIndices.Skip(trainingDataCount).ToList();

    var trainingData = data.GetSubsetByRows(trainingIndices);
    var testData = data.GetSubsetByRows(testIndices);

    var weightingFunction = new GaussianFunction(0.07);
    var predictor = new SimpleKnnClassifier<string>(
        new EuclideanDistanceMeasure(),
        new MinMaxNormalizer(),
        weightingFunction.GetValue);
    var modelBuilder = new BackwardsEliminationKnnModelBuilder<string>(
        new MinMaxNormalizer(),
        predictor,
        new ClassificationAccuracyError<string>());
    var modelParams = new KnnAdditionalParams(3, true);
    var subject = new BackwardsEliminationKnnClassifier<string>(
        new EuclideanDistanceMeasure(),
        new MinMaxNormalizer(),
        weightingFunction.GetValue);

    // When
    var model = modelBuilder.BuildModel(trainingData, "iris_class", modelParams);
    var actualResults = subject.Predict(testData, model, "iris_class");
    var confusionMatrix = new ConfusionMatrix<string>(
        testData.GetColumnVector<string>("iris_class"),
        actualResults);

    // Then
    Assert.IsTrue(confusionMatrix.Accuracy >= 0.95);
}
/// <summary>
/// Regression test: trains a simple KNN regressor on randomly generated numeric
/// data (seeded for repeatability), predicts "F6" for three fixed query rows, and
/// asserts the mean square error against the known linear relationship stays
/// below 0.55.
/// </summary>
public void Test_RegressionWith_SimpleKnnModel()
{
    // Given
    var rng = new Random(55);
    var trainingFrame = TestDataBuilder.BuildRandomAbstractNumericDataFrameWithRedundantAttrs(randomizer: rng);

    // Three hand-written query rows over features F1..F5.
    var queryTable = new DataTable("some data")
    {
        Columns =
        {
            new DataColumn("F1", typeof(double)),
            new DataColumn("F2", typeof(double)),
            new DataColumn("F3", typeof(double)),
            new DataColumn("F4", typeof(double)),
            new DataColumn("F5", typeof(double))
        },
        Rows =
        {
            new object[] { 10, 1, 1, 4, 5 },
            new object[] { 4, 2, 1, 9, 10 },
            new object[] { 2, 1, 1, 3, 7 },
        }
    };
    var queryFrame = new DataFrame(queryTable);

    // Ground truth for each query row, derived from the builder's linear formula.
    var expected = new List<double>();
    for (var rowIdx = 0; rowIdx < queryFrame.RowCount; rowIdx++)
    {
        expected.Add(
            TestDataBuilder.CalcualteLinearlyDependentFeatureValue(
                queryFrame.GetNumericRowVector(rowIdx)));
    }

    var builder = new SimpleKnnModelBuilder<double>();
    var knnParams = new KnnAdditionalParams(4, true);
    var gaussian = new GaussianFunction(0.3);
    var regressor = new SimpleKnnRegressor(
        new EuclideanDistanceMeasure(),
        new MinMaxNormalizer(),
        gaussian.GetValue,
        normalizeNumericValues: true);
    var errorMeasure = new MeanSquareError();

    // When
    var model = builder.BuildModel(trainingFrame, "F6", knnParams);
    var results = regressor.Predict(queryFrame, model, "F6");

    // Then
    var mse = errorMeasure.CalculateError(
        Vector<double>.Build.DenseOfEnumerable(expected),
        Vector<double>.Build.DenseOfEnumerable(results));
    Assert.IsTrue(mse < 0.55);
}
/// <summary>
/// Verifies that the backwards-elimination model builder removes exactly the one
/// redundant feature ("F4") from the seeded random training frame.
/// </summary>
public void TestBuildKnnModel()
{
    // Given
    var randomizer = new Random(55);
    var baseDataFrame = TestDataBuilder.BuildRandomAbstractNumericDataFrameWithRedundantAttrs(randomizer: randomizer, rowCount: 100);
    var weightingFunction = new GaussianFunction(0.3);
    var modelParams = new KnnAdditionalParams(4, true);
    var predictor = new SimpleKnnRegressor(
        new EuclideanDistanceMeasure(),
        new MinMaxNormalizer(),
        weightingFunction.GetValue);
    var subject = new BackwardsEliminationKnnModelBuilder<double>(
        new MinMaxNormalizer(),
        predictor,
        new MeanSquareError());
    var expectedRemovedFeaturesNames = new[] { "F4" };

    // When
    var model = subject.BuildModel(baseDataFrame, "F6", modelParams) as IBackwardsEliminationKnnModel<double>;

    // Then
    // Guard the `as` cast: if the builder ever returns a different model type the
    // test fails with a clear assert message instead of a NullReferenceException.
    Assert.IsNotNull(model);
    Assert.AreEqual(1, model.RemovedFeaturesData.Count);
    // Materialize the projection so AreEquivalent receives a concrete collection
    // (required by MSTest's ICollection signature; harmless under NUnit).
    CollectionAssert.AreEquivalent(
        expectedRemovedFeaturesNames,
        model.RemovedFeaturesData.Select(f => f.FeatureName).ToList());
}