/// <summary>
/// Cross-validates a random forest on the data returned by
/// <c>TestDataBuilder.ReadCongressData()</c>, passing "party" as the column
/// argument, and asserts that the first returned report has an accuracy of
/// at least 0.9.
/// </summary>
public void DiscreteClassification_DiscreteFeatures_MultiValuesSplits_CongressVoting()
{
    // Given
    // The builder and the forest predictor each get their own tree predictor instance.
    var builderTreePredictor = new DecisionTreePredictor<object>();
    var builderQualityMeasure = new ConfusionMatrixBuilder<object>();
    var forestBuilder = new RandomForestModelBuilder<object>(
        multiValueTreeBuilderWithBetterNumercValsHandler,
        builderTreePredictor,
        builderQualityMeasure,
        // Rounded square root of the input count
        // (presumably the number of features tried per tree - TODO confirm).
        i => (int)Math.Round(Math.Sqrt(i), MidpointRounding.AwayFromZero),
        () => new DecisionTreeModelBuilderParams(false));

    var forestTreePredictor = new DecisionTreePredictor<object>();
    var forestPredictor = new RandomForestPredictor<object>(forestTreePredictor, true);

    var congressData = TestDataBuilder.ReadCongressData();
    var validator = new CrossValidator<object>();

    // When
    // NOTE(review): 0.7 and 1 look like the training fraction and the number
    // of validation rounds - confirm against CrossValidate's signature.
    var forestParams = new RandomForestParams(100, 10);
    var validationQualityMeasure = new ConfusionMatrixBuilder<object>();
    var reports = validator.CrossValidate(
        forestBuilder,
        forestParams,
        forestPredictor,
        validationQualityMeasure,
        congressData,
        "party",
        0.7,
        1);
    var firstReport = reports.First();

    // Then
    Assert.IsTrue(firstReport.Accuracy >= 0.9);
}
/// <summary>
/// Builds a random forest over the congress voting data read through
/// <c>TestDataBuilder.ReadCongressData()</c>, cross-validates it with "party"
/// as the column argument, and requires the first resulting report to reach
/// an accuracy of 0.9 or more.
/// </summary>
public void DiscreteClassification_DiscreteFeatures_MultiValuesSplits_CongressVoting()
{
    // Given
    var modelBuilder = new RandomForestModelBuilder<object>(
        multiValueTreeBuilderWithBetterNumercValsHandler,
        new DecisionTreePredictor<object>(),
        new ConfusionMatrixBuilder<object>(),
        // Square root of the input, rounded half away from zero
        // (presumably a per-tree feature count - verify against the builder).
        i => (int)Math.Round(Math.Sqrt(i), MidpointRounding.AwayFromZero),
        () => new DecisionTreeModelBuilderParams(false));
    var predictor = new RandomForestPredictor<object>(
        new DecisionTreePredictor<object>(),
        true);
    var votingData = TestDataBuilder.ReadCongressData();
    var validator = new CrossValidator<object>();

    // When
    // Only the first report produced by the cross-validation is inspected.
    var report = validator
        .CrossValidate(
            modelBuilder,
            new RandomForestParams(100, 10),
            predictor,
            new ConfusionMatrixBuilder<object>(),
            votingData,
            "party",
            0.7,
            1)
        .First();

    // Then
    Assert.IsTrue(report.Accuracy >= 0.9);
}
/// <summary>
/// Cross-validates a random forest on the data returned by
/// <c>TestDataBuilder.ReadTitanicData()</c> after dropping the
/// "FarePerPerson", "PassengerId" and "FamilySize" columns, passing "Survived"
/// as the column argument, and asserts that the accuracy averaged over all
/// returned reports is at least 0.75.
/// </summary>
/// <remarks>
/// A previously commented-out experiment (repeated random train/test splits that
/// kept the most accurate model and wrote its predictions to a CSV file under a
/// developer-specific local path) was removed as dead code; it remains available
/// in version control history.
/// </remarks>
public void DiscreteClassification_MixedFeatures_MultiValueSplits_CleanedTitanicData()
{
    // Given
    var randomForestBuilder = new RandomForestModelBuilder<object>(
        multiValueTreeBuilderWithBetterNumercValsHandler,
        new DecisionTreePredictor<object>(),
        new ConfusionMatrixBuilder<object>(),
        // Rounded square root of the input count
        // (presumably the number of features tried per tree - TODO confirm).
        i => (int)Math.Round(Math.Sqrt(i), MidpointRounding.AwayFromZero),
        () => new DecisionTreeModelBuilderParams(false, true));
    var randomForestPredictor = new RandomForestPredictor<object>(new DecisionTreePredictor<object>());

    // These columns are excluded from the data set before training.
    var excludedColumns = new[] { "FarePerPerson", "PassengerId", "FamilySize" };
    var baseData = TestDataBuilder.ReadTitanicData();
    baseData = baseData.GetSubsetByColumns(baseData.ColumnNames.Except(excludedColumns).ToList());
    var crossValidator = new CrossValidator<object>();

    // When
    // NOTE(review): 0.75 and 1 look like the training fraction and the number
    // of validation rounds - confirm against CrossValidate's signature.
    var reports = crossValidator.CrossValidate(
        randomForestBuilder,
        new RandomForestParams(200, 10),
        randomForestPredictor,
        new ConfusionMatrixBuilder<object>(),
        baseData,
        "Survived",
        0.75,
        1);

    // Then
    var averageAccuracy = reports.Select(report => report.Accuracy).Average();
    Assert.IsTrue(
        averageAccuracy >= 0.75,
        "Expected an average cross-validation accuracy of at least 0.75 but got " + averageAccuracy);
}
/// <summary>
/// Trains and cross-validates a random forest on the Titanic data read through
/// <c>TestDataBuilder.ReadTitanicData()</c>, with the "FarePerPerson",
/// "PassengerId" and "FamilySize" columns removed and "Survived" passed as the
/// column argument. The test passes when the mean accuracy over all returned
/// reports is 0.75 or higher.
/// </summary>
/// <remarks>
/// The commented-out experiment that used to follow the assertion (best-of-ten
/// model selection over random splits, ending in a CSV export to a hard-coded
/// local user directory) was deleted as dead code; recover it from version
/// control if it is needed again.
/// </remarks>
public void DiscreteClassification_MixedFeatures_MultiValueSplits_CleanedTitanicData()
{
    // Given
    var randomForestBuilder = new RandomForestModelBuilder<object>(
        multiValueTreeBuilderWithBetterNumercValsHandler,
        new DecisionTreePredictor<object>(),
        new ConfusionMatrixBuilder<object>(),
        // Square root of the input, rounded half away from zero
        // (presumably a per-tree feature count - verify against the builder).
        i => (int)Math.Round(Math.Sqrt(i), MidpointRounding.AwayFromZero),
        () => new DecisionTreeModelBuilderParams(false, true));
    var randomForestPredictor = new RandomForestPredictor<object>(new DecisionTreePredictor<object>());

    var baseData = TestDataBuilder.ReadTitanicData();
    // Keep every column except the three listed here.
    var columnsToDrop = new[] { "FarePerPerson", "PassengerId", "FamilySize" };
    var keptColumns = baseData.ColumnNames.Except(columnsToDrop).ToList();
    baseData = baseData.GetSubsetByColumns(keptColumns);
    var crossValidator = new CrossValidator<object>();

    // When
    var qualityReports = crossValidator.CrossValidate(
        randomForestBuilder,
        new RandomForestParams(200, 10),
        randomForestPredictor,
        new ConfusionMatrixBuilder<object>(),
        baseData,
        "Survived",
        0.75,
        1);

    // Then
    // Report the measured value on failure so a regression is diagnosable from the test log.
    var meanAccuracy = qualityReports.Select(report => report.Accuracy).Average();
    Assert.IsTrue(
        meanAccuracy >= 0.75,
        "Expected an average cross-validation accuracy of at least 0.75 but got " + meanAccuracy);
}