/// <summary>
/// Builds a prediction model, identifying the dependent feature by its column index.
/// </summary>
/// <param name="dataFrame">The training data.</param>
/// <param name="dependentFeatureIndex">Index of the dependent (target) column in <paramref name="dataFrame"/>.</param>
/// <param name="additionalParams">Builder-specific parameters.</param>
/// <returns>The trained prediction model.</returns>
public IPredictionModel BuildModel(
    IDataFrame dataFrame,
    int dependentFeatureIndex,
    IModelBuilderParams additionalParams)
{
    // Translate the column index to its name and delegate to the name-based overload.
    var dependentFeatureName = dataFrame.ColumnNames[dependentFeatureIndex];
    return BuildModel(dataFrame, dependentFeatureName, additionalParams);
}
/// <summary>
/// Verifies that the supplied builder parameters are KNN-specific.
/// </summary>
/// <param name="additionalParams">The parameters to validate.</param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="additionalParams"/> does not implement <see cref="IKnnAdditionalParams"/>.
/// </exception>
protected virtual void ValidateAdditionalParams(IModelBuilderParams additionalParams)
{
    var paramsAreValid = additionalParams is IKnnAdditionalParams;
    if (paramsAreValid)
    {
        return;
    }

    throw new ArgumentException("Invalid parameters type!");
}
/// <summary>
/// Builds a prediction model, identifying the dependent feature by its column index.
/// </summary>
/// <param name="dataFrame">The training data.</param>
/// <param name="dependentFeatureIndex">Index of the dependent (target) column in <paramref name="dataFrame"/>.</param>
/// <param name="additionalParams">Builder-specific parameters.</param>
/// <returns>The trained prediction model.</returns>
public IPredictionModel BuildModel(
    IDataFrame dataFrame,
    int dependentFeatureIndex,
    IModelBuilderParams additionalParams)
{
    // Forward to the name-based overload using the resolved column name.
    return BuildModel(dataFrame, dataFrame.ColumnNames[dependentFeatureIndex], additionalParams);
}
/// <summary>
/// Builds a regularized linear regression model for the given dependent feature.
/// </summary>
/// <param name="dataFrame">The training data.</param>
/// <param name="dependentFeatureName">Name of the dependent (target) column.</param>
/// <param name="additionalParams">Must implement <see cref="ILinearRegressionParams"/>.</param>
/// <returns>The trained prediction model.</returns>
/// <exception cref="ArgumentException">Thrown when the parameters are not linear-regression parameters.</exception>
public IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
{
    var linearRegressionParams = additionalParams as ILinearRegressionParams;
    if (linearRegressionParams == null)
    {
        throw new ArgumentException("Invalid parameters passed to Regularized Linear Regression model builder!");
    }

    // Design matrix: every column except the dependent one, with an intercept column included.
    var featureColumnNames = dataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList();
    var matrixX = dataFrame.GetSubsetByColumns(featureColumnNames).GetAsMatrixWithIntercept();
    var vectorY = dataFrame.GetNumericColumnVector(dependentFeatureName);

    return BuildModel(matrixX, vectorY, linearRegressionParams);
}
/// <summary>
/// Recursively builds a decision tree model for the given dependent feature.
/// </summary>
/// <param name="dataFrame">The training data for this (sub)tree.</param>
/// <param name="dependentFeatureName">Name of the dependent (target) column.</param>
/// <param name="additionalParams">Must implement <see cref="IDecisionTreeModelBuilderParams"/>.</param>
/// <returns>The root node of the built (sub)tree, or a leaf when no further split is made.</returns>
/// <exception cref="ArgumentException">Thrown when the parameters have the wrong type.</exception>
public IPredictionModel BuildModel(
    IDataFrame dataFrame,
    string dependentFeatureName,
    IModelBuilderParams additionalParams)
{
    var decisionTreeParams = additionalParams as IDecisionTreeModelBuilderParams;
    if (decisionTreeParams == null)
    {
        throw new ArgumentException("Invalid params passed for Decision Tree Model Builder!");
    }

    // Recursion base case: stop splitting and emit a leaf.
    // NOTE(review): the helper's name contains a typo ("Recusrsive"); it is defined elsewhere in the project.
    if (ShouldStopRecusrsiveBuilding(dataFrame, dependentFeatureName))
    {
        return BuildLeaf(dataFrame, dependentFeatureName);
    }

    var runSubtreesInParallel = decisionTreeParams.ProcessSubtreesCreationInParallel;
    var usedAttributesInfo = new AlreadyUsedAttributesInfo();

    //TODO: reduce the number of parameters - maybe some nicer DTO?
    return this.BuildDecisionNode(
        dataFrame,
        dependentFeatureName,
        decisionTreeParams,
        usedAttributesInfo,
        0,
        runSubtreesInParallel);
}
/// <summary>
/// Runs k-fold-style cross-validation: repeatedly splits the rows into a training window
/// and a held-out test set, trains a model on the training rows, predicts the test rows,
/// and collects a quality report per fold.
/// </summary>
/// <param name="modelBuilder">Builder used to train a model on each fold's training rows.</param>
/// <param name="modelBuilderParams">Parameters forwarded to the model builder.</param>
/// <param name="predictor">Predictor used to score the held-out rows with the trained model.</param>
/// <param name="qualityMeasure">Measure producing a report from expected vs. predicted values.</param>
/// <param name="dataFrame">The full data set.</param>
/// <param name="dependentFeatureName">Name of the dependent (target) column.</param>
/// <param name="percetnagOfTrainData">Fraction of rows used for training in each fold (0..1).
/// NOTE(review): parameter name is misspelled ("percetnag"); renaming would break named-argument callers.</param>
/// <param name="folds">Number of train/test iterations to run.</param>
/// <returns>One quality report per fold, in iteration order.</returns>
public IList <IDataQualityReport <TPredictionResult> > CrossValidate(
    IPredictionModelBuilder modelBuilder,
    IModelBuilderParams modelBuilderParams,
    IPredictor <TPredictionResult> predictor,
    IDataQualityMeasure <TPredictionResult> qualityMeasure,
    IDataFrame dataFrame,
    string dependentFeatureName,
    double percetnagOfTrainData,
    int folds)
{
    var trainingDataCount = (int)Math.Round(percetnagOfTrainData * dataFrame.RowCount);
    var testDataCount = dataFrame.RowCount - trainingDataCount;
    // Single shuffled ordering of all row indices; folds slide a window over it.
    var shuffledAllIndices = dataFrame.RowIndices.Shuffle(_randomizer);
    // How many disjoint test windows fit into the data.
    // NOTE(review): divides by testDataCount — this throws DivideByZeroException when
    // percetnagOfTrainData rounds to 1.0 (testDataCount == 0); confirm callers guard against that.
    var maxWindowsCount = dataFrame.RowCount / testDataCount;
    var iterationAccuracies = new List <IDataQualityReport <TPredictionResult> >();
    var currentWindowNo = 0;
    for (var i = 0; i < folds; i++)
    {
        // All windows consumed: reshuffle and start sliding from the beginning again.
        // NOTE(review): this reshuffle uses the parameterless Shuffle() overload rather than
        // Shuffle(_randomizer) as above — possibly an unintended inconsistency; verify.
        if (currentWindowNo == maxWindowsCount)
        {
            currentWindowNo = 0;
            shuffledAllIndices = shuffledAllIndices.Shuffle();
        }
        var offset = currentWindowNo * testDataCount;
        // Training rows: a contiguous window of the shuffled ordering.
        // NOTE(review): near the end of the ordering Skip/Take can yield fewer than
        // trainingDataCount rows (no wrap-around), making that fold's test set larger — confirm intended.
        var trainingIndices = shuffledAllIndices.Skip(offset).Take(trainingDataCount).ToList();
        var trainingData = dataFrame.GetSubsetByRows(trainingIndices);
        // Test rows: everything not used for training in this fold.
        var testIndices = shuffledAllIndices.Except(trainingIndices).ToList();
        var testData = dataFrame.GetSubsetByRows(testIndices);
        IPredictionModel model = modelBuilder.BuildModel(trainingData, dependentFeatureName, modelBuilderParams);
        IList <TPredictionResult> predictions = predictor.Predict(testData, model, dependentFeatureName);
        IList <TPredictionResult> expected = testData.GetColumnVector <TPredictionResult>(dependentFeatureName);
        IDataQualityReport <TPredictionResult> qualityReport = qualityMeasure.GetReport(expected, predictions);
        iterationAccuracies.Add(qualityReport);
        currentWindowNo++;
    }
    return(iterationAccuracies);
}
/// <summary>
/// Recursively builds a decision tree model for the given dependent feature.
/// </summary>
/// <param name="dataFrame">The training data for this (sub)tree.</param>
/// <param name="dependentFeatureName">Name of the dependent (target) column.</param>
/// <param name="additionalParams">Must implement <see cref="IDecisionTreeModelBuilderParams"/>.</param>
/// <returns>The root node of the built (sub)tree, or a leaf when no further split is made.</returns>
/// <exception cref="ArgumentException">Thrown when the parameters have the wrong type.</exception>
public IPredictionModel BuildModel(
    IDataFrame dataFrame,
    string dependentFeatureName,
    IModelBuilderParams additionalParams)
{
    if (!(additionalParams is IDecisionTreeModelBuilderParams))
    {
        throw new ArgumentException("Invalid params passed for Decision Tree Model Builder!");
    }

    // Recursion base case: no further splits are worthwhile, so emit a leaf.
    // NOTE(review): the helper's name contains a typo ("Recusrsive"); it is defined elsewhere in the project.
    var stopRecursion = ShouldStopRecusrsiveBuilding(dataFrame, dependentFeatureName);
    if (stopRecursion)
    {
        return BuildLeaf(dataFrame, dependentFeatureName);
    }

    var decisionTreeParams = (IDecisionTreeModelBuilderParams)additionalParams;

    //TODO: reduce the number of parameters - maybe some nicer DTO?
    var rootNode = this.BuildDecisionNode(
        dataFrame,
        dependentFeatureName,
        decisionTreeParams,
        new AlreadyUsedAttributesInfo(),
        0,
        decisionTreeParams.ProcessSubtreesCreationInParallel);

    return rootNode;
}
/// <summary>
/// Builds a KNN model using backwards feature elimination.
/// </summary>
/// <param name="dataFrame">The training data.</param>
/// <param name="dependentFeatureName">Name of the dependent (target) column.</param>
/// <param name="additionalParams">Must implement <see cref="IKnnAdditionalParams"/>.</param>
/// <returns>The trained prediction model.</returns>
public override IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
{
    // Throws ArgumentException when the params are not KNN-specific.
    ValidateAdditionalParams(additionalParams);

    var knnParams = additionalParams as IKnnAdditionalParams;
    return PerformBackwardsElimination(dataFrame, dependentFeatureName, knnParams);
}
/// <summary>
/// Builds a random forest model, identifying the dependent feature by its column index.
/// </summary>
/// <param name="dataFrame">The training data.</param>
/// <param name="dependentFeatureIndex">Index of the dependent (target) column in <paramref name="dataFrame"/>.</param>
/// <param name="additionalParams">Must implement <see cref="IRandomForestModelBuilderParams"/>.</param>
/// <returns>The trained prediction model.</returns>
/// <exception cref="ArgumentException">Thrown when the parameters have the wrong type.</exception>
public IPredictionModel BuildModel(IDataFrame dataFrame, int dependentFeatureIndex, IModelBuilderParams additionalParams)
{
    if (!(additionalParams is IRandomForestModelBuilderParams))
    {
        throw new ArgumentException("Invalid parameters passed to Random Forest Model builder!");
    }

    // Resolve the column name and delegate to the name-based overload.
    var dependentFeatureName = dataFrame.ColumnNames[dependentFeatureIndex];
    return BuildModel(dataFrame, dependentFeatureName, additionalParams);
}
/// <summary>
/// Builds a random forest by bagging: each tree is trained on a bootstrap sample of rows
/// over a randomly chosen subset of feature columns, and its error is measured on the
/// out-of-bag rows it never saw during training.
/// </summary>
/// <param name="dataFrame">The training data.</param>
/// <param name="dependentFeatureName">Name of the dependent (target) column.</param>
/// <param name="additionalParams">Must implement <see cref="IRandomForestModelBuilderParams"/>.</param>
/// <returns>A <see cref="RandomForestModel"/> holding the trees and their out-of-bag errors.</returns>
/// <exception cref="ArgumentException">Thrown when the parameters have the wrong type.</exception>
public IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
{
    var randomForestParams = additionalParams as IRandomForestModelBuilderParams;
    if (randomForestParams == null)
    {
        throw new ArgumentException("Invalid parameters passed to Random Forest Model builder!");
    }

    var trees = new IDecisionTreeNode[randomForestParams.TreesCount];
    var oobErrors = new double[randomForestParams.TreesCount];

    // Number of feature columns sampled per tree; the dependent column is excluded from the pool.
    var columnsCountToTake = featuresToUseCountCalculator(dataFrame.ColumnsCount - 1);
    var featureColumns = dataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList();

    // (Fix: removed unused `randomizer` and `locker` locals — each iteration uses its own
    // seeded Random, and iterations write to distinct array slots, so no lock is needed.)
    Parallel.For(
        0,
        randomForestParams.TreesCount,
        i =>
        {
            // Per-tree RNG seeded with the tree index so each parallel iteration is
            // deterministic and independent. (Fix: `i.GetHashCode()` simplified to `i` —
            // Int32.GetHashCode returns the value itself.)
            var localRandomizer = new Random(i);

            // Bootstrap sample: draw RowCount row indices with replacement.
            var randomlySelectedIndices = Enumerable.Range(0, dataFrame.RowCount)
                .Select(_ => localRandomizer.Next(0, dataFrame.RowCount))
                .ToList();

            // Out-of-bag rows: every row never drawn into the bootstrap sample.
            var outOfBagIndices = Enumerable.Range(0, dataFrame.RowCount).Except(randomlySelectedIndices).ToList();

            // Random feature subset plus the dependent column, used for both training and OOB data.
            // (Fix: removed a dead `new List<string>()` that was immediately overwritten.)
            var columnsToTake = featureColumns.Shuffle(localRandomizer).Take(columnsCountToTake).ToList();
            columnsToTake.Add(dependentFeatureName);

            var baggedTestData = dataFrame.Slice(randomlySelectedIndices, columnsToTake);
            var oobTestData = dataFrame.Slice(outOfBagIndices, columnsToTake);
            var oobExpected = oobTestData.GetColumnVector <TPredictionVal>(dependentFeatureName).Values;

            var decisionTree = decisionTreeModelBuilder.BuildModel(
                baggedTestData,
                dependentFeatureName,
                decisionTreeModelBuilderParamsFactory());

            var prediction = decisionTreePredictor.Predict(oobTestData, decisionTree, dependentFeatureName);

            //TODO: AAA !!! Later on add support for calculating variable importance!!!
            var oobError = dataQualityMeasure.CalculateError(oobExpected, prediction);

            // Each iteration writes only to its own index, so this is safe without locking.
            trees[i] = decisionTree as IDecisionTreeNode;
            oobErrors[i] = oobError;
        });

    return new RandomForestModel(trees, oobErrors);
}
/// <summary>
/// Builds a linear regression model via gradient descent for the given dependent feature.
/// </summary>
/// <param name="dataFrame">The training data.</param>
/// <param name="dependentFeatureName">Name of the dependent (target) column.</param>
/// <param name="additionalParams">Must implement <see cref="ILinearRegressionParams"/>.</param>
/// <returns>The trained prediction model.</returns>
/// <exception cref="ArgumentException">Thrown when the parameters are not linear-regression parameters.</exception>
public IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
{
    if (!(additionalParams is ILinearRegressionParams))
    {
        // (Fix: corrected typo "Desccent" in the error message.)
        throw new ArgumentException("Invalid parameters passed to Gradient Descent model builder!");
    }

    var linearRegressionParams = additionalParams as ILinearRegressionParams;

    // Design matrix: every column except the dependent one, with an intercept column included.
    var featureColumnNames = dataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList();
    var matrixX = dataFrame.GetSubsetByColumns(featureColumnNames).GetAsMatrixWithIntercept();
    var vectorY = dataFrame.GetNumericColumnVector(dependentFeatureName);

    return BuildModel(matrixX, vectorY, linearRegressionParams);
}
/// <summary>
/// Builds a KNN prediction model from the given training data.
/// </summary>
/// <param name="dataFrame">The training data.</param>
/// <param name="dependentFeatureName">Name of the dependent (target) column.</param>
/// <param name="additionalParams">Must implement <see cref="IKnnAdditionalParams"/>.</param>
/// <returns>A <see cref="KnnPredictionModel{TPredictionResult}"/> over the prepared training data.</returns>
public virtual IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
{
    // Throws ArgumentException when the params are not KNN-specific.
    ValidateAdditionalParams(additionalParams);
    var knnParams = (IKnnAdditionalParams)additionalParams;

    // Item1: feature matrix, Item2: target values, Item3: feature column names.
    Tuple <Matrix <double>, IList <TPredictionResult>, IList <string> > preparedData =
        PrepareTrainingData(dataFrame, dependentFeatureName);

    return new KnnPredictionModel <TPredictionResult>(
        preparedData.Item1,
        preparedData.Item2,
        preparedData.Item3,
        knnParams.KNeighbors,
        knnParams.UseWeightedDistances);
}