/// <summary>
/// Builds a prediction model using the column at <paramref name="dependentFeatureIndex"/>
/// as the dependent feature; delegates to the name-based overload.
/// </summary>
public IPredictionModel BuildModel(
     IDataFrame dataFrame,
     int dependentFeatureIndex,
     IModelBuilderParams additionalParams)
 {
     var dependentFeatureName = dataFrame.ColumnNames[dependentFeatureIndex];
     return BuildModel(dataFrame, dependentFeatureName, additionalParams);
 }
 /// <summary>
 /// Ensures the supplied parameters are usable by a KNN model builder.
 /// </summary>
 /// <param name="additionalParams">Parameters to validate.</param>
 /// <exception cref="ArgumentException">Thrown when the parameters do not implement <see cref="IKnnAdditionalParams"/>.</exception>
 protected virtual void ValidateAdditionalParams(IModelBuilderParams additionalParams)
 {
     if (additionalParams is IKnnAdditionalParams)
     {
         return;
     }

     throw new ArgumentException("Invalid parameters type!");
 }
 /// <summary>
 /// Resolves the dependent feature's column name from its index and forwards
 /// to the name-based <c>BuildModel</c> overload.
 /// </summary>
 public IPredictionModel BuildModel(
     IDataFrame dataFrame,
     int dependentFeatureIndex,
     IModelBuilderParams additionalParams)
 {
     var featureName = dataFrame.ColumnNames[dependentFeatureIndex];
     return BuildModel(dataFrame, featureName, additionalParams);
 }
 /// <summary>
 /// Builds a regularized linear regression model for <paramref name="dependentFeatureName"/>.
 /// </summary>
 /// <exception cref="ArgumentException">Thrown when the parameters are not linear-regression parameters.</exception>
 public IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
 {
     if (!(additionalParams is ILinearRegressionParams))
     {
         throw new ArgumentException("Invalid parameters passed to Regularized Linear Regression model builder!");
     }

     var regressionParams = (ILinearRegressionParams)additionalParams;

     // Every column except the dependent one becomes a feature; an intercept column is appended.
     var featureColumns = dataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList();
     var matrixX = dataFrame.GetSubsetByColumns(featureColumns).GetAsMatrixWithIntercept();
     var vectorY = dataFrame.GetNumericColumnVector(dependentFeatureName);

     return BuildModel(matrixX, vectorY, regressionParams);
 }
        /// <summary>
        /// Builds a decision tree predicting <paramref name="dependentFeatureName"/>.
        /// Recursion bottoms out in a leaf when no further split is worthwhile.
        /// </summary>
        /// <exception cref="ArgumentException">Thrown when the parameters are not decision-tree parameters.</exception>
        public IPredictionModel BuildModel(
            IDataFrame dataFrame,
            string dependentFeatureName,
            IModelBuilderParams additionalParams)
        {
            if (!(additionalParams is IDecisionTreeModelBuilderParams))
            {
                throw new ArgumentException("Invalid params passed for Decision Tree Model Builder!");
            }

            // Base case: stop splitting and emit a leaf node.
            if (ShouldStopRecusrsiveBuilding(dataFrame, dependentFeatureName))
            {
                return BuildLeaf(dataFrame, dependentFeatureName);
            }

            var treeParams = (IDecisionTreeModelBuilderParams)additionalParams;

            //TODO: reduce the number of parameters - maybe some nicer DTO?
            return this.BuildDecisionNode(
                dataFrame,
                dependentFeatureName,
                treeParams,
                new AlreadyUsedAttributesInfo(),
                0,
                treeParams.ProcessSubtreesCreationInParallel);
        }
        /// <summary>
        /// Runs k-fold-style cross-validation: for each fold a sliding window of the
        /// shuffled row indices is used as training data and the remaining rows as test
        /// data; a quality report is produced per fold.
        /// </summary>
        /// <param name="modelBuilder">Builder used to train a model on each fold's training subset.</param>
        /// <param name="modelBuilderParams">Parameters forwarded to the model builder.</param>
        /// <param name="predictor">Predictor used to score the held-out test subset.</param>
        /// <param name="qualityMeasure">Measure comparing expected vs. predicted values.</param>
        /// <param name="dataFrame">Full data set; rows are partitioned per fold.</param>
        /// <param name="dependentFeatureName">Name of the column being predicted.</param>
        /// <param name="percetnagOfTrainData">Fraction of rows used for training (0..1). NOTE(review): parameter name is misspelled; renaming would break named-argument callers.</param>
        /// <param name="folds">Number of train/test iterations to run.</param>
        /// <returns>One quality report per fold, in iteration order.</returns>
        public IList <IDataQualityReport <TPredictionResult> > CrossValidate(
            IPredictionModelBuilder modelBuilder,
            IModelBuilderParams modelBuilderParams,
            IPredictor <TPredictionResult> predictor,
            IDataQualityMeasure <TPredictionResult> qualityMeasure,
            IDataFrame dataFrame,
            string dependentFeatureName,
            double percetnagOfTrainData,
            int folds)
        {
            var trainingDataCount  = (int)Math.Round(percetnagOfTrainData * dataFrame.RowCount);
            var testDataCount      = dataFrame.RowCount - trainingDataCount;
            var shuffledAllIndices = dataFrame.RowIndices.Shuffle(_randomizer);
            // Number of disjoint test windows that fit into the data before a reshuffle is needed.
            var maxWindowsCount    = dataFrame.RowCount / testDataCount;

            var iterationAccuracies = new List <IDataQualityReport <TPredictionResult> >();
            var currentWindowNo     = 0;

            for (var i = 0; i < folds; i++)
            {
                // All windows consumed: reshuffle and start sliding from the beginning again.
                // NOTE(review): this Shuffle() call takes no randomizer, unlike the initial
                // one above — confirm whether that is intentional (reproducibility differs).
                if (currentWindowNo == maxWindowsCount)
                {
                    currentWindowNo    = 0;
                    shuffledAllIndices = shuffledAllIndices.Shuffle();
                }
                // Training rows are a contiguous slice of the shuffled indices; the slice
                // start advances by testDataCount each fold.
                var offset          = currentWindowNo * testDataCount;
                var trainingIndices = shuffledAllIndices.Skip(offset).Take(trainingDataCount).ToList();
                var trainingData    = dataFrame.GetSubsetByRows(trainingIndices);

                // Everything not used for training becomes the test set.
                var testIndices = shuffledAllIndices.Except(trainingIndices).ToList();
                var testData    = dataFrame.GetSubsetByRows(testIndices);
                IPredictionModel          model       = modelBuilder.BuildModel(trainingData, dependentFeatureName, modelBuilderParams);
                IList <TPredictionResult> predictions = predictor.Predict(testData, model, dependentFeatureName);
                IList <TPredictionResult> expected    = testData.GetColumnVector <TPredictionResult>(dependentFeatureName);
                IDataQualityReport <TPredictionResult> qualityReport = qualityMeasure.GetReport(expected, predictions);
                iterationAccuracies.Add(qualityReport);
                currentWindowNo++;
            }
            return(iterationAccuracies);
        }
        /// <summary>
        /// Builds a decision tree for <paramref name="dependentFeatureName"/>; returns a
        /// leaf immediately when the stop criterion says no split is worthwhile.
        /// </summary>
        /// <exception cref="ArgumentException">Thrown when the parameters are not decision-tree parameters.</exception>
        public IPredictionModel BuildModel(
            IDataFrame dataFrame,
            string dependentFeatureName,
            IModelBuilderParams additionalParams)
        {
            if (!(additionalParams is IDecisionTreeModelBuilderParams))
            {
                throw new ArgumentException("Invalid params passed for Decision Tree Model Builder!");
            }

            // Recursion base case.
            if (ShouldStopRecusrsiveBuilding(dataFrame, dependentFeatureName))
            {
                return BuildLeaf(dataFrame, dependentFeatureName);
            }

            var builderParams = (IDecisionTreeModelBuilderParams)additionalParams;
            var usedAttributes = new AlreadyUsedAttributesInfo();

            //TODO: reduce the number of parameters - maybe some nicer DTO?
            var rootNode = this.BuildDecisionNode(
                dataFrame,
                dependentFeatureName,
                builderParams,
                usedAttributes,
                0,
                builderParams.ProcessSubtreesCreationInParallel);

            return rootNode;
        }
 /// <summary>
 /// Builds a KNN model after performing backwards feature elimination.
 /// </summary>
 public override IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
 {
     // Throws on invalid parameter type (virtual; subclasses may extend the check).
     ValidateAdditionalParams(additionalParams);
     var knnParams = additionalParams as IKnnAdditionalParams;
     return PerformBackwardsElimination(dataFrame, dependentFeatureName, knnParams);
 }
        /// <summary>
        /// Builds a random forest using the column at <paramref name="dependentFeatureIndex"/>
        /// as the dependent feature; delegates to the name-based overload.
        /// </summary>
        /// <exception cref="ArgumentException">Thrown when the parameters are not random-forest parameters.</exception>
        public IPredictionModel BuildModel(IDataFrame dataFrame, int dependentFeatureIndex, IModelBuilderParams additionalParams)
        {
            if (!(additionalParams is IRandomForestModelBuilderParams))
            {
                throw new ArgumentException("Invalid parameters passed to Random Forest Model builder!");
            }

            var dependentFeatureName = dataFrame.ColumnNames[dependentFeatureIndex];
            return BuildModel(dataFrame, dependentFeatureName, additionalParams);
        }
        /// <summary>
        /// Builds a random forest: trains <c>TreesCount</c> decision trees in parallel, each
        /// on a bootstrap sample of the rows restricted to a random subset of feature
        /// columns, and records every tree's out-of-bag (OOB) error.
        /// </summary>
        /// <param name="dataFrame">Training data.</param>
        /// <param name="dependentFeatureName">Name of the column being predicted.</param>
        /// <param name="additionalParams">Must implement <see cref="IRandomForestModelBuilderParams"/>.</param>
        /// <returns>A <see cref="RandomForestModel"/> holding the trees and their OOB errors.</returns>
        /// <exception cref="ArgumentException">Thrown when the parameters are of the wrong type.</exception>
        public IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
        {
            if (!(additionalParams is IRandomForestModelBuilderParams))
            {
                throw new ArgumentException("Invalid parameters passed to Random Forest Model builder!");
            }

            var randomForestParams = (IRandomForestModelBuilderParams)additionalParams;

            var trees     = new IDecisionTreeNode[randomForestParams.TreesCount];
            var oobErrors = new double[randomForestParams.TreesCount];

            var columnsCountToTake = featuresToUseCountCalculator(dataFrame.ColumnsCount - 1);
            var featureColumns     = dataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList();
            // Fix: removed unused locals `randomizer` and `locker` — each tree uses its own
            // seeded RNG below and the writes to `trees`/`oobErrors` go to distinct indices,
            // so no shared lock is needed.

            Parallel.For(
                0,
                randomForestParams.TreesCount,
                i =>
            {
                // Per-tree RNG seeded with the tree index, so each tree draws a different
                // but reproducible bootstrap sample. (Int32.GetHashCode() is the value
                // itself, so seeding with `i` is equivalent to the old `i.GetHashCode()`.)
                var localRandomizer = new Random(i);

                // Bootstrap: sample RowCount row indices with replacement.
                var randomlySelectedIndices =
                    Enumerable.Range(0, dataFrame.RowCount)
                    .Select(_ => localRandomizer.Next(0, dataFrame.RowCount))
                    .ToList();
                // Rows never drawn form this tree's out-of-bag validation set.
                var outOfBagIndices =
                    Enumerable.Range(0, dataFrame.RowCount).Except(randomlySelectedIndices).ToList();

                // Random feature subset, plus the dependent column itself.
                var columnsToTake = featureColumns.Shuffle(localRandomizer).Take(columnsCountToTake).ToList();
                columnsToTake.Add(dependentFeatureName);

                var baggedTestData = dataFrame.Slice(randomlySelectedIndices, columnsToTake);
                var oobTestData    = dataFrame.Slice(outOfBagIndices, columnsToTake);
                var oobExpected    = oobTestData.GetColumnVector <TPredictionVal>(dependentFeatureName).Values;

                var decisionTree = decisionTreeModelBuilder.BuildModel(
                    baggedTestData,
                    dependentFeatureName,
                    decisionTreeModelBuilderParamsFactory());
                var prediction = decisionTreePredictor.Predict(oobTestData, decisionTree, dependentFeatureName);

                //TODO: AAA !!! Later on add support for calculating variable importance!!!
                var oobError = dataQualityMeasure.CalculateError(oobExpected, prediction);
                trees[i]     = decisionTree as IDecisionTreeNode;
                oobErrors[i] = oobError;
            });

            return new RandomForestModel(trees, oobErrors);
        }
// ---- Example #11 (scrape artifact: original separator text "示例#11", vote count "0") ----
        /// <summary>
        /// Builds a gradient-descent linear regression model for
        /// <paramref name="dependentFeatureName"/>: extracts the feature matrix (with an
        /// intercept column) and the dependent vector, then delegates to the matrix-based
        /// <c>BuildModel</c> overload.
        /// </summary>
        /// <exception cref="ArgumentException">Thrown when the parameters are not linear-regression parameters.</exception>
        public IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
        {
            if (!(additionalParams is ILinearRegressionParams))
            {
                // Fix: corrected typo in error message ("Desccent" -> "Descent").
                throw new ArgumentException("Invalid parameters passed to Gradient Descent model builder!");
            }
            var linearRegressionParams = additionalParams as ILinearRegressionParams;
            var matrixX = dataFrame.GetSubsetByColumns(dataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList()).GetAsMatrixWithIntercept();
            var vectorY = dataFrame.GetNumericColumnVector(dependentFeatureName);

            return(BuildModel(matrixX, vectorY, linearRegressionParams));
        }
        /// <summary>
        /// Builds a KNN prediction model from the given data frame.
        /// </summary>
        public virtual IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
        {
            // Throws on invalid parameter type.
            ValidateAdditionalParams(additionalParams);
            var knnParams = (IKnnAdditionalParams)additionalParams;

            // Item1: feature matrix, Item2: dependent values, Item3: feature column names.
            var preparedData = PrepareTrainingData(dataFrame, dependentFeatureName);

            return new KnnPredictionModel <TPredictionResult>(
                preparedData.Item1,
                preparedData.Item2,
                preparedData.Item3,
                knnParams.KNeighbors,
                knnParams.UseWeightedDistances);
        }