예제 #1
0
        // Verifies that GetSubsetByColumns, called with the column indices 0 and 2,
        // yields a data frame equal to one built from columns "Col1" and "Col3".
        public void Test_GetSubset_ByColumns_UsingColumnIndices()
        {
            // Given
            var expectedTable = new DataTable();
            expectedTable.Columns.Add(new DataColumn("Col1", typeof(string)));
            expectedTable.Columns.Add(new DataColumn("Col3", typeof(string)));
            expectedTable.Rows.Add(new object[] { "a1.1", "b1.2" });
            expectedTable.Rows.Add(new object[] { "a2.1", "b2.2" });
            expectedTable.Rows.Add(new object[] { "a3.1", "b3.2" });

            // NOTE(review): four values are passed here while the table has three rows -
            // confirm this matches the fixture behind _subject.
            var expectedDataFrame = new DataFrame(expectedTable, new[] { 100, 101, 102, 103 });
            var requestedColumnIndices = new[] { 0, 2 };

            // When
            var actualDataFrame = _subject.GetSubsetByColumns(requestedColumnIndices);

            // Then
            var framesMatch = expectedDataFrame.Equals(actualDataFrame);
            Assert.IsTrue(framesMatch);
        }
        /// <summary>
        /// Builds a regression leaf for the given data: fits weights for every column other than
        /// <paramref name="dependentFeatureName"/> against the dependent column.
        /// </summary>
        /// <param name="finalData">Data frame holding both the feature columns and the dependent column.</param>
        /// <param name="dependentFeatureName">Name of the column used as the regression target.</param>
        /// <returns>
        /// A <c>RegressionAndModelLeaf</c> created from the dependent feature name, the fitted weights
        /// and the mean of the dependent column.
        /// </returns>
        public IDecisionTreeLeaf BuildLeaf(IDataFrame finalData, string dependentFeatureName)
        {
            var vectorY      = finalData.GetNumericColumnVector(dependentFeatureName);
            var featureNames = finalData.ColumnNames.Except(new[] { dependentFeatureName }).ToList();

            // The column subset is computed once and reused; previously it was built twice
            // and the first result was never read.
            var subset  = finalData.GetSubsetByColumns(featureNames);
            var matrixX = subset.GetAsMatrixWithIntercept();

            Vector<double> fittedWeights;

            try
            {
                fittedWeights = MultipleRegression.DirectMethod(matrixX, vectorY);
            }
            catch (Exception)
            {
                // Deliberate fallback: when the direct method throws, the weights are taken
                // from the configured regression model builder instead.
                fittedWeights = regressionModelBuilder.BuildModel(matrixX, vectorY, regressionParams).Weights;
            }

            return new RegressionAndModelLeaf(dependentFeatureName, fittedWeights, vectorY.Mean());
        }
        /// <summary>
        /// Creates a leaf whose regression weights are fitted on all columns of
        /// <paramref name="finalData"/> except <paramref name="dependentFeatureName"/>.
        /// </summary>
        /// <param name="finalData">Data frame containing the feature columns and the dependent column.</param>
        /// <param name="dependentFeatureName">Name of the dependent (target) column.</param>
        /// <returns>
        /// A <c>RegressionAndModelLeaf</c> built from the dependent feature name, the fitted weights
        /// and the mean of the dependent column.
        /// </returns>
        public IDecisionTreeLeaf BuildLeaf(IDataFrame finalData, string dependentFeatureName)
        {
            var vectorY = finalData.GetNumericColumnVector(dependentFeatureName);
            var featureNames = finalData.ColumnNames.Except(new[] { dependentFeatureName }).ToList();

            // Build the feature subset a single time; the earlier code created it twice
            // and discarded the first copy.
            var subset = finalData.GetSubsetByColumns(featureNames);
            var matrixX = subset.GetAsMatrixWithIntercept();

            Vector<double> fittedWeights;
            try
            {
                fittedWeights = MultipleRegression.DirectMethod(matrixX, vectorY);
            }
            catch (Exception)
            {
                // Intentional best-effort fallback to the injected model builder when the
                // direct method fails.
                fittedWeights = regressionModelBuilder.BuildModel(matrixX, vectorY, regressionParams).Weights;
            }

            return new RegressionAndModelLeaf(dependentFeatureName, fittedWeights, vectorY.Mean());
        }
        /// <summary>
        /// Splits a data frame into the pieces used for training: the matrix of all columns other
        /// than the dependent one, the values of the dependent column, and the names of the
        /// columns that went into the matrix.
        /// </summary>
        /// <param name="dataFrame">Source data frame.</param>
        /// <param name="dependentFeatureName">Name of the column holding the expected outcomes.</param>
        /// <returns>A tuple of (training matrix, expected outcomes, training column names).</returns>
        protected virtual Tuple<Matrix<double>, IList<TPredictionResult>, IList<string>> PrepareTrainingData(
            IDataFrame dataFrame,
            string dependentFeatureName)
        {
            // Every column except the dependent one is treated as training input.
            var featureColumns = dataFrame.ColumnNames
                .Where(name => name != dependentFeatureName)
                .ToList();

            var featureFrame  = dataFrame.GetSubsetByColumns(featureColumns);
            var featureMatrix = featureFrame.GetAsMatrix();

            IDataVector<TPredictionResult> outcomes = dataFrame.GetColumnVector<TPredictionResult>(dependentFeatureName);

            var prepared = new Tuple<Matrix<double>, IList<TPredictionResult>, IList<string>>(
                featureMatrix,
                outcomes,
                featureColumns);
            return prepared;
        }
 /// <summary>
 /// Builds a prediction model from a data frame by converting it to a matrix (with intercept)
 /// and a target vector, then delegating to the matrix-based BuildModel overload.
 /// </summary>
 /// <param name="dataFrame">Data frame with the feature columns and the dependent column.</param>
 /// <param name="dependentFeatureName">Name of the dependent (target) column.</param>
 /// <param name="additionalParams">Builder parameters; must implement <c>ILinearRegressionParams</c>.</param>
 /// <returns>The model produced by the matrix-based overload.</returns>
 /// <exception cref="ArgumentException">
 /// Thrown when <paramref name="additionalParams"/> is not an <c>ILinearRegressionParams</c>.
 /// </exception>
 public IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
 {
     var typedParams = additionalParams as ILinearRegressionParams;
     if (typedParams == null)
     {
         throw new ArgumentException("Invalid parameters passed to Regularized Linear Regression model builder!");
     }

     // All columns except the dependent one form the design matrix.
     var featureNames = dataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList();
     var designMatrix = dataFrame.GetSubsetByColumns(featureNames).GetAsMatrixWithIntercept();
     var targetVector = dataFrame.GetNumericColumnVector(dependentFeatureName);

     return BuildModel(designMatrix, targetVector, typedParams);
 }
예제 #6
0
        /// <summary>
        /// Extracts the query data as a matrix, leaving out the dependent feature column.
        /// </summary>
        /// <param name="queryDataFrame">Data to run predictions on.</param>
        /// <param name="knnModel">kNN prediction model (not read by this implementation).</param>
        /// <param name="dependentFeatureIdx">Index of the dependent feature column.</param>
        /// <returns>Matrix built from every column whose name differs from the dependent feature's name.</returns>
        protected virtual Matrix<double> ExtractQueryDataAsMatrix(IDataFrame queryDataFrame, IKnnPredictionModel<TPredictionResult> knnModel, int dependentFeatureIdx)
        {
            // Resolve the dependent column's name; an index outside the frame's column
            // range maps to the empty string.
            string dependentFeatureName;
            if (dependentFeatureIdx >= queryDataFrame.ColumnsCount || dependentFeatureIdx < 0)
            {
                dependentFeatureName = string.Empty;
            }
            else
            {
                dependentFeatureName = queryDataFrame.ColumnNames[dependentFeatureIdx];
            }

            // Keep every column except the dependent one and convert the subset to a matrix.
            var retainedColumns = queryDataFrame.ColumnNames
                .Where(colName => colName != dependentFeatureName)
                .ToList();

            return queryDataFrame.GetSubsetByColumns(retainedColumns).GetAsMatrix();
        }
예제 #7
0
        /// <summary>
        /// Builds a prediction model from a data frame: converts all columns other than the dependent
        /// one into a matrix with intercept, reads the dependent column as a numeric vector, and
        /// delegates to the matrix-based BuildModel overload.
        /// </summary>
        /// <param name="dataFrame">Data frame with the feature columns and the dependent column.</param>
        /// <param name="dependentFeatureName">Name of the dependent (target) column.</param>
        /// <param name="additionalParams">Builder parameters; must implement <c>ILinearRegressionParams</c>.</param>
        /// <returns>The model produced by the matrix-based overload.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="additionalParams"/> is not an <c>ILinearRegressionParams</c>.
        /// </exception>
        public IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
        {
            // A single 'as' conversion replaces the former 'is' test followed by 'as'.
            var linearRegressionParams = additionalParams as ILinearRegressionParams;
            if (linearRegressionParams == null)
            {
                // Message typo fixed: "Desccent" -> "Descent".
                throw new ArgumentException("Invalid parameters passed to Gradient Descent model builder!");
            }

            var matrixX = dataFrame.GetSubsetByColumns(dataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList()).GetAsMatrixWithIntercept();
            var vectorY = dataFrame.GetNumericColumnVector(dependentFeatureName);

            return BuildModel(matrixX, vectorY, linearRegressionParams);
        }
        /// <summary>
        /// Normalizes the model's training data together with the query data after dropping the
        /// features recorded as removed on the backwards-elimination model, and the dependent
        /// feature column from the query data.
        /// </summary>
        /// <param name="queryDataFrame">Data to run predictions on.</param>
        /// <param name="knnModel">kNN model; must implement <c>IBackwardsEliminationKnnModel</c>.</param>
        /// <param name="dependentFeatureIdx">Index of the dependent feature column in <paramref name="queryDataFrame"/>.</param>
        /// <returns>The result of the base class's PerformNormalization for the reduced model and query matrices.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="knnModel"/> is not an <c>IBackwardsEliminationKnnModel</c>.
        /// </exception>
        protected override Tuple<Matrix<double>, Matrix<double>> NormalizeData(IDataFrame queryDataFrame, IKnnPredictionModel<TPredictionResult> knnModel, int dependentFeatureIdx)
        {
            var backwardsEliminationModel = knnModel as IBackwardsEliminationKnnModel<TPredictionResult>;
            if (backwardsEliminationModel == null)
            {
                // Fail with a descriptive error instead of a NullReferenceException on the
                // member access below.
                throw new ArgumentException("Invalid model passed to Backwards Elimination KNN predictor!");
            }

            // Positions, within the query frame's column list, of the features the model removed.
            // NOTE(review): IndexOf presumably yields -1 for a name missing from the query frame -
            // confirm how GetModelMatrixWithOnlyRelevantColumns treats that.
            var featureIndicesToRemove = backwardsEliminationModel.RemovedFeaturesData
                .Select(f => queryDataFrame.ColumnNames.IndexOf(f.FeatureName))
                .OrderBy(i => i)
                .ToList();

            var relevantFeatures = queryDataFrame.ColumnNames
                .Where((colName, colIdx) => !featureIndicesToRemove.Contains(colIdx) && colIdx != dependentFeatureIdx)
                .ToList();

            var modelMatrix = GetModelMatrixWithOnlyRelevantColumns(knnModel.TrainingData, featureIndicesToRemove);
            var queryDataFrameWithoutRedundantColumns = queryDataFrame.GetSubsetByColumns(relevantFeatures).GetAsMatrix();

            return base.PerformNormalization(modelMatrix, queryDataFrameWithoutRedundantColumns);
        }
 /// <summary>
 /// Predicts one value per row of the query data by taking the dot product of the model's
 /// weights with that row of the intercept-augmented feature matrix.
 /// </summary>
 /// <param name="queryDataFrame">Data to run predictions on.</param>
 /// <param name="model">Model to apply; must implement <c>ILinearRegressionModel</c>.</param>
 /// <param name="dependentFeatureName">Name of the dependent column, excluded from the features.</param>
 /// <returns>Predicted values in row order.</returns>
 /// <exception cref="ArgumentException">
 /// Thrown when <paramref name="model"/> is not an <c>ILinearRegressionModel</c>.
 /// </exception>
 public IList<double> Predict(IDataFrame queryDataFrame, IPredictionModel model, string dependentFeatureName)
 {
     var regressionModel = model as ILinearRegressionModel;
     if (regressionModel == null)
     {
         throw new ArgumentException("Invalid model passed to Linear Regression predictor!");
     }

     var featureNames = queryDataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList();
     var designMatrix = queryDataFrame.GetSubsetByColumns(featureNames).GetAsMatrixWithIntercept();

     var predictions = new List<double>();
     foreach (var rowIdx in Enumerable.Range(0, designMatrix.RowCount))
     {
         var row = designMatrix.Row(rowIdx);
         predictions.Add(regressionModel.Weights.DotProduct(row));
     }

     return predictions;
 }
        /// <summary>
        /// Predicts a decision value for every row of the query data by running each row
        /// (without the dependent feature column) through the decision tree.
        /// </summary>
        /// <param name="queryDataFrame">Data to run predictions on.</param>
        /// <param name="model">Model to apply; must implement <c>IDecisionTreeNode</c>.</param>
        /// <param name="dependentFeatureName">Name of the dependent column, excluded before prediction.</param>
        /// <returns>Predicted values in row order.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="model"/> is not an <c>IDecisionTreeNode</c>.
        /// </exception>
        public IList<TDecisionValue> Predict(IDataFrame queryDataFrame, IPredictionModel model, string dependentFeatureName)
        {
            var rootNode = model as IDecisionTreeNode;
            if (rootNode == null)
            {
                throw new ArgumentException("Invalid model passed to Decision Tree Predictor");
            }

            var queryDataFrameWithoutDependentFeature =
                queryDataFrame.GetSubsetByColumns(
                    queryDataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList());

            // Rows are visited sequentially, so a plain list already keeps the results in row
            // order; no concurrent collection or re-sorting by row index is needed.
            var results = new List<TDecisionValue>();
            for (int rowIdx = 0; rowIdx < queryDataFrameWithoutDependentFeature.RowCount; rowIdx++)
            {
                IDataVector<TDecisionValue> dataVector = queryDataFrameWithoutDependentFeature.GetRowVector<TDecisionValue>(rowIdx);
                Tuple<TDecisionValue, double> predictionResults = ProcessInstance(dataVector, rootNode, 1.0);
                results.Add(predictionResults.Item1);
            }

            return results;
        }
예제 #11
0
        /// <summary>
        /// Applies a linear regression model to the query data: each prediction is the dot product
        /// of the model's weights and the corresponding row of the feature matrix with intercept.
        /// </summary>
        /// <param name="queryDataFrame">Data to run predictions on.</param>
        /// <param name="model">Model to apply; must implement <c>ILinearRegressionModel</c>.</param>
        /// <param name="dependentFeatureName">Name of the dependent column, excluded from the features.</param>
        /// <returns>Predicted values in row order.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="model"/> is not an <c>ILinearRegressionModel</c>.
        /// </exception>
        public IList<double> Predict(IDataFrame queryDataFrame, IPredictionModel model, string dependentFeatureName)
        {
            var regressionModel = model as ILinearRegressionModel;
            if (regressionModel == null)
            {
                throw new ArgumentException("Invalid model passed to Linear Regression predictor!");
            }

            var featureColumns = queryDataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList();
            var featureMatrix  = queryDataFrame.GetSubsetByColumns(featureColumns).GetAsMatrixWithIntercept();

            // Scores a single row: fetch the row first, then dot it with the model weights.
            Func<int, double> predictRow = rowIdx =>
            {
                var queryRow = featureMatrix.Row(rowIdx);
                return regressionModel.Weights.DotProduct(queryRow);
            };

            return Enumerable.Range(0, featureMatrix.RowCount)
                .Select(predictRow)
                .ToList();
        }