// Checks that GetSubsetByColumns, called with column indices 0 and 2, produces a
// data frame equal to one built from the "Col1" and "Col3" columns only.
// _subject is the fixture under test and is defined outside this method.
public void Test_GetSubset_ByColumns_UsingColumnIndices()
{
    // Given
    var expectedTable = new DataTable();
    expectedTable.Columns.Add(new DataColumn("Col1", typeof(string)));
    expectedTable.Columns.Add(new DataColumn("Col3", typeof(string)));
    expectedTable.Rows.Add(new object[] { "a1.1", "b1.2" });
    expectedTable.Rows.Add(new object[] { "a2.1", "b2.2" });
    expectedTable.Rows.Add(new object[] { "a3.1", "b3.2" });

    // NOTE(review): four row indices are supplied for a three-row table — confirm
    // this matches how _subject is set up.
    var rowIndices = new[] { 100, 101, 102, 103 };
    var expectedDataFrame = new DataFrame(expectedTable, rowIndices);

    // When
    var selectedColumnIndices = new[] { 0, 2 };
    var actualDataFrame = _subject.GetSubsetByColumns(selectedColumnIndices);

    // Then
    var framesAreEqual = expectedDataFrame.Equals(actualDataFrame);
    Assert.IsTrue(framesAreEqual);
}
/// <summary>
/// Builds a regression leaf from the data that reached the end of a tree branch.
/// </summary>
/// <remarks>
/// The weights are fitted with <c>MultipleRegression.DirectMethod</c> first. If that call
/// throws, the weights are taken from the model produced by <c>regressionModelBuilder</c>
/// using <c>regressionParams</c>.
/// </remarks>
/// <param name="finalData">Data frame holding the dependent column and all feature columns.</param>
/// <param name="dependentFeatureName">Name of the column to predict; it is excluded from the feature matrix.</param>
/// <returns>
/// A <c>RegressionAndModelLeaf</c> built from the dependent feature name, the fitted weights
/// and the mean of the dependent column.
/// </returns>
public IDecisionTreeLeaf BuildLeaf(IDataFrame finalData, string dependentFeatureName)
{
    var vectorY = finalData.GetNumericColumnVector(dependentFeatureName);
    var featureNames = finalData.ColumnNames.Except(new[] { dependentFeatureName }).ToList();

    // Project the feature columns exactly once (the subset used to be computed twice,
    // with the first result discarded).
    var matrixX = finalData.GetSubsetByColumns(featureNames).GetAsMatrixWithIntercept();

    Vector<double> fittedWeights;
    try
    {
        fittedWeights = MultipleRegression.DirectMethod(matrixX, vectorY);
    }
    catch (Exception)
    {
        // Deliberate best-effort: any failure of the direct solve falls back to the
        // configured regression model builder.
        fittedWeights = regressionModelBuilder.BuildModel(matrixX, vectorY, regressionParams).Weights;
    }

    return new RegressionAndModelLeaf(dependentFeatureName, fittedWeights, vectorY.Mean());
}
/// <summary>
/// Creates the leaf node for a finished branch by fitting a linear model to the remaining data.
/// </summary>
/// <remarks>
/// <c>MultipleRegression.DirectMethod</c> is tried first; when it throws, the weights come
/// from <c>regressionModelBuilder.BuildModel</c> called with <c>regressionParams</c>.
/// </remarks>
/// <param name="finalData">Data frame containing both the feature columns and the dependent column.</param>
/// <param name="dependentFeatureName">Name of the dependent column; every other column is used as a feature.</param>
/// <returns>
/// A <c>RegressionAndModelLeaf</c> constructed from the dependent feature name, the fitted
/// weights and the mean of the dependent column.
/// </returns>
public IDecisionTreeLeaf BuildLeaf(IDataFrame finalData, string dependentFeatureName)
{
    var vectorY = finalData.GetNumericColumnVector(dependentFeatureName);
    var featureNames = finalData.ColumnNames.Except(new[] { dependentFeatureName }).ToList();

    // The feature subset is built a single time here; previously an unused local
    // triggered a second, identical GetSubsetByColumns call.
    var matrixX = finalData.GetSubsetByColumns(featureNames).GetAsMatrixWithIntercept();

    Vector<double> fittedWeights;
    try
    {
        fittedWeights = MultipleRegression.DirectMethod(matrixX, vectorY);
    }
    catch (Exception)
    {
        // Intentional fallback: if the direct solve fails for any reason, use the
        // configured regression model builder instead.
        fittedWeights = regressionModelBuilder.BuildModel(matrixX, vectorY, regressionParams).Weights;
    }

    return new RegressionAndModelLeaf(dependentFeatureName, fittedWeights, vectorY.Mean());
}
/// <summary>
/// Splits a data frame into the pieces needed for training: the feature matrix,
/// the values of the dependent column and the names of the feature columns.
/// </summary>
/// <param name="dataFrame">Source data frame.</param>
/// <param name="dependentFeatureName">Name of the column holding the expected outcomes.</param>
/// <returns>
/// A tuple of (matrix of all columns except the dependent one, values of the dependent
/// column, names of the columns that make up the matrix).
/// </returns>
protected virtual Tuple<Matrix<double>, IList<TPredictionResult>, IList<string>> PrepareTrainingData(
    IDataFrame dataFrame,
    string dependentFeatureName)
{
    // Every column whose name differs from the dependent feature is treated as a feature.
    var featureColumns = dataFrame.ColumnNames
        .Where(columnName => columnName != dependentFeatureName)
        .ToList();

    var featureFrame = dataFrame.GetSubsetByColumns(featureColumns);
    var featureMatrix = featureFrame.GetAsMatrix();

    IDataVector<TPredictionResult> outcomes =
        dataFrame.GetColumnVector<TPredictionResult>(dependentFeatureName);

    return Tuple.Create<Matrix<double>, IList<TPredictionResult>, IList<string>>(
        featureMatrix,
        outcomes,
        featureColumns);
}
/// <summary>
/// Builds a prediction model from a data frame by converting it to a feature matrix
/// (with intercept) and a numeric target vector, then delegating to the matrix-based overload.
/// </summary>
/// <param name="dataFrame">Training data, including the dependent column.</param>
/// <param name="dependentFeatureName">Name of the column to predict.</param>
/// <param name="additionalParams">Builder parameters; must implement <c>ILinearRegressionParams</c>.</param>
/// <returns>The model returned by the matrix-based <c>BuildModel</c> overload.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="additionalParams"/> is not an <c>ILinearRegressionParams</c>.
/// </exception>
public IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
{
    var regressionParams = additionalParams as ILinearRegressionParams;
    if (regressionParams == null)
    {
        throw new ArgumentException("Invalid parameters passed to Regularized Linear Regression model builder!");
    }

    // All columns except the dependent one form the design matrix.
    var featureColumns = dataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList();
    var featureFrame = dataFrame.GetSubsetByColumns(featureColumns);
    var matrixX = featureFrame.GetAsMatrixWithIntercept();

    var vectorY = dataFrame.GetNumericColumnVector(dependentFeatureName);

    return BuildModel(matrixX, vectorY, regressionParams);
}
/// <summary>
/// Extracts the query data as a matrix, leaving out the dependent feature column.
/// </summary>
/// <param name="queryDataFrame">Data to run predictions for.</param>
/// <param name="knnModel">KNN prediction model (not read by this implementation).</param>
/// <param name="dependentFeatureIdx">
/// Index of the dependent feature column. When it falls outside the frame's column range,
/// an empty name is used and only columns named with the empty string are filtered out.
/// </param>
/// <returns>Matrix built from the remaining columns of <paramref name="queryDataFrame"/>.</returns>
protected virtual Matrix<double> ExtractQueryDataAsMatrix(
    IDataFrame queryDataFrame,
    IKnnPredictionModel<TPredictionResult> knnModel,
    int dependentFeatureIdx)
{
    // Resolve the name of the dependent feature column, if the index is valid.
    var indexIsValid = dependentFeatureIdx < queryDataFrame.ColumnsCount && dependentFeatureIdx >= 0;
    var excludedColumnName = indexIsValid
        ? queryDataFrame.ColumnNames[dependentFeatureIdx]
        : string.Empty;

    // Keep every column except the dependent feature column.
    var retainedColumns = queryDataFrame.ColumnNames
        .Where(columnName => columnName != excludedColumnName)
        .ToList();

    return queryDataFrame.GetSubsetByColumns(retainedColumns).GetAsMatrix();
}
/// <summary>
/// Builds a prediction model from a data frame by converting it to a feature matrix
/// (with intercept) and a numeric target vector, then delegating to the matrix-based overload.
/// </summary>
/// <param name="dataFrame">Training data, including the dependent column.</param>
/// <param name="dependentFeatureName">Name of the column to predict; excluded from the feature matrix.</param>
/// <param name="additionalParams">Builder parameters; must implement <c>ILinearRegressionParams</c>.</param>
/// <returns>The model returned by the matrix-based <c>BuildModel</c> overload.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="additionalParams"/> is not an <c>ILinearRegressionParams</c>.
/// </exception>
public IPredictionModel BuildModel(IDataFrame dataFrame, string dependentFeatureName, IModelBuilderParams additionalParams)
{
    // Single type test: a null result covers both a null argument and a wrong type.
    var linearRegressionParams = additionalParams as ILinearRegressionParams;
    if (linearRegressionParams == null)
    {
        // Message spelling corrected ("Descent").
        throw new ArgumentException("Invalid parameters passed to Gradient Descent model builder!");
    }

    var featureNames = dataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList();
    var matrixX = dataFrame.GetSubsetByColumns(featureNames).GetAsMatrixWithIntercept();
    var vectorY = dataFrame.GetNumericColumnVector(dependentFeatureName);

    return BuildModel(matrixX, vectorY, linearRegressionParams);
}
/// <summary>
/// Normalizes the model's training matrix and the query data after dropping the features
/// listed in the model's <c>RemovedFeaturesData</c> (and, for the query data, the dependent
/// feature column).
/// </summary>
/// <param name="queryDataFrame">Data to run predictions for.</param>
/// <param name="knnModel">
/// KNN model; must implement <c>IBackwardsEliminationKnnModel&lt;TPredictionResult&gt;</c>.
/// </param>
/// <param name="dependentFeatureIdx">Index of the dependent feature column in <paramref name="queryDataFrame"/>.</param>
/// <returns>The result of the base class <c>PerformNormalization</c> on the reduced model and query matrices.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="knnModel"/> is not a backwards elimination KNN model.
/// </exception>
protected override Tuple<Matrix<double>, Matrix<double>> NormalizeData(
    IDataFrame queryDataFrame,
    IKnnPredictionModel<TPredictionResult> knnModel,
    int dependentFeatureIdx)
{
    var backwardsEliminationModel = knnModel as IBackwardsEliminationKnnModel<TPredictionResult>;
    if (backwardsEliminationModel == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException on the next line.
        throw new ArgumentException("Invalid model passed to backwards elimination KNN normalization!", "knnModel");
    }

    // Positions (within the query frame) of the features the model eliminated, ascending.
    // NOTE(review): IndexOf yields -1 for a removed feature that is absent from the query
    // frame — confirm GetModelMatrixWithOnlyRelevantColumns tolerates that.
    var featureIndicesToRemove = backwardsEliminationModel.RemovedFeaturesData
        .Select(f => queryDataFrame.ColumnNames.IndexOf(f.FeatureName))
        .OrderBy(i => i)
        .ToList();

    // Query columns that are neither eliminated nor the dependent feature.
    var relevantFeatures = queryDataFrame.ColumnNames
        .Where((colName, colIdx) => !featureIndicesToRemove.Contains(colIdx) && colIdx != dependentFeatureIdx)
        .ToList();

    var modelMatrix = GetModelMatrixWithOnlyRelevantColumns(knnModel.TrainingData, featureIndicesToRemove);
    var queryDataFrameWithoutRedundantColumns = queryDataFrame.GetSubsetByColumns(relevantFeatures).GetAsMatrix();

    return base.PerformNormalization(modelMatrix, queryDataFrameWithoutRedundantColumns);
}
/// <summary>
/// Predicts one value per query row as the dot product of the model weights and the
/// row of the feature matrix (with intercept).
/// </summary>
/// <param name="queryDataFrame">Data to run predictions for.</param>
/// <param name="model">Prediction model; must implement <c>ILinearRegressionModel</c>.</param>
/// <param name="dependentFeatureName">Name of the dependent column; excluded from the feature matrix.</param>
/// <returns>Predicted values in row order.</returns>
/// <exception cref="ArgumentException"><paramref name="model"/> is not an <c>ILinearRegressionModel</c>.</exception>
public IList<double> Predict(IDataFrame queryDataFrame, IPredictionModel model, string dependentFeatureName)
{
    var linearRegressionModel = model as ILinearRegressionModel;
    if (linearRegressionModel == null)
    {
        throw new ArgumentException("Invalid model passed to Linear Regression predictor!");
    }

    var featureColumns = queryDataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList();
    var xMatrix = queryDataFrame.GetSubsetByColumns(featureColumns).GetAsMatrixWithIntercept();

    // Row i of the matrix is fetched, then scored, before row i + 1 is touched.
    return Enumerable.Range(0, xMatrix.RowCount)
        .Select(rowIdx => xMatrix.Row(rowIdx))
        .Select(queryRow => linearRegressionModel.Weights.DotProduct(queryRow))
        .ToList();
}
/// <summary>
/// Predicts one decision value per query row by running each row through the decision tree.
/// </summary>
/// <param name="queryDataFrame">Data to run predictions for.</param>
/// <param name="model">Prediction model; must implement <c>IDecisionTreeNode</c>.</param>
/// <param name="dependentFeatureName">Name of the dependent column; it is removed before rows are processed.</param>
/// <returns>Predicted values in the same order as the rows of <paramref name="queryDataFrame"/>.</returns>
/// <exception cref="ArgumentException"><paramref name="model"/> is not an <c>IDecisionTreeNode</c>.</exception>
public IList<TDecisionValue> Predict(IDataFrame queryDataFrame, IPredictionModel model, string dependentFeatureName)
{
    var decisionTree = model as IDecisionTreeNode;
    if (decisionTree == null)
    {
        throw new ArgumentException("Invalid model passed to Decision Tree Predictor");
    }

    var featureColumns = queryDataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList();
    var queryDataFrameWithoutDependentFeature = queryDataFrame.GetSubsetByColumns(featureColumns);

    // Rows are processed sequentially, so a plain list already preserves row order;
    // no concurrent collection or index-based re-sorting is required.
    var results = new List<TDecisionValue>();
    for (int rowIdx = 0; rowIdx < queryDataFrameWithoutDependentFeature.RowCount; rowIdx++)
    {
        IDataVector<TDecisionValue> dataVector =
            queryDataFrameWithoutDependentFeature.GetRowVector<TDecisionValue>(rowIdx);

        // Only the first tuple item (the predicted value) is used; 1.0 is the value
        // passed as ProcessInstance's third argument for every row.
        Tuple<TDecisionValue, double> predictionResults = ProcessInstance(dataVector, decisionTree, 1.0);
        results.Add(predictionResults.Item1);
    }

    return results;
}
/// <summary>
/// Computes a prediction for every query row by taking the dot product of the linear
/// regression weights with that row of the intercept-augmented feature matrix.
/// </summary>
/// <param name="queryDataFrame">Data to run predictions for.</param>
/// <param name="model">Prediction model; must implement <c>ILinearRegressionModel</c>.</param>
/// <param name="dependentFeatureName">Name of the dependent column; left out of the feature matrix.</param>
/// <returns>One predicted value per row, in row order.</returns>
/// <exception cref="ArgumentException"><paramref name="model"/> is not an <c>ILinearRegressionModel</c>.</exception>
public IList<double> Predict(IDataFrame queryDataFrame, IPredictionModel model, string dependentFeatureName)
{
    var regressionModel = model as ILinearRegressionModel;
    if (regressionModel == null)
    {
        throw new ArgumentException("Invalid model passed to Linear Regression predictor!");
    }

    // Feature matrix: every column except the dependent one, plus the intercept column.
    var featureColumns = queryDataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList();
    var featureFrame = queryDataFrame.GetSubsetByColumns(featureColumns);
    var xMatrix = featureFrame.GetAsMatrixWithIntercept();

    var predictions = new List<double>();
    for (var row = 0; row < xMatrix.RowCount; row++)
    {
        var features = xMatrix.Row(row);
        predictions.Add(regressionModel.Weights.DotProduct(features));
    }

    return predictions;
}