/// <summary>
/// Produces the prediction for a regression/model leaf.
/// </summary>
/// <remarks>
/// When the leaf exposes model weights, the prediction is the dot product of those weights with
/// the vector's numeric values prefixed by a constant 1.0 (the intercept term). Otherwise the
/// leaf's <c>DecisionMeanValue</c> is used. The intercept vector is only materialised on the
/// weighted path, so a leaf without weights never touches <c>vector.NumericVector</c>.
/// </remarks>
/// <param name="vector">Instance being predicted; only its <c>NumericVector</c> is read.</param>
/// <param name="decisionTree">Leaf node; must be an <c>IDecisionTreeRegressionAndModelLeaf</c>.</param>
/// <param name="probabilitiesProductSoFar">Probability accumulated on the path to this leaf; returned unchanged.</param>
/// <returns>
/// The predicted value converted to <typeparamref name="TDecisionValue"/> via
/// <see cref="Convert.ChangeType(object, Type)"/>, paired with <paramref name="probabilitiesProductSoFar"/>.
/// </returns>
private static Tuple<TDecisionValue, double> HandleRegressionAndModelLeaf(
    IDataVector<TDecisionValue> vector,
    IDecisionTreeNode decisionTree,
    double probabilitiesProductSoFar)
{
    // Direct cast: a node of the wrong type fails here with a clear InvalidCastException
    // instead of a NullReferenceException on the first member access.
    var regressionLeaf = (IDecisionTreeRegressionAndModelLeaf)decisionTree;

    double predictedVal;
    if (regressionLeaf.ModelWeights != null)
    {
        // Prepend 1.0 so the first model weight acts as the intercept.
        var numericVector = vector.NumericVector.ToList();
        numericVector.Insert(0, 1.0);
        var vectorWithIntercept = Vector<double>.Build.DenseOfArray(numericVector.ToArray());
        var weights = Vector<double>.Build.DenseOfArray(regressionLeaf.ModelWeights.ToArray());
        predictedVal = vectorWithIntercept.DotProduct(weights);
    }
    else
    {
        predictedVal = regressionLeaf.DecisionMeanValue;
    }

    return new Tuple<TDecisionValue, double>(
        (TDecisionValue)Convert.ChangeType(predictedVal, typeof(TDecisionValue)),
        probabilitiesProductSoFar);
}
/// <summary>
/// Routes an instance through a binary split node and continues prediction in the chosen child.
/// </summary>
/// <remarks>
/// For numeric splits the left child is followed when the instance value is strictly lower than
/// the node's decision value, otherwise the right child. For non-numeric splits the right child
/// is followed when the instance value equals the decision value, otherwise the left child.
/// The accumulated probability is multiplied by the <c>InstancesPercentage</c> of the link that
/// leads to the child actually followed, matching what <c>ProcessMultiValueSplit</c> does.
/// </remarks>
/// <param name="vector">Instance being predicted.</param>
/// <param name="binaryDecisionTreeNode">Binary split node to evaluate.</param>
/// <param name="probabilitiesProductSoFar">Probability accumulated on the path to this node.</param>
/// <returns>The result of <c>ProcessInstance</c> on the followed child.</returns>
/// <exception cref="ArgumentException">The vector does not contain the node's decision feature.</exception>
private Tuple<TDecisionValue, double> ProcessBinarySplit(
    IDataVector<TDecisionValue> vector,
    IBinaryDecisionTreeParentNode binaryDecisionTreeNode,
    double probabilitiesProductSoFar)
{
    string decisionFeature = binaryDecisionTreeNode.DecisionFeatureName;
    if (!vector.FeatureNames.Contains(decisionFeature))
    {
        throw new ArgumentException($"Invalid vector passed for prediction. Unknown feature {decisionFeature}");
    }

    TDecisionValue vectorValue = vector[decisionFeature];
    TDecisionValue decisionValue = (TDecisionValue)binaryDecisionTreeNode.DecisionValue;

    // Numeric: "lower than threshold" goes left. Categorical: "equal" goes right, so "not equal" goes left.
    bool followLeft = binaryDecisionTreeNode.IsValueNumeric
        ? Convert.ToDouble(vectorValue) < Convert.ToDouble(decisionValue)
        : !vectorValue.Equals(decisionValue);

    var childToFollow = followLeft
        ? binaryDecisionTreeNode.LeftChild
        : binaryDecisionTreeNode.RightChild;

    // Weight the path by the link of the child that is followed. Previously the numeric
    // branch always used the right link and the categorical branch always used the left link.
    double linkPercentage = followLeft
        ? binaryDecisionTreeNode.LeftChildLink.InstancesPercentage
        : binaryDecisionTreeNode.RightChildLink.InstancesPercentage;

    return this.ProcessInstance(vector, childToFollow, probabilitiesProductSoFar * linkPercentage);
}
/// <summary>
/// Splits a data frame into the training matrix, the expected outcomes and the list of
/// feature column names.
/// </summary>
/// <param name="dataFrame">Source data, including the dependent column.</param>
/// <param name="dependentFeatureName">Name of the column holding the expected outcomes.</param>
/// <returns>
/// A tuple of: the matrix built from every column except <paramref name="dependentFeatureName"/>,
/// the column vector of <paramref name="dependentFeatureName"/>, and the names of the columns
/// that went into the matrix.
/// </returns>
protected virtual Tuple<Matrix<double>, IList<TPredictionResult>, IList<string>> PrepareTrainingData(
    IDataFrame dataFrame,
    string dependentFeatureName)
{
    // Every column other than the dependent one is a training feature.
    var featureColumns = new List<string>();
    foreach (var columnName in dataFrame.ColumnNames)
    {
        if (columnName != dependentFeatureName)
        {
            featureColumns.Add(columnName);
        }
    }

    var featureFrame = dataFrame.GetSubsetByColumns(featureColumns);
    var featureMatrix = featureFrame.GetAsMatrix();
    IDataVector<TPredictionResult> outcomes = dataFrame.GetColumnVector<TPredictionResult>(dependentFeatureName);

    return Tuple.Create<Matrix<double>, IList<TPredictionResult>, IList<string>>(
        featureMatrix,
        outcomes,
        featureColumns);
}
/// <summary>
/// Produces the prediction for a leaf node.
/// </summary>
/// <param name="vector">Instance being predicted; only used for regression/model leaves.</param>
/// <param name="decisionTree">Leaf node to read the prediction from.</param>
/// <param name="probabilitiesProductSoFar">Probability accumulated on the path to this leaf; returned unchanged for classification leaves.</param>
/// <returns>
/// For an <c>IDecisionTreeRegressionAndModelLeaf</c>, the result of
/// <c>HandleRegressionAndModelLeaf</c>; otherwise the leaf's <c>LeafValue</c> cast to
/// <typeparamref name="TDecisionValue"/> paired with <paramref name="probabilitiesProductSoFar"/>.
/// </returns>
private static Tuple<TDecisionValue, double> HandleLeaf(
    IDataVector<TDecisionValue> vector,
    IDecisionTreeNode decisionTree,
    double probabilitiesProductSoFar)
{
    bool isRegressionLeaf = decisionTree is IDecisionTreeRegressionAndModelLeaf;
    if (!isRegressionLeaf)
    {
        // Plain classification leaf: the stored value is the prediction.
        var leaf = decisionTree as IDecisionTreeLeaf;
        return Tuple.Create((TDecisionValue)leaf.LeafValue, probabilitiesProductSoFar);
    }

    return HandleRegressionAndModelLeaf(vector, decisionTree, probabilitiesProductSoFar);
}
/// <summary>
/// Predicts a single instance by dispatching on the kind of the current tree node.
/// </summary>
/// <param name="vector">Instance being predicted.</param>
/// <param name="decisionTree">Current node of the decision tree.</param>
/// <param name="probabilitiesProductSoFar">Probability accumulated on the path to this node.</param>
/// <returns>
/// The result of <c>HandleLeaf</c> for leaves, <c>ProcessBinarySplit</c> for binary parent
/// nodes, and <c>ProcessMultiValueSplit</c> for every other node.
/// </returns>
private Tuple<TDecisionValue, double> ProcessInstance(
    IDataVector<TDecisionValue> vector,
    IDecisionTreeNode decisionTree,
    double probabilitiesProductSoFar)
{
    if (decisionTree is IDecisionTreeLeaf)
    {
        return HandleLeaf(vector, decisionTree, probabilitiesProductSoFar);
    }

    var splitNode = decisionTree as IDecisionTreeParentNode;
    var binarySplitNode = splitNode as IBinaryDecisionTreeParentNode;

    return binarySplitNode != null
        ? this.ProcessBinarySplit(vector, binarySplitNode, probabilitiesProductSoFar)
        : this.ProcessMultiValueSplit(vector, splitNode, probabilitiesProductSoFar);
}
/// <summary>
/// Routes an instance through a multi-value split node.
/// </summary>
/// <remarks>
/// If the node has a test result matching the instance's value, only that child is followed and
/// the probability is multiplied by the <c>InstancesPercentage</c> of its link. Otherwise every
/// child is explored, each weighted by its link's <c>InstancesPercentage</c>; the probabilities
/// of branches that yield the same predicted value are summed, normalised by their total, and
/// the value with the highest share is returned.
/// </remarks>
/// <param name="vector">Instance being predicted.</param>
/// <param name="multiValueDecisionTreeNode">Split node to evaluate.</param>
/// <param name="probabilitiesProductSoFar">Probability accumulated on the path to this node.</param>
/// <returns>
/// The predicted value with its probability, or <c>default(TDecisionValue)</c> with probability 0
/// when exploring the children produced no results.
/// </returns>
/// <exception cref="ArgumentException">The vector does not contain the node's decision feature.</exception>
private Tuple<TDecisionValue, double> ProcessMultiValueSplit(
    IDataVector<TDecisionValue> vector,
    IDecisionTreeParentNode multiValueDecisionTreeNode,
    double probabilitiesProductSoFar)
{
    string featureName = multiValueDecisionTreeNode.DecisionFeatureName;
    if (!vector.FeatureNames.Contains(featureName))
    {
        throw new ArgumentException($"Invalid vector passed for prediction. Unknown feature {featureName}");
    }

    TDecisionValue observedValue = vector[featureName];

    if (multiValueDecisionTreeNode.TestResultsContains(observedValue))
    {
        // TODO: optimize for a single query (maybe?) - return Tuple
        var matchingChild = multiValueDecisionTreeNode.GetChildForTestResult(observedValue);
        var matchingLink = multiValueDecisionTreeNode.GetChildLinkForChild(matchingChild);
        return ProcessInstance(
            vector,
            matchingChild,
            probabilitiesProductSoFar * matchingLink.InstancesPercentage);
    }

    // No branch matches the observed value: explore all branches and vote by probability mass.
    var probabilityByDecision = new Dictionary<TDecisionValue, double>();
    foreach (var branch in multiValueDecisionTreeNode.ChildrenWithTestResults)
    {
        var branchProbability = branch.Item1.InstancesPercentage * probabilitiesProductSoFar;
        var branchOutcome = this.ProcessInstance(vector, branch.Item2, branchProbability);

        double accumulated;
        probabilityByDecision.TryGetValue(branchOutcome.Item1, out accumulated);
        probabilityByDecision[branchOutcome.Item1] = accumulated + branchOutcome.Item2;
    }

    if (probabilityByDecision.Count == 0)
    {
        return new Tuple<TDecisionValue, double>(default(TDecisionValue), 0);
    }

    var totalProbability = probabilityByDecision.Values.Sum();
    var winner = probabilityByDecision
        .Select(entry => Tuple.Create(entry.Key, entry.Value / totalProbability))
        .OrderByDescending(candidate => candidate.Item2)
        .First();

    return winner;
}
/// <summary>
/// Predicts a value for every row of a data frame using a decision tree model.
/// </summary>
/// <param name="queryDataFrame">Rows to predict; the dependent column, if present, is dropped before prediction.</param>
/// <param name="model">Prediction model; must be an <c>IDecisionTreeNode</c>.</param>
/// <param name="dependentFeatureName">Name of the column to exclude from the query data.</param>
/// <returns>Predicted values, one per row, in row order.</returns>
/// <exception cref="ArgumentException"><paramref name="model"/> is not an <c>IDecisionTreeNode</c>.</exception>
public IList<TDecisionValue> Predict(IDataFrame queryDataFrame, IPredictionModel model, string dependentFeatureName)
{
    var decisionTree = model as IDecisionTreeNode;
    if (decisionTree == null)
    {
        throw new ArgumentException("Invalid model passed to Decision Tree Predictor");
    }

    var queryDataFrameWithoutDependentFeature = queryDataFrame.GetSubsetByColumns(
        queryDataFrame.ColumnNames.Except(new[] { dependentFeatureName }).ToList());

    // Rows are processed sequentially, so appending to a plain list already yields row order;
    // no concurrent collection or re-sorting by index is needed.
    var predictions = new List<TDecisionValue>();
    for (int rowIdx = 0; rowIdx < queryDataFrameWithoutDependentFeature.RowCount; rowIdx++)
    {
        IDataVector<TDecisionValue> dataVector =
            queryDataFrameWithoutDependentFeature.GetRowVector<TDecisionValue>(rowIdx);
        Tuple<TDecisionValue, double> predictionResults = ProcessInstance(dataVector, decisionTree, 1.0);
        predictions.Add(predictionResults.Item1);
    }

    return predictions;
}
/// <summary>
/// Computes the distance between two data vectors by delegating to the <c>Distance</c>
/// overload that takes their <c>NumericVector</c> representations.
/// </summary>
/// <param name="vec1">First vector.</param>
/// <param name="vec2">Second vector.</param>
/// <returns>The value returned by the numeric-vector overload.</returns>
public double Distance(IDataVector<double> vec1, IDataVector<double> vec2)
    => Distance(vec1.NumericVector, vec2.NumericVector);
/// <summary>
/// Computes the error between two data vectors by delegating to the <c>CalculateError</c>
/// overload that takes their <c>NumericVector</c> representations.
/// </summary>
/// <param name="vec1">First vector.</param>
/// <param name="vec2">Second vector.</param>
/// <returns>The value returned by the numeric-vector overload.</returns>
public double CalculateError(IDataVector<double> vec1, IDataVector<double> vec2)
    => CalculateError(vec1.NumericVector, vec2.NumericVector);
/// <summary>
/// Unwraps both data vectors to their <c>NumericVector</c> form and forwards them to the
/// numeric-vector <c>CalculateError</c> overload.
/// </summary>
/// <param name="vec1">First vector.</param>
/// <param name="vec2">Second vector.</param>
/// <returns>The error reported by the numeric-vector overload.</returns>
public double CalculateError(IDataVector<double> vec1, IDataVector<double> vec2)
    => CalculateError(vec1.NumericVector, vec2.NumericVector);
/// <summary>
/// Computes the error between two result vectors as one minus the <c>Accuracy</c> of a
/// <c>ConfusionMatrix</c> built from them.
/// </summary>
/// <param name="vec1">First vector, passed as the first constructor argument of the confusion matrix.</param>
/// <param name="vec2">Second vector, passed as the second constructor argument of the confusion matrix.</param>
/// <returns><c>1 - Accuracy</c> of the confusion matrix.</returns>
public double CalculateError(IDataVector<TPredictionResult> vec1, IDataVector<TPredictionResult> vec2)
    => 1 - new ConfusionMatrix<TPredictionResult>(vec1, vec2).Accuracy;