/// <summary>
/// Private explicit interface implementation for learning on all observations.
/// Delegates to the public <c>Learn</c> overload.
/// </summary>
/// <param name="observations">Observation matrix</param>
/// <param name="targets">Target values</param>
/// <returns>The learned model as an <c>IPredictorModel&lt;double&gt;</c></returns>
IPredictorModel<double> ILearner<double>.Learn(F64Matrix observations, double[] targets)
    => Learn(observations, targets);
/// <summary>
/// Learns the individual regression decision trees of the forest
/// using all observations, and outputs the raw variable importance.
/// </summary>
/// <param name="observations">Observation matrix</param>
/// <param name="targets">Target values</param>
/// <param name="rawVariableImportance">Outputs the accumulated raw variable importances</param>
/// <returns>The learned regression decision trees</returns>
public IEnumerable<RegressionDecisionTreeModel> Learn(F64Matrix observations, double[] targets,
    out double[] rawVariableImportance)
{
    // No index subset supplied, so train on every observation.
    var allIndices = Enumerable.Range(0, targets.Length).ToArray();
    return Learn(observations, targets, allIndices, out rawVariableImportance);
}
/// <summary>
/// Learns a classification random forest using all observations.
/// </summary>
/// <param name="observations">Observation matrix</param>
/// <param name="targets">Target values</param>
/// <returns>The learned <c>ClassificationForestModel</c></returns>
public ClassificationForestModel Learn(F64Matrix observations, double[] targets)
{
    // No index subset supplied, so train on every observation.
    var allIndices = Enumerable.Range(0, targets.Length).ToArray();
    return Learn(observations, targets, allIndices);
}
/// <summary>
/// Private explicit interface implementation for indexed learning.
/// Trains the individual trees and wraps them in a regression forest model.
/// </summary>
/// <param name="observations">Observation matrix</param>
/// <param name="targets">Target values</param>
/// <param name="indices">Row indices to train on; may contain duplicates</param>
/// <returns>The learned model as an <c>IPredictorModel&lt;double&gt;</c></returns>
IPredictorModel<double> IIndexedLearner<double>.Learn(F64Matrix observations, double[] targets,
    int[] indices)
{
    var trees = Learn(observations, targets, indices, out var importance).ToArray();
    return new RegressionForestModel(trees, importance);
}
/// <summary>
/// Private explicit interface implementation for indexed probability learning.
/// Delegates to the public indexed <c>Learn</c> overload.
/// </summary>
/// <param name="observations">Observation matrix</param>
/// <param name="targets">Target values</param>
/// <param name="indices">Row indices to train on</param>
/// <returns>The learned model as an <c>IPredictorModel&lt;ProbabilityPrediction&gt;</c></returns>
IPredictorModel<ProbabilityPrediction> IIndexedLearner<ProbabilityPrediction>.Learn(
    F64Matrix observations, double[] targets, int[] indices)
    => Learn(observations, targets, indices);
/// <summary>
/// Private explicit interface implementation for probability learning
/// on all observations. Delegates to the public <c>Learn</c> overload.
/// </summary>
/// <param name="observations">Observation matrix</param>
/// <param name="targets">Target values</param>
/// <returns>The learned model as an <c>IPredictorModel&lt;ProbabilityPrediction&gt;</c></returns>
IPredictorModel<ProbabilityPrediction> ILearner<ProbabilityPrediction>.Learn(
    F64Matrix observations, double[] targets)
    => Learn(observations, targets);
/// <summary>
/// Verify that indices are valid and match observations and targets.
/// Delegates to the count-based overload.
/// </summary>
/// <param name="indices">Row indices to validate</param>
/// <param name="observations">Observation matrix the indices must fit</param>
/// <param name="targets">Target values the indices must fit</param>
public static void VerifyIndices(int[] indices, F64Matrix observations, double[] targets)
    => VerifyIndices(indices, observations.RowCount, targets.Length);
/// <summary>
/// Verify that observations and targets are valid.
/// Delegates to the count-based overload.
/// </summary>
/// <param name="observations">Observation matrix to validate</param>
/// <param name="targets">Target values to validate</param>
public static void VerifyObservationsAndTargets(F64Matrix observations, double[] targets)
    => VerifyObservationsAndTargets(observations.RowCount, observations.ColumnCount,
        targets.Length);
/// <summary>
/// Private explicit interface implementation for indexed learning.
/// Delegates to the public indexed <c>Learn</c> overload.
/// </summary>
/// <param name="observations">Observation matrix</param>
/// <param name="targets">Target values</param>
/// <param name="indices">Row indices to train on</param>
/// <returns>The learned model as an <c>IPredictorModel&lt;double&gt;</c></returns>
IPredictorModel<double> IIndexedLearner<double>.Learn(F64Matrix observations, double[] targets,
    int[] indices)
{
    return Learn(observations, targets, indices);
}
/// <summary>
/// Learns and extracts the meta features learned by the ensemble models,
/// using all observations.
/// </summary>
/// <param name="observations">Observation matrix</param>
/// <param name="targets">Target values</param>
/// <returns>Per-model arrays of probability predictions (the meta features)</returns>
public ProbabilityPrediction[][] LearnMetaFeatures(F64Matrix observations, double[] targets)
{
    // No index subset supplied, so use every observation.
    var allIndices = Enumerable.Range(0, targets.Length).ToArray();
    return LearnMetaFeatures(observations, targets, allIndices);
}
/// <summary>
/// Learns a RegressionEnsembleModel based on model selection, using all observations.
/// Trains several models and selects the best subset of models for the ensemble;
/// the selection is based on cross validation.
/// </summary>
/// <param name="observations">Observation matrix</param>
/// <param name="targets">Target values</param>
/// <returns>The learned <c>RegressionEnsembleModel</c></returns>
public RegressionEnsembleModel Learn(F64Matrix observations, double[] targets)
{
    // No index subset supplied, so train on every observation.
    var allIndices = Enumerable.Range(0, targets.Length).ToArray();
    return Learn(observations, targets, allIndices);
}
/// <summary>
/// Learns a decision tree from the provided observations and targets but limited
/// to the observation indices provided by indices.
/// Indices can contain the same index multiple times.
/// Samples are weighted equally (no per-sample weights).
/// </summary>
/// <param name="observations">Observation matrix</param>
/// <param name="targets">Target values</param>
/// <param name="indices">Row indices to train on; may contain duplicates</param>
/// <returns>The learned <c>BinaryTree</c></returns>
public BinaryTree Learn(F64Matrix observations, double[] targets, int[] indices)
{
    // Array.Empty<double>() instead of new double[0]: same "no weights" semantics
    // without allocating a fresh empty array on every call.
    return Learn(observations, targets, indices, Array.Empty<double>());
}
/// <summary>
/// Learns a decision tree from the provided observations and targets,
/// using all observations. Weights can be provided in order to weight
/// each sample individually.
/// </summary>
/// <param name="observations">Observation matrix</param>
/// <param name="targets">Target values</param>
/// <param name="weights">Per-sample weights</param>
/// <returns>The learned <c>BinaryTree</c></returns>
public BinaryTree Learn(F64Matrix observations, double[] targets, double[] weights)
{
    // No index subset supplied, so train on every observation.
    var allIndices = Enumerable.Range(0, targets.Length).ToArray();
    return Learn(observations, targets, allIndices, weights);
}
/// <summary>
/// Runs one AdaBoost.R2-style boosting round: fits a model on a weighted sample,
/// computes normalized per-sample errors under the configured loss, and updates
/// the sample weights and model/weight/error lists.
/// </summary>
/// <param name="observations">Observation matrix</param>
/// <param name="targets">Target values</param>
/// <param name="indices">Row indices participating in training</param>
/// <param name="iteration">Zero-based boosting round; weights are not updated on the last round</param>
/// <returns>True to continue boosting; false when the weighted error reaches 0.5 or more</returns>
bool Boost(F64Matrix observations, double[] targets, int[] indices, int iteration)
{
    // Draw this round's training sample according to the current sample weights.
    m_sampler.Sample(indices, m_sampleWeights, m_sampleIndices);
    var model = m_modelLearner.Learn(observations, targets,
        m_sampleIndices); // weighted sampling is used instead of weights in training

    // Absolute errors on the full index set. predictions[i] and m_indexedTargets[i]
    // both correspond to row indices[i], so the error is stored at that row's slot.
    var predictions = model.Predict(observations, indices);
    for (int i = 0; i < predictions.Length; i++)
    {
        var index = indices[i];
        m_workErrors[index] = Math.Abs(m_indexedTargets[i] - predictions[i]);
    }

    // Normalize errors to [0, 1] by the largest error, then apply the loss transform.
    // NOTE(review): this loop runs over the whole m_workErrors array, including slots
    // not in `indices`; those slots are excluded from the weighted mean below, so
    // stale values there appear harmless — confirm.
    var maxError = m_workErrors.Max();
    for (int i = 0; i < m_workErrors.Length; i++)
    {
        var error = m_workErrors[i];
        if (maxError != 0.0)
        {
            error = error / maxError;
        }

        switch (m_loss)
        {
            case AdaBoostRegressionLoss.Linear:
                // Linear loss: keep the normalized error as-is.
                break;
            case AdaBoostRegressionLoss.Squared:
                error = error * error;
                break;
            case AdaBoostRegressionLoss.Exponential:
                error = 1.0 - Math.Exp(-error);
                break;
            default:
                throw new ArgumentException("Unsupported loss type");
        }

        m_workErrors[i] = error;
    }

    // Weighted average loss of this round's model over the training indices.
    var modelError = m_workErrors.WeightedMean(m_sampleWeights, indices);

    if (modelError <= 0.0)
    {
        // Perfect fit: keep the model with full weight and stop adjusting.
        m_modelErrors.Add(0.0);
        m_modelWeights.Add(1.0);
        m_models.Add(model);
        return(true);
    }
    else if (modelError >= 0.5)
    {
        // Model is no better than the 0.5 error threshold: reject it and stop boosting.
        return(false);
    }

    // beta < 1 since modelError < 0.5; smaller error => smaller beta => larger model weight.
    var beta = modelError / (1.0 - modelError);
    var modelWeight = m_learningRate * Math.Log(1.0 / beta);

    // Only boost if not last iteration
    if (iteration != (m_iterations - 1))
    {
        // Down-weight well-predicted samples (small error) more strongly.
        for (int i = 0; i < indices.Length; i++)
        {
            var index = indices[i];
            var sampleWeight = m_sampleWeights[index];
            var error = m_workErrors[index];
            m_sampleWeights[index] = sampleWeight * Math.Pow(beta, (1.0 - error) * m_learningRate);
        }
    }

    m_modelErrors.Add(modelError);
    m_modelWeights.Add(modelWeight);
    m_models.Add(model);

    return(true);
}
/// <summary>
/// Learns an Adaboost regression model limited to the observation indices
/// provided by <paramref name="indices"/>. Repeatedly calls <c>Boost</c> until
/// the iteration budget is spent or an early-stop condition is met.
/// </summary>
/// <param name="observations">Observation matrix</param>
/// <param name="targets">Target values</param>
/// <param name="indices">Row indices to train on</param>
/// <returns>The learned <c>RegressionAdaBoostModel</c></returns>
public RegressionAdaBoostModel Learn(F64Matrix observations, double[] targets, int[] indices)
{
    Checks.VerifyObservationsAndTargets(observations, targets);
    Checks.VerifyIndices(indices, observations, targets);

    // NOTE(review): this permanently mutates the learner's configured depth when 0
    // (0 presumably means "use default"); the instance keeps depth 3 on later calls — confirm intended.
    if (m_maximumTreeDepth == 0)
    {
        m_maximumTreeDepth = 3;
    }

    // Base learner rebuilt per call; seed is hard-coded to 42, so tree learning is deterministic.
    m_modelLearner = new RegressionDecisionTreeLearner(m_maximumTreeDepth, m_minimumSplitSize,
        observations.ColumnCount, m_minimumInformationGain, 42);

    // Reset all state from any previous Learn call.
    m_modelErrors.Clear();
    m_modelWeights.Clear();
    m_models.Clear();

    // Per-row buffers sized by total rows; per-sample buffers sized by the index subset.
    Array.Resize(ref m_sampleWeights, targets.Length);
    Array.Resize(ref m_workErrors, targets.Length);
    Array.Resize(ref m_indexedTargets, indices.Length);
    Array.Resize(ref m_sampleIndices, indices.Length);

    // m_indexedTargets[i] = targets[indices[i]].
    indices.IndexedCopy(targets, Interval1D.Create(0, indices.Length), m_indexedTargets);

    // Start with uniform weights over the training indices.
    var initialWeight = 1.0 / indices.Length;
    for (int i = 0; i < indices.Length; i++)
    {
        var index = indices[i];
        m_sampleWeights[index] = initialWeight;
    }

    for (int i = 0; i < m_iterations; i++)
    {
        // Boost returns false when the round's model is rejected (error >= 0.5).
        if (!Boost(observations, targets, indices, i))
        {
            break;
        }

        // Stop early once the ensemble predicts the training indices perfectly.
        var ensembleError = ErrorEstimate(observations, indices);
        if (ensembleError == 0.0)
        {
            break;
        }

        // Stop once a single round's model fits perfectly.
        if (m_modelErrors[i] == 0.0)
        {
            break;
        }

        // Degenerate weights: nothing left to sample from.
        var weightSum = m_sampleWeights.Sum(indices);
        if (weightSum <= 0.0)
        {
            break;
        }

        if (i == (m_iterations - 1))
        {
            // Normalize weights
            for (int j = 0; j < indices.Length; j++)
            {
                var index = indices[j];
                m_sampleWeights[index] = m_sampleWeights[index] / weightSum;
            }
        }
    }

    var featuresCount = observations.ColumnCount;
    var variableImportance = VariableImportance(featuresCount);

    return(new RegressionAdaBoostModel(m_models.ToArray(), m_modelWeights.ToArray(),
        variableImportance));
}