/// <summary>
/// Explicit interface implementation. Learns from the full training set
/// by delegating to the public non-indexed Learn overload.
/// </summary>
/// <param name="observations">Observation matrix.</param>
/// <param name="targets">Target values.</param>
/// <returns>The learned predictor model.</returns>
IPredictorModel<double> ILearner<double>.Learn(F64Matrix observations, double[] targets)
    => Learn(observations, targets);
        /// <summary>
        /// Learns a regression random forest using every row of the training data.
        /// (Previous summary incorrectly said "classification"; this overload
        /// produces regression decision trees.)
        /// </summary>
        /// <param name="observations">Observation matrix.</param>
        /// <param name="targets">Target values.</param>
        /// <param name="rawVariableImportance">Outputs the raw (unnormalized) variable importance.</param>
        /// <returns>The learned regression decision trees.</returns>
        public IEnumerable<RegressionDecisionTreeModel> Learn(F64Matrix observations, double[] targets, out double[] rawVariableImportance)
        {
            // Train on all rows: 0 .. targets.Length - 1.
            var indices = Enumerable.Range(0, targets.Length).ToArray();

            return Learn(observations, targets, indices, out rawVariableImportance);
        }
        /// <summary>
        /// Learns a classification random forest from the full training set.
        /// </summary>
        /// <param name="observations">Observation matrix.</param>
        /// <param name="targets">Target values.</param>
        /// <returns>The learned classification forest model.</returns>
        public ClassificationForestModel Learn(F64Matrix observations, double[] targets)
        {
            // Every row participates in training.
            var allRows = Enumerable.Range(0, targets.Length).ToArray();
            return Learn(observations, targets, allRows);
        }
        /// <summary>
        /// Explicit interface implementation for indexed learning.
        /// Trains the individual regression trees and wraps them in a forest model.
        /// </summary>
        /// <param name="observations">Observation matrix.</param>
        /// <param name="targets">Target values.</param>
        /// <param name="indices">Row indices to train on.</param>
        /// <returns>The learned predictor model.</returns>
        IPredictorModel<double> IIndexedLearner<double>.Learn(F64Matrix observations, double[] targets, int[] indices)
        {
            var trees = Learn(observations, targets, indices, out var importance).ToArray();
            return new RegressionForestModel(trees, importance);
        }
 /// <summary>
 /// Explicit interface implementation for indexed probability learning.
 /// Delegates to the public indexed Learn overload.
 /// </summary>
 /// <param name="observations">Observation matrix.</param>
 /// <param name="targets">Target values.</param>
 /// <param name="indices">Row indices to train on.</param>
 /// <returns>The learned probability predictor model.</returns>
 IPredictorModel<ProbabilityPrediction> IIndexedLearner<ProbabilityPrediction>.Learn(F64Matrix observations, double[] targets, int[] indices)
     => Learn(observations, targets, indices);
 /// <summary>
 /// Explicit interface implementation for probability learning over the
 /// full training set (no index subset), delegating to the public Learn overload.
 /// </summary>
 /// <param name="observations">Observation matrix.</param>
 /// <param name="targets">Target values.</param>
 /// <returns>The learned probability predictor model.</returns>
 IPredictorModel<ProbabilityPrediction> ILearner<ProbabilityPrediction>.Learn(F64Matrix observations, double[] targets)
     => Learn(observations, targets);
// Example #7
// 0
 /// <summary>
 /// Verifies that the given indices are valid for the provided observations
 /// and targets, delegating to the count-based overload.
 /// </summary>
 /// <param name="indices">Row indices to validate.</param>
 /// <param name="observations">Observation matrix.</param>
 /// <param name="targets">Target values.</param>
 public static void VerifyIndices(int[] indices, F64Matrix observations, double[] targets)
 {
     var rowCount = observations.RowCount;
     var targetCount = targets.Length;
     VerifyIndices(indices, rowCount, targetCount);
 }
// Example #8
// 0
 /// <summary>
 /// Verifies that the observations and targets are mutually consistent,
 /// delegating to the count-based overload.
 /// </summary>
 /// <param name="observations">Observation matrix.</param>
 /// <param name="targets">Target values.</param>
 public static void VerifyObservationsAndTargets(F64Matrix observations, double[] targets)
 {
     var rowCount = observations.RowCount;
     var columnCount = observations.ColumnCount;
     VerifyObservationsAndTargets(rowCount, columnCount, targets.Length);
 }
 /// <summary>
 /// Explicit interface implementation for indexed learning.
 /// Delegates to the public indexed Learn overload.
 /// </summary>
 /// <param name="observations">Observation matrix.</param>
 /// <param name="targets">Target values.</param>
 /// <param name="indices">Row indices to train on.</param>
 /// <returns>The learned predictor model.</returns>
 IPredictorModel<double> IIndexedLearner<double>.Learn(F64Matrix observations, double[] targets, int[] indices)
 {
     return Learn(observations, targets, indices);
 }
// Example #10
// 0
        /// <summary>
        /// Learns and extracts the meta features produced by the ensemble models,
        /// using every row of the training data.
        /// </summary>
        /// <param name="observations">Observation matrix.</param>
        /// <param name="targets">Target values.</param>
        /// <returns>Per-model arrays of probability predictions (the meta features).</returns>
        public ProbabilityPrediction[][] LearnMetaFeatures(F64Matrix observations, double[] targets)
        {
            // All rows participate.
            var allRows = Enumerable.Range(0, targets.Length).ToArray();
            return LearnMetaFeatures(observations, targets, allRows);
        }
// Example #11
// 0
        /// <summary>
        /// Learns a RegressionEnsembleModel based on model selection.
        /// Trains several models and selects the best subset of models for the ensemble.
        /// The selection of the best set of models is based on cross validation.
        /// </summary>
        /// <param name="observations">Observation matrix.</param>
        /// <param name="targets">Target values.</param>
        /// <returns>The learned regression ensemble model.</returns>
        public RegressionEnsembleModel Learn(F64Matrix observations, double[] targets)
        {
            // Train on all rows.
            var indices = Enumerable.Range(0, targets.Length).ToArray();

            return Learn(observations, targets, indices);
        }
 /// <summary>
 /// Learns a decision tree from the provided observations and targets but limited
 /// to the observation indices provided by indices.
 /// Indices can contain the same index multiple times.
 /// </summary>
 /// <param name="observations">Observation matrix.</param>
 /// <param name="targets">Target values.</param>
 /// <param name="indices">Row indices to train on; duplicates are allowed.</param>
 /// <returns>The learned binary tree.</returns>
 public BinaryTree Learn(F64Matrix observations, double[] targets, int[] indices)
 {
     // No per-sample weights: pass the shared cached empty array instead of
     // allocating a fresh double[0] on every call.
     return Learn(observations, targets, indices, System.Array.Empty<double>());
 }
        /// <summary>
        /// Learns a decision tree from the provided observations and targets.
        /// Weights can be provided in order to weight each sample individually.
        /// </summary>
        /// <param name="observations">Observation matrix.</param>
        /// <param name="targets">Target values.</param>
        /// <param name="weights">Individual sample weights.</param>
        /// <returns>The learned binary tree.</returns>
        public BinaryTree Learn(F64Matrix observations, double[] targets, double[] weights)
        {
            // Use every row as a training index.
            var allRows = Enumerable.Range(0, targets.Length).ToArray();
            return Learn(observations, targets, allRows, weights);
        }
        /// <summary>
        /// Executes one AdaBoost.R2-style boosting round over the rows given by
        /// <paramref name="indices"/>: draws a weight-proportional sample, trains a
        /// weak model on it, computes the weighted model error, and (except on the
        /// last round) re-weights the samples.
        /// </summary>
        /// <param name="observations">Observation matrix shared by all rounds.</param>
        /// <param name="targets">Target values for all rows.</param>
        /// <param name="indices">Row indices participating in training.</param>
        /// <param name="iteration">Zero-based round number; compared against m_iterations to skip the final weight update.</param>
        /// <returns>False when the round is rejected (weighted error >= 0.5); true otherwise.</returns>
        bool Boost(F64Matrix observations, double[] targets, int[] indices, int iteration)
        {
            // Sample training rows in proportion to the current sample weights.
            m_sampler.Sample(indices, m_sampleWeights, m_sampleIndices);

            var model = m_modelLearner.Learn(observations, targets,
                                             m_sampleIndices); // weighted sampling is used instead of weights in training


            // Predict on the full index set (not just the sampled rows) to measure error.
            var predictions = model.Predict(observations, indices);

            // Absolute error per row; predictions[i] corresponds to indices[i],
            // and m_indexedTargets[i] was copied from targets at indices[i] by the caller.
            for (int i = 0; i < predictions.Length; i++)
            {
                var index = indices[i];
                m_workErrors[index] = Math.Abs(m_indexedTargets[i] - predictions[i]);
            }

            var maxError = m_workErrors.Max();

            // Normalize errors to [0, 1] and apply the configured loss transform.
            // NOTE(review): this loop spans the whole error array, not just the rows
            // in 'indices'; entries outside 'indices' keep values from earlier calls.
            // WeightedMean below reads only the indexed entries, so this appears
            // harmless — confirm if indices can vary between calls.
            for (int i = 0; i < m_workErrors.Length; i++)
            {
                var error = m_workErrors[i];

                if (maxError != 0.0)
                {
                    error = error / maxError;
                }

                switch (m_loss)
                {
                case AdaBoostRegressionLoss.Linear:
                    break;

                case AdaBoostRegressionLoss.Squared:
                    error = error * error;
                    break;

                case AdaBoostRegressionLoss.Exponential:
                    error = 1.0 - Math.Exp(-error);
                    break;

                default:
                    throw new ArgumentException("Unsupported loss type");
                }

                m_workErrors[i] = error;
            }

            // Weighted average of the transformed errors over the indexed rows.
            var modelError = m_workErrors.WeightedMean(m_sampleWeights, indices);

            if (modelError <= 0.0)
            {
                // Perfect fit: record the model with full weight and keep boosting.
                m_modelErrors.Add(0.0);
                m_modelWeights.Add(1.0);
                m_models.Add(model);
                return(true);
            }
            else if (modelError >= 0.5)
            {
                // Weak learner no better than chance (AdaBoost.R2 cutoff): reject round.
                return(false);
            }

            // beta < 1 since modelError < 0.5; lower error => smaller beta => larger model weight.
            var beta = modelError / (1.0 - modelError);

            var modelWeight = m_learningRate * Math.Log(1.0 / beta);

            // Only boost if not last iteration
            if (iteration != (m_iterations - 1))
            {
                // Down-weight rows the model predicted well (low error => larger exponent).
                for (int i = 0; i < indices.Length; i++)
                {
                    var index        = indices[i];
                    var sampleWeight = m_sampleWeights[index];
                    var error        = m_workErrors[index];
                    m_sampleWeights[index] = sampleWeight * Math.Pow(beta, (1.0 - error) * m_learningRate);
                }
            }

            m_modelErrors.Add(modelError);
            m_modelWeights.Add(modelWeight);
            m_models.Add(model);

            return(true);
        }
        /// <summary>
        /// Learns an AdaBoost regression model on the rows selected by
        /// <paramref name="indices"/>. Initializes uniform sample weights, then runs
        /// up to m_iterations boosting rounds via Boost, stopping early when a round
        /// is rejected, the ensemble error reaches zero, a round's model error is
        /// zero, or the remaining weight mass is exhausted.
        /// </summary>
        /// <param name="observations">Observation matrix.</param>
        /// <param name="targets">Target values for all rows.</param>
        /// <param name="indices">Row indices to train on.</param>
        /// <returns>The learned AdaBoost regression model.</returns>
        public RegressionAdaBoostModel Learn(F64Matrix observations, double[] targets,
                                             int[] indices)
        {
            Checks.VerifyObservationsAndTargets(observations, targets);
            Checks.VerifyIndices(indices, observations, targets);

            // NOTE(review): 0 means "use default depth of 3", but this mutates the
            // configuration field, so the substitution persists across Learn calls —
            // confirm this is intentional.
            if (m_maximumTreeDepth == 0)
            {
                m_maximumTreeDepth = 3;
            }

            // Weak learner: shallow regression tree; 42 is a fixed seed for
            // reproducible feature selection.
            m_modelLearner = new RegressionDecisionTreeLearner(m_maximumTreeDepth, m_minimumSplitSize,
                                                               observations.ColumnCount, m_minimumInformationGain, 42);

            // Reset state from any previous Learn call.
            m_modelErrors.Clear();
            m_modelWeights.Clear();
            m_models.Clear();

            // Weight/error buffers are sized by total rows; index buffers by the
            // training subset. Array.Resize preserves old contents when shrinking/growing.
            Array.Resize(ref m_sampleWeights, targets.Length);

            Array.Resize(ref m_workErrors, targets.Length);
            Array.Resize(ref m_indexedTargets, indices.Length);
            Array.Resize(ref m_sampleIndices, indices.Length);

            // m_indexedTargets[i] = targets[indices[i]] for the whole index range.
            indices.IndexedCopy(targets, Interval1D.Create(0, indices.Length),
                                m_indexedTargets);

            // Uniform initial weights over the training rows.
            var initialWeight = 1.0 / indices.Length;

            for (int i = 0; i < indices.Length; i++)
            {
                var index = indices[i];
                m_sampleWeights[index] = initialWeight;
            }

            for (int i = 0; i < m_iterations; i++)
            {
                // A rejected round (weighted error >= 0.5) ends boosting.
                if (!Boost(observations, targets, indices, i))
                {
                    break;
                }

                var ensembleError = ErrorEstimate(observations, indices);

                if (ensembleError == 0.0)
                {
                    break;
                }

                if (m_modelErrors[i] == 0.0)
                {
                    break;
                }

                var weightSum = m_sampleWeights.Sum(indices);
                if (weightSum <= 0.0)
                {
                    break;
                }

                if (i == (m_iterations - 1))
                {
                    // Normalize weights
                    for (int j = 0; j < indices.Length; j++)
                    {
                        var index = indices[j];
                        m_sampleWeights[index] = m_sampleWeights[index] / weightSum;
                    }
                }
            }

            var featuresCount      = observations.ColumnCount;
            var variableImportance = VariableImportance(featuresCount);

            return(new RegressionAdaBoostModel(m_models.ToArray(), m_modelWeights.ToArray(),
                                               variableImportance));
        }