Example #1
        private protected EnsembleTrainerBase(ArgumentsBase args, IHostEnvironment env, string name)
            : base(env, name)
        {
            Args = args;

            using (var ch = Host.Start("Init"))
            {
                ch.CheckUserArg(Utils.Size(Args.BasePredictors) > 0, nameof(Args.BasePredictors), "This should have at least one value");

                NumModels = Args.NumModels ??
                            (Args.BasePredictors.Length == 1 ? DefaultNumModels : Args.BasePredictors.Length);

                ch.CheckUserArg(NumModels > 0, nameof(Args.NumModels), "Must be positive, or null to indicate numModels is the number of base predictors");

                if (Utils.Size(Args.BasePredictors) > NumModels)
                {
                    ch.Warning("The base predictor count is greater than models count. Some of the base predictors will be ignored.");
                }

                _subsetSelector = Args.SamplingType.CreateComponent(Host);

                Trainers = new ITrainer<IPredictorProducing<TOutput>>[NumModels];
                for (int i = 0; i < Trainers.Length; i++)
                {
                    Trainers[i] = Args.BasePredictors[i % Args.BasePredictors.Length].CreateInstance(Host);
                }
                // We infer normalization and calibration preferences from the trainers. However, even if the internal trainers
                // don't need caching, we perform multiple passes over the data, so it is probably appropriate to always cache.
                Info = new TrainerInfo(
                    normalization: Trainers.Any(t => t.Info.NeedNormalization),
                    calibration: Trainers.Any(t => t.Info.NeedCalibration));
                ch.Done();
            }
        }
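The two key decisions in this constructor are how many models to train and which base predictor fills each slot: NumModels defaults to the number of base predictors (or DefaultNumModels when only one is given), and the slots are filled round-robin via `i % Args.BasePredictors.Length`. The standalone sketch below reproduces just that sizing and assignment logic with hypothetical stand-in types (no ML.NET dependencies); the names SketchArgs and the value 50 for DefaultNumModels are assumptions for illustration only.

using System;

// Minimal stand-in for the trainer's argument object.
class SketchArgs
{
    public int? NumModels;
    public string[] BasePredictors; // names instead of component factories
}

static class EnsembleSizingSketch
{
    const int DefaultNumModels = 50; // hypothetical default; the real value is trainer-specific

    static void Main()
    {
        var args = new SketchArgs { NumModels = null, BasePredictors = new[] { "ap", "lr", "svm" } };

        // Same defaulting rule as the constructor: a null NumModels means "one model per base predictor",
        // unless there is a single base predictor, in which case a larger default is used.
        int numModels = args.NumModels ??
                        (args.BasePredictors.Length == 1 ? DefaultNumModels : args.BasePredictors.Length);

        // Round-robin assignment: slot i reuses base predictor i % BasePredictors.Length.
        var trainers = new string[numModels];
        for (int i = 0; i < trainers.Length; i++)
            trainers[i] = args.BasePredictors[i % args.BasePredictors.Length];

        Console.WriteLine(string.Join(", ", trainers)); // ap, lr, svm
    }
}

With several base predictors and NumModels left null, the ensemble trains exactly one model per predictor; with more models than predictors, predictors are simply reused in order.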
Example #2
        public override void CalculateMetrics(FeatureSubsetModel<IPredictorProducing<TOutput>> model,
                                              ISubsetSelector subsetSelector, Subset subset, Batch batch, bool needMetrics)
        {
            base.CalculateMetrics(model, subsetSelector, subset, batch, needMetrics);

            var vm = model.Predictor as IValueMapper;

            Host.Check(vm != null, "Predictor does not implement the expected IValueMapper interface");
            var map = vm.GetMapper<VBuffer<Single>, TOutput>();

            // Score every row of the test subset with the model's value mapper, growing the buffer as needed.
            TOutput[] preds = new TOutput[100];
            int count = 0;
            var data = subsetSelector.GetTestData(subset, batch);

            using (var cursor = new FeatureFloatVectorCursor(data, CursOpt.AllFeatures))
            {
                while (cursor.MoveNext())
                {
                    Utils.EnsureSize(ref preds, count + 1);
                    map(in cursor.Features, ref preds[count]);
                    count++;
                }
            }
            // Trim to the number of rows actually scored and cache the per-row predictions for this model.
            Array.Resize(ref preds, count);
            _predictions[model] = preds;
        }
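This override collects one prediction per test row without knowing the row count up front: it starts with a fixed-size buffer, grows it as the cursor advances (Utils.EnsureSize), and trims it to the final count with Array.Resize. A minimal sketch of that grow-then-trim pattern, assuming a local EnsureSize helper in place of the ML.NET utility:

using System;
using System.Collections.Generic;

static class GrowThenTrimSketch
{
    // Hypothetical stand-in for Utils.EnsureSize: grow the array (at least doubling) until it can hold 'min' items.
    static void EnsureSize<T>(ref T[] array, int min)
    {
        if (array.Length >= min)
            return;
        int newSize = Math.Max(min, array.Length * 2);
        Array.Resize(ref array, newSize);
    }

    static void Main()
    {
        IEnumerable<float> scores = new float[] { 0.1f, 0.7f, 0.3f, 0.9f }; // stands in for the row cursor

        var preds = new float[2]; // deliberately small initial buffer
        int count = 0;
        foreach (var score in scores)
        {
            EnsureSize(ref preds, count + 1);
            preds[count++] = score;
        }

        Array.Resize(ref preds, count); // trim to the rows actually seen
        Console.WriteLine(preds.Length); // 4
    }
}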
        public virtual void CalculateMetrics(FeatureSubsetModel<IPredictorProducing<TOutput>> model,
                                             ISubsetSelector subsetSelector, Subset subset, Batch batch, bool needMetrics)
        {
            if (!needMetrics || model == null || model.Metrics != null)
            {
                return;
            }

            using (var ch = Host.Start("Calculate metrics"))
            {
                RoleMappedData testData = subsetSelector.GetTestData(subset, batch);
                // Because the training and test datasets are drawn from the same base dataset, the test data role mappings
                // are the same as for the train data.
                IDataScorerTransform scorePipe = ScoreUtils.GetScorer(model.Predictor, testData, Host, testData.Schema);
                // REVIEW: Should we somehow allow the user to customize the evaluator?
                // By what mechanism should we allow that?
                var            evalComp       = GetEvaluatorSubComponent();
                RoleMappedData scoredTestData = RoleMappedData.Create(scorePipe,
                                                                      GetColumnRoles(testData.Schema, scorePipe.Schema));
                IEvaluator evaluator = evalComp.CreateInstance(Host);
                // REVIEW: with the new evaluators, metrics of individual models are no longer
                // printed to the Console. Consider adding an option on the combiner to print them.
                // REVIEW: Consider adding an option to the combiner to save a data view
                // containing all the results of the individual models.
                var metricsDict = evaluator.Evaluate(scoredTestData);
                if (!metricsDict.TryGetValue(MetricKinds.OverallMetrics, out IDataView metricsView))
                {
                    throw Host.Except("Evaluator did not produce any overall metrics");
                }
                // REVIEW: We're assuming that the metrics of interest are always doubles here.
                var metrics = EvaluateUtils.GetMetrics(metricsView, getVectorMetrics: false);
                model.Metrics = metrics.ToArray();
                ch.Done();
            }
        }
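The virtual method is a compute-once cache: the guard clause returns early when metrics are not requested, the model is missing, or model.Metrics is already populated; otherwise it scores the test subset, runs the evaluator, and stores the overall metrics on the model so repeated calls become no-ops. A minimal sketch of that caching guard, using hypothetical stand-in types and a placeholder metric value purely for illustration:

using System;
using System.Collections.Generic;

// Hypothetical stand-in for FeatureSubsetModel: metrics are cached on the model itself.
class SketchModel
{
    public KeyValuePair<string, double>[] Metrics; // null until computed
}

static class MetricsCachingSketch
{
    static void CalculateMetrics(SketchModel model, bool needMetrics)
    {
        // Same guard as the virtual method: skip when metrics are not wanted,
        // the model is missing, or the metrics were already computed.
        if (!needMetrics || model == null || model.Metrics != null)
            return;

        // Stand-in for "score the test subset and run the evaluator"; the value is a placeholder.
        model.Metrics = new[] { new KeyValuePair<string, double>("AUC", 0.87) };
    }

    static void Main()
    {
        var model = new SketchModel();
        CalculateMetrics(model, needMetrics: true);  // computes and caches
        CalculateMetrics(model, needMetrics: true);  // no-op: Metrics already set
        Console.WriteLine(model.Metrics[0].Key);     // AUC
    }
}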
        internal EnsembleTrainerBase(ArgumentsBase args, IHostEnvironment env, string name)
            : base(env, name)
        {
            Args = args;

            using (var ch = Host.Start("Init"))
            {
                ch.CheckUserArg(Utils.Size(Args.BasePredictors) > 0, nameof(Args.BasePredictors), "This should have at least one value");

                NumModels = Args.NumModels ??
                            (Args.BasePredictors.Length == 1 ? DefaultNumModels : Args.BasePredictors.Length);

                ch.CheckUserArg(NumModels > 0, nameof(Args.NumModels), "Must be positive, or null to indicate numModels is the number of base predictors");

                if (Utils.Size(Args.BasePredictors) > NumModels)
                {
                    ch.Warning("The base predictor count is greater than models count. Some of the base predictors will be ignored.");
                }

                _subsetSelector = Args.SamplingType.CreateComponent(Host);

                Trainers = new ITrainer<RoleMappedData, IPredictorProducing<TOutput>>[NumModels];
                for (int i = 0; i < Trainers.Length; i++)
                {
                    Trainers[i] = Args.BasePredictors[i % Args.BasePredictors.Length].CreateInstance(Host);
                }
                _needNorm = Trainers.Any(t => t is ITrainerEx nn && nn.NeedNormalization);
                _needCalibration = Trainers.Any(t => t is ITrainerEx nn && nn.NeedCalibration);
                ch.Done();
            }
        }
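Both constructors derive ensemble-wide preferences from the individual trainers: the newer one reads t.Info.NeedNormalization and NeedCalibration, while this older one type-tests each trainer for the ITrainerEx capability interface. The sketch below shows the same Any-plus-pattern-matching idiom over hypothetical trainer types; ISketchTrainerEx and the trainer classes are illustrative stand-ins, not ML.NET types.

using System;
using System.Linq;

// Hypothetical capability interface, standing in for ITrainerEx.
interface ISketchTrainerEx
{
    bool NeedNormalization { get; }
}

class PlainTrainer { }

class NormalizingTrainer : ISketchTrainerEx
{
    public bool NeedNormalization => true;
}

static class CapabilityFlagsSketch
{
    static void Main()
    {
        object[] trainers = { new PlainTrainer(), new NormalizingTrainer() };

        // The ensemble needs normalization if any member trainer declares that it does.
        bool needNorm = trainers.Any(t => t is ISketchTrainerEx ex && ex.NeedNormalization);

        Console.WriteLine(needNorm); // True
    }
}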
        public override void CalculateMetrics(FeatureSubsetModel<IPredictorProducing<TOutput>> model,
                                              ISubsetSelector subsetSelector, Subset subset, Batch batch, bool needMetrics)
        {
            // Always compute metrics for this trainer, regardless of the caller's needMetrics flag.
            base.CalculateMetrics(model, subsetSelector, subset, batch, true);
        }