private protected EnsembleTrainerBase(ArgumentsBase args, IHostEnvironment env, string name)
    : base(env, name)
{
    Args = args;

    using (var ch = Host.Start("Init"))
    {
        ch.CheckUserArg(Utils.Size(Args.BasePredictors) > 0, nameof(Args.BasePredictors), "This should have at least one value");

        NumModels = Args.NumModels ??
            (Args.BasePredictors.Length == 1 ? DefaultNumModels : Args.BasePredictors.Length);

        ch.CheckUserArg(NumModels > 0, nameof(Args.NumModels),
            "Must be positive, or null to indicate numModels is the number of base predictors");

        if (Utils.Size(Args.BasePredictors) > NumModels)
            ch.Warning("The base predictor count is greater than the model count. Some of the base predictors will be ignored.");

        _subsetSelector = Args.SamplingType.CreateComponent(Host);

        Trainers = new ITrainer<IPredictorProducing<TOutput>>[NumModels];
        for (int i = 0; i < Trainers.Length; i++)
            Trainers[i] = Args.BasePredictors[i % Args.BasePredictors.Length].CreateInstance(Host);

        // We infer normalization and calibration preferences from the trainers. However, even if the internal trainers
        // don't need caching we are performing multiple passes over the data, so it is probably appropriate to always cache.
        Info = new TrainerInfo(
            normalization: Trainers.Any(t => t.Info.NeedNormalization),
            calibration: Trainers.Any(t => t.Info.NeedCalibration));
        ch.Done();
    }
}
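// Illustrative sketch (not part of the trainer): the loop above fills the Trainers array by cycling
// through Args.BasePredictors with a modulo index, so when NumModels exceeds the number of base
// predictor factories they are reused round-robin. The helper below is hypothetical and only
// demonstrates the indexing pattern with plain strings.
private static string[] RoundRobinAssign(string[] factories, int numModels)
{
    var slots = new string[numModels];
    for (int i = 0; i < numModels; i++)
        slots[i] = factories[i % factories.Length];   // e.g. 2 factories, 5 models -> A, B, A, B, A
    return slots;
}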
public override void CalculateMetrics(FeatureSubsetModel<IPredictorProducing<TOutput>> model,
    ISubsetSelector subsetSelector, Subset subset, Batch batch, bool needMetrics)
{
    base.CalculateMetrics(model, subsetSelector, subset, batch, needMetrics);

    var vm = model.Predictor as IValueMapper;
    Host.Check(vm != null, "Predictor doesn't implement the expected interface");
    var map = vm.GetMapper<VBuffer<Single>, TOutput>();

    TOutput[] preds = new TOutput[100];
    int count = 0;
    var data = subsetSelector.GetTestData(subset, batch);
    using (var cursor = new FeatureFloatVectorCursor(data, CursOpt.AllFeatures))
    {
        while (cursor.MoveNext())
        {
            Utils.EnsureSize(ref preds, count + 1);
            map(in cursor.Features, ref preds[count]);
            count++;
        }
    }
    Array.Resize(ref preds, count);
    _predictions[model] = preds;
}
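// Illustrative sketch (hypothetical helper, not ML.NET API; assumes using System and
// System.Collections.Generic): the method above scores rows into an array that starts at a guessed
// capacity, grows as rows arrive, and is trimmed to the true row count at the end. The same
// grow-then-trim pattern in isolation:
private static T[] CollectAll<T>(IEnumerable<T> source)
{
    var buffer = new T[100];          // initial guess, mirrors "new TOutput[100]" above
    int count = 0;
    foreach (var item in source)
    {
        if (count >= buffer.Length)
            Array.Resize(ref buffer, buffer.Length * 2);   // grow, like Utils.EnsureSize
        buffer[count++] = item;
    }
    Array.Resize(ref buffer, count);  // trim to the actual number of items
    return buffer;
}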
public virtual void CalculateMetrics(FeatureSubsetModel<IPredictorProducing<TOutput>> model,
    ISubsetSelector subsetSelector, Subset subset, Batch batch, bool needMetrics)
{
    if (!needMetrics || model == null || model.Metrics != null)
        return;

    using (var ch = Host.Start("Calculate metrics"))
    {
        RoleMappedData testData = subsetSelector.GetTestData(subset, batch);
        // Because the training and test datasets are drawn from the same base dataset, the test data role mappings
        // are the same as for the train data.
        IDataScorerTransform scorePipe = ScoreUtils.GetScorer(model.Predictor, testData, Host, testData.Schema);
        // REVIEW: Should we somehow allow the user to customize the evaluator?
        // By what mechanism should we allow that?
        var evalComp = GetEvaluatorSubComponent();
        RoleMappedData scoredTestData = RoleMappedData.Create(scorePipe,
            GetColumnRoles(testData.Schema, scorePipe.Schema));
        IEvaluator evaluator = evalComp.CreateInstance(Host);
        // REVIEW: with the new evaluators, metrics of individual models are no longer
        // printed to the Console. Consider adding an option on the combiner to print them.
        // REVIEW: Consider adding an option to the combiner to save a data view
        // containing all the results of the individual models.
        var metricsDict = evaluator.Evaluate(scoredTestData);
        if (!metricsDict.TryGetValue(MetricKinds.OverallMetrics, out IDataView metricsView))
            throw Host.Except("Evaluator did not produce any overall metrics");
        // REVIEW: We're assuming that the metrics of interest are always doubles here.
        var metrics = EvaluateUtils.GetMetrics(metricsView, getVectorMetrics: false);
        model.Metrics = metrics.ToArray();
        ch.Done();
    }
}
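// Illustrative sketch (hypothetical names, assumes using System): the early return above makes
// CalculateMetrics behave like a per-model memoization — metrics are computed at most once and then
// cached on the model. The same guard-and-cache shape, stripped of the scoring and evaluation details:
private static double[] GetOrComputeMetrics(ModelStub model, bool needMetrics, Func<double[]> compute)
{
    if (!needMetrics || model == null || model.Metrics != null)
        return model?.Metrics;        // nothing requested, or already computed
    model.Metrics = compute();        // compute once and cache on the model
    return model.Metrics;
}

private sealed class ModelStub
{
    public double[] Metrics;
}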
internal EnsembleTrainerBase(ArgumentsBase args, IHostEnvironment env, string name)
    : base(env, name)
{
    Args = args;

    using (var ch = Host.Start("Init"))
    {
        ch.CheckUserArg(Utils.Size(Args.BasePredictors) > 0, nameof(Args.BasePredictors), "This should have at least one value");

        NumModels = Args.NumModels ??
            (Args.BasePredictors.Length == 1 ? DefaultNumModels : Args.BasePredictors.Length);

        ch.CheckUserArg(NumModels > 0, nameof(Args.NumModels),
            "Must be positive, or null to indicate numModels is the number of base predictors");

        if (Utils.Size(Args.BasePredictors) > NumModels)
            ch.Warning("The base predictor count is greater than the model count. Some of the base predictors will be ignored.");

        _subsetSelector = Args.SamplingType.CreateComponent(Host);

        Trainers = new ITrainer<RoleMappedData, IPredictorProducing<TOutput>>[NumModels];
        for (int i = 0; i < Trainers.Length; i++)
            Trainers[i] = Args.BasePredictors[i % Args.BasePredictors.Length].CreateInstance(Host);

        _needNorm = Trainers.Any(t => t is ITrainerEx nn && nn.NeedNormalization);
        _needCalibration = Trainers.Any(t => t is ITrainerEx nn && nn.NeedCalibration);
        ch.Done();
    }
}
public override void CalculateMetrics(FeatureSubsetModel<IPredictorProducing<TOutput>> model,
    ISubsetSelector subsetSelector, Subset subset, Batch batch, bool needMetrics)
{
    base.CalculateMetrics(model, subsetSelector, subset, batch, true);
}