/// <summary>
/// Gets the test data of <paramref name="batch"/>, reduced to the features selected by
/// <paramref name="subset"/> when such a selection exists.
/// </summary>
/// <param name="subset">
/// The subset whose <c>SelectedFeatures</c> are applied to the test data. May be null, and may
/// carry a null <c>SelectedFeatures</c>; in both cases the test data is returned unfiltered.
/// </param>
/// <param name="batch">The batch whose <c>TestInstances</c> are returned; they must not be null.</param>
/// <returns>
/// <c>batch.TestInstances</c> as-is when there is no feature selection, otherwise the result of
/// <c>EnsembleUtils.SelectFeatures</c> applied to it.
/// </returns>
public virtual RoleMappedData GetTestData(Subset subset, Batch batch)
{
    Host.CheckValue(batch.TestInstances, nameof(batch), "Batch does not have test data");

    // No selection to apply: hand back the test data untouched. This guard must return here,
    // otherwise the call below would dereference a null subset.
    if (subset == null || subset.SelectedFeatures == null)
    {
        return batch.TestInstances;
    }

    return EnsembleUtils.SelectFeatures(Host, batch.TestInstances, subset.SelectedFeatures);
}
Example #2
        /// <summary>
        /// Randomly picks a set of feature slots from <paramref name="data"/> and returns the
        /// filtered data together with the mask describing which slots were kept.
        /// </summary>
        /// <param name="data">The data whose feature column is sampled; must not be null.</param>
        /// <param name="rand">The random source used to decide, per slot, whether it is kept; must not be null.</param>
        /// <returns>
        /// A <c>Subset</c> built from the data returned by <c>EnsembleUtils.SelectFeatures</c> and
        /// the <see cref="BitArray"/> of selected slots.
        /// </returns>
        public Subset SelectFeatures(RoleMappedData data, Random rand)
        {
            _host.CheckValue(data, nameof(data));
            // Validate rand up front, like data, instead of failing inside the sampling loop.
            _host.CheckValue(rand, nameof(rand));

            var type = data.Schema.Feature.Value.Type;
            int len = type.GetVectorSize();
            var features = new BitArray(len);

            // Each slot is kept independently when a fresh draw from rand falls below
            // the configured FeaturesSelectionProportion.
            for (int j = 0; j < len; j++)
            {
                features[j] = rand.NextDouble() < _args.FeaturesSelectionProportion;
            }

            var dataNew = EnsembleUtils.SelectFeatures(_host, data, features);
            return new Subset(dataNew, features);
        }
        /// <summary>
        /// Trains the stacking meta-model: every base model in <paramref name="models"/> scores each
        /// row of <paramref name="data"/>, the per-row predictions are packed into a feature vector
        /// via <c>FillFeatureBuffer</c>, and the trainer created from <c>BasePredictorType</c> is
        /// trained on those vectors paired with the original labels. The result is stored in <c>Meta</c>.
        /// </summary>
        /// <param name="models">The base models; each predictor must be an <c>IValueMapper</c>. Must not be null.</param>
        /// <param name="data">The labeled data used to produce the base-model predictions. Must not be null.</param>
        /// <param name="env">The host environment used to register the stacking host. Must not be null.</param>
        /// <remarks>
        /// The method checks that <c>Meta</c> is still null ("Train called multiple times") and that
        /// <c>BasePredictorType</c> is set before doing any work.
        /// </remarks>
        public void Train(List<FeatureSubsetModel<IPredictorProducing<TOutput>>> models, RoleMappedData data, IHostEnvironment env)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register(Stacking.LoadName);

            host.CheckValue(models, nameof(models));
            host.CheckValue(data, nameof(data));

            using (var ch = host.Start("Training stacked model"))
            {
                ch.Check(Meta == null, "Train called multiple times");
                ch.Check(BasePredictorType != null);

                // One scoring delegate per base model.
                var maps = new ValueMapper<VBuffer<Single>, TOutput>[models.Count];
                for (int i = 0; i < maps.Length; i++)
                {
                    Contracts.Assert(models[i].Predictor is IValueMapper);
                    var m = (IValueMapper)models[i].Predictor;
                    maps[i] = m.GetMapper<VBuffer<Single>, TOutput>();
                }

                // REVIEW: Should implement this better....
                var labels = new Single[100];
                var features = new VBuffer<Single>[100];
                int count = 0;
                // REVIEW: Should this include bad values or filter them?
                using (var cursor = new FloatLabelCursor(data, CursOpt.AllFeatures | CursOpt.AllLabels))
                {
                    TOutput[] predictions = new TOutput[maps.Length];
                    var vBuffers = new VBuffer<Single>[maps.Length];
                    while (cursor.MoveNext())
                    {
                        // Score the current row with every base model; each iteration only
                        // writes its own predictions[i] / vBuffers[i] slot.
                        Parallel.For(0, maps.Length, i =>
                        {
                            var model = models[i];
                            if (model.SelectedFeatures != null)
                            {
                                // The model was trained on a feature subset: project the row first.
                                EnsembleUtils.SelectFeatures(ref cursor.Features, model.SelectedFeatures, model.Cardinality, ref vBuffers[i]);
                                maps[i](ref vBuffers[i], ref predictions[i]);
                            }
                            else
                            {
                                // No feature selection: score the full feature vector.
                                maps[i](ref cursor.Features, ref predictions[i]);
                            }
                        });

                        Utils.EnsureSize(ref labels, count + 1);
                        Utils.EnsureSize(ref features, count + 1);
                        labels[count] = cursor.Label;
                        FillFeatureBuffer(predictions, ref features[count]);
                        // Advance to the next slot; without this every row would overwrite slot 0
                        // and the arrays would be trimmed to zero rows below.
                        count++;
                    }
                }

                ch.Info("The number of instances used for stacking trainer is {0}", count);

                // Trim the buffers to the rows actually read and expose them as a data view.
                var bldr = new ArrayDataViewBuilder(host);
                Array.Resize(ref labels, count);
                Array.Resize(ref features, count);
                bldr.AddColumn(DefaultColumnNames.Label, NumberType.Float, labels);
                bldr.AddColumn(DefaultColumnNames.Features, NumberType.Float, features);

                var view = bldr.GetDataView();
                var rmd = new RoleMappedData(view, DefaultColumnNames.Label, DefaultColumnNames.Features);

                var trainer = BasePredictorType.CreateComponent(host);
                if (trainer.Info.NeedNormalization)
                {
                    ch.Warning("The trainer specified for stacking wants normalization, but we do not currently allow this.");
                }

                Meta = trainer.Train(rmd);
            }
        }