/// <summary>
/// Builds an ensemble predictor from already constructed sub-models, an output combiner and
/// optional weights; all three are forwarded unchanged to the base constructor.
/// </summary>
/// <param name="env">Host environment passed to the base constructor.</param>
/// <param name="kind">Prediction kind stored in <c>PredictionKind</c>.</param>
/// <param name="models">Sub-models of the ensemble.</param>
/// <param name="combiner">Combiner applied to the sub-model outputs.</param>
/// <param name="weights">Optional per-model weights; defaults to null.</param>
internal EnsemblePredictor(IHostEnvironment env, PredictionKind kind,
                            FeatureSubsetModel <TScalarPredictor>[] models, IOutputCombiner <Single> combiner, Single[] weights = null)
     // NOTE(review): this overload passes LoaderSignature while the loading constructor of the
     // same class passes RegistrationName to the base class - confirm this is intentional.
     : base(env, LoaderSignature, models, combiner, weights)
     PredictionKind = kind;
     // InitializeMappers returns the input type and fills _mappers through its out parameter.
     InputType      = InitializeMappers(out _mappers);
 /// <summary>
 /// Loading constructor: the base class is initialized from <paramref name="ctx"/>, then the
 /// prediction kind is read from the same reader as a 32-bit integer.
 /// </summary>
 /// <param name="env">Host environment passed to the base constructor and to the Median combiner.</param>
 /// <param name="ctx">Model load context the prediction kind is read from.</param>
 private EnsembleDistributionPredictor(IHostEnvironment env, ModelLoadContext ctx)
     : base(env, RegistrationName, ctx)
     // The kind is stored as an Int32 and cast back to the enum.
     PredictionKind       = (PredictionKind)ctx.Reader.ReadInt32();
     // The probability combiner is always a new Median; it is not read from ctx here.
     _probabilityCombiner = new Median(env);
     // InitializeMappers returns the input type and fills _mappers through its out parameter.
     InputType            = InitializeMappers(out _mappers);
     // Fills _averagedWeights through the out parameter.
     ComputeAveragedWeights(out _averagedWeights);
 /// <summary>
 /// Builds a distribution ensemble predictor from already constructed sub-models, an output
 /// combiner and optional weights; all three are forwarded to the base constructor.
 /// </summary>
 /// <param name="env">Host environment passed to the base constructor and to the Median combiner.</param>
 /// <param name="kind">Prediction kind stored in <c>PredictionKind</c>.</param>
 /// <param name="models">Sub-models of the ensemble.</param>
 /// <param name="combiner">Combiner applied to the sub-model outputs.</param>
 /// <param name="weights">Optional per-model weights; defaults to null.</param>
 internal EnsembleDistributionPredictor(IHostEnvironment env, PredictionKind kind,
                                        FeatureSubsetModel <TDistPredictor>[] models, IOutputCombiner <Single> combiner, Single[] weights = null)
     : base(env, RegistrationName, models, combiner, weights)
     PredictionKind       = kind;
     // The probability combiner is always a new Median, independent of the combiner argument.
     _probabilityCombiner = new Median(env);
     // InitializeMappers returns the input type and fills _mappers through its out parameter.
     InputType            = InitializeMappers(out _mappers);
     // Fills _averagedWeights through the out parameter.
     ComputeAveragedWeights(out _averagedWeights);
        /// <summary>
        /// Validates the arguments, registers a host under <paramref name="name"/> and stores
        /// the training arguments and the prediction kind.
        /// </summary>
        /// <param name="env">Host environment; checked for null.</param>
        /// <param name="args">Training arguments; checked for null through the registered host.</param>
        /// <param name="predictionKind">Prediction kind stored in <c>_predictionKind</c>.</param>
        /// <param name="name">Registration name; must not be null or white space.</param>
        protected XGBoostTrainerBase(IHostEnvironment env, XGBoostArguments args, PredictionKind predictionKind, string name)
            // env is validated first because the next check is invoked on env itself.
            Contracts.CheckValue(env, nameof(env));
            env.CheckNonWhiteSpace(name, nameof(name));

            // args is validated through the host so the check is attributed to this component.
            _host = env.Register(name);
            _host.CheckValue(args, nameof(args));

            _args           = args;
            _predictionKind = predictionKind;
 /// <summary>
 /// Registers a host named "TreeEnsembleCombiner" and stores the prediction kind when it is
 /// binary classification, regression or ranking.
 /// </summary>
 /// <param name="env">Host environment used to register the host.</param>
 /// <param name="kind">Prediction kind of the tree ensembles to combine.</param>
 public TreeEnsembleCombiner(IHostEnvironment env, PredictionKind kind)
     // NOTE(review): env is dereferenced without a prior null check.
     _host = env.Register("TreeEnsembleCombiner");
     switch (kind)
         // The three case labels below share the same assignment.
         case PredictionKind.BinaryClassification:
         case PredictionKind.Regression:
         case PredictionKind.Ranking:
             _kind = kind;
             // Presumably reached only for other kinds (branch delimiters are not visible here);
             // reports a user-argument error naming the accepted kinds.
             throw _host.ExceptUserArg(nameof(kind), $"Tree ensembles can be either of type {nameof(PredictionKind.BinaryClassification)}, " +
                 $"{nameof(PredictionKind.Regression)} or {nameof(PredictionKind.Ranking)}");
        /// <summary>
        /// Validates the arguments, registers a host under <paramref name="name"/>, stores the
        /// training arguments and derived options, and selects the parallel training component.
        /// </summary>
        /// <param name="env">Host environment; checked for null and kept in <c>_env</c>.</param>
        /// <param name="args">Training arguments; checked for null through the registered host.</param>
        /// <param name="predictionKind">Prediction kind stored in <c>_predictionKind</c>.</param>
        /// <param name="name">Registration name; must not be null or white space.</param>
        protected LightGbmTrainerBase(IHostEnvironment env, LightGbmArguments args, PredictionKind predictionKind, string name)
            // env is validated first because the next check is invoked on env itself.
            Contracts.CheckValue(env, nameof(env));
            env.CheckNonWhiteSpace(name, nameof(name));

            Host = env.Register(name);
            Host.CheckValue(args, nameof(args));

            Args             = args;
            // Options is built from the arguments by Args.ToDictionary.
            Options          = Args.ToDictionary(Host);
            _predictionKind  = predictionKind;
            _env             = env;
            // Uses the configured parallel trainer when one is given, otherwise a SingleTrainer.
            ParallelTraining = Args.ParallelTrainer != null?Args.ParallelTrainer.CreateComponent(env) : new SingleTrainer();

        /// <summary>
        /// Suggests a default output schema for a predictor of the given kind.
        /// The schema has a single column: "Score" (binary classification) or "Prediction"
        /// (regression) of type Single, "Scores" (multiclass classification) or "Predictions"
        /// (multi-output regression) as a vector of Single of size <paramref name="dim"/>.
        /// </summary>
        /// <param name="kind">prediction kind the schema is built for</param>
        /// <param name="dim">vector size, only used by the multiclass and multi-output kinds</param>
        /// <returns>an ISchema with one column</returns>
        public static ISchema PredictionDefaultSchema(PredictionKind kind, int dim = 0)
            switch (kind)
            case PredictionKind.BinaryClassification:
                return(new ExtendedSchema((ISchema)null, new[] { "Score" }, new[] { NumberDataViewType.Single }));

            case PredictionKind.Regression:
                return(new ExtendedSchema((ISchema)null, new[] { "Prediction" }, new[] { NumberDataViewType.Single }));

            case PredictionKind.MulticlassClassification:
                return(new ExtendedSchema((ISchema)null, new[] { "Scores" }, new[] { new VectorDataViewType(NumberDataViewType.Single, dim) }));

            case PredictionKind.MultiOutputRegression:
                return(new ExtendedSchema((ISchema)null, new[] { "Predictions" }, new[] { new VectorDataViewType(NumberDataViewType.Single, dim) }));

                // Presumably the fallback for every other kind (branch label not visible here).
                throw Contracts.Except("Unable to build the schema for kind {0}", kind);
 /// <summary>
 /// Constructor taking an explicit prediction kind: delegates to the (env, args) constructor
 /// and rejects any kind other than multiclass classification.
 /// </summary>
 /// <param name="env">Host environment forwarded to the (env, args) constructor.</param>
 /// <param name="args">Trainer arguments forwarded to the (env, args) constructor.</param>
 /// <param name="predictionKind">Must be <c>PredictionKind.MulticlassClassification</c>.</param>
 private MulticlassDataPartitionEnsembleTrainer(IHostEnvironment env, Arguments args, PredictionKind predictionKind)
     : this(env, args)
     // Report the offending parameter by its own name, not by the name of its type.
     Host.CheckParam(predictionKind == PredictionKind.MulticlassClassification, nameof(predictionKind));
 /// <summary>
 /// Constructor taking an explicit prediction kind: delegates to the (env, args) constructor
 /// and rejects any kind other than binary classification.
 /// </summary>
 /// <param name="env">Host environment forwarded to the (env, args) constructor.</param>
 /// <param name="args">Trainer arguments forwarded to the (env, args) constructor.</param>
 /// <param name="predictionKind">Must be <c>PredictionKind.BinaryClassification</c>.</param>
 private EnsembleTrainer(IHostEnvironment env, Arguments args, PredictionKind predictionKind)
     : this(env, args)
     // Report the offending parameter by its own name, not by the name of its type.
     Host.CheckParam(predictionKind == PredictionKind.BinaryClassification, nameof(predictionKind));
        /// <summary>
        /// Finalizes the test on a predictor, calls the predictor with a scorer,
        /// saves the data, saves the models, loads it back, saves the data again,
        /// checks the output is the same.
        /// For non-clustering kinds it then reads the label and the predicted column back
        /// from the scorer and checks the error rate.
        /// </summary>
        /// <param name="env">environment</param>
        /// <param name="outModelFilePath">output filename for the saved model</param>
        /// <param name="predictor">predictor</param>
        /// <param name="roles">label, feature, ...</param>
        /// <param name="outData">first output data (scored with the in-memory predictor)</param>
        /// <param name="outData2">second output data (scored with the reloaded model)</param>
        /// <param name="kind">prediction kind</param>
        /// <param name="checkError">checks errors</param>
        /// <param name="ratio">check the error is below that threshold (if checkError is true)</param>
        /// <param name="ratioReadSave">check the predictions difference after reloading the model are below this threshold</param>
        /// <param name="checkType">if true, the reloaded predictor must have the same runtime type as <paramref name="predictor"/></param>
        public static void FinalizeSerializationTest(IHostEnvironment env,
                                                     string outModelFilePath, IPredictor predictor,
                                                     RoleMappedData roles, string outData, string outData2,
                                                     PredictionKind kind, bool checkError = true,
                                                     float ratio    = 0.8f, float ratioReadSave = 0.06f,
                                                     bool checkType = true)
            // Clustering has no label column.
            string labelColumn = kind != PredictionKind.Clustering ? roles.Schema.Label.Value.Name : null;

            #region save, reading, running

            // Saves model.
            using (var ch = env.Start("Save"))
                using (var fs = File.Create(outModelFilePath))
                    TrainUtils.SaveModel(env, ch, fs, predictor, roles);
            if (!File.Exists(outModelFilePath))
                throw new FileNotFoundException(outModelFilePath);

            // Loads the model back.
            using (var fs = File.OpenRead(outModelFilePath))
                var pred_local = ModelFileUtils.LoadPredictorOrNull(env, fs);
                if (pred_local == null)
                    throw new Exception(string.Format("Unable to load '{0}'", outModelFilePath));
                if (checkType && predictor.GetType() != pred_local.GetType())
                    throw new Exception(string.Format("Type mismatch {0} != {1}", predictor.GetType(), pred_local.GetType()));

            // Checks the outputs.
            var sch1   = SchemaHelper.ToString(roles.Schema.Schema);
            var scorer = PredictorHelper.CreateDefaultScorer(env, roles, predictor);

            var sch2 = SchemaHelper.ToString(scorer.Schema);
            if (string.IsNullOrEmpty(sch1) || string.IsNullOrEmpty(sch2))
                throw new Exception("Empty schemas");

            // Only the columns the text saver can write are kept.
            var saver   = env.CreateSaver("Text");
            var columns = new int[scorer.Schema.Count];
            for (int i = 0; i < columns.Length; ++i)
                columns[i] = saver.IsColumnSavable(scorer.Schema[i].Type) ? i : -1;
            columns = columns.Where(c => c >= 0).ToArray();
            using (var fs2 = File.Create(outData))
                saver.SaveData(fs2, scorer, columns);

            // Verifies the data file that was just written (this used to re-check the model path
            // while reporting outData in the exception).
            if (!File.Exists(outData))
                throw new FileNotFoundException(outData);

            // Check we have the same output.
            using (var fs = File.OpenRead(outModelFilePath))
                var model = ModelFileUtils.LoadPredictorOrNull(env, fs);
                // From here on, scorer wraps the reloaded model.
                scorer = PredictorHelper.CreateDefaultScorer(env, roles, model);
                saver  = env.CreateSaver("Text");
                using (var fs2 = File.Create(outData2))
                    saver.SaveData(fs2, scorer, columns);

            // Compares both outputs line by line.
            var t1 = File.ReadAllLines(outData);
            var t2 = File.ReadAllLines(outData2);
            if (t1.Length != t2.Length)
                throw new Exception(string.Format("Not the same number of lines: {0} != {1}", t1.Length, t2.Length));
            var linesN = new List <int>();
            for (int i = 0; i < t1.Length; ++i)
                if (t1[i] != t2[i])
            // Fails when the number of recorded lines exceeds the tolerated fraction ratioReadSave.
            if (linesN.Count > (int)(t1.Length * ratioReadSave))
                var rows = linesN.Select(i => string.Format("1-Mismatch on line {0}/{3}:\n{1}\n{2}", i, t1[i], t2[i], t1.Length)).ToList();
                rows.Add($"Number of differences: {linesN.Count}/{t1.Length}");
                throw new Exception(string.Join("\n", rows));


            #region clustering

            if (kind == PredictionKind.Clustering)
                // Nothing to do here.


            #region supervized

            // Regression is checked on "Score", every other kind on "PredictedLabel".
            string expectedOuput = kind == PredictionKind.Regression ? "Score" : "PredictedLabel";

            // Get label and basic checking about performance.
            using (var cursor = scorer.GetRowCursor(scorer.Schema))
                int ilabel, ipred;
                ilabel = SchemaHelper.GetColumnIndex(cursor.Schema, labelColumn);
                ipred  = SchemaHelper.GetColumnIndex(cursor.Schema, expectedOuput);
                var ty1   = cursor.Schema[ilabel].Type;
                var ty2   = cursor.Schema[ipred].Type;
                // dist1: counts per label value, dist2: counts per predicted value,
                // conf: counts per (predicted, expected) pair.
                var dist1 = new Dictionary <int, int>();
                var dist2 = new Dictionary <int, int>();
                var conf  = new Dictionary <Tuple <int, int>, long>();

                if (kind == PredictionKind.MulticlassClassification)
                    #region Multiclass

                    if (!ty2.IsKey())
                        throw new Exception(string.Format("Label='{0}' Predicted={1}'\nSchema: {2}", ty1, ty2, SchemaHelper.ToString(cursor.Schema)));

                    if (ty1.RawKind() == DataKind.Single)
                        var   lgetter = cursor.GetGetter <float>(SchemaHelper._dc(ilabel, cursor));
                        var   pgetter = cursor.GetGetter <uint>(SchemaHelper._dc(ipred, cursor));
                        float ans     = 0;
                        uint  pre     = 0;
                        while (cursor.MoveNext())
                            lgetter(ref ans);
                            pgetter(ref pre);

                            // The scorer +1 to the argmax.

                            var key = new Tuple <int, int>((int)pre, (int)ans);
                            if (!conf.ContainsKey(key))
                                conf[key] = 1;
                            if (!dist1.ContainsKey((int)ans))
                                dist1[(int)ans] = 1;
                            if (!dist2.ContainsKey((int)pre))
                                dist2[(int)pre] = 1;
                    else if (ty1.RawKind() == DataKind.UInt32 && ty1.IsKey())
                        var  lgetter = cursor.GetGetter <uint>(SchemaHelper._dc(ilabel, cursor));
                        var  pgetter = cursor.GetGetter <uint>(SchemaHelper._dc(ipred, cursor));
                        uint ans     = 0;
                        uint pre     = 0;
                        while (cursor.MoveNext())
                            lgetter(ref ans);
                            pgetter(ref pre);

                            var key = new Tuple <int, int>((int)pre, (int)ans);
                            if (!conf.ContainsKey(key))
                                conf[key] = 1;
                            if (!dist1.ContainsKey((int)ans))
                                dist1[(int)ans] = 1;
                            if (!dist2.ContainsKey((int)pre))
                                dist2[(int)pre] = 1;
                        throw new NotImplementedException(string.Format("Not implemented for type {0}", ty1.ToString()));
                else if (kind == PredictionKind.BinaryClassification)
                    #region binary classification

                    if (ty2.RawKind() != DataKind.Boolean)
                        throw new Exception(string.Format("Label='{0}' Predicted={1}'\nSchema: {2}", ty1, ty2, SchemaHelper.ToString(cursor.Schema)));

                    if (ty1.RawKind() == DataKind.Single)
                        var   lgetter = cursor.GetGetter <float>(SchemaHelper._dc(ilabel, cursor));
                        var   pgetter = cursor.GetGetter <bool>(SchemaHelper._dc(ipred, cursor));
                        float ans     = 0;
                        bool  pre     = default(bool);
                        while (cursor.MoveNext())
                            lgetter(ref ans);
                            pgetter(ref pre);

                            if (ans != 0 && ans != 1)
                                throw Contracts.Except("The problem is not binary, expected answer is {0}", ans);

                            var key = new Tuple <int, int>(pre ? 1 : 0, (int)ans);
                            if (!conf.ContainsKey(key))
                                conf[key] = 1;
                            if (!dist1.ContainsKey((int)ans))
                                dist1[(int)ans] = 1;
                            if (!dist2.ContainsKey(pre ? 1 : 0))
                                dist2[pre ? 1 : 0] = 1;
                                ++dist2[pre ? 1 : 0];
                    else if (ty1.RawKind() == DataKind.UInt32)
                        var  lgetter = cursor.GetGetter <uint>(SchemaHelper._dc(ilabel, cursor));
                        var  pgetter = cursor.GetGetter <bool>(SchemaHelper._dc(ipred, cursor));
                        uint ans     = 0;
                        bool pre     = default(bool);
                        while (cursor.MoveNext())
                            lgetter(ref ans);
                            pgetter(ref pre);
                            if (ty1.IsKey())

                            if (ans != 0 && ans != 1)
                                throw Contracts.Except("The problem is not binary, expected answer is {0}", ans);

                            var key = new Tuple <int, int>(pre ? 1 : 0, (int)ans);
                            if (!conf.ContainsKey(key))
                                conf[key] = 1;
                            if (!dist1.ContainsKey((int)ans))
                                dist1[(int)ans] = 1;
                            if (!dist2.ContainsKey(pre ? 1 : 0))
                                dist2[pre ? 1 : 0] = 1;
                                ++dist2[pre ? 1 : 0];
                    else if (ty1.RawKind() == DataKind.Boolean)
                        var  lgetter = cursor.GetGetter <bool>(SchemaHelper._dc(ilabel, cursor));
                        var  pgetter = cursor.GetGetter <bool>(SchemaHelper._dc(ipred, cursor));
                        bool ans     = default(bool);
                        bool pre     = default(bool);
                        while (cursor.MoveNext())
                            lgetter(ref ans);
                            pgetter(ref pre);

                            var key = new Tuple <int, int>(pre ? 1 : 0, ans ? 1 : 0);
                            if (!conf.ContainsKey(key))
                                conf[key] = 1;

                            if (!dist1.ContainsKey(ans ? 1 : 0))
                                dist1[ans ? 1 : 0] = 1;
                                ++dist1[ans ? 1 : 0];
                            if (!dist2.ContainsKey(pre ? 1 : 0))
                                dist2[pre ? 1 : 0] = 1;
                                ++dist2[pre ? 1 : 0];
                        throw new NotImplementedException(string.Format("Not implemented for type {0}", ty1));

                else if (kind == PredictionKind.Regression)
                    #region regression

                    if (ty1.RawKind() != DataKind.Single)
                        throw new Exception(string.Format("Label='{0}' Predicted={1}'\nSchema: {2}", ty1, ty2, SchemaHelper.ToString(cursor.Schema)));
                    if (ty2.RawKind() != DataKind.Single)
                        throw new Exception(string.Format("Label='{0}' Predicted={1}'\nSchema: {2}", ty1, ty2, SchemaHelper.ToString(cursor.Schema)));

                    var   lgetter = cursor.GetGetter <float>(SchemaHelper._dc(ilabel, cursor));
                    var   pgetter = cursor.GetGetter <float>(SchemaHelper._dc(ipred, cursor));
                    float ans     = 0;
                    float pre     = 0f;
                    float error   = 0f;
                    while (cursor.MoveNext())
                        lgetter(ref ans);
                        pgetter(ref pre);
                        // Accumulates the squared error; conf is not filled for regression.
                        error += (ans - pre) * (ans - pre);
                        if (!dist1.ContainsKey((int)ans))
                            dist1[(int)ans] = 1;
                        if (!dist2.ContainsKey((int)pre))
                            dist2[(int)pre] = 1;

                    if (float.IsNaN(error) || float.IsInfinity(error))
                        throw new Exception("Regression wen wrong. Error is infinite.");

                    throw new NotImplementedException(string.Format("Not implemented for kind {0}", kind));

                // An error is any confusion entry whose predicted and expected values differ.
                var nbError = conf.Where(c => c.Key.Item1 != c.Key.Item2).Select(c => c.Value).Sum();
                var nbTotal = conf.Select(c => c.Value).Sum();

                // Fails on too many errors or when at most one distinct value was predicted.
                if (checkError && (nbError * 1.0 > nbTotal * ratio || dist2.Count <= 1))
                    var sconf = string.Join("\n", conf.OrderBy(c => c.Key)
                                            .Select(c => string.Format("pred={0} exp={1} count={2}", c.Key.Item1, c.Key.Item2, c.Value)));
                    // sdist1 now describes dist1 and sdist2 describes dist2; they used to be
                    // swapped, so the DIST1/DIST2 sections of the message showed the wrong data.
                    var sdist1 = string.Join("\n", dist1.OrderBy(c => c.Key)
                                             .Select(c => string.Format("label={0} count={1}", c.Key, c.Value)));
                    var sdist2 = string.Join("\n", dist2.OrderBy(c => c.Key).Take(20)
                                             .Select(c => string.Format("label={0} count={1}", c.Key, c.Value)));
                    throw new Exception(string.Format("Too many errors {0}/{1}={7}\n###########\nConfusion:\n{2}\n########\nDIST1\n{3}\n###########\nDIST2\n{4}\nOutput:\n{5}\n...\n{6}",
                                                      nbError, nbTotal,
                                                      sconf, sdist1, sdist2,
                                                      string.Join("\n", t1.Take(Math.Min(30, t1.Length))),
                                                      string.Join("\n", t1.Skip(Math.Max(0, t1.Length - 30))),
                                                      nbError * 1.0 / nbTotal));

 /// <summary>
 /// Loading constructor: the base class is initialized from <paramref name="ctx"/>, then the
 /// prediction kind is read from the same reader as a 32-bit integer.
 /// </summary>
 /// <param name="env">Host environment passed to the base constructor.</param>
 /// <param name="ctx">Model load context the prediction kind is read from.</param>
 private EnsemblePredictor(IHostEnvironment env, ModelLoadContext ctx)
     : base(env, RegistrationName, ctx)
     // The kind is stored as an Int32 and cast back to the enum.
     PredictionKind = (PredictionKind)ctx.Reader.ReadInt32();
     // InitializeMappers returns the input type and fills _mappers through its out parameter.
     InputType      = InitializeMappers(out _mappers);
 /// <summary>
 /// Constructor taking an explicit prediction kind: delegates to the (env, args) constructor
 /// and rejects any kind other than regression.
 /// </summary>
 /// <param name="env">Host environment forwarded to the (env, args) constructor.</param>
 /// <param name="args">Trainer arguments forwarded to the (env, args) constructor.</param>
 /// <param name="predictionKind">Must be <c>PredictionKind.Regression</c>.</param>
 private RegressionEnsembleTrainer(IHostEnvironment env, Arguments args, PredictionKind predictionKind)
     : this(env, args)
     // Report the offending parameter by its own name, not by the name of its type.
     Host.CheckParam(predictionKind == PredictionKind.Regression, nameof(predictionKind));
        /// <summary>
        /// Derives a class count from the smallest and the largest label stored in
        /// <c>_labelWeights</c>, after converting both to <c>int</c>.
        /// </summary>
        /// <param name="kind">Prediction kind; binary and multiclass classification are handled here.</param>
        /// <returns>For multiclass, <c>imaxi + 1</c> when the smallest label is 0, otherwise <c>imini</c>.</returns>
        public int ComputeNbClass(PredictionKind kind)
            // Placeholder entry used to seed the min/max search below.
            KeyValuePair <long, Tuple <TLabel, float> > first = new KeyValuePair <long, Tuple <TLabel, float> >(0, new Tuple <TLabel, float>(default(TLabel), 0));

            foreach (var pair in _labelWeights)
                first = pair;

            TLabel mini = first.Value.Item1, maxi = first.Value.Item1;

            // Scans every label to find the smallest and the largest one.
            foreach (var pair in _labelWeights)
                if (mini.CompareTo(pair.Value.Item1) > 0)
                    mini = pair.Value.Item1;
                if (maxi.CompareTo(pair.Value.Item1) < 0)
                    maxi = pair.Value.Item1;

            // Labels are converted to Int32 so the range can be computed arithmetically.
            var conv = new TypedConverters <TLabel>(DataKind.Int32);

            int imini = 0, imaxi = 0;
            var convMapper = conv.GetMapper <int>();

            if (convMapper == null)
                throw _host.Except("No conversion from {0} to {1}", typeof(TLabel), typeof(int));
            switch (kind)
            case PredictionKind.BinaryClassification:
                if (mini.CompareTo(maxi) == 0)
                    throw _host.Except("Only one class, two are expected.");
                convMapper(in mini, ref imini);
                convMapper(in maxi, ref imaxi);
                // Binary labels must be two consecutive integers.
                if (imaxi - imini != 1)
                    throw _host.Except("More than two classes: min(labels)={0} max(labels)={1}", imini, imaxi);

            case PredictionKind.MulticlassClassification:
                if (mini.CompareTo(maxi) == 0)
                    throw _host.Except("Only one class, more are expected.");
                convMapper(in mini, ref imini);
                convMapper(in maxi, ref imaxi);
                // NOTE(review): when the smallest label is not 0 this returns imini, which is
                // the smallest label rather than a count - imaxi may have been intended; confirm.
                return(imini == 0 ? imaxi + 1 : imini);

                // Presumably the fallback for every other kind (branch label not visible here).
                throw _host.ExceptNotSupp("Not suported for predictor {0}", kind);
        /// <summary>
        /// Returns the value mapper producing predictions from nearest neighbors. Only
        /// <c>VBuffer&lt;float&gt;</c> input, the kdtree algorithm and uniform weights are handled;
        /// the prediction kind selects the binary or the multiclass mapper.
        /// </summary>
        /// <param name="trees">Nearest neighbors trees forwarded to the selected mapper.</param>
        /// <param name="k">Value forwarded to the selected mapper (presumably the number of neighbors).</param>
        /// <param name="algo">Algorithm; must be <c>NearestNeighborsAlgorithm.kdtree</c>.</param>
        /// <param name="weight">Weighting scheme; only <c>NearestNeighborsWeights.uniform</c> returns a mapper.</param>
        /// <param name="kind">Prediction kind; binary and multiclass classification return a mapper.</param>
        public ValueMapper <TIn, TOut> GetMapper <TIn, TOut>(NearestNeighborsTrees trees, int k,
                                                             NearestNeighborsAlgorithm algo, NearestNeighborsWeights weight, PredictionKind kind)
            // Preconditions: input type, available label weights, supported algorithm.
            _host.Check(typeof(TIn) == typeof(VBuffer <float>));
            _host.CheckValue(_labelWeights, "_labelWeights");
            _host.Check(algo == NearestNeighborsAlgorithm.kdtree, "algo");

            if (weight == NearestNeighborsWeights.uniform)
                switch (kind)
                case PredictionKind.BinaryClassification:
                    return(GetMapperBinaryPrediction <TIn, TOut>(trees, k, algo, weight));

                case PredictionKind.MulticlassClassification:
                    return(GetMapperMulticlassPrediction <TIn, TOut>(trees, k, algo, weight));

                    // Presumably the fallback for other kinds (branch label not visible here).
                    throw _host.ExceptNotImpl("Not implemented yet for kind={0}", kind);
                // Presumably reached for non-uniform weights (else branch not visible here).
                throw _host.ExceptNotImpl("Not implemented yet for wieght={0}", weight);