public static Output MakeScoringTransform(IHostEnvironment env, ModelInput input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register("MakeScoringTransform");

            host.CheckValue(input, nameof(input));
            EntryPointUtils.CheckInputArgs(host, input);

            IPredictor     predictor;
            RoleMappedData data;
            var            emptyData = new EmptyDataView(host, input.PredictorModel.TransformModel.InputSchema);

            input.PredictorModel.PrepareData(host, emptyData, out data, out predictor);

            IDataView scoredPipe;

            using (var ch = host.Start("Creating scoring pipeline"))
            {
                ch.Trace("Creating pipeline");
                var bindable = ScoreUtils.GetSchemaBindableMapper(host, predictor);
                ch.AssertValue(bindable);

                var mapper = bindable.Bind(host, data.Schema);
                var scorer = ScoreUtils.GetScorerComponent(host, mapper);
                scoredPipe = scorer.CreateComponent(host, data.Data, mapper, input.PredictorModel.GetTrainingSchema(host));
            }

            return(new Output
            {
                ScoredData = scoredPipe,
                ScoringTransform = new TransformModelImpl(host, scoredPipe, emptyData)
            });
        }
        // When the label column is not a key, we check that the number of classes is the same for all the predictors
        // by checking the OutputType property of the IValueMapper.
        // If a predictor does not implement IValueMapper, we throw an exception. Returns the class count.
        private static int CheckNonKeyLabelColumnCore(IHostEnvironment env, IPredictor pred, PredictorModel[] models, bool isBinary, DataViewType labelType)
        {
            env.Assert(!(labelType is KeyType));
            env.AssertNonEmpty(models);

            if (isBinary)
            {
                return(2);
            }

            // The label is numeric; we just have to check that the number of classes is the same.
            if (!(pred is IValueMapper vm))
            {
                throw env.Except("Cannot determine the number of classes the predictor outputs");
            }
            var classCount = vm.OutputType.GetVectorSize();

            for (int i = 1; i < models.Length; i++)
            {
                var model = models[i];
                var edv   = new EmptyDataView(env, model.TransformModel.InputSchema);
                model.PrepareData(env, edv, out RoleMappedData rmd, out pred);
                vm = pred as IValueMapper;
                if (vm == null)
                {
                    throw env.Except("Cannot determine the number of classes model {0} outputs", i);
                }
                if (vm.OutputType.GetVectorSize() != classCount)
                {
                    throw env.Except("Label of model {0} has different number of classes than model 0", i);
                }
            }
            return(classCount);
        }
        public static Output ExtractSweepResult(IHostEnvironment env, ResultInput input)
        {
            var autoMlState = input.State as AutoInference.AutoMlMlState;

            if (autoMlState == null)
            {
                throw env.Except("The state must be a valid AutoMlState.");
            }
            // Create results output dataview
            var       rows = autoMlState.GetAllEvaluatedPipelines().Select(p => p.ToResultRow()).ToList();
            IDataView outputView;
            var       col1 = new KeyValuePair <string, ColumnType>("Graph", TextType.Instance);
            var       col2 = new KeyValuePair <string, ColumnType>("MetricValue", PrimitiveType.FromKind(DataKind.R8));
            var       col3 = new KeyValuePair <string, ColumnType>("PipelineId", TextType.Instance);

            if (rows.Count == 0)
            {
                var host = env.Register("ExtractSweepResult");
                outputView = new EmptyDataView(host, new SimpleSchema(host, col1, col2, col3));
            }
            else
            {
                var builder = new ArrayDataViewBuilder(env);
                builder.AddColumn(col1.Key, (PrimitiveType)col1.Value, rows.Select(r => new DvText(r.GraphJson)).ToArray());
                builder.AddColumn(col2.Key, (PrimitiveType)col2.Value, rows.Select(r => r.MetricValue).ToArray());
                builder.AddColumn(col3.Key, (PrimitiveType)col3.Value, rows.Select(r => new DvText(r.PipelineId)).ToArray());
                outputView = builder.GetDataView();
            }
            return(new Output {
                Results = outputView, State = autoMlState
            });
        }
Example 4
        public ISchema GetOutputSchema(ISchema inputSchema)
        {
            var dv     = new EmptyDataView(_env, inputSchema);
            var output = ApplyTransformUtils.ApplyAllTransformsToData(_env, _xf, dv);

            return(output.Schema);
        }
Example 5
        public TransformWrapper Fit(IDataView input)
        {
            var xf    = new TermTransform(_env, input, _column, _srcColumn);
            var empty = new EmptyDataView(_env, input.Schema);
            var chunk = ApplyTransformUtils.ApplyAllTransformsToData(_env, xf, empty, input);

            return(new TransformWrapper(_env, chunk));
        }
Example 6
        public TransformWrapper Fit(IDataView input)
        {
            var xf    = new KeyToValueTransform(_env, input, _name, _source);
            var empty = new EmptyDataView(_env, input.Schema);
            var chunk = ApplyTransformUtils.ApplyAllTransformsToData(_env, xf, empty, input);

            return(new TransformWrapper(_env, chunk));
        }
Example 7
        public TransformWrapper Fit(IDataView input)
        {
            var xf    = LambdaTransform.CreateMap(_env, input, _action);
            var empty = new EmptyDataView(_env, input.Schema);
            var chunk = ApplyTransformUtils.ApplyAllTransformsToData(_env, xf, empty, input);

            return(new TransformWrapper(_env, chunk));
        }
Example 8
        /// <summary>
        /// This method outputs a Key-Value Pair (kvp) per model in the ensemble.
        ///   * The key is the model number such as "Partition model 0 summary". If the model implements <see cref="ICanSaveSummary"/>
        ///     then this string is followed by the first line of the model summary (the first line contains a description specific to the
        ///     model kind, such as "Feature gains" for FastTree or "Feature weights" for linear).
        ///   * The value:
        ///       - If the model implements <see cref="ICanGetSummaryInKeyValuePairs"/> then the value is the list of Key-Value pairs
        ///         containing the detailed summary for that model (for example, linear models have a list containing kvps where the keys
        ///         are the feature names and the values are the weights. FastTree has a similar list with the feature gains as values).
        ///       - If the model does not implement <see cref="ICanGetSummaryInKeyValuePairs"/> but does implement <see cref="ICanSaveSummary"/>,
        ///         the value is a string containing the summary of that model.
        ///       - If neither of those interfaces is implemented, then the value is a string containing the name of the model's type.
        /// </summary>
        /// <returns>The list of key-value pairs, one per model in the ensemble.</returns>
        public IList <KeyValuePair <string, object> > GetSummaryInKeyValuePairs(RoleMappedSchema schema)
        {
            Host.CheckValueOrNull(schema);

            var list = new List <KeyValuePair <string, object> >();

            var sb = new StringBuilder();

            for (int i = 0; i < PredictorModels.Length; i++)
            {
                var key          = string.Format("Partition model {0} summary:", i);
                var summaryKvps  = PredictorModels[i].Predictor as ICanGetSummaryInKeyValuePairs;
                var summaryModel = PredictorModels[i].Predictor as ICanSaveSummary;
                if (summaryKvps == null && summaryModel == null)
                {
                    list.Add(new KeyValuePair <string, object>(key, PredictorModels[i].Predictor.GetType().Name));
                    continue;
                }

                // Load the feature names for the i'th model.
                var dv = new EmptyDataView(Host, PredictorModels[i].TransformModel.InputSchema);
                PredictorModels[i].PrepareData(Host, dv, out RoleMappedData rmd, out IPredictor pred);

                if (summaryModel != null)
                {
                    sb.Clear();
                    using (StringWriter sw = new StringWriter(sb))
                        summaryModel.SaveSummary(sw, rmd.Schema);
                }

                if (summaryKvps != null)
                {
                    var listCur = summaryKvps.GetSummaryInKeyValuePairs(rmd.Schema);
                    if (summaryModel != null)
                    {
                        using (var reader = new StringReader(sb.ToString()))
                        {
                            // Skip blank lines; ReadLine() returns null at the end of the summary,
                            // which also ends the loop instead of spinning forever.
                            string firstLine;
                            do
                            {
                                firstLine = reader.ReadLine();
                            } while (firstLine != null && firstLine.Length == 0);
                            if (!string.IsNullOrEmpty(firstLine))
                            {
                                key += ("\r\n" + firstLine);
                            }
                        }
                    }
                    list.Add(new KeyValuePair <string, object>(key, listCur));
                }
                else
                {
                    Host.AssertValue(summaryModel);
                    list.Add(new KeyValuePair <string, object>(key, sb.ToString()));
                }
            }
            return(list);
        }
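        // A hypothetical consumption sketch for the method above, not part of the original source.
        // It relies only on the key/value contract described in the summary: each value is either a
        // nested list of key-value pairs or a plain string. The method name and the `writer`
        // parameter are illustrative assumptions.
        public static void WriteEnsembleSummary(TextWriter writer, IList<KeyValuePair<string, object>> summary)
        {
            foreach (var kvp in summary)
            {
                writer.WriteLine(kvp.Key);
                if (kvp.Value is IList<KeyValuePair<string, object>> details)
                {
                    // Detailed summaries (e.g. feature names and weights for linear models).
                    foreach (var detail in details)
                    {
                        writer.WriteLine("  {0}: {1}", detail.Key, detail.Value);
                    }
                }
                else
                {
                    // Either the text summary or just the model type name.
                    writer.WriteLine("  {0}", kvp.Value);
                }
            }
        }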
Example 9
        /// <summary>
        /// Create a TransformModel containing the transforms from "result" back to "input".
        /// </summary>
        public TransformModel(IHostEnvironment env, IDataView result, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(result, nameof(result));
            env.CheckValue(input, nameof(input));

            var root = new EmptyDataView(env, input.Schema);

            _schemaRoot = root.Schema;
            _chain      = ApplyTransformUtils.ApplyAllTransformsToData(env, result, root, input);
        }
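        // A minimal usage sketch for the constructor above, not part of the original source:
        // `env` is an IHostEnvironment, `input` is the raw data view, and `transformed` is `input`
        // with some transforms already applied; all names are illustrative assumptions. The
        // constructor walks from `transformed` back to `input` and captures only the transforms
        // that sit between the two views.
        private static TransformModel CaptureTransforms(IHostEnvironment env, IDataView transformed, IDataView input)
        {
            return new TransformModel(env, transformed, input);
        }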
            public BoundBase(SchemaBindablePipelineEnsembleBase parent, RoleMappedSchema schema)
            {
                Parent = parent;
                InputRoleMappedSchema = schema;
                OutputSchema          = ScoreSchemaFactory.Create(Parent.ScoreType, Parent._scoreColumnKind);
                _inputColIndices      = new HashSet <int>();
                for (int i = 0; i < Parent._inputCols.Length; i++)
                {
                    var name = Parent._inputCols[i];
                    var col  = InputRoleMappedSchema.Schema.GetColumnOrNull(name);
                    if (!col.HasValue)
                    {
                        throw Parent.Host.ExceptSchemaMismatch(nameof(InputRoleMappedSchema), "input", name);
                    }
                    _inputColIndices.Add(col.Value.Index);
                }

                Mappers        = new ISchemaBoundRowMapper[Parent.PredictorModels.Length];
                BoundPipelines = new IRowToRowMapper[Parent.PredictorModels.Length];
                ScoreCols      = new int[Parent.PredictorModels.Length];
                for (int i = 0; i < Mappers.Length; i++)
                {
                    // Get the RoleMappedSchema to pass to the predictor.
                    var emptyDv = new EmptyDataView(Parent.Host, schema.Schema);
                    Parent.PredictorModels[i].PrepareData(Parent.Host, emptyDv, out RoleMappedData rmd, out IPredictor predictor);

                    // Get the predictor as a bindable mapper, and bind it to the RoleMappedSchema found above.
                    var bindable = ScoreUtils.GetSchemaBindableMapper(Parent.Host, Parent.PredictorModels[i].Predictor);
                    Mappers[i] = bindable.Bind(Parent.Host, rmd.Schema) as ISchemaBoundRowMapper;
                    if (Mappers[i] == null)
                    {
                        throw Parent.Host.Except("Predictor {0} is not a row to row mapper", i);
                    }

                    // Make sure there is a score column, and remember its index.
                    var scoreCol = Mappers[i].OutputSchema.GetColumnOrNull(MetadataUtils.Const.ScoreValueKind.Score);
                    if (!scoreCol.HasValue)
                    {
                        throw Parent.Host.Except("Predictor {0} does not contain a score column", i);
                    }
                    ScoreCols[i] = scoreCol.Value.Index;

                    // Get the pipeline.
                    var dv       = new EmptyDataView(Parent.Host, schema.Schema);
                    var tm       = new TransformModelImpl(Parent.Host, dv, dv);
                    var pipeline = Parent.PredictorModels[i].TransformModel.Apply(Parent.Host, tm);
                    BoundPipelines[i] = pipeline.AsRowToRowMapper(Parent.Host);
                    if (BoundPipelines[i] == null)
                    {
                        throw Parent.Host.Except("Transform pipeline {0} contains transforms that do not implement IRowToRowMapper", i);
                    }
                }
            }
Example 11
            public Mapper(CustomMappingTransformer <TSrc, TDst> parent, Schema inputSchema)
            {
                Contracts.AssertValue(parent);
                Contracts.AssertValue(inputSchema);

                _host        = parent._host.Register(nameof(Mapper));
                _parent      = parent;
                _inputSchema = inputSchema;

                var emptyDataView = new EmptyDataView(_host, inputSchema);

                _typedSrc = TypedCursorable <TSrc> .Create(_host, emptyDataView, false, _parent.InputSchemaDefinition);
            }
Example 12
        public void AssertStaticSimple()
        {
            var env    = new ConsoleEnvironment(0, verbose: true);
            var schema = new SimpleSchema(env,
                                          P("hello", TextType.Instance),
                                          P("my", new VectorType(NumberType.I8, 5)),
                                          P("friend", new KeyType(DataKind.U4, 0, 3)));
            var view = new EmptyDataView(env, schema);

            view.AssertStatic(env, c => (
                                  my: c.I8.Vector,
                                  friend: c.KeyU4.NoValue.Scalar,
                                  hello: c.Text.Scalar
                                  ));
        }
Example 13
        private static IDataView MakeScorer(IHostEnvironment env, ISchema schema, string featureColumn, TModel model, BinaryClassifierScorer.Arguments args)
        {
            var settings = $"Binary{{{CmdParser.GetSettings(env, args, new BinaryClassifierScorer.Arguments())}}}";

            var scorerFactorySettings = CmdParser.CreateComponentFactory(
                typeof(IComponentFactory <IDataView, ISchemaBoundMapper, RoleMappedSchema, IDataScorerTransform>),
                typeof(SignatureDataScorer),
                settings);

            var bindable = ScoreUtils.GetSchemaBindableMapper(env, model, scorerFactorySettings: scorerFactorySettings);
            var edv      = new EmptyDataView(env, schema);
            var data     = new RoleMappedData(edv, "Label", featureColumn, opt: true);

            return(new BinaryClassifierScorer(env, args, data.Data, bindable.Bind(env, data.Schema), data.Schema));
        }
Example 14
        private static TOut CreatePipelineEnsemble <TOut>(IHostEnvironment env, IPredictorModel[] predictors, SchemaBindablePipelineEnsembleBase ensemble)
            where TOut : CommonOutputs.TrainerOutput, new()
        {
            var inputSchema = predictors[0].TransformModel.InputSchema;
            var dv          = new EmptyDataView(env, inputSchema);

            // The role mappings are specific to the individual predictors.
            var rmd            = new RoleMappedData(dv);
            var predictorModel = new PredictorModel(env, rmd, dv, ensemble);

            var output = new TOut {
                PredictorModel = predictorModel
            };

            return(output);
        }
Example 15
            public ReferringSitesCollectionView()
            {
                AutomationId    = ReferringSitesPageAutomationIds.CollectionView;
                BackgroundColor = Color.Transparent;
                ItemTemplate    = new ReferringSitesDataTemplate();
                SelectionMode   = SelectionMode.Single;
                ItemsLayout     = new LinearItemsLayout(ItemsLayoutOrientation.Vertical);

                // Set the iOS Header to `new BoxView { HeightRequest = titleRowHeight + titleTopMargin }` once this bug is fixed: https://github.com/xamarin/Xamarin.Forms/issues/9879
                Header = Device.RuntimePlatform is Device.iOS ? null : new BoxView {
                    HeightRequest = ReferringSitesDataTemplate.BottomPadding
                };
                Footer = Device.RuntimePlatform is Device.iOS ? null : new BoxView {
                    HeightRequest = ReferringSitesDataTemplate.TopPadding
                };
                EmptyView = new EmptyDataView("EmptyReferringSitesList", ReferringSitesPageAutomationIds.EmptyDataView);
            }
            public ReferringSitesCollectionView()
            {
                AutomationId    = ReferringSitesPageAutomationIds.CollectionView;
                BackgroundColor = Color.Transparent;
                ItemTemplate    = new ReferringSitesDataTemplateSelector();
                SelectionMode   = SelectionMode.Single;
                ItemsLayout     = new LinearItemsLayout(ItemsLayoutOrientation.Vertical);

                //iOS Header + Footer break CollectionView after Refresh bug: https://github.com/xamarin/Xamarin.Forms/issues/9879
                Header = Device.RuntimePlatform is Device.iOS ? null : new BoxView {
                    HeightRequest = ReferringSitesDataTemplateSelector.BottomPadding
                };
                Footer = Device.RuntimePlatform is Device.iOS ? null : new BoxView {
                    HeightRequest = ReferringSitesDataTemplateSelector.TopPadding
                };
                EmptyView = new EmptyDataView("EmptyReferringSitesList", ReferringSitesPageAutomationIds.EmptyDataView);
            }
Example 17
        public void SaveSummary(TextWriter writer, RoleMappedSchema schema)
        {
            for (int i = 0; i < PredictorModels.Length; i++)
            {
                writer.WriteLine("Partition model {0} summary:", i);

                if (!(PredictorModels[i].Predictor is ICanSaveSummary summaryModel))
                {
                    writer.WriteLine("Model of type {0}", PredictorModels[i].Predictor.GetType().Name);
                    continue;
                }

                // Load the feature names for the i'th model.
                var dv = new EmptyDataView(Host, PredictorModels[i].TransformModel.InputSchema);
                PredictorModels[i].PrepareData(Host, dv, out RoleMappedData rmd, out IPredictor pred);
                summaryModel.SaveSummary(writer, rmd.Schema);
            }
        }
Example 18
        private static void SaveIdvSchemaToFile(IDataView idv, string path, IHost host)
        {
            var emptyDataView = new EmptyDataView(host, idv.Schema);
            var saverArgs     = new TextSaver.Arguments
            {
                OutputHeader = false,
                OutputSchema = true,
                Dense        = true
            };
            IDataSaver saver = new TextSaver(host, saverArgs);

            using (var fs = File.OpenWrite(path))
            {
                saver.SaveData(fs, emptyDataView, Utils.GetIdentityPermutation(emptyDataView.Schema.Count)
                               .Where(x => !emptyDataView.Schema[x].IsHidden && saver.IsColumnSavable(emptyDataView.Schema[x].Type))
                               .ToArray());
            }
        }
        // Checks that the label columns of all the models have the same key type as the first model's label column,
        // including the same cardinality and the same key values, and returns the cardinality of the label column key.
        private static int CheckKeyLabelColumnCore <T>(IHostEnvironment env, PredictorModel[] models, KeyType labelType, Schema schema, int labelIndex, VectorType keyValuesType)
            where T : IEquatable <T>
        {
            env.Assert(keyValuesType.ItemType.RawType == typeof(T));
            env.AssertNonEmpty(models);
            var labelNames = default(VBuffer <T>);

            schema[labelIndex].GetKeyValues(ref labelNames);
            var classCount = labelNames.Length;

            var curLabelNames = default(VBuffer <T>);

            for (int i = 1; i < models.Length; i++)
            {
                var model = models[i];
                var edv   = new EmptyDataView(env, model.TransformModel.InputSchema);
                model.PrepareData(env, edv, out RoleMappedData rmd, out IPredictor pred);
                if (!rmd.Schema.Label.HasValue)
                {
                    throw env.Except("Training schema for model {0} does not have a label column", i);
                }
                var labelCol = rmd.Schema.Label.Value;

                var curLabelType = labelCol.Type as KeyType;
                if (!labelType.Equals(curLabelType))
                {
                    throw env.Except("Label column of model {0} has different type than model 0", i);
                }

                var mdType = labelCol.Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.KeyValues)?.Type;
                if (mdType == null || !mdType.Equals(keyValuesType))
                {
                    throw env.Except("Label column of model {0} has different key value type than model 0", i);
                }
                labelCol.GetKeyValues(ref curLabelNames);
                if (!AreEqual(in labelNames, in curLabelNames))
                {
                    throw env.Except("Label of model {0} has different values than model 0", i);
                }
            }
            return(classCount);
        }
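        // The AreEqual helper used above is not part of this excerpt. Below is a minimal sketch of
        // what it needs to do - compare the two key-value vectors slot by slot - written against the
        // public VBuffer API; the original implementation may differ in detail.
        private static bool AreEqual<T>(in VBuffer<T> v1, in VBuffer<T> v2)
            where T : IEquatable<T>
        {
            if (v1.Length != v2.Length)
            {
                return false;
            }
            // DenseValues() enumerates every slot, filling in defaults for sparse entries.
            return v1.DenseValues().Zip(v2.DenseValues(), (x1, x2) => x1.Equals(x2)).All(b => b);
        }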
        internal override void Save(IHostEnvironment env, Stream stream)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(stream, nameof(stream));
            using (var ch = env.Start("Saving predictor model"))
            {
                // REVIEW: address the asymmetry in the way we're loading and saving the model.
                // Effectively, we have methods to load the transform model from a model.zip, but don't have
                // methods to compose the model.zip out of transform model, predictor and role mappings
                // (we use the TrainUtils.SaveModel that does all three).

                // Create the chain of transforms for saving.
                IDataView data = new EmptyDataView(env, TransformModel.InputSchema);
                data = TransformModel.Apply(env, data);
                var roleMappedData = new RoleMappedData(data, _roleMappings, opt: true);

                TrainUtils.SaveModel(env, ch, stream, Predictor, roleMappedData);
            }
        }
Example 21
        private static void GetPipeline(IHostEnvironment env, InputBase input, out IDataView startingData, out RoleMappedData transformedData)
        {
            Contracts.AssertValue(env);
            env.AssertValue(input);
            env.AssertNonEmpty(input.Models);

            ISchema inputSchema = null;

            startingData    = null;
            transformedData = null;
            byte[][] transformedDataSerialized    = null;
            string[] transformedDataZipEntryNames = null;
            for (int i = 0; i < input.Models.Length; i++)
            {
                var model = input.Models[i];

                var inputData = new EmptyDataView(env, model.TransformModel.InputSchema);
                model.PrepareData(env, inputData, out RoleMappedData transformedDataCur, out IPredictor pred);

                if (inputSchema == null)
                {
                    env.Assert(i == 0);
                    inputSchema     = model.TransformModel.InputSchema;
                    startingData    = inputData;
                    transformedData = transformedDataCur;
                }
                else if (input.ValidatePipelines)
                {
                    using (var ch = env.Start("Validating pipeline"))
                    {
                        if (transformedDataSerialized == null)
                        {
                            ch.Assert(transformedDataZipEntryNames == null);
                            SerializeRoleMappedData(env, ch, transformedData, out transformedDataSerialized,
                                                    out transformedDataZipEntryNames);
                        }
                        CheckSamePipeline(env, ch, transformedDataCur, transformedDataSerialized, transformedDataZipEntryNames);
                        ch.Done();
                    }
                }
            }
        }
Example 22
        /// <summary>
        /// Save a zero-row dataview that will be used to infer schema information, used in the case
        /// where the transpose loader is instantiated with no input streams.
        /// </summary>
        private static void SaveSchema(IHostEnvironment env, ModelSaveContext ctx, Schema schema)
        {
            Contracts.AssertValue(env);

            env.AssertValue(ctx);
            env.AssertValue(schema);

            var noRows = new EmptyDataView(env, schema);

            env.Assert(noRows.GetRowCount() == 0);

            var saverArgs = new BinarySaver.Arguments();

            saverArgs.Silent = true;
            var saver = new BinarySaver(env, saverArgs);

            // We load our schema from what amounts to a binary loader, so all columns should likewise be savable.
            env.Assert(Enumerable.Range(0, schema.ColumnCount).All(c => saver.IsColumnSavable(schema.GetColumnType(c))));
            ctx.SaveBinaryStream("Schema.idv", w => saver.SaveData(w.BaseStream, noRows, Utils.GetIdentityPermutation(schema.ColumnCount)));
        }
Example 23
        /// <summary>
        /// Create a TransformModel containing the given (optional) transforms applied to the
        /// given root schema.
        /// </summary>
        public TransformModel(IHostEnvironment env, Schema schemaRoot, IDataTransform[] xfs)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(schemaRoot, nameof(schemaRoot));
            env.CheckValueOrNull(xfs);

            IDataView view = new EmptyDataView(env, schemaRoot);

            _schemaRoot = view.Schema;

            if (Utils.Size(xfs) > 0)
            {
                foreach (var xf in xfs)
                {
                    env.AssertValue(xf, "xfs", "Transforms should not be null");
                    view = ApplyTransformUtils.ApplyTransformToData(env, xf, view);
                }
            }

            _chain = view;
        }
Example 24
        /// <summary>
        /// Apply this transform model to the given input transform model to produce a composite transform model.
        /// </summary>
        public ITransformModel Apply(IHostEnvironment env, ITransformModel input)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(input, nameof(input));

            IDataView view;
            Schema    schemaRoot = input.InputSchema;
            var       mod        = input as TransformModel;

            if (mod != null)
            {
                view = ApplyTransformUtils.ApplyAllTransformsToData(env, _chain, mod._chain);
            }
            else
            {
                view = new EmptyDataView(env, schemaRoot);
                view = input.Apply(env, view);
                view = Apply(env, view);
            }

            return(new TransformModel(env, schemaRoot, view));
        }
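        // A hypothetical composition sketch for the Apply overload above, not part of the original
        // source; `env`, `first`, and `second` are illustrative assumptions. Apply runs the input
        // model before this one, so a composite that replays `first` and then `second` is written as below.
        private static ITransformModel Compose(IHostEnvironment env, ITransformModel first, ITransformModel second)
        {
            return second.Apply(env, first);
        }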
Example 25
        // Checks that the predictors have matching label columns, and returns the number of classes in all predictors.
        protected static int CheckLabelColumn(IHostEnvironment env, IPredictorModel[] models, bool isBinary)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckNonEmpty(models, nameof(models));

            var model = models[0];
            var edv   = new EmptyDataView(env, model.TransformModel.InputSchema);

            model.PrepareData(env, edv, out RoleMappedData rmd, out IPredictor pred);
            var labelInfo = rmd.Schema.Label;

            if (labelInfo == null)
            {
                throw env.Except("Training schema for model 0 does not have a label column");
            }

            var labelType = rmd.Schema.Schema.GetColumnType(rmd.Schema.Label.Index);

            if (!labelType.IsKey)
            {
                return(CheckNonKeyLabelColumnCore(env, pred, models, isBinary, labelType));
            }

            if (isBinary && labelType.KeyCount != 2)
            {
                throw env.Except("Label is not binary");
            }
            var schema = rmd.Schema.Schema;
            var mdType = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.KeyValues, labelInfo.Index);

            if (mdType == null || !mdType.IsKnownSizeVector)
            {
                throw env.Except("Label column of type key must have a vector of key values metadata");
            }

            return(Utils.MarshalInvoke(CheckKeyLabelColumnCore <int>, mdType.ItemType.RawType, env, models, labelType.AsKey, schema, labelInfo.Index, mdType));
        }
            public void Save(IHostEnvironment env, ModelSaveContext ctx)
            {
                Contracts.AssertValue(ctx);

                // *** Binary format ***
                // Schema of the data view containing the optional columns
                // int: number of added columns
                // for each added column
                //   int: id of output column name
                //   ColumnType: the type of the column

                var noRows    = new EmptyDataView(env, _inputWithOptionalColumn);
                var saverArgs = new BinarySaver.Arguments();

                saverArgs.Silent = true;
                var saver = new BinarySaver(env, saverArgs);

                using (var strm = new MemoryStream())
                {
                    saver.SaveData(strm, noRows, _srcColsWithOptionalColumn);
                    ctx.SaveBinaryStream("Schema.idv", w => w.WriteByteArray(strm.ToArray()));
                }

                int size = InfoCount;

                ctx.Writer.Write(size);

                saver = new BinarySaver(env, new BinarySaver.Arguments());
                for (int i = 0; i < size; i++)
                {
                    ctx.SaveNonEmptyString(GetColumnNameCore(i));
                    var columnType = ColumnTypes[i];
                    int written;
                    saver.TryWriteTypeDescription(ctx.Writer.BaseStream, columnType, out written);
                }
            }
Example 27
        protected TTransformer TrainTransformer(IDataView trainSet,
                                                IDataView validationSet = null, IPredictor initPredictor = null)
        {
            var cachedTrain = TrainerInfo.WantCaching ? new CacheDataView(_env, trainSet, prefetch: null) : trainSet;

            var       trainRoles = new RoleMappedData(cachedTrain, label: _labelCol, feature: _featureCol);
            var       emptyData  = new EmptyDataView(_env, trainSet.Schema);
            IDataView normalizer = emptyData;

            if (TrainerInfo.NeedNormalization && !trainRoles.Schema.FeaturesAreNormalized())
            {
                var view = NormalizeTransform.CreateMinMaxNormalizer(_env, trainRoles.Data, name: trainRoles.Schema.Feature.Name);
                normalizer = ApplyTransformUtils.ApplyAllTransformsToData(_env, view, emptyData, cachedTrain);

                trainRoles = new RoleMappedData(view, trainRoles.Schema.GetColumnRoleNames());
            }

            RoleMappedData validRoles;

            if (validationSet == null)
            {
                validRoles = null;
            }
            else
            {
                var cachedValid = TrainerInfo.WantCaching ? new CacheDataView(_env, validationSet, prefetch: null) : validationSet;
                cachedValid = ApplyTransformUtils.ApplyAllTransformsToData(_env, normalizer, cachedValid);
                validRoles  = new RoleMappedData(cachedValid, label: _labelCol, feature: _featureCol);
            }

            var pred = TrainCore(new TrainContext(trainRoles, validRoles, initPredictor));

            var scoreRoles = new RoleMappedData(normalizer, label: _labelCol, feature: _featureCol);

            return(MakeScorer(pred, scoreRoles));
        }