Esempio n. 1
0
        /// <summary>
        /// Persists a zero-row data view carrying <paramref name="schema"/> so that the schema can be
        /// inferred later. This is used when the transpose loader is instantiated with no input streams.
        /// </summary>
        /// <param name="env">Host environment used for assertions and to construct the saver.</param>
        /// <param name="ctx">Model save context that receives the "Schema.idv" binary stream.</param>
        /// <param name="schema">The schema to persist.</param>
        private static void SaveSchema(IHostEnvironment env, ModelSaveContext ctx, Schema schema)
        {
            Contracts.AssertValue(env);
            env.AssertValue(ctx);
            env.AssertValue(schema);

            var emptyView = new EmptyDataView(env, schema);
            env.Assert(emptyView.GetRowCount() == 0);

            var binarySaver = new BinarySaver(env, new BinarySaver.Arguments { Silent = true });

            // The schema comes from what amounts to a binary loader, so every column is expected to be savable.
            bool allSavable = Enumerable.Range(0, schema.Count)
                .All(col => binarySaver.IsColumnSavable(schema[col].Type));
            env.Assert(allSavable);

            // Save every column, in schema order, with no rows.
            ctx.SaveBinaryStream(
                "Schema.idv",
                writer => binarySaver.SaveData(writer.BaseStream, emptyView, Utils.GetIdentityPermutation(schema.Count)));
        }
Esempio n. 2
0
 /// <summary>
 /// Loads an object from the given repository directory, throwing a decode exception
 /// when <c>LoadModelOrNull</c> reports that nothing could be loaded.
 /// </summary>
 /// <param name="env">Host environment; must not be null.</param>
 /// <param name="result">Receives the loaded object.</param>
 /// <param name="rep">Repository to read from; must not be null.</param>
 /// <param name="dir">Directory inside the repository.</param>
 /// <param name="extra">Extra arguments forwarded to the loader.</param>
 public static void LoadModel<TRes, TSig>(IHostEnvironment env, out TRes result, RepositoryReader rep, string dir, params object[] extra)
     where TRes : class
 {
     Contracts.CheckValue(env, nameof(env));
     env.CheckValue(rep, nameof(rep));

     bool loaded = LoadModelOrNull<TRes, TSig>(env, out result, rep, dir, extra);
     if (!loaded)
     {
         throw env.ExceptDecode("Corrupt model file");
     }

     // A successful load is expected to have produced a value.
     env.AssertValue(result);
 }
Esempio n. 3
0
        /// <summary>
        /// Parses <paramref name="settings"/> into a sub-component and creates an instance of it.
        /// </summary>
        /// <param name="env">Host environment passed to the created instance.</param>
        /// <param name="settings">Textual sub-component settings to parse.</param>
        /// <param name="loadName">Receives the <c>Kind</c> of the parsed sub-component.</param>
        /// <param name="extraArgs">Extra arguments forwarded to <c>CreateInstance</c>.</param>
        /// <returns>The instance created by the parsed sub-component.</returns>
        private static TRes CreateCore<TRes, TSig>(IHostEnvironment env, string settings, out string loadName, params object[] extraArgs)
            where TRes : class
        {
            Contracts.AssertValue(env);
            env.AssertValue(settings, "settings");

            var component = SubComponent.Parse<TRes, TSig>(settings);

            // Report the kind before instantiating, so it is set even if creation throws.
            loadName = component.Kind;

            TRes instance = component.CreateInstance(env, extraArgs);
            return instance;
        }
        /// <summary>
        /// Wraps <paramref name="mapper"/> in a bound label-name mapper that exposes the key values of the
        /// training label column as <c>TrainingLabelValues</c> annotations.
        /// </summary>
        /// <typeparam name="T">Item type of the label key values.</typeparam>
        /// <param name="env">Host environment.</param>
        /// <param name="mapper">Mapper to wrap; expected to be an <c>ISchemaBoundRowMapper</c>.</param>
        /// <param name="trainSchema">Training schema; its label column is expected to be present.</param>
        /// <returns>The wrapping bound mapper.</returns>
        private static ISchemaBoundMapper WrapCore<T>(IHostEnvironment env, ISchemaBoundMapper mapper, RoleMappedSchema trainSchema)
        {
            Contracts.AssertValue(env);
            env.AssertValue(mapper);
            env.AssertValue(trainSchema);
            env.Assert(mapper is ISchemaBoundRowMapper);
            env.Assert(trainSchema.Label.HasValue);

            var label = trainSchema.Label.Value;

            // Key values from the training schema label, will map to slot names of the score output.
            var keyValuesColumn = label.Annotations.Schema.GetColumnOrNull(AnnotationUtils.Kinds.KeyValues);
            var keyValuesType = keyValuesColumn?.Type as VectorDataViewType;
            env.AssertValue(keyValuesType);

            // Expose the annotation lookup as a plain getter delegate.
            ValueGetter<VBuffer<T>> keyValuesGetter = (ref VBuffer<T> dst) => label.GetKeyValues(ref dst);

            return MulticlassClassificationScorer.LabelNameBindableMapper.CreateBound<T>(
                env,
                (ISchemaBoundRowMapper)mapper,
                keyValuesType,
                keyValuesGetter,
                AnnotationUtils.Kinds.TrainingLabelValues,
                CanWrap);
        }
Esempio n. 5
0
        /// <summary>
        /// Builds the list of confusion matrices stored in <paramref name="confusionMatrix"/>.
        /// </summary>
        /// <remarks>
        /// The Count column is a vector column. Each row read from it fills one row of a square matrix whose
        /// side is the column's vector size; every time that many rows have been read, a new
        /// <see cref="ConfusionMatrix"/> is emitted. A trailing, incomplete group of rows is dropped.
        /// Class names are taken from the slot names metadata of the Count column.
        /// </remarks>
        /// <param name="env">Host environment used for assertions and exceptions.</param>
        /// <param name="confusionMatrix">Data view holding the Count column.</param>
        /// <returns>One matrix per complete group of rows; empty when there are none.</returns>
        internal static List<ConfusionMatrix> Create(IHostEnvironment env, IDataView confusionMatrix)
        {
            Contracts.AssertValue(env);
            env.AssertValue(confusionMatrix);

            if (!confusionMatrix.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.Count, out int countColumn))
            {
                throw env.Except($"ConfusionMatrix data view did not contain a {nameof(MetricKinds.ColumnNames.Count)} column.");
            }

            // The cursor is disposable: release it on every exit path, including exceptions.
            using (IRowCursor cursor = confusionMatrix.GetRowCursor(col => col == countColumn))
            {
                var slots = default(VBuffer<DvText>);
                confusionMatrix.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, countColumn, ref slots);

                string[] classNames = new string[slots.Count];
                for (int i = 0; i < slots.Count; i++)
                {
                    classNames[i] = slots.Values[i].ToString();
                }

                ColumnType type = confusionMatrix.Schema.GetColumnType(countColumn);
                env.Assert(type.IsVector);

                ValueGetter<VBuffer<double>> countGetter = cursor.GetGetter<VBuffer<double>>(countColumn);
                VBuffer<double> countValues = default;
                List<ConfusionMatrix> confusionMatrices = new List<ConfusionMatrix>();

                int valuesRowIndex = 0;
                double[,] elements = null;
                while (cursor.MoveNext())
                {
                    // Start a fresh (zero-initialized) matrix at the first row of each group.
                    if (valuesRowIndex == 0)
                    {
                        elements = new double[type.VectorSize, type.VectorSize];
                    }

                    countGetter(ref countValues);
                    if (countValues.IsDense)
                    {
                        for (int i = 0; i < countValues.Length; i++)
                        {
                            elements[valuesRowIndex, i] = countValues.Values[i];
                        }
                    }
                    else
                    {
                        // Sparse buffer: only the first Count entries of Values are meaningful and
                        // Indices gives their slots. The remaining cells keep their zero default.
                        for (int i = 0; i < countValues.Count; i++)
                        {
                            elements[valuesRowIndex, countValues.Indices[i]] = countValues.Values[i];
                        }
                    }

                    valuesRowIndex++;

                    // A full square has been read: emit it and start over.
                    if (valuesRowIndex == type.VectorSize)
                    {
                        valuesRowIndex = 0;
                        confusionMatrices.Add(new ConfusionMatrix(elements, classNames));
                    }
                }

                return confusionMatrices;
            }
        }
        /// <summary>
        /// Converts the rows of <paramref name="overallMetrics"/> into <see cref="ClassificationMetrics"/>
        /// objects, pairing each with a confusion matrix built from <paramref name="confusionMatrix"/>.
        /// </summary>
        /// <param name="env">Host environment used for assertions and exceptions.</param>
        /// <param name="overallMetrics">Data view with one row per set of overall metrics.</param>
        /// <param name="confusionMatrix">Data view from which the confusion matrices are created.</param>
        /// <param name="confusionMatriceStartIndex">
        /// Index of the first metrics row that consumes a confusion matrix. Earlier rows do not advance
        /// the matrix enumerator.
        /// </param>
        /// <returns>One metrics object per row of <paramref name="overallMetrics"/>.</returns>
        internal static List<ClassificationMetrics> FromMetrics(IHostEnvironment env, IDataView overallMetrics, IDataView confusionMatrix,
                                                                int confusionMatriceStartIndex = 0)
        {
            Contracts.AssertValue(env);
            env.AssertValue(overallMetrics);
            env.AssertValue(confusionMatrix);

            var metricsEnumerable = overallMetrics.AsEnumerable<SerializationClass>(env, true, ignoreMissingColumns: true);

            // Probe for at least one row. The probing enumerator is disposed right away so that
            // whatever it holds open is released before the real enumeration below.
            using (var probe = metricsEnumerable.GetEnumerator())
            {
                if (!probe.MoveNext())
                {
                    // NOTE(review): the message says "RegressionMetrics" although this builds
                    // ClassificationMetrics; the text is kept unchanged for callers that match on it.
                    throw env.Except("The overall RegressionMetrics didn't have any rows.");
                }
            }

            List<ClassificationMetrics> metrics = new List<ClassificationMetrics>();
            var confusionMatrices = ConfusionMatrix.Create(env, confusionMatrix).GetEnumerator();

            int index = 0;
            foreach (var metric in metricsEnumerable)
            {
                // Only rows at or past the start index consume a confusion matrix.
                if (index++ >= confusionMatriceStartIndex && !confusionMatrices.MoveNext())
                {
                    throw env.Except("Confusion matrices didn't have enough matrices.");
                }

                metrics.Add(
                    new ClassificationMetrics()
                    {
                        AccuracyMicro = metric.AccuracyMicro,
                        AccuracyMacro = metric.AccuracyMacro,
                        LogLoss = metric.LogLoss,
                        LogLossReduction = metric.LogLossReduction,
                        TopKAccuracy = metric.TopKAccuracy,
                        PerClassLogLoss = metric.PerClassLogLoss,
                        ConfusionMatrix = confusionMatrices.Current
                    });
            }

            return metrics;
        }
            /// <summary>
            /// Creates new bindings by binding <paramref name="bindable"/> to <paramref name="input"/>, using
            /// the input column roles and the score column name of the current row mapper.
            /// </summary>
            /// <param name="input">The new input schema.</param>
            /// <param name="bindable">Bindable mapper; its bound form must implement <c>ISchemaBoundRowMapper</c>.</param>
            /// <param name="env">Host environment used for checks.</param>
            /// <returns>Bindings over <paramref name="input"/> and the newly bound row mapper.</returns>
            public BindingsImpl ApplyToSchema(DataViewSchema input, ISchemaBindableMapper bindable, IHostEnvironment env)
            {
                Contracts.AssertValue(env);
                env.AssertValue(input);
                env.AssertValue(bindable);

                string scoreColumnName = RowMapper.OutputSchema[ScoreColumnIndex].Name;
                var roleMapped = new RoleMappedSchema(input, RowMapper.GetInputColumnRoles());

                // Binding checks compatibility of the predictor input types.
                var boundRowMapper = bindable.Bind(env, roleMapped) as ISchemaBoundRowMapper;
                env.CheckParam(boundRowMapper != null, nameof(bindable), "Mapper must implement ISchemaBoundRowMapper");

                bool hasScoreColumn = boundRowMapper.OutputSchema.TryGetColumnIndex(scoreColumnName, out int scoreIndex);
                env.Check(hasScoreColumn, "Mapper doesn't have expected score column");

                return new BindingsImpl(input, boundRowMapper, Suffix, ScoreColumnKind, true, scoreIndex, PredColType);
            }
Esempio n. 8
0
        /// <summary>
        /// Tries to create a bindable mapper for <paramref name="predictor"/> from a mapper component that
        /// has the same kind and settings as <paramref name="scorerSettings"/>.
        /// </summary>
        /// <param name="env">Host environment.</param>
        /// <param name="predictor">Predictor passed to the mapper factory.</param>
        /// <param name="scorerSettings">Scorer settings whose kind and settings are reused.</param>
        /// <param name="bindable">Receives the created mapper.</param>
        /// <returns>The result of <c>ComponentCatalog.TryCreateInstance</c>.</returns>
        private static bool TryCreateBindableFromScorer(IHostEnvironment env, IPredictor predictor,
            SubComponent<IDataScorerTransform, SignatureDataScorer> scorerSettings, out ISchemaBindableMapper bindable)
        {
            Contracts.AssertValue(env);
            env.AssertValue(predictor);
            env.Assert(scorerSettings.IsGood());

            // Look for a mapper factory method with the same load name as the scorer settings.
            var mapperComponent = new SubComponent<ISchemaBindableMapper, SignatureBindableMapper>(
                scorerSettings.Kind,
                scorerSettings.Settings);

            bool created = ComponentCatalog.TryCreateInstance(env, out bindable, mapperComponent, predictor);
            return created;
        }
Esempio n. 9
0
        /// <summary>
        /// Creates a typed cursorable over the given data view.
        /// </summary>
        /// <param name="env">Host environment.</param>
        /// <param name="data">The underlying data view.</param>
        /// <param name="ignoreMissingColumns">Whether to ignore missing columns in the data view.</param>
        /// <param name="schemaDefinition">
        /// The optional user-provided schema. When null, a write-direction schema is derived from
        /// <typeparamref name="TRow"/>.
        /// </param>
        /// <returns>The constructed cursorable.</returns>
        public static TypedCursorable<TRow> Create(IHostEnvironment env, IDataView data, bool ignoreMissingColumns, SchemaDefinition schemaDefinition)
        {
            Contracts.AssertValue(env);
            env.AssertValue(data);
            env.AssertValueOrNull(schemaDefinition);

            // Prefer the caller's definition; otherwise fall back to one inferred from the row type.
            var rowSchema = schemaDefinition != null
                ? InternalSchemaDefinition.Create(typeof(TRow), schemaDefinition)
                : InternalSchemaDefinition.Create(typeof(TRow), SchemaDefinition.Direction.Write);

            return new TypedCursorable<TRow>(env, data, ignoreMissingColumns, rowSchema);
        }
Esempio n. 10
0
        /// <summary>
        /// Trains <paramref name="trainer"/> on data built from <paramref name="input"/> and
        /// <paramref name="args"/>, then returns a scorer for the resulting predictor over <paramref name="input"/>.
        /// </summary>
        /// <param name="env">Host environment.</param>
        /// <param name="args">Arguments providing custom columns, calibrator and scorer settings.</param>
        /// <param name="trainer">The trainer to run.</param>
        /// <param name="input">The input data view.</param>
        /// <param name="mapperFactory">Mapper factory forwarded to <c>ScoreUtils.GetScorer</c>.</param>
        /// <returns>The scoring transform.</returns>
        private static IDataTransform Create(IHostEnvironment env, Arguments args, ITrainer trainer, IDataView input, IComponentFactory<IPredictor, ISchemaBindableMapper> mapperFactory)
        {
            Contracts.AssertValue(env, nameof(env));
            env.AssertValue(args, nameof(args));
            env.AssertValue(trainer, nameof(trainer));
            env.AssertValue(input, nameof(input));

            var host = env.Register("TrainAndScoreTransform");
            using (var ch = host.Start("Train"))
            {
                ch.Trace("Constructing trainer");

                var customCols = TrainUtils.CheckAndGenerateCustomColumns(env, args.CustomColumn);
                var data = CreateDataFromArgs(ch, input, args, out string feat, out string group);

                var predictor = TrainUtils.Train(
                    host, ch, data, trainer, null,
                    args.Calibrator, args.MaxCalibrationExamples, null);

                return ScoreUtils.GetScorer(args.Scorer, predictor, input, feat, group, customCols, env, data.Schema, mapperFactory);
            }
        }
        /// <summary>
        /// Wraps <paramref name="mapper"/> in a bound label-name mapper that exposes the key values of the
        /// training label column as <c>SlotNames</c> metadata.
        /// </summary>
        /// <typeparam name="T">Item type of the label key values.</typeparam>
        /// <param name="env">Host environment.</param>
        /// <param name="mapper">Mapper to wrap; expected to be an <c>ISchemaBoundRowMapper</c>.</param>
        /// <param name="trainSchema">Training schema whose label column carries key values.</param>
        /// <returns>The wrapping bound mapper.</returns>
        internal static ISchemaBoundMapper WrapCore<T>(IHostEnvironment env, ISchemaBoundMapper mapper, RoleMappedSchema trainSchema)
        {
            Contracts.AssertValue(env);
            env.AssertValue(mapper);
            env.AssertValue(trainSchema);
            env.Assert(mapper is ISchemaBoundRowMapper);

            // Key values from the training schema label, will map to slot names of the score output.
            var keyValuesColumn = trainSchema.Label.Value.Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.KeyValues);
            var keyValuesType = keyValuesColumn?.Type;
            env.AssertValue(keyValuesType);
            env.Assert(keyValuesType is VectorType);

            // Expose the metadata lookup as a plain getter delegate.
            ValueGetter<VBuffer<T>> keyValuesGetter =
                (ref VBuffer<T> dst) => trainSchema.Label.Value.GetKeyValues(ref dst);

            return LabelNameBindableMapper.CreateBound<T>(
                env,
                (ISchemaBoundRowMapper)mapper,
                keyValuesType as VectorType,
                keyValuesGetter,
                MetadataUtils.Kinds.SlotNames,
                CanWrap);
        }
        /// <summary>
        /// Creates a data view backed by the given list of rows.
        /// </summary>
        /// <typeparam name="TRow">The row type.</typeparam>
        /// <param name="env">Host environment.</param>
        /// <param name="data">The rows to expose.</param>
        /// <param name="schemaDefinition">
        /// Optional schema definition. When null, one is derived from <typeparamref name="TRow"/>.
        /// </param>
        /// <returns>A list-backed data view.</returns>
        public static IDataView CreateFromList<TRow>(IHostEnvironment env, IList<TRow> data,
            SchemaDefinition schemaDefinition = null)
            where TRow : class
        {
            Contracts.AssertValue(env);
            env.AssertValue(data);
            env.AssertValueOrNull(schemaDefinition);

            // Prefer the caller's definition; otherwise infer one from the row type.
            var rowSchema = schemaDefinition != null
                ? InternalSchemaDefinition.Create(typeof(TRow), schemaDefinition)
                : InternalSchemaDefinition.Create(typeof(TRow));

            return new ListDataView<TRow>(env, data, rowSchema);
        }
Esempio n. 13
0
        /// <summary>
        /// Creates a streaming data view backed by the given enumerable of rows.
        /// </summary>
        /// <typeparam name="TRow">The row type.</typeparam>
        /// <param name="env">Host environment.</param>
        /// <param name="data">The rows to expose.</param>
        /// <param name="schemaDefinition">
        /// Optional schema definition. When null, a read-direction schema is derived from
        /// <typeparamref name="TRow"/>.
        /// </param>
        /// <returns>A streaming data view over <paramref name="data"/>.</returns>
        public static StreamingDataView<TRow> CreateFromEnumerable<TRow>(IHostEnvironment env, IEnumerable<TRow> data,
            SchemaDefinition schemaDefinition = null)
            where TRow : class
        {
            Contracts.AssertValue(env);
            env.AssertValue(data);
            env.AssertValueOrNull(schemaDefinition);

            // Prefer the caller's definition; otherwise infer one from the row type.
            var rowSchema = schemaDefinition != null
                ? InternalSchemaDefinition.Create(typeof(TRow), schemaDefinition)
                : InternalSchemaDefinition.Create(typeof(TRow), SchemaDefinition.Direction.Read);

            return new StreamingDataView<TRow>(env, data, rowSchema);
        }
            /// <summary>
            /// Creates a row mapper that binds the parent's generic mapper to <paramref name="schema"/> and
            /// appends a feature-contributions column to the generic mapper's output schema.
            /// </summary>
            /// <param name="env">Host environment.</param>
            /// <param name="parent">The bindable mapper that owns this row mapper.</param>
            /// <param name="schema">Role-mapped input schema; its feature column is expected to be set.</param>
            public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema schema)
            {
                Contracts.AssertValue(env);
                _env = env;
                _env.AssertValue(schema);
                _env.AssertValue(parent);
                _env.AssertValue(schema.Feature);
                _parent = parent;
                InputRoleMappedSchema = schema;
                var genericMapper = parent.GenericMapper.Bind(_env, schema);

                // NOTE(review): the result of this 'as' cast is dereferenced below without a null check;
                // presumably the generic mapper always binds to an ISchemaBoundRowMapper - confirm.
                _genericRowMapper = genericMapper as ISchemaBoundRowMapper;

                if (parent.Stringify)
                {
                    // Stringified output: a single text column holding the feature contributions.
                    var builder = new SchemaBuilder();
                    builder.AddColumn(DefaultColumnNames.FeatureContributions, TextType.Instance, null);
                    _outputSchema = builder.GetSchema();
                    // Cache the feature slot names if the input has them; otherwise use an empty buffer
                    // of the feature vector's size.
                    if (InputSchema.HasSlotNames(InputRoleMappedSchema.Feature.Index, InputRoleMappedSchema.Feature.Type.VectorSize))
                    {
                        InputSchema.GetMetadata(MetadataUtils.Kinds.SlotNames, InputRoleMappedSchema.Feature.Index,
                                                ref _slotNames);
                    }
                    else
                    {
                        _slotNames = VBufferUtils.CreateEmpty <ReadOnlyMemory <char> >(InputRoleMappedSchema.Feature.Type.VectorSize);
                    }
                }
                else
                {
                    // Numeric output: an R4 vector column built from the feature column's vector type.
                    _outputSchema = Schema.Create(new FeatureContributionSchema(_env, DefaultColumnNames.FeatureContributions,
                                                                                new VectorType(NumberType.R4, schema.Feature.Type as VectorType),
                                                                                InputSchema, InputRoleMappedSchema.Feature.Index));
                }

                // The final output is the generic mapper's columns followed by the contributions column.
                _outputGenericSchema = _genericRowMapper.OutputSchema;
                OutputSchema         = new CompositeSchema(new Schema[] { _outputGenericSchema, _outputSchema, }).AsSchema;
            }
        /// <summary>
        /// Builds a prediction engine: creates the typed input row, maps it through the mapper produced by
        /// <paramref name="makeMapper"/>, and wraps the mapped row as a typed output row.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="makeMapper">Factory that creates a row-to-row mapper for the input row's schema.</param>
        /// <param name="ignoreMissingColumns">Whether missing output columns are ignored.</param>
        /// <param name="inputSchemaDefinition">Schema definition for the input row.</param>
        /// <param name="outputSchemaDefinition">Schema definition for the output row.</param>
        private PredictionEngine(IHostEnvironment env, Func<ISchema, IRowToRowMapper> makeMapper, bool ignoreMissingColumns,
            SchemaDefinition inputSchemaDefinition, SchemaDefinition outputSchemaDefinition)
        {
            Contracts.CheckValue(env, nameof(env));
            env.AssertValue(makeMapper);

            _inputRow = DataViewConstructionUtils.CreateInputRow<TSrc>(env, inputSchemaDefinition);
            var mapper = makeMapper(_inputRow.Schema);

            // An empty view over the mapper's schema is enough to set up the typed output binding.
            var emptyOutput = new EmptyDataView(env, mapper.Schema);
            var cursorable = TypedCursorable<TDst>.Create(env, emptyOutput, ignoreMissingColumns, outputSchemaDefinition);

            // Request every output column from the mapper.
            var mappedRow = mapper.GetRow(_inputRow, column => true, out _disposer);
            _outputRow = cursorable.GetRow(mappedRow);
        }
Esempio n. 16
0
        /// <summary>
        /// Wraps <paramref name="mapper"/> in a bound label-name mapper that exposes the key values of the
        /// training label column as <c>TrainingLabelValues</c> metadata.
        /// </summary>
        /// <typeparam name="T">Item type of the label key values.</typeparam>
        /// <param name="env">Host environment.</param>
        /// <param name="mapper">Mapper to wrap; expected to be an <c>ISchemaBoundRowMapper</c>.</param>
        /// <param name="trainSchema">Training schema whose label column carries key values.</param>
        /// <returns>The wrapping bound mapper.</returns>
        private static ISchemaBoundMapper WrapCore<T>(IHostEnvironment env, ISchemaBoundMapper mapper, RoleMappedSchema trainSchema)
        {
            Contracts.AssertValue(env);
            env.AssertValue(mapper);
            env.AssertValue(trainSchema);
            env.Assert(mapper is ISchemaBoundRowMapper);

            // Key values from the training schema label, will map to slot names of the score output.
            var keyValuesType = trainSchema.Schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.KeyValues, trainSchema.Label.Index);
            env.AssertValue(keyValuesType);
            env.Assert(keyValuesType.IsVector);

            // Expose the metadata lookup as a plain getter delegate.
            ValueGetter<VBuffer<T>> keyValuesGetter = (ref VBuffer<T> dst) =>
                trainSchema.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, trainSchema.Label.Index, ref dst);

            return MultiClassClassifierScorer.LabelNameBindableMapper.CreateBound<T>(
                env,
                (ISchemaBoundRowMapper)mapper,
                keyValuesType.AsVector,
                keyValuesGetter,
                MetadataUtils.Kinds.TrainingLabelValues,
                CanWrap);
        }
Esempio n. 17
0
            /// <summary>
            /// Defines the search space if needed, runs the main learning loop, and returns the best
            /// pipeline seen.
            /// </summary>
            /// <param name="numTransformLevels">Passed to <c>InferSearchSpace</c> when the search space is undefined.</param>
            /// <param name="batchSize">Batch size passed to the learning loop.</param>
            /// <param name="numOfTrainingRows">Number of training rows passed to the learning loop.</param>
            /// <returns>The first of the sorted sampled pipelines, or null when none were sampled.</returns>
            public PipelinePattern InferPipelines(int numTransformLevels, int batchSize, int numOfTrainingRows)
            {
                _env.AssertValue(_trainData, nameof(_trainData), "Must set training data prior to calling method.");
                _env.AssertValue(_testData, nameof(_testData), "Must set test data prior to calling method.");

                var host = _env.Register("InferPipelines");
                using (var ch = host.Start("InferPipelines"))
                {
                    // Define the search space through inference if that has not happened yet.
                    bool searchSpaceReady = IsSearchSpaceDefined();
                    if (!searchSpaceReady)
                    {
                        InferSearchSpace(numTransformLevels);
                    }

                    // Learn for the given number of iterations.
                    MainLearningLoop(batchSize, numOfTrainingRows);

                    // Hand back the best pipeline seen, if any.
                    if (_sortedSampledElements.Count > 0)
                    {
                        return _sortedSampledElements.First().Value;
                    }

                    return null;
                }
            }
Esempio n. 18
0
        /// <summary>
        /// Builds a <see cref="BinaryClassificationMetrics"/> from the single row of
        /// <paramref name="overallMetrics"/> and the matrices created from <paramref name="confusionMatrix"/>.
        /// </summary>
        /// <param name="env">Host environment used for assertions and exceptions.</param>
        /// <param name="overallMetrics">Data view expected to hold exactly one row of metrics.</param>
        /// <param name="confusionMatrix">Data view from which the confusion matrix is created.</param>
        /// <returns>The populated metrics object.</returns>
        /// <remarks>Throws via <c>env.Except</c> when the metrics view has zero rows or more than one row.</remarks>
        internal static BinaryClassificationMetrics FromMetrics(IHostEnvironment env, IDataView overallMetrics, IDataView confusionMatrix)
        {
            Contracts.AssertValue(env);
            env.AssertValue(overallMetrics);
            env.AssertValue(confusionMatrix);

            var metricsEnumerable = overallMetrics.AsEnumerable<SerializationClass>(env, true, ignoreMissingColumns: true);

            SerializationClass metrics;

            // Dispose the enumerator on every path, including the two error paths, so that
            // whatever it holds open is released.
            using (var enumerator = metricsEnumerable.GetEnumerator())
            {
                // NOTE(review): both messages say "RegressionMetrics" although this builds
                // BinaryClassificationMetrics; the text is kept unchanged for callers that match on it.
                if (!enumerator.MoveNext())
                {
                    throw env.Except("The overall RegressionMetrics didn't have any rows.");
                }

                metrics = enumerator.Current;

                if (enumerator.MoveNext())
                {
                    throw env.Except("The overall RegressionMetrics contained more than 1 row.");
                }
            }

            return new BinaryClassificationMetrics()
            {
                Auc = metrics.Auc,
                Accuracy = metrics.Accuracy,
                PositivePrecision = metrics.PositivePrecision,
                PositiveRecall = metrics.PositiveRecall,
                NegativePrecision = metrics.NegativePrecision,
                NegativeRecall = metrics.NegativeRecall,
                LogLoss = metrics.LogLoss,
                LogLossReduction = metrics.LogLossReduction,
                Entropy = metrics.Entropy,
                F1Score = metrics.F1Score,
                Auprc = metrics.Auprc,
                ConfusionMatrix = ConfusionMatrix.Create(env, confusionMatrix),
            };
        }
Esempio n. 19
0
            /// <summary>
            /// Deserializing constructor: reads the float size marker and the gamma value from
            /// <paramref name="ctx"/>, validating both.
            /// </summary>
            /// <param name="env">Host environment used for decode checks.</param>
            /// <param name="ctx">Model load context to read from.</param>
            private RandomNumberGenerator(IHostEnvironment env, ModelLoadContext ctx)
            {
                Contracts.AssertValue(env);
                env.AssertValue(ctx);

                // *** Binary format ***
                // int: sizeof(Float)
                // Float: gamma

                var reader = ctx.Reader;

                int floatSize = reader.ReadInt32();
                env.CheckDecode(floatSize == sizeof(float));

                _gamma = reader.ReadFloat();
                bool gammaIsFinite = FloatUtils.IsFinite(_gamma);
                env.CheckDecode(gammaIsFinite);
            }
Esempio n. 20
0
        /// <summary>
        /// Creates a row mapper over the single feature column of <paramref name="schema"/>.
        /// </summary>
        /// <param name="schema">Role-mapped schema; its feature column must be set.</param>
        /// <param name="parent">The predictor that provides the booster.</param>
        /// <param name="env">Host environment used for checks.</param>
        /// <param name="outputSchema">The output schema exposed by this mapper.</param>
        public XGBoostScalarRowMapperBase(RoleMappedSchema schema, XGBoostPredictorBase<TOutput> parent, IHostEnvironment env, ISchema outputSchema)
        {
            Contracts.AssertValue(env, "env");
            env.AssertValue(schema, "schema");
            env.AssertValue(parent, "parent");
            env.AssertValue(schema.Feature, "schema");

            // REVIEW xadupre: only one feature columns is allowed.
            // This should be revisited in the future.
            // XGBoost has plans for others types.
            // Look at https://github.com/dmlc/xgboost/issues/874.
            env.Check(schema.Feature != null, "Unexpected number of feature columns, 1 expected.");

            _parent = parent;

            var feature = schema.Feature;
            var featureRole = new[]
            {
                new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Feature, feature.Name)
            };

            _inputSchema = new RoleMappedSchema(schema.Schema, featureRole);
            _outputSchema = outputSchema;

            // Resolve the feature column to its index; it is expected to exist in the schema.
            _inputCols = new List<int>();
            if (schema.Schema.TryGetColumnIndex(feature.Name, out int featureIndex))
            {
                _inputCols.Add(featureIndex);
            }
            else
            {
                Contracts.Assert(false);
            }

            _booster = _parent.GetBooster();
        }
Esempio n. 21
0
        /// <summary>
        /// Creates the schema bound mapper and determines the scorer sub-component to use: the given
        /// one when it is good, otherwise one derived from the bound mapper.
        /// </summary>
        /// <param name="predictor">The predictor to bind.</param>
        /// <param name="scorer">The requested scorer sub-component; may be null or empty.</param>
        /// <param name="schema">Role-mapped schema to bind against.</param>
        /// <param name="env">Host environment.</param>
        /// <param name="mapper">Receives the bound mapper.</param>
        /// <returns>The scorer sub-component to use.</returns>
        private static SubComponent<IDataScorerTransform, SignatureDataScorer> GetScorerComponentAndMapper(
            IPredictor predictor, SubComponent<IDataScorerTransform, SignatureDataScorer> scorer,
            RoleMappedSchema schema, IHostEnvironment env, out ISchemaBoundMapper mapper)
        {
            Contracts.AssertValue(env);

            var bindableMapper = GetSchemaBindableMapper(env, predictor, scorer);
            env.AssertValue(bindableMapper);

            mapper = bindableMapper.Bind(env, schema);

            // Keep an explicitly specified scorer; otherwise infer one from the bound mapper.
            return scorer.IsGood() ? scorer : GetScorerComponent(mapper);
        }
        /// <summary>
        /// Deserializing constructor: reads the model statistics from <paramref name="ctx"/> in the binary
        /// layout described in the body.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="ctx">Model load context to read from.</param>
        public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            _env = env;
            _env.AssertValue(ctx);

            // *** Binary Format ***
            // int: count of parameters
            // long: count of training examples
            // Single: deviance
            // Single: null deviance
            // bool: whether standard error is included
            // (Conditional) Single[_paramCount]: values of std errors of coefficients
            // (Conditional) int: length of std errors of coefficients
            // (Conditional) int[_paramCount]: indices of std errors of coefficients

            var reader = ctx.Reader;

            _paramCount = reader.ReadInt32();
            _env.CheckDecode(_paramCount > 0);

            _trainingExampleCount = reader.ReadInt64();
            _env.CheckDecode(_trainingExampleCount > 0);

            _deviance = reader.ReadFloat();
            _nullDeviance = reader.ReadFloat();

            bool stdErrorsIncluded = reader.ReadBoolean();
            if (!stdErrorsIncluded)
            {
                _env.Assert(_coeffStdError == null);
                return;
            }

            Single[] errorValues = reader.ReadFloatArray(_paramCount);
            int vectorLength = reader.ReadInt32();
            _env.CheckDecode(vectorLength >= _paramCount);

            if (vectorLength == _paramCount)
            {
                // Dense: one standard error per slot, no indices stored.
                _coeffStdError = new VBuffer<Single>(vectorLength, errorValues);
            }
            else
            {
                // Sparse: the indices of the stored values follow.
                _env.Assert(vectorLength > _paramCount);
                int[] errorIndices = reader.ReadIntArray(_paramCount);
                _coeffStdError = new VBuffer<Single>(vectorLength, _paramCount, errorValues, errorIndices);
            }
        }
Esempio n. 23
0
        /// <summary>
        /// Creates the schema bound mapper and determines the scorer component factory to use: the given
        /// one when it is not null, otherwise one derived from the bound mapper.
        /// </summary>
        /// <param name="predictor">The predictor to bind.</param>
        /// <param name="scorerFactory">The requested scorer factory; may be null.</param>
        /// <param name="schema">Role-mapped schema to bind against.</param>
        /// <param name="env">Host environment.</param>
        /// <param name="mapperFactory">Mapper factory forwarded to <c>GetSchemaBindableMapper</c>.</param>
        /// <param name="mapper">Receives the bound mapper.</param>
        /// <returns>The scorer factory to use.</returns>
        private static TScorerFactory GetScorerComponentAndMapper(
            IPredictor predictor,
            TScorerFactory scorerFactory,
            RoleMappedSchema schema,
            IHostEnvironment env,
            IComponentFactory<IPredictor, ISchemaBindableMapper> mapperFactory,
            out ISchemaBoundMapper mapper)
        {
            Contracts.AssertValue(env);

            var commandLineFactory = scorerFactory as ICommandLineComponentFactory;
            var bindableMapper = GetSchemaBindableMapper(env, predictor, mapperFactory, commandLineFactory);
            env.AssertValue(bindableMapper);

            mapper = bindableMapper.Bind(env, schema);

            // Keep an explicitly specified factory; otherwise infer one from the bound mapper.
            return scorerFactory != null ? scorerFactory : GetScorerComponent(mapper);
        }
        /// <summary>
        /// Creates the XML generator, validating and storing the optional output file name.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="args">Arguments providing <c>XmlFilename</c>.</param>
        /// <param name="regenerate">Regeneration string; asserted to be non-empty.</param>
        public XmlGenerator(IHostEnvironment env, Arguments args, string regenerate)
        {
            Contracts.CheckValue(env, nameof(env));
            env.AssertValue(args, nameof(args));
            env.AssertNonEmpty(regenerate, nameof(regenerate));

            _xmlFilename = args.XmlFilename;

            // A blank file name is normalized to null; a real one has its directory validated.
            if (string.IsNullOrWhiteSpace(_xmlFilename))
            {
                _xmlFilename = null;
            }
            else
            {
                Utils.CheckOptionalUserDirectory(_xmlFilename, nameof(args.XmlFilename));
            }

            _host = env.Register("XML Generator");
        }
        /// <summary>
        /// Saves <paramref name="inputSchema"/> into the repository entry named by <c>SchemaEntryName</c>,
        /// as an empty data view written with the binary saver. Does nothing when the schema is null.
        /// </summary>
        /// <param name="inputSchema">The schema to save; may be null.</param>
        /// <param name="rep">The repository writer that receives the entry.</param>
        private void SaveInputSchema(DataViewSchema inputSchema, RepositoryWriter rep)
        {
            _env.AssertValueOrNull(inputSchema);
            _env.AssertValue(rep);

            if (inputSchema == null)
            {
                return;
            }

            using (var ch = _env.Start("Saving Schema"))
            // Dispose the entry once written, as SaveRoleMappings does for its entry,
            // so that it is closed even if saving throws.
            using (var entry = rep.CreateEntry(SchemaEntryName))
            {
                var saver = new BinarySaver(_env, new BinarySaver.Arguments { Silent = true });
                DataSaverUtils.SaveDataView(ch, saver, new EmptyDataView(_env, inputSchema), entry.Stream, keepHidden: true);
            }
        }
Esempio n. 26
0
        /// <summary>
        /// Prepares the data pipeline of every model in <paramref name="input"/> against an empty view of
        /// that model's input schema, and returns the starting and transformed data of the first model.
        /// When <c>input.ValidatePipelines</c> is set, each later model's pipeline is checked against the first.
        /// </summary>
        /// <param name="env">Host environment.</param>
        /// <param name="input">Input holding the models; <c>Models</c> is asserted to be non-empty.</param>
        /// <param name="startingData">Receives the empty input data view of the first model.</param>
        /// <param name="transformedData">Receives the role-mapped transformed data of the first model.</param>
        private static void GetPipeline(IHostEnvironment env, InputBase input, out IDataView startingData, out RoleMappedData transformedData)
        {
            Contracts.AssertValue(env);
            env.AssertValue(input);
            env.AssertNonEmpty(input.Models);

            // Doubles as the "first model already seen" marker: it is only null on the first iteration.
            ISchema inputSchema = null;

            startingData    = null;
            transformedData = null;
            // Serialized form of the first model's transformed data, built lazily on the first validation.
            byte[][] transformedDataSerialized    = null;
            string[] transformedDataZipEntryNames = null;
            for (int i = 0; i < input.Models.Length; i++)
            {
                var model = input.Models[i];

                var inputData = new EmptyDataView(env, model.TransformModel.InputSchema);
                model.PrepareData(env, inputData, out RoleMappedData transformedDataCur, out IPredictor pred);

                if (inputSchema == null)
                {
                    // First model: its data becomes the reference that is returned to the caller.
                    env.Assert(i == 0);
                    inputSchema     = model.TransformModel.InputSchema;
                    startingData    = inputData;
                    transformedData = transformedDataCur;
                }
                else if (input.ValidatePipelines)
                {
                    using (var ch = env.Start("Validating pipeline"))
                    {
                        if (transformedDataSerialized == null)
                        {
                            // Serialize the reference pipeline once and reuse it for every later comparison.
                            ch.Assert(transformedDataZipEntryNames == null);
                            SerializeRoleMappedData(env, ch, transformedData, out transformedDataSerialized,
                                                    out transformedDataZipEntryNames);
                        }
                        CheckSamePipeline(env, ch, transformedDataCur, transformedDataSerialized, transformedDataZipEntryNames);
                        ch.Done();
                    }
                }
            }
        }
        /// <summary>
        /// Potentially applies a min-max normalizer to the feature column of <paramref name="data"/>,
        /// keeping the column role names of the existing schema.
        /// </summary>
        /// <param name="env">The host environment used to potentially instantiate the transform.</param>
        /// <param name="data">The role-mapped data that is potentially replaced by this method.</param>
        /// <param name="trainer">The trainer to query as to whether it wants normalization, through the
        /// <see cref="TrainerInfo.NeedNormalization"/> flag of its <see cref="ITrainer.Info"/>.</param>
        /// <returns>True if the normalizer was applied and <paramref name="data"/> was replaced.</returns>
        public static bool CreateIfNeeded(IHostEnvironment env, ref RoleMappedData data, ITrainer trainer)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(data, nameof(data));
            env.CheckValue(trainer, nameof(trainer));

            // Nothing to do when the trainer does not ask for normalization.
            if (!trainer.Info.NeedNormalization)
            {
                return false;
            }

            // Only a definite "false" from FeaturesAreNormalized leads to normalization;
            // any other answer leaves the data untouched.
            if (data.Schema.FeaturesAreNormalized() != false)
            {
                return false;
            }

            var featureColumn = data.Schema.Feature;
            env.AssertValue(featureColumn); // Should be defined, if FeaturesAreNormalized returned a definite value.

            var normalizedView = CreateMinMaxNormalizer(env, data.Data, name: featureColumn.Name);
            data = new RoleMappedData(normalizedView, data.Schema.GetColumnRoleNames());
            return true;
        }
Esempio n. 28
0
        /// <summary>
        /// Creates a TransformModel containing the given (optional) transforms applied, in order, to an
        /// empty data view over the given root schema.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="schemaRoot">The root schema; must not be null.</param>
        /// <param name="xfs">The transforms to apply; may be null or empty.</param>
        public TransformModel(IHostEnvironment env, Schema schemaRoot, IDataTransform[] xfs)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(schemaRoot, nameof(schemaRoot));
            env.CheckValueOrNull(xfs);

            IDataView chain = new EmptyDataView(env, schemaRoot);
            _schemaRoot = chain.Schema;

            // The size check guards the loop, since xfs is allowed to be null.
            int transformCount = Utils.Size(xfs);
            for (int i = 0; i < transformCount; i++)
            {
                var transform = xfs[i];
                env.AssertValue(transform, "xfs", "Transforms should not be null");
                chain = ApplyTransformUtils.ApplyTransformToData(env, transform, chain);
            }

            _chain = chain;
        }
Esempio n. 29
0
        /// <summary>
        /// Deserializing constructor: reads the parameter count, training example count, deviance and
        /// null deviance from <paramref name="ctx"/>.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="ctx">Model load context to read from.</param>
        internal ModelStatisticsBase(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            Env = env;
            Env.AssertValue(ctx);

            // *** Binary Format ***
            // int: count of parameters
            // long: count of training examples
            // float: deviance
            // float: null deviance

            var reader = ctx.Reader;

            // Both counts must be strictly positive for the model to decode.
            ParametersCount = reader.ReadInt32();
            bool hasParameters = ParametersCount > 0;
            Env.CheckDecode(hasParameters);

            TrainingExampleCount = reader.ReadInt64();
            bool hasExamples = TrainingExampleCount > 0;
            Env.CheckDecode(hasExamples);

            Deviance = reader.ReadFloat();
            NullDeviance = reader.ReadFloat();
        }
Esempio n. 30
0
        /// <summary>
        /// Saves the role/column-name associations of <paramref name="schema"/> in <paramref name="rep"/>,
        /// as a two-column ("Role", "Column") text data view.
        /// </summary>
        /// <param name="env">Host environment.</param>
        /// <param name="ch">Channel used for assertions.</param>
        /// <param name="schema">The role-mapped schema whose mappings are saved.</param>
        /// <param name="rep">The repository writer that receives the role-mapping entry.</param>
        internal static void SaveRoleMappings(IHostEnvironment env, IChannel ch, RoleMappedSchema schema, RepositoryWriter rep)
        {
            // REVIEW: Should we also save this stuff, for instance, in some portion of the
            // score command or transform?
            Contracts.AssertValue(env);
            env.AssertValue(ch);
            ch.AssertValue(schema);

            // OrderBy is stable, so columns sharing a role keep their relative order.
            var orderedRoles = schema.GetColumnRoleNames().OrderBy(r => r.Key.Value).ToArray();

            string[] roleNames = new string[orderedRoles.Length];
            string[] columnNames = new string[orderedRoles.Length];
            for (int i = 0; i < orderedRoles.Length; i++)
            {
                roleNames[i] = orderedRoles[i].Key.Value;
                columnNames[i] = orderedRoles[i].Value;
            }

            var viewBuilder = new ArrayDataViewBuilder(env);
            viewBuilder.AddColumn("Role", roleNames);
            viewBuilder.AddColumn("Column", columnNames);

            using (var entry = rep.CreateEntry(DirTrainingInfo, RoleMappingFile))
            {
                // REVIEW: The role mappings should stay easy for a human to read and edit, which is why
                // the text saver is used. The trade-off is that special characters such as '\n' in a
                // role or column name would not round-trip through the text saver/loader.
                var textSaver = new TextSaver(env, new TextSaver.Arguments() { Dense = true, Silent = true });
                var roleView = viewBuilder.GetDataView();
                int[] allColumns = Utils.GetIdentityPermutation(roleView.Schema.ColumnCount);
                textSaver.SaveData(entry.Stream, roleView, allColumns);
            }
        }