/// <summary>
/// Saves a zero-row data view built over <paramref name="schema"/> into the "Schema.idv" binary
/// stream of <paramref name="ctx"/>. The saved view is used to infer schema information in the case
/// where the transpose loader is instantiated with no input streams.
/// </summary>
/// <param name="env">The host environment used for assertions and to construct the saver.</param>
/// <param name="ctx">The model save context that receives the "Schema.idv" stream.</param>
/// <param name="schema">The schema to persist.</param>
private static void SaveSchema(IHostEnvironment env, ModelSaveContext ctx, Schema schema)
{
    Contracts.AssertValue(env);
    env.AssertValue(ctx);
    env.AssertValue(schema);

    var emptyView = new EmptyDataView(env, schema);
    env.Assert(emptyView.GetRowCount() == 0);

    var saver = new BinarySaver(env, new BinarySaver.Arguments { Silent = true });

    // We load our schema from what amounts to a binary loader, so all columns should likewise be savable.
    env.Assert(Enumerable.Range(0, schema.Count).All(col => saver.IsColumnSavable(schema[col].Type)));

    ctx.SaveBinaryStream(
        "Schema.idv",
        writer => saver.SaveData(writer.BaseStream, emptyView, Utils.GetIdentityPermutation(schema.Count)));
}
/// <summary>
/// Load an object from the repository directory. Unlike <c>LoadModelOrNull</c>, a failed load is
/// reported as a decode exception ("Corrupt model file") rather than a <c>false</c> return.
/// </summary>
/// <param name="env">The host environment; checked for null.</param>
/// <param name="result">Receives the loaded object.</param>
/// <param name="rep">The repository to read from; checked for null.</param>
/// <param name="dir">The directory inside the repository, forwarded to <c>LoadModelOrNull</c>.</param>
/// <param name="extra">Extra arguments forwarded to <c>LoadModelOrNull</c>.</param>
public static void LoadModel<TRes, TSig>(IHostEnvironment env, out TRes result, RepositoryReader rep, string dir, params object[] extra)
    where TRes : class
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(rep, nameof(rep));

    bool loaded = LoadModelOrNull<TRes, TSig>(env, out result, rep, dir, extra);
    if (!loaded)
    {
        throw env.ExceptDecode("Corrupt model file");
    }

    env.AssertValue(result);
}
/// <summary>
/// Parses <paramref name="settings"/> into a sub-component, reports its kind through
/// <paramref name="loadName"/>, and instantiates it.
/// </summary>
/// <param name="env">The host environment passed to the created instance.</param>
/// <param name="settings">The component settings string to parse.</param>
/// <param name="loadName">Receives the <c>Kind</c> of the parsed sub-component.</param>
/// <param name="extraArgs">Additional arguments forwarded to <c>CreateInstance</c>.</param>
/// <returns>The instance created by the parsed sub-component.</returns>
private static TRes CreateCore<TRes, TSig>(IHostEnvironment env, string settings, out string loadName, params object[] extraArgs)
    where TRes : class
{
    Contracts.AssertValue(env);
    env.AssertValue(settings, nameof(settings));

    var component = SubComponent.Parse<TRes, TSig>(settings);
    loadName = component.Kind;

    return component.CreateInstance(env, extraArgs);
}
/// <summary>
/// Wraps <paramref name="mapper"/> in a label-name bound mapper whose getter returns the key values
/// of the training label column, attached under <c>AnnotationUtils.Kinds.TrainingLabelValues</c>.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="mapper">The mapper to wrap; asserted to be an <c>ISchemaBoundRowMapper</c>.</param>
/// <param name="trainSchema">The training schema; its label column is asserted to be present.</param>
/// <returns>The bound mapper produced by <c>LabelNameBindableMapper.CreateBound</c>.</returns>
private static ISchemaBoundMapper WrapCore<T>(IHostEnvironment env, ISchemaBoundMapper mapper, RoleMappedSchema trainSchema)
{
    Contracts.AssertValue(env);
    env.AssertValue(mapper);
    env.AssertValue(trainSchema);
    env.Assert(mapper is ISchemaBoundRowMapper);
    env.Assert(trainSchema.Label.HasValue);

    var label = trainSchema.Label.Value;

    // Key values from the training schema label, will map to slot names of the score output.
    var keyValuesColumn = label.Annotations.Schema.GetColumnOrNull(AnnotationUtils.Kinds.KeyValues);
    var keyValuesType = keyValuesColumn?.Type as VectorDataViewType;
    env.AssertValue(keyValuesType);

    // Wrap the fetching of the metadata as a simple getter.
    ValueGetter<VBuffer<T>> keyValuesGetter = (ref VBuffer<T> dst) => label.GetKeyValues(ref dst);

    var rowMapper = (ISchemaBoundRowMapper)mapper;
    return MulticlassClassificationScorer.LabelNameBindableMapper.CreateBound<T>(
        env, rowMapper, keyValuesType, keyValuesGetter, AnnotationUtils.Kinds.TrainingLabelValues, CanWrap);
}
/// <summary>
/// Builds one or more confusion matrices from the Count column of <paramref name="confusionMatrix"/>.
/// Each row of that column supplies one matrix row; every <c>VectorSize</c> consecutive rows are
/// grouped into one square matrix. Class names are taken from the column's slot-name metadata.
/// </summary>
/// <param name="env">The host environment used for assertions and exceptions.</param>
/// <param name="confusionMatrix">The data view holding the confusion-matrix counts.</param>
/// <returns>The matrices that were completely read; a trailing partial matrix is not added.</returns>
internal static List<ConfusionMatrix> Create(IHostEnvironment env, IDataView confusionMatrix)
{
    Contracts.AssertValue(env);
    env.AssertValue(confusionMatrix);

    if (!confusionMatrix.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.Count, out int countColumn))
    {
        throw env.Except($"ConfusionMatrix data view did not contain a {nameof(MetricKinds.ColumnNames.Count)} column.");
    }

    var slots = default(VBuffer<DvText>);
    confusionMatrix.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, countColumn, ref slots);

    string[] classNames = new string[slots.Count];
    for (int i = 0; i < slots.Count; i++)
    {
        classNames[i] = slots.Values[i].ToString();
    }

    ColumnType type = confusionMatrix.Schema.GetColumnType(countColumn);
    env.Assert(type.IsVector);

    List<ConfusionMatrix> confusionMatrices = new List<ConfusionMatrix>();

    // The cursor is opened only once the metadata has been read, and is always disposed,
    // so it is released both on normal completion and when reading throws.
    using (IRowCursor cursor = confusionMatrix.GetRowCursor(col => col == countColumn))
    {
        ValueGetter<VBuffer<double>> countGetter = cursor.GetGetter<VBuffer<double>>(countColumn);
        VBuffer<double> countValues = default;

        int valuesRowIndex = 0;
        double[,] elements = null;
        while (cursor.MoveNext())
        {
            // A fresh array is allocated at the start of every matrix so that matrices
            // already added to the result are not overwritten.
            if (valuesRowIndex == 0)
            {
                elements = new double[type.VectorSize, type.VectorSize];
            }

            countGetter(ref countValues);

            // NOTE(review): Values is indexed by logical position, which presumes the
            // count vectors are dense — confirm against the producer of this data view.
            for (int i = 0; i < countValues.Length; i++)
            {
                elements[valuesRowIndex, i] = countValues.Values[i];
            }

            valuesRowIndex++;
            if (valuesRowIndex == type.VectorSize)
            {
                valuesRowIndex = 0;
                confusionMatrices.Add(new ConfusionMatrix(elements, classNames));
            }
        }
    }

    return confusionMatrices;
}
/// <summary>
/// Converts the rows of <paramref name="overallMetrics"/> into <see cref="ClassificationMetrics"/>
/// objects, pairing each row at or after <paramref name="confusionMatriceStartIndex"/> with the next
/// confusion matrix read from <paramref name="confusionMatrix"/>.
/// </summary>
/// <param name="env">The host environment used for assertions and exceptions.</param>
/// <param name="overallMetrics">The data view containing one row per set of overall metrics.</param>
/// <param name="confusionMatrix">The data view from which the confusion matrices are created.</param>
/// <param name="confusionMatriceStartIndex">Index of the first metrics row that consumes a confusion
/// matrix; earlier rows receive the enumerator's current value without advancing it.</param>
/// <returns>One <see cref="ClassificationMetrics"/> per row of <paramref name="overallMetrics"/>.</returns>
internal static List<ClassificationMetrics> FromMetrics(IHostEnvironment env, IDataView overallMetrics, IDataView confusionMatrix, int confusionMatriceStartIndex = 0)
{
    Contracts.AssertValue(env);
    env.AssertValue(overallMetrics);
    env.AssertValue(confusionMatrix);

    var metricsEnumerable = overallMetrics.AsEnumerable<SerializationClass>(env, true, ignoreMissingColumns: true);

    // Probe for at least one row. The probing enumerator is disposed right away so that
    // whatever it holds open over the data view is released before the real pass below.
    using (var probe = metricsEnumerable.GetEnumerator())
    {
        if (!probe.MoveNext())
        {
            throw env.Except("The overall ClassificationMetrics didn't have any rows.");
        }
    }

    List<ClassificationMetrics> metrics = new List<ClassificationMetrics>();

    var confusionMatrices = ConfusionMatrix.Create(env, confusionMatrix).GetEnumerator();

    int index = 0;
    foreach (var metric in metricsEnumerable)
    {
        // Only rows at or past the start index advance the confusion-matrix enumerator.
        if (index++ >= confusionMatriceStartIndex && !confusionMatrices.MoveNext())
        {
            throw env.Except("Confusion matrices didn't have enough matrices.");
        }

        metrics.Add(
            new ClassificationMetrics()
            {
                AccuracyMicro = metric.AccuracyMicro,
                AccuracyMacro = metric.AccuracyMacro,
                LogLoss = metric.LogLoss,
                LogLossReduction = metric.LogLossReduction,
                TopKAccuracy = metric.TopKAccuracy,
                PerClassLogLoss = metric.PerClassLogLoss,
                ConfusionMatrix = confusionMatrices.Current
            });
    }

    return metrics;
}
/// <summary>
/// Creates new bindings over <paramref name="input"/> by re-binding <paramref name="bindable"/>
/// with the input column roles of the current row mapper, and locating the current score column
/// name in the newly bound mapper's output schema.
/// </summary>
/// <param name="input">The schema to bind against.</param>
/// <param name="bindable">The bindable mapper; its bound form must implement <c>ISchemaBoundRowMapper</c>.</param>
/// <param name="env">The host environment used for checks.</param>
/// <returns>The bindings built over <paramref name="input"/>.</returns>
public BindingsImpl ApplyToSchema(DataViewSchema input, ISchemaBindableMapper bindable, IHostEnvironment env)
{
    Contracts.AssertValue(env);
    env.AssertValue(input);
    env.AssertValue(bindable);

    string scoreColumnName = RowMapper.OutputSchema[ScoreColumnIndex].Name;
    var roleMappedInput = new RoleMappedSchema(input, RowMapper.GetInputColumnRoles());

    // Checks compatibility of the predictor input types.
    var boundMapper = bindable.Bind(env, roleMappedInput);
    var boundRowMapper = boundMapper as ISchemaBoundRowMapper;
    env.CheckParam(boundRowMapper != null, nameof(bindable), "Mapper must implement ISchemaBoundRowMapper");

    int scoreColumnIndex;
    bool hasScoreColumn = boundRowMapper.OutputSchema.TryGetColumnIndex(scoreColumnName, out scoreColumnIndex);
    env.Check(hasScoreColumn, "Mapper doesn't have expected score column");

    return new BindingsImpl(input, boundRowMapper, Suffix, ScoreColumnKind, true, scoreColumnIndex, PredColType);
}
/// <summary>
/// Attempts to create a bindable mapper from a component that has the same kind and settings
/// as <paramref name="scorerSettings"/>, passing <paramref name="predictor"/> as an extra argument.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="predictor">The predictor handed to the mapper component.</param>
/// <param name="scorerSettings">The scorer settings; asserted to be good.</param>
/// <param name="bindable">Receives the created bindable mapper.</param>
/// <returns>The result of <c>ComponentCatalog.TryCreateInstance</c>.</returns>
private static bool TryCreateBindableFromScorer(IHostEnvironment env, IPredictor predictor,
    SubComponent<IDataScorerTransform, SignatureDataScorer> scorerSettings, out ISchemaBindableMapper bindable)
{
    Contracts.AssertValue(env);
    env.AssertValue(predictor);
    env.Assert(scorerSettings.IsGood());

    // Try to find a mapper factory method with the same loadname as the scorer settings.
    string kind = scorerSettings.Kind;
    var settings = scorerSettings.Settings;
    var bindableComponent = new SubComponent<ISchemaBindableMapper, SignatureBindableMapper>(kind, settings);

    return ComponentCatalog.TryCreateInstance(env, out bindable, bindableComponent, predictor);
}
/// <summary>
/// Create a Cursorable object on a given data view.
/// </summary>
/// <param name="env">Host environment.</param>
/// <param name="data">The underlying data view.</param>
/// <param name="ignoreMissingColumns">Whether to ignore missing columns in the data view.</param>
/// <param name="schemaDefinition">The optional user-provided schema. When null, the schema is
/// derived from <typeparamref name="TRow"/> in the write direction.</param>
/// <returns>The constructed Cursorable.</returns>
public static TypedCursorable<TRow> Create(IHostEnvironment env, IDataView data, bool ignoreMissingColumns, SchemaDefinition schemaDefinition)
{
    Contracts.AssertValue(env);
    env.AssertValue(data);
    env.AssertValueOrNull(schemaDefinition);

    InternalSchemaDefinition outSchema;
    if (schemaDefinition != null)
    {
        outSchema = InternalSchemaDefinition.Create(typeof(TRow), schemaDefinition);
    }
    else
    {
        outSchema = InternalSchemaDefinition.Create(typeof(TRow), SchemaDefinition.Direction.Write);
    }

    return new TypedCursorable<TRow>(env, data, ignoreMissingColumns, outSchema);
}
/// <summary>
/// Trains <paramref name="trainer"/> on data built from <paramref name="input"/> and
/// <paramref name="args"/>, then returns a scorer over <paramref name="input"/> for the trained predictor.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="args">The arguments supplying custom columns, calibrator and scorer settings.</param>
/// <param name="trainer">The trainer to run.</param>
/// <param name="input">The input data view.</param>
/// <param name="mapperFactory">Forwarded to <c>ScoreUtils.GetScorer</c>.</param>
/// <returns>The scorer transform returned by <c>ScoreUtils.GetScorer</c>.</returns>
private static IDataTransform Create(IHostEnvironment env, Arguments args, ITrainer trainer, IDataView input,
    IComponentFactory<IPredictor, ISchemaBindableMapper> mapperFactory)
{
    Contracts.AssertValue(env, nameof(env));
    env.AssertValue(args, nameof(args));
    env.AssertValue(trainer, nameof(trainer));
    env.AssertValue(input, nameof(input));

    var host = env.Register("TrainAndScoreTransform");
    using (var ch = host.Start("Train"))
    {
        ch.Trace("Constructing trainer");

        var customColumns = TrainUtils.CheckAndGenerateCustomColumns(env, args.CustomColumn);

        string featureColumn;
        string groupColumn;
        var trainingData = CreateDataFromArgs(ch, input, args, out featureColumn, out groupColumn);

        var predictor = TrainUtils.Train(host, ch, trainingData, trainer, null,
            args.Calibrator, args.MaxCalibrationExamples, null);

        return ScoreUtils.GetScorer(args.Scorer, predictor, input, featureColumn, groupColumn,
            customColumns, env, trainingData.Schema, mapperFactory);
    }
}
/// <summary>
/// Wraps <paramref name="mapper"/> in a label-name bound mapper whose getter returns the key values
/// of the training label column, attached under <c>MetadataUtils.Kinds.SlotNames</c>.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="mapper">The mapper to wrap; asserted to be an <c>ISchemaBoundRowMapper</c>.</param>
/// <param name="trainSchema">The training schema whose label column carries the key values.</param>
/// <returns>The bound mapper produced by <c>LabelNameBindableMapper.CreateBound</c>.</returns>
internal static ISchemaBoundMapper WrapCore<T>(IHostEnvironment env, ISchemaBoundMapper mapper, RoleMappedSchema trainSchema)
{
    Contracts.AssertValue(env);
    env.AssertValue(mapper);
    env.AssertValue(trainSchema);
    env.Assert(mapper is ISchemaBoundRowMapper);

    // Key values from the training schema label, will map to slot names of the score output.
    var keyValuesColumn = trainSchema.Label.Value.Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.KeyValues);
    var keyValuesType = keyValuesColumn?.Type;
    env.AssertValue(keyValuesType);
    env.Assert(keyValuesType is VectorType);

    // Wrap the fetching of the metadata as a simple getter.
    ValueGetter<VBuffer<T>> keyValuesGetter =
        (ref VBuffer<T> dst) => trainSchema.Label.Value.GetKeyValues(ref dst);

    var rowMapper = (ISchemaBoundRowMapper)mapper;
    return LabelNameBindableMapper.CreateBound<T>(
        env, rowMapper, keyValuesType as VectorType, keyValuesGetter, MetadataUtils.Kinds.SlotNames, CanWrap);
}
/// <summary>
/// Creates a data view over the list <paramref name="data"/>.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="data">The list of rows to expose as a data view.</param>
/// <param name="schemaDefinition">The optional schema definition. When null, the schema is derived
/// from <typeparamref name="TRow"/> alone.</param>
/// <returns>A <c>ListDataView</c> over <paramref name="data"/>.</returns>
public static IDataView CreateFromList<TRow>(IHostEnvironment env, IList<TRow> data, SchemaDefinition schemaDefinition = null)
    where TRow : class
{
    Contracts.AssertValue(env);
    env.AssertValue(data);
    env.AssertValueOrNull(schemaDefinition);

    InternalSchemaDefinition internalSchema;
    if (schemaDefinition != null)
    {
        internalSchema = InternalSchemaDefinition.Create(typeof(TRow), schemaDefinition);
    }
    else
    {
        internalSchema = InternalSchemaDefinition.Create(typeof(TRow));
    }

    return new ListDataView<TRow>(env, data, internalSchema);
}
/// <summary>
/// Creates a streaming data view over the enumerable <paramref name="data"/>.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="data">The sequence of rows to expose as a data view.</param>
/// <param name="schemaDefinition">The optional schema definition. When null, the schema is derived
/// from <typeparamref name="TRow"/> in the read direction.</param>
/// <returns>A <c>StreamingDataView</c> over <paramref name="data"/>.</returns>
public static StreamingDataView<TRow> CreateFromEnumerable<TRow>(IHostEnvironment env, IEnumerable<TRow> data, SchemaDefinition schemaDefinition = null)
    where TRow : class
{
    Contracts.AssertValue(env);
    env.AssertValue(data);
    env.AssertValueOrNull(schemaDefinition);

    InternalSchemaDefinition internalSchema;
    if (schemaDefinition != null)
    {
        internalSchema = InternalSchemaDefinition.Create(typeof(TRow), schemaDefinition);
    }
    else
    {
        internalSchema = InternalSchemaDefinition.Create(typeof(TRow), SchemaDefinition.Direction.Read);
    }

    return new StreamingDataView<TRow>(env, data, internalSchema);
}
/// <summary>
/// Binds the parent's generic mapper to <paramref name="schema"/> and builds the output schema as the
/// composite of the generic mapper's output schema and a feature-contributions column. When the parent
/// asks for stringified output, the contributions column is text and the feature slot names are cached
/// (or an empty buffer of the feature vector size when there are none); otherwise it is an R4 vector
/// column shaped like the feature column.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="parent">The bindable mapper that owns this row mapper.</param>
/// <param name="schema">The role-mapped input schema; its feature column is asserted to be set.</param>
public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema schema)
{
    Contracts.AssertValue(env);
    _env = env;
    _env.AssertValue(schema);
    _env.AssertValue(parent);
    _env.AssertValue(schema.Feature);

    _parent = parent;
    InputRoleMappedSchema = schema;
    _genericRowMapper = parent.GenericMapper.Bind(_env, schema) as ISchemaBoundRowMapper;

    if (!parent.Stringify)
    {
        var contributionType = new VectorType(NumberType.R4, schema.Feature.Type as VectorType);
        var contributionSchema = new FeatureContributionSchema(
            _env, DefaultColumnNames.FeatureContributions, contributionType, InputSchema, InputRoleMappedSchema.Feature.Index);
        _outputSchema = Schema.Create(contributionSchema);
    }
    else
    {
        var schemaBuilder = new SchemaBuilder();
        schemaBuilder.AddColumn(DefaultColumnNames.FeatureContributions, TextType.Instance, null);
        _outputSchema = schemaBuilder.GetSchema();

        bool hasSlotNames = InputSchema.HasSlotNames(
            InputRoleMappedSchema.Feature.Index, InputRoleMappedSchema.Feature.Type.VectorSize);
        if (!hasSlotNames)
        {
            _slotNames = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(InputRoleMappedSchema.Feature.Type.VectorSize);
        }
        else
        {
            InputSchema.GetMetadata(MetadataUtils.Kinds.SlotNames, InputRoleMappedSchema.Feature.Index, ref _slotNames);
        }
    }

    _outputGenericSchema = _genericRowMapper.OutputSchema;
    OutputSchema = new CompositeSchema(new Schema[] { _outputGenericSchema, _outputSchema, }).AsSchema;
}
/// <summary>
/// Wires up the engine: creates the reusable input row, builds the mapper over its schema, and
/// connects the mapper's output row to a typed output row.
/// </summary>
/// <param name="env">The host environment; checked for null.</param>
/// <param name="makeMapper">Factory that builds the row-to-row mapper from the input row's schema.</param>
/// <param name="ignoreMissingColumns">Forwarded to the typed cursorable over the output.</param>
/// <param name="inputSchemaDefinition">Schema definition used to create the input row.</param>
/// <param name="outputSchemaDefinition">Schema definition used to create the typed output.</param>
private PredictionEngine(IHostEnvironment env, Func<ISchema, IRowToRowMapper> makeMapper, bool ignoreMissingColumns,
    SchemaDefinition inputSchemaDefinition, SchemaDefinition outputSchemaDefinition)
{
    Contracts.CheckValue(env, nameof(env));
    env.AssertValue(makeMapper);

    _inputRow = DataViewConstructionUtils.CreateInputRow<TSrc>(env, inputSchemaDefinition);
    var rowMapper = makeMapper(_inputRow.Schema);

    // An empty view over the mapper's schema is enough to set up the typed output binding.
    var emptyOutput = new EmptyDataView(env, rowMapper.Schema);
    var typedOutput = TypedCursorable<TDst>.Create(env, emptyOutput, ignoreMissingColumns, outputSchemaDefinition);

    // All output columns are requested from the mapper.
    var mappedRow = rowMapper.GetRow(_inputRow, col => true, out _disposer);
    _outputRow = typedOutput.GetRow(mappedRow);
}
/// <summary>
/// Wraps <paramref name="mapper"/> in a label-name bound mapper whose getter reads the key-values
/// metadata of the training label column, attached under <c>MetadataUtils.Kinds.TrainingLabelValues</c>.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="mapper">The mapper to wrap; asserted to be an <c>ISchemaBoundRowMapper</c>.</param>
/// <param name="trainSchema">The training schema whose label column carries the key values.</param>
/// <returns>The bound mapper produced by <c>LabelNameBindableMapper.CreateBound</c>.</returns>
private static ISchemaBoundMapper WrapCore<T>(IHostEnvironment env, ISchemaBoundMapper mapper, RoleMappedSchema trainSchema)
{
    Contracts.AssertValue(env);
    env.AssertValue(mapper);
    env.AssertValue(trainSchema);
    env.Assert(mapper is ISchemaBoundRowMapper);

    // Key values from the training schema label, will map to slot names of the score output.
    var keyValuesType = trainSchema.Schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.KeyValues, trainSchema.Label.Index);
    env.AssertValue(keyValuesType);
    env.Assert(keyValuesType.IsVector);

    // Wrap the fetching of the metadata as a simple getter.
    ValueGetter<VBuffer<T>> keyValuesGetter =
        (ref VBuffer<T> dst) =>
            trainSchema.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, trainSchema.Label.Index, ref dst);

    var rowMapper = (ISchemaBoundRowMapper)mapper;
    return MultiClassClassifierScorer.LabelNameBindableMapper.CreateBound<T>(
        env, rowMapper, keyValuesType.AsVector, keyValuesGetter, MetadataUtils.Kinds.TrainingLabelValues, CanWrap);
}
/// <summary>
/// Defines the search space if it is not defined yet, runs the main learning loop, and returns
/// the first element of the sorted sampled pipelines.
/// </summary>
/// <param name="numTransformLevels">Passed to the search-space inference when it has to run.</param>
/// <param name="batchSize">Passed to the main learning loop.</param>
/// <param name="numOfTrainingRows">Passed to the main learning loop.</param>
/// <returns>The first sampled pipeline in sorted order, or null when none was sampled.</returns>
public PipelinePattern InferPipelines(int numTransformLevels, int batchSize, int numOfTrainingRows)
{
    _env.AssertValue(_trainData, nameof(_trainData), "Must set training data prior to calling method.");
    _env.AssertValue(_testData, nameof(_testData), "Must set test data prior to calling method.");

    var host = _env.Register("InferPipelines");
    using (var ch = host.Start("InferPipelines"))
    {
        // If the search space has not been initialized yet, define it using inference.
        bool searchSpaceReady = IsSearchSpaceDefined();
        if (!searchSpaceReady)
        {
            InferSearchSpace(numTransformLevels);
        }

        // Learn for a given number of iterations.
        MainLearningLoop(batchSize, numOfTrainingRows);

        // Return best pipeline seen.
        if (_sortedSampledElements.Count > 0)
        {
            return _sortedSampledElements.First().Value;
        }

        return null;
    }
}
/// <summary>
/// Converts the single row of <paramref name="overallMetrics"/> into a
/// <see cref="BinaryClassificationMetrics"/> object, attaching the confusion matrix created from
/// <paramref name="confusionMatrix"/>.
/// </summary>
/// <param name="env">The host environment used for assertions and exceptions.</param>
/// <param name="overallMetrics">The data view that must contain exactly one row of overall metrics.</param>
/// <param name="confusionMatrix">The data view from which the confusion matrix is created.</param>
/// <returns>The populated metrics object.</returns>
internal static BinaryClassificationMetrics FromMetrics(IHostEnvironment env, IDataView overallMetrics, IDataView confusionMatrix)
{
    Contracts.AssertValue(env);
    env.AssertValue(overallMetrics);
    env.AssertValue(confusionMatrix);

    var metricsEnumerable = overallMetrics.AsEnumerable<SerializationClass>(env, true, ignoreMissingColumns: true);

    SerializationClass metrics;

    // The enumerator is disposed as soon as the single row has been read and validated,
    // including when one of the row-count checks throws.
    using (var enumerator = metricsEnumerable.GetEnumerator())
    {
        if (!enumerator.MoveNext())
        {
            throw env.Except("The overall BinaryClassificationMetrics didn't have any rows.");
        }

        metrics = enumerator.Current;

        if (enumerator.MoveNext())
        {
            throw env.Except("The overall BinaryClassificationMetrics contained more than 1 row.");
        }
    }

    return new BinaryClassificationMetrics()
    {
        Auc = metrics.Auc,
        Accuracy = metrics.Accuracy,
        PositivePrecision = metrics.PositivePrecision,
        PositiveRecall = metrics.PositiveRecall,
        NegativePrecision = metrics.NegativePrecision,
        NegativeRecall = metrics.NegativeRecall,
        LogLoss = metrics.LogLoss,
        LogLossReduction = metrics.LogLossReduction,
        Entropy = metrics.Entropy,
        F1Score = metrics.F1Score,
        Auprc = metrics.Auprc,
        ConfusionMatrix = ConfusionMatrix.Create(env, confusionMatrix),
    };
}
/// <summary>
/// Deserializing constructor: reads the float size marker and the gamma value from
/// <paramref name="ctx"/>, rejecting the model when the marker differs from <c>sizeof(float)</c>
/// or gamma is not finite.
/// </summary>
/// <param name="env">The host environment used for decode checks.</param>
/// <param name="ctx">The model load context to read from.</param>
private RandomNumberGenerator(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.AssertValue(env);
    env.AssertValue(ctx);

    // *** Binary format ***
    // int: sizeof(Float)
    // Float: gamma
    var reader = ctx.Reader;

    int storedFloatSize = reader.ReadInt32();
    env.CheckDecode(storedFloatSize == sizeof(float));

    _gamma = reader.ReadFloat();
    env.CheckDecode(FloatUtils.IsFinite(_gamma));
}
/// <summary>
/// Builds a row mapper over the single feature column of <paramref name="schema"/>: records the
/// feature-only input schema, resolves the feature column index, and fetches the booster from
/// <paramref name="parent"/>.
/// </summary>
/// <param name="schema">The role-mapped schema; its feature column must be set.</param>
/// <param name="parent">The predictor that supplies the booster.</param>
/// <param name="env">The host environment used for assertions and checks.</param>
/// <param name="outputSchema">The output schema stored by this mapper.</param>
public XGBoostScalarRowMapperBase(RoleMappedSchema schema, XGBoostPredictorBase<TOutput> parent, IHostEnvironment env, ISchema outputSchema)
{
    Contracts.AssertValue(env, nameof(env));
    env.AssertValue(schema, nameof(schema));
    env.AssertValue(parent, nameof(parent));
    env.AssertValue(schema.Feature, nameof(schema));

    // REVIEW xadupre: only one feature columns is allowed.
    // This should be revisited in the future.
    // XGBoost has plans for others types.
    // Look at https://github.com/dmlc/xgboost/issues/874.
    env.Check(schema.Feature != null, "Unexpected number of feature columns, 1 expected.");

    _parent = parent;

    var featureColumns = new[] { schema.Feature };
    var featureRoles = new[]
    {
        new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Feature, featureColumns[0].Name)
    };
    _inputSchema = new RoleMappedSchema(schema.Schema, featureRoles);
    _outputSchema = outputSchema;

    _inputCols = new List<int>();
    foreach (var column in featureColumns)
    {
        int columnIndex;
        bool found = schema.Schema.TryGetColumnIndex(column.Name, out columnIndex);
        if (!found)
        {
            // The feature column comes from this very schema, so a miss is unexpected.
            Contracts.Assert(false);
        }
        else
        {
            _inputCols.Add(columnIndex);
        }
    }

    _booster = _parent.GetBooster();
}
/// <summary>
/// Creates the schema bound mapper for <paramref name="predictor"/> and returns the scorer
/// subcomponent to use: the given one when it is good, otherwise the one determined from the mapper.
/// </summary>
/// <param name="predictor">The predictor to score with.</param>
/// <param name="scorer">The requested scorer subcomponent; may be null or empty.</param>
/// <param name="schema">The role-mapped schema the mapper is bound to.</param>
/// <param name="env">The host environment.</param>
/// <param name="mapper">Receives the schema bound mapper.</param>
/// <returns>The scorer subcomponent to instantiate.</returns>
private static SubComponent<IDataScorerTransform, SignatureDataScorer> GetScorerComponentAndMapper(
    IPredictor predictor, SubComponent<IDataScorerTransform, SignatureDataScorer> scorer,
    RoleMappedSchema schema, IHostEnvironment env, out ISchemaBoundMapper mapper)
{
    Contracts.AssertValue(env);

    var bindableMapper = GetSchemaBindableMapper(env, predictor, scorer);
    env.AssertValue(bindableMapper);
    mapper = bindableMapper.Bind(env, schema);

    if (!scorer.IsGood())
    {
        return GetScorerComponent(mapper);
    }

    return scorer;
}
/// <summary>
/// Deserializing constructor: reads the parameter count, training example count, deviance and null
/// deviance, and, when present, the standard errors of the coefficients in dense or sparse form.
/// </summary>
/// <param name="env">The host environment; checked for null and used for decode checks.</param>
/// <param name="ctx">The model load context to read from.</param>
public LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.CheckValue(env, nameof(env));
    _env = env;
    _env.AssertValue(ctx);

    // *** Binary Format ***
    // int: count of parameters
    // long: count of training examples
    // Single: deviance
    // Single: null deviance
    // bool: whether standard error is included
    // (Conditional) Single[_paramCount]: values of std errors of coefficients
    // (Conditional) int: length of std errors of coefficients
    // (Conditional) int[_paramCount]: indices of std errors of coefficients
    var reader = ctx.Reader;

    _paramCount = reader.ReadInt32();
    _env.CheckDecode(_paramCount > 0);

    _trainingExampleCount = reader.ReadInt64();
    _env.CheckDecode(_trainingExampleCount > 0);

    _deviance = reader.ReadFloat();
    _nullDeviance = reader.ReadFloat();

    bool includesStdErrors = reader.ReadBoolean();
    if (!includesStdErrors)
    {
        _env.Assert(_coeffStdError == null);
        return;
    }

    Single[] stdErrorValues = reader.ReadFloatArray(_paramCount);
    int length = reader.ReadInt32();
    _env.CheckDecode(length >= _paramCount);

    if (length != _paramCount)
    {
        // More logical slots than stored values: the indices follow and the buffer is sparse.
        _env.Assert(length > _paramCount);
        int[] stdErrorIndices = reader.ReadIntArray(_paramCount);
        _coeffStdError = new VBuffer<Single>(length, _paramCount, stdErrorValues, stdErrorIndices);
    }
    else
    {
        // One value per slot: the buffer is dense and no indices are stored.
        _coeffStdError = new VBuffer<Single>(length, stdErrorValues);
    }
}
/// <summary>
/// Creates the schema bound mapper for <paramref name="predictor"/> and returns the scorer component
/// factory to use: the given one when it is not null, otherwise the one determined from the mapper.
/// </summary>
/// <param name="predictor">The predictor to score with.</param>
/// <param name="scorerFactory">The requested scorer factory; may be null.</param>
/// <param name="schema">The role-mapped schema the mapper is bound to.</param>
/// <param name="env">The host environment.</param>
/// <param name="mapperFactory">Forwarded to <c>GetSchemaBindableMapper</c>.</param>
/// <param name="mapper">Receives the schema bound mapper.</param>
/// <returns>The scorer component factory to instantiate.</returns>
private static TScorerFactory GetScorerComponentAndMapper(
    IPredictor predictor,
    TScorerFactory scorerFactory,
    RoleMappedSchema schema,
    IHostEnvironment env,
    IComponentFactory<IPredictor, ISchemaBindableMapper> mapperFactory,
    out ISchemaBoundMapper mapper)
{
    Contracts.AssertValue(env);

    var commandLineFactory = scorerFactory as ICommandLineComponentFactory;
    var bindableMapper = GetSchemaBindableMapper(env, predictor, mapperFactory, commandLineFactory);
    env.AssertValue(bindableMapper);
    mapper = bindableMapper.Bind(env, schema);

    if (scorerFactory == null)
    {
        return GetScorerComponent(mapper);
    }

    return scorerFactory;
}
/// <summary>
/// Creates the XML generator. The XML file name is taken from <paramref name="args"/>; a null,
/// empty or whitespace-only name is stored as null, any other name is validated as an optional
/// user directory.
/// </summary>
/// <param name="env">The host environment; checked for null.</param>
/// <param name="args">The generator arguments.</param>
/// <param name="regenerate">Asserted to be non-empty; not otherwise used by this constructor.</param>
public XmlGenerator(IHostEnvironment env, Arguments args, string regenerate)
{
    Contracts.CheckValue(env, nameof(env));
    env.AssertValue(args, nameof(args));
    env.AssertNonEmpty(regenerate, nameof(regenerate));

    _xmlFilename = args.XmlFilename;
    bool hasFilename = !string.IsNullOrWhiteSpace(_xmlFilename);
    if (hasFilename)
    {
        Utils.CheckOptionalUserDirectory(_xmlFilename, nameof(args.XmlFilename));
    }
    else
    {
        _xmlFilename = null;
    }

    _host = env.Register("XML Generator");
}
/// <summary>
/// Saves <paramref name="inputSchema"/> into <paramref name="rep"/> as an empty data view written
/// with the binary saver under the schema entry name. Does nothing when the schema is null.
/// </summary>
/// <param name="inputSchema">The schema to save; may be null.</param>
/// <param name="rep">The repository writer that receives the schema entry.</param>
private void SaveInputSchema(DataViewSchema inputSchema, RepositoryWriter rep)
{
    _env.AssertValueOrNull(inputSchema);
    _env.AssertValue(rep);

    if (inputSchema == null)
    {
        return;
    }

    using (var ch = _env.Start("Saving Schema"))
    // The entry is disposed once the schema has been written, so it is closed even when saving throws.
    using (var entry = rep.CreateEntry(SchemaEntryName))
    {
        var saver = new BinarySaver(_env, new BinarySaver.Arguments { Silent = true });
        DataSaverUtils.SaveDataView(ch, saver, new EmptyDataView(_env, inputSchema), entry.Stream, keepHidden: true);
    }
}
/// <summary>
/// Prepares the data pipeline of every model in <paramref name="input"/> over an empty view of that
/// model's input schema. The first model provides <paramref name="startingData"/> and
/// <paramref name="transformedData"/>; when pipeline validation is requested, each later model's
/// pipeline is compared against the serialized form of the first one.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="input">The input holding the models; asserted to contain at least one.</param>
/// <param name="startingData">Receives the empty input view of the first model.</param>
/// <param name="transformedData">Receives the transformed role-mapped data of the first model.</param>
private static void GetPipeline(IHostEnvironment env, InputBase input, out IDataView startingData, out RoleMappedData transformedData)
{
    Contracts.AssertValue(env);
    env.AssertValue(input);
    env.AssertNonEmpty(input.Models);

    ISchema inputSchema = null;
    startingData = null;
    transformedData = null;

    // Serialized form of the reference pipeline, produced lazily on the first comparison.
    byte[][] referenceBytes = null;
    string[] referenceEntryNames = null;

    for (int i = 0; i < input.Models.Length; i++)
    {
        var model = input.Models[i];
        var emptyInput = new EmptyDataView(env, model.TransformModel.InputSchema);
        model.PrepareData(env, emptyInput, out RoleMappedData currentTransformed, out IPredictor pred);

        if (inputSchema == null)
        {
            env.Assert(i == 0);
            inputSchema = model.TransformModel.InputSchema;
            startingData = emptyInput;
            transformedData = currentTransformed;
            continue;
        }

        if (!input.ValidatePipelines)
        {
            continue;
        }

        using (var ch = env.Start("Validating pipeline"))
        {
            if (referenceBytes == null)
            {
                ch.Assert(referenceEntryNames == null);
                SerializeRoleMappedData(env, ch, transformedData, out referenceBytes, out referenceEntryNames);
            }

            CheckSamePipeline(env, ch, currentTransformed, referenceBytes, referenceEntryNames);
            ch.Done();
        }
    }
}
/// <summary>
/// Potentially apply a min-max normalizer to the data's feature column, keeping all existing role
/// mappings except for the feature role mapping.
/// </summary>
/// <param name="env">The host environment to use to potentially instantiate the transform</param>
/// <param name="data">The role-mapped data that is potentially going to be modified by this method.</param>
/// <param name="trainer">The trainer to query as to whether it wants normalization. Normalization is
/// only considered when the <see cref="ITrainer.Info"/>'s <see cref="TrainerInfo.NeedNormalization"/>
/// is <c>true</c>.</param>
/// <returns>True if the normalizer was applied and <paramref name="data"/> was modified</returns>
public static bool CreateIfNeeded(IHostEnvironment env, ref RoleMappedData data, ITrainer trainer)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(data, nameof(data));
    env.CheckValue(trainer, nameof(trainer));

    // Nothing to do when the trainer does not ask for normalization.
    if (!trainer.Info.NeedNormalization)
    {
        return false;
    }

    // Only a definite "false" answer leads to normalization; any other answer leaves the data as is.
    if (data.Schema.FeaturesAreNormalized() != false)
    {
        return false;
    }

    var featureInfo = data.Schema.Feature;
    env.AssertValue(featureInfo); // Should be defined, if FeaturesAreNormalized returned a definite value.

    var normalizedView = CreateMinMaxNormalizer(env, data.Data, name: featureInfo.Name);
    data = new RoleMappedData(normalizedView, data.Schema.GetColumnRoleNames());
    return true;
}
/// <summary>
/// Create a TransformModel containing the given (optional) transforms applied to the
/// given root schema. The chain starts from an empty data view over <paramref name="schemaRoot"/>
/// and each transform is applied in array order.
/// </summary>
/// <param name="env">The host environment; checked for null.</param>
/// <param name="schemaRoot">The root schema; checked for null.</param>
/// <param name="xfs">The transforms to apply; the array may be null or empty.</param>
public TransformModel(IHostEnvironment env, Schema schemaRoot, IDataTransform[] xfs)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(schemaRoot, nameof(schemaRoot));
    env.CheckValueOrNull(xfs);

    IDataView chain = new EmptyDataView(env, schemaRoot);
    _schemaRoot = chain.Schema;

    // Utils.Size is used so that a null array is handled like an empty one.
    int transformCount = Utils.Size(xfs);
    for (int i = 0; i < transformCount; i++)
    {
        var transform = xfs[i];
        env.AssertValue(transform, "xfs", "Transforms should not be null");
        chain = ApplyTransformUtils.ApplyTransformToData(env, transform, chain);
    }

    _chain = chain;
}
/// <summary>
/// Deserializing constructor: reads the parameter count, training example count, deviance and
/// null deviance from <paramref name="ctx"/>. Both counts must be positive.
/// </summary>
/// <param name="env">The host environment; checked for null and used for decode checks.</param>
/// <param name="ctx">The model load context to read from.</param>
internal ModelStatisticsBase(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.CheckValue(env, nameof(env));
    Env = env;
    Env.AssertValue(ctx);

    // *** Binary Format ***
    // int: count of parameters
    // long: count of training examples
    // float: deviance
    // float: null deviance
    var reader = ctx.Reader;

    ParametersCount = reader.ReadInt32();
    Env.CheckDecode(ParametersCount > 0);

    TrainingExampleCount = reader.ReadInt64();
    Env.CheckDecode(TrainingExampleCount > 0);

    Deviance = reader.ReadFloat();
    NullDeviance = reader.ReadFloat();
}
/// <summary>
/// Save schema associations of role/column-name in <paramref name="rep"/>. The associations are
/// written as a two-column ("Role", "Column") text data view, ordered by role name.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="ch">The channel used for assertions.</param>
/// <param name="schema">The role-mapped schema whose role/column pairs are saved.</param>
/// <param name="rep">The repository writer that receives the role-mapping entry.</param>
internal static void SaveRoleMappings(IHostEnvironment env, IChannel ch, RoleMappedSchema schema, RepositoryWriter rep)
{
    // REVIEW: Should we also save this stuff, for instance, in some portion of the
    // score command or transform?
    Contracts.AssertValue(env);
    env.AssertValue(ch);
    ch.AssertValue(schema);

    ArrayDataViewBuilder builder = new ArrayDataViewBuilder(env);

    // OrderBy is stable, so there is no danger in it "reordering" columns
    // when a role is filled by multiple columns.
    var orderedRoles = schema.GetColumnRoleNames().OrderBy(r => r.Key.Value).ToArray();
    string[] roleNames = orderedRoles.Select(r => r.Key.Value).ToArray();
    string[] columnNames = orderedRoles.Select(r => r.Value).ToArray();

    builder.AddColumn("Role", roleNames);
    builder.AddColumn("Column", columnNames);

    using (var entry = rep.CreateEntry(DirTrainingInfo, RoleMappingFile))
    {
        // REVIEW: It seems very important that we have the role mappings
        // be easily human interpretable and even manipulable, but relying on the
        // text saver/loader means that special characters like '\n' won't be reinterpretable.
        // Such characters in role or column names are considered unlikely enough in practice
        // that the text format is used anyway.
        var saver = new TextSaver(env, new TextSaver.Arguments() { Dense = true, Silent = true });
        var view = builder.GetDataView();
        saver.SaveData(entry.Stream, view, Utils.GetIdentityPermutation(view.Schema.ColumnCount));
    }
}