/// <summary>
/// Builds the engine from a serialized model: rebuilds the transform pipeline on an
/// empty schema-bearing view and, when a predictor is present, appends the default scorer.
/// </summary>
internal BatchPredictionEngine(IHostEnvironment env, Stream modelStream, bool ignoreMissingColumns,
    SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(modelStream);
    Contracts.AssertValueOrNull(inputSchemaDefinition);
    Contracts.AssertValueOrNull(outputSchemaDefinition);

    // Empty source view: it only carries the input schema for the pipeline.
    _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, new TSrc[] { }, inputSchemaDefinition);

    // Rehydrate the transform chain on top of the source view.
    var chain = env.LoadTransforms(modelStream, _srcDataView);

    // Load predictor (if present) and apply default scorer.
    // REVIEW: distinguish the case of predictor / no predictor?
    var predictor = env.LoadPredictorOrNull(modelStream);
    if (predictor != null)
    {
        var roles = ModelFileUtils.LoadRoleMappingsOrNull(env, modelStream);
        if (roles != null)
            chain = env.CreateDefaultScorer(RoleMappedData.CreateOpt(chain, roles), predictor);
        else
            chain = env.CreateDefaultScorer(env.CreateExamples(chain, "Features"), predictor);
    }
    _pipeEngine = new PipeEngine<TDst>(env, chain, ignoreMissingColumns, outputSchemaDefinition);
}
/// <summary>
/// Saves a model in a zip file.
/// </summary>
public static void SaveModel(IHostEnvironment env, IDataTransform tr, string outModelFilePath)
{
    using (var ch = env.Start("SaveModel"))
    using (var fs = File.Create(outModelFilePath))
    {
        // Wrap the transform in examples with no explicit role mapping.
        var examples = env.CreateExamples(tr, null);
        TrainUtils.SaveModel(env, ch, fs, null, examples);
    }
}
/// <summary>
/// Constructor. Loads a predictor and its transforms from a stream and wires a
/// value mapper producing either a single float or a vector of floats.
/// </summary>
/// <param name="env">environment; must not be null</param>
/// <param name="modelStream">stream holding the serialized model (predictor and transforms)</param>
/// <param name="output">name of the output column</param>
/// <param name="outputIsFloat">output is a float (true) or a vector of floats (false)</param>
/// <param name="conc">number of concurrency threads</param>
/// <param name="features">features name</param>
public ValueMapperPredictionEngineFloat(IHostEnvironment env, Stream modelStream,
    string output = "Probability", bool outputIsFloat = true, int conc = 1, string features = "Features")
{
    // Guard clause: validate before touching the field (same exception as before).
    if (env == null)
        throw Contracts.Except("env must not be null");
    _env = env;

    // Empty streaming view: only carries the input schema for LoadTransforms.
    var inputs = new FloatVectorInput[0];
    var view = ComponentCreation.CreateStreamingDataView<FloatVectorInput>(_env, inputs);

    // Predictor and transforms are read from the same stream position,
    // so remember it and rewind before the second load.
    long modelPosition = modelStream.Position;
    _predictor = ComponentCreation.LoadPredictorOrNull(_env, modelStream);
    if (_predictor == null)
        throw _env.Except("Unable to load a model.");
    modelStream.Seek(modelPosition, SeekOrigin.Begin);
    _transforms = ComponentCreation.LoadTransforms(_env, modelStream, view);
    if (_transforms == null)
        throw _env.Except("Unable to load a model.");

    var data = _env.CreateExamples(_transforms, features);
    if (data == null)
        throw _env.Except("Cannot create rows.");
    var scorer = _env.CreateDefaultScorer(data, _predictor);
    if (scorer == null)
        throw _env.Except("Cannot create a scorer.");

    // 'new' can never yield null in C#, so no null check is needed here
    // (the original's check was dead code).
    _valueMapper = new ValueMapperFromTransformFloat<VBuffer<float>>(_env, scorer, features, output, conc: conc);
    if (outputIsFloat)
    {
        _mapper = _valueMapper.GetMapper<VBuffer<float>, float>();
        _mapperVector = null;
    }
    else
    {
        _mapper = null;
        _mapperVector = _valueMapper.GetMapper<VBuffer<float>, VBuffer<float>>();
    }
}
/// <summary>
/// Loads a model file twice (once for the predictor, once for the transforms),
/// builds a default scorer and a float mapper over it.
/// </summary>
/// <param name="modelName">path to the serialized model file</param>
/// <param name="features">name of the features column used to create the examples</param>
public ValueMapperExample(string modelName, string features)
{
    _env = EnvHelper.NewTestEnvironment();
    // FIX: the original opened the file twice and never disposed either stream,
    // leaking two file handles. Wrap each read in a using block.
    using (var predictorStream = File.OpenRead(modelName))
        _predictor = ModelFileUtils.LoadPredictorOrNull(_env, predictorStream);
    var inputs = new Input[0];
    var view = DataViewConstructionUtils.CreateFromEnumerable(_env, inputs);
    using (var transformsStream = File.OpenRead(modelName))
        _transforms = ModelFileUtils.LoadTransforms(_env, view, transformsStream);
    var data = _env.CreateExamples(_transforms, features);
    var scorer = _env.CreateDefaultScorer(data, _predictor);
    // NOTE(review): the mapper is wired to the literal "Features" column rather than
    // the 'features' parameter used above — confirm this asymmetry is intentional.
    _valueMapper = new ValueMapperFromTransformFloat<VBuffer<float>>(_env, scorer, "Features", "Probability");
    _mapper = _valueMapper.GetMapper<VBuffer<float>, float>();
}
/// <summary>
/// Loads a model file twice (once for the predictor, once for the transforms),
/// builds a default scorer and a float mapper over it.
/// </summary>
/// <param name="modelName">path to the serialized model file</param>
/// <param name="features">name of the features column used to create the examples</param>
public ValueMapperExample(string modelName, string features)
{
    _env = EnvHelper.NewTestEnvironment();
    // FIX: the original opened the file twice and never disposed either stream,
    // leaking two file handles. Wrap each read in a using block.
    using (var predictorStream = File.OpenRead(modelName))
        _predictor = _env.LoadPredictorOrNull(predictorStream);
    var inputs = new Input[0];
    var view = _env.CreateStreamingDataView<Input>(inputs);
    using (var transformsStream = File.OpenRead(modelName))
        _transforms = ComponentCreation.LoadTransforms(_env, transformsStream, view);
    var data = _env.CreateExamples(_transforms, features);
    var scorer = _env.CreateDefaultScorer(data, _predictor);
    // NOTE(review): the mapper is wired to the literal "Features" column rather than
    // the 'features' parameter used above — confirm this asymmetry is intentional.
    _valueMapper = new ValueMapperFromTransformFloat<VBuffer<float>>(_env, scorer, "Features", "Probability");
    _mapper = _valueMapper.GetMapper<VBuffer<float>, float>();
}
/// <summary>
/// Resolves the role columns (feature, label, group, weight, name) from the arguments
/// against the input schema and builds a RoleMappedData with any custom columns appended.
/// </summary>
private static RoleMappedData CreateDataFromArgs(IHostEnvironment env, IExceptionContext ectx, IDataView input,
    TrainAndScoreTransformer.ArgumentsBase args, out string feat, out string group)
{
    var schema = input.Schema;
    // For each role: explicit argument wins, otherwise fall back to the conventional default name.
    feat = TrainUtils.MatchNameOrDefaultOrNull(ectx, schema, "FeatureColumn", args.FeatureColumn, DefaultColumnNames.Features);
    var label = TrainUtils.MatchNameOrDefaultOrNull(ectx, schema, "LabelColumn", args.LabelColumn, DefaultColumnNames.Label);
    group = TrainUtils.MatchNameOrDefaultOrNull(ectx, schema, "GroupColumn", args.GroupColumn, DefaultColumnNames.GroupId);
    var weight = TrainUtils.MatchNameOrDefaultOrNull(ectx, schema, "WeightColumn", args.WeightColumn, DefaultColumnNames.Weight);
    var name = TrainUtils.MatchNameOrDefaultOrNull(ectx, schema, "NameColumn", args.NameColumn, DefaultColumnNames.Name);

    var generated = TrainUtils.CheckAndGenerateCustomColumns(ectx, args.CustomColumn);
    var customCols = generated == null
        ? new List<KeyValuePair<RoleMappedSchema.ColumnRole, string>>()
        : generated.ToList();
    if (!string.IsNullOrEmpty(name))
        customCols.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Name, name));
    return env.CreateExamples(input, feat, label: label, group: group, weight: weight, custom: customCols);
}
/// <summary>
/// Constructor. Loads a predictor and its transforms from a stream, builds a default
/// scorer over them and delegates mapper creation to _CreateMapper.
/// </summary>
/// <param name="env">environment</param>
/// <param name="modelStream">stream</param>
/// <param name="conc">number of concurrency threads</param>
/// <param name="features">features column</param>
public ValueMapperPredictionEngine(IHostEnvironment env, Stream modelStream, int conc = 1, string features = "Features")
{
    _env = env;
    if (_env == null)
        throw Contracts.Except("env must not be null");

    // Empty streaming view: only carries the input schema for LoadTransforms.
    var emptyRows = new TRowValue[0];
    var schemaView = ComponentCreation.CreateStreamingDataView<TRowValue>(_env, emptyRows);

    // Predictor and transforms are read from the same stream position,
    // so remember it and rewind before the second load.
    long startPosition = modelStream.Position;
    _predictor = ComponentCreation.LoadPredictorOrNull(_env, modelStream);
    if (_predictor == null)
        throw _env.Except("Unable to load a model.");
    modelStream.Seek(startPosition, SeekOrigin.Begin);
    _transforms = ComponentCreation.LoadTransforms(_env, modelStream, schemaView);
    if (_transforms == null)
        throw _env.Except("Unable to load a model.");

    var examples = _env.CreateExamples(_transforms, features);
    if (examples == null)
        throw _env.Except("Cannot create rows.");
    var scorer = _env.CreateDefaultScorer(examples, _predictor);
    if (scorer == null)
        throw _env.Except("Cannot create a scorer.");
    _CreateMapper(scorer, conc);
}
/// <summary>
/// Finalize the test on a transform, calls the transform,
/// saves the data, saves the models, loads it back, saves the data again,
/// checks the output is the same.
/// </summary>
/// <param name="env">environment</param>
/// <param name="outModelFilePath">model filename</param>
/// <param name="transform">transform to test</param>
/// <param name="source">source (view before applying the transform)</param>
/// <param name="outData">first data file</param>
/// <param name="outData2">second data file</param>
/// <param name="startsWith">Check that outputs is the same on disk after outputting the transformed data after the model was serialized</param>
/// <param name="skipDoubleQuote">skip lines containing an empty quoted pair when comparing</param>
/// <param name="forceDense">save the data in dense format</param>
public static void SerializationTestTransform(IHostEnvironment env, string outModelFilePath,
    IDataTransform transform, IDataView source, string outData, string outData2,
    bool startsWith = false, bool skipDoubleQuote = false, bool forceDense = false)
{
    // Saves model.
    var roles = env.CreateExamples(transform, null);
    using (var ch = env.Start("SaveModel"))
    using (var fs = File.Create(outModelFilePath))
        TrainUtils.SaveModel(env, ch, fs, null, roles);
    if (!File.Exists(outModelFilePath))
        throw new FileNotFoundException(outModelFilePath);

    // We load it again and check the reloaded transform has the same type.
    using (var fs = File.OpenRead(outModelFilePath))
    {
        var tr2 = env.LoadTransforms(fs, source);
        if (tr2 == null)
            throw new Exception(string.Format("Unable to load '{0}'", outModelFilePath));
        if (transform.GetType() != tr2.GetType())
            throw new Exception(string.Format("Type mismatch {0} != {1}", transform.GetType(), tr2.GetType()));
    }

    // Checks the outputs.
    var saver = env.CreateSaver(forceDense ? "Text{dense=+}" : "Text");
    var columns = new int[transform.Schema.Count];
    for (int i = 0; i < columns.Length; ++i)
        columns[i] = i;
    using (var fs2 = File.Create(outData))
        saver.SaveData(fs2, transform, columns);
    // FIX: the original checked outModelFilePath here instead of the data file just written.
    if (!File.Exists(outData))
        throw new FileNotFoundException(outData);

    // Check we have the same output.
    using (var fs = File.OpenRead(outModelFilePath))
    {
        var tr = env.LoadTransforms(fs, source);
        saver = env.CreateSaver(forceDense ? "Text{dense=+}" : "Text");
        using (var fs2 = File.Create(outData2))
            saver.SaveData(fs2, tr, columns);
    }

    var t1 = File.ReadAllLines(outData);
    var t2 = File.ReadAllLines(outData2);
    if (t1.Length != t2.Length)
        throw new Exception(string.Format("Not the same number of lines: {0} != {1}", t1.Length, t2.Length));
    for (int i = 0; i < t1.Length; ++i)
    {
        if (skipDoubleQuote && (t1[i].Contains("\"\"\t\"\"") || t2[i].Contains("\"\"\t\"\"")))
            continue;
        if ((startsWith && !t1[i].StartsWith(t2[i])) || (!startsWith && t1[i] != t2[i]))
        {
            if (t1[i].EndsWith("\t5\t0:\"\""))
            {
                // Expand the sparse empty suffix before comparing the two lines.
                var a = t1[i].Substring(0, t1[i].Length - "\t5\t0:\"\"".Length);
                a += "\t\"\"\t\"\"\t\"\"\t\"\"\t\"\"";
                var b = t2[i];
                if ((startsWith && !a.StartsWith(b)) || (!startsWith && a != b))
                    throw new Exception(string.Format("2-Mismatch on line {0}/{3}:\n{1}\n{2}", i, t1[i], t2[i], t1.Length));
            }
            else
            {
                // The test might fail because one side is dense and the other is sparse.
                throw new Exception(string.Format("3-Mismatch on line {0}/{3}:\n{1}\n{2}", i, t1[i], t2[i], t1.Length));
            }
        }
    }
}