internal BatchPredictionEngine(IHostEnvironment env, Stream modelStream, bool ignoreMissingColumns,
                                       SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
        {
            Contracts.AssertValue(env);
            Contracts.AssertValue(modelStream);
            Contracts.AssertValueOrNull(inputSchemaDefinition);
            Contracts.AssertValueOrNull(outputSchemaDefinition);

            // An empty source view gives the loaded transform chain a schema to bind against.
            _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, new TSrc[] { }, inputSchemaDefinition);

            // Rehydrate the transform chain stored in the model stream.
            var pipe = env.LoadTransforms(modelStream, _srcDataView);

            // If the model also carries a predictor, append a default scorer on top of the
            // transforms, preferring the role mappings saved with the model when present.
            // REVIEW: distinguish the case of predictor / no predictor?
            var predictor = env.LoadPredictorOrNull(modelStream);
            if (predictor != null)
            {
                var roles = ModelFileUtils.LoadRoleMappingsOrNull(env, modelStream);
                if (roles != null)
                {
                    pipe = env.CreateDefaultScorer(RoleMappedData.CreateOpt(pipe, roles), predictor);
                }
                else
                {
                    pipe = env.CreateDefaultScorer(env.CreateExamples(pipe, "Features"), predictor);
                }
            }

            _pipeEngine = new PipeEngine <TDst>(env, pipe, ignoreMissingColumns, outputSchemaDefinition);
        }
Example 2
0
 /// <summary>
 /// Serializes a transform pipeline into a zip model file on disk.
 /// </summary>
 /// <param name="env">hosting environment used to open a channel and build role-mapped data</param>
 /// <param name="tr">transform whose pipeline is saved</param>
 /// <param name="outModelFilePath">destination path of the zip model file</param>
 public static void SaveModel(IHostEnvironment env, IDataTransform tr, string outModelFilePath)
 {
     using (var channel = env.Start("SaveModel"))
     {
         using (var stream = File.Create(outModelFilePath))
         {
             // No explicit roles: CreateExamples with null infers the default mapping.
             var examples = env.CreateExamples(tr, null);
             TrainUtils.SaveModel(env, channel, stream, null, examples);
         }
     }
 }
Example 3
0
        /// <summary>
        /// Constructor. Loads transforms and a predictor from the model stream and builds a
        /// value mapper that scores a vector of floats.
        /// </summary>
        /// <param name="env">environment; must not be null</param>
        /// <param name="modelStream">stream holding the serialized model</param>
        /// <param name="output">name of the output column</param>
        /// <param name="outputIsFloat">output is a float (true) or a vector of floats (false)</param>
        /// <param name="conc">number of concurrency threads</param>
        /// <param name="features">features column name</param>
        public ValueMapperPredictionEngineFloat(IHostEnvironment env, Stream modelStream,
                                                string output   = "Probability", bool outputIsFloat = true, int conc = 1,
                                                string features = "Features")
        {
            _env = env;
            if (_env == null)
            {
                throw Contracts.Except("env must not be null");
            }
            // An empty input set gives the loaded transforms a schema to bind against.
            var inputs = new FloatVectorInput[0];
            var view   = ComponentCreation.CreateStreamingDataView <FloatVectorInput>(_env, inputs);

            // Predictor and transforms are both read from the same stream; remember where it starts.
            long modelPosition = modelStream.Position;

            _predictor = ComponentCreation.LoadPredictorOrNull(_env, modelStream);
            if (_predictor == null)
            {
                throw _env.Except("Unable to load a model.");
            }
            // Rewind so the transforms are read from the beginning of the same model.
            modelStream.Seek(modelPosition, SeekOrigin.Begin);
            _transforms = ComponentCreation.LoadTransforms(_env, modelStream, view);
            if (_transforms == null)
            {
                throw _env.Except("Unable to load a model.");
            }

            var data = _env.CreateExamples(_transforms, features);

            if (data == null)
            {
                throw _env.Except("Cannot create rows.");
            }
            var scorer = _env.CreateDefaultScorer(data, _predictor);

            if (scorer == null)
            {
                throw _env.Except("Cannot create a scorer.");
            }

            // 'new' never returns null in C#, so no null check is needed on the mapper itself
            // (the original checked it; that branch was unreachable).
            _valueMapper = new ValueMapperFromTransformFloat <VBuffer <float> >(_env,
                                                                                scorer, features, output, conc: conc);
            if (outputIsFloat)
            {
                // Scalar output: one float per input vector.
                _mapper       = _valueMapper.GetMapper <VBuffer <float>, float>();
                _mapperVector = null;
            }
            else
            {
                // Vector output: a VBuffer<float> per input vector.
                _mapper       = null;
                _mapperVector = _valueMapper.GetMapper <VBuffer <float>, VBuffer <float> >();
            }
        }
            /// <summary>
            /// Loads a predictor and its transforms from a model file and builds a
            /// float-vector-to-probability mapper on top of the default scorer.
            /// </summary>
            /// <param name="modelName">path of the serialized model file</param>
            /// <param name="features">name of the features column used to create examples</param>
            public ValueMapperExample(string modelName, string features)
            {
                _env = EnvHelper.NewTestEnvironment();
                // Dispose the file streams: the original opened the model file twice
                // and leaked both handles.
                using (var predictorStream = File.OpenRead(modelName))
                {
                    _predictor = ModelFileUtils.LoadPredictorOrNull(_env, predictorStream);
                }
                var inputs = new Input[0];

                var view = DataViewConstructionUtils.CreateFromEnumerable(_env, inputs);

                using (var transformsStream = File.OpenRead(modelName))
                {
                    _transforms = ModelFileUtils.LoadTransforms(_env, view, transformsStream);
                }
                var data   = _env.CreateExamples(_transforms, features);
                var scorer = _env.CreateDefaultScorer(data, _predictor);

                // NOTE(review): 'features' is not used here; the input column is hard-coded to
                // "Features" — confirm with callers before unifying them.
                _valueMapper = new ValueMapperFromTransformFloat <VBuffer <float> >(_env, scorer, "Features", "Probability");
                _mapper      = _valueMapper.GetMapper <VBuffer <float>, float>();
            }
Example 5
0
            /// <summary>
            /// Loads a predictor and its transforms from a model file and builds a
            /// float-vector-to-probability mapper on top of the default scorer.
            /// </summary>
            /// <param name="modelName">path of the serialized model file</param>
            /// <param name="features">name of the features column used to create examples</param>
            public ValueMapperExample(string modelName, string features)
            {
                _env = EnvHelper.NewTestEnvironment();
                // Dispose the file streams: the original opened the model file twice
                // and leaked both handles.
                using (var predictorStream = File.OpenRead(modelName))
                {
                    _predictor = _env.LoadPredictorOrNull(predictorStream);
                }
                var inputs = new Input[0];

                var view = _env.CreateStreamingDataView <Input>(inputs);

                using (var transformsStream = File.OpenRead(modelName))
                {
                    _transforms = ComponentCreation.LoadTransforms(_env, transformsStream, view);
                }
                var data   = _env.CreateExamples(_transforms, features);
                var scorer = _env.CreateDefaultScorer(data, _predictor);

                // NOTE(review): 'features' is not used here; the input column is hard-coded to
                // "Features" — confirm with callers before unifying them.
                _valueMapper = new ValueMapperFromTransformFloat <VBuffer <float> >(_env, scorer, "Features", "Probability");
                _mapper      = _valueMapper.GetMapper <VBuffer <float>, float>();
            }
Example 6
0
        /// <summary>
        /// Builds a RoleMappedData from transformer arguments, resolving each role column
        /// against the input schema (falling back to the default column names).
        /// Outputs the resolved feature and group column names through the out parameters.
        /// </summary>
        private static RoleMappedData CreateDataFromArgs(IHostEnvironment env, IExceptionContext ectx, IDataView input,
                                                         TrainAndScoreTransformer.ArgumentsBase args, out string feat, out string group)
        {
            var schema = input.Schema;

            // Resolve the standard roles; each falls back to its conventional default name.
            feat = TrainUtils.MatchNameOrDefaultOrNull(ectx, schema, "FeatureColumn", args.FeatureColumn, DefaultColumnNames.Features);
            var label = TrainUtils.MatchNameOrDefaultOrNull(ectx, schema, "LabelColumn", args.LabelColumn, DefaultColumnNames.Label);

            group = TrainUtils.MatchNameOrDefaultOrNull(ectx, schema, "GroupColumn", args.GroupColumn, DefaultColumnNames.GroupId);
            var weight = TrainUtils.MatchNameOrDefaultOrNull(ectx, schema, "WeightColumn", args.WeightColumn, DefaultColumnNames.Weight);
            var name   = TrainUtils.MatchNameOrDefaultOrNull(ectx, schema, "NameColumn", args.NameColumn, DefaultColumnNames.Name);

            // Collect any user-defined custom role mappings (empty list when none were given).
            var generated  = TrainUtils.CheckAndGenerateCustomColumns(ectx, args.CustomColumn);
            var customCols = generated == null
                ? new List <KeyValuePair <RoleMappedSchema.ColumnRole, string> >()
                : generated.ToList();

            // The name column, when present, is carried as an extra custom role.
            if (!string.IsNullOrEmpty(name))
            {
                customCols.Add(new KeyValuePair <RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Name, name));
            }
            return env.CreateExamples(input, feat, label: label, group: group, weight: weight, custom: customCols);
        }
Example 7
0
        /// <summary>
        /// Constructor. Loads transforms and a predictor from the model stream, builds the
        /// default scorer on top of them and wires up the value mapper.
        /// </summary>
        /// <param name="env">environment; must not be null</param>
        /// <param name="modelStream">stream holding the serialized model</param>
        /// <param name="conc">number of concurrency threads</param>
        /// <param name="features">features column</param>
        public ValueMapperPredictionEngine(IHostEnvironment env, Stream modelStream,
                                           int conc = 1, string features = "Features")
        {
            _env = env;
            if (_env == null)
                throw Contracts.Except("env must not be null");

            // An empty row set gives the loaded transforms a schema to bind against.
            var emptyRows  = new TRowValue[0];
            var sourceView = ComponentCreation.CreateStreamingDataView <TRowValue>(_env, emptyRows);

            // Predictor and transforms are both read from the same stream; remember where it starts.
            long startPosition = modelStream.Position;

            _predictor = ComponentCreation.LoadPredictorOrNull(_env, modelStream);
            if (_predictor == null)
                throw _env.Except("Unable to load a model.");

            // Rewind so the transforms are read from the beginning of the same model.
            modelStream.Seek(startPosition, SeekOrigin.Begin);
            _transforms = ComponentCreation.LoadTransforms(_env, modelStream, sourceView);
            if (_transforms == null)
                throw _env.Except("Unable to load a model.");

            var examples = _env.CreateExamples(_transforms, features);
            if (examples == null)
                throw _env.Except("Cannot create rows.");

            var scorer = _env.CreateDefaultScorer(examples, _predictor);
            if (scorer == null)
                throw _env.Except("Cannot create a scorer.");

            _CreateMapper(scorer, conc);
        }
Example 8
0
        /// <summary>
        /// Finalize the test on a transform, calls the transform,
        /// saves the data, saves the models, loads it back, saves the data again,
        /// checks the output is the same.
        /// </summary>
        /// <param name="env">environment</param>
        /// <param name="outModelFilePath">model filename</param>
        /// <param name="transform">transform to test</param>
        /// <param name="source">source (view before applying the transform)</param>
        /// <param name="outData">first data file</param>
        /// <param name="outData2">second data file</param>
        /// <param name="startsWith">Check that outputs is the same on disk after outputting the transformed data after the model was serialized</param>
        /// <param name="skipDoubleQuote">skip lines containing empty quoted fields when comparing outputs</param>
        /// <param name="forceDense">save data in dense text format instead of the default</param>
        public static void SerializationTestTransform(IHostEnvironment env,
                                                      string outModelFilePath, IDataTransform transform,
                                                      IDataView source, string outData, string outData2,
                                                      bool startsWith = false, bool skipDoubleQuote = false,
                                                      bool forceDense = false)
        {
            // Saves model.
            var roles = env.CreateExamples(transform, null);

            using (var ch = env.Start("SaveModel"))
                using (var fs = File.Create(outModelFilePath))
                    TrainUtils.SaveModel(env, ch, fs, null, roles);
            if (!File.Exists(outModelFilePath))
            {
                throw new FileNotFoundException(outModelFilePath);
            }

            // We load it again.
            using (var fs = File.OpenRead(outModelFilePath))
            {
                var tr2 = env.LoadTransforms(fs, source);
                if (tr2 == null)
                {
                    throw new Exception(string.Format("Unable to load '{0}'", outModelFilePath));
                }
                if (transform.GetType() != tr2.GetType())
                {
                    throw new Exception(string.Format("Type mismatch {0} != {1}", transform.GetType(), tr2.GetType()));
                }
            }

            // Checks the outputs.
            var saver   = env.CreateSaver(forceDense ? "Text{dense=+}" : "Text");
            var columns = new int[transform.Schema.Count];

            for (int i = 0; i < columns.Length; ++i)
            {
                columns[i] = i;
            }
            using (var fs2 = File.Create(outData))
                saver.SaveData(fs2, transform, columns);

            // BUG FIX: the original tested outModelFilePath here, but the file just written
            // (and the one named by the exception) is outData.
            if (!File.Exists(outData))
            {
                throw new FileNotFoundException(outData);
            }

            // Check we have the same output.
            using (var fs = File.OpenRead(outModelFilePath))
            {
                var tr = env.LoadTransforms(fs, source);
                saver = env.CreateSaver(forceDense ? "Text{dense=+}" : "Text");
                using (var fs2 = File.Create(outData2))
                    saver.SaveData(fs2, tr, columns);
            }

            var t1 = File.ReadAllLines(outData);
            var t2 = File.ReadAllLines(outData2);

            if (t1.Length != t2.Length)
            {
                throw new Exception(string.Format("Not the same number of lines: {0} != {1}", t1.Length, t2.Length));
            }
            for (int i = 0; i < t1.Length; ++i)
            {
                // Optionally ignore lines with adjacent empty quoted fields (formatting noise).
                if (skipDoubleQuote && (t1[i].Contains("\"\"\t\"\"") || t2[i].Contains("\"\"\t\"\"")))
                {
                    continue;
                }
                if ((startsWith && !t1[i].StartsWith(t2[i])) || (!startsWith && t1[i] != t2[i]))
                {
                    // A sparse line ending in "\t5\t0:""" may be equivalent to a dense
                    // expansion of five empty quoted fields; retry the comparison that way.
                    if (t1[i].EndsWith("\t5\t0:\"\""))
                    {
                        var a = t1[i].Substring(0, t1[i].Length - "\t5\t0:\"\"".Length);
                        a += "\t\"\"\t\"\"\t\"\"\t\"\"\t\"\"";
                        var b = t2[i];
                        if ((startsWith && !a.StartsWith(b)) || (!startsWith && a != b))
                        {
                            throw new Exception(string.Format("2-Mismatch on line {0}/{3}:\n{1}\n{2}", i, t1[i], t2[i], t1.Length));
                        }
                    }
                    else
                    {
                        // The test might fail because one side is dense and the other is sparse.
                        throw new Exception(string.Format("3-Mismatch on line {0}/{3}:\n{1}\n{2}", i, t1[i], t2[i], t1.Length));
                    }
                }
            }
        }