Example #1
        public void TestI_DescribeTransformSaveDataAndZip()
        {
            /*using (*/ var env = EnvHelper.NewTestEnvironment();
            {
                var inputs = InputOutput.CreateInputs();
                var data   = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
                var args   = new DescribeTransform.Arguments()
                {
                    columns = new[] { "X" }
                };
                var tr = new DescribeTransform(env, args, data);

                var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;

                var outputDataFilePath = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName);
                StreamHelper.SavePredictions(env, tr, outputDataFilePath);
                Assert.IsTrue(File.Exists(outputDataFilePath));

                var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
                StreamHelper.SaveModel(env, tr, outModelFilePath);
                Assert.IsTrue(File.Exists(outModelFilePath));

                var outputDataFilePath2 = FileHelper.GetOutputFile("outputDataFilePath2.txt", methodName);
                StreamHelper.SavePredictions(env, outModelFilePath, outputDataFilePath2, data);
                Assert.IsTrue(File.Exists(outputDataFilePath2));

                var d1 = File.ReadAllText(outputDataFilePath);
                Assert.IsTrue(d1.Length > 0);
                var d2 = File.ReadAllText(outputDataFilePath2);
                Assert.AreEqual(d1, d2);
            }
        }
Example #2
        public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
        {
            var addedCols        = DataViewConstructionUtils.GetSchemaColumns(Transformer.AddedSchema);
            var addedSchemaShape = SchemaShape.Create(SchemaBuilder.MakeSchema(addedCols));

            var result   = inputSchema.Columns.ToDictionary(x => x.Name);
            var inputDef = InternalSchemaDefinition.Create(typeof(TSrc), Transformer.InputSchemaDefinition);

            foreach (var col in inputDef.Columns)
            {
                if (!result.TryGetValue(col.ColumnName, out var column))
                {
                    throw Contracts.ExceptSchemaMismatch(nameof(inputSchema), "input", col.ColumnName);
                }

                SchemaShape.GetColumnTypeShape(col.ColumnType, out var vecKind, out var itemType, out var isKey);
                // Special treatment for vectors: if we expect variable vector, we also allow fixed-size vector.
                if (itemType != column.ItemType || isKey != column.IsKey ||
                    vecKind == SchemaShape.Column.VectorKind.Scalar && column.Kind != SchemaShape.Column.VectorKind.Scalar ||
                    vecKind == SchemaShape.Column.VectorKind.Vector && column.Kind != SchemaShape.Column.VectorKind.Vector ||
                    vecKind == SchemaShape.Column.VectorKind.VariableVector && column.Kind == SchemaShape.Column.VectorKind.Scalar)
                {
                    throw Contracts.ExceptSchemaMismatch(nameof(inputSchema), "input", col.ColumnName, col.ColumnType.ToString(), column.GetTypeString());
                }
            }

            foreach (var addedCol in addedSchemaShape.Columns)
            {
                result[addedCol.Name] = addedCol;
            }

            return(new SchemaShape(result.Values));
        }
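The vector-kind condition in the check above is easy to misread, so here is a minimal sketch that restates the same compatibility rule in isolation; IsVectorKindCompatible is a hypothetical helper written only for illustration and is not a member of the codebase.

        // Hypothetical helper (illustration only): restates the rule enforced above.
        // Item types and key-ness must match exactly; a declared scalar requires a scalar,
        // a declared fixed-size vector requires a fixed-size vector, and a declared
        // variable-size vector accepts any vector kind except a scalar.
        private static bool IsVectorKindCompatible(SchemaShape.Column.VectorKind expected, SchemaShape.Column.VectorKind actual)
        {
            switch (expected)
            {
                case SchemaShape.Column.VectorKind.Scalar:
                    return actual == SchemaShape.Column.VectorKind.Scalar;
                case SchemaShape.Column.VectorKind.Vector:
                    return actual == SchemaShape.Column.VectorKind.Vector;
                case SchemaShape.Column.VectorKind.VariableVector:
                    return actual != SchemaShape.Column.VectorKind.Scalar;
                default:
                    return false;
            }
        }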
        public void TestLambdaColumnPassThroughTransform()
        {
            /*using (*/
            var host = EnvHelper.NewTestEnvironment();
            {
                var inputs = new InputOutputU[] {
                    new InputOutputU()
                    {
                        X = new float[] { 0.1f, 1.1f }, Y = 0
                    },
                    new InputOutputU()
                    {
                        X = new float[] { 0.2f, 1.2f }, Y = 1
                    },
                    new InputOutputU()
                    {
                        X = new float[] { 0.3f, 1.3f }, Y = 2
                    }
                };

                var data       = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                var lambdaView = LambdaColumnHelper.Create <VBuffer <float>, VBuffer <float> >(host,
                                                                                               "Lambda", data, "X", "XX", new VectorDataViewType(NumberDataViewType.Single, 2),
                                                                                               new VectorDataViewType(NumberDataViewType.Single, 2),
                (in VBuffer <float> src, ref VBuffer <float> dst) =>
                {
                    // Build the destination values in a fresh array and wrap them in a new
                    // VBuffer instead of mutating the buffer contents in place.
                    var srcValues = src.GetValues();
                    var dstValues = new float[2];
                    dstValues[0] = srcValues[0] + 1f;
                    dstValues[1] = srcValues[1] + 1f;
                    dst = new VBuffer <float>(2, dstValues);
                });
            }
        }
        public void TestChainTransformSerialize()
        {
            /*using (*/ var host = EnvHelper.NewTestEnvironment();
            {
                var inputs = new[] {
                    new ExampleA()
                    {
                        X = new float[] { 1, 10, 100 }
                    },
                    new ExampleA()
                    {
                        X = new float[] { 2, 3, 5 }
                    }
                };

                IDataView      loader = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                IDataTransform data   = host.CreateTransform("Scaler{col=X4:X}", loader);
                data = host.CreateTransform("ChainTrans{ xf1=Scaler{col=X2:X} xf2=Poly{col=X3:X2} }", data);

                // We create a specific folder in build/UnitTest which will contain the output.
                var methodName       = System.Reflection.MethodBase.GetCurrentMethod().Name;
                var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
                var outData          = FileHelper.GetOutputFile("outData.txt", methodName);
                var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);
                TestTransformHelper.SerializationTestTransform(host, outModelFilePath, data, loader, outData, outData2);
            }
        }
Example #5
        public void TestI_DescribeTransformCode()
        {
            /*using (*/ var env = EnvHelper.NewTestEnvironment();
            {
                var inputs = InputOutput.CreateInputs();
                var data   = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
                var args   = new DescribeTransform.Arguments()
                {
                    columns = new[] { "X" }
                };
                var tr = new DescribeTransform(env, args, data);

                var values = new List <int>();
                using (var cursor = tr.GetRowCursor(tr.Schema))
                {
                    var columnGetter = cursor.GetGetter <int>(SchemaHelper._dc(1, cursor));
                    while (cursor.MoveNext())
                    {
                        int got = 0;
                        columnGetter(ref got);
                        values.Add((int)got);
                    }
                }
                Assert.AreEqual(values.Count, 4);
            }
        }
        public void TestTagViewTransform()
        {
            /*using (*/ var host = EnvHelper.NewTestEnvironment();
            {
                var inputs = new[] {
                    new ExampleA()
                    {
                        X = new float[] { 0, 1 }
                    },
                    new ExampleA()
                    {
                        X = new float[] { 2, 3 }
                    }
                };

                IDataView loader = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                var       data   = host.CreateTransform("Scaler{col=X1:X}", loader);
                data = host.CreateTransform("tag{t=memory}", data);

                var methodName       = System.Reflection.MethodBase.GetCurrentMethod().Name;
                var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
                var outData          = FileHelper.GetOutputFile("outData.txt", methodName);
                var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);
                TestTransformHelper.SerializationTestTransform(host, outModelFilePath, data, loader, outData, outData2);
            }
        }
Example #7
 /// <summary>
 /// Create a new <see cref="IDataView"/> over an enumerable of the items of user-defined type.
 /// The user maintains ownership of the <paramref name="data"/> and the resulting data view will
 /// never alter the contents of the <paramref name="data"/>.
 /// Since <see cref="IDataView"/> is assumed to be immutable, the user is expected to support
 /// multiple enumerations of the <paramref name="data"/> that would return the same results, unless
 /// the user knows that the data will only be cursored once.
 ///
 /// One typical usage for streaming data view could be: create the data view that lazily loads data
 /// as needed, then apply pre-trained transformations to it and cursor through it for transformation
 /// results.
 /// </summary>
 /// <typeparam name="TRow">The user-defined item type.</typeparam>
 /// <param name="data">The enumerable data containing type <typeparamref name="TRow"/> to convert to an<see cref="IDataView"/>.</param>
 /// <param name="schemaDefinition">The optional schema definition of the data view to create. If <c>null</c>,
 /// the schema definition is inferred from <typeparamref name="TRow"/>.</param>
 /// <returns>The constructed <see cref="IDataView"/>.</returns>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[LoadFromEnumerable](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadFromEnumerable.cs)]
 /// ]]>
 /// </format>
 /// </example>
 public IDataView LoadFromEnumerable <TRow>(IEnumerable <TRow> data, SchemaDefinition schemaDefinition = null)
     where TRow : class
 {
     _env.CheckValue(data, nameof(data));
     _env.CheckValueOrNull(schemaDefinition);
     return(DataViewConstructionUtils.CreateFromEnumerable(_env, data, schemaDefinition));
 }
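For orientation, a minimal usage sketch of this method as it is normally reached through MLContext.Data in ML.NET; HousingData, its values, and the sketch's method name are illustrative assumptions, and the usual Microsoft.ML / Microsoft.ML.Data usings are taken for granted.

        // Illustrative POCO (assumption, not part of the codebase).
        public class HousingData
        {
            public float Size { get; set; }
            public float Price { get; set; }
        }

        public void LoadFromEnumerableUsageSketch()
        {
            var mlContext = new MLContext();
            var rows = new[]
            {
                new HousingData() { Size = 1.1f, Price = 1.2f },
                new HousingData() { Size = 1.9f, Price = 2.3f }
            };
            // No SchemaDefinition is supplied, so the schema is inferred from HousingData;
            // the enumerable itself is never copied or modified by the data view.
            IDataView dataView = mlContext.Data.LoadFromEnumerable(rows);
        }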
Example #8
 /// <summary>
 /// Create a new <see cref="IDataView"/> over an enumerable of the items of user-defined type using the provided <see cref="DataViewSchema"/>,
 /// which might contain more information about the schema than the type can capture.
 /// </summary>
 /// <remarks>
 /// The user maintains ownership of the <paramref name="data"/> and the resulting data view will
 /// never alter the contents of the <paramref name="data"/>.
 /// Since <see cref="IDataView"/> is assumed to be immutable, the user is expected to support
 /// multiple enumerations of the <paramref name="data"/> that would return the same results, unless
 /// the user knows that the data will only be cursored once.
 /// One typical usage for streaming data view could be: create the data view that lazily loads data
 /// as needed, then apply pre-trained transformations to it and cursor through it for transformation
 /// results.
 /// One practical usage of this would be to supply the feature column names through the <see cref="DataViewSchema.Annotations"/>.
 /// </remarks>
 /// <typeparam name="TRow">The user-defined item type.</typeparam>
 /// <param name="data">The enumerable data containing type <typeparamref name="TRow"/> to convert to an <see cref="IDataView"/>.</param>
 /// <param name="schema">The schema of the returned <see cref="IDataView"/>.</param>
 /// <returns>An <see cref="IDataView"/> with the given <paramref name="schema"/>.</returns>
 public IDataView LoadFromEnumerable <TRow>(IEnumerable <TRow> data, DataViewSchema schema)
     where TRow : class
 {
     _env.CheckValue(data, nameof(data));
     _env.CheckValue(schema, nameof(schema));
     return(DataViewConstructionUtils.CreateFromEnumerable(_env, data, schema));
 }
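As a sketch of the DataViewSchema overload, the snippet below reuses the input schema extracted while loading a saved model, so the in-memory view keeps column annotations that the POCO type alone cannot express; "model.zip", PredictionInput, and the sketch's method name are hypothetical, the call goes through MLContext.Data as in ML.NET proper, and the usual usings are assumed.

        // Illustrative POCO (assumption).
        public class PredictionInput
        {
            public float[] Features { get; set; }
        }

        public void LoadFromEnumerableWithSchemaSketch()
        {
            var mlContext = new MLContext();
            // The input schema comes back as an out parameter while loading the model.
            ITransformer model = mlContext.Model.Load("model.zip", out DataViewSchema inputSchema);

            var rows = new List <PredictionInput>();
            // Passing the schema explicitly keeps annotations such as feature slot names
            // attached to the resulting view.
            IDataView dataView = mlContext.Data.LoadFromEnumerable(rows, inputSchema);
        }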
Example #9
 /// <summary>
 /// Create a prediction engine for one-time prediction.
 /// It's mainly used in conjunction with <see cref="Load(Stream, out DataViewSchema)"/>,
 /// where input schema is extracted during loading the model.
 /// </summary>
 /// <typeparam name="TSrc">The class that defines the input data.</typeparam>
 /// <typeparam name="TDst">The class that defines the output data.</typeparam>
 /// <param name="transformer">The transformer to use for prediction.</param>
 /// <param name="inputSchema">Input schema.</param>
 public PredictionEngine <TSrc, TDst> CreatePredictionEngine <TSrc, TDst>(ITransformer transformer, DataViewSchema inputSchema)
     where TSrc : class
     where TDst : class, new()
 {
     return(transformer.CreatePredictionEngine <TSrc, TDst>(_env, false,
                                                            DataViewConstructionUtils.GetSchemaDefinition <TSrc>(_env, inputSchema)));
 }
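The doc comment above points at Load(Stream, out DataViewSchema), so here is a minimal end-to-end sketch of that pairing through MLContext; ModelInput, ModelOutput, "model.zip", and the feature values are illustrative assumptions.

        // Illustrative input/output POCOs (assumptions).
        public class ModelInput
        {
            public float[] Features { get; set; }
        }

        public class ModelOutput
        {
            public float Score { get; set; }
        }

        public void CreatePredictionEngineUsageSketch()
        {
            var mlContext = new MLContext();
            ITransformer model;
            DataViewSchema inputSchema;
            using (var stream = File.OpenRead("model.zip"))
                model = mlContext.Model.Load(stream, out inputSchema);

            // The input schema extracted during loading is handed straight to the engine.
            var engine = mlContext.Model.CreatePredictionEngine <ModelInput, ModelOutput>(model, inputSchema);
            ModelOutput prediction = engine.Predict(new ModelInput() { Features = new float[] { 1f, 2f, 3f } });
        }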
        public void TestI_PolynomialTransformSerialize()
        {
            /*using (*/
            var host = EnvHelper.NewTestEnvironment();
            {
                var inputs = new[] {
                    new ExampleA()
                    {
                        X = new float[] { 1, 10, 100 }
                    },
                    new ExampleA()
                    {
                        X = new float[] { 2, 3, 5 }
                    }
                };

                IDataView loader = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                var       data   = host.CreateTransform("poly{col=poly:X d=3}", loader);

                // We create a specific folder in build/UnitTest which will contain the output.
                var methodName       = System.Reflection.MethodBase.GetCurrentMethod().Name;
                var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
                var outData          = FileHelper.GetOutputFile("outData.txt", methodName);
                var outData2         = FileHelper.GetOutputFile("outData2.txt", methodName);

                // This function serializes the output data twice, once before saving the pipeline and once after loading it back.
                // It checks that both runs produce the same result.
                TestTransformHelper.SerializationTestTransform(host, outModelFilePath, data, loader, outData, outData2);
            }
        }
        public void TestSelectTagContactViewTransform()
        {
            var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var firstData  = FileHelper.GetOutputFile("first.idv", methodName);
            var outData    = FileHelper.GetOutputFile("outData.txt", methodName);
            var outData2   = FileHelper.GetOutputFile("outData2.txt", methodName);

            /*using (*/ var env = EnvHelper.NewTestEnvironment();
            {
                var inputs = new[] {
                    new ExampleA()
                    {
                        X = new float[] { 0, 1, 4 }
                    },
                    new ExampleA()
                    {
                        X = new float[] { 2, 3, 7 }
                    }
                };

                // Create IDV
                IDataView loader = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
                var       saver  = ComponentCreation.CreateSaver(env, "binary");
                using (var ch = env.Start("save"))
                {
                    using (var fs0 = env.CreateOutputFile(firstData))
                        DataSaverUtils.SaveDataView(ch, saver, loader, fs0, true);

                    // Create parallel pipeline
                    loader = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
                    var data = env.CreateTransform("Scaler{col=X1:X}", loader);
                    data = env.CreateTransform(string.Format("selecttag{{t=first s=second f={0}}}", firstData), data);
                    data = env.CreateTransform("Scaler{col=X1:X}", data);
                    var merged = env.CreateTransform("append{t=first}", data);

                    // Save the outcome
                    var text    = env.CreateSaver("Text");
                    var columns = new int[merged.Schema.Count];
                    for (int i = 0; i < columns.Length; ++i)
                    {
                        columns[i] = i;
                    }
                    using (var fs2 = File.Create(outData))
                        text.SaveData(fs2, merged, columns);

                    // Final checking
                    var lines = File.ReadAllLines(outData);
                    if (!lines.Any())
                    {
                        throw new Exception("Empty file.");
                    }
                    if (lines.Length != 9)
                    {
                        throw new Exception("Some lines are missing.");
                    }
                }
            }
        }
Example #12
        public void TestScikitAPI_SimplePredictor()
        {
            var inputs = new[] {
                new ExampleA()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 3, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 4, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 4, 7 }
                },
            };

            var inputs2 = new[] {
                new ExampleA()
                {
                    X = new float[] { -1, -10, -100 }
                },
                new ExampleA()
                {
                    X = new float[] { -2, -3, -5 }
                },
                new ExampleA()
                {
                    X = new float[] { 3, 4, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 3, 4, 7 }
                },
            };

            /*using (*/
            var host = EnvHelper.NewTestEnvironment(conc: 1);
            {
                var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
                {
                    var predictor = pipe.Train(data, feature: "X");
                    Assert.IsTrue(predictor != null);
                    var data2       = new StreamingDataFrame(DataViewConstructionUtils.CreateFromEnumerable(host, inputs2));
                    var predictions = pipe.Predict(data2);
                    var df          = DataFrameIO.ReadView(predictions);
                    Assert.AreEqual(df.Shape, new Tuple <int, int>(4, 12));
                    var dfs  = df.ToString();
                    var dfs2 = dfs.Replace("\n", ";");
                    Assert.IsTrue(dfs2.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000"));
                }
            }
        }
        /// <summary>
        /// Constructor
        /// </summary>
        /// <param name="env">environment</param>
        /// <param name="modelStream">stream</param>
        /// <param name="output">name of the output column</param>
        /// <param name="outputIsFloat">output is a gloat (true) or a vector of floats (false)</param>
        /// <param name="conc">number of concurrency threads</param>
        /// <param name="features">features name</param>
        public ValueMapperPredictionEngineFloat(IHostEnvironment env, Stream modelStream,
                                                string output   = "Probability", bool outputIsFloat = true, int conc = 1,
                                                string features = "Features")
        {
            _env = env;
            if (_env == null)
            {
                throw Contracts.Except("env must not be null");
            }
            var inputs = new FloatVectorInput[0];
            var view   = DataViewConstructionUtils.CreateFromEnumerable <FloatVectorInput>(_env, inputs);

            long modelPosition = modelStream.Position;

            _predictor = ModelFileUtils.LoadPredictorOrNull(_env, modelStream);
            if (_predictor == null)
            {
                throw _env.Except("Unable to load a model.");
            }
            modelStream.Seek(modelPosition, SeekOrigin.Begin);
            _transforms = ModelFileUtils.LoadTransforms(_env, view, modelStream);
            if (_transforms == null)
            {
                throw _env.Except("Unable to load a model.");
            }

            var data = _env.CreateExamples(_transforms, features);

            if (data == null)
            {
                throw _env.Except("Cannot create rows.");
            }
            var scorer = _env.CreateDefaultScorer(data, _predictor);

            if (scorer == null)
            {
                throw _env.Except("Cannot create a scorer.");
            }

            _valueMapper = new ValueMapperFromTransformFloat <VBuffer <float> >(_env,
                                                                                scorer, features, output, conc: conc);
            if (_valueMapper == null)
            {
                throw _env.Except("Cannot create a mapper.");
            }
            if (outputIsFloat)
            {
                _mapper       = _valueMapper.GetMapper <VBuffer <float>, float>();
                _mapperVector = null;
            }
            else
            {
                _mapper       = null;
                _mapperVector = _valueMapper.GetMapper <VBuffer <float>, VBuffer <float> >();
            }
        }
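Since only the constructor is documented above, the sketch below is limited to construction; the model path, column names, and the host environment argument are illustrative assumptions, and any prediction call would depend on members not shown here.

        // Hedged construction-only sketch; "model.zip" and the column names are illustrative.
        public static ValueMapperPredictionEngineFloat CreateEngineSketch(IHostEnvironment env)
        {
            using (var modelStream = File.OpenRead("model.zip"))
            {
                // Maps a feature vector to the single float read from the "Probability" column.
                return new ValueMapperPredictionEngineFloat(env, modelStream,
                                                            output: "Probability", outputIsFloat: true, conc: 1,
                                                            features: "Features");
            }
        }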
        void TestDnnImageFeaturizer()
        {
            // Onnxruntime supports Ubuntu 16.04 but not CentOS,
            // so skip this test on non-Windows platforms.
            if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                return;
            }


            var samplevector = GetSampleArrayData();

            var dataView = DataViewConstructionUtils.CreateFromList(Env,
                                                                    new TestData[] {
                new TestData()
                {
                    data_0 = samplevector
                },
            });

            var xyData = new List <TestDataXY> {
                new TestDataXY()
                {
                    A = new float[inputSize]
                }
            };
            var stringData = new List <TestDataDifferntType> {
                new TestDataDifferntType()
                {
                    data_0 = new string[inputSize]
                }
            };
            var sizeData = new List <TestDataSize> {
                new TestDataSize()
                {
                    data_0 = new float[2]
                }
            };
            var pipe = new DnnImageFeaturizerEstimator(Env, "output_1", m => m.ModelSelector.ResNet18(m.Environment, m.OutputColumn, m.InputColumn), "data_0");

            var invalidDataWrongNames      = ML.Data.ReadFromEnumerable(xyData);
            var invalidDataWrongTypes      = ML.Data.ReadFromEnumerable(stringData);
            var invalidDataWrongVectorSize = ML.Data.ReadFromEnumerable(sizeData);

            TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);
            TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);
            pipe.GetOutputSchema(SchemaShape.Create(invalidDataWrongVectorSize.Schema));
            try
            {
                pipe.Fit(invalidDataWrongVectorSize);
                Assert.False(true);
            }
            catch (ArgumentOutOfRangeException) { }
            catch (InvalidOperationException) { }
        }
        public void TestDnnImageFeaturizer()
        {
            // Skip on x86: this test uses too much memory (over the 2 GB limit on x86)
            // and is very likely to hit memory-related issues when running on CI.
            // TODO: optimize memory usage in the related code and enable the x86 test run.
            if (!Environment.Is64BitProcess)
            {
                return;
            }

            var samplevector = GetSampleArrayData();

            var dataView = DataViewConstructionUtils.CreateFromList(Env,
                                                                    new TestData[] {
                new TestData()
                {
                    data_0 = samplevector
                },
            });

            var xyData = new List <TestDataXY> {
                new TestDataXY()
                {
                    A = new float[InputSize]
                }
            };
            var stringData = new List <TestDataDifferntType> {
                new TestDataDifferntType()
                {
                    data_0 = new string[InputSize]
                }
            };
            var sizeData = new List <TestDataSize> {
                new TestDataSize()
                {
                    data_0 = new float[2]
                }
            };
            var pipe = ML.Transforms.DnnFeaturizeImage("output_1", m => m.ModelSelector.ResNet18(m.Environment, m.OutputColumn, m.InputColumn), "data_0");

            var invalidDataWrongNames      = ML.Data.LoadFromEnumerable(xyData);
            var invalidDataWrongTypes      = ML.Data.LoadFromEnumerable(stringData);
            var invalidDataWrongVectorSize = ML.Data.LoadFromEnumerable(sizeData);

            TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);
            TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);
            pipe.GetOutputSchema(SchemaShape.Create(invalidDataWrongVectorSize.Schema));
            try
            {
                pipe.Fit(invalidDataWrongVectorSize);
                Assert.False(true);
            }
            catch (ArgumentOutOfRangeException) { }
            catch (InvalidOperationException) { }
        }
Example #16
        /// <summary>
        /// The 'reapply' constructor.
        /// </summary>
        private StatefulFilterTransform(IHostEnvironment env, StatefulFilterTransform <TSrc, TDst, TState> transform, IDataView newSource)
        {
            _host.AssertValue(transform);
            _host.AssertValue(newSource);
            _source      = newSource;
            _filterFunc  = transform._filterFunc;
            _typedSource = TypedCursorable <TSrc> .Create(_host, newSource, false, transform._inputSchemaDefinition);

            _addedSchema = transform._addedSchema;
            _bindings    = new ColumnBindings(newSource.Schema, DataViewConstructionUtils.GetSchemaColumns(_addedSchema));
        }
Example #17
            public PredictionEngineExample(string modelName)
            {
                _env = EnvHelper.NewTestEnvironment();

                var view = DataViewConstructionUtils.CreateFromEnumerable(_env, new FloatVectorInput[] { });
                var pipe = DataViewConstructionUtils.LoadPipeWithPredictor(_env, File.OpenRead(modelName),
                                                                           new EmptyDataView(_env, view.Schema));
                var transformer = new TransformWrapper(_env, pipe);

                _predictor = _env.CreatePredictionEngine <FloatVectorInput, FloatOutput>(transformer);
            }
 private static Func <DataViewSchema, IRowToRowMapper> StreamChecker(IHostEnvironment env, Stream modelStream)
 {
     env.CheckValue(modelStream, nameof(modelStream));
     return(schema =>
     {
         var pipe = DataViewConstructionUtils.LoadPipeWithPredictor(env, modelStream, new EmptyDataView(env, schema));
         var transformer = new TransformWrapper(env, pipe);
         env.CheckParam(((ITransformer)transformer).IsRowToRowMapper, nameof(transformer), "Must be a row to row mapper");
         return ((ITransformer)transformer).GetRowToRowMapper(schema);
     });
 }
Example #19
        private protected PredictionEngineBase(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns,
                                               SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
        {
            Contracts.CheckValue(env, nameof(env));
            env.AssertValue(transformer);
            Transformer = transformer;
            var makeMapper = TransformerChecker(env, transformer);

            env.AssertValue(makeMapper);
            _inputRow = DataViewConstructionUtils.CreateInputRow <TSrc>(env, inputSchemaDefinition);
            PredictionEngineCore(env, _inputRow, makeMapper(_inputRow.Schema), ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition, out _disposer, out _outputRow);
        }
Example #20
        public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
        {
            var addedCols        = DataViewConstructionUtils.GetSchemaColumns(Transformer.AddedSchema);
            var addedSchemaShape = SchemaShape.Create(new Schema(addedCols));

            var result = inputSchema.Columns.ToDictionary(x => x.Name);

            foreach (var addedCol in addedSchemaShape.Columns)
            {
                result[addedCol.Name] = addedCol;
            }

            return(new SchemaShape(result.Values));
        }
        public void Testl_ShakeInputTransformVectorAdd()
        {
            /*using (*/ var host = EnvHelper.NewTestEnvironment();
            {
                var inputs = new[] {
                    new SHExampleA()
                    {
                        X = new float[] { 0, 1 }
                    },
                    new SHExampleA()
                    {
                        X = new float[] { 2, 3 }
                    }
                };

                var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);

                var args = new ShakeInputTransform.Arguments
                {
                    inputColumn      = "X",
                    inputFeaturesInt = new[] { 0, 1 },
                    outputColumns    = new[] { "yo" },
                    values           = "-10,10;-100,100",
                    aggregation      = ShakeInputTransform.ShakeAggregation.add
                };

                var trv = new ExampleValueMapperVector();
                if (trv == null)
                {
                    throw new Exception("Invalid");
                }
                var shake = new ShakeInputTransform(host, args, data, new IValueMapper[] { trv });

                using (var cursor = shake.GetRowCursor(shake.Schema))
                {
                    var outValues = new List <float>();
                    var colGetter = cursor.GetGetter <VBuffer <float> >(SchemaHelper._dc(1, cursor));
                    while (cursor.MoveNext())
                    {
                        VBuffer <float> got = new VBuffer <float>();
                        colGetter(ref got);
                        outValues.AddRange(got.DenseValues());
                    }
                    if (outValues.Count != 4)
                    {
                        throw new Exception("expected 4");
                    }
                }
            }
        }
Example #22
        internal BatchPredictionEngine(IHostEnvironment env, IDataView dataPipeline, bool ignoreMissingColumns,
                                       SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
        {
            Contracts.AssertValue(env);
            Contracts.AssertValue(dataPipeline);
            Contracts.AssertValueOrNull(inputSchemaDefinition);
            Contracts.AssertValueOrNull(outputSchemaDefinition);

            // Initialize pipe.
            _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, new TSrc[] { }, inputSchemaDefinition);
            var pipe = ApplyTransformUtils.ApplyAllTransformsToData(env, dataPipeline, _srcDataView);

            _pipeEngine = new PipeEngine <TDst>(env, pipe, ignoreMissingColumns, outputSchemaDefinition);
        }
Example #23
        internal BatchPredictionEngine(IHostEnvironment env, Stream modelStream, bool ignoreMissingColumns,
                                       SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
        {
            Contracts.AssertValue(env);
            Contracts.AssertValue(modelStream);
            Contracts.AssertValueOrNull(inputSchemaDefinition);
            Contracts.AssertValueOrNull(outputSchemaDefinition);

            // Initialize pipe.
            _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, new TSrc[] { }, inputSchemaDefinition);
            var pipe = DataViewConstructionUtils.LoadPipeWithPredictor(env, modelStream, _srcDataView);

            _pipeEngine = new PipeEngine <TDst>(env, pipe, ignoreMissingColumns, outputSchemaDefinition);
        }
        private void TestResampleTransform(float ratio)
        {
            /*using (*/ var env = EnvHelper.NewTestEnvironment(conc: 1);
            {
                var inputs = new InputOutput[] {
                    new InputOutput()
                    {
                        X = new float[] { 0, 1 }, Y = 1
                    },
                    new InputOutput()
                    {
                        X = new float[] { 0, 1 }, Y = 0
                    },
                    new InputOutput()
                    {
                        X = new float[] { 0, 1 }, Y = 2
                    },
                    new InputOutput()
                    {
                        X = new float[] { 0, 1 }, Y = 3
                    },
                };

                var data = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
                var args = new ResampleTransform.Arguments {
                    lambda = ratio, cache = false
                };
                var tr     = new ResampleTransform(env, args, data);
                var values = new List <int>();
                using (var cursor = tr.GetRowCursor(tr.Schema))
                {
                    var columnGetter = cursor.GetGetter <int>(SchemaHelper._dc(1, cursor));
                    while (cursor.MoveNext())
                    {
                        int got = 0;
                        columnGetter(ref got);
                        values.Add((int)got);
                    }
                }
                if (ratio < 1 && values.Count > 8)
                {
                    throw new Exception("ResampleTransform did not work.");
                }
                if (ratio > 1 && values.Count < 1)
                {
                    throw new Exception("ResampleTransform did not work.");
                }
            }
        }
            public ValueMapperExample(string modelName, string features)
            {
                _env       = EnvHelper.NewTestEnvironment();
                _predictor = ModelFileUtils.LoadPredictorOrNull(_env, File.OpenRead(modelName));
                var inputs = new Input[0];

                var view = DataViewConstructionUtils.CreateFromEnumerable(_env, inputs);

                _transforms = ModelFileUtils.LoadTransforms(_env, view, File.OpenRead(modelName));
                var data   = _env.CreateExamples(_transforms, features);
                var scorer = _env.CreateDefaultScorer(data, _predictor);

                _valueMapper = new ValueMapperFromTransformFloat <VBuffer <float> >(_env, scorer, "Features", "Probability");
                _mapper      = _valueMapper.GetMapper <VBuffer <float>, float>();
            }
        void TestDnnImageFeaturizer()
        {
            var samplevector = GetSampleArrayData();

            var dataView = DataViewConstructionUtils.CreateFromList(Env,
                                                                    new TestData[] {
                new TestData()
                {
                    data_0 = samplevector
                },
            });

            var xyData = new List <TestDataXY> {
                new TestDataXY()
                {
                    A = new float[inputSize]
                }
            };
            var stringData = new List <TestDataDifferntType> {
                new TestDataDifferntType()
                {
                    data_0 = new string[inputSize]
                }
            };
            var sizeData = new List <TestDataSize> {
                new TestDataSize()
                {
                    data_0 = new float[2]
                }
            };
            var pipe = ML.Transforms.DnnFeaturizeImage("output_1", m => m.ModelSelector.ResNet18(m.Environment, m.OutputColumn, m.InputColumn), "data_0");

            var invalidDataWrongNames      = ML.Data.LoadFromEnumerable(xyData);
            var invalidDataWrongTypes      = ML.Data.LoadFromEnumerable(stringData);
            var invalidDataWrongVectorSize = ML.Data.LoadFromEnumerable(sizeData);

            TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);
            TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);
            pipe.GetOutputSchema(SchemaShape.Create(invalidDataWrongVectorSize.Schema));
            try
            {
                pipe.Fit(invalidDataWrongVectorSize);
                Assert.False(true);
            }
            catch (ArgumentOutOfRangeException) { }
            catch (InvalidOperationException) { }
        }
Example #27
        public void TestScikitAPI_DelegateEnvironmentVerbose0()
        {
            var inputs = new[] {
                new ExampleA()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 3, 5 }
                }
            };

            var inputs2 = new[] {
                new ExampleA()
                {
                    X = new float[] { -1, -10, -100 }
                },
                new ExampleA()
                {
                    X = new float[] { -2, -3, -5 }
                }
            };

            var        stdout = new List <string>();
            var        stderr = new List <string>();
            ILogWriter logout = new LogWriter(s => stdout.Add(s));
            ILogWriter logerr = new LogWriter(s => stderr.Add(s));

            /*using (*/
            var host = new DelegateEnvironment(seed: 0, outWriter: logout, errWriter: logerr, verbose: 0);

            {
                ComponentHelper.AddStandardComponents(host);
                var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host: host))
                {
                    var predictor = pipe.Train(data, feature: "X");
                    Assert.IsTrue(predictor != null);
                }
            }
            Assert.AreEqual(stdout.Count, 0);
            Assert.AreEqual(stderr.Count, 0);
        }
        public void TestI_PolynomialTransformSparse()
        {
            var inputs = new[] {
                new ExampleASparse()
                {
                    X = new VBuffer <float> (5, 3, new float[] { 1, 10, 100 }, new int[] { 0, 2, 4 })
                },
                new ExampleASparse()
                {
                    X = new VBuffer <float> (5, 3, new float[] { 2, 3, 5 }, new int[] { 1, 2, 3 })
                }
            };
            /*using (*/
            var host = EnvHelper.NewTestEnvironment();
            {
                var            data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                List <float[]> values;
                CommonTestPolynomialTransform(host, data, 5, out values);

                List <float[]> valuesDense;
                data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                CommonTestPolynomialTransform(host, data, 5, out valuesDense);

                if (values.Count != valuesDense.Count)
                {
                    throw new Exception("Mismath in number of observations.");
                }
                for (int i = 0; i < values.Count; ++i)
                {
                    if (values[i].Length != valuesDense[i].Length)
                    {
                        throw new Exception("Mismath in dimensions.");
                    }
                    for (int j = 0; j < values[i].Length; ++j)
                    {
                        if (values[i][j] != valuesDense[i][j])
                        {
                            throw new Exception("Mismath in value.");
                        }
                    }
                }
            }
        }
Example #29
        public void TestScikitAPI_SimpleTransform()
        {
            var inputs = new[] {
                new ExampleA()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 3, 5 }
                }
            };

            var inputs2 = new[] {
                new ExampleA()
                {
                    X = new float[] { -1, -10, -100 }
                },
                new ExampleA()
                {
                    X = new float[] { -2, -3, -5 }
                }
            };

            /*using (*/
            var host = EnvHelper.NewTestEnvironment(conc: 1);
            {
                var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, host: host))
                {
                    var predictor = pipe.Train(data);
                    Assert.IsTrue(predictor != null);
                    var data2       = DataViewConstructionUtils.CreateFromEnumerable(host, inputs2);
                    var predictions = pipe.Transform(data2);
                    var df          = DataFrameIO.ReadView(predictions);
                    Assert.AreEqual(df.Shape, new Tuple <int, int>(2, 9));
                    var dfs  = df.ToString();
                    var dfs2 = dfs.Replace("\n", ";");
                    Assert.AreEqual(dfs2, "X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8;-1,-10,-100,1,10,100,100,1000,10000;-2,-3,-5,4,6,10,9,15,25");
                }
            }
        }
 public void TestI_ScalerTransformDenseMinMax()
 {
     var inputs = new[] {
         new ExampleA()
         {
             X = new float[] { 1, 10, 100 }
         },
         new ExampleA()
         {
             X = new float[] { 2, 3, 5 }
         }
     };
     /*using (*/
     var host = EnvHelper.NewTestEnvironment();
     {
         var            data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
         List <float[]> values;
         CommonTestScalerTransform(host, data, 3, ScalerTransform.ScalerStrategy.minMax, out values);
     }
 }