public void TestI_DescribeTransformSaveDataAndZip()
{
    // Round-trips a DescribeTransform: save predictions, save the model zip,
    // reload the model and save predictions again, then check both prediction
    // files exist and carry identical content.
    /*using (*/
    var env = EnvHelper.NewTestEnvironment();
    {
        var inputs = InputOutput.CreateInputs();
        var data = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
        var args = new DescribeTransform.Arguments() { columns = new[] { "X" } };
        var describe = new DescribeTransform(env, args, data);

        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;

        // First pass: predictions straight from the in-memory transform.
        var outputDataFilePath = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName);
        StreamHelper.SavePredictions(env, describe, outputDataFilePath);
        Assert.IsTrue(File.Exists(outputDataFilePath));

        // Persist the transform as a model zip.
        var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        StreamHelper.SaveModel(env, describe, outModelFilePath);
        Assert.IsTrue(File.Exists(outModelFilePath));

        // Second pass: predictions from the serialized model.
        var outputDataFilePath2 = FileHelper.GetOutputFile("outputDataFilePath2.txt", methodName);
        StreamHelper.SavePredictions(env, outModelFilePath, outputDataFilePath2, data);
        Assert.IsTrue(File.Exists(outputDataFilePath2));

        // Both passes must produce the same non-empty output.
        var contentBefore = File.ReadAllText(outputDataFilePath);
        Assert.IsTrue(contentBefore.Length > 0);
        var contentAfter = File.ReadAllText(outputDataFilePath2);
        Assert.AreEqual(contentBefore, contentAfter);
    }
}
/// <summary>
/// Propagates the schema shape through this transformer: verifies every column the
/// <typeparamref name="TSrc"/> input definition expects is present in
/// <paramref name="inputSchema"/> with a compatible type/shape, then merges in the
/// columns the transformer adds (added columns overwrite same-named input columns).
/// </summary>
/// <param name="inputSchema">Shape of the incoming data.</param>
/// <returns>The resulting output schema shape.</returns>
public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
    var addedCols = DataViewConstructionUtils.GetSchemaColumns(Transformer.AddedSchema);
    var addedSchemaShape = SchemaShape.Create(SchemaBuilder.MakeSchema(addedCols));
    // Index input columns by name for O(1) lookup during validation.
    var result = inputSchema.Columns.ToDictionary(x => x.Name);
    var inputDef = InternalSchemaDefinition.Create(typeof(TSrc), Transformer.InputSchemaDefinition);
    foreach (var col in inputDef.Columns)
    {
        if (!result.TryGetValue(col.ColumnName, out var column))
        {
            throw Contracts.ExceptSchemaMismatch(nameof(inputSchema), "input", col.ColumnName);
        }
        SchemaShape.GetColumnTypeShape(col.ColumnType, out var vecKind, out var itemType, out var isKey);
        // Special treatment for vectors: if we expect variable vector, we also allow fixed-size vector.
        // Scalar and fixed Vector expectations require an exact kind match; only the
        // VariableVector expectation tolerates a (fixed) Vector actual — it rejects only Scalar.
        if (itemType != column.ItemType || isKey != column.IsKey || vecKind == SchemaShape.Column.VectorKind.Scalar && column.Kind != SchemaShape.Column.VectorKind.Scalar || vecKind == SchemaShape.Column.VectorKind.Vector && column.Kind != SchemaShape.Column.VectorKind.Vector || vecKind == SchemaShape.Column.VectorKind.VariableVector && column.Kind == SchemaShape.Column.VectorKind.Scalar)
        {
            throw Contracts.ExceptSchemaMismatch(nameof(inputSchema), "input", col.ColumnName, col.ColumnType.ToString(), column.GetTypeString());
        }
    }
    // Added columns take precedence over input columns of the same name.
    foreach (var addedCol in addedSchemaShape.Columns)
    {
        result[addedCol.Name] = addedCol;
    }
    return (new SchemaShape(result.Values));
}
// NOTE(review): this method appears truncated in this chunk — the braces opened
// here are not closed within the visible source; the remainder presumably cursors
// over lambdaView and checks the "XX" column. Verify against the full file.
public void TestLambdaColumnPassThroughTransform()
{
    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        // Three rows with a 2-element float vector X and a label-like Y.
        var inputs = new InputOutputU[] {
            new InputOutputU() { X = new float[] { 0.1f, 1.1f }, Y = 0 },
            new InputOutputU() { X = new float[] { 0.2f, 1.2f }, Y = 1 },
            new InputOutputU() { X = new float[] { 0.3f, 1.3f }, Y = 2 }
        };
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        // Adds a computed column "XX" = X + 1 (element-wise) while passing "X" through.
        var lambdaView = LambdaColumnHelper.Create<VBuffer<float>, VBuffer<float>>(host,
            "Lambda", data, "X", "XX",
            new VectorDataViewType(NumberDataViewType.Single, 2),
            new VectorDataViewType(NumberDataViewType.Single, 2),
            (in VBuffer<float> src, ref VBuffer<float> dst) =>
            {
                // Allocate a fresh dense 2-element buffer for the output.
                dst = new VBuffer<float>(2, new float[2]);
                dst.Values[0] = src.Values[0] + 1f;
                dst.Values[1] = src.Values[1] + 1f;
            });
public void TestChainTransformSerialize()
{
    // Builds Scaler -> ChainTrans(Scaler, Poly) over two in-memory rows and
    // verifies the chain serializes and re-loads with identical output.
    /*using (*/
    var env = EnvHelper.NewTestEnvironment();
    {
        var rows = new[]
        {
            new ExampleA() { X = new float[] { 1, 10, 100 } },
            new ExampleA() { X = new float[] { 2, 3, 5 } }
        };
        IDataView source = DataViewConstructionUtils.CreateFromEnumerable(env, rows);
        IDataTransform chained = env.CreateTransform("Scaler{col=X4:X}", source);
        chained = env.CreateTransform("ChainTrans{ xf1=Scaler{col=X2:X} xf2=Poly{col=X3:X2} }", chained);

        // We create a specific folder in build/UnitTest which will contain the output.
        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        var dataPathBefore = FileHelper.GetOutputFile("outData.txt", methodName);
        var dataPathAfter = FileHelper.GetOutputFile("outData2.txt", methodName);

        TestTransformHelper.SerializationTestTransform(env, modelPath, chained, source, dataPathBefore, dataPathAfter);
    }
}
public void TestI_DescribeTransformCode()
{
    // Runs DescribeTransform over the sample inputs and checks it yields one
    // value per input row (4 rows expected).
    /*using (*/
    var env = EnvHelper.NewTestEnvironment();
    {
        var inputs = InputOutput.CreateInputs();
        var data = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
        var args = new DescribeTransform.Arguments() { columns = new[] { "X" } };
        var tr = new DescribeTransform(env, args, data);
        var values = new List<int>();
        using (var cursor = tr.GetRowCursor(tr.Schema))
        {
            var columnGetter = cursor.GetGetter<int>(SchemaHelper._dc(1, cursor));
            while (cursor.MoveNext())
            {
                int got = 0;
                columnGetter(ref got);
                // Fix: removed redundant (int) cast — 'got' is already an int.
                values.Add(got);
            }
        }
        // Fix: expected value goes first in Assert.AreEqual(expected, actual)
        // so failure messages report correctly.
        Assert.AreEqual(4, values.Count);
    }
}
public void TestTagViewTransform()
{
    // Applies Scaler then a memory tag transform and validates the pipeline
    // survives serialization with unchanged output.
    /*using (*/
    var env = EnvHelper.NewTestEnvironment();
    {
        var rows = new[]
        {
            new ExampleA() { X = new float[] { 0, 1 } },
            new ExampleA() { X = new float[] { 2, 3 } }
        };
        IDataView source = DataViewConstructionUtils.CreateFromEnumerable(env, rows);
        var pipeline = env.CreateTransform("Scaler{col=X1:X}", source);
        pipeline = env.CreateTransform("tag{t=memory}", pipeline);

        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        var dataPathBefore = FileHelper.GetOutputFile("outData.txt", methodName);
        var dataPathAfter = FileHelper.GetOutputFile("outData2.txt", methodName);

        TestTransformHelper.SerializationTestTransform(env, modelPath, pipeline, source, dataPathBefore, dataPathAfter);
    }
}
/// <summary>
/// Create a new <see cref="IDataView"/> over an enumerable of items of a user-defined type.
/// The caller keeps ownership of <paramref name="data"/>; the resulting data view never
/// alters its contents. Because <see cref="IDataView"/> is assumed immutable, the caller
/// is expected to support repeated enumerations of <paramref name="data"/> yielding the
/// same results, unless the data will only be cursored once.
///
/// A typical streaming usage: build the data view so it lazily loads data as needed,
/// apply pre-trained transformations, and cursor through it for the results.
/// </summary>
/// <typeparam name="TRow">The user-defined item type.</typeparam>
/// <param name="data">The enumerable of <typeparamref name="TRow"/> to expose as an <see cref="IDataView"/>.</param>
/// <param name="schemaDefinition">Optional schema definition for the created view; when <c>null</c>,
/// the schema is inferred from <typeparamref name="TRow"/>.</param>
/// <returns>The constructed <see cref="IDataView"/>.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[LoadFromEnumerable](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadFromEnumerable.cs)]
/// ]]>
/// </format>
/// </example>
public IDataView LoadFromEnumerable<TRow>(IEnumerable<TRow> data, SchemaDefinition schemaDefinition = null)
    where TRow : class
{
    _env.CheckValue(data, nameof(data));
    _env.CheckValueOrNull(schemaDefinition);
    return DataViewConstructionUtils.CreateFromEnumerable(_env, data, schemaDefinition);
}
/// <summary>
/// Create a new <see cref="IDataView"/> over an enumerable of items of a user-defined type,
/// using the supplied <see cref="DataViewSchema"/>, which may carry more information than
/// the type alone can capture.
/// </summary>
/// <remarks>
/// The caller keeps ownership of <paramref name="data"/>; the resulting data view never
/// alters its contents. Because <see cref="IDataView"/> is assumed immutable, the caller
/// is expected to support repeated enumerations of <paramref name="data"/> yielding the
/// same results, unless the data will only be cursored once.
/// A typical streaming usage: build the data view so it lazily loads data as needed,
/// apply pre-trained transformations, and cursor through it for the results.
/// One practical use is supplying feature column names through <see cref="DataViewSchema.Annotations"/>.
/// </remarks>
/// <typeparam name="TRow">The user-defined item type.</typeparam>
/// <param name="data">The enumerable of <typeparamref name="TRow"/> to expose as an <see cref="IDataView"/>.</param>
/// <param name="schema">The schema of the returned <see cref="IDataView"/>.</param>
/// <returns>An <see cref="IDataView"/> with the given <paramref name="schema"/>.</returns>
public IDataView LoadFromEnumerable<TRow>(IEnumerable<TRow> data, DataViewSchema schema)
    where TRow : class
{
    _env.CheckValue(data, nameof(data));
    _env.CheckValue(schema, nameof(schema));
    return DataViewConstructionUtils.CreateFromEnumerable(_env, data, schema);
}
/// <summary>
/// Create a prediction engine for one-time prediction.
/// Mainly used together with <see cref="Load(Stream, out DataViewSchema)"/>,
/// where the input schema is extracted while loading the model.
/// </summary>
/// <typeparam name="TSrc">The class that defines the input data.</typeparam>
/// <typeparam name="TDst">The class that defines the output data.</typeparam>
/// <param name="transformer">The transformer to use for prediction.</param>
/// <param name="inputSchema">Input schema.</param>
public PredictionEngine<TSrc, TDst> CreatePredictionEngine<TSrc, TDst>(ITransformer transformer, DataViewSchema inputSchema)
    where TSrc : class
    where TDst : class, new()
{
    // Derive the schema definition for TSrc from the supplied data view schema;
    // 'false' = do not ignore missing columns.
    var schemaDefinition = DataViewConstructionUtils.GetSchemaDefinition<TSrc>(_env, inputSchema);
    return transformer.CreatePredictionEngine<TSrc, TDst>(_env, false, schemaDefinition);
}
public void TestI_PolynomialTransformSerialize()
{
    // Degree-3 polynomial expansion over two rows; checks serialization fidelity.
    /*using (*/
    var env = EnvHelper.NewTestEnvironment();
    {
        var rows = new[]
        {
            new ExampleA() { X = new float[] { 1, 10, 100 } },
            new ExampleA() { X = new float[] { 2, 3, 5 } }
        };
        IDataView source = DataViewConstructionUtils.CreateFromEnumerable(env, rows);
        var poly = env.CreateTransform("poly{col=poly:X d=3}", source);

        // We create a specific folder in build/UnitTest which will contain the output.
        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        var dataPathBefore = FileHelper.GetOutputFile("outData.txt", methodName);
        var dataPathAfter = FileHelper.GetOutputFile("outData2.txt", methodName);

        // Serializes the output data twice — once before saving the pipeline and once
        // after reloading it — and checks both runs produce the same result.
        TestTransformHelper.SerializationTestTransform(env, modelPath, poly, source, dataPathBefore, dataPathAfter);
    }
}
// Exercises the selecttag/append transforms: saves a first dataset to an IDV file,
// builds a parallel pipeline that tags and re-selects it, appends both branches,
// saves the merged output as text and sanity-checks the line count.
public void TestSelectTagContactViewTransform()
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var firstData = FileHelper.GetOutputFile("first.idv", methodName);
    var outData = FileHelper.GetOutputFile("outData.txt", methodName);
    var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);
    /*using (*/
    var env = EnvHelper.NewTestEnvironment();
    {
        var inputs = new[] {
            new ExampleA() { X = new float[] { 0, 1, 4 } },
            new ExampleA() { X = new float[] { 2, 3, 7 } }
        };

        // Create IDV
        IDataView loader = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
        var saver = ComponentCreation.CreateSaver(env, "binary");
        using (var ch = env.Start("save"))
        {
            using (var fs0 = env.CreateOutputFile(firstData))
                DataSaverUtils.SaveDataView(ch, saver, loader, fs0, true);

            // Create parallel pipeline
            // (a fresh view over the same rows, tagged so it can be appended later)
            loader = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
            var data = env.CreateTransform("Scaler{col=X1:X}", loader);
            data = env.CreateTransform(string.Format("selecttag{{t=first s=second f={0}}}", firstData), data);
            data = env.CreateTransform("Scaler{col=X1:X}", data);
            var merged = env.CreateTransform("append{t=first}", data);

            // Save the outcome
            var text = env.CreateSaver("Text");
            var columns = new int[merged.Schema.Count];
            for (int i = 0; i < columns.Length; ++i)
            {
                columns[i] = i;
            }
            using (var fs2 = File.Create(outData))
                text.SaveData(fs2, merged, columns);

            // Final checking: header + appended rows should total 9 lines.
            var lines = File.ReadAllLines(outData);
            if (!lines.Any())
            {
                throw new Exception("Empty file.");
            }
            if (lines.Length != 9)
            {
                throw new Exception("Some lines are missing.");
            }
        }
    }
}
// Trains poly + KMeans(k=2) on one set of rows, predicts on a second set through a
// StreamingDataFrame, and checks the prediction frame's shape and leading content.
public void TestScikitAPI_SimplePredictor()
{
    var inputs = new[] {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 7 } },
    };
    var inputs2 = new[] {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } },
        new ExampleA() { X = new float[] { 3, 4, 5 } },
        new ExampleA() { X = new float[] { 3, 4, 7 } },
    };
    /*using (*/
    var host = EnvHelper.NewTestEnvironment(conc: 1);
    {
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
        {
            var predictor = pipe.Train(data, feature: "X");
            Assert.IsTrue(predictor != null);
            var data2 = new StreamingDataFrame(DataViewConstructionUtils.CreateFromEnumerable(host, inputs2));
            var predictions = pipe.Predict(data2);
            var df = DataFrameIO.ReadView(predictions);
            // 4 rows; 9 poly features + PredictedLabel + 2 scores = 12 columns.
            Assert.AreEqual(df.Shape, new Tuple<int, int>(4, 12));
            var dfs = df.ToString();
            var dfs2 = dfs.Replace("\n", ";");
            // Verify header and first (expanded) row of the prediction frame.
            Assert.IsTrue(dfs2.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000"));
        }
    }
}
/// <summary>
/// Constructor
/// </summary>
/// <param name="env">environment</param>
/// <param name="modelStream">stream</param>
/// <param name="output">name of the output column</param>
/// <param name="outputIsFloat">output is a float (true) or a vector of floats (false)</param>
/// <param name="conc">number of concurrency threads</param>
/// <param name="features">features name</param>
public ValueMapperPredictionEngineFloat(IHostEnvironment env, Stream modelStream,
    string output = "Probability", bool outputIsFloat = true, int conc = 1,
    string features = "Features")
{
    _env = env;
    if (_env == null)
    {
        throw Contracts.Except("env must not be null");
    }
    // Empty typed view used only to carry the input schema for transform loading.
    var inputs = new FloatVectorInput[0];
    var view = DataViewConstructionUtils.CreateFromEnumerable<FloatVectorInput>(_env, inputs);
    // Remember the stream position so the transforms can be read from the same
    // model stream after the predictor has been consumed from it.
    long modelPosition = modelStream.Position;
    _predictor = ModelFileUtils.LoadPredictorOrNull(_env, modelStream);
    if (_predictor == null)
    {
        throw _env.Except("Unable to load a model.");
    }
    // Rewind to re-read the same model content for the transform chain.
    modelStream.Seek(modelPosition, SeekOrigin.Begin);
    _transforms = ModelFileUtils.LoadTransforms(_env, view, modelStream);
    if (_transforms == null)
    {
        throw _env.Except("Unable to load a model.");
    }
    var data = _env.CreateExamples(_transforms, features);
    if (data == null)
    {
        throw _env.Except("Cannot create rows.");
    }
    var scorer = _env.CreateDefaultScorer(data, _predictor);
    if (scorer == null)
    {
        throw _env.Except("Cannot create a scorer.");
    }
    _valueMapper = new ValueMapperFromTransformFloat<VBuffer<float>>(_env, scorer, features, output, conc: conc);
    if (_valueMapper == null)
    {
        throw _env.Except("Cannot create a mapper.");
    }
    // Pick the mapper variant matching the requested output arity.
    if (outputIsFloat)
    {
        _mapper = _valueMapper.GetMapper<VBuffer<float>, float>();
        _mapperVector = null;
    }
    else
    {
        _mapper = null;
        _mapperVector = _valueMapper.GetMapper<VBuffer<float>, VBuffer<float>>();
    }
}
// Validates the DnnImageFeaturizerEstimator (ResNet18) accepts valid data and rejects
// inputs with wrong column names, wrong types, or a wrong vector size.
void TestDnnImageFeaturizer()
{
    // Onnxruntime supports Ubuntu 16.04, but not CentOS
    // Do not execute on CentOS image
    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
    {
        return;
    }

    var samplevector = GetSampleArrayData();
    var dataView = DataViewConstructionUtils.CreateFromList(Env,
        new TestData[] {
            new TestData() { data_0 = samplevector },
        });

    // Invalid fixtures: wrong column name, wrong element type, wrong vector length.
    var xyData = new List<TestDataXY> { new TestDataXY() { A = new float[inputSize] } };
    var stringData = new List<TestDataDifferntType> { new TestDataDifferntType() { data_0 = new string[inputSize] } };
    var sizeData = new List<TestDataSize> { new TestDataSize() { data_0 = new float[2] } };
    var pipe = new DnnImageFeaturizerEstimator(Env, "output_1",
        m => m.ModelSelector.ResNet18(m.Environment, m.OutputColumn, m.InputColumn), "data_0");

    var invalidDataWrongNames = ML.Data.ReadFromEnumerable(xyData);
    var invalidDataWrongTypes = ML.Data.ReadFromEnumerable(stringData);
    var invalidDataWrongVectorSize = ML.Data.ReadFromEnumerable(sizeData);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);

    // Schema propagation succeeds, but fitting must fail on the wrong vector size.
    pipe.GetOutputSchema(SchemaShape.Create(invalidDataWrongVectorSize.Schema));
    try
    {
        pipe.Fit(invalidDataWrongVectorSize);
        Assert.False(true); // unreachable: Fit is expected to throw
    }
    catch (ArgumentOutOfRangeException) { }
    catch (InvalidOperationException) { }
}
// Validates ML.Transforms.DnnFeaturizeImage (ResNet18) accepts valid data and rejects
// inputs with wrong column names, wrong types, or a wrong vector size.
public void TestDnnImageFeaturizer()
{
    //skip running for x86 as this test using too much memory (over 2GB limit on x86)
    //and very like to hit memory related issue when running on CI
    //TODO: optimized memory usage in related code and enable x86 test run
    if (!Environment.Is64BitProcess)
    {
        return;
    }

    var samplevector = GetSampleArrayData();
    var dataView = DataViewConstructionUtils.CreateFromList(Env,
        new TestData[] {
            new TestData() { data_0 = samplevector },
        });

    // Invalid fixtures: wrong column name, wrong element type, wrong vector length.
    var xyData = new List<TestDataXY> { new TestDataXY() { A = new float[InputSize] } };
    var stringData = new List<TestDataDifferntType> { new TestDataDifferntType() { data_0 = new string[InputSize] } };
    var sizeData = new List<TestDataSize> { new TestDataSize() { data_0 = new float[2] } };
    var pipe = ML.Transforms.DnnFeaturizeImage("output_1",
        m => m.ModelSelector.ResNet18(m.Environment, m.OutputColumn, m.InputColumn), "data_0");

    var invalidDataWrongNames = ML.Data.LoadFromEnumerable(xyData);
    var invalidDataWrongTypes = ML.Data.LoadFromEnumerable(stringData);
    var invalidDataWrongVectorSize = ML.Data.LoadFromEnumerable(sizeData);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);

    // Schema propagation succeeds, but fitting must fail on the wrong vector size.
    pipe.GetOutputSchema(SchemaShape.Create(invalidDataWrongVectorSize.Schema));
    try
    {
        pipe.Fit(invalidDataWrongVectorSize);
        Assert.False(true); // unreachable: Fit is expected to throw
    }
    catch (ArgumentOutOfRangeException) { }
    catch (InvalidOperationException) { }
}
/// <summary>
/// The 'reapply' constructor: rebuilds this filter transform over a new source view,
/// reusing the filter function, input schema definition, and added schema of an
/// existing instance.
/// </summary>
/// <param name="env">Host environment. NOTE(review): this parameter is never used here,
/// and <c>_host</c> is read below without being assigned in this constructor — verify
/// that <c>_host</c> is initialized elsewhere (e.g. field initializer) or whether this
/// should be <c>_host = env.Register(...)</c>.</param>
/// <param name="transform">The existing transform whose configuration is reused.</param>
/// <param name="newSource">The new input data view.</param>
private StatefulFilterTransform(IHostEnvironment env, StatefulFilterTransform<TSrc, TDst, TState> transform, IDataView newSource)
{
    _host.AssertValue(transform);
    _host.AssertValue(newSource);
    _source = newSource;
    _filterFunc = transform._filterFunc;
    // Re-bind the typed cursorable against the new source with the original schema definition.
    _typedSource = TypedCursorable<TSrc>.Create(_host, newSource, false, transform._inputSchemaDefinition);
    _addedSchema = transform._addedSchema;
    _bindings = new ColumnBindings(newSource.Schema, DataViewConstructionUtils.GetSchemaColumns(_addedSchema));
}
/// <summary>
/// Loads a serialized pipeline + predictor from <paramref name="modelName"/> and
/// builds a prediction engine over it.
/// </summary>
/// <param name="modelName">Path to the model file.</param>
public PredictionEngineExample(string modelName)
{
    _env = EnvHelper.NewTestEnvironment();
    // Empty typed view: only its schema is needed to seed the pipeline.
    var view = DataViewConstructionUtils.CreateFromEnumerable(_env, new FloatVectorInput[] { });
    // Fix: the FileStream returned by File.OpenRead was never disposed; scope it
    // with 'using' so the handle is released once the pipe has been loaded.
    using (var modelStream = File.OpenRead(modelName))
    {
        var pipe = DataViewConstructionUtils.LoadPipeWithPredictor(_env, modelStream,
            new EmptyDataView(_env, view.Schema));
        var transformer = new TransformWrapper(_env, pipe);
        _predictor = _env.CreatePredictionEngine<FloatVectorInput, FloatOutput>(transformer);
    }
}
/// <summary>
/// Returns a factory that, given an input schema, loads the pipeline (with predictor)
/// from <paramref name="modelStream"/> and exposes it as a row-to-row mapper.
/// </summary>
private static Func<DataViewSchema, IRowToRowMapper> StreamChecker(IHostEnvironment env, Stream modelStream)
{
    env.CheckValue(modelStream, nameof(modelStream));
    return schema =>
    {
        var loadedPipe = DataViewConstructionUtils.LoadPipeWithPredictor(env, modelStream, new EmptyDataView(env, schema));
        var wrapper = new TransformWrapper(env, loadedPipe);
        // The loaded pipeline must be mappable row-by-row to be usable here.
        env.CheckParam(((ITransformer)wrapper).IsRowToRowMapper, nameof(wrapper), "Must be a row to row mapper");
        return ((ITransformer)wrapper).GetRowToRowMapper(schema);
    };
}
/// <summary>
/// Base constructor: validates inputs, builds the typed input row from the input
/// schema definition, obtains a row-mapper factory for the transformer, and wires
/// the output row plus its disposer.
/// </summary>
/// <param name="env">Host environment (checked non-null).</param>
/// <param name="transformer">Transformer used for prediction.</param>
/// <param name="ignoreMissingColumns">Whether missing output columns are tolerated.</param>
/// <param name="inputSchemaDefinition">Optional override of the input schema.</param>
/// <param name="outputSchemaDefinition">Optional override of the output schema.</param>
private protected PredictionEngineBase(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    Contracts.CheckValue(env, nameof(env));
    env.AssertValue(transformer);
    Transformer = transformer;
    // Factory producing a row mapper for a given input schema.
    var makeMapper = TransformerChecker(env, transformer);
    env.AssertValue(makeMapper);
    _inputRow = DataViewConstructionUtils.CreateInputRow<TSrc>(env, inputSchemaDefinition);
    PredictionEngineCore(env, _inputRow, makeMapper(_inputRow.Schema), ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition, out _disposer, out _outputRow);
}
/// <summary>
/// Produces the output schema shape by merging the transformer's added columns into
/// the input shape; added columns overwrite same-named input columns.
/// </summary>
public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
    var columnsByName = inputSchema.Columns.ToDictionary(c => c.Name);
    var addedShape = SchemaShape.Create(new Schema(DataViewConstructionUtils.GetSchemaColumns(Transformer.AddedSchema)));
    foreach (var added in addedShape.Columns)
        columnsByName[added.Name] = added;
    return new SchemaShape(columnsByName.Values);
}
public void Testl_ShakeInputTransformVectorAdd()
{
    // Runs ShakeInputTransform in 'add' aggregation mode over two rows and checks
    // that cursoring the output column yields 4 values in total.
    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        var inputs = new[] {
            new SHExampleA() { X = new float[] { 0, 1 } },
            new SHExampleA() { X = new float[] { 2, 3 } }
        };
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        var args = new ShakeInputTransform.Arguments
        {
            inputColumn = "X",
            inputFeaturesInt = new[] { 0, 1 },
            outputColumns = new[] { "yo" },
            values = "-10,10;-100,100",
            aggregation = ShakeInputTransform.ShakeAggregation.add
        };
        // Fix: removed the dead "if (trv == null) throw" check — 'new' never returns null.
        var trv = new ExampleValueMapperVector();
        var shake = new ShakeInputTransform(host, args, data, new IValueMapper[] { trv });
        using (var cursor = shake.GetRowCursor(shake.Schema))
        {
            var outValues = new List<float>();
            var colGetter = cursor.GetGetter<VBuffer<float>>(SchemaHelper._dc(1, cursor));
            while (cursor.MoveNext())
            {
                VBuffer<float> got = new VBuffer<float>();
                colGetter(ref got);
                outValues.AddRange(got.DenseValues());
            }
            if (outValues.Count != 4)
            {
                throw new Exception("expected 4");
            }
        }
    }
}
/// <summary>
/// Builds a batch prediction engine from an existing data pipeline: creates an empty
/// typed source view, re-applies every transform of <paramref name="dataPipeline"/>
/// to it, and wraps the result in a <see cref="PipeEngine{TDst}"/>.
/// </summary>
internal BatchPredictionEngine(IHostEnvironment env, IDataView dataPipeline, bool ignoreMissingColumns, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(dataPipeline);
    Contracts.AssertValueOrNull(inputSchemaDefinition);
    Contracts.AssertValueOrNull(outputSchemaDefinition);
    // Initialize pipe: an empty TSrc view supplies the input schema.
    _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, new TSrc[] { }, inputSchemaDefinition);
    var pipe = ApplyTransformUtils.ApplyAllTransformsToData(env, dataPipeline, _srcDataView);
    _pipeEngine = new PipeEngine<TDst>(env, pipe, ignoreMissingColumns, outputSchemaDefinition);
}
/// <summary>
/// Builds a batch prediction engine from a serialized model: creates an empty typed
/// source view, loads the pipeline (with predictor) from <paramref name="modelStream"/>
/// over it, and wraps the result in a <see cref="PipeEngine{TDst}"/>.
/// </summary>
internal BatchPredictionEngine(IHostEnvironment env, Stream modelStream, bool ignoreMissingColumns, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(modelStream);
    Contracts.AssertValueOrNull(inputSchemaDefinition);
    Contracts.AssertValueOrNull(outputSchemaDefinition);
    // Initialize pipe: an empty TSrc view supplies the input schema.
    _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, new TSrc[] { }, inputSchemaDefinition);
    var pipe = DataViewConstructionUtils.LoadPipeWithPredictor(env, modelStream, _srcDataView);
    _pipeEngine = new PipeEngine<TDst>(env, pipe, ignoreMissingColumns, outputSchemaDefinition);
}
// Runs ResampleTransform with the given lambda over 4 rows and loosely checks the
// resampled row count: a ratio < 1 should not blow up the count (> 8) and a
// ratio > 1 should produce at least one row.
private void TestResampleTransform(float ratio)
{
    /*using (*/
    var env = EnvHelper.NewTestEnvironment(conc: 1);
    {
        var inputs = new InputOutput[] {
            new InputOutput() { X = new float[] { 0, 1 }, Y = 1 },
            new InputOutput() { X = new float[] { 0, 1 }, Y = 0 },
            new InputOutput() { X = new float[] { 0, 1 }, Y = 2 },
            new InputOutput() { X = new float[] { 0, 1 }, Y = 3 },
        };

        var data = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
        var args = new ResampleTransform.Arguments { lambda = ratio, cache = false };
        var tr = new ResampleTransform(env, args, data);
        var values = new List<int>();
        using (var cursor = tr.GetRowCursor(tr.Schema))
        {
            var columnGetter = cursor.GetGetter<int>(SchemaHelper._dc(1, cursor));
            while (cursor.MoveNext())
            {
                int got = 0;
                columnGetter(ref got);
                values.Add((int)got);
            }
        }
        // Resampling is stochastic, so the assertions are deliberately loose bounds.
        if (ratio < 1 && values.Count > 8)
        {
            throw new Exception("ResampleTransform did not work.");
        }
        if (ratio > 1 && values.Count < 1)
        {
            throw new Exception("ResampleTransform did not work.");
        }
    }
}
/// <summary>
/// Loads a predictor and its transform chain from <paramref name="modelName"/> and
/// builds a float value mapper scoring the <paramref name="features"/> column.
/// </summary>
/// <param name="modelName">Path to the model file (read twice: predictor, then transforms).</param>
/// <param name="features">Name of the features column used to create the examples.</param>
public ValueMapperExample(string modelName, string features)
{
    _env = EnvHelper.NewTestEnvironment();
    // Fix: both File.OpenRead streams were leaked; scope each with 'using' so the
    // handles are released as soon as the corresponding load completes.
    using (var predictorStream = File.OpenRead(modelName))
        _predictor = ModelFileUtils.LoadPredictorOrNull(_env, predictorStream);
    var inputs = new Input[0];
    var view = DataViewConstructionUtils.CreateFromEnumerable(_env, inputs);
    using (var transformsStream = File.OpenRead(modelName))
        _transforms = ModelFileUtils.LoadTransforms(_env, view, transformsStream);
    var data = _env.CreateExamples(_transforms, features);
    var scorer = _env.CreateDefaultScorer(data, _predictor);
    _valueMapper = new ValueMapperFromTransformFloat<VBuffer<float>>(_env, scorer, "Features", "Probability");
    _mapper = _valueMapper.GetMapper<VBuffer<float>, float>();
}
// Validates ML.Transforms.DnnFeaturizeImage (ResNet18) accepts valid data and rejects
// inputs with wrong column names, wrong types, or a wrong vector size.
void TestDnnImageFeaturizer()
{
    var samplevector = GetSampleArrayData();
    var dataView = DataViewConstructionUtils.CreateFromList(Env,
        new TestData[] {
            new TestData() { data_0 = samplevector },
        });

    // Invalid fixtures: wrong column name, wrong element type, wrong vector length.
    var xyData = new List<TestDataXY> { new TestDataXY() { A = new float[inputSize] } };
    var stringData = new List<TestDataDifferntType> { new TestDataDifferntType() { data_0 = new string[inputSize] } };
    var sizeData = new List<TestDataSize> { new TestDataSize() { data_0 = new float[2] } };
    var pipe = ML.Transforms.DnnFeaturizeImage("output_1",
        m => m.ModelSelector.ResNet18(m.Environment, m.OutputColumn, m.InputColumn), "data_0");

    var invalidDataWrongNames = ML.Data.LoadFromEnumerable(xyData);
    var invalidDataWrongTypes = ML.Data.LoadFromEnumerable(stringData);
    var invalidDataWrongVectorSize = ML.Data.LoadFromEnumerable(sizeData);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);

    // Schema propagation succeeds, but fitting must fail on the wrong vector size.
    pipe.GetOutputSchema(SchemaShape.Create(invalidDataWrongVectorSize.Schema));
    try
    {
        pipe.Fit(invalidDataWrongVectorSize);
        Assert.False(true); // unreachable: Fit is expected to throw
    }
    catch (ArgumentOutOfRangeException) { }
    catch (InvalidOperationException) { }
}
public void TestScikitAPI_DelegateEnvironmentVerbose0()
{
    // Trains a pipeline under a DelegateEnvironment with verbose: 0 and checks
    // that nothing is written to either log writer.
    var inputs = new[] {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } }
    };
    // Fix: removed the unused local 'inputs2' — it was declared but never consumed.
    var stdout = new List<string>();
    var stderr = new List<string>();
    ILogWriter logout = new LogWriter(s => stdout.Add(s));
    ILogWriter logerr = new LogWriter(s => stderr.Add(s));
    /*using (*/
    var host = new DelegateEnvironment(seed: 0, outWriter: logout, errWriter: logerr, verbose: 0);
    {
        ComponentHelper.AddStandardComponents(host);
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host: host))
        {
            var predictor = pipe.Train(data, feature: "X");
            Assert.IsTrue(predictor != null);
        }
    }
    // verbose: 0 must suppress all output.
    Assert.AreEqual(stdout.Count, 0);
    Assert.AreEqual(stderr.Count, 0);
}
public void TestI_PolynomialTransformSparse()
{
    // Runs the polynomial transform twice over the same sparse inputs and checks
    // both runs produce identical values (sparse handling is consistent).
    var inputs = new[] {
        new ExampleASparse() { X = new VBuffer<float>(5, 3, new float[] { 1, 10, 100 }, new int[] { 0, 2, 4 }) },
        new ExampleASparse() { X = new VBuffer<float>(5, 3, new float[] { 2, 3, 5 }, new int[] { 1, 2, 3 }) }
    };
    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        List<float[]> values;
        CommonTestPolynomialTransform(host, data, 5, out values);

        List<float[]> valuesDense;
        data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        CommonTestPolynomialTransform(host, data, 5, out valuesDense);

        // Fix: corrected "Mismath" typo to "Mismatch" in the failure messages.
        if (values.Count != valuesDense.Count)
        {
            throw new Exception("Mismatch in number of observations.");
        }
        for (int i = 0; i < values.Count; ++i)
        {
            if (values[i].Length != valuesDense[i].Length)
            {
                throw new Exception("Mismatch in dimensions.");
            }
            for (int j = 0; j < values[i].Length; ++j)
            {
                if (values[i][j] != valuesDense[i][j])
                {
                    throw new Exception("Mismatch in value.");
                }
            }
        }
    }
}
public void TestScikitAPI_SimpleTransform()
{
    // Fits poly{col=X} on one row set, transforms a second set, and pins the
    // resulting data frame's shape and full textual content.
    var trainRows = new[]
    {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } }
    };
    var transformRows = new[]
    {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } }
    };
    /*using (*/
    var host = EnvHelper.NewTestEnvironment(conc: 1);
    {
        var trainView = DataViewConstructionUtils.CreateFromEnumerable(host, trainRows);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, host: host))
        {
            var predictor = pipe.Train(trainView);
            Assert.IsTrue(predictor != null);
            var transformView = DataViewConstructionUtils.CreateFromEnumerable(host, transformRows);
            var predictions = pipe.Transform(transformView);
            var frame = DataFrameIO.ReadView(predictions);
            // 2 rows; degree-2 expansion of 3 features yields 9 columns.
            Assert.AreEqual(frame.Shape, new Tuple<int, int>(2, 9));
            var flattened = frame.ToString().Replace("\n", ";");
            Assert.AreEqual(flattened, "X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8;-1,-10,-100,1,10,100,100,1000,10000;-2,-3,-5,4,6,10,9,15,25");
        }
    }
}
public void TestI_ScalerTransformDenseMinMax()
{
    // Exercises the scaler transform in min-max mode over two dense rows;
    // the detailed checks live in CommonTestScalerTransform.
    var rows = new[]
    {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } }
    };
    /*using (*/
    var env = EnvHelper.NewTestEnvironment();
    {
        var view = DataViewConstructionUtils.CreateFromEnumerable(env, rows);
        CommonTestScalerTransform(env, view, 3, ScalerTransform.ScalerStrategy.minMax, out List<float[]> _);
    }
}