public void TestI_DescribeTransformSaveDataAndZip()
{
    /*using (*/
    var env = EnvHelper.NewTestEnvironment();
    {
        var inputs = InputOutput.CreateInputs();
        var data = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
        var args = new DescribeTransform.Arguments() { columns = new[] { "X" } };
        var tr = new DescribeTransform(env, args, data);

        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var outputDataFilePath = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName);
        StreamHelper.SavePredictions(env, tr, outputDataFilePath);
        Assert.IsTrue(File.Exists(outputDataFilePath));

        var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        StreamHelper.SaveModel(env, tr, outModelFilePath);
        Assert.IsTrue(File.Exists(outModelFilePath));

        var outputDataFilePath2 = FileHelper.GetOutputFile("outputDataFilePath2.txt", methodName);
        StreamHelper.SavePredictions(env, outModelFilePath, outputDataFilePath2, data);
        Assert.IsTrue(File.Exists(outputDataFilePath2));

        var d1 = File.ReadAllText(outputDataFilePath);
        Assert.IsTrue(d1.Length > 0);
        var d2 = File.ReadAllText(outputDataFilePath2);
        Assert.AreEqual(d1, d2);
    }
}
public void TestTagViewTransform()
{
    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        var inputs = new[] {
            new ExampleA() { X = new float[] { 0, 1 } },
            new ExampleA() { X = new float[] { 2, 3 } }
        };

        IDataView loader = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        var data = host.CreateTransform("Scaler{col=X1:X}", loader);
        data = host.CreateTransform("tag{t=memory}", data);

        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        var outData = FileHelper.GetOutputFile("outData.txt", methodName);
        var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);

        TestTransformHelper.SerializationTestTransform(host, outModelFilePath, data, loader, outData, outData2);
    }
}
/// <summary>
/// Create a new <see cref="IDataView"/> over an enumerable of the items of user-defined type using the provided <see cref="DataViewSchema"/>,
/// which might contain more information about the schema than the type can capture.
/// </summary>
/// <remarks>
/// The user maintains ownership of the <paramref name="data"/> and the resulting data view will
/// never alter the contents of the <paramref name="data"/>.
/// Since <see cref="IDataView"/> is assumed to be immutable, the user is expected to support
/// multiple enumerations of the <paramref name="data"/> that would return the same results, unless
/// the user knows that the data will only be cursored once.
/// One typical usage for a streaming data view could be: create the data view that lazily loads data
/// as needed, then apply pre-trained transformations to it and cursor through it for transformation
/// results.
/// One practical usage of this overload is to supply the feature column names through the <see cref="DataViewSchema.Annotations"/>.
/// </remarks>
/// <typeparam name="TRow">The user-defined item type.</typeparam>
/// <param name="data">The enumerable data containing type <typeparamref name="TRow"/> to convert to an <see cref="IDataView"/>.</param>
/// <param name="schema">The schema of the returned <see cref="IDataView"/>.</param>
/// <returns>An <see cref="IDataView"/> with the given <paramref name="schema"/>.</returns>
public IDataView LoadFromEnumerable<TRow>(IEnumerable<TRow> data, DataViewSchema schema)
    where TRow : class
{
    _env.CheckValue(data, nameof(data));
    _env.CheckValue(schema, nameof(schema));
    return DataViewConstructionUtils.CreateFromEnumerable(_env, data, schema);
}
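// Usage sketch for the DataViewSchema overload (a minimal sketch, assuming an
// MLContext-style entry point exposing this catalog; ExampleRow and the values
// are illustrative). The schema of a previously built view, which may carry
// annotations such as slot names, is reused verbatim for a new in-memory enumerable.
public class ExampleRow
{
    [VectorType(3)]
    public float[] X;
}

public static IDataView ReloadWithSchema(MLContext mlContext, IDataView reference)
{
    var fresh = new[] { new ExampleRow { X = new float[] { 4, 5, 6 } } };
    // Keep exactly the schema (and its annotations) of the reference view.
    return mlContext.Data.LoadFromEnumerable(fresh, reference.Schema);
}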
public void TestChainTransformSerialize()
{
    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        var inputs = new[] {
            new ExampleA() { X = new float[] { 1, 10, 100 } },
            new ExampleA() { X = new float[] { 2, 3, 5 } }
        };

        IDataView loader = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        IDataTransform data = host.CreateTransform("Scaler{col=X4:X}", loader);
        data = host.CreateTransform("ChainTrans{ xf1=Scaler{col=X2:X} xf2=Poly{col=X3:X2} }", data);

        // We create a specific folder in build/UnitTest which will contain the output.
        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        var outData = FileHelper.GetOutputFile("outData.txt", methodName);
        var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);

        TestTransformHelper.SerializationTestTransform(host, outModelFilePath, data, loader, outData, outData2);
    }
}
public void TestLambdaColumnPassThroughTransform()
{
    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        var inputs = new InputOutputU[] {
            new InputOutputU() { X = new float[] { 0.1f, 1.1f }, Y = 0 },
            new InputOutputU() { X = new float[] { 0.2f, 1.2f }, Y = 1 },
            new InputOutputU() { X = new float[] { 0.3f, 1.3f }, Y = 2 }
        };

        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        var lambdaView = LambdaColumnHelper.Create<VBuffer<float>, VBuffer<float>>(host,
            "Lambda", data, "X", "XX",
            new VectorDataViewType(NumberDataViewType.Single, 2),
            new VectorDataViewType(NumberDataViewType.Single, 2),
            (in VBuffer<float> src, ref VBuffer<float> dst) =>
            {
                dst = new VBuffer<float>(2, new float[2]);
                dst.Values[0] = src.Values[0] + 1f;
                dst.Values[1] = src.Values[1] + 1f;
            });
public void TestI_PolynomialTransformSerialize()
{
    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        var inputs = new[] {
            new ExampleA() { X = new float[] { 1, 10, 100 } },
            new ExampleA() { X = new float[] { 2, 3, 5 } }
        };

        IDataView loader = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        var data = host.CreateTransform("poly{col=poly:X d=3}", loader);

        // We create a specific folder in build/UnitTest which will contain the output.
        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        var outData = FileHelper.GetOutputFile("outData.txt", methodName);
        var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);

        // This function serializes the output data twice, once before saving the pipeline
        // and once after loading it back, and checks that both runs give the same result.
        TestTransformHelper.SerializationTestTransform(host, outModelFilePath, data, loader, outData, outData2);
    }
}
/// <summary>
/// Create a new <see cref="IDataView"/> over an enumerable of the items of user-defined type.
/// The user maintains ownership of the <paramref name="data"/> and the resulting data view will
/// never alter the contents of the <paramref name="data"/>.
/// Since <see cref="IDataView"/> is assumed to be immutable, the user is expected to support
/// multiple enumerations of the <paramref name="data"/> that would return the same results, unless
/// the user knows that the data will only be cursored once.
///
/// One typical usage for a streaming data view could be: create the data view that lazily loads data
/// as needed, then apply pre-trained transformations to it and cursor through it for transformation
/// results.
/// </summary>
/// <typeparam name="TRow">The user-defined item type.</typeparam>
/// <param name="data">The enumerable data containing type <typeparamref name="TRow"/> to convert to an <see cref="IDataView"/>.</param>
/// <param name="schemaDefinition">The optional schema definition of the data view to create. If <c>null</c>,
/// the schema definition is inferred from <typeparamref name="TRow"/>.</param>
/// <returns>The constructed <see cref="IDataView"/>.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[LoadFromEnumerable](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadFromEnumerable.cs)]
/// ]]>
/// </format>
/// </example>
public IDataView LoadFromEnumerable<TRow>(IEnumerable<TRow> data, SchemaDefinition schemaDefinition = null)
    where TRow : class
{
    _env.CheckValue(data, nameof(data));
    _env.CheckValueOrNull(schemaDefinition);
    return DataViewConstructionUtils.CreateFromEnumerable(_env, data, schemaDefinition);
}
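// Usage sketch for the SchemaDefinition overload (a minimal sketch, assuming an
// MLContext-style entry point; the Row type and the vector size are illustrative).
// A SchemaDefinition is typically used to pin the length of a vector column that
// the CLR type alone cannot express.
public class Row
{
    public float[] Features;   // length not visible from the type alone
}

public static IDataView LoadWithPinnedVectorSize(MLContext mlContext, IEnumerable<Row> rows)
{
    // Start from the definition inferred from Row, then fix the vector size.
    var schemaDef = SchemaDefinition.Create(typeof(Row));
    schemaDef["Features"].ColumnType = new VectorDataViewType(NumberDataViewType.Single, 3);
    return mlContext.Data.LoadFromEnumerable(rows, schemaDef);
}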
public void TestI_DescribeTransformCode()
{
    /*using (*/
    var env = EnvHelper.NewTestEnvironment();
    {
        var inputs = InputOutput.CreateInputs();
        var data = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
        var args = new DescribeTransform.Arguments() { columns = new[] { "X" } };
        var tr = new DescribeTransform(env, args, data);

        var values = new List<int>();
        using (var cursor = tr.GetRowCursor(tr.Schema))
        {
            var columnGetter = cursor.GetGetter<int>(SchemaHelper._dc(1, cursor));
            while (cursor.MoveNext())
            {
                int got = 0;
                columnGetter(ref got);
                values.Add((int)got);
            }
        }
        Assert.AreEqual(values.Count, 4);
    }
}
public void TestSelectTagContactViewTransform()
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var firstData = FileHelper.GetOutputFile("first.idv", methodName);
    var outData = FileHelper.GetOutputFile("outData.txt", methodName);
    var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);

    /*using (*/
    var env = EnvHelper.NewTestEnvironment();
    {
        var inputs = new[] {
            new ExampleA() { X = new float[] { 0, 1, 4 } },
            new ExampleA() { X = new float[] { 2, 3, 7 } }
        };

        // Create IDV
        IDataView loader = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
        var saver = ComponentCreation.CreateSaver(env, "binary");
        using (var ch = env.Start("save"))
        {
            using (var fs0 = env.CreateOutputFile(firstData))
                DataSaverUtils.SaveDataView(ch, saver, loader, fs0, true);

            // Create parallel pipeline
            loader = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
            var data = env.CreateTransform("Scaler{col=X1:X}", loader);
            data = env.CreateTransform(string.Format("selecttag{{t=first s=second f={0}}}", firstData), data);
            data = env.CreateTransform("Scaler{col=X1:X}", data);
            var merged = env.CreateTransform("append{t=first}", data);

            // Save the outcome
            var text = env.CreateSaver("Text");
            var columns = new int[merged.Schema.Count];
            for (int i = 0; i < columns.Length; ++i)
                columns[i] = i;
            using (var fs2 = File.Create(outData))
                text.SaveData(fs2, merged, columns);

            // Final checking
            var lines = File.ReadAllLines(outData);
            if (!lines.Any())
                throw new Exception("Empty file.");
            if (lines.Length != 9)
                throw new Exception("Some lines are missing.");
        }
    }
}
public void TestScikitAPI_SimplePredictor()
{
    var inputs = new[] {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 5 } },
        new ExampleA() { X = new float[] { 2, 4, 7 } },
    };
    var inputs2 = new[] {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } },
        new ExampleA() { X = new float[] { 3, 4, 5 } },
        new ExampleA() { X = new float[] { 3, 4, 7 } },
    };

    /*using (*/
    var host = EnvHelper.NewTestEnvironment(conc: 1);
    {
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
        {
            var predictor = pipe.Train(data, feature: "X");
            Assert.IsTrue(predictor != null);

            var data2 = new StreamingDataFrame(DataViewConstructionUtils.CreateFromEnumerable(host, inputs2));
            var predictions = pipe.Predict(data2);
            var df = DataFrameIO.ReadView(predictions);
            Assert.AreEqual(df.Shape, new Tuple<int, int>(4, 12));
            var dfs = df.ToString();
            var dfs2 = dfs.Replace("\n", ";");
            Assert.IsTrue(dfs2.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000"));
        }
    }
}
/// <summary>
/// Constructor.
/// </summary>
/// <param name="env">environment</param>
/// <param name="modelStream">stream containing the model</param>
/// <param name="output">name of the output column</param>
/// <param name="outputIsFloat">output is a float (true) or a vector of floats (false)</param>
/// <param name="conc">number of concurrent threads</param>
/// <param name="features">name of the features column</param>
public ValueMapperPredictionEngineFloat(IHostEnvironment env, Stream modelStream,
    string output = "Probability", bool outputIsFloat = true, int conc = 1,
    string features = "Features")
{
    _env = env;
    if (_env == null)
        throw Contracts.Except("env must not be null");

    var inputs = new FloatVectorInput[0];
    var view = DataViewConstructionUtils.CreateFromEnumerable<FloatVectorInput>(_env, inputs);

    long modelPosition = modelStream.Position;
    _predictor = ModelFileUtils.LoadPredictorOrNull(_env, modelStream);
    if (_predictor == null)
        throw _env.Except("Unable to load a model.");
    modelStream.Seek(modelPosition, SeekOrigin.Begin);
    _transforms = ModelFileUtils.LoadTransforms(_env, view, modelStream);
    if (_transforms == null)
        throw _env.Except("Unable to load a model.");

    var data = _env.CreateExamples(_transforms, features);
    if (data == null)
        throw _env.Except("Cannot create rows.");
    var scorer = _env.CreateDefaultScorer(data, _predictor);
    if (scorer == null)
        throw _env.Except("Cannot create a scorer.");

    _valueMapper = new ValueMapperFromTransformFloat<VBuffer<float>>(_env, scorer, features, output, conc: conc);
    if (_valueMapper == null)
        throw _env.Except("Cannot create a mapper.");

    if (outputIsFloat)
    {
        _mapper = _valueMapper.GetMapper<VBuffer<float>, float>();
        _mapperVector = null;
    }
    else
    {
        _mapper = null;
        _mapperVector = _valueMapper.GetMapper<VBuffer<float>, VBuffer<float>>();
    }
}
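// Hypothetical usage sketch; only the constructor signature above comes from the source.
// The prediction entry point is assumed and therefore left as a comment.
public static void ScoreWithFloatEngine(IHostEnvironment env, string modelPath)
{
    using (var modelStream = File.OpenRead(modelPath))
    {
        var engine = new ValueMapperPredictionEngineFloat(env, modelStream,
            output: "Probability", outputIsFloat: true, conc: 1, features: "Features");
        var features = new float[] { 0.5f, 1.2f, -0.7f };
        // float score = engine.Predict(features);   // assumed method name, not shown in the source
    }
}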
public PredictionEngineExample(string modelName)
{
    _env = EnvHelper.NewTestEnvironment();
    var view = DataViewConstructionUtils.CreateFromEnumerable(_env, new FloatVectorInput[] { });
    var pipe = DataViewConstructionUtils.LoadPipeWithPredictor(_env, File.OpenRead(modelName),
                                                               new EmptyDataView(_env, view.Schema));
    var transformer = new TransformWrapper(_env, pipe);
    _predictor = _env.CreatePredictionEngine<FloatVectorInput, FloatOutput>(transformer);
}
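// Hypothetical extension of PredictionEngineExample (not in the original class):
// expose the wrapped engine through a single-row Predict method, relying on the
// standard PredictionEngine<TSrc, TDst>.Predict call.
public FloatOutput Predict(FloatVectorInput example)
{
    return _predictor.Predict(example);
}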
public void TestI_ShakeInputTransformVectorAdd()
{
    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        var inputs = new[] {
            new SHExampleA() { X = new float[] { 0, 1 } },
            new SHExampleA() { X = new float[] { 2, 3 } }
        };

        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        var args = new ShakeInputTransform.Arguments
        {
            inputColumn = "X",
            inputFeaturesInt = new[] { 0, 1 },
            outputColumns = new[] { "yo" },
            values = "-10,10;-100,100",
            aggregation = ShakeInputTransform.ShakeAggregation.add
        };

        var trv = new ExampleValueMapperVector();
        if (trv == null)
            throw new Exception("Invalid");

        var shake = new ShakeInputTransform(host, args, data, new IValueMapper[] { trv });
        using (var cursor = shake.GetRowCursor(shake.Schema))
        {
            var outValues = new List<float>();
            var colGetter = cursor.GetGetter<VBuffer<float>>(SchemaHelper._dc(1, cursor));
            while (cursor.MoveNext())
            {
                VBuffer<float> got = new VBuffer<float>();
                colGetter(ref got);
                outValues.AddRange(got.DenseValues());
            }
            if (outValues.Count != 4)
                throw new Exception("expected 4");
        }
    }
}
internal BatchPredictionEngine(IHostEnvironment env, Stream modelStream, bool ignoreMissingColumns,
                               SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(modelStream);
    Contracts.AssertValueOrNull(inputSchemaDefinition);
    Contracts.AssertValueOrNull(outputSchemaDefinition);

    // Initialize pipe.
    _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, new TSrc[] { }, inputSchemaDefinition);
    var pipe = DataViewConstructionUtils.LoadPipeWithPredictor(env, modelStream, _srcDataView);
    _pipeEngine = new PipeEngine<TDst>(env, pipe, ignoreMissingColumns, outputSchemaDefinition);
}
internal BatchPredictionEngine(IHostEnvironment env, IDataView dataPipeline, bool ignoreMissingColumns,
                               SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(dataPipeline);
    Contracts.AssertValueOrNull(inputSchemaDefinition);
    Contracts.AssertValueOrNull(outputSchemaDefinition);

    // Initialize pipe.
    _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, new TSrc[] { }, inputSchemaDefinition);
    var pipe = ApplyTransformUtils.ApplyAllTransformsToData(env, dataPipeline, _srcDataView);
    _pipeEngine = new PipeEngine<TDst>(env, pipe, ignoreMissingColumns, outputSchemaDefinition);
}
private void TestResampleTransform(float ratio)
{
    /*using (*/
    var env = EnvHelper.NewTestEnvironment(conc: 1);
    {
        var inputs = new InputOutput[] {
            new InputOutput() { X = new float[] { 0, 1 }, Y = 1 },
            new InputOutput() { X = new float[] { 0, 1 }, Y = 0 },
            new InputOutput() { X = new float[] { 0, 1 }, Y = 2 },
            new InputOutput() { X = new float[] { 0, 1 }, Y = 3 },
        };

        var data = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
        var args = new ResampleTransform.Arguments { lambda = ratio, cache = false };
        var tr = new ResampleTransform(env, args, data);
        var values = new List<int>();
        using (var cursor = tr.GetRowCursor(tr.Schema))
        {
            var columnGetter = cursor.GetGetter<int>(SchemaHelper._dc(1, cursor));
            while (cursor.MoveNext())
            {
                int got = 0;
                columnGetter(ref got);
                values.Add((int)got);
            }
        }
        if (ratio < 1 && values.Count > 8)
            throw new Exception("ResampleTransform did not work.");
        if (ratio > 1 && values.Count < 1)
            throw new Exception("ResampleTransform did not work.");
    }
}
public ValueMapperExample(string modelName, string features)
{
    _env = EnvHelper.NewTestEnvironment();
    _predictor = ModelFileUtils.LoadPredictorOrNull(_env, File.OpenRead(modelName));
    var inputs = new Input[0];
    var view = DataViewConstructionUtils.CreateFromEnumerable(_env, inputs);
    _transforms = ModelFileUtils.LoadTransforms(_env, view, File.OpenRead(modelName));
    var data = _env.CreateExamples(_transforms, features);
    var scorer = _env.CreateDefaultScorer(data, _predictor);
    _valueMapper = new ValueMapperFromTransformFloat<VBuffer<float>>(_env, scorer, "Features", "Probability");
    _mapper = _valueMapper.GetMapper<VBuffer<float>, float>();
}
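// Hypothetical helper one could add to ValueMapperExample (not in the original class):
// it wraps the cached mapper delegate, which follows the (in src, ref dst) ValueMapper pattern.
public float Predict(float[] features)
{
    var input = new VBuffer<float>(features.Length, features);
    float score = 0f;
    _mapper(in input, ref score);
    return score;
}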
public void TestScikitAPI_DelegateEnvironmentVerbose0()
{
    var inputs = new[] {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } }
    };
    var inputs2 = new[] {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } }
    };

    var stdout = new List<string>();
    var stderr = new List<string>();
    ILogWriter logout = new LogWriter(s => stdout.Add(s));
    ILogWriter logerr = new LogWriter(s => stderr.Add(s));

    /*using (*/
    var host = new DelegateEnvironment(seed: 0, outWriter: logout, errWriter: logerr, verbose: 0);
    {
        ComponentHelper.AddStandardComponents(host);
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host: host))
        {
            var predictor = pipe.Train(data, feature: "X");
            Assert.IsTrue(predictor != null);
        }
    }
    Assert.AreEqual(stdout.Count, 0);
    Assert.AreEqual(stderr.Count, 0);
}
public void TestI_PolynomialTransformSparse()
{
    var inputs = new[] {
        new ExampleASparse() { X = new VBuffer<float>(5, 3, new float[] { 1, 10, 100 }, new int[] { 0, 2, 4 }) },
        new ExampleASparse() { X = new VBuffer<float>(5, 3, new float[] { 2, 3, 5 }, new int[] { 1, 2, 3 }) }
    };

    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        List<float[]> values;
        CommonTestPolynomialTransform(host, data, 5, out values);

        List<float[]> valuesDense;
        data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        CommonTestPolynomialTransform(host, data, 5, out valuesDense);

        if (values.Count != valuesDense.Count)
            throw new Exception("Mismatch in number of observations.");
        for (int i = 0; i < values.Count; ++i)
        {
            if (values[i].Length != valuesDense[i].Length)
                throw new Exception("Mismatch in dimensions.");
            for (int j = 0; j < values[i].Length; ++j)
                if (values[i][j] != valuesDense[i][j])
                    throw new Exception("Mismatch in value.");
        }
    }
}
public void TestScikitAPI_SimpleTransform()
{
    var inputs = new[] {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } }
    };
    var inputs2 = new[] {
        new ExampleA() { X = new float[] { -1, -10, -100 } },
        new ExampleA() { X = new float[] { -2, -3, -5 } }
    };

    /*using (*/
    var host = EnvHelper.NewTestEnvironment(conc: 1);
    {
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, host: host))
        {
            var predictor = pipe.Train(data);
            Assert.IsTrue(predictor != null);

            var data2 = DataViewConstructionUtils.CreateFromEnumerable(host, inputs2);
            var predictions = pipe.Transform(data2);
            var df = DataFrameIO.ReadView(predictions);
            Assert.AreEqual(df.Shape, new Tuple<int, int>(2, 9));
            var dfs = df.ToString();
            var dfs2 = dfs.Replace("\n", ";");
            Assert.AreEqual(dfs2, "X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8;-1,-10,-100,1,10,100,100,1000,10000;-2,-3,-5,4,6,10,9,15,25");
        }
    }
}
public void TestI_ScalerTransformDenseMeanVarNoVector()
{
    var inputs = new[] {
        new ExampleA0() { X = 1f },
        new ExampleA0() { X = 2f }
    };

    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        List<float[]> values;
        CommonTestScalerTransform(host, data, 3, ScalerTransform.ScalerStrategy.meanVar, out values);
    }
}
public void TestI_ScalerTransformDenseMinMax()
{
    var inputs = new[] {
        new ExampleA() { X = new float[] { 1, 10, 100 } },
        new ExampleA() { X = new float[] { 2, 3, 5 } }
    };

    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        List<float[]> values;
        CommonTestScalerTransform(host, data, 3, ScalerTransform.ScalerStrategy.minMax, out values);
    }
}
public void TestTimeSeriesDeTrendSerialize()
{
    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        var inputs = new[] {
            new InputOutput() { X = 7f, time = 0f },
            new InputOutput() { X = 7f, time = 1f },
            new InputOutput() { X = 9f, time = 2f },
            new InputOutput() { X = 9f, time = 3f },
            new InputOutput() { X = 8f, time = 4f },
        };

        IDataView loader = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        var data = host.CreateTransform("detrend{col=Y:X time=time optim=sasdcar{iter=50}}", loader);

        // To train the model.
        using (var cursor = data.GetRowCursor(data.Schema))
        {
        }

        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        var outData = FileHelper.GetOutputFile("outData.txt", methodName);
        var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);

        TestTransformHelper.SerializationTestTransform(host, outModelFilePath, data, loader, outData, outData2);
    }
}
/// <summary>
/// Constructor.
/// </summary>
/// <param name="env">environment</param>
/// <param name="modelStream">stream containing the model</param>
/// <param name="features">name of the features column</param>
public ValueMapperPredictionEngine(IHostEnvironment env, Stream modelStream, string features = "Features")
{
    _env = env;
    if (_env == null)
        throw Contracts.Except("env must not be null");

    var inputs = new TRowValue[0];
    var view = DataViewConstructionUtils.CreateFromEnumerable<TRowValue>(_env, inputs);

    long modelPosition = modelStream.Position;
    _predictor = ModelFileUtils.LoadPredictorOrNull(_env, modelStream);
    if (_predictor == null)
        throw _env.Except("Unable to load a model.");
    modelStream.Seek(modelPosition, SeekOrigin.Begin);
    _transforms = ModelFileUtils.LoadTransforms(_env, view, modelStream);
    if (_transforms == null)
        throw _env.Except("Unable to load a model.");

    var data = _env.CreateExamples(_transforms, features);
    if (data == null)
        throw _env.Except("Cannot create rows.");
    var scorer = _env.CreateDefaultScorer(data, _predictor);
    if (scorer == null)
        throw _env.Except("Cannot create a scorer.");
    _CreateMapper(scorer);
}
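// Hypothetical usage sketch; only the constructor shown above comes from the source.
// MyRow stands in for the user-defined TRowValue type; the scoring call built by
// _CreateMapper(scorer) is not part of this excerpt and is therefore only hinted at.
public static void LoadEngine(IHostEnvironment env, string modelPath)
{
    using (var modelStream = File.OpenRead(modelPath))
    {
        var engine = new ValueMapperPredictionEngine<MyRow>(env, modelStream, features: "Features");
        // ... use the mapper created by _CreateMapper(scorer) to score MyRow instances ...
    }
}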
static void TestSplitTrainTestTransform(string option, int numThreads = 1)
{
    /*using (*/
    var host = EnvHelper.NewTestEnvironment(conc: numThreads == 1 ? 1 : 0);
    {
        var inputsl = new List<InputOutput>();
        for (int i = 0; i < 100; ++i)
            inputsl.Add(new InputOutput { X = new float[] { 0, 1 }, Y = i });
        var inputs = inputsl.ToArray();
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);

        var args = new SplitTrainTestTransform.Arguments { newColumn = "Part", numThreads = numThreads };
        if (option == "2")
        {
            var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var cacheFile = FileHelper.GetOutputFile("cacheFile.idv", methodName);
            args.cacheFile = cacheFile;
        }

        var transformedData = new SplitTrainTestTransform(host, args, data);

        var counter1 = new Dictionary<int, List<int>>();
        using (var cursor = transformedData.GetRowCursor(transformedData.OutputSchema))
        {
            int index = SchemaHelper.GetColumnIndex(cursor.Schema, "Y");
            var sortColumnGetter = cursor.GetGetter<int>(SchemaHelper._dc(index, cursor));
            index = SchemaHelper.GetColumnIndex(cursor.Schema, args.newColumn);
            var partGetter = cursor.GetGetter<int>(SchemaHelper._dc(index, cursor));
            var schema = SchemaHelper.ToString(cursor.Schema);
            if (string.IsNullOrEmpty(schema))
                throw new Exception("null");
            if (!schema.Contains("Part:I4"))
                throw new Exception(schema);
            var schema2 = SchemaHelper.ToString(transformedData.OutputSchema);
            SchemaHelper.CheckSchema(host, transformedData.OutputSchema, cursor.Schema);
            int got = 0;
            int part = 0;
            while (cursor.MoveNext())
            {
                sortColumnGetter(ref got);
                partGetter(ref part);
                if (!counter1.ContainsKey(part))
                    counter1[part] = new List<int>();
                if (counter1[part].Any() && got.Equals(counter1[part][counter1[part].Count - 1]))
                    throw new Exception("Unexpected value, they should be all different.");
                counter1[part].Add(got);
            }
        }

        // Check that there is no overlap.
        if (counter1.Count != 2)
            throw new Exception(string.Format("Too many or not enough parts: {0}", counter1.Count));
        var nb = counter1.Select(c => c.Value.Count).Sum();
        if (inputs.Length != nb)
            throw new Exception(string.Format("Length mismatch: {0} != {1}", inputs.Length, nb));
        foreach (var part in counter1)
        {
            var hash = part.Value.ToDictionary(c => c, d => d);
            if (hash.Count != part.Value.Count)
                throw new Exception(string.Format("Not identical id for part {0}", part));
        }
        var part0 = new HashSet<int>(counter1[0]);
        var part1 = new HashSet<int>(counter1[1]);
        if (part0.Intersect(part1).Any())
            throw new Exception("Intersection is not null.");

        // Check sizes.
        if (part0.Count > part1.Count * 2 + 15)
            throw new Exception("Sizes are different from the expected ratios.");
        if (part0.Count < part1.Count + 5)
            throw new Exception("Sizes are different from the expected ratios.");

        // We check that a second run brings the same results (CacheView).
        var counter2 = new Dictionary<int, List<int>>();
        using (var cursor = transformedData.GetRowCursor(transformedData.OutputSchema))
        {
            var schema = SchemaHelper.ToString(cursor.Schema);
            if (string.IsNullOrEmpty(schema))
                throw new Exception("null");
            if (!schema.Contains("Part:I4"))
                throw new Exception(schema);
            var schema2 = SchemaHelper.ToString(transformedData.OutputSchema);
            SchemaHelper.CheckSchema(host, transformedData.OutputSchema, cursor.Schema);
            int index = SchemaHelper.GetColumnIndex(cursor.Schema, "Y");
            var sortColumnGetter = cursor.GetGetter<int>(SchemaHelper._dc(index, cursor));
            index = SchemaHelper.GetColumnIndex(cursor.Schema, args.newColumn);
            var partGetter = cursor.GetGetter<int>(SchemaHelper._dc(index, cursor));
            int got = 0;
            int part = 0;
            while (cursor.MoveNext())
            {
                sortColumnGetter(ref got);
                partGetter(ref part);
                if (!counter2.ContainsKey(part))
                    counter2[part] = new List<int>();
                counter2[part].Add(got);
            }
        }

        if (counter1.Count != counter2.Count)
            throw new Exception("Not the same number of parts.");
        foreach (var pair in counter1)
        {
            var list1 = pair.Value;
            var list2 = counter2[pair.Key];
            var difList = list1.Where(a => !list2.Any(a1 => a1 == a))
                               .Union(list2.Where(a => !list1.Any(a1 => a1 == a)));
            if (difList.Any())
                throw new Exception("Not the same results for a part.");
        }
    }
}
/// <summary>
/// Runs a simple test.
/// </summary>
public static void TestScikitAPI()
{
    var inputs = new[] {
        new ExampleVector() { X = new float[] { 1, 10, 100 } },
        new ExampleVector() { X = new float[] { 2, 3, 5 } },
        new ExampleVector() { X = new float[] { 2, 4, 5 } },
        new ExampleVector() { X = new float[] { 2, 4, 7 } },
    };
    var inputs2 = new[] {
        new ExampleVector() { X = new float[] { -1, -10, -100 } },
        new ExampleVector() { X = new float[] { -2, -3, -5 } },
        new ExampleVector() { X = new float[] { 3, 4, 5 } },
        new ExampleVector() { X = new float[] { 3, 4, 7 } },
    };

    /*using (*/
    var host = new ConsoleEnvironment();
    {
        ComponentHelper.AddStandardComponents(host);
        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
        {
            var predictor = pipe.Train(data, feature: "X");
            if (predictor == null)
                throw new Exception("Test failed: no predictor.");

            var data2 = DataViewConstructionUtils.CreateFromEnumerable(host, inputs2);
            var predictions = pipe.Predict(data2);
            var df = DataFrameIO.ReadView(predictions);
            if (df.Shape.Item1 != 4 || df.Shape.Item2 != 12)
                throw new Exception("Test failed: prediction failed.");
            var dfs = df.ToString();
            var dfs2 = dfs.Replace("\n", ";");
            if (!dfs2.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000"))
                throw new Exception("Test failed: prediction failed (header).");
        }
    }
}
public static void TestMultiToBinaryTransformVector(MultiToBinaryTransform.MultiplicationAlgorithm algo, int max)
{
    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        var inputs = new InputOutputU[] {
            new InputOutputU() { X = new float[] { 0.1f, 1.1f }, Y = 0 },
            new InputOutputU() { X = new float[] { 0.2f, 1.2f }, Y = 1 },
            new InputOutputU() { X = new float[] { 0.3f, 1.3f }, Y = 2 }
        };

        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        var args = new MultiToBinaryTransform.Arguments { label = "Y", algo = algo, maxMulti = max };
        var multiplied = new MultiToBinaryTransform(host, args, data);

        using (var cursor = multiplied.GetRowCursor(multiplied.Schema))
        {
            var labelGetter = cursor.GetGetter<uint>(SchemaHelper._dc(1, cursor));
            var labelVectorGetter = cursor.GetGetter<VBuffer<bool>>(SchemaHelper._dc(1, cursor));
            var labelVectorFloatGetter = cursor.GetGetter<VBuffer<float>>(SchemaHelper._dc(1, cursor));
            var binGetter = cursor.GetGetter<bool>(SchemaHelper._dc(2, cursor));
            Contracts.CheckValue(binGetter, "Type mismatch.");

            var cont = new List<Tuple<uint, bool>>();
            bool bin = false;
            uint got = 0;
            var gotv = new VBuffer<bool>();
            var gotvf = new VBuffer<float>();
            while (cursor.MoveNext())
            {
                labelGetter(ref got);
                labelVectorGetter(ref gotv);
                labelVectorFloatGetter(ref gotvf);
                binGetter(ref bin);
                cont.Add(new Tuple<uint, bool>(got, bin));

                if (gotv.Length != 3) throw new Exception("Bad dimension (Length)");
                if (gotv.Count != 1) throw new Exception("Bad dimension (Count)");
                if (!gotv.Values[0]) throw new Exception("Bad value (Count)");
                if (gotv.Indices[0] != got) throw new Exception("Bad index (Count)");
                var ar = gotv.DenseValues().ToArray();
                if (ar.Length != 3) throw new Exception("Bad dimension (dense)");

                if (gotvf.Length != 3) throw new Exception("Bad dimension (Length)f");
                if (gotvf.Count != 1) throw new Exception("Bad dimension (Count)f");
                if (gotvf.Values[0] != 1) throw new Exception("Bad value (Count)f");
                if (gotvf.Indices[0] != got) throw new Exception("Bad index (Count)f");
                var ar2 = gotv.DenseValues().ToArray();
                if (ar2.Length != 3) throw new Exception("Bad dimension (dense)f");
            }

            if (max >= 3)
            {
                if (cont.Count != 9)
                    throw new Exception("It should be 9.");
                if (algo == MultiToBinaryTransform.MultiplicationAlgorithm.Default)
                {
                    for (int i = 0; i < 3; ++i)
                    {
                        var co = cont.Where(c => c.Item1 == (uint)i && c.Item2);
                        if (co.Count() != 1)
                            throw new Exception(string.Format(
                                "Unexpected number of true labels for class {0} - algo={1} - max={2}", i, algo, max));
                    }
                }
            }
            else
            {
                if (cont.Count != 3 * max)
                    throw new Exception(string.Format("It should be {0}.", 3 * max));
            }
        }
    }
}
public void TestTransform2ValueMapperMultiThread()
{
    /*using (*/
    var env = EnvHelper.NewTestEnvironment();
    {
        var host = env.Register("unittest");
        var inputs = new[] {
            new InputOutput { X = new float[] { 0, 1 }, Y = 10 },
            new InputOutput { X = new float[] { 2, 3 }, Y = 100 }
        };

        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        var trv = ExtLambdaTransform.CreateMap(host, data,
            (InputOutput src, InputOutput dst, EnvHelper.EmptyState state) =>
            {
                dst.X = new float[] { src.X[0] + 1f, src.X[1] - 1f };
            },
            (EnvHelper.EmptyState state) => { });

        var ino = new InputOutput { X = new float[] { -5, -5 }, Y = 3 };
        var inob = new VBuffer<float>(2, ino.X);
        var ans = new VBuffer<float>();

        using (var valueMapper = new ValueMapperFromTransformFloat<VBuffer<float>>(host, trv, "X", "X", ignoreOtherColumn: true))
        {
            var mapper = valueMapper.GetMapper<VBuffer<float>, VBuffer<float>>();
            var listy = new List<int>();
            var listx = new List<float>();
            int y = 0;
            for (int i = 0; i < inputs.Length; ++i)
            {
                mapper(in inob, ref ans);
                y = inputs[i].Y;
                if (ans.Count != 2)
                    throw new Exception("Issue with dimension.");
                listx.AddRange(ans.GetValues().ToArray());
                listy.Add((int)y);
            }
            if (listy.Count != 2) throw new Exception("Issue with dimension.");
            if (listy[0] != 10 || listy[1] != 100) throw new Exception("Issue with values.");
            if (listx.Count != 4) throw new Exception("Issue with dimension.");
            if (listx[0] != -4) throw new Exception("Issue with values.");
            if (listx[1] != -6) throw new Exception("Issue with values.");
            if (listx[2] != -4) throw new Exception("Issue with values.");
            if (listx[3] != -6) throw new Exception("Issue with values.");
            if (inob.Count != 2) throw new Exception("Issue with dimension.");
            if (inob.Values[0] != -5) throw new Exception("Values were overwritten.");
            if (inob.Values[1] != -5) throw new Exception("Values were overwritten.");
        }
    }
}
static void TestCacheTransformSimple(int nt, bool async)
{
    /*using (*/
    var host = EnvHelper.NewTestEnvironment(conc: nt == 1 ? 1 : 0);
    {
        var inputs = new InputOutput[] {
            new InputOutput() { X = new float[] { 0, 1 }, Y = 1 },
            new InputOutput() { X = new float[] { 0, 1 }, Y = 0 }
        };

        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        using (var cursor = data.GetRowCursor(data.Schema))
        {
            var sortedValues = new List<int>();
            var sortColumnGetter = cursor.GetGetter<int>(SchemaHelper._dc(1, cursor));
            while (cursor.MoveNext())
            {
                int got = 0;
                sortColumnGetter(ref got);
                sortedValues.Add((int)got);
            }
            if (sortedValues.Count != 2) throw new Exception();
            if (sortedValues[0] != 1) throw new Exception();
            if (sortedValues[1] != 0) throw new Exception();
        }

        var args = new ExtendedCacheTransform.Arguments { numTheads = nt, async = async };
        var transformedData = new ExtendedCacheTransform(host, args, data);
        var lastTransform = transformedData;
        LambdaTransform.CreateMap<InputOutput, InputOutput, EnvHelper.EmptyState>(host, data,
            (input, output, state) => { output.X = input.X; output.Y = input.Y; },
            (EnvHelper.EmptyState state) => { });

        using (var cursor = lastTransform.GetRowCursor(data.Schema))
        {
            var sortedValues = new List<int>();
            var sortColumnGetter = cursor.GetGetter<int>(SchemaHelper._dc(1, cursor));
            while (cursor.MoveNext())
            {
                int got = 0;
                sortColumnGetter(ref got);
                sortedValues.Add((int)got);
            }
            if (sortedValues.Count != 2) throw new Exception();
        }
    }
}
public static void TestMultiToBinaryTransform(MultiToBinaryTransform.MultiplicationAlgorithm algo, int max)
{
    /*using (*/
    var host = EnvHelper.NewTestEnvironment();
    {
        var inputs = new InputOutputU[] {
            new InputOutputU() { X = new float[] { 0.1f, 1.1f }, Y = 0 },
            new InputOutputU() { X = new float[] { 0.2f, 1.2f }, Y = 1 },
            new InputOutputU() { X = new float[] { 0.3f, 1.3f }, Y = 2 }
        };

        var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
        var args = new MultiToBinaryTransform.Arguments { label = "Y", algo = algo, maxMulti = max };
        var multiplied = new MultiToBinaryTransform(host, args, data);

        using (var cursor = multiplied.GetRowCursor(multiplied.Schema))
        {
            var labelGetter = cursor.GetGetter<uint>(SchemaHelper._dc(1, cursor));
            var binGetter = cursor.GetGetter<bool>(SchemaHelper._dc(2, cursor));
            var cont = new List<Tuple<uint, bool>>();
            bool bin = false;
            while (cursor.MoveNext())
            {
                uint got = 0;
                labelGetter(ref got);
                binGetter(ref bin);
                cont.Add(new Tuple<uint, bool>(got, bin));
            }

            if (max >= 3)
            {
                if (cont.Count != 9)
                    throw new Exception("It should be 9.");
                if (algo == MultiToBinaryTransform.MultiplicationAlgorithm.Default)
                {
                    for (int i = 0; i < 3; ++i)
                    {
                        var co = cont.Where(c => c.Item1 == (uint)i && c.Item2);
                        if (co.Count() != 1)
                            throw new Exception(string.Format(
                                "Unexpected number of true labels for class {0} - algo={1} - max={2}", i, algo, max));
                    }
                }
            }
            else
            {
                if (cont.Count != 3 * max)
                    throw new Exception(string.Format("It should be {0}.", 3 * max));
            }
        }
    }
}