[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // x86 fails with "An attempt was made to load a program with an incorrect format."
void TestSimpleCase()
{
    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        return;
    var modelFile = "squeezenet/00000001/model.onnx";
    var samplevector = GetSampleArrayData();
    var dataView = ComponentCreation.CreateDataView(Env,
        new TestData[]
        {
            new TestData() { data_0 = samplevector },
            new TestData() { data_0 = samplevector }
        });
    var xyData = new List<TestDataXY> { new TestDataXY() { A = new float[inputSize] } };
    var stringData = new List<TestDataDifferntType> { new TestDataDifferntType() { data_0 = new string[inputSize] } };
    var sizeData = new List<TestDataSize> { new TestDataSize() { data_0 = new float[2] } };
    var pipe = new OnnxScoringEstimator(Env, modelFile, new[] { "data_0" }, new[] { "softmaxout_1" });

    var invalidDataWrongNames = ComponentCreation.CreateDataView(Env, xyData);
    var invalidDataWrongTypes = ComponentCreation.CreateDataView(Env, stringData);
    var invalidDataWrongVectorSize = ComponentCreation.CreateDataView(Env, sizeData);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);

    pipe.GetOutputSchema(SchemaShape.Create(invalidDataWrongVectorSize.Schema));
    try
    {
        pipe.Fit(invalidDataWrongVectorSize);
        Assert.False(true);
    }
    catch (ArgumentOutOfRangeException) { }
    catch (InvalidOperationException) { }
}
public void TestSelectTagContactViewTransform()
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var firstData = FileHelper.GetOutputFile("first.idv", methodName);
    var outData = FileHelper.GetOutputFile("outData.txt", methodName);
    var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);
    using (var env = EnvHelper.NewTestEnvironment())
    {
        var inputs = new[]
        {
            new ExampleA() { X = new float[] { 0, 1, 4 } },
            new ExampleA() { X = new float[] { 2, 3, 7 } }
        };

        // Create IDV
        IDataView loader = env.CreateStreamingDataView(inputs);
        var saver = ComponentCreation.CreateSaver(env, "binary");
        using (var ch = env.Start("save"))
        {
            using (var fs0 = env.CreateOutputFile(firstData))
                DataSaverUtils.SaveDataView(ch, saver, loader, fs0, true);

            // Create parallel pipeline
            loader = env.CreateStreamingDataView(inputs);
            var data = env.CreateTransform("Scaler{col=X1:X}", loader);
            data = env.CreateTransform(string.Format("selecttag{{t=first s=second f={0}}}", firstData), data);
            data = env.CreateTransform("Scaler{col=X1:X}", data);
            var merged = env.CreateTransform("append{t=first}", data);

            // Save the outcome
            var text = env.CreateSaver("Text");
            var columns = new int[merged.Schema.Count];
            for (int i = 0; i < columns.Length; ++i)
                columns[i] = i;
            using (var fs2 = File.Create(outData))
                text.SaveData(fs2, merged, columns);

            // Final checking
            var lines = File.ReadAllLines(outData);
            if (!lines.Any())
                throw new Exception("Empty file.");
            if (lines.Length != 9)
                throw new Exception("Some lines are missing.");
        }
    }
}
[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // This test is being fixed as part of issue #1441.
public void MatrixFactorizationInMemoryData()
{
    // Create an in-memory matrix as a list of tuples (column index, row index, value).
    var dataMatrix = new List<MatrixElement>();
    for (uint i = _synthesizedMatrixFirstColumnIndex; i < _synthesizedMatrixFirstColumnIndex + _synthesizedMatrixColumnCount; ++i)
        for (uint j = _synthesizedMatrixFirstRowIndex; j < _synthesizedMatrixFirstRowIndex + _synthesizedMatrixRowCount; ++j)
            dataMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = (i + j) % 5 });

    // Convert the in-memory matrix into an IDataView so that ML.NET components can consume it.
    var dataView = ComponentCreation.CreateDataView(Env, dataMatrix);

    // Create a matrix factorization trainer which consumes "Value" as the training label, "MatrixColumnIndex" as the
    // matrix's column index, and "MatrixRowIndex" as the matrix's row index.
    var mlContext = new MLContext(seed: 1, conc: 1);
    var pipeline = new MatrixFactorizationTrainer(mlContext, "Value", "MatrixColumnIndex", "MatrixRowIndex",
        advancedSettings: s =>
        {
            s.NumIterations = 10;
            s.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
            s.K = 32;
        });

    // Train a matrix factorization model.
    var model = pipeline.Fit(dataView);

    // Check that the trained model has the expected column names and types.
    Assert.True(model.MatrixColumnIndexColumnName == "MatrixColumnIndex");
    Assert.True(model.MatrixRowIndexColumnName == "MatrixRowIndex");
    Assert.True(model.MatrixColumnIndexColumnType.IsKey);
    Assert.True(model.MatrixRowIndexColumnType.IsKey);
    var matColKeyType = model.MatrixColumnIndexColumnType.AsKey;
    Assert.True(matColKeyType.Min == _synthesizedMatrixFirstColumnIndex);
    Assert.True(matColKeyType.Count == _synthesizedMatrixColumnCount);
    var matRowKeyType = model.MatrixRowIndexColumnType.AsKey;
    Assert.True(matRowKeyType.Min == _synthesizedMatrixFirstRowIndex);
    Assert.True(matRowKeyType.Count == _synthesizedMatrixRowCount);

    // Apply the trained model to the training set.
    var prediction = model.Transform(dataView);

    // Calculate regression metrics for the prediction result.
    var metrics = mlContext.Regression.Evaluate(prediction, label: "Value", score: "Score");

    // Naive test. Just check the pipeline runs.
    Assert.True(metrics.L2 < 0.1);
}
/// <summary>
/// Constructor
/// </summary>
/// <param name="env">environment</param>
/// <param name="modelStream">stream containing the model</param>
/// <param name="output">name of the output column</param>
/// <param name="outputIsFloat">output is a float (true) or a vector of floats (false)</param>
/// <param name="conc">number of concurrent threads</param>
/// <param name="features">name of the features column</param>
public ValueMapperPredictionEngineFloat(IHostEnvironment env, Stream modelStream,
    string output = "Probability", bool outputIsFloat = true, int conc = 1,
    string features = "Features")
{
    _env = env;
    if (_env == null)
        throw Contracts.Except("env must not be null");
    var inputs = new FloatVectorInput[0];
    var view = ComponentCreation.CreateStreamingDataView<FloatVectorInput>(_env, inputs);
    long modelPosition = modelStream.Position;
    _predictor = ComponentCreation.LoadPredictorOrNull(_env, modelStream);
    if (_predictor == null)
        throw _env.Except("Unable to load a model.");
    modelStream.Seek(modelPosition, SeekOrigin.Begin);
    _transforms = ComponentCreation.LoadTransforms(_env, modelStream, view);
    if (_transforms == null)
        throw _env.Except("Unable to load a model.");
    var data = _env.CreateExamples(_transforms, features);
    if (data == null)
        throw _env.Except("Cannot create rows.");
    var scorer = _env.CreateDefaultScorer(data, _predictor);
    if (scorer == null)
        throw _env.Except("Cannot create a scorer.");
    _valueMapper = new ValueMapperFromTransformFloat<VBuffer<float>>(_env, scorer, features, output, conc: conc);
    if (_valueMapper == null)
        throw _env.Except("Cannot create a mapper.");
    if (outputIsFloat)
    {
        _mapper = _valueMapper.GetMapper<VBuffer<float>, float>();
        _mapperVector = null;
    }
    else
    {
        _mapper = null;
        _mapperVector = _valueMapper.GetMapper<VBuffer<float>, VBuffer<float>>();
    }
}
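For context, a caller would construct the engine from a saved model stream and then map feature vectors to scores. The sketch below is hypothetical: the "model.zip" path and the Predict wrapper are assumptions for illustration, not members shown in the constructor above.

// Hypothetical usage sketch. Only the constructor is shown above; a Predict
// wrapper around the _mapper delegate is assumed here for illustration.
using (var modelStream = File.OpenRead("model.zip"))
{
    var engine = new ValueMapperPredictionEngineFloat(env, modelStream,
        output: "Probability", outputIsFloat: true);
    // Build a dense feature vector to score.
    var featureVector = new VBuffer<float>(4, new float[] { 1f, 0f, 2f, 3f });
    // A wrapper such as the following would delegate to the _mapper delegate
    // created in the constructor:
    // float probability = engine.Predict(featureVector);
}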
public void NAReplaceWorkout()
{
    var data = new[]
    {
        new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } },
        new TestClass() { A = float.NaN, B = double.NaN, C = new float[2] { float.NaN, float.NaN }, D = new double[2] { double.NaN, double.NaN } },
        new TestClass() { A = float.NegativeInfinity, B = double.NegativeInfinity, C = new float[2] { float.NegativeInfinity, float.NegativeInfinity }, D = new double[2] { double.NegativeInfinity, double.NegativeInfinity } },
        new TestClass() { A = float.PositiveInfinity, B = double.PositiveInfinity, C = new float[2] { float.PositiveInfinity, float.PositiveInfinity }, D = new double[2] { double.PositiveInfinity, double.PositiveInfinity } },
        new TestClass() { A = 2, B = 1, C = new float[2] { 3, 4 }, D = new double[2] { 5, 6 } },
    };

    var dataView = ComponentCreation.CreateDataView(Env, data);
    var pipe = new NAReplaceEstimator(Env,
        new NAReplaceTransform.ColumnInfo("A", "NAA", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
        new NAReplaceTransform.ColumnInfo("B", "NAB", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
        new NAReplaceTransform.ColumnInfo("C", "NAC", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
        new NAReplaceTransform.ColumnInfo("D", "NAD", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean));
    TestEstimatorCore(pipe, dataView);
    Done();
}
void TestSimpleCase()
{
    var modelFile = "model_matmul/frozen_saved_model.pb";
    var dataView = ComponentCreation.CreateDataView(Env,
        new List<TestData>(new TestData[]
        {
            new TestData() { a = new[] { 1.0f, 2.0f, 3.0f, 4.0f }, b = new[] { 1.0f, 2.0f, 3.0f, 4.0f } },
            new TestData() { a = new[] { 2.0f, 2.0f, 2.0f, 2.0f }, b = new[] { 3.0f, 3.0f, 3.0f, 3.0f } }
        }));
    var xyData = new List<TestDataXY> { new TestDataXY() { A = new float[4], B = new float[4] } };
    var stringData = new List<TestDataDifferntType> { new TestDataDifferntType() { a = new string[4], b = new string[4] } };
    var sizeData = new List<TestDataSize> { new TestDataSize() { a = new float[2], b = new float[2] } };
    var pipe = new TensorFlowEstimator(Env, modelFile, new[] { "a", "b" }, new[] { "c" });

    var invalidDataWrongNames = ComponentCreation.CreateDataView(Env, xyData);
    var invalidDataWrongTypes = ComponentCreation.CreateDataView(Env, stringData);
    var invalidDataWrongVectorSize = ComponentCreation.CreateDataView(Env, sizeData);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);

    pipe.GetOutputSchema(SchemaShape.Create(invalidDataWrongVectorSize.Schema));
    try
    {
        pipe.Fit(invalidDataWrongVectorSize);
        Assert.False(true);
    }
    catch (ArgumentOutOfRangeException) { }
    catch (InvalidOperationException) { }
}
void TestSelectColumnsWithSameName()
{
    var data = new[] { new TestClass() { A = 1, B = 2, C = 3 }, new TestClass() { A = 4, B = 5, C = 6 } };
    var dataView = ComponentCreation.CreateDataView(Env, data);
    var est = new ColumnCopyingEstimator(Env, new[] { ("A", "A"), ("B", "B") });
public void TestMetadataPropagation()
{
    var data = new[]
    {
        new TestMeta() { A = new string[2] { "A", "B" }, B = "C", C = new float[2] { 1.0f, 2.0f }, D = 1.0f, E = new string[2] { "A", "D" }, F = "D" },
        new TestMeta() { A = new string[2] { "A", "B" }, B = "C", C = new float[2] { 3.0f, 4.0f }, D = -1.0f, E = new string[2] { "E", "A" }, F = "E" },
        new TestMeta() { A = new string[2] { "A", "B" }, B = "C", C = new float[2] { 5.0f, 6.0f }, D = 1.0f, E = new string[2] { "D", "E" }, F = "D" }
    };

    var dataView = ComponentCreation.CreateDataView(Env, data);
    var bagPipe = new OneHotHashEncodingEstimator(Env,
        new OneHotHashEncodingEstimator.ColumnInfo("A", "CatA", OneHotEncodingTransformer.OutputKind.Bag, invertHash: -1),
        new OneHotHashEncodingEstimator.ColumnInfo("B", "CatB", OneHotEncodingTransformer.OutputKind.Bag, invertHash: -1),
        new OneHotHashEncodingEstimator.ColumnInfo("C", "CatC", OneHotEncodingTransformer.OutputKind.Bag, invertHash: -1),
        new OneHotHashEncodingEstimator.ColumnInfo("D", "CatD", OneHotEncodingTransformer.OutputKind.Bag, invertHash: -1),
        new OneHotHashEncodingEstimator.ColumnInfo("E", "CatE", OneHotEncodingTransformer.OutputKind.Ind, invertHash: -1),
        new OneHotHashEncodingEstimator.ColumnInfo("F", "CatF", OneHotEncodingTransformer.OutputKind.Ind, invertHash: -1),
        new OneHotHashEncodingEstimator.ColumnInfo("A", "CatG", OneHotEncodingTransformer.OutputKind.Key, invertHash: -1),
        new OneHotHashEncodingEstimator.ColumnInfo("B", "CatH", OneHotEncodingTransformer.OutputKind.Key, invertHash: -1),
        new OneHotHashEncodingEstimator.ColumnInfo("A", "CatI", OneHotEncodingTransformer.OutputKind.Bin, invertHash: -1),
        new OneHotHashEncodingEstimator.ColumnInfo("B", "CatJ", OneHotEncodingTransformer.OutputKind.Bin, invertHash: -1));

    var bagResult = bagPipe.Fit(dataView).Transform(dataView);
    ValidateMetadata(bagResult);
    Done();
}
/// <summary>
/// Register a factory method with the component factory.
/// </summary>
/// <param name="name">Name of the component type.</param>
/// <param name="factory">Factory method.</param>
/// <param name="Overwrite">If true, replace any factory already registered under <paramref name="name"/>.</param>
public static void RegisterComponentType(String name, ComponentCreation factory, bool Overwrite = false)
{
    // Drop the existing entry first if the caller asked to overwrite.
    if (Overwrite)
        m_FactoryMethods.Remove(name);
    if (!m_FactoryMethods.ContainsKey(name))
        m_FactoryMethods.Add(name, factory);
}
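A registration call might look like the following. Here `ComponentCreation` is a factory delegate; the parameterless lambda shape and the `HealthComponent` type are assumptions for illustration, since the delegate's declaration is not part of this snippet.

// Hypothetical usage: the factory lambda shape and HealthComponent are assumed.
RegisterComponentType("Health", () => new HealthComponent());
// Re-registering under the same name only takes effect when Overwrite is true;
// otherwise the existing entry wins.
RegisterComponentType("Health", () => new HealthComponent(), Overwrite: true);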
void TestWorking()
{
    var data = new[] { new TestClass() { A = 1, B = 2, C = 3 }, new TestClass() { A = 4, B = 5, C = 6 } };
    var env = new MLContext();
    var dataView = ComponentCreation.CreateDataView(env, data);
    var est = new ColumnCopyingEstimator(env, new[] { ("A", "D"), ("B", "E"), ("A", "F") });
void TestDnnImageFeaturizer()
{
    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        return;
    var samplevector = getSampleArrayData();
    var dataView = ComponentCreation.CreateDataView(Env,
        new TestData[] { new TestData() { data_0 = samplevector } });
    var xyData = new List<TestDataXY> { new TestDataXY() { A = new float[inputSize] } };
    var stringData = new List<TestDataDifferntType> { new TestDataDifferntType() { data_0 = new string[inputSize] } };
    var sizeData = new List<TestDataSize> { new TestDataSize() { data_0 = new float[2] } };
    var pipe = new DnnImageFeaturizerEstimator(Env,
        m => m.ModelSelector.ResNet18(m.Environment, m.InputColumn, m.OutputColumn), "data_0", "output_1");

    var invalidDataWrongNames = ComponentCreation.CreateDataView(Env, xyData);
    var invalidDataWrongTypes = ComponentCreation.CreateDataView(Env, stringData);
    var invalidDataWrongVectorSize = ComponentCreation.CreateDataView(Env, sizeData);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);

    pipe.GetOutputSchema(SchemaShape.Create(invalidDataWrongVectorSize.Schema));
    try
    {
        pipe.Fit(invalidDataWrongVectorSize);
        Assert.False(true);
    }
    catch (ArgumentOutOfRangeException) { }
    catch (InvalidOperationException) { }
}
public void TestMetadataPropagation()
{
    var data = new[]
    {
        new TestMeta() { A = new string[2] { "A", "B" }, B = "C", C = new int[2] { 3, 5 }, D = 6 },
        new TestMeta() { A = new string[2] { "A", "B" }, B = "C", C = new int[2] { 5, 3 }, D = 1 },
        new TestMeta() { A = new string[2] { "A", "B" }, B = "C", C = new int[2] { 3, 5 }, D = 6 }
    };

    var dataView = ComponentCreation.CreateDataView(Env, data);
    var termEst = new TermEstimator(Env, new[]
    {
        new TermTransform.ColumnInfo("A", "TA", textKeyValues: true),
        new TermTransform.ColumnInfo("B", "TB", textKeyValues: true),
        new TermTransform.ColumnInfo("C", "TC"),
        new TermTransform.ColumnInfo("D", "TD")
    });
    var termTransformer = termEst.Fit(dataView);
    dataView = termTransformer.Transform(dataView);

    var pipe = new KeyToBinaryVectorEstimator(Env,
        new KeyToBinaryVectorTransform.ColumnInfo("TA", "CatA"),
        new KeyToBinaryVectorTransform.ColumnInfo("TB", "CatB"),
        new KeyToBinaryVectorTransform.ColumnInfo("TC", "CatC"),
        new KeyToBinaryVectorTransform.ColumnInfo("TD", "CatD"));
    var result = pipe.Fit(dataView).Transform(dataView);
    ValidateMetadata(result);
    Done();
}
/// <summary>
/// Dump a view in binary format.
/// </summary>
/// <param name="host">IHost</param>
/// <param name="view">view to dump</param>
/// <param name="filename">output filename</param>
public static void ToIdv(IHostEnvironment host, IDataView view, string filename)
{
    var settings = "Binary";
    var saver = ComponentCreation.CreateSaver(host, settings);
    string full_output = Path.GetFullPath(filename);
    using (var ch = host.Start("ToIdv"))
    {
        ch.Info(MessageSensitivity.None, "Saving data into file '{0}' or '{1}'.", filename, full_output);
        using (var fs0 = host.CreateOutputFile(full_output))
            DataSaverUtils.SaveDataView(ch, saver, view, fs0, true);
    }
}
public void WordTokenizeWorkout()
{
    var data = new[] { new TestClass() { A = "This is a good sentence.", B = new string[2] { "Much words", "Wow So Cool" } } };
    var dataView = ComponentCreation.CreateDataView(Env, data);
    var invalidData = new[] { new TestWrong() { A = 1, B = new float[2] { 2, 3 } } };
    var invalidDataView = ComponentCreation.CreateDataView(Env, invalidData);
    var pipe = new WordTokenizingEstimator(Env, new[]
    {
        new WordTokenizingTransformer.ColumnInfo("A", "TokenizeA"),
        new WordTokenizingTransformer.ColumnInfo("B", "TokenizeB"),
    });
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataView);

    // Reuse the pipe trained on dataView in TestEstimatorCore to make predictions.
    var result = pipe.Fit(dataView).Transform(dataView);

    // Extract the transformed result of the first row (the only row we have, because data contains a single TestClass) as a native class.
    var nativeResult = new List<NativeResult>(result.AsEnumerable<NativeResult>(Env, false))[0];

    // Check the tokenization of A. The expected result is { "This", "is", "a", "good", "sentence." }.
    var tokenizeA = new[] { "This", "is", "a", "good", "sentence." };
    Assert.True(tokenizeA.Length == nativeResult.TokenizeA.Length);
    for (int i = 0; i < tokenizeA.Length; ++i)
        Assert.Equal(tokenizeA[i], nativeResult.TokenizeA[i]);

    // Check the tokenization of B. The expected result is { "Much", "words", "Wow", "So", "Cool" }. One may think the expected
    // output should be the 2-D array { { "Much", "words" }, { "Wow", "So", "Cool" } }, but note that ML.NET may flatten
    // outputs when they are high-dimensional tensors.
    var tokenizeB = new[] { "Much", "words", "Wow", "So", "Cool" };
    Assert.True(tokenizeB.Length == nativeResult.TokenizeB.Length);
    for (int i = 0; i < tokenizeB.Length; ++i)
        Assert.Equal(tokenizeB[i], nativeResult.TokenizeB[i]);

    Done();
}
public void TensorFlowTransformMatrixMultiplicationTest()
{
    var model_location = "model_matmul/frozen_saved_model.pb";
    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline: each input row holds two 2x2 matrices, flattened into 4-element vectors.
        var loader = ComponentCreation.CreateDataView(env,
            new List<TestData>(new TestData[]
            {
                new TestData() { a = new[] { 1.0f, 2.0f, 3.0f, 4.0f }, b = new[] { 1.0f, 2.0f, 3.0f, 4.0f } },
                new TestData() { a = new[] { 2.0f, 2.0f, 2.0f, 2.0f }, b = new[] { 3.0f, 3.0f, 3.0f, 3.0f } }
            }));
        var trans = TensorFlowTransform.Create(env, loader, model_location, "c", "a", "b");

        using (var cursor = trans.GetRowCursor(a => true))
        {
            var cgetter = cursor.GetGetter<VBuffer<float>>(2);
            Assert.True(cursor.MoveNext());
            VBuffer<float> c = default;
            cgetter(ref c);
            Assert.Equal(1.0 * 1.0 + 2.0 * 3.0, c.Values[0]);
            Assert.Equal(1.0 * 2.0 + 2.0 * 4.0, c.Values[1]);
            Assert.Equal(3.0 * 1.0 + 4.0 * 3.0, c.Values[2]);
            Assert.Equal(3.0 * 2.0 + 4.0 * 4.0, c.Values[3]);

            Assert.True(cursor.MoveNext());
            c = default;
            cgetter(ref c);
            Assert.Equal(2.0 * 3.0 + 2.0 * 3.0, c.Values[0]);
            Assert.Equal(2.0 * 3.0 + 2.0 * 3.0, c.Values[1]);
            Assert.Equal(2.0 * 3.0 + 2.0 * 3.0, c.Values[2]);
            Assert.Equal(2.0 * 3.0 + 2.0 * 3.0, c.Values[3]);

            Assert.False(cursor.MoveNext());
        }
    }
}
void TestWorking()
{
    var data = new[] { new TestClass() { A = 1, B = 2, C = 3 }, new TestClass() { A = 4, B = 5, C = 6 } };
    using (var env = new ConsoleEnvironment())
    {
        var dataView = ComponentCreation.CreateDataView(env, data);
        var est = new CopyColumnsEstimator(env, new[] { ("A", "D"), ("B", "E"), ("A", "F") });
public void AddTransform(string transform)
{
    if (_env == null)
        throw Contracts.ExceptNotSupp("The class must be initialized with an environment to enable that functionality.");
    var tr = ComponentCreation.CreateTransform(_env, transform, Source);
    if (tr == null)
        throw Contracts.ExceptNotSupp($"Unable to create transform '{transform}'.");
    AddTransform(tr);
}
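Callers pass the same command-line style transform strings used elsewhere on this page. A minimal sketch, assuming `pipeline` is an instance of the class exposing AddTransform:

// Hypothetical usage: "pipeline" is an instance of the class exposing AddTransform.
// The string uses the same command-line transform syntax as the other examples here.
pipeline.AddTransform("Scaler{col=X1:X}");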
void TestSelectColumnsWithMissing()
{
    var data = new[] { new TestClass() { A = 1, B = 2, C = 3 }, new TestClass() { A = 4, B = 5, C = 6 } };
    var dataView = ComponentCreation.CreateDataView(Env, data);
    var est = ColumnSelectingEstimator.KeepColumns(Env, "D", "G");
    Assert.Throws<ArgumentOutOfRangeException>(() => est.Fit(dataView));
}
/// <summary>
/// Dump a view in CSV format.
/// </summary>
/// <param name="host">IHost</param>
/// <param name="view">view to dump</param>
/// <param name="filename">output filename</param>
/// <param name="sep">column separator</param>
/// <param name="schema">include the schema</param>
public static void ToCsv(IHostEnvironment host, IDataView view, string filename, string sep = "\t", bool schema = true)
{
    var settings = string.Format("Text{{sep={0} header=+ schema={1}}}",
        sep == "\t" ? "tab" : sep, schema ? "+" : "-");
    var saver = ComponentCreation.CreateSaver(host, settings);
    string full_output = Path.GetFullPath(filename);
    using (var ch = host.Start("ToCsv"))
    {
        ch.Info(MessageSensitivity.None, "Saving data into file '{0}' or '{1}'.", filename, full_output);
        using (var fs0 = host.CreateOutputFile(full_output))
            DataSaverUtils.SaveDataView(ch, saver, view, fs0, true);
    }
}
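Both dump helpers are called the same way. A minimal sketch, assuming `env` is an IHostEnvironment and `view` an IDataView produced elsewhere:

// Minimal sketch, assuming env (IHostEnvironment) and view (IDataView) already exist.
ToCsv(env, view, "dump.csv", sep: ",", schema: false); // comma-separated, no schema header
ToIdv(env, view, "dump.idv");                          // binary IDV dump of the same view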
public void CategoricalHashStatic()
{
    string dataPath = GetDataPath("breast-cancer.txt");
    var reader = TextLoader.CreateReader(Env, ctx => (
        ScalarString: ctx.LoadText(1),
        VectorString: ctx.LoadText(1, 4)));
    var data = reader.Read(dataPath);
    var wrongCollection = new[] { new TestClass() { A = "1", B = "2", C = "3" }, new TestClass() { A = "4", B = "5", C = "6" } };
    var invalidData = ComponentCreation.CreateDataView(Env, wrongCollection);
    var est = data.MakeNewEstimator().
        Append(row => (
            row.ScalarString,
            row.VectorString,
            // Create a VarVector column
            VarVectorString: row.ScalarString.TokenizeText())).
        Append(row => (
            A: row.ScalarString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashScalarOutputKind.Ind),
            B: row.VectorString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashVectorOutputKind.Ind),
            C: row.VectorString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashVectorOutputKind.Bag),
            D: row.ScalarString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashScalarOutputKind.Bin),
            E: row.VectorString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashVectorOutputKind.Bin),
            F: row.VarVectorString.OneHotHashEncoding()));

    TestEstimatorCore(est.AsDynamic, data.AsDynamic, invalidInput: invalidData);

    var outputPath = GetOutputPath("CategoricalHash", "featurized.tsv");
    using (var ch = Env.Start("save"))
    {
        var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
        var savedData = TakeFilter.Create(Env, est.Fit(data).Transform(data).AsDynamic, 4);
        var view = ColumnSelectingTransformer.CreateKeep(Env, savedData, new[] { "A", "B", "C", "D", "E", "F" });
        using (var fs = File.Create(outputPath))
            DataSaverUtils.SaveDataView(ch, saver, view, fs, keepHidden: true);
    }

    CheckEquality("CategoricalHash", "featurized.tsv");
    Done();
}
public ValueMapperExample(string modelName, string features)
{
    _env = EnvHelper.NewTestEnvironment();
    // The model file is opened twice: once to load the predictor, once to load the transforms.
    _predictor = _env.LoadPredictorOrNull(File.OpenRead(modelName));
    var inputs = new Input[0];
    var view = _env.CreateStreamingDataView<Input>(inputs);
    _transforms = ComponentCreation.LoadTransforms(_env, File.OpenRead(modelName), view);
    var data = _env.CreateExamples(_transforms, features);
    var scorer = _env.CreateDefaultScorer(data, _predictor);
    _valueMapper = new ValueMapperFromTransformFloat<VBuffer<float>>(_env, scorer, "Features", "Probability");
    _mapper = _valueMapper.GetMapper<VBuffer<float>, float>();
}
public void NAIndicatorWorkout()
{
    var data = new[]
    {
        new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } },
        new TestClass() { A = float.NaN, B = double.NaN, C = new float[2] { float.NaN, float.NaN }, D = new double[2] { double.NaN, double.NaN } },
        new TestClass() { A = float.NegativeInfinity, B = double.NegativeInfinity, C = new float[2] { float.NegativeInfinity, float.NegativeInfinity }, D = new double[2] { double.NegativeInfinity, double.NegativeInfinity } },
        new TestClass() { A = float.PositiveInfinity, B = double.PositiveInfinity, C = new float[2] { float.PositiveInfinity, float.PositiveInfinity }, D = new double[2] { double.PositiveInfinity, double.PositiveInfinity } },
        new TestClass() { A = 2, B = 1, C = new float[2] { 3, 4 }, D = new double[2] { 5, 6 } },
    };
    var dataView = ComponentCreation.CreateDataView(Env, data);
    var pipe = new MissingValueIndicatorEstimator(Env,
        new (string input, string output)[] { ("A", "NAA"), ("B", "NAB"), ("C", "NAC"), ("D", "NAD") });
[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // TensorFlow is 64-bit only
public void TensorFlowTransformMatrixMultiplicationTest()
{
    var modelLocation = "model_matmul/frozen_saved_model.pb";
    var mlContext = new MLContext(seed: 1, conc: 1);
    // Pipeline
    var loader = ComponentCreation.CreateDataView(mlContext,
        new List<TestData>(new TestData[]
        {
            new TestData() { a = new[] { 1.0f, 2.0f, 3.0f, 4.0f }, b = new[] { 1.0f, 2.0f, 3.0f, 4.0f } },
            new TestData() { a = new[] { 2.0f, 2.0f, 2.0f, 2.0f }, b = new[] { 3.0f, 3.0f, 3.0f, 3.0f } }
        }));
    var trans = new TensorFlowTransformer(mlContext, modelLocation, new[] { "a", "b" }, new[] { "c" }).Transform(loader);

    using (var cursor = trans.GetRowCursor(a => true))
    {
        var cgetter = cursor.GetGetter<VBuffer<float>>(2);
        Assert.True(cursor.MoveNext());
        VBuffer<float> c = default;
        cgetter(ref c);
        var cValues = c.GetValues();
        Assert.Equal(1.0 * 1.0 + 2.0 * 3.0, cValues[0]);
        Assert.Equal(1.0 * 2.0 + 2.0 * 4.0, cValues[1]);
        Assert.Equal(3.0 * 1.0 + 4.0 * 3.0, cValues[2]);
        Assert.Equal(3.0 * 2.0 + 4.0 * 4.0, cValues[3]);

        Assert.True(cursor.MoveNext());
        c = default;
        cgetter(ref c);
        cValues = c.GetValues();
        Assert.Equal(2.0 * 3.0 + 2.0 * 3.0, cValues[0]);
        Assert.Equal(2.0 * 3.0 + 2.0 * 3.0, cValues[1]);
        Assert.Equal(2.0 * 3.0 + 2.0 * 3.0, cValues[2]);
        Assert.Equal(2.0 * 3.0 + 2.0 * 3.0, cValues[3]);

        Assert.False(cursor.MoveNext());
    }
}
void TestSsaSpikeEstimator()
{
    int Confidence = 95;
    int PValueHistorySize = 10;
    int SeasonalitySize = 10;
    int NumberOfSeasonsInTraining = 5;
    int MaxTrainingSize = NumberOfSeasonsInTraining * SeasonalitySize;

    List<Data> data = new List<Data>();
    // The streaming view enumerates the list lazily, so the rows appended below are visible to the pipeline.
    var dataView = Env.CreateStreamingDataView(data);
    for (int j = 0; j < NumberOfSeasonsInTraining; j++)
        for (int i = 0; i < SeasonalitySize; i++)
            data.Add(new Data(i));
    for (int i = 0; i < PValueHistorySize; i++)
        data.Add(new Data(i * 100));

    var pipe = new SsaSpikeEstimator(Env, "Value", "Change", Confidence, PValueHistorySize, MaxTrainingSize, SeasonalitySize);
    var xyData = new List<TestDataXY> { new TestDataXY() { A = new float[inputSize] } };
    var stringData = new List<TestDataDifferntType> { new TestDataDifferntType() { data_0 = new string[inputSize] } };
    var invalidDataWrongNames = ComponentCreation.CreateDataView(Env, xyData);
    var invalidDataWrongTypes = ComponentCreation.CreateDataView(Env, stringData);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);
    Done();
}
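The `Data` class is not included in the snippet; a plausible minimal shape, consistent with the `new Data(i)` calls and the "Value" input column the estimator consumes, would be:

// Assumed shape of the Data class: one float column named "Value", matching
// the input column of the SsaSpikeEstimator above. Illustrative only.
class Data
{
    public float Value;
    public Data(float value)
    {
        Value = value;
    }
}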
public void TextNormalizerWorkout()
{
    var data = new[]
    {
        new TestClass() { A = "A 1, b. c! йЁ 24 ", B = new string[2] { "~``ё 52ds й vc", "6ksj94 vd ё dakl Юds Ё q й" } },
        new TestClass() { A = null, B = new string[2] { null, string.Empty } }
    };
    var dataView = ComponentCreation.CreateDataView(Env, data);
    var pipe = new TextNormalizerEstimator(Env, columns: new[] { ("A", "NormA"), ("B", "NormB") });
[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // x86 output differs from Baseline
public void OnnxModelMultiInput()
{
    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        return;
    var modelFile = @"twoinput\twoinput.onnx";
    using (var env = new ConsoleEnvironment(seed: 1, conc: 1))
    {
        var samplevector = GetSampleArrayData();
        var dataView = ComponentCreation.CreateDataView(Env,
            new TestDataMulti[]
            {
                new TestDataMulti() { ina = new float[] { 1, 2, 3, 4, 5 }, inb = new float[] { 1, 2, 3, 4, 5 } }
            });
        var onnx = OnnxTransform.Create(env, dataView, modelFile, new[] { "ina", "inb" }, new[] { "outa", "outb" });

        onnx.Schema.TryGetColumnIndex("outa", out int scoresa);
        onnx.Schema.TryGetColumnIndex("outb", out int scoresb);
        using (var curs = onnx.GetRowCursor(col => col == scoresa || col == scoresb))
        {
            var getScoresa = curs.GetGetter<VBuffer<float>>(scoresa);
            var getScoresb = curs.GetGetter<VBuffer<float>>(scoresb);
            var buffera = default(VBuffer<float>);
            var bufferb = default(VBuffer<float>);
            while (curs.MoveNext())
            {
                getScoresa(ref buffera);
                getScoresb(ref bufferb);
                Assert.Equal(5, buffera.Length);
                Assert.Equal(5, bufferb.Length);
                Assert.Equal(0, buffera.GetValues().ToArray().Sum());
                Assert.Equal(30, bufferb.GetValues().ToArray().Sum());
            }
        }
    }
}
public void CharTokenizeWorkout()
{
    var data = new[] { new TestClass() { A = "This is a good sentence.", B = new string[2] { "Much words", "Wow So Cool" } } };
    var dataView = ComponentCreation.CreateDataView(Env, data);
    var invalidData = new[] { new TestWrong() { A = 1, B = new float[2] { 2, 3 } } };
    var invalidDataView = ComponentCreation.CreateDataView(Env, invalidData);
    var pipe = new TokenizingByCharactersEstimator(Env, columns: new[] { ("A", "TokenizeA"), ("B", "TokenizeB") });
public void NAReplaceStatic()
{
    string dataPath = GetDataPath("breast-cancer.txt");
    var reader = TextLoader.CreateReader(Env, ctx => (
        ScalarFloat: ctx.LoadFloat(1),
        ScalarDouble: ctx.LoadDouble(1),
        VectorFloat: ctx.LoadFloat(1, 4),
        VectorDouble: ctx.LoadDouble(1, 4)));
    var data = reader.Read(new MultiFileSource(dataPath));
    var wrongCollection = new[] { new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } } };
    var invalidData = ComponentCreation.CreateDataView(Env, wrongCollection);
    var est = data.MakeNewEstimator().
        Append(row => (
            A: row.ScalarFloat.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Maximum),
            B: row.ScalarDouble.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
            C: row.VectorFloat.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
            D: row.VectorDouble.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Minimum)));

    TestEstimatorCore(est.AsDynamic, data.AsDynamic, invalidInput: invalidData);

    var outputPath = GetOutputPath("NAReplace", "featurized.tsv");
    using (var ch = Env.Start("save"))
    {
        var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
        IDataView savedData = TakeFilter.Create(Env, est.Fit(data).Transform(data).AsDynamic, 4);
        savedData = new ChooseColumnsTransform(Env, savedData, "A", "B", "C", "D");
        using (var fs = File.Create(outputPath))
            DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
    }

    CheckEquality("NAReplace", "featurized.tsv");
    Done();
}
public void GroupTest()
{
    var data = new List<GroupExample>
    {
        new GroupExample { Age = 18, UserName = "Amy", Gender = "Girl" },
        new GroupExample { Age = 18, UserName = "Willy", Gender = "Boy" },
        new GroupExample { Age = 20, UserName = "Dori", Gender = "Fish" },
        new GroupExample { Age = 20, UserName = "Ariel", Gender = "Mermaid" }
    };
    var dataView = ComponentCreation.CreateDataView(Env, data);

    var groupTransform = new GroupTransform(Env, dataView, "Age", "UserName", "Gender");
    var grouped = ML.CreateEnumerable<UngroupExample>(groupTransform, false).ToList();

    // The expected content of grouped contains two rows.
    // Age, UserName,          Gender
    // 18,  {"Amy", "Willy"},  {"Girl", "Boy"}
    // 20,  {"Dori", "Ariel"}, {"Fish", "Mermaid"}
    // Note that "Age, UserName, Gender" is not a row; it just shows the column names of the rows below it.
    Assert.Equal(2, grouped.Count);

    // grouped[0] is the first output row: 18, {"Amy", "Willy"}, {"Girl", "Boy"}.
    Assert.Equal(18, grouped[0].Age);
    Assert.Equal(2, grouped[0].UserName.Length);
    Assert.Equal("Amy", grouped[0].UserName[0]);
    Assert.Equal("Willy", grouped[0].UserName[1]);
    Assert.Equal(2, grouped[0].Gender.Length);
    Assert.Equal("Girl", grouped[0].Gender[0]);
    Assert.Equal("Boy", grouped[0].Gender[1]);

    // grouped[1] is the second output row: 20, {"Dori", "Ariel"}, {"Fish", "Mermaid"}.
    Assert.Equal(20, grouped[1].Age);
    Assert.Equal(2, grouped[1].UserName.Length);
    Assert.Equal("Dori", grouped[1].UserName[0]);
    Assert.Equal("Ariel", grouped[1].UserName[1]);
    Assert.Equal(2, grouped[1].Gender.Length);
    Assert.Equal("Fish", grouped[1].Gender[0]);
    Assert.Equal("Mermaid", grouped[1].Gender[1]);
}
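The `GroupExample` and `UngroupExample` classes are not included in the snippet; shapes consistent with the assertions above would be:

// Assumed class shapes, inferred from the test: GroupTransform keeps "Age" as
// the group key and gathers "UserName" and "Gender" into vector (array) columns.
class GroupExample
{
    public int Age;
    public string UserName;
    public string Gender;
}

class UngroupExample
{
    public int Age;
    public string[] UserName;
    public string[] Gender;
}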
/// <summary>
/// Create a SplitTrainTestTransform transform.
/// </summary>
public SplitTrainTestTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, input)
{
    Host.CheckValue(args, "args");
    args.PostProcess();
    Host.CheckUserArg(args.poolRows >= 0, "poolRows must be >= 0");
    Host.CheckUserArg(!string.IsNullOrEmpty(args.newColumn), "newColumn cannot be empty");
    Host.CheckUserArg(args.ratios != null, "ratios cannot be null");
    Host.CheckUserArg(args.ratios.Length > 1, "Number of ratios must be > 1");
    Host.CheckUserArg(args.filename == null || args.tag != null || args.filename.Length == args.ratios.Length,
        "filename must be either empty or an array of the same size as ratios");
    Host.CheckUserArg(args.tag == null || args.filename != null || args.tag.Length == args.ratios.Length,
        "tag must be either empty or an array of the same size as ratios");
    Host.CheckUserArg(!args.numThreads.HasValue || args.numThreads.Value > 0, "numThreads must be positive.");
    var sum = args.fratios.Sum();
    Host.CheckUserArg(Math.Abs(sum - 1f) < 1e-5, "Sum of ratios must be 1.");
    int col;
    Host.CheckUserArg(!input.Schema.TryGetColumnIndex(args.newColumn, out col), "newColumn must not exist in the input schema.");

    _newColumn = args.newColumn;
    _shuffleInput = args.shuffleInput;
    _poolRows = args.poolRows;
    _filenames = args.filename;
    _seed = args.seed;
    _seedShuffle = args.seedShuffle;
    _ratios = args.fratios;
    _cacheFile = args.cacheFile;
    _reuse = args.reuse;
    _tags = args.tag;

    var saveSettings = args.saverSettings as ICommandLineComponentFactory;
    Host.CheckValue(saveSettings, nameof(saveSettings));
    _saverSettings = string.Format("{0}{{{1}}}", saveSettings.Name, saveSettings.GetSettingsString());
    _saverSettings = _saverSettings.Replace("{}", "");
    var saver = ComponentCreation.CreateSaver(Host, _saverSettings);
    if (saver == null)
        throw Host.Except("Cannot parse '{0}'", _saverSettings);
    _pipedTransform = AppendToPipeline(input);
}