[ConditionalFact(typeof(BaseTestBaseline), nameof(BaseTestBaseline.LessThanNetCore30OrNotNetCore))] // netcore3.0 output differs from Baseline
void TestSsaSpikeEstimator()
{
    int Confidence = 95;
    int PValueHistorySize = 10;
    int SeasonalitySize = 10;
    int NumberOfSeasonsInTraining = 5;
    int MaxTrainingSize = NumberOfSeasonsInTraining * SeasonalitySize;

    List<Data> data = new List<Data>();
    var dataView = Env.CreateStreamingDataView(data);

    for (int j = 0; j < NumberOfSeasonsInTraining; j++)
    {
        for (int i = 0; i < SeasonalitySize; i++)
        {
            data.Add(new Data(i));
        }
    }

    for (int i = 0; i < PValueHistorySize; i++)
    {
        data.Add(new Data(i * 100));
    }

    var pipe = new SsaSpikeEstimator(Env, "Value", "Change", Confidence, PValueHistorySize, MaxTrainingSize, SeasonalitySize);

    var xyData = new List<TestDataXY> { new TestDataXY() { A = new float[inputSize] } };
    var stringData = new List<TestDataDifferntType> { new TestDataDifferntType() { data_0 = new string[inputSize] } };

    var invalidDataWrongNames = ComponentCreation.CreateDataView(Env, xyData);
    var invalidDataWrongTypes = ComponentCreation.CreateDataView(Env, stringData);

    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);
    Done();
}
public void NAIndicatorWorkout()
{
    var data = new[]
    {
        new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } },
        new TestClass() { A = float.NaN, B = double.NaN, C = new float[2] { float.NaN, float.NaN }, D = new double[2] { double.NaN, double.NaN } },
        new TestClass() { A = float.NegativeInfinity, B = double.NegativeInfinity, C = new float[2] { float.NegativeInfinity, float.NegativeInfinity }, D = new double[2] { double.NegativeInfinity, double.NegativeInfinity } },
        new TestClass() { A = float.PositiveInfinity, B = double.PositiveInfinity, C = new float[2] { float.PositiveInfinity, float.PositiveInfinity }, D = new double[2] { double.PositiveInfinity, double.PositiveInfinity } },
        new TestClass() { A = 2, B = 1, C = new float[2] { 3, 4 }, D = new double[2] { 5, 6 } },
    };

    var dataView = ComponentCreation.CreateDataView(Env, data);
    var pipe = new MissingValueIndicatorEstimator(Env,
        new (string input, string output)[] { ("A", "NAA"), ("B", "NAB"), ("C", "NAC"), ("D", "NAD") });
    TestEstimatorCore(pipe, dataView);
    Done();
}
public void TextNormalizerWorkout()
{
    var data = new[]
    {
        new TestClass() { A = "A 1, b. c! йЁ 24 ", B = new string[2] { "~``ё 52ds й vc", "6ksj94 vd ё dakl Юds Ё q й" } },
        new TestClass() { A = null, B = new string[2] { null, string.Empty } }
    };

    var dataView = ComponentCreation.CreateDataView(Env, data);
    var pipe = new TextNormalizerEstimator(Env, columns: new[] { ("A", "NormA"), ("B", "NormB") });
    TestEstimatorCore(pipe, dataView);
    Done();
}
[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // x86 output differs from Baseline
public void OnnxModelMultiInput()
{
    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        return;

    var modelFile = @"twoinput\twoinput.onnx";
    using (var env = new ConsoleEnvironment(seed: 1, conc: 1))
    {
        var samplevector = GetSampleArrayData();
        var dataView = ComponentCreation.CreateDataView(Env,
            new TestDataMulti[]
            {
                new TestDataMulti() { ina = new float[] { 1, 2, 3, 4, 5 }, inb = new float[] { 1, 2, 3, 4, 5 } }
            });
        var onnx = OnnxTransform.Create(env, dataView, modelFile, new[] { "ina", "inb" }, new[] { "outa", "outb" });

        onnx.Schema.TryGetColumnIndex("outa", out int scoresa);
        onnx.Schema.TryGetColumnIndex("outb", out int scoresb);
        using (var curs = onnx.GetRowCursor(col => col == scoresa || col == scoresb))
        {
            var getScoresa = curs.GetGetter<VBuffer<float>>(scoresa);
            var getScoresb = curs.GetGetter<VBuffer<float>>(scoresb);
            var buffera = default(VBuffer<float>);
            var bufferb = default(VBuffer<float>);

            while (curs.MoveNext())
            {
                getScoresa(ref buffera);
                getScoresb(ref bufferb);
                Assert.Equal(5, buffera.Length);
                Assert.Equal(5, bufferb.Length);
                Assert.Equal(0, buffera.GetValues().ToArray().Sum());
                Assert.Equal(30, bufferb.GetValues().ToArray().Sum());
            }
        }
    }
}
[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // TensorFlow is 64-bit only
public void TensorFlowTransformMatrixMultiplicationTest()
{
    var model_location = "model_matmul/frozen_saved_model.pb";
    var env = new MLContext(seed: 1, conc: 1);

    // Pipeline
    var loader = ComponentCreation.CreateDataView(env,
        new List<TestData>(new TestData[]
        {
            new TestData() { a = new[] { 1.0f, 2.0f, 3.0f, 4.0f }, b = new[] { 1.0f, 2.0f, 3.0f, 4.0f } },
            new TestData() { a = new[] { 2.0f, 2.0f, 2.0f, 2.0f }, b = new[] { 3.0f, 3.0f, 3.0f, 3.0f } }
        }));
    var trans = TensorFlowTransform.Create(env, loader, model_location, new[] { "c" }, new[] { "a", "b" });

    using (var cursor = trans.GetRowCursor(a => true))
    {
        var cgetter = cursor.GetGetter<VBuffer<float>>(2);
        Assert.True(cursor.MoveNext());
        VBuffer<float> c = default;
        cgetter(ref c);
        Assert.Equal(1.0 * 1.0 + 2.0 * 3.0, c.Values[0]);
        Assert.Equal(1.0 * 2.0 + 2.0 * 4.0, c.Values[1]);
        Assert.Equal(3.0 * 1.0 + 4.0 * 3.0, c.Values[2]);
        Assert.Equal(3.0 * 2.0 + 4.0 * 4.0, c.Values[3]);

        Assert.True(cursor.MoveNext());
        c = default;
        cgetter(ref c);
        Assert.Equal(2.0 * 3.0 + 2.0 * 3.0, c.Values[0]);
        Assert.Equal(2.0 * 3.0 + 2.0 * 3.0, c.Values[1]);
        Assert.Equal(2.0 * 3.0 + 2.0 * 3.0, c.Values[2]);
        Assert.Equal(2.0 * 3.0 + 2.0 * 3.0, c.Values[3]);

        Assert.False(cursor.MoveNext());
    }
}
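// A minimal cross-check (reviewer addition; hypothetical helper, not part of the original suite) of the
// products the assertions above encode. It exercises no ML.NET or TensorFlow APIs, only the row-major
// 2x2 arithmetic the test expects the frozen model to compute.
[Fact]
public void MatrixMultiplicationReference()
{
    // Row-major 2x2 multiply: c[i,j] = a[i,0]*b[0,j] + a[i,1]*b[1,j].
    float[] MatMul2x2(float[] a, float[] b) => new[]
    {
        a[0] * b[0] + a[1] * b[2], a[0] * b[1] + a[1] * b[3],
        a[2] * b[0] + a[3] * b[2], a[2] * b[1] + a[3] * b[3],
    };

    // Row 1 of the test data: [1 2; 3 4] * [1 2; 3 4] = [7 10; 15 22].
    Assert.Equal(new float[] { 7, 10, 15, 22 }, MatMul2x2(new float[] { 1, 2, 3, 4 }, new float[] { 1, 2, 3, 4 }));
    // Row 2 of the test data: [2 2; 2 2] * [3 3; 3 3] = [12 12; 12 12].
    Assert.Equal(new float[] { 12, 12, 12, 12 }, MatMul2x2(new float[] { 2, 2, 2, 2 }, new float[] { 3, 3, 3, 3 }));
}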
public void NAReplaceStatic()
{
    string dataPath = GetDataPath("breast-cancer.txt");
    var reader = TextLoader.CreateReader(Env, ctx => (
        ScalarFloat: ctx.LoadFloat(1),
        ScalarDouble: ctx.LoadDouble(1),
        VectorFloat: ctx.LoadFloat(1, 4),
        VectorDouble: ctx.LoadDouble(1, 4)
    ));
    var data = reader.Read(new MultiFileSource(dataPath));
    var wrongCollection = new[] { new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } } };
    var invalidData = ComponentCreation.CreateDataView(Env, wrongCollection);

    var est = data.MakeNewEstimator().
        Append(row => (
            A: row.ScalarFloat.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Maximum),
            B: row.ScalarDouble.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
            C: row.VectorFloat.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
            D: row.VectorDouble.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Minimum)
        ));

    TestEstimatorCore(est.AsDynamic, data.AsDynamic, invalidInput: invalidData);

    var outputPath = GetOutputPath("NAReplace", "featurized.tsv");
    using (var ch = Env.Start("save"))
    {
        var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
        IDataView savedData = TakeFilter.Create(Env, est.Fit(data).Transform(data).AsDynamic, 4);
        savedData = new ChooseColumnsTransform(Env, savedData, "A", "B", "C", "D");
        using (var fs = File.Create(outputPath))
            DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
    }

    CheckEquality("NAReplace", "featurized.tsv");
    Done();
}
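// For reference (summarizing the replacement modes exercised above): Maximum, Mean, and Minimum replace
// missing entries (NaN for float/double columns) with the corresponding statistic computed over each
// column during Fit, so the replacement values are learned from the training data rather than fixed.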
public void GroupTest()
{
    var data = new List<GroupExample>
    {
        new GroupExample { Age = 18, UserName = "Amy", Gender = "Girl" },
        new GroupExample { Age = 18, UserName = "Willy", Gender = "Boy" },
        new GroupExample { Age = 20, UserName = "Dori", Gender = "Fish" },
        new GroupExample { Age = 20, UserName = "Ariel", Gender = "Mermaid" }
    };

    var dataView = ComponentCreation.CreateDataView(Env, data);
    var groupTransform = new GroupTransform(Env, dataView, "Age", "UserName", "Gender");
    var grouped = ML.CreateEnumerable<UngroupExample>(groupTransform, false).ToList();

    // The expected content of grouped contains two rows.
    // Age, UserName,          Gender
    // 18,  {"Amy", "Willy"},  {"Girl", "Boy"}
    // 20,  {"Dori", "Ariel"}, {"Fish", "Mermaid"}
    // Note that "Age, UserName, Gender" is not a row; it just shows the column names per row below it.
    Assert.Equal(2, grouped.Count);

    // grouped[0] is the first output row --- 18, {"Amy", "Willy"}, {"Girl", "Boy"}
    Assert.Equal(18, grouped[0].Age);
    Assert.Equal(2, grouped[0].UserName.Length);
    Assert.Equal("Amy", grouped[0].UserName[0]);
    Assert.Equal("Willy", grouped[0].UserName[1]);
    Assert.Equal(2, grouped[0].Gender.Length);
    Assert.Equal("Girl", grouped[0].Gender[0]);
    Assert.Equal("Boy", grouped[0].Gender[1]);

    // grouped[1] is the second output row --- 20, {"Dori", "Ariel"}, {"Fish", "Mermaid"}
    Assert.Equal(20, grouped[1].Age);
    Assert.Equal(2, grouped[1].UserName.Length);
    Assert.Equal("Dori", grouped[1].UserName[0]);
    Assert.Equal("Ariel", grouped[1].UserName[1]);
    Assert.Equal(2, grouped[1].Gender.Length);
    Assert.Equal("Fish", grouped[1].Gender[0]);
    Assert.Equal("Mermaid", grouped[1].Gender[1]);
}
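// The POCO shapes this test assumes (hypothetical layout, inferred from the usage and assertions above):
// GroupExample carries one scalar row, while UngroupExample mirrors it with UserName and Gender as
// arrays, since GroupTransform pivots every non-key column into one vector per distinct key value.
// class GroupExample   { public int Age; public string UserName;   public string Gender;   }
// class UngroupExample { public int Age; public string[] UserName; public string[] Gender; }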
public void CharTokenizeWorkout()
{
    var data = new[] { new TestClass() { A = "This is a good sentence.", B = new string[2] { "Much words", "Wow So Cool" } } };
    var dataView = ComponentCreation.CreateDataView(Env, data);
    var invalidData = new[] { new TestWrong() { A = 1, B = new float[2] { 2, 3 } } };
    var invalidDataView = ComponentCreation.CreateDataView(Env, invalidData);
    var pipe = new TokenizingByCharactersEstimator(Env, columns: new[] { ("A", "TokenizeA"), ("B", "TokenizeB") });
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataView);
    Done();
}
public void ValueMapOneValueTest()
{
    var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } };
    var dataView = ComponentCreation.CreateDataView(Env, data);

    var keys = new List<string>() { "foo", "bar", "test", "wahoo" };
    var values = new List<int>() { 1, 2, 3, 4 };

    var estimator = new ValueMappingEstimator<string, int>(Env, keys, values,
        new[] { ("A", "D"), ("B", "E"), ("C", "F") });
    TestEstimatorCore(estimator, dataView);
    Done();
}
public void ValueMapOneValueReadOnlyMemoryTest()
{
    // Variant of ValueMapOneValueTest above, keyed on ReadOnlyMemory<char> instead of string;
    // renamed so the two tests can coexist in one class.
    var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } };
    var dataView = ComponentCreation.CreateDataView(Env, data);

    IEnumerable<ReadOnlyMemory<char>> keys = new List<ReadOnlyMemory<char>>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() };
    IEnumerable<int> values = new List<int>() { 1, 2, 3, 4 };

    var estimator = new ValueMappingEstimator<ReadOnlyMemory<char>, int>(Env, keys, values,
        new[] { ("A", "D"), ("B", "E"), ("C", "F") });
    TestEstimatorCore(estimator, dataView);
    Done();
}
public void CategoricalOneHotHashEncoding()
{
    var data = new[] { new TestClass() { A = 1, B = 2, C = 3 }, new TestClass() { A = 4, B = 5, C = 6 } };

    var mlContext = new MLContext();
    var dataView = ComponentCreation.CreateDataView(mlContext, data);
    var pipe = mlContext.Transforms.Categorical.OneHotHashEncoding("A", "CatA", 16, 0, OneHotEncodingTransformer.OutputKind.Bag);

    TestEstimatorCore(pipe, dataView);
    Done();
}
public void CategoricalHashStatic()
{
    string dataPath = GetDataPath("breast-cancer.txt");
    var reader = TextLoader.CreateReader(Env, ctx => (
        ScalarString: ctx.LoadText(1),
        VectorString: ctx.LoadText(1, 4)));
    var data = reader.Read(new MultiFileSource(dataPath));
    var wrongCollection = new[] { new TestClass() { A = "1", B = "2", C = "3" }, new TestClass() { A = "4", B = "5", C = "6" } };
    var invalidData = ComponentCreation.CreateDataView(Env, wrongCollection);

    var est = data.MakeNewEstimator().
        Append(row => (
            A: row.ScalarString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashScalarOutputKind.Ind),
            B: row.VectorString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashVectorOutputKind.Ind),
            C: row.VectorString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashVectorOutputKind.Bag),
            D: row.ScalarString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashScalarOutputKind.Bin),
            E: row.VectorString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashVectorOutputKind.Bin)
        ));

    TestEstimatorCore(est.AsDynamic, data.AsDynamic, invalidInput: invalidData);

    var outputPath = GetOutputPath("CategoricalHash", "featurized.tsv");
    using (var ch = Env.Start("save"))
    {
        var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
        var savedData = TakeFilter.Create(Env, est.Fit(data).Transform(data).AsDynamic, 4);
        savedData = new ChooseColumnsTransform(Env, savedData, "A", "B", "C", "D", "E");
        using (var fs = File.Create(outputPath))
            DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
    }

    CheckEquality("CategoricalHash", "featurized.tsv");
    Done();
}
public void CategoricalStatic()
{
    string dataPath = GetDataPath("breast-cancer.txt");
    var reader = TextLoaderStatic.CreateReader(Env, ctx => (
        ScalarString: ctx.LoadText(1),
        VectorString: ctx.LoadText(1, 4)));
    var data = reader.Read(dataPath);
    var wrongCollection = new[] { new TestClass() { A = 1, B = 2, C = 3 }, new TestClass() { A = 4, B = 5, C = 6 } };
    var invalidData = ComponentCreation.CreateDataView(Env, wrongCollection);

    var est = data.MakeNewEstimator().
        Append(row => (
            A: row.ScalarString.OneHotEncoding(outputKind: CategoricalStaticExtensions.OneHotScalarOutputKind.Ind),
            B: row.VectorString.OneHotEncoding(outputKind: CategoricalStaticExtensions.OneHotVectorOutputKind.Ind),
            C: row.VectorString.OneHotEncoding(outputKind: CategoricalStaticExtensions.OneHotVectorOutputKind.Bag),
            D: row.ScalarString.OneHotEncoding(outputKind: CategoricalStaticExtensions.OneHotScalarOutputKind.Bin),
            E: row.VectorString.OneHotEncoding(outputKind: CategoricalStaticExtensions.OneHotVectorOutputKind.Bin)
        ));

    TestEstimatorCore(est.AsDynamic, data.AsDynamic, invalidInput: invalidData);

    var outputPath = GetOutputPath("Categorical", "featurized.tsv");
    using (var ch = Env.Start("save"))
    {
        var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
        var savedData = TakeFilter.Create(Env, est.Fit(data).Transform(data).AsDynamic, 4);
        var view = new ColumnSelectingTransformer(Env, new string[] { "A", "B", "C", "D", "E" }, null, false).Transform(savedData);
        using (var fs = File.Create(outputPath))
            DataSaverUtils.SaveDataView(ch, saver, view, fs, keepHidden: true);
    }

    CheckEquality("Categorical", "featurized.tsv");
    Done();
}
[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // This test is being fixed as part of issue #1441.
public void MatrixFactorizationInMemoryData()
{
    // Create an in-memory matrix as a list of tuples (column index, row index, value).
    var dataMatrix = new List<MatrixElement>();
    for (uint i = _synthesizedMatrixFirstColumnIndex; i < _synthesizedMatrixFirstColumnIndex + _synthesizedMatrixColumnCount; ++i)
        for (uint j = _synthesizedMatrixFirstRowIndex; j < _synthesizedMatrixFirstRowIndex + _synthesizedMatrixRowCount; ++j)
            dataMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = (i + j) % 5 });

    // Convert the in-memory matrix into an IDataView so that ML.NET components can consume it.
    var dataView = ComponentCreation.CreateDataView(Env, dataMatrix);

    // Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the
    // matrix's column index, and "MatrixRowIndex" as the matrix's row index.
    var mlContext = new MLContext(seed: 1, conc: 1);
    var pipeline = new MatrixFactorizationTrainer(mlContext, "Value", "MatrixColumnIndex", "MatrixRowIndex",
        advancedSettings: s =>
        {
            s.NumIterations = 10;
            s.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
            s.K = 32;
        });

    // Train a matrix factorization model.
    var model = pipeline.Fit(dataView);

    // Apply the trained model to the training set.
    var prediction = model.Transform(dataView);

    // Calculate regression metrics for the prediction result.
    var metrics = mlContext.Regression.Evaluate(prediction, label: "Value", score: "Score");

    // Naive test. Just check the pipeline runs and fits the training data.
    Assert.True(metrics.L2 < 0.1);
}
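// For context: the synthesized label is (column + row) % 5, so the targets cycle through 0..4 across
// the matrix. With rank K = 32 and 10 iterations the factorization has ample capacity to fit this
// pattern on the training set itself, which is why an L2 (mean squared error) below 0.1 is a
// reasonable sanity bound here.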
public void TestMetadata()
{
    var data = new[]
    {
        new TestMeta() { A = new float[2] { 3.5f, 2.5f }, B = 1, C = new double[2] { 5.1f, 6.1f }, D = 7 },
        new TestMeta() { A = new float[2] { 3.5f, 2.5f }, B = 1, C = new double[2] { 5.1f, 6.1f }, D = 7 },
        new TestMeta() { A = new float[2] { 3.5f, 2.5f }, B = 1, C = new double[2] { 5.1f, 6.1f }, D = 7 }
    };

    var dataView = ComponentCreation.CreateDataView(Env, data);
    var pipe = new HashEstimator(Env, new[]
    {
        new HashTransformer.ColumnInfo("A", "HashA", invertHash: 1, hashBits: 10),
        new HashTransformer.ColumnInfo("A", "HashAUnlim", invertHash: -1, hashBits: 10),
        new HashTransformer.ColumnInfo("A", "HashAUnlimOrdered", invertHash: -1, hashBits: 10, ordered: true)
    });
    var result = pipe.Fit(dataView).Transform(dataView);
    ValidateMetadata(result);
    Done();
}
public void PrivateGetSetProperties()
{
    var data = new List<ClassWithGetter>() { new ClassWithGetter(), new ClassWithGetter(), new ClassWithGetter() };
    using (var env = new TlcEnvironment())
    {
        var dataView = ComponentCreation.CreateDataView(env, data);
        var enumeratorSimple = dataView.AsEnumerable<ClassWithSetter>(env, false).GetEnumerator();
        var originalEnumerator = data.GetEnumerator();
        while (enumeratorSimple.MoveNext() && originalEnumerator.MoveNext())
        {
            Assert.True(enumeratorSimple.Current.GetDay == originalEnumerator.Current.Day &&
                enumeratorSimple.Current.GetHour == originalEnumerator.Current.Hour);
        }
    }
}
public void CategoricalWorkout()
{
    var data = new[] { new TestClass() { A = 1, B = 2, C = 3 }, new TestClass() { A = 4, B = 5, C = 6 } };

    var dataView = ComponentCreation.CreateDataView(Env, data);
    var pipe = new OneHotEncodingEstimator(Env, new[]
    {
        new OneHotEncodingEstimator.ColumnInfo("A", "CatA", OneHotEncodingTransformer.OutputKind.Bag),
        new OneHotEncodingEstimator.ColumnInfo("A", "CatB", OneHotEncodingTransformer.OutputKind.Bin),
        new OneHotEncodingEstimator.ColumnInfo("A", "CatC", OneHotEncodingTransformer.OutputKind.Ind),
        new OneHotEncodingEstimator.ColumnInfo("A", "CatD", OneHotEncodingTransformer.OutputKind.Key),
    });

    TestEstimatorCore(pipe, dataView);
    Done();
}
public void CategoricalHashWorkout()
{
    var data = new[] { new TestClass() { A = "1", B = "2", C = "3" }, new TestClass() { A = "4", B = "5", C = "6" } };

    var dataView = ComponentCreation.CreateDataView(Env, data);
    var pipe = new CategoricalHashEstimator(Env, new[]
    {
        new CategoricalHashEstimator.ColumnInfo("A", "CatA", CategoricalTransform.OutputKind.Bag),
        new CategoricalHashEstimator.ColumnInfo("A", "CatB", CategoricalTransform.OutputKind.Bin),
        new CategoricalHashEstimator.ColumnInfo("A", "CatC", CategoricalTransform.OutputKind.Ind),
        new CategoricalHashEstimator.ColumnInfo("A", "CatD", CategoricalTransform.OutputKind.Key),
    });

    TestEstimatorCore(pipe, dataView);
    Done();
}
public void HashWorkout()
{
    var data = new[] { new TestClass() { A = 1, B = 2, C = 3 }, new TestClass() { A = 4, B = 5, C = 6 } };

    var dataView = ComponentCreation.CreateDataView(Env, data);
    var pipe = new HashEstimator(Env, new[]
    {
        new HashTransformer.ColumnInfo("A", "HashA", hashBits: 4, invertHash: -1),
        new HashTransformer.ColumnInfo("B", "HashB", hashBits: 3, ordered: true),
        new HashTransformer.ColumnInfo("C", "HashC", seed: 42),
        new HashTransformer.ColumnInfo("A", "HashD"),
    });

    TestEstimatorCore(pipe, dataView);
    Done();
}
public void TestMetadata()
{
    var data = new[]
    {
        new MetaClass() { A = 1, B = "A" },
        new MetaClass() { A = 2, B = "B" }
    };
    var pipe = new OneHotEncodingEstimator(Env, new[]
    {
        new OneHotEncodingEstimator.ColumnInfo("A", "CatA", CategoricalTransform.OutputKind.Ind),
        new OneHotEncodingEstimator.ColumnInfo("B", "CatB", CategoricalTransform.OutputKind.Key)
    }).Append(new ConvertingEstimator(Env, new[]
    {
        new ConvertingTransform.ColumnInfo("CatA", "ConvA", DataKind.R8),
        new ConvertingTransform.ColumnInfo("CatB", "ConvB", DataKind.U2)
    }));
    var dataView = ComponentCreation.CreateDataView(Env, data);
    dataView = pipe.Fit(dataView).Transform(dataView);
    ValidateMetadata(dataView);
}
void TestSelectDrop()
{
    var data = new[] { new TestClass() { A = 1, B = 2, C = 3 }, new TestClass() { A = 4, B = 5, C = 6 } };
    var dataView = ComponentCreation.CreateDataView(Env, data);
    var est = ColumnSelectingEstimator.DropColumns(Env, "A", "C");
    var transformer = est.Fit(dataView);
    var result = transformer.Transform(dataView);

    var foundColumnA = result.Schema.TryGetColumnIndex("A", out int aIdx);
    var foundColumnB = result.Schema.TryGetColumnIndex("B", out int bIdx);
    var foundColumnC = result.Schema.TryGetColumnIndex("C", out int cIdx);

    Assert.False(foundColumnA);
    Assert.True(foundColumnB);
    // With "A" and "C" dropped, the surviving columns are re-indexed, so "B" is now column 0.
    Assert.Equal(0, bIdx);
    Assert.False(foundColumnC);
}
[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // x86 output differs from Baseline
public void OnnxModelScenario()
{
    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        return;

    var modelFile = "squeezenet/00000001/model.onnx";
    using (var env = new ConsoleEnvironment(seed: 1, conc: 1))
    {
        var samplevector = GetSampleArrayData();

        var dataView = ComponentCreation.CreateDataView(Env,
            new TestData[]
            {
                new TestData() { data_0 = samplevector }
            });
        var onnx = OnnxTransform.Create(env, dataView, modelFile, new[] { "data_0" }, new[] { "softmaxout_1" });

        onnx.Schema.TryGetColumnIndex("softmaxout_1", out int scores);
        using (var curs = onnx.GetRowCursor(col => col == scores))
        {
            var getScores = curs.GetGetter<VBuffer<float>>(scores);
            var buffer = default(VBuffer<float>);
            while (curs.MoveNext())
            {
                getScores(ref buffer);
                // SqueezeNet's softmax output carries one probability per ImageNet class.
                Assert.Equal(1000, buffer.Length);
            }
        }
    }
}
void TestIidSpikeEstimator()
{
    int Confidence = 95;
    int PValueHistorySize = 10;

    List<Data> data = new List<Data>();
    var dataView = Env.CreateStreamingDataView(data);

    for (int i = 0; i < PValueHistorySize; i++)
    {
        data.Add(new Data(i * 100));
    }

    var pipe = new IidSpikeEstimator(Env, "Value", "Change", Confidence, PValueHistorySize);

    var xyData = new List<TestDataXY> { new TestDataXY() { A = new float[inputSize] } };
    var stringData = new List<TestDataDifferntType> { new TestDataDifferntType() { data_0 = new string[inputSize] } };

    var invalidDataWrongNames = ComponentCreation.CreateDataView(Env, xyData);
    var invalidDataWrongTypes = ComponentCreation.CreateDataView(Env, stringData);

    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);
    Done();
}
public void KeyToBinaryVectorWorkout()
{
    var data = new[] { new TestClass() { A = 1, B = 2, C = 3 }, new TestClass() { A = 4, B = 5, C = 6 } };
    var dataView = ComponentCreation.CreateDataView(Env, data);
    dataView = new TermEstimator(Env, new[]
    {
        new TermTransform.ColumnInfo("A", "TermA"),
        new TermTransform.ColumnInfo("B", "TermB"),
        new TermTransform.ColumnInfo("C", "TermC", textKeyValues: true)
    }).Fit(dataView).Transform(dataView);

    var pipe = new KeyToBinaryVectorEstimator(Env,
        new KeyToBinaryVectorTransform.ColumnInfo("TermA", "CatA"),
        new KeyToBinaryVectorTransform.ColumnInfo("TermC", "CatC"));
    TestEstimatorCore(pipe, dataView);
    Done();
}
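// For reference: KeyToBinaryVector encodes each key value by its binary representation rather than a
// one-hot slot, so a key column with cardinality N maps to roughly ceil(log2(N)) bits per value, a
// much denser encoding than KeyToVector when N is large.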
public void WordTokenizeWorkout()
{
    var data = new[] { new TestClass() { A = "This is a good sentence.", B = new string[2] { "Much words", "Wow So Cool" } } };
    var dataView = ComponentCreation.CreateDataView(Env, data);
    var invalidData = new[] { new TestWrong() { A = 1, B = new float[2] { 2, 3 } } };
    var invalidDataView = ComponentCreation.CreateDataView(Env, invalidData);
    var pipe = new WordTokenizingEstimator(Env, new[]
    {
        new WordTokenizeTransform.ColumnInfo("A", "TokenizeA"),
        new WordTokenizeTransform.ColumnInfo("B", "TokenizeB"),
    });

    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataView);
    Done();
}
void TestSelectWorkout()
{
    var data = new[] { new TestClass() { A = 1, B = 2, C = 3 }, new TestClass() { A = 4, B = 5, C = 6 } };
    var invalidData = new[] { new TestClass2 { D = 3, E = 5 } };

    var dataView = ComponentCreation.CreateDataView(Env, data);
    var invalidDataView = ComponentCreation.CreateDataView(Env, invalidData);

    // Workout on keep columns.
    var est = ML.Transforms.SelectColumns(new[] { "A", "B" });
    TestEstimatorCore(est, validFitInput: dataView, invalidInput: invalidDataView);

    // Workout on select columns with hidden: true.
    est = ML.Transforms.SelectColumns(new[] { "A", "B" }, true);
    TestEstimatorCore(est, validFitInput: dataView, invalidInput: invalidDataView);
}
public void TestOldSavingAndLoading()
{
    var data = new[] { new TestClass() { A = "This is a good sentence.", B = new string[2] { "Much words", "Wow So Cool" } } };
    var dataView = ComponentCreation.CreateDataView(Env, data);
    var pipe = new WordTokenizingEstimator(Env, new[]
    {
        new WordTokenizingTransformer.ColumnInfo("A", "TokenizeA"),
        new WordTokenizingTransformer.ColumnInfo("B", "TokenizeB"),
    });
    var result = pipe.Fit(dataView).Transform(dataView);
    var resultRoles = new RoleMappedData(result);
    using (var ms = new MemoryStream())
    {
        TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
        ms.Position = 0;
        var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
    }
}
public void FfmBinaryClassificationWithAdvancedArguments()
{
    var mlContext = new MLContext(seed: 0);
    var data = DatasetUtils.GenerateFfmSamples(500);
    var dataView = ComponentCreation.CreateDataView(mlContext, data.ToList());

    var ffmArgs = new FieldAwareFactorizationMachineTrainer.Arguments();

    // Customize the field names.
    ffmArgs.FeatureColumn = nameof(DatasetUtils.FfmExample.Field0); // First field.
    ffmArgs.ExtraFeatureColumns = new[] { nameof(DatasetUtils.FfmExample.Field1), nameof(DatasetUtils.FfmExample.Field2) };

    var pipeline = new FieldAwareFactorizationMachineTrainer(mlContext, ffmArgs);
    var model = pipeline.Fit(dataView);
    var prediction = model.Transform(dataView);
    var metrics = mlContext.BinaryClassification.Evaluate(prediction);

    // Run a sanity check against a few of the metrics.
    Assert.InRange(metrics.Accuracy, 0.9, 1);
    Assert.InRange(metrics.Auc, 0.9, 1);
    Assert.InRange(metrics.Auprc, 0.9, 1);
}
public void TestMetadataPropagation()
{
    var data = new[]
    {
        new TestMeta() { A = new string[2] { "A", "B" }, B = "C", C = new int[2] { 3, 5 }, D = 6, E = new float[2] { 1.0f, 2.0f }, F = 1.0f, G = new string[2] { "A", "D" }, H = "D" },
        new TestMeta() { A = new string[2] { "A", "B" }, B = "C", C = new int[2] { 5, 3 }, D = 1, E = new float[2] { 3.0f, 4.0f }, F = -1.0f, G = new string[2] { "E", "A" }, H = "E" },
        new TestMeta() { A = new string[2] { "A", "B" }, B = "C", C = new int[2] { 3, 5 }, D = 6, E = new float[2] { 5.0f, 6.0f }, F = 1.0f, G = new string[2] { "D", "E" }, H = "D" }
    };

    var dataView = ComponentCreation.CreateDataView(Env, data);
    var pipe = new OneHotEncodingEstimator(Env, new[]
    {
        new OneHotEncodingEstimator.ColumnInfo("A", "CatA", OneHotEncodingTransformer.OutputKind.Bag),
        new OneHotEncodingEstimator.ColumnInfo("B", "CatB", OneHotEncodingTransformer.OutputKind.Bag),
        new OneHotEncodingEstimator.ColumnInfo("C", "CatC", OneHotEncodingTransformer.OutputKind.Bag),
        new OneHotEncodingEstimator.ColumnInfo("D", "CatD", OneHotEncodingTransformer.OutputKind.Bag),
        new OneHotEncodingEstimator.ColumnInfo("E", "CatE", OneHotEncodingTransformer.OutputKind.Ind),
        new OneHotEncodingEstimator.ColumnInfo("F", "CatF", OneHotEncodingTransformer.OutputKind.Ind),
        new OneHotEncodingEstimator.ColumnInfo("G", "CatG", OneHotEncodingTransformer.OutputKind.Key),
        new OneHotEncodingEstimator.ColumnInfo("H", "CatH", OneHotEncodingTransformer.OutputKind.Key),
        new OneHotEncodingEstimator.ColumnInfo("A", "CatI", OneHotEncodingTransformer.OutputKind.Bin),
        new OneHotEncodingEstimator.ColumnInfo("B", "CatJ", OneHotEncodingTransformer.OutputKind.Bin),
        new OneHotEncodingEstimator.ColumnInfo("C", "CatK", OneHotEncodingTransformer.OutputKind.Bin),
        new OneHotEncodingEstimator.ColumnInfo("D", "CatL", OneHotEncodingTransformer.OutputKind.Bin)
    });
    var result = pipe.Fit(dataView).Transform(dataView);
    ValidateMetadata(result);
    Done();
}
public void TestConvertWorkout()
{
    var data = new[]
    {
        new TestClass() { A = 1, B = new int[2] { 1, 4 } },
        new TestClass() { A = 2, B = new int[2] { 3, 4 } }
    };
    var dataView = ComponentCreation.CreateDataView(Env, data);
    var pipe = new ConvertingEstimator(Env, columns: new[]
    {
        new ConvertingTransform.ColumnInfo("A", "ConvA", DataKind.R4),
        new ConvertingTransform.ColumnInfo("B", "ConvB", DataKind.R4)
    });
    TestEstimatorCore(pipe, dataView);

    var allTypesData = new[]
    {
        new TestPrimitiveClass()
        {
            AA = new[] { "a", "b" },
            AB = new[] { false, true },
            AC = new[] { -1, 1 },
            AD = new uint[] { 0, 1 },
            AE = new byte[] { 0, 1 },
            AF = new sbyte[] { -1, 1 },
            AG = new short[] { -1, 1 },
            AH = new ushort[] { 0, 1 },
            AK = new long[] { -1, 1 },
            AL = new ulong[] { 0, 1 },
            AM = new float[] { 1.0f, 1.0f },
            AN = new double[] { 1.0d, 1.0d }
        },
        new TestPrimitiveClass()
        {
            AA = new[] { "0", "1" },
            AB = new[] { false, true },
            AC = new[] { int.MinValue, int.MaxValue },
            AD = new uint[] { uint.MinValue, uint.MaxValue },
            AE = new byte[] { byte.MinValue, byte.MaxValue },
            AF = new sbyte[] { sbyte.MinValue, sbyte.MaxValue },
            AG = new short[] { short.MinValue, short.MaxValue },
            AH = new ushort[] { ushort.MinValue, ushort.MaxValue },
            AK = new long[] { long.MinValue, long.MaxValue },
            AL = new ulong[] { ulong.MinValue, ulong.MaxValue },
            AM = new float[] { float.MinValue, float.MaxValue },
            AN = new double[] { double.MinValue, double.MaxValue }
        }
    };

    var allTypesDataView = ComponentCreation.CreateDataView(Env, allTypesData);
    var allTypesPipe = new ConvertingEstimator(Env, columns: new[]
    {
        new ConvertingTransform.ColumnInfo("AA", "ConvA", DataKind.R4),
        new ConvertingTransform.ColumnInfo("AB", "ConvB", DataKind.R4),
        new ConvertingTransform.ColumnInfo("AC", "ConvC", DataKind.R4),
        new ConvertingTransform.ColumnInfo("AD", "ConvD", DataKind.R4),
        new ConvertingTransform.ColumnInfo("AE", "ConvE", DataKind.R4),
        new ConvertingTransform.ColumnInfo("AF", "ConvF", DataKind.R4),
        new ConvertingTransform.ColumnInfo("AG", "ConvG", DataKind.R4),
        new ConvertingTransform.ColumnInfo("AH", "ConvH", DataKind.R4),
        new ConvertingTransform.ColumnInfo("AK", "ConvK", DataKind.R4),
        new ConvertingTransform.ColumnInfo("AL", "ConvL", DataKind.R4),
        new ConvertingTransform.ColumnInfo("AM", "ConvM", DataKind.R4),
        new ConvertingTransform.ColumnInfo("AN", "ConvN", DataKind.R4)
    });
    TestEstimatorCore(allTypesPipe, allTypesDataView);

    var outputPath = GetOutputPath("Convert", "Types.tsv");
    using (var ch = Env.Start("save"))
    {
        var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
        var savedData = TakeFilter.Create(Env, allTypesPipe.Fit(allTypesDataView).Transform(allTypesDataView), 2);
        using (var fs = File.Create(outputPath))
            DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
    }

    CheckEquality("Convert", "Types.tsv");
    Done();
}
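// Note: several of the extreme inputs above (e.g. int.MaxValue, long.MinValue/MaxValue, ulong.MaxValue)
// are not exactly representable as R4 (single-precision float), so the converted output rounds to the
// nearest representable value; double.MinValue/MaxValue overflow to negative/positive infinity in
// single precision. The baseline file captures exactly this rounding behavior.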