/// <summary>
/// Checks that <c>UngroupTransform</c> in <c>Inner</c> mode expands the array-valued "UserName" and
/// "Gender" columns into one output row per array element, repeating the "Age" value on each of them.
/// </summary>
public void UgroupTest()
{
    // Two input rows, each carrying two user names and two genders.
    var groupedRows = new List<UngroupExample>
    {
        new UngroupExample { Age = 18, UserName = new[] { "Amy", "Willy" }, Gender = new[] { "Girl", "Boy" } },
        new UngroupExample { Age = 20, UserName = new[] { "Dori", "Ariel" }, Gender = new[] { "Fish", "Mermaid" } }
    };
    var input = ComponentCreation.CreateDataView(Env, groupedRows);

    var transform = new UngroupTransform(Env, input, UngroupTransform.UngroupMode.Inner, "UserName", "Gender");
    var ungrouped = ML.CreateEnumerable<GroupExample>(transform, false).ToList();

    // Expected output rows, listed in the order they must appear.
    var expected = new[]
    {
        new { Age = 18, UserName = "Amy", Gender = "Girl" },
        new { Age = 18, UserName = "Willy", Gender = "Boy" },
        new { Age = 20, UserName = "Dori", Gender = "Fish" },
        new { Age = 20, UserName = "Ariel", Gender = "Mermaid" }
    };

    Assert.Equal(expected.Length, ungrouped.Count);
    for (int row = 0; row < expected.Length; ++row)
    {
        Assert.Equal(expected[row].Age, ungrouped[row].Age);
        Assert.Equal(expected[row].UserName, ungrouped[row].UserName);
        Assert.Equal(expected[row].Gender, ungrouped[row].Gender);
    }
}
/// <summary>
/// Exercises <c>WordTokenizingEstimator</c> on a scalar text column ("A") and an array text column ("B"):
/// runs the shared estimator checks via <c>TestEstimatorCore</c>, then verifies the produced tokens.
/// </summary>
public void WordTokenizeWorkout()
{
    var data = new[] { new TestClass() { A = "This is a good sentence.", B = new string[2] { "Much words", "Wow So Cool" } } };
    var dataView = ML.Data.ReadFromEnumerable(data);

    // Same column names but numeric values; handed to TestEstimatorCore as the invalid input.
    var invalidData = new[] { new TestWrong() { A = 1, B = new float[2] { 2, 3 } } };
    var invalidDataView = ML.Data.ReadFromEnumerable(invalidData);

    var pipe = new WordTokenizingEstimator(Env, new[]
    {
        new WordTokenizingEstimator.ColumnInfo("TokenizeA", "A"),
        new WordTokenizingEstimator.ColumnInfo("TokenizeB", "B"),
    });

    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataView);

    // Fit the same pipe on dataView again and transform that data to obtain the tokenized output.
    var result = pipe.Fit(dataView).Transform(dataView);

    // Extract the transformed result of the first row (the only row we have because data contains only one TestClass) as a native class.
    var nativeResult = ML.CreateEnumerable<NativeResult>(result, false).First();

    // Check the tokenization of A. Expected result is { "This", "is", "a", "good", "sentence." }.
    var tokenizeA = new[] { "This", "is", "a", "good", "sentence." };
    // Assert.Equal (rather than Assert.True on a comparison) reports both lengths when the check fails.
    Assert.Equal(tokenizeA.Length, nativeResult.TokenizeA.Length);
    for (int i = 0; i < tokenizeA.Length; ++i)
    {
        Assert.Equal(tokenizeA[i], nativeResult.TokenizeA[i]);
    }

    // Check the tokenization of B. Expected result is { "Much", "words", "Wow", "So", "Cool" }. One may think that the expected output
    // should be a 2-D array { { "Much", "words"}, { "Wow", "So", "Cool" } }, but please note that ML.NET may flatten all outputs if
    // they are high-dimension tensors.
    var tokenizeB = new[] { "Much", "words", "Wow", "So", "Cool" };
    Assert.Equal(tokenizeB.Length, nativeResult.TokenizeB.Length);
    for (int i = 0; i < tokenizeB.Length; ++i)
    {
        Assert.Equal(tokenizeB[i], nativeResult.TokenizeB[i]);
    }

    Done();
}
/// <summary>
/// Checks that <c>GroupTransform</c>, constructed with "Age", "UserName" and "Gender", merges the rows that
/// share an age into a single row whose "UserName" and "Gender" columns are arrays.
/// </summary>
public void GroupTest()
{
    // The user names must match the values asserted below; each age appears on two consecutive rows.
    var data = new List<GroupExample>
    {
        new GroupExample { Age = 18, UserName = "Amy", Gender = "Girl" },
        new GroupExample { Age = 18, UserName = "Willy", Gender = "Boy" },
        new GroupExample { Age = 20, UserName = "Dori", Gender = "Fish" },
        new GroupExample { Age = 20, UserName = "Ariel", Gender = "Mermaid" }
    };
    var dataView = ComponentCreation.CreateDataView(Env, data);

    var groupTransform = new GroupTransform(Env, dataView, "Age", "UserName", "Gender");
    var grouped = ML.CreateEnumerable<UngroupExample>(groupTransform, false).ToList();

    // Expected content of grouped should contain two rows.
    // Age, UserName, Gender
    // 18, {"Amy", "Willy"}, {"Girl", "Boy"}
    // 20, {"Dori", "Ariel"}, {"Fish", "Mermaid"}
    // Note that "Age, UserName, Gender" is not a row; it just shows column names per row below it.
    Assert.Equal(2, grouped.Count);

    // grouped[0] is the first output row --- 18, {"Amy", "Willy"}, {"Girl", "Boy"}
    Assert.Equal(18, grouped[0].Age);
    Assert.Equal(2, grouped[0].UserName.Length);
    Assert.Equal("Amy", grouped[0].UserName[0]);
    Assert.Equal("Willy", grouped[0].UserName[1]);
    Assert.Equal(2, grouped[0].Gender.Length);
    Assert.Equal("Girl", grouped[0].Gender[0]);
    Assert.Equal("Boy", grouped[0].Gender[1]);

    // grouped[1] is the second output row --- 20, {"Dori", "Ariel"}, {"Fish", "Mermaid"}
    Assert.Equal(20, grouped[1].Age);
    // Check the UserName length here (not Gender a second time) so both arrays of this row are verified.
    Assert.Equal(2, grouped[1].UserName.Length);
    Assert.Equal("Dori", grouped[1].UserName[0]);
    Assert.Equal("Ariel", grouped[1].UserName[1]);
    Assert.Equal(2, grouped[1].Gender.Length);
    Assert.Equal("Fish", grouped[1].Gender[0]);
    Assert.Equal("Mermaid", grouped[1].Gender[1]);
}
/// <summary>
/// Verifies that a <c>CustomMappingEstimator</c> built from <c>MyLambda.MyAction</c> fails the estimator checks
/// until the assembly containing <c>MyLambda</c> is registered with the component catalog, and that afterwards
/// every output row's <c>Together</c> value equals the string built from that row's inputs.
/// </summary>
public void TestCustomTransformer()
{
    var source = new MultiFileSource(GetDataPath("adult.tiny.with-schema.txt"));
    var textLoader = ML.Data.CreateTextLoader(
        new[]
        {
            new TextLoader.Column("Float1", DataKind.R4, 9),
            new TextLoader.Column("Float4", DataKind.R4, new[]
            {
                new TextLoader.Range(9),
                new TextLoader.Range(10),
                new TextLoader.Range(11),
                new TextLoader.Range(12)
            })
        },
        hasHeader: true);
    var data = textLoader.Read(source);

    // The estimator is instantiated in a separate, temporary environment to show that saving and
    // loading do not depend on sharing a single environment.
    var tempoEnv = new MLContext();
    var customEst = new CustomMappingEstimator<MyInput, MyOutput>(tempoEnv, MyLambda.MyAction, "MyLambda");

    try
    {
        TestEstimatorCore(customEst, data);
        Assert.True(false, "Cannot work without RegisterAssembly");
    }
    catch (InvalidOperationException ex) when (ex.IsMarked())
    {
        // Expected failure: nothing has been registered yet. Exceptions that are not marked keep propagating.
    }

    ML.ComponentCatalog.RegisterAssembly(typeof(MyLambda).Assembly);
    TestEstimatorCore(customEst, data);

    IDataView transformedData = customEst.Fit(data).Transform(data);
    var inputs = ML.CreateEnumerable<MyInput>(transformedData, true);
    var outputs = ML.CreateEnumerable<MyOutput>(transformedData, true);

    // Pair each input row with its output row and compare against the expected formatted string.
    var rowMatches = inputs.Zip(outputs, (input, output) => output.Together == $"{input.Float1} + {string.Join(", ", input.Float4)}");
    Assert.True(rowMatches.All(isMatch => isMatch));

    Done();
}
/// <summary>
/// Verifies that a <c>CustomMappingEstimator</c> built from <c>MyLambda.MyAction</c> fails the estimator checks
/// until a composition container exposing <c>MyLambda</c> is assigned to the context, and that afterwards
/// every output row's <c>Together</c> value equals the string built from that row's inputs.
/// </summary>
public void TestCustomTransformer()
{
    string dataPath = GetDataPath("adult.tiny.with-schema.txt");
    var source = new MultiFileSource(dataPath);
    var loader = ML.Data.CreateTextLoader(new[]
    {
        new TextLoader.Column("Float1", DataKind.R4, 9),
        new TextLoader.Column("Float4", DataKind.R4, new[] { new TextLoader.Range(9), new TextLoader.Range(10), new TextLoader.Range(11), new TextLoader.Range(12) })
    }, hasHeader: true);
    var data = loader.Read(source);

    // We create a temporary environment to instantiate the custom transformer. This is to ensure that we don't need the same
    // environment for saving and loading.
    var tempoEnv = new MLContext();
    var customEst = new CustomMappingEstimator<MyInput, MyOutput>(tempoEnv, MyLambda.MyAction, "MyLambda");

    bool failedWithoutInjection = false;
    try
    {
        TestEstimatorCore(customEst, data);
    }
    catch (Exception)
    {
        // REVIEW: we should have a common mechanism that will make sure this is 'our' exception thrown.
        failedWithoutInjection = true;
    }

    // Asserted outside the try block: inside it, the assertion's own exception would be swallowed by the
    // catch-all above and this check could never fail.
    Assert.True(failedWithoutInjection, "Cannot work without MEF injection");

    ML.CompositionContainer = new CompositionContainer(new TypeCatalog(typeof(MyLambda)));
    TestEstimatorCore(customEst, data);

    IDataView transformedData = customEst.Fit(data).Transform(data);
    var inputs = ML.CreateEnumerable<MyInput>(transformedData, true);
    var outputs = ML.CreateEnumerable<MyOutput>(transformedData, true);

    // Pair each input row with its output row and compare against the expected formatted string.
    Assert.True(inputs.Zip(outputs, (x, y) => y.Together == $"{x.Float1} + {string.Join(", ", x.Float4)}").All(x => x));

    Done();
}