public void UgroupTest()
        {
            // Two input rows, each carrying parallel two-slot vectors in UserName and Gender.
            // Ungrouping on those two columns should emit one output row per slot, repeating Age.
            var firstRow = new UngroupExample {
                Age = 18, UserName = new[] { "Amy", "Willy" }, Gender = new[] { "Girl", "Boy" }
            };
            var secondRow = new UngroupExample {
                Age = 20, UserName = new[] { "Dori", "Ariel" }, Gender = new[] { "Fish", "Mermaid" }
            };
            var input = new List <UngroupExample> { firstRow, secondRow };
            var inputView = ComponentCreation.CreateDataView(Env, input);

            var transform = new UngroupTransform(Env, inputView, UngroupTransform.UngroupMode.Inner, "UserName", "Gender");
            var rows = ML.CreateEnumerable <GroupExample>(transform, false).ToList();

            Assert.Equal(4, rows.Count);

            // Expected output, column by column, in row order.
            var expectedAges = new[] { 18, 18, 20, 20 };
            var expectedNames = new[] { "Amy", "Willy", "Dori", "Ariel" };
            var expectedGenders = new[] { "Girl", "Boy", "Fish", "Mermaid" };

            for (int row = 0; row < expectedAges.Length; row++)
            {
                Assert.Equal(expectedAges[row], rows[row].Age);
                Assert.Equal(expectedNames[row], rows[row].UserName);
                Assert.Equal(expectedGenders[row], rows[row].Gender);
            }
        }
Beispiel #2
0
        public void WordTokenizeWorkout()
        {
            // Valid input: a scalar string column A and a two-slot string vector column B.
            var data = new[] { new TestClass()
                               {
                                   A = "This is a good sentence.", B = new string[2] {
                                       "Much words", "Wow So Cool"
                                   }
                               } };
            var dataView    = ML.Data.ReadFromEnumerable(data);

            // Numeric columns with the same names; handed to TestEstimatorCore as the invalid-input case.
            var invalidData = new[] { new TestWrong()
                                      {
                                          A = 1, B = new float[2] {
                                              2, 3
                                          }
                                      } };
            var invalidDataView = ML.Data.ReadFromEnumerable(invalidData);
            var pipe            = new WordTokenizingEstimator(Env, new[] {
                new WordTokenizingEstimator.ColumnInfo("TokenizeA", "A"),
                new WordTokenizingEstimator.ColumnInfo("TokenizeB", "B"),
            });

            TestEstimatorCore(pipe, dataView, invalidInput: invalidDataView);

            // Fit the same estimator on dataView (a fresh Fit call, independent of TestEstimatorCore) and transform the data.
            var result = pipe.Fit(dataView).Transform(dataView);

            // Extract the transformed result of the first row (the only row we have because data contains only one TestClass) as a native class.
            var nativeResult = ML.CreateEnumerable <NativeResult>(result, false).First();

            // Check the tokenization of A. Expected result is { "This", "is", "a", "good", "sentence." }.
            var tokenizeA = new[] { "This", "is", "a", "good", "sentence." };

            // Assert.Equal (rather than Assert.True on ==) reports both lengths when the check fails.
            Assert.Equal(tokenizeA.Length, nativeResult.TokenizeA.Length);
            for (int i = 0; i < tokenizeA.Length; ++i)
            {
                Assert.Equal(tokenizeA[i], nativeResult.TokenizeA[i]);
            }

            // Check the tokenization of B. Expected result is { "Much", "words", "Wow", "So", "Cool" }. One may think that the expected output
            // should be a 2-D array { { "Much", "words"}, { "Wow", "So", "Cool" } }, but please note that ML.NET may flatten all outputs if
            // they are high-dimension tensors.
            var tokenizeB = new[] { "Much", "words", "Wow", "So", "Cool" };

            Assert.Equal(tokenizeB.Length, nativeResult.TokenizeB.Length);
            for (int i = 0; i < tokenizeB.Length; ++i)
            {
                Assert.Equal(tokenizeB[i], nativeResult.TokenizeB[i]);
            }

            Done();
        }
        public void GroupTest()
        {
            // Four flat rows; rows sharing the same Age are expected to be merged by the group transform.
            // The user names must match the values asserted below ("Amy", "Willy", "Dori", "Ariel").
            var data = new List <GroupExample> {
                new GroupExample {
                    Age = 18, UserName = "Amy", Gender = "Girl"
                },
                new GroupExample {
                    Age = 18, UserName = "Willy", Gender = "Boy"
                },
                new GroupExample {
                    Age = 20, UserName = "Dori", Gender = "Fish"
                },
                new GroupExample {
                    Age = 20, UserName = "Ariel", Gender = "Mermaid"
                }
            };
            var dataView = ComponentCreation.CreateDataView(Env, data);

            // Group on "Age" and collect "UserName" and "Gender" into vector columns.
            var groupTransform = new GroupTransform(Env, dataView, "Age", "UserName", "Gender");
            var grouped        = ML.CreateEnumerable <UngroupExample>(groupTransform, false).ToList();

            // Expected content of grouped should contain two rows.
            // Age, UserName, Gender
            // 18,  {"Amy", "Willy"}, {"Girl", "Boy"}
            // 20,  {"Dori", "Ariel"}, {"Fish", "Mermaid"}
            // Note that "Age, UserName, Gender" is not a row; it just shows column names per row below it.
            Assert.Equal(2, grouped.Count);

            // grouped[0] is the first output row --- 18,  {"Amy", "Willy"}, {"Girl", "Boy"}
            Assert.Equal(18, grouped[0].Age);
            Assert.Equal(2, grouped[0].UserName.Length);
            Assert.Equal("Amy", grouped[0].UserName[0]);
            Assert.Equal("Willy", grouped[0].UserName[1]);
            Assert.Equal(2, grouped[0].Gender.Length);
            Assert.Equal("Girl", grouped[0].Gender[0]);
            Assert.Equal("Boy", grouped[0].Gender[1]);

            // grouped[1] is the second output row --- 20,  {"Dori", "Ariel"}, {"Fish", "Mermaid"}
            Assert.Equal(20, grouped[1].Age);
            // Check the UserName vector length here (mirrors the grouped[0] checks above).
            Assert.Equal(2, grouped[1].UserName.Length);
            Assert.Equal("Dori", grouped[1].UserName[0]);
            Assert.Equal("Ariel", grouped[1].UserName[1]);
            Assert.Equal(2, grouped[1].Gender.Length);
            Assert.Equal("Fish", grouped[1].Gender[0]);
            Assert.Equal("Mermaid", grouped[1].Gender[1]);
        }
Beispiel #4
0
        public void TestCustomTransformer()
        {
            // Read a scalar R4 column "Float1" (source column 9) and a four-slot R4 vector "Float4" (source columns 9-12).
            var inputFile = new MultiFileSource(GetDataPath("adult.tiny.with-schema.txt"));
            var float4Slots = new[] { new TextLoader.Range(9), new TextLoader.Range(10), new TextLoader.Range(11), new TextLoader.Range(12) };
            var columns = new[] {
                new TextLoader.Column("Float1", DataKind.R4, 9),
                new TextLoader.Column("Float4", DataKind.R4, float4Slots)
            };
            var reader = ML.Data.CreateTextLoader(columns, hasHeader: true);
            var data = reader.Read(inputFile);

            // The estimator is built on a separate, throwaway context so that saving and loading
            // are shown not to depend on sharing one environment.
            var scratchContext = new MLContext();
            var customEst = new CustomMappingEstimator <MyInput, MyOutput>(scratchContext, MyLambda.MyAction, "MyLambda");

            // Before the assembly is registered, the estimator check is expected to fail with a marked
            // InvalidOperationException; any other exception (including the assertion below) propagates.
            try
            {
                TestEstimatorCore(customEst, data);
                Assert.True(false, "Cannot work without RegisterAssembly");
            }
            catch (InvalidOperationException ex) when (ex.IsMarked())
            {
            }

            // After registration the same check must pass.
            ML.ComponentCatalog.RegisterAssembly(typeof(MyLambda).Assembly);
            TestEstimatorCore(customEst, data);
            IDataView transformedData = customEst.Fit(data).Transform(data);

            var inputs  = ML.CreateEnumerable <MyInput>(transformedData, true);
            var outputs = ML.CreateEnumerable <MyOutput>(transformedData, true);

            // Every output row's Together must equal "<Float1> + <Float4 slots joined by ", ">".
            var rowMatches = inputs.Zip(outputs, (row, mapped) => mapped.Together == $"{row.Float1} + {string.Join(", ", row.Float4)}");
            Assert.True(rowMatches.All(matched => matched));

            Done();
        }
Beispiel #5
0
        public void TestCustomTransformer()
        {
            // Read a scalar R4 column "Float1" (source column 9) and a four-slot R4 vector "Float4" (source columns 9-12).
            string dataPath = GetDataPath("adult.tiny.with-schema.txt");
            var    source   = new MultiFileSource(dataPath);
            var    loader   = ML.Data.CreateTextLoader(new[] {
                new TextLoader.Column("Float1", DataKind.R4, 9),
                new TextLoader.Column("Float4", DataKind.R4, new[] { new TextLoader.Range(9), new TextLoader.Range(10), new TextLoader.Range(11), new TextLoader.Range(12) })
            }, hasHeader: true);

            var data = loader.Read(source);

            // We create a temporary environment to instantiate the custom transformer. This is to ensure that we don't need the same
            // environment for saving and loading.
            var tempoEnv  = new MLContext();
            var customEst = new CustomMappingEstimator <MyInput, MyOutput>(tempoEnv, MyLambda.MyAction, "MyLambda");

            // The estimator check must fail before the composition container is set. The outcome is recorded
            // and asserted AFTER the try/catch: an assertion placed inside the try would itself be swallowed
            // by the broad catch, so the test could never fail on this path.
            bool failedWithoutInjection = false;
            try
            {
                TestEstimatorCore(customEst, data);
            }
            catch (Exception)
            {
                // REVIEW: we should have a common mechanism that will make sure this is 'our' exception thrown.
                failedWithoutInjection = true;
            }
            Assert.True(failedWithoutInjection, "Cannot work without MEF injection");

            // With the container in place the same check must pass.
            ML.CompositionContainer = new CompositionContainer(new TypeCatalog(typeof(MyLambda)));
            TestEstimatorCore(customEst, data);
            IDataView transformedData = customEst.Fit(data).Transform(data);

            var inputs  = ML.CreateEnumerable <MyInput>(transformedData, true);
            var outputs = ML.CreateEnumerable <MyOutput>(transformedData, true);

            // Every output row's Together must equal "<Float1> + <Float4 slots joined by ", ">".
            Assert.True(inputs.Zip(outputs, (x, y) => y.Together == $"{x.Float1} + {string.Join(", ", x.Float4)}").All(x => x));

            Done();
        }