Example #1
0
        /// <summary>
        /// Builds a map transform over <paramref name="input"/> using one of the two mapping
        /// functions of the enclosing type, with a one-column input schema and a one-column output schema.
        /// </summary>
        /// <param name="host">Host forwarded to <c>LambdaTransform.CreateMap</c>.</param>
        /// <param name="input">Data view the transform is created over.</param>
        /// <param name="inputColumnName">Name assigned to column 0 of the input schema.</param>
        /// <param name="outputColumnName">Name assigned to column 0 of the output schema.</param>
        /// <param name="initFunction">State initializer forwarded to <c>LambdaTransform.CreateMap</c>.</param>
        /// <param name="hasBuffer">
        /// When <c>true</c>, <c>MapFunction</c> is used; otherwise <c>MapFunctionWithoutBuffer</c>.
        /// </param>
        /// <param name="outputColTypeOverride">
        /// Optional column type for the output column; ignored when <c>null</c>.
        /// </param>
        /// <returns>The transform returned by <c>LambdaTransform.CreateMap</c>.</returns>
        private static IDataTransform CreateLambdaTransform(IHost host, IDataView input, string inputColumnName, string outputColumnName,
                                                            Action<TState> initFunction, bool hasBuffer, ColumnType outputColTypeOverride)
        {
            // Input side: a single column renamed to the caller-supplied name.
            var sourceSchema = SchemaDefinition.Create(typeof(DataBox<TInput>));
            sourceSchema[0].ColumnName = inputColumnName;

            // Output side: a single column, optionally with an overridden type.
            var resultSchema = SchemaDefinition.Create(typeof(DataBox<TOutput>));
            resultSchema[0].ColumnName = outputColumnName;
            if (outputColTypeOverride != null)
                resultSchema[0].ColumnType = outputColTypeOverride;

            // The cast gives the conditional expression a delegate type for the method groups.
            var mapper = hasBuffer
                ? (Action<DataBox<TInput>, DataBox<TOutput>, TState>)MapFunction
                : MapFunctionWithoutBuffer;

            return LambdaTransform.CreateMap(host, input, mapper, initFunction, sourceSchema, resultSchema);
        }
Example #2
0
        /// <summary>
        /// Chains a lambda map and a lambda filter over the breast-cancer examples, re-applies the
        /// chain to a fresh data view and saves the "Label" and "OutField" columns to a TSV file.
        /// </summary>
        public void LambdaTransformCreate()
        {
            using (var env = new ConsoleEnvironment(42))
            {
                var examples = ReadBreastCancerExamples();
                var sourceView = env.CreateDataView(examples);

                // Map step: joins the feature values into a single ';'-separated string.
                var joinedFeatures = LambdaTransform.CreateMap<BreastCancerExample, LambdaOutput>(
                    env,
                    sourceView,
                    (src, dst) =>
                    {
                        dst.OutField = string.Join(";", src.Features);
                    });

                // Filter step: the predicate is Label == 0; no state initializer is supplied.
                var labelFilter = LambdaTransform.CreateFilter<BreastCancerExample, object>(
                    env,
                    joinedFeatures,
                    (src, state) => src.Label == 0,
                    null);

                Assert.Null(labelFilter.GetRowCount(false));

                // test re-apply
                var reapplied = ApplyTransformUtils.ApplyAllTransformsToData(env, labelFilter, env.CreateDataView(examples));

                var saver = new TextSaver(env, new TextSaver.Arguments());
                Assert.True(reapplied.Schema.TryGetColumnIndex("Label", out int labelIndex));
                Assert.True(reapplied.Schema.TryGetColumnIndex("OutField", out int outFieldIndex));

                var outputPath = GetOutputPath(OutputRelativePath, "lambda-output.tsv");
                using (var stream = File.Create(outputPath))
                {
                    saver.SaveData(stream, reapplied, labelIndex, outFieldIndex);
                }
            }
        }
Example #3
0
        /// <summary>
        /// Trains an averaged perceptron (two iterations) on an explicitly cached view of the
        /// breast-cancer data and checks how many times the pass-through filter's initializer ran.
        /// </summary>
        public void TrainAveragedPerceptronWithCache()
        {
            var mlContext = new MLContext(0);
            var trainPath = GetDataPath(TestDatasets.breastCancer.trainFilename);
            var loader = TextLoader.Create(mlContext, new TextLoader.Options(), new MultiFileSource(trainPath));

            // Pass-through filter whose second callback bumps a counter each time it is invoked.
            var globalCounter = 0;
            IDataView pipeline = LambdaTransform.CreateFilter<object, object>(
                mlContext,
                loader,
                (row, state) => true,
                state => { globalCounter++; });

            var labelConverter = mlContext.Transforms.Conversion.ConvertType("Label", outputKind: DataKind.Boolean);
            pipeline = labelConverter.Fit(pipeline).Transform(pipeline);

            // The baseline result of this was generated with everything cached in memory. As auto-cache is removed,
            // an explicit step of caching is required to make this test ok.
            var cached = mlContext.Data.Cache(pipeline);

            var options = new AveragedPerceptronTrainer.Options { NumberOfIterations = 2 };
            var estimator = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options);

            estimator.Fit(cached).Transform(cached);

            // The counter is expected to have been incremented exactly once.
            // NOTE(review): the previous comment here said "2 cursoring events"; the assertion checks for 1.
            Assert.Equal(1, globalCounter);
        }
Example #4
0
        /// <summary>
        /// Creates a lambda map from the stored action over <paramref name="input"/> and wraps the
        /// result of applying it to an empty data view that has the same schema.
        /// </summary>
        /// <param name="input">Data view whose schema the transform is built against.</param>
        /// <returns>A <c>TransformWrapper</c> around the applied transform chain.</returns>
        public TransformWrapper Fit(IDataView input)
        {
            var mapTransform = LambdaTransform.CreateMap(_env, input, _action);

            // An empty view with the input's schema stands in as the new data source.
            var schemaOnlyView = new EmptyDataView(_env, input.Schema);
            var appliedChain = ApplyTransformUtils.ApplyAllTransformsToData(_env, mapTransform, schemaOnlyView, input);

            return new TransformWrapper(_env, appliedChain);
        }
Example #5
0
            /// <summary>
            /// Builds a map transform over <paramref name="input"/>. When a confidence-interval minimum
            /// column name is supplied, the output schema has three columns (value, interval minimum,
            /// interval maximum); otherwise it has a single value column.
            /// </summary>
            /// <param name="host">Host forwarded to <c>LambdaTransform.CreateMap</c>.</param>
            /// <param name="input">Data view the transform is created over.</param>
            /// <param name="inputColumnName">Name assigned to column 0 of the input schema.</param>
            /// <param name="outputColumnName">Name assigned to column 0 of the output schema.</param>
            /// <param name="forecastingConfidenceIntervalMinOutputColumnName">
            /// Name for output column 1; a null or empty value selects the single-column output schema.
            /// </param>
            /// <param name="forecastingConfidenceIntervalMaxOutputColumnName">
            /// Name for output column 2; only used in the three-column case.
            /// </param>
            /// <param name="initFunction">State initializer forwarded to <c>LambdaTransform.CreateMap</c>.</param>
            /// <param name="hasBuffer">
            /// When <c>true</c>, <c>MapFunction</c> is used; otherwise <c>MapFunctionWithoutBuffer</c>.
            /// </param>
            /// <param name="outputColTypeOverride">
            /// Optional column type applied to every output column; ignored when <c>null</c>.
            /// </param>
            /// <returns>The transform returned by <c>LambdaTransform.CreateMap</c>.</returns>
            private static IDataTransform CreateLambdaTransform(IHost host, IDataView input, string inputColumnName,
                                                                string outputColumnName, string forecastingConfidenceIntervalMinOutputColumnName,
                                                                string forecastingConfidenceIntervalMaxOutputColumnName, Action<TState> initFunction, bool hasBuffer, DataViewType outputColTypeOverride)
            {
                var sourceSchema = SchemaDefinition.Create(typeof(DataBox<TInput>));
                sourceSchema[0].ColumnName = inputColumnName;

                // Simple case first: no confidence-interval columns were requested.
                if (string.IsNullOrEmpty(forecastingConfidenceIntervalMinOutputColumnName))
                {
                    SchemaDefinition valueSchema = SchemaDefinition.Create(typeof(DataBox<TOutput>));
                    valueSchema[0].ColumnName = outputColumnName;
                    if (outputColTypeOverride != null)
                        valueSchema[0].ColumnType = outputColTypeOverride;

                    var valueMapper = hasBuffer
                        ? (Action<DataBox<TInput>, DataBox<TOutput>, TState>)MapFunction
                        : MapFunctionWithoutBuffer;

                    return LambdaTransform.CreateMap(host, input, valueMapper, initFunction, sourceSchema, valueSchema);
                }

                // Forecasting case: value column plus lower and upper confidence-interval columns.
                SchemaDefinition intervalSchema = SchemaDefinition.Create(typeof(DataBoxForecastingWithConfidenceIntervals<TOutput>));
                intervalSchema[0].ColumnName = outputColumnName;

                if (outputColTypeOverride != null)
                {
                    // Same assignment order as a right-to-left chained assignment.
                    intervalSchema[2].ColumnType = outputColTypeOverride;
                    intervalSchema[1].ColumnType = outputColTypeOverride;
                    intervalSchema[0].ColumnType = outputColTypeOverride;
                }

                intervalSchema[1].ColumnName = forecastingConfidenceIntervalMinOutputColumnName;
                intervalSchema[2].ColumnName = forecastingConfidenceIntervalMaxOutputColumnName;

                var intervalMapper = hasBuffer
                    ? (Action<DataBox<TInput>, DataBoxForecastingWithConfidenceIntervals<TOutput>, TState>)MapFunction
                    : MapFunctionWithoutBuffer;

                return LambdaTransform.CreateMap(host, input, intervalMapper, initFunction, sourceSchema, intervalSchema);
            }
Example #6
0
        /// <summary>
        /// Trains an averaged perceptron (two iterations) on the breast-cancer data behind a
        /// pass-through filter and checks how many times the filter's initializer ran.
        /// </summary>
        public void TrainAveragedPerceptronWithCache()
        {
            var env = new MLContext(0);
            var source = new MultiFileSource(GetDataPath("breast-cancer.txt"));
            var loader = TextLoader.Create(env, new TextLoader.Arguments(), source);

            // Pass-through filter whose second callback bumps a counter each time it is invoked.
            var globalCounter = 0;
            var passThrough = LambdaTransform.CreateFilter<object, object>(
                env,
                loader,
                (row, state) => true,
                state => { globalCounter++; });

            var trainer = new AveragedPerceptronTrainer(env, "Label", "Features", numIterations: 2);
            trainer.Fit(passThrough).Transform(passThrough);

            // The counter is expected to have been incremented exactly once.
            // NOTE(review): the previous comment here said "2 cursoring events"; the assertion checks for 1.
            Assert.Equal(1, globalCounter);
        }
        /// <summary>
        /// Inserts a user-defined lambda map into an Iris training pipeline (term mapping,
        /// feature concatenation, SDCA multi-class training, scoring) and checks that the
        /// resulting prediction engine reproduces the label of the first 20 examples.
        /// </summary>
        void Extensibility()
        {
            var irisPath = GetDataPath(IrisDataPath);

            using (var env = new LocalEnvironment())
            {
                var trainLoader = TextLoader.ReadFile(env, MakeIrisTextLoaderArgs(), new MultiFileSource(irisPath));

                // Custom row mapping: copies every field, but replaces PetalLength with
                // SepalLength whenever SepalLength is not greater than 3.
                Action<IrisData, IrisData> rowMapper = (src, dst) =>
                {
                    dst.Label = src.Label;
                    if (src.SepalLength > 3)
                    {
                        dst.PetalLength = src.PetalLength;
                    }
                    else
                    {
                        dst.PetalLength = src.SepalLength;
                    }
                    dst.PetalWidth = src.PetalWidth;
                    dst.SepalLength = src.SepalLength;
                    dst.SepalWidth = src.SepalWidth;
                };

                var mapped = LambdaTransform.CreateMap(env, trainLoader, rowMapper);
                var labelTerms = TermTransform.Create(env, mapped, "Label");
                var featureConcat = new ConcatTransform(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
                var featurized = featureConcat.Transform(labelTerms);

                var trainerArgs = new SdcaMultiClassTrainer.Arguments
                {
                    MaxIterations = 100,
                    Shuffle = true,
                    NumThreads = 1
                };
                var trainer = new SdcaMultiClassTrainer(env, trainerArgs);

                // Wrap the pipeline in a cache only when the trainer asks for it.
                IDataView trainingView;
                if (trainer.Info.WantCaching)
                {
                    trainingView = new CacheDataView(env, featurized, prefetch: null);
                }
                else
                {
                    trainingView = featurized;
                }
                var trainRoles = new RoleMappedData(trainingView, label: "Label", feature: "Features");

                // Auto-normalization.
                NormalizeTransform.CreateIfNeeded(env, ref trainRoles, trainer);
                var predictor = trainer.Train(new Runtime.TrainContext(trainRoles));

                var scoreRoles = new RoleMappedData(featurized, label: "Label", feature: "Features");
                IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

                var labelDecoder = new KeyToValueTransform(env, "PredictedLabel");
                var engine = env.CreatePredictionEngine<IrisData, IrisPrediction>(labelDecoder.Transform(scorer));

                var testLoader = TextLoader.ReadFile(env, MakeIrisTextLoaderArgs(), new MultiFileSource(irisPath));
                var testExamples = testLoader.AsEnumerable<IrisData>(env, false);
                foreach (var example in testExamples.Take(20))
                {
                    var predicted = engine.Predict(example);
                    Assert.True(predicted.PredictedLabel == example.Label);
                }
            }
        }
Example #8
0
        /// <summary>
        /// Trains an averaged perceptron (two iterations) on an explicitly cached view of the
        /// breast-cancer data and checks how many times the pass-through filter's initializer ran.
        /// </summary>
        public void TrainAveragedPerceptronWithCache()
        {
            var env = new MLContext(0);
            var source = new MultiFileSource(GetDataPath("breast-cancer.txt"));
            var loader = TextLoader.Create(env, new TextLoader.Arguments(), source);

            // Pass-through filter whose second callback bumps a counter each time it is invoked.
            var globalCounter = 0;
            var passThrough = LambdaTransform.CreateFilter<object, object>(
                env,
                loader,
                (row, state) => true,
                state => { globalCounter++; });

            // The baseline result of this was generated with everything cached in memory. As auto-cache is removed,
            // an explicit step of caching is required to make this test ok.
            var cached = env.Data.Cache(passThrough);

            var trainer = new AveragedPerceptronTrainer(env, "Label", "Features", numIterations: 2);
            trainer.Fit(cached).Transform(cached);

            // The counter is expected to have been incremented exactly once.
            // NOTE(review): the previous comment here said "2 cursoring events"; the assertion checks for 1.
            Assert.Equal(1, globalCounter);
        }
Example #9
0
        /// <summary>
        /// Builds a lambda filter over the breast-cancer examples, re-applies it to a fresh
        /// data view and saves the "Label" column to a TSV file.
        /// </summary>
        public void LambdaTransformCreate()
        {
            var env = new MLContext(seed: 42);
            var examples = ReadBreastCancerExamples();
            var sourceView = env.CreateDataView(examples);

            // The predicate is Label == 0; no state initializer is supplied.
            var labelFilter = LambdaTransform.CreateFilter<BreastCancerExample, object>(
                env,
                sourceView,
                (src, state) => src.Label == 0,
                null);

            Assert.Null(labelFilter.GetRowCount());

            // test re-apply
            var reapplied = ApplyTransformUtils.ApplyAllTransformsToData(env, labelFilter, env.CreateDataView(examples));

            var saver = new TextSaver(env, new TextSaver.Arguments());
            Assert.True(reapplied.Schema.TryGetColumnIndex("Label", out int labelIndex));

            var outputPath = GetOutputPath(OutputRelativePath, "lambda-output.tsv");
            using (var stream = File.Create(outputPath))
            {
                saver.SaveData(stream, reapplied, labelIndex);
            }
        }
Example #10
0
        /// <summary>
        /// Exercises lambda filters over four fixture types that differ in how the cursor-channel
        /// attribute is applied: one valid use, two invalid uses that are expected to throw a marked
        /// <see cref="InvalidOperationException"/> when the cursor is advanced, and one type with
        /// two channel fields of which only one is expected to be populated.
        /// </summary>
        public void CursorChannelExposedInMapTransform()
        {
            var env = new MLContext(seed: 0);
            // Correct use of CursorChannel attribute.
            var data1 = Utils.CreateArray(10, new OneIChannelWithAttribute());
            var idv1 = env.CreateDataView(data1);
            Assert.Null(data1[0].Channel);

            var filter1 = LambdaTransform.CreateFilter<OneIChannelWithAttribute, object>(env, idv1,
                (input, state) =>
                {
                    Assert.NotNull(input.Channel);
                    return false;
                }, null);
            filter1.GetRowCursorForAllColumns().MoveNext();

            // Error case: non-IChannel field marked with attribute.
            var data2 = Utils.CreateArray(10, new OneStringWithAttribute());
            var idv2 = env.CreateDataView(data2);
            Assert.Null(data2[0].Channel);

            var filter2 = LambdaTransform.CreateFilter<OneStringWithAttribute, object>(env, idv2,
                (input, state) =>
                {
                    Assert.Null(input.Channel);
                    return false;
                }, null);
            try
            {
                filter2.GetRowCursorForAllColumns().MoveNext();
                Assert.True(false, "Throw an error if attribute is applied to a field that is not an IChannel.");
            }
            catch (InvalidOperationException ex)
            {
                Assert.True(ex.IsMarked());
            }

            // Error case: multiple fields marked with attributes.
            var data3 = Utils.CreateArray(10, new TwoIChannelsWithAttributes());
            var idv3 = env.CreateDataView(data3);
            Assert.Null(data3[0].ChannelOne);
            Assert.Null(data3[2].ChannelTwo);

            var filter3 = LambdaTransform.CreateFilter<TwoIChannelsWithAttributes, object>(env, idv3,
                (input, state) =>
                {
                    Assert.Null(input.ChannelOne);
                    Assert.Null(input.ChannelTwo);
                    return false;
                }, null);
            try
            {
                filter3.GetRowCursorForAllColumns().MoveNext();
                // The failure message names the case actually being tested (multiple marked fields).
                Assert.True(false, "Throw an error if attribute is applied to multiple fields.");
            }
            catch (InvalidOperationException ex)
            {
                Assert.True(ex.IsMarked());
            }

            // Correct case: non-marked IChannel field is not touched.
            var example4 = new TwoIChannelsOnlyOneWithAttribute();
            Assert.Null(example4.ChannelTwo);
            Assert.Null(example4.ChannelOne);
            var idv4 = env.CreateDataView(Utils.CreateArray(10, example4));

            var filter4 = LambdaTransform.CreateFilter<TwoIChannelsOnlyOneWithAttribute, object>(env, idv4,
                (input, state) =>
                {
                    Assert.Null(input.ChannelOne);
                    Assert.NotNull(input.ChannelTwo);
                    return false;
                }, null);
            // Advance filter4 (not filter1) so that the assertions inside its predicate actually run.
            filter4.GetRowCursorForAllColumns().MoveNext();
        }
        /// <summary>
        /// Reads column 1 of a two-row streaming view directly and then through an
        /// <c>ExtendedCacheTransform</c>, throwing if the row count (or, for the direct read,
        /// the values 1 then 0) is not what is expected.
        /// </summary>
        /// <param name="nt">
        /// Thread count passed to the cache transform; a value of 1 also makes the test
        /// environment get created with <c>conc: 1</c> (otherwise <c>conc: 0</c>).
        /// </param>
        /// <param name="async">Value assigned to the cache transform's <c>async</c> argument.</param>
        static void TestCacheTransformSimple(int nt, bool async)
        {
            using (var host = EnvHelper.NewTestEnvironment(conc: nt == 1 ? 1 : 0))
            {
                var rows = new InputOutput[]
                {
                    new InputOutput() { X = new float[] { 0, 1 }, Y = 1 },
                    new InputOutput() { X = new float[] { 0, 1 }, Y = 0 }
                };

                var data = host.CreateStreamingDataView(rows);

                // Baseline: the raw view yields Y = 1 and then Y = 0.
                using (var cursor = data.GetRowCursor(i => true))
                {
                    var seen = new List<int>();
                    var readY = cursor.GetGetter<int>(1);
                    while (cursor.MoveNext())
                    {
                        int current = 0;
                        readY(ref current);
                        seen.Add(current);
                    }

                    if (seen.Count != 2 || seen[0] != 1 || seen[1] != 0)
                    {
                        throw new Exception();
                    }
                }

                var cacheArgs = new ExtendedCacheTransform.Arguments
                {
                    numTheads = nt,
                    async = async
                };
                var cachedView = new ExtendedCacheTransform(host, cacheArgs, data);

                // The resulting transform is not used below; the call is kept as in the original test.
                LambdaTransform.CreateMap<InputOutput, InputOutput, EnvHelper.EmptyState>(
                    host,
                    data,
                    (input, output, state) =>
                    {
                        output.X = input.X;
                        output.Y = input.Y;
                    },
                    (EnvHelper.EmptyState state) => { });

                // Through the cache transform only the number of rows is checked.
                using (var cursor = cachedView.GetRowCursor(i => true))
                {
                    var seen = new List<int>();
                    var readY = cursor.GetGetter<int>(1);
                    while (cursor.MoveNext())
                    {
                        int current = 0;
                        readY(ref current);
                        seen.Add(current);
                    }

                    if (seen.Count != 2)
                    {
                        throw new Exception();
                    }
                }
            }
        }
        /// <summary>
        /// Reads column 1 of a two-row streaming view directly (expecting 1 then 0) and then
        /// through a <c>SortInDataFrameTransform</c> configured with sort column "Y"
        /// (expecting 0 then 1), throwing on any mismatch.
        /// </summary>
        public void TestSortInDataFrameTransformSimple()
        {
            using (var host = EnvHelper.NewTestEnvironment())
            {
                var rows = new InputOutput[]
                {
                    new InputOutput() { X = new float[] { 0, 1 }, Y = 1 },
                    new InputOutput() { X = new float[] { 0, 1 }, Y = 0 }
                };

                var data = host.CreateStreamingDataView(rows);

                // Baseline: the raw view yields Y = 1 and then Y = 0.
                using (var cursor = data.GetRowCursor(i => true))
                {
                    var seen = new List<int>();
                    var readY = cursor.GetGetter<int>(1);
                    while (cursor.MoveNext())
                    {
                        int current = 0;
                        readY(ref current);
                        seen.Add(current);
                    }

                    if (seen.Count != 2 || seen[0] != 1 || seen[1] != 0)
                    {
                        throw new Exception();
                    }
                }

                var sortArgs = new SortInDataFrameTransform.Arguments
                {
                    sortColumn = "Y"
                };
                var sortedView = new SortInDataFrameTransform(host, sortArgs, data);

                // The resulting transform is not used below; the call is kept as in the original test.
                LambdaTransform.CreateMap<InputOutput, InputOutput, EnvHelper.EmptyState>(
                    host,
                    data,
                    (input, output, state) =>
                    {
                        output.X = input.X;
                        output.Y = input.Y;
                    },
                    (state) => { });

                // After sorting, the same column is expected to yield 0 and then 1.
                using (var cursor = sortedView.GetRowCursor(i => true))
                {
                    var seen = new List<int>();
                    var readY = cursor.GetGetter<int>(1);
                    while (cursor.MoveNext())
                    {
                        int current = 0;
                        readY(ref current);
                        seen.Add(current);
                    }

                    if (seen.Count != 2 || seen[0] != 0 || seen[1] != 1)
                    {
                        throw new Exception();
                    }
                }
            }
        }
Example #13
0
        /// <summary>
        /// Wraps a lambda map (X[0] + 1, X[1] - 1) in a <c>ValueMapperFromTransformFloat</c>,
        /// applies the resulting mapper to a fixed input vector {-5, -5} once per input row,
        /// and verifies the mapped values as well as that the input vector was left untouched.
        /// </summary>
        public void TestTransform2ValueMapperMultiThread()
        {
            using (var env = EnvHelper.NewTestEnvironment())
            {
                var host = env.Register("unittest");

                var inputs = new[] {
                    new InputOutput {
                        X = new float[] { 0, 1 }, Y = 10
                    },
                    new InputOutput {
                        X = new float[] { 2, 3 }, Y = 100
                    }
                };

                var data = host.CreateStreamingDataView(inputs);

                var trv = LambdaTransform.CreateMap(host, data,
                                                    (InputOutput src, InputOutput dst, EnvHelper.EmptyState state) =>
                {
                    dst.X = new float[] { src.X[0] + 1f, src.X[1] - 1f };
                }, (EnvHelper.EmptyState state) => { });

                var ino = new InputOutput {
                    X = new float[] { -5, -5 }, Y = 3
                };
                var inob = new VBuffer <float>(2, ino.X);
                var ans  = new VBuffer <float>();

                using (var valueMapper = new ValueMapperFromTransformFloat <VBuffer <float> >(host, trv, "X", "X", ignoreOtherColumn: true))
                {
                    var mapper = valueMapper.GetMapper <VBuffer <float>, VBuffer <float> >();

                    var listy = new List <int>();
                    var listx = new List <float>();
                    for (int i = 0; i < inputs.Length; ++i)
                    {
                        // The same input vector is mapped on every iteration; Y is read from the row itself.
                        mapper(in inob, ref ans);
                        if (ans.Count != 2)
                        {
                            throw new Exception("Issue with dimension.");
                        }
                        listx.AddRange(ans.GetValues().ToArray());
                        listy.Add(inputs[i].Y);
                    }
                    if (listy.Count != 2)
                    {
                        throw new Exception("Issue with dimension.");
                    }
                    if (listy[0] != 10 || listy[1] != 100)
                    {
                        throw new Exception("Issue with values.");
                    }
                    if (listx.Count != 4)
                    {
                        throw new Exception("Issue with dimension.");
                    }
                    // {-5, -5} maps to {-5 + 1, -5 - 1} = {-4, -6}, once per iteration.
                    if (listx[0] != -4)
                    {
                        throw new Exception("Issue with values.");
                    }
                    if (listx[1] != -6)
                    {
                        throw new Exception("Issue with values.");
                    }
                    if (listx[2] != -4)
                    {
                        throw new Exception("Issue with values.");
                    }
                    if (listx[3] != -6)
                    {
                        throw new Exception("Issue with values.");
                    }
                    if (inob.Count != 2)
                    {
                        throw new Exception("Issue with dimension.");
                    }
                    // Both elements of the input buffer must still hold their original value.
                    if (inob.Values[0] != -5)
                    {
                        throw new Exception("Values were overwritten.");
                    }
                    if (inob.Values[1] != -5)
                    {
                        throw new Exception("Values were overwritten.");
                    }
                }
            }
        }
Example #14
0
        /// <summary>
        /// Exercises lambda maps and filters over four fixture types that differ in how the
        /// cursor-channel attribute is applied: one valid use, two invalid uses that are expected
        /// to throw a marked <see cref="InvalidOperationException"/> when a cursor is requested or
        /// advanced, and one type with two channel fields of which only one is expected to be populated.
        /// </summary>
        public void CursorChannelExposedInMapTransform()
        {
            using (var env = new ConsoleEnvironment(0))
            {
                // Correct use of CursorChannel attribute.
                var data1 = Utils.CreateArray(10, new OneIChannelWithAttribute());
                var idv1  = env.CreateDataView(data1);
                Assert.Null(data1[0].Channel);

                var map1 = LambdaTransform.CreateMap <OneIChannelWithAttribute, OneIChannelWithAttribute>(env, idv1,
                                                                                                          (input, output) =>
                {
                    output.OutField = input.OutField + input.OutField;
                });
                map1.GetRowCursor(col => true);

                var filter1 = LambdaTransform.CreateFilter <OneIChannelWithAttribute, object>(env, idv1,
                                                                                              (input, state) =>
                {
                    Assert.NotNull(input.Channel);
                    return(false);
                }, null);
                filter1.GetRowCursor(col => true).MoveNext();

                // Error case: non-IChannel field marked with attribute.
                var data2 = Utils.CreateArray(10, new OneStringWithAttribute());
                var idv2  = env.CreateDataView(data2);
                Assert.Null(data2[0].Channel);

                var filter2 = LambdaTransform.CreateFilter <OneStringWithAttribute, object>(env, idv2,
                                                                                            (input, state) =>
                {
                    Assert.Null(input.Channel);
                    return(false);
                }, null);
                try
                {
                    filter2.GetRowCursor(col => true).MoveNext();
                    Assert.True(false, "Throw an error if attribute is applied to a field that is not an IChannel.");
                }
                catch (InvalidOperationException ex)
                {
                    Assert.True(ex.IsMarked());
                }

                var map2 = LambdaTransform.CreateMap <OneStringWithAttribute, OneStringWithAttribute>(env, idv2,
                                                                                                      (input, output) =>
                {
                    output.OutField = input.OutField + input.OutField;
                });
                try
                {
                    map2.GetRowCursor(col => true);
                    Assert.True(false, "Throw an error if attribute is applied to a field that is not an IChannel.");
                }
                catch (InvalidOperationException ex)
                {
                    Assert.True(ex.IsMarked());
                }

                // Error case: multiple fields marked with attributes.
                var data3 = Utils.CreateArray(10, new TwoIChannelsWithAttributes());
                var idv3  = env.CreateDataView(data3);
                Assert.Null(data3[0].ChannelOne);
                Assert.Null(data3[2].ChannelTwo);

                var filter3 = LambdaTransform.CreateFilter <TwoIChannelsWithAttributes, object>(env, idv3,
                                                                                                (input, state) =>
                {
                    Assert.Null(input.ChannelOne);
                    Assert.Null(input.ChannelTwo);
                    return(false);
                }, null);
                try
                {
                    filter3.GetRowCursor(col => true).MoveNext();
                    // The failure message names the case actually being tested (multiple marked fields).
                    Assert.True(false, "Throw an error if attribute is applied to multiple fields.");
                }
                catch (InvalidOperationException ex)
                {
                    Assert.True(ex.IsMarked());
                }

                var map3 = LambdaTransform.CreateMap <TwoIChannelsWithAttributes, TwoIChannelsWithAttributes>(env, idv3,
                                                                                                              (input, output) =>
                {
                    output.OutField = input.OutField + input.OutField;
                });
                try
                {
                    map3.GetRowCursor(col => true);
                    Assert.True(false, "Throw an error if attribute is applied to a multiple fields.");
                }
                catch (InvalidOperationException ex)
                {
                    Assert.True(ex.IsMarked());
                }

                // Correct case: non-marked IChannel field is not touched.
                var example4 = new TwoIChannelsOnlyOneWithAttribute();
                Assert.Null(example4.ChannelTwo);
                Assert.Null(example4.ChannelOne);
                var idv4 = env.CreateDataView(Utils.CreateArray(10, example4));

                var map4 = LambdaTransform.CreateMap <TwoIChannelsOnlyOneWithAttribute, TwoIChannelsOnlyOneWithAttribute>(env, idv4,
                                                                                                                          (input, output) => { });
                map4.GetRowCursor(col => true);

                var filter4 = LambdaTransform.CreateFilter <TwoIChannelsOnlyOneWithAttribute, object>(env, idv4,
                                                                                                      (input, state) =>
                {
                    Assert.Null(input.ChannelOne);
                    Assert.NotNull(input.ChannelTwo);
                    return(false);
                }, null);
                // Advance filter4 (not filter1) so that the assertions inside its predicate actually run.
                filter4.GetRowCursor(col => true).MoveNext();
            }
        }