예제 #1
0
        /// <summary>
        /// Checks that a <c>DescribeTransform</c> writes the same text output when it is
        /// run directly and when it is reloaded from a saved model file.
        /// </summary>
        public void TestI_DescribeTransformSaveDataAndZip()
        {
            var env = EnvHelper.NewTestEnvironment();

            // Build the transform over the in-memory sample rows.
            var view         = DataViewConstructionUtils.CreateFromEnumerable(env, InputOutput.CreateInputs());
            var describeArgs = new DescribeTransform.Arguments { columns = new[] { "X" } };
            var transform    = new DescribeTransform(env, describeArgs, view);

            // The test name is passed to FileHelper.GetOutputFile together with each file name.
            var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;

            // 1. Predictions written straight from the in-memory transform.
            var directOutput = FileHelper.GetOutputFile("outputDataFilePath.txt", testName);
            StreamHelper.SavePredictions(env, transform, directOutput);
            Assert.IsTrue(File.Exists(directOutput));

            // 2. The transform saved as a model file.
            var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
            StreamHelper.SaveModel(env, transform, modelPath);
            Assert.IsTrue(File.Exists(modelPath));

            // 3. Predictions written from the saved model applied to the same data.
            var reloadedOutput = FileHelper.GetOutputFile("outputDataFilePath2.txt", testName);
            StreamHelper.SavePredictions(env, modelPath, reloadedOutput, view);
            Assert.IsTrue(File.Exists(reloadedOutput));

            // Both dumps must be non-empty and identical.
            var directText = File.ReadAllText(directOutput);
            Assert.IsTrue(directText.Length > 0);
            var reloadedText = File.ReadAllText(reloadedOutput);
            Assert.AreEqual(directText, reloadedText);
        }
        /// <summary>
        /// Runs the serialization round-trip helper on a pipeline made of a
        /// <c>Scaler</c> transform followed by a <c>tag</c> transform.
        /// </summary>
        public void TestTagViewTransform()
        {
            var host = EnvHelper.NewTestEnvironment();

            var rows = new ExampleA[]
            {
                new ExampleA { X = new float[] { 0, 1 } },
                new ExampleA { X = new float[] { 2, 3 } },
            };

            IDataView loader = DataViewConstructionUtils.CreateFromEnumerable(host, rows);
            var       scaled = host.CreateTransform("Scaler{col=X1:X}", loader);
            var       tagged = host.CreateTransform("tag{t=memory}", scaled);

            var testName   = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var modelPath  = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
            var firstDump  = FileHelper.GetOutputFile("outData.txt", testName);
            var secondDump = FileHelper.GetOutputFile("outData2.txt", testName);
            TestTransformHelper.SerializationTestTransform(host, modelPath, tagged, loader, firstDump, secondDump);
        }
예제 #3
0
 /// <summary>
 /// Builds an <see cref="IDataView"/> on top of an enumerable of user-defined items, using the supplied
 /// <see cref="DataViewSchema"/>, which may carry more schema information than the item type alone can express.
 /// </summary>
 /// <remarks>
 /// Ownership of <paramref name="data"/> stays with the caller, and the returned data view never
 /// modifies the contents of <paramref name="data"/>.
 /// Because an <see cref="IDataView"/> is assumed to be immutable, <paramref name="data"/> is expected to
 /// support multiple enumerations that all return the same results, unless the caller knows the data
 /// will only be cursored once.
 /// A typical streaming usage is: create a data view that loads data lazily, apply pre-trained
 /// transformations to it, then cursor through it to read the transformed results.
 /// A practical use of this overload is to supply the feature column names through the
 /// <see cref="DataViewSchema.Annotations"/>.
 /// </remarks>
 /// <typeparam name="TRow">The user-defined item type.</typeparam>
 /// <param name="data">The enumerable data containing type <typeparamref name="TRow"/> to convert to an <see cref="IDataView"/>.</param>
 /// <param name="schema">The schema of the returned <see cref="IDataView"/>.</param>
 /// <returns>An <see cref="IDataView"/> with the given <paramref name="schema"/>.</returns>
 public IDataView LoadFromEnumerable <TRow>(IEnumerable <TRow> data, DataViewSchema schema)
     where TRow : class
 {
     // Both arguments are mandatory.
     _env.CheckValue(data, nameof(data));
     _env.CheckValue(schema, nameof(schema));

     IDataView view = DataViewConstructionUtils.CreateFromEnumerable(_env, data, schema);
     return view;
 }
        /// <summary>
        /// Runs the serialization round-trip helper on a pipeline that stacks a <c>Scaler</c>
        /// transform and a <c>ChainTrans</c> transform (configured with <c>xf1=Scaler</c> and <c>xf2=Poly</c>).
        /// </summary>
        public void TestChainTransformSerialize()
        {
            var host = EnvHelper.NewTestEnvironment();

            var rows = new ExampleA[]
            {
                new ExampleA { X = new float[] { 1, 10, 100 } },
                new ExampleA { X = new float[] { 2, 3, 5 } },
            };

            IDataView      loader  = DataViewConstructionUtils.CreateFromEnumerable(host, rows);
            IDataTransform scaled  = host.CreateTransform("Scaler{col=X4:X}", loader);
            IDataTransform chained = host.CreateTransform("ChainTrans{ xf1=Scaler{col=X2:X} xf2=Poly{col=X3:X2} }", scaled);

            // Output goes to a test-specific folder (under build/UnitTest according to the original note).
            var testName   = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var modelPath  = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
            var firstDump  = FileHelper.GetOutputFile("outData.txt", testName);
            var secondDump = FileHelper.GetOutputFile("outData2.txt", testName);
            TestTransformHelper.SerializationTestTransform(host, modelPath, chained, loader, firstDump, secondDump);
        }
        /// <summary>
        /// Builds a lambda column over three in-memory rows with <c>LambdaColumnHelper.Create</c>;
        /// the lambda writes each of the two source values plus one into the destination buffer.
        /// </summary>
        // NOTE(review): this excerpt ends before the method is closed, so the checks
        // performed on lambdaView are not visible here.
        public void TestLambdaColumnPassThroughTransform()
        {
            /*using (*/
            var host = EnvHelper.NewTestEnvironment();
            {
                // Three rows, each with a two-element X vector and a Y value.
                var inputs = new InputOutputU[] {
                    new InputOutputU()
                    {
                        X = new float[] { 0.1f, 1.1f }, Y = 0
                    },
                    new InputOutputU()
                    {
                        X = new float[] { 0.2f, 1.2f }, Y = 1
                    },
                    new InputOutputU()
                    {
                        X = new float[] { 0.3f, 1.3f }, Y = 2
                    }
                };

                var data       = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                // Presumably "X" is the source column and "XX" the created column — TODO confirm
                // against LambdaColumnHelper.Create. Both vector types are Single vectors of size 2.
                var lambdaView = LambdaColumnHelper.Create <VBuffer <float>, VBuffer <float> >(host,
                                                                                               "Lambda", data, "X", "XX", new VectorDataViewType(NumberDataViewType.Single, 2),
                                                                                               new VectorDataViewType(NumberDataViewType.Single, 2),
                                                                                               (in VBuffer <float> src, ref VBuffer <float> dst) =>
                {
                    // Allocate a fresh two-element buffer, then store src + 1 element-wise.
                    dst           = new VBuffer <float>(2, new float[2]);
                    dst.Values[0] = src.Values[0] + 1f;
                    dst.Values[1] = src.Values[1] + 1f;
                });
        /// <summary>
        /// Runs the serialization round-trip helper on a <c>poly</c> transform
        /// configured with <c>d=3</c> over two three-element rows.
        /// </summary>
        public void TestI_PolynomialTransformSerialize()
        {
            var host = EnvHelper.NewTestEnvironment();

            var rows = new ExampleA[]
            {
                new ExampleA { X = new float[] { 1, 10, 100 } },
                new ExampleA { X = new float[] { 2, 3, 5 } },
            };

            IDataView loader     = DataViewConstructionUtils.CreateFromEnumerable(host, rows);
            var       polynomial = host.CreateTransform("poly{col=poly:X d=3}", loader);

            // Output goes to a test-specific folder (under build/UnitTest according to the original note).
            var testName   = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var modelPath  = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
            var firstDump  = FileHelper.GetOutputFile("outData.txt", testName);
            var secondDump = FileHelper.GetOutputFile("outData2.txt", testName);

            // Per the original note: the helper serializes the output data twice, once before
            // saving the pipeline and once after reloading it, and checks both results match.
            TestTransformHelper.SerializationTestTransform(host, modelPath, polynomial, loader, firstDump, secondDump);
        }
예제 #7
0
 /// <summary>
 /// Builds an <see cref="IDataView"/> on top of an enumerable of user-defined items.
 /// Ownership of <paramref name="data"/> stays with the caller, and the returned data view never
 /// modifies the contents of <paramref name="data"/>.
 /// Because an <see cref="IDataView"/> is assumed to be immutable, <paramref name="data"/> is expected to
 /// support multiple enumerations that all return the same results, unless the caller knows the data
 /// will only be cursored once.
 ///
 /// A typical streaming usage is: create a data view that loads data lazily, apply pre-trained
 /// transformations to it, then cursor through it to read the transformed results.
 /// </summary>
 /// <typeparam name="TRow">The user-defined item type.</typeparam>
 /// <param name="data">The enumerable data containing type <typeparamref name="TRow"/> to convert to an <see cref="IDataView"/>.</param>
 /// <param name="schemaDefinition">The optional schema definition of the data view to create. If <c>null</c>,
 /// the schema definition is inferred from <typeparamref name="TRow"/>.</param>
 /// <returns>The constructed <see cref="IDataView"/>.</returns>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[LoadFromEnumerable](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadFromEnumerable.cs)]
 /// ]]>
 /// </format>
 /// </example>
 public IDataView LoadFromEnumerable <TRow>(IEnumerable <TRow> data, SchemaDefinition schemaDefinition = null)
     where TRow : class
 {
     // data is mandatory; schemaDefinition may be null.
     _env.CheckValue(data, nameof(data));
     _env.CheckValueOrNull(schemaDefinition);

     IDataView view = DataViewConstructionUtils.CreateFromEnumerable(_env, data, schemaDefinition);
     return view;
 }
예제 #8
0
        /// <summary>
        /// Cursors over a <c>DescribeTransform</c> built on the sample inputs and checks that
        /// exactly four values are read from the column selected by <c>SchemaHelper._dc(1, cursor)</c>.
        /// </summary>
        public void TestI_DescribeTransformCode()
        {
            /*using (*/ var env = EnvHelper.NewTestEnvironment();
            {
                var inputs = InputOutput.CreateInputs();
                var data   = DataViewConstructionUtils.CreateFromEnumerable(env, inputs);
                var args   = new DescribeTransform.Arguments()
                {
                    columns = new[] { "X" }
                };
                var tr = new DescribeTransform(env, args, data);

                // Collect every value produced by the cursor for the selected column.
                var values = new List <int>();
                using (var cursor = tr.GetRowCursor(tr.Schema))
                {
                    var columnGetter = cursor.GetGetter <int>(SchemaHelper._dc(1, cursor));
                    while (cursor.MoveNext())
                    {
                        int got = 0;
                        columnGetter(ref got);
                        values.Add(got);
                    }
                }

                // Expected value first, so that a failure message reports expected/actual correctly.
                Assert.AreEqual(4, values.Count);
            }
        }
        /// <summary>
        /// Saves a small data set in binary format, builds a pipeline that goes through
        /// <c>Scaler</c>, <c>selecttag</c> (pointing at the saved file), <c>Scaler</c> and <c>append</c>,
        /// writes the result as text and checks that the text file contains nine lines.
        /// </summary>
        /// <exception cref="Exception">Thrown when the text output is empty or does not have nine lines.</exception>
        public void TestSelectTagContactViewTransform()
        {
            var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var idvPath  = FileHelper.GetOutputFile("first.idv", testName);
            var textPath = FileHelper.GetOutputFile("outData.txt", testName);
            // Not read below; the call is kept so the sequence of GetOutputFile calls is unchanged.
            var outData2 = FileHelper.GetOutputFile("outData2.txt", testName);

            var env = EnvHelper.NewTestEnvironment();

            var rows = new ExampleA[]
            {
                new ExampleA { X = new float[] { 0, 1, 4 } },
                new ExampleA { X = new float[] { 2, 3, 7 } },
            };

            // Create IDV
            IDataView loader      = DataViewConstructionUtils.CreateFromEnumerable(env, rows);
            var       binarySaver = ComponentCreation.CreateSaver(env, "binary");
            using (var ch = env.Start("save"))
            {
                using (var idvStream = env.CreateOutputFile(idvPath))
                {
                    DataSaverUtils.SaveDataView(ch, binarySaver, loader, idvStream, true);
                }

                // Create parallel pipeline
                loader = DataViewConstructionUtils.CreateFromEnumerable(env, rows);
                var pipeline = env.CreateTransform("Scaler{col=X1:X}", loader);
                pipeline = env.CreateTransform("selecttag{t=first s=second f=" + idvPath + "}", pipeline);
                pipeline = env.CreateTransform("Scaler{col=X1:X}", pipeline);
                var merged = env.CreateTransform("append{t=first}", pipeline);

                // Save the outcome: every column of the merged view, in schema order.
                var textSaver  = env.CreateSaver("Text");
                var allColumns = Enumerable.Range(0, merged.Schema.Count).ToArray();
                using (var textStream = File.Create(textPath))
                {
                    textSaver.SaveData(textStream, merged, allColumns);
                }

                // Final checking
                var lines = File.ReadAllLines(textPath);
                if (lines.Length == 0)
                {
                    throw new Exception("Empty file.");
                }
                if (lines.Length != 9)
                {
                    throw new Exception("Some lines are missing.");
                }
            }
        }
예제 #10
0
        /// <summary>
        /// Trains a <c>ScikitPipeline</c> (<c>poly{col=X}</c> transform, <c>km{k=2}</c> predictor) on four rows,
        /// predicts on four other rows and checks the shape and the beginning of the resulting data frame.
        /// </summary>
        public void TestScikitAPI_SimplePredictor()
        {
            // Training rows.
            var inputs = new[] {
                new ExampleA()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 3, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 4, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 4, 7 }
                },
            };

            // Rows to predict on.
            var inputs2 = new[] {
                new ExampleA()
                {
                    X = new float[] { -1, -10, -100 }
                },
                new ExampleA()
                {
                    X = new float[] { -2, -3, -5 }
                },
                new ExampleA()
                {
                    X = new float[] { 3, 4, 5 }
                },
                new ExampleA()
                {
                    X = new float[] { 3, 4, 7 }
                },
            };

            /*using (*/
            var host = EnvHelper.NewTestEnvironment(conc: 1);
            {
                var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
                {
                    var predictor = pipe.Train(data, feature: "X");
                    Assert.IsTrue(predictor != null);
                    var data2       = new StreamingDataFrame(DataViewConstructionUtils.CreateFromEnumerable(host, inputs2));
                    var predictions = pipe.Predict(data2);
                    var df          = DataFrameIO.ReadView(predictions);

                    // Expected value first, so that a failure message reports expected/actual correctly.
                    Assert.AreEqual(new Tuple <int, int>(4, 12), df.Shape);

                    var dfs  = df.ToString();
                    var dfs2 = dfs.Replace("\n", ";");

                    // Ordinal comparison: the prefix is machine-formatted text, so the check
                    // must not depend on the current culture.
                    Assert.IsTrue(dfs2.StartsWith("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000", StringComparison.Ordinal));
                }
            }
        }
        /// <summary>
        /// Constructor. Loads the predictor and the transforms from <paramref name="modelStream"/>,
        /// attaches a default scorer and creates the value mapper used to compute predictions.
        /// </summary>
        /// <param name="env">environment</param>
        /// <param name="modelStream">stream holding the model; it is read twice, so it is
        /// repositioned with <see cref="Stream.Seek"/> between the two reads</param>
        /// <param name="output">name of the output column</param>
        /// <param name="outputIsFloat">output is a float (true) or a vector of floats (false)</param>
        /// <param name="conc">number of concurrency threads</param>
        /// <param name="features">features name</param>
        public ValueMapperPredictionEngineFloat(IHostEnvironment env, Stream modelStream,
                                                string output   = "Probability", bool outputIsFloat = true, int conc = 1,
                                                string features = "Features")
        {
            _env = env;
            if (_env == null)
            {
                throw Contracts.Except("env must not be null");
            }

            // Fail early with an explicit argument error rather than a
            // NullReferenceException on modelStream.Position below.
            _env.CheckValue(modelStream, nameof(modelStream));

            // An empty input view is enough to give the loader the input schema.
            var inputs = new FloatVectorInput[0];
            var view   = DataViewConstructionUtils.CreateFromEnumerable <FloatVectorInput>(_env, inputs);

            // Remember where the model starts: the stream is read once for the
            // predictor and once more for the transforms.
            long modelPosition = modelStream.Position;

            _predictor = ModelFileUtils.LoadPredictorOrNull(_env, modelStream);
            if (_predictor == null)
            {
                throw _env.Except("Unable to load a model.");
            }
            modelStream.Seek(modelPosition, SeekOrigin.Begin);
            _transforms = ModelFileUtils.LoadTransforms(_env, view, modelStream);
            if (_transforms == null)
            {
                throw _env.Except("Unable to load a model.");
            }

            var data = _env.CreateExamples(_transforms, features);

            if (data == null)
            {
                throw _env.Except("Cannot create rows.");
            }
            var scorer = _env.CreateDefaultScorer(data, _predictor);

            if (scorer == null)
            {
                throw _env.Except("Cannot create a scorer.");
            }

            // An object creation expression never yields null, so no null check follows.
            _valueMapper = new ValueMapperFromTransformFloat <VBuffer <float> >(_env,
                                                                                scorer, features, output, conc: conc);

            // Exactly one of the two mappers is set, depending on the requested output kind.
            if (outputIsFloat)
            {
                _mapper       = _valueMapper.GetMapper <VBuffer <float>, float>();
                _mapperVector = null;
            }
            else
            {
                _mapper       = null;
                _mapperVector = _valueMapper.GetMapper <VBuffer <float>, VBuffer <float> >();
            }
        }
예제 #12
0
            /// <summary>
            /// Creates a prediction engine from the model stored in the file <paramref name="modelName"/>.
            /// </summary>
            /// <param name="modelName">path of the model file to open</param>
            public PredictionEngineExample(string modelName)
            {
                _env = EnvHelper.NewTestEnvironment();

                // An empty view is only used to provide the input schema.
                var view = DataViewConstructionUtils.CreateFromEnumerable(_env, new FloatVectorInput[] { });

                // The file stream was previously never closed; release the handle once the
                // engine is built. NOTE(review): assumes the pipeline does not read from the
                // stream after construction — confirm against LoadPipeWithPredictor.
                using (var modelStream = File.OpenRead(modelName))
                {
                    var pipe = DataViewConstructionUtils.LoadPipeWithPredictor(_env, modelStream,
                                                                               new EmptyDataView(_env, view.Schema));
                    var transformer = new TransformWrapper(_env, pipe);

                    _predictor = _env.CreatePredictionEngine <FloatVectorInput, FloatOutput>(transformer);
                }
            }
        /// <summary>
        /// Applies a <c>ShakeInputTransform</c> with the <c>add</c> aggregation to two
        /// two-element rows and checks that four output values are collected in total.
        /// </summary>
        /// <exception cref="Exception">Thrown when the number of collected values is not 4.</exception>
        public void Testl_ShakeInputTransformVectorAdd()
        {
            /*using (*/ var host = EnvHelper.NewTestEnvironment();
            {
                var inputs = new[] {
                    new SHExampleA()
                    {
                        X = new float[] { 0, 1 }
                    },
                    new SHExampleA()
                    {
                        X = new float[] { 2, 3 }
                    }
                };

                var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);

                var args = new ShakeInputTransform.Arguments
                {
                    inputColumn      = "X",
                    inputFeaturesInt = new[] { 0, 1 },
                    outputColumns    = new[] { "yo" },
                    values           = "-10,10;-100,100",
                    aggregation      = ShakeInputTransform.ShakeAggregation.add
                };

                // An object creation expression never yields null, so the former
                // null check on this instance was unreachable and has been removed.
                var trv   = new ExampleValueMapperVector();
                var shake = new ShakeInputTransform(host, args, data, new IValueMapper[] { trv });

                using (var cursor = shake.GetRowCursor(shake.Schema))
                {
                    // Flatten every output vector into a single list of values.
                    var outValues = new List <float>();
                    var colGetter = cursor.GetGetter <VBuffer <float> >(SchemaHelper._dc(1, cursor));
                    while (cursor.MoveNext())
                    {
                        VBuffer <float> got = new VBuffer <float>();
                        colGetter(ref got);
                        outValues.AddRange(got.DenseValues());
                    }
                    if (outValues.Count != 4)
                    {
                        throw new Exception("expected 4");
                    }
                }
            }
        }
예제 #14
0
        /// <summary>
        /// Creates the engine from a serialized model: an empty source view of <c>TSrc</c> items is built,
        /// the pipeline is loaded from <paramref name="modelStream"/> on top of it, and the result is wrapped
        /// in a <c>PipeEngine</c> producing <c>TDst</c> items.
        /// </summary>
        /// <param name="env">The host environment (asserted non-null).</param>
        /// <param name="modelStream">The stream holding the model (asserted non-null).</param>
        /// <param name="ignoreMissingColumns">Forwarded to the <c>PipeEngine</c> constructor.</param>
        /// <param name="inputSchemaDefinition">Optional schema definition of the source view.</param>
        /// <param name="outputSchemaDefinition">Optional schema definition forwarded to the <c>PipeEngine</c> constructor.</param>
        internal BatchPredictionEngine(IHostEnvironment env, Stream modelStream, bool ignoreMissingColumns,
                                       SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
        {
            Contracts.AssertValue(env);
            Contracts.AssertValue(modelStream);
            Contracts.AssertValueOrNull(inputSchemaDefinition);
            Contracts.AssertValueOrNull(outputSchemaDefinition);

            // Initialize pipe: the source view starts with no rows.
            var noRows = new TSrc[0];
            _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, noRows, inputSchemaDefinition);

            var loadedPipe = DataViewConstructionUtils.LoadPipeWithPredictor(env, modelStream, _srcDataView);
            _pipeEngine = new PipeEngine <TDst>(env, loadedPipe, ignoreMissingColumns, outputSchemaDefinition);
        }
예제 #15
0
        /// <summary>
        /// Creates the engine from an existing data pipeline: an empty source view of <c>TSrc</c> items is built,
        /// the transforms of <paramref name="dataPipeline"/> are applied to it, and the result is wrapped
        /// in a <c>PipeEngine</c> producing <c>TDst</c> items.
        /// </summary>
        /// <param name="env">The host environment (asserted non-null).</param>
        /// <param name="dataPipeline">The pipeline whose transforms are applied (asserted non-null).</param>
        /// <param name="ignoreMissingColumns">Forwarded to the <c>PipeEngine</c> constructor.</param>
        /// <param name="inputSchemaDefinition">Optional schema definition of the source view.</param>
        /// <param name="outputSchemaDefinition">Optional schema definition forwarded to the <c>PipeEngine</c> constructor.</param>
        internal BatchPredictionEngine(IHostEnvironment env, IDataView dataPipeline, bool ignoreMissingColumns,
                                       SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
        {
            Contracts.AssertValue(env);
            Contracts.AssertValue(dataPipeline);
            Contracts.AssertValueOrNull(inputSchemaDefinition);
            Contracts.AssertValueOrNull(outputSchemaDefinition);

            // Initialize pipe: the source view starts with no rows.
            var noRows = new TSrc[0];
            _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, noRows, inputSchemaDefinition);

            var appliedPipe = ApplyTransformUtils.ApplyAllTransformsToData(env, dataPipeline, _srcDataView);
            _pipeEngine = new PipeEngine <TDst>(env, appliedPipe, ignoreMissingColumns, outputSchemaDefinition);
        }
        /// <summary>
        /// Runs a <c>ResampleTransform</c> with <c>lambda = ratio</c> over four rows and checks the
        /// number of rows it yields: at most 8 when <paramref name="ratio"/> is below 1,
        /// at least 1 when it is above 1.
        /// </summary>
        /// <param name="ratio">Value assigned to the transform's <c>lambda</c> argument.</param>
        /// <exception cref="Exception">Thrown when the row count violates the bound for the given ratio.</exception>
        private void TestResampleTransform(float ratio)
        {
            var env = EnvHelper.NewTestEnvironment(conc: 1);

            // Four rows sharing the same features, with labels 1, 0, 2, 3.
            var rows = new InputOutput[]
            {
                new InputOutput { X = new float[] { 0, 1 }, Y = 1 },
                new InputOutput { X = new float[] { 0, 1 }, Y = 0 },
                new InputOutput { X = new float[] { 0, 1 }, Y = 2 },
                new InputOutput { X = new float[] { 0, 1 }, Y = 3 },
            };

            var view         = DataViewConstructionUtils.CreateFromEnumerable(env, rows);
            var resampleArgs = new ResampleTransform.Arguments { lambda = ratio, cache = false };
            var resampler    = new ResampleTransform(env, resampleArgs, view);

            // Read the selected column for every row and count the rows seen.
            int rowCount = 0;
            using (var cursor = resampler.GetRowCursor(resampler.Schema))
            {
                var readColumn = cursor.GetGetter <int>(SchemaHelper._dc(1, cursor));
                while (cursor.MoveNext())
                {
                    int current = 0;
                    readColumn(ref current);
                    ++rowCount;
                }
            }

            bool tooManyRows = ratio < 1 && rowCount > 8;
            bool tooFewRows  = ratio > 1 && rowCount < 1;
            if (tooManyRows || tooFewRows)
            {
                throw new Exception("ResampleTransform did not work.");
            }
        }
            /// <summary>
            /// Loads the predictor and the transforms from the model file <paramref name="modelName"/>,
            /// attaches a default scorer and creates the mapper from a feature vector to a float.
            /// </summary>
            /// <param name="modelName">path of the model file; it is opened twice, once per load</param>
            /// <param name="features">features name passed to <c>CreateExamples</c></param>
            public ValueMapperExample(string modelName, string features)
            {
                _env = EnvHelper.NewTestEnvironment();

                // The file streams were previously never closed; dispose each one as soon as
                // its load completes. NOTE(review): assumes both loaders finish reading before
                // returning — confirm against ModelFileUtils.
                using (var predictorStream = File.OpenRead(modelName))
                {
                    _predictor = ModelFileUtils.LoadPredictorOrNull(_env, predictorStream);
                }

                // An empty view is only used to provide the input schema.
                var inputs = new Input[0];
                var view   = DataViewConstructionUtils.CreateFromEnumerable(_env, inputs);

                using (var transformStream = File.OpenRead(modelName))
                {
                    _transforms = ModelFileUtils.LoadTransforms(_env, view, transformStream);
                }

                var data   = _env.CreateExamples(_transforms, features);
                var scorer = _env.CreateDefaultScorer(data, _predictor);

                // NOTE(review): the mapper uses the literal "Features" rather than the
                // features argument; left unchanged — confirm this is intended.
                _valueMapper = new ValueMapperFromTransformFloat <VBuffer <float> >(_env, scorer, "Features", "Probability");
                _mapper      = _valueMapper.GetMapper <VBuffer <float>, float>();
            }
예제 #18
0
        /// <summary>
        /// Trains a <c>ScikitPipeline</c> inside a <c>DelegateEnvironment</c> created with
        /// <c>verbose: 0</c> and checks that nothing was written to either log writer.
        /// </summary>
        public void TestScikitAPI_DelegateEnvironmentVerbose0()
        {
            var inputs = new[] {
                new ExampleA()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 3, 5 }
                }
            };

            // Capture everything the environment writes to its output and error writers.
            var        stdout = new List <string>();
            var        stderr = new List <string>();
            ILogWriter logout = new LogWriter(s => stdout.Add(s));
            ILogWriter logerr = new LogWriter(s => stderr.Add(s));

            /*using (*/
            var host = new DelegateEnvironment(seed: 0, outWriter: logout, errWriter: logerr, verbose: 0);

            {
                ComponentHelper.AddStandardComponents(host);
                var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host: host))
                {
                    var predictor = pipe.Train(data, feature: "X");
                    Assert.IsTrue(predictor != null);
                }
            }

            // Expected value first, so that a failure message reports expected/actual correctly.
            Assert.AreEqual(0, stdout.Count);
            Assert.AreEqual(0, stderr.Count);
        }
        /// <summary>
        /// Runs the shared polynomial-transform check twice on views built from sparse
        /// rows and verifies that both runs return exactly the same values.
        /// </summary>
        /// <exception cref="Exception">Thrown when the two runs differ in row count, row length or value.</exception>
        // NOTE(review): both views are created from the same sparse rows, although the second
        // result was named "valuesDense" originally — confirm whether a dense input was intended.
        public void TestI_PolynomialTransformSparse()
        {
            var sparseRows = new ExampleASparse[]
            {
                new ExampleASparse { X = new VBuffer <float>(5, 3, new float[] { 1, 10, 100 }, new int[] { 0, 2, 4 }) },
                new ExampleASparse { X = new VBuffer <float>(5, 3, new float[] { 2, 3, 5 }, new int[] { 1, 2, 3 }) },
            };

            var host = EnvHelper.NewTestEnvironment();

            List <float[]> firstRun;
            var            view = DataViewConstructionUtils.CreateFromEnumerable(host, sparseRows);
            CommonTestPolynomialTransform(host, view, 5, out firstRun);

            List <float[]> secondRun;
            view = DataViewConstructionUtils.CreateFromEnumerable(host, sparseRows);
            CommonTestPolynomialTransform(host, view, 5, out secondRun);

            if (firstRun.Count != secondRun.Count)
            {
                throw new Exception("Mismath in number of observations.");
            }

            for (int row = 0; row < firstRun.Count; ++row)
            {
                float[] left  = firstRun[row];
                float[] right = secondRun[row];
                if (left.Length != right.Length)
                {
                    throw new Exception("Mismath in dimensions.");
                }

                // Exact equality is intended: both runs are expected to be bit-identical.
                for (int col = 0; col < left.Length; ++col)
                {
                    if (left[col] != right[col])
                    {
                        throw new Exception("Mismath in value.");
                    }
                }
            }
        }
예제 #20
0
        /// <summary>
        /// Trains a transform-only <c>ScikitPipeline</c> (<c>poly{col=X}</c>) on two rows, transforms
        /// two other rows and checks the shape and the full text of the resulting data frame.
        /// </summary>
        public void TestScikitAPI_SimpleTransform()
        {
            // Training rows.
            var inputs = new[] {
                new ExampleA()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleA()
                {
                    X = new float[] { 2, 3, 5 }
                }
            };

            // Rows to transform.
            var inputs2 = new[] {
                new ExampleA()
                {
                    X = new float[] { -1, -10, -100 }
                },
                new ExampleA()
                {
                    X = new float[] { -2, -3, -5 }
                }
            };

            /*using (*/
            var host = EnvHelper.NewTestEnvironment(conc: 1);
            {
                var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
                using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, host: host))
                {
                    var predictor = pipe.Train(data);
                    Assert.IsTrue(predictor != null);
                    var data2       = DataViewConstructionUtils.CreateFromEnumerable(host, inputs2);
                    var predictions = pipe.Transform(data2);
                    var df          = DataFrameIO.ReadView(predictions);

                    // Expected value first, so that a failure message reports expected/actual correctly.
                    Assert.AreEqual(new Tuple <int, int>(2, 9), df.Shape);

                    var dfs  = df.ToString();
                    var dfs2 = dfs.Replace("\n", ";");
                    Assert.AreEqual("X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8;-1,-10,-100,1,10,100,100,1000,10000;-2,-3,-5,4,6,10,9,15,25", dfs2);
                }
            }
        }
 /// <summary>
 /// Runs the shared scaler-transform check with the <c>meanVar</c> strategy on two rows
 /// whose <c>X</c> member is a single float rather than a vector.
 /// </summary>
 public void TestI_ScalerTransformDenseMeanVarNoVector()
 {
     var rows = new ExampleA0[]
     {
         new ExampleA0 { X = 1f },
         new ExampleA0 { X = 2f },
     };

     var host = EnvHelper.NewTestEnvironment();
     var view = DataViewConstructionUtils.CreateFromEnumerable(host, rows);

     // The scaled values are returned through the out parameter but not inspected here.
     List <float[]> scaled;
     CommonTestScalerTransform(host, view, 3, ScalerTransform.ScalerStrategy.meanVar, out scaled);
 }
 /// <summary>
 /// Runs the shared scaler-transform check with the <c>minMax</c> strategy on two
 /// dense three-element rows.
 /// </summary>
 public void TestI_ScalerTransformDenseMinMax()
 {
     var rows = new ExampleA[]
     {
         new ExampleA { X = new float[] { 1, 10, 100 } },
         new ExampleA { X = new float[] { 2, 3, 5 } },
     };

     var host = EnvHelper.NewTestEnvironment();
     var view = DataViewConstructionUtils.CreateFromEnumerable(host, rows);

     // The scaled values are returned through the out parameter but not inspected here.
     List <float[]> scaled;
     CommonTestScalerTransform(host, view, 3, ScalerTransform.ScalerStrategy.minMax, out scaled);
 }
예제 #23
0
        /// <summary>
        /// Creates a detrend transform over five (X, time) rows, opens a cursor on it once,
        /// then hands the transform to <c>TestTransformHelper.SerializationTestTransform</c>
        /// together with a model path and two data output paths.
        /// </summary>
        public void TestTimeSeriesDeTrendSerialize()
        {
            // The environment is not wrapped in a using statement (it was commented out originally).
            var host = EnvHelper.NewTestEnvironment();

            var rows = new[]
            {
                new InputOutput() { X = 7f, time = 0f },
                new InputOutput() { X = 7f, time = 1f },
                new InputOutput() { X = 9f, time = 2f },
                new InputOutput() { X = 9f, time = 3f },
                new InputOutput() { X = 8f, time = 4f },
            };

            IDataView source = DataViewConstructionUtils.CreateFromEnumerable(host, rows);
            var detrended = host.CreateTransform("detrend{col=Y:X time=time optim=sasdcar{iter=50}}", source);

            // Open and immediately dispose a cursor; according to the original comment,
            // this is what trains the model.
            using (var cursor = detrended.GetRowCursor(detrended.Schema))
            {
            }

            // Output files are named after this test method.
            var testName = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", testName);
            var firstDump = FileHelper.GetOutputFile("outData.txt", testName);
            var secondDump = FileHelper.GetOutputFile("outData2.txt", testName);

            TestTransformHelper.SerializationTestTransform(host, modelPath, detrended, source, firstDump, secondDump);
        }
        /// <summary>
        /// Builds a prediction engine from a model stored in a stream.
        /// The stream is read twice: first to load the predictor, then, after seeking
        /// back to the position it had on entry, to load the transforms. The transforms
        /// are applied to an empty view of <c>TRowValue</c> and a default scorer is
        /// created on top of them.
        /// </summary>
        /// <param name="env">environment, must not be null</param>
        /// <param name="modelStream">stream holding the model; it must not be null and must
        /// support seeking because the constructor rewinds it between the two loads</param>
        /// <param name="features">features column</param>
        public ValueMapperPredictionEngine(IHostEnvironment env, Stream modelStream, string features = "Features")
        {
            _env = env ?? throw Contracts.Except("env must not be null");

            // Fail early with an explicit message instead of a NullReferenceException
            // raised later by modelStream.Position.
            if (modelStream == null)
            {
                throw _env.Except("modelStream must not be null");
            }

            // An empty view is passed to LoadTransforms as the data the transforms are bound to.
            var inputs = new TRowValue[0];
            var view   = DataViewConstructionUtils.CreateFromEnumerable <TRowValue>(_env, inputs);

            // Remember where the model starts so the stream can be rewound for the second load.
            long modelPosition = modelStream.Position;

            _predictor = ModelFileUtils.LoadPredictorOrNull(_env, modelStream);
            if (_predictor == null)
            {
                throw _env.Except("Unable to load a model.");
            }

            modelStream.Seek(modelPosition, SeekOrigin.Begin);
            _transforms = ModelFileUtils.LoadTransforms(_env, view, modelStream);
            if (_transforms == null)
            {
                throw _env.Except("Unable to load a model.");
            }

            var data = _env.CreateExamples(_transforms, features);
            if (data == null)
            {
                throw _env.Except("Cannot create rows.");
            }

            var scorer = _env.CreateDefaultScorer(data, _predictor);
            if (scorer == null)
            {
                throw _env.Except("Cannot create a scorer.");
            }

            _CreateMapper(scorer);
        }
        /// <summary>
        /// Applies <c>SplitTrainTestTransform</c> to 100 rows (Y = 0..99) and verifies that:
        /// the rows are split in exactly two parts labelled 0 and 1, every row belongs to
        /// exactly one part, the part sizes satisfy
        /// <c>part1 + 5 &lt;= part0 &lt;= 2 * part1 + 15</c>, and a second enumeration
        /// assigns the same rows to the same parts.
        /// Failures are reported by throwing <see cref="Exception"/>.
        /// </summary>
        /// <param name="option">when equal to "2", a cache file is given to the transform</param>
        /// <param name="numThreads">number of threads given to the transform; the environment
        /// is created with <c>conc: 1</c> when it is 1 and <c>conc: 0</c> otherwise</param>
        static void TestSplitTrainTestTransform(string option, int numThreads = 1)
        {
            // The environment is not wrapped in a using statement (it was commented out originally).
            var host = EnvHelper.NewTestEnvironment(conc: numThreads == 1 ? 1 : 0);

            var inputs = new InputOutput[100];
            for (int i = 0; i < inputs.Length; ++i)
            {
                inputs[i] = new InputOutput {
                    X = new float[] { 0, 1 }, Y = i
                };
            }
            var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);

            var args = new SplitTrainTestTransform.Arguments {
                newColumn = "Part", numThreads = numThreads
            };
            if (option == "2")
            {
                var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
                args.cacheFile = FileHelper.GetOutputFile("cacheFile.idv", methodName);
            }

            var transformedData = new SplitTrainTestTransform(host, args, data);

            // Enumerates the transformed data once and groups the Y values by part.
            // When rejectConsecutiveDuplicates is true, a Y value equal to the previous
            // value stored for the same part is reported as an error.
            Dictionary <int, List <int> > ReadParts(bool rejectConsecutiveDuplicates)
            {
                var parts = new Dictionary <int, List <int> >();
                using (var cursor = transformedData.GetRowCursor(transformedData.OutputSchema))
                {
                    var schema = SchemaHelper.ToString(cursor.Schema);
                    if (string.IsNullOrEmpty(schema))
                    {
                        throw new Exception("null");
                    }
                    if (!schema.Contains("Part:I4"))
                    {
                        throw new Exception(schema);
                    }
                    SchemaHelper.CheckSchema(host, transformedData.OutputSchema, cursor.Schema);

                    int index            = SchemaHelper.GetColumnIndex(cursor.Schema, "Y");
                    var sortColumnGetter = cursor.GetGetter <int>(SchemaHelper._dc(index, cursor));
                    index = SchemaHelper.GetColumnIndex(cursor.Schema, args.newColumn);
                    var partGetter = cursor.GetGetter <int>(SchemaHelper._dc(index, cursor));

                    int got  = 0;
                    int part = 0;
                    while (cursor.MoveNext())
                    {
                        sortColumnGetter(ref got);
                        partGetter(ref part);
                        if (!parts.TryGetValue(part, out var values))
                        {
                            values      = new List <int>();
                            parts[part] = values;
                        }
                        if (rejectConsecutiveDuplicates && values.Count > 0 && got == values[values.Count - 1])
                        {
                            throw new Exception("Unexpected value, they should be all different.");
                        }
                        values.Add(got);
                    }
                }
                return parts;
            }

            var counter1 = ReadParts(rejectConsecutiveDuplicates: true);

            // Check than there is no overlap.
            if (counter1.Count != 2)
            {
                throw new Exception(string.Format("Too many or not enough parts: {0}", counter1.Count));
            }
            var nb = counter1.Values.Sum(v => v.Count);
            if (inputs.Length != nb)
            {
                throw new Exception(string.Format("Length mismath: {0} != {1}", inputs.Length, nb));
            }
            foreach (var part in counter1)
            {
                // A HashSet silently drops duplicates, so comparing counts detects them.
                // (ToDictionary would throw ArgumentException on the first duplicate and
                // the message below would never be produced.)
                var distinct = new HashSet <int>(part.Value);
                if (distinct.Count != part.Value.Count)
                {
                    throw new Exception(string.Format("Not identical id for part {0}", part.Key));
                }
            }
            if (!counter1.TryGetValue(0, out var list0) || !counter1.TryGetValue(1, out var list1))
            {
                throw new Exception("Parts should be labelled 0 and 1.");
            }
            var part0 = new HashSet <int>(list0);
            var part1 = new HashSet <int>(list1);
            if (part0.Overlaps(part1))
            {
                throw new Exception("Intersection is not null.");
            }

            // Check sizes.
            if (part0.Count > part1.Count * 2 + 15)
            {
                throw new Exception("Size are different from ratios.");
            }
            if (part0.Count < part1.Count + 5)
            {
                throw new Exception("Size are different from ratios.");
            }

            // We check a second run brings the same results (CacheView).
            var counter2 = ReadParts(rejectConsecutiveDuplicates: false);

            if (counter1.Count != counter2.Count)
            {
                throw new Exception("Not the same number of parts.");
            }
            foreach (var pair in counter1)
            {
                // Same set of values in both runs, regardless of order.
                if (!counter2.TryGetValue(pair.Key, out var second) ||
                    !new HashSet <int>(pair.Value).SetEquals(second))
                {
                    throw new Exception("Not the same results for a part.");
                }
            }
        }
예제 #26
0
        /// <summary>
        /// Runs a simple test: trains a <c>ScikitPipeline</c> made of the transform
        /// "poly{col=X}" and the predictor "km{k=2}" on four rows, predicts four other rows,
        /// and checks the shape (4 x 12) and the beginning of the textual dump of the result.
        /// Throws <see cref="Exception"/> when a check fails.
        /// </summary>
        public static void TestScikitAPI()
        {
            var inputs = new[] {
                new ExampleVector()
                {
                    X = new float[] { 1, 10, 100 }
                },
                new ExampleVector()
                {
                    X = new float[] { 2, 3, 5 }
                },
                new ExampleVector()
                {
                    X = new float[] { 2, 4, 5 }
                },
                new ExampleVector()
                {
                    X = new float[] { 2, 4, 7 }
                },
            };

            var inputs2 = new[] {
                new ExampleVector()
                {
                    X = new float[] { -1, -10, -100 }
                },
                new ExampleVector()
                {
                    X = new float[] { -2, -3, -5 }
                },
                new ExampleVector()
                {
                    X = new float[] { 3, 4, 5 }
                },
                new ExampleVector()
                {
                    X = new float[] { 3, 4, 7 }
                },
            };

            // Expected header and first row of the dump, lines being joined with ';'.
            const string expectedStart = "X.0,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8,PredictedLabel,Score.0,Score.1;-1,-10,-100,1,10,100,100,1000,10000";

            // The environment is not wrapped in a using statement (it was commented out originally).
            var host = new ConsoleEnvironment();
            ComponentHelper.AddStandardComponents(host);
            var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);
            using (var pipe = new ScikitPipeline(new[] { "poly{col=X}" }, "km{k=2}", host))
            {
                var predictor = pipe.Train(data, feature: "X");
                if (predictor == null)
                {
                    throw new Exception("Test failed: no predictor.");
                }
                var data2       = DataViewConstructionUtils.CreateFromEnumerable(host, inputs2);
                var predictions = pipe.Predict(data2);
                var df          = DataFrameIO.ReadView(predictions);
                if (df.Shape.Item1 != 4 || df.Shape.Item2 != 12)
                {
                    throw new Exception("Test failed: prediction failed.");
                }
                var dfs  = df.ToString();
                var dfs2 = dfs.Replace("\n", ";");

                // The dump is machine-generated text: compare it ordinally so the result
                // does not depend on the current culture.
                if (!dfs2.StartsWith(expectedStart, StringComparison.Ordinal))
                {
                    throw new Exception("Test failed: prediction failed (header).");
                }
            }
        }
예제 #27
0
        /// <summary>
        /// Applies <c>MultiToBinaryTransform</c> to three rows labelled 0, 1, 2 and reads
        /// column 1 as a <c>uint</c>, as a <c>VBuffer&lt;bool&gt;</c> and as a
        /// <c>VBuffer&lt;float&gt;</c>. For every output row, both vectors must have length 3,
        /// hold a single explicit value (true / 1) located at the index equal to the label,
        /// and densify to 3 values. The number of output rows is then checked against
        /// <paramref name="max"/>. Throws <see cref="Exception"/> when a check fails.
        /// </summary>
        /// <param name="algo">multiplication algorithm given to the transform</param>
        /// <param name="max">value given to the transform as <c>maxMulti</c>; 9 rows are expected
        /// when it is at least 3, <c>3 * max</c> rows otherwise</param>
        public static void TestMultiToBinaryTransformVector(MultiToBinaryTransform.MultiplicationAlgorithm algo, int max)
        {
            /*using (*/ var host = EnvHelper.NewTestEnvironment();
            {
                var inputs = new InputOutputU[] {
                    new InputOutputU()
                    {
                        X = new float[] { 0.1f, 1.1f }, Y = 0
                    },
                    new InputOutputU()
                    {
                        X = new float[] { 0.2f, 1.2f }, Y = 1
                    },
                    new InputOutputU()
                    {
                        X = new float[] { 0.3f, 1.3f }, Y = 2
                    }
                };

                var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);

                var args = new MultiToBinaryTransform.Arguments {
                    label = "Y", algo = algo, maxMulti = max
                };
                var multiplied = new MultiToBinaryTransform(host, args, data);

                using (var cursor = multiplied.GetRowCursor(multiplied.Schema))
                {
                    // The same column (index 1) is requested with three different types.
                    var labelGetter            = cursor.GetGetter <uint>(SchemaHelper._dc(1, cursor));
                    var labelVectorGetter      = cursor.GetGetter <VBuffer <bool> >(SchemaHelper._dc(1, cursor));
                    var labelVectorFloatGetter = cursor.GetGetter <VBuffer <float> >(SchemaHelper._dc(1, cursor));
                    var binGetter = cursor.GetGetter <bool>(SchemaHelper._dc(2, cursor));
                    Contracts.CheckValue(binGetter, "Type mismatch.");
                    var  cont  = new List <Tuple <uint, bool> >();
                    bool bin   = false;
                    uint got   = 0;
                    var  gotv  = new VBuffer <bool>();
                    var  gotvf = new VBuffer <float>();
                    while (cursor.MoveNext())
                    {
                        labelGetter(ref got);
                        labelVectorGetter(ref gotv);
                        labelVectorFloatGetter(ref gotvf);
                        binGetter(ref bin);
                        cont.Add(new Tuple <uint, bool>(got, bin));

                        // Boolean vector view of the label.
                        if (gotv.Length != 3)
                        {
                            throw new Exception("Bad dimension (Length)");
                        }
                        if (gotv.Count != 1)
                        {
                            throw new Exception("Bad dimension (Count)");
                        }
                        if (!gotv.Values[0])
                        {
                            throw new Exception("Bad value (Count)");
                        }
                        if (gotv.Indices[0] != got)
                        {
                            throw new Exception("Bad index (Count)");
                        }
                        var ar = gotv.DenseValues().ToArray();
                        if (ar.Length != 3)
                        {
                            throw new Exception("Bad dimension (dense)");
                        }

                        // Float vector view of the label.
                        if (gotvf.Length != 3)
                        {
                            throw new Exception("Bad dimension (Length)f");
                        }
                        if (gotvf.Count != 1)
                        {
                            throw new Exception("Bad dimension (Count)f");
                        }
                        if (gotvf.Values[0] != 1)
                        {
                            throw new Exception("Bad value (Count)f");
                        }
                        if (gotvf.Indices[0] != got)
                        {
                            throw new Exception("Bad index (Count)f");
                        }
                        // Densify the float vector (the boolean one was already checked above).
                        var ar2 = gotvf.DenseValues().ToArray();
                        if (ar2.Length != 3)
                        {
                            throw new Exception("Bad dimension (dense)f");
                        }
                    }

                    if (max >= 3)
                    {
                        if (cont.Count != 9)
                        {
                            throw new Exception("It should be 9.");
                        }
                        if (algo == MultiToBinaryTransform.MultiplicationAlgorithm.Default)
                        {
                            // Each class must be paired with a true binary label exactly once.
                            for (int i = 0; i < 3; ++i)
                            {
                                var co = cont.Where(c => c.Item1 == (uint)i && c.Item2);
                                if (co.Count() != 1)
                                {
                                    throw new Exception(string.Format("Unexpected number of true labels for class {0} - algo={1} - max={2}", i, algo, max));
                                }
                            }
                        }
                    }
                    else
                    {
                        if (cont.Count != 3 * max)
                        {
                            throw new Exception(string.Format("It should be {0}.", 3 * max));
                        }
                    }
                }
            }
        }
        /// <summary>
        /// Wraps a lambda transform (X[0] + 1, X[1] - 1) into a
        /// <c>ValueMapperFromTransformFloat</c> and calls the resulting mapper once per input
        /// row on the fixed vector (-5, -5). Checks that each call returns (-4, -6) and that
        /// the input vector is left untouched. Despite the method name, the mapper is invoked
        /// sequentially from the calling thread. Throws <see cref="Exception"/> when a check fails.
        /// </summary>
        public void TestTransform2ValueMapperMultiThread()
        {
            /*using (*/
            var env = EnvHelper.NewTestEnvironment();
            {
                var host = env.Register("unittest");

                var inputs = new[] {
                    new InputOutput {
                        X = new float[] { 0, 1 }, Y = 10
                    },
                    new InputOutput {
                        X = new float[] { 2, 3 }, Y = 100
                    }
                };

                var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);

                var trv = ExtLambdaTransform.CreateMap(host, data,
                                                       (InputOutput src, InputOutput dst, EnvHelper.EmptyState state) =>
                {
                    dst.X = new float[] { src.X[0] + 1f, src.X[1] - 1f };
                }, (EnvHelper.EmptyState state) => { });

                var ino = new InputOutput {
                    X = new float[] { -5, -5 }, Y = 3
                };
                var inob = new VBuffer <float>(2, ino.X);
                var ans  = new VBuffer <float>();

                using (var valueMapper = new ValueMapperFromTransformFloat <VBuffer <float> >(host, trv, "X", "X", ignoreOtherColumn: true))
                {
                    var mapper = valueMapper.GetMapper <VBuffer <float>, VBuffer <float> >();

                    var listy = new List <int>();
                    var listx = new List <float>();
                    for (int i = 0; i < inputs.Length; ++i)
                    {
                        // The mapper is always fed the same vector; only Y comes from the inputs.
                        mapper(in inob, ref ans);
                        if (ans.Count != 2)
                        {
                            throw new Exception("Issue with dimension.");
                        }
                        listx.AddRange(ans.GetValues().ToArray());
                        listy.Add(inputs[i].Y);
                    }

                    if (listy.Count != 2)
                    {
                        throw new Exception("Issue with dimension.");
                    }
                    if (listy[0] != 10 || listy[1] != 100)
                    {
                        throw new Exception("Issue with values.");
                    }

                    // Two calls on (-5, -5) must each produce (-4, -6).
                    var expected = new float[] { -4, -6, -4, -6 };
                    if (listx.Count != expected.Length)
                    {
                        throw new Exception("Issue with dimension.");
                    }
                    for (int i = 0; i < expected.Length; ++i)
                    {
                        if (listx[i] != expected[i])
                        {
                            throw new Exception("Issue with values.");
                        }
                    }

                    // The input buffer must not have been modified by the mapper:
                    // both of its values are checked.
                    if (inob.Count != 2)
                    {
                        throw new Exception("Issue with dimension.");
                    }
                    if (inob.Values[0] != -5)
                    {
                        throw new Exception("Values were overwritten.");
                    }
                    if (inob.Values[1] != -5)
                    {
                        throw new Exception("Values were overwritten.");
                    }
                }
            }
        }
예제 #29
0
        /// <summary>
        /// Reads column 1 (as <c>int</c>) of a two-row view directly, checking that the values
        /// 1 then 0 come out in that order, then reads the same column through an
        /// <c>ExtendedCacheTransform</c> and checks that two rows come out.
        /// Throws <see cref="Exception"/> when a check fails.
        /// </summary>
        /// <param name="nt">number of threads given to the cache transform; the environment is
        /// created with <c>conc: 1</c> when it is 1 and <c>conc: 0</c> otherwise</param>
        /// <param name="async">value given to the <c>async</c> argument of the cache transform</param>
        static void TestCacheTransformSimple(int nt, bool async)
        {
            /*using (*/
            var host = EnvHelper.NewTestEnvironment(conc: nt == 1 ? 1 : 0);
            {
                var inputs = new InputOutput[] {
                    new InputOutput()
                    {
                        X = new float[] { 0, 1 }, Y = 1
                    },
                    new InputOutput()
                    {
                        X = new float[] { 0, 1 }, Y = 0
                    }
                };

                var data = DataViewConstructionUtils.CreateFromEnumerable(host, inputs);

                // First pass: read the source view directly.
                using (var cursor = data.GetRowCursor(data.Schema))
                {
                    var sortedValues     = new List <int>();
                    var sortColumnGetter = cursor.GetGetter <int>(SchemaHelper._dc(1, cursor));
                    while (cursor.MoveNext())
                    {
                        int got = 0;
                        sortColumnGetter(ref got);
                        sortedValues.Add(got);
                    }
                    if (sortedValues.Count != 2)
                    {
                        throw new Exception(string.Format("Source view: expected 2 rows, got {0}.", sortedValues.Count));
                    }
                    if (sortedValues[0] != 1)
                    {
                        throw new Exception(string.Format("Source view: expected 1 in the first row, got {0}.", sortedValues[0]));
                    }
                    if (sortedValues[1] != 0)
                    {
                        throw new Exception(string.Format("Source view: expected 0 in the second row, got {0}.", sortedValues[1]));
                    }
                }

                var args = new ExtendedCacheTransform.Arguments {
                    numTheads = nt, async = async
                };
                var transformedData = new ExtendedCacheTransform(host, args, data);

                // NOTE(review): the lambda transform below is built on the source view and its
                // result is discarded; the second pass reads the cache transform itself.
                // The call is kept as is in case it has side effects - confirm whether it was
                // meant to be chained after the cache transform.
                LambdaTransform.CreateMap <InputOutput, InputOutput, EnvHelper.EmptyState>(host, data,
                                                                                           (input, output, state) =>
                {
                    output.X = input.X;
                    output.Y = input.Y;
                }, (EnvHelper.EmptyState state) => { });

                // Second pass: read through the cache transform.
                using (var cursor = transformedData.GetRowCursor(data.Schema))
                {
                    var sortedValues     = new List <int>();
                    var sortColumnGetter = cursor.GetGetter <int>(SchemaHelper._dc(1, cursor));
                    while (cursor.MoveNext())
                    {
                        int got = 0;
                        sortColumnGetter(ref got);
                        sortedValues.Add(got);
                    }
                    if (sortedValues.Count != 2)
                    {
                        throw new Exception(string.Format("Cache transform: expected 2 rows, got {0}.", sortedValues.Count));
                    }
                }
            }
        }
예제 #30
0
        /// <summary>
        /// Applies <c>MultiToBinaryTransform</c> to three rows labelled 0, 1, 2, collects the
        /// (label, binary label) pairs read from columns 1 and 2 and checks how many rows were
        /// produced. Throws <see cref="Exception"/> when a check fails.
        /// </summary>
        /// <param name="algo">multiplication algorithm given to the transform</param>
        /// <param name="max">value given to the transform as <c>maxMulti</c>; 9 rows are expected
        /// when it is at least 3, <c>3 * max</c> rows otherwise</param>
        public static void TestMultiToBinaryTransform(MultiToBinaryTransform.MultiplicationAlgorithm algo, int max)
        {
            // The environment is not wrapped in a using statement (it was commented out originally).
            var host = EnvHelper.NewTestEnvironment();

            var rows = new InputOutputU[]
            {
                new InputOutputU() { X = new float[] { 0.1f, 1.1f }, Y = 0 },
                new InputOutputU() { X = new float[] { 0.2f, 1.2f }, Y = 1 },
                new InputOutputU() { X = new float[] { 0.3f, 1.3f }, Y = 2 }
            };
            var view = DataViewConstructionUtils.CreateFromEnumerable(host, rows);

            var settings = new MultiToBinaryTransform.Arguments();
            settings.label    = "Y";
            settings.algo     = algo;
            settings.maxMulti = max;
            var expanded = new MultiToBinaryTransform(host, settings, view);

            using (var cursor = expanded.GetRowCursor(expanded.Schema))
            {
                var readLabel  = cursor.GetGetter <uint>(SchemaHelper._dc(1, cursor));
                var readBinary = cursor.GetGetter <bool>(SchemaHelper._dc(2, cursor));

                // Collect every (label, binary label) pair produced by the transform.
                var  pairs  = new List <Tuple <uint, bool> >();
                bool binary = false;
                while (cursor.MoveNext())
                {
                    uint label = 0;
                    readLabel(ref label);
                    readBinary(ref binary);
                    pairs.Add(Tuple.Create(label, binary));
                }

                if (max < 3)
                {
                    int expectedRows = 3 * max;
                    if (pairs.Count != expectedRows)
                    {
                        throw new Exception(string.Format("It should be {0}.", expectedRows));
                    }
                }
                else
                {
                    if (pairs.Count != 9)
                    {
                        throw new Exception("It should be 9.");
                    }
                    if (algo == MultiToBinaryTransform.MultiplicationAlgorithm.Default)
                    {
                        // Each class must be paired with a true binary label exactly once.
                        for (int cl = 0; cl < 3; ++cl)
                        {
                            int trueCount = pairs.Count(p => p.Item1 == (uint)cl && p.Item2);
                            if (trueCount != 1)
                            {
                                throw new Exception(string.Format("Unexpected number of true labels for class {0} - algo={1} - max={2}", cl, algo, max));
                            }
                        }
                    }
                }
            }
        }