Exemple #1
0
        // 64-bit only: x86 fails with "An attempt was made to load a program with an incorrect format."
        [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))]
        void TestSimpleCase()
        {
            // The scenario is only exercised on Windows; on any other OS the test returns immediately.
            var runningOnWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
            if (!runningOnWindows)
            {
                return;
            }

            var onnxModelPath = "squeezenet/00000001/model.onnx";
            var sample        = GetSampleArrayData();

            // Valid input: two rows that both carry the same sample vector in "data_0".
            var validView = ComponentCreation.CreateDataView(Env, new TestData[]
            {
                new TestData() { data_0 = sample },
                new TestData() { data_0 = sample }
            });

            var pipe = new OnnxScoringEstimator(Env, onnxModelPath, new[] { "data_0" }, new[] { "softmaxout_1" });

            // Invalid inputs: a differently named column, a string-typed column and a vector of length 2.
            var wrongNamesView = ComponentCreation.CreateDataView(Env,
                new List<TestDataXY> { new TestDataXY() { A = new float[inputSize] } });
            var wrongTypesView = ComponentCreation.CreateDataView(Env,
                new List<TestDataDifferntType> { new TestDataDifferntType() { data_0 = new string[inputSize] } });
            var wrongSizeView = ComponentCreation.CreateDataView(Env,
                new List<TestDataSize> { new TestDataSize() { data_0 = new float[2] } });

            TestEstimatorCore(pipe, validView, invalidInput: wrongNamesView);
            TestEstimatorCore(pipe, validView, invalidInput: wrongTypesView);

            // The schema computation is invoked on the wrong-size data first; fitting on that
            // data must then fail with one of the two exception types caught below.
            pipe.GetOutputSchema(SchemaShape.Create(wrongSizeView.Schema));
            try
            {
                pipe.Fit(wrongSizeView);
                // Reaching this line means Fit accepted the bad input: fail the test.
                Assert.False(true);
            }
            catch (ArgumentOutOfRangeException)
            {
                // Expected failure mode.
            }
            catch (InvalidOperationException)
            {
                // Expected failure mode.
            }
        }
        /// <summary>
        /// Saves a small view to a binary file, builds a pipeline that scales, tags, scales again
        /// and appends the tagged view, dumps the result as text and checks the number of lines written.
        /// </summary>
        public void TestSelectTagContactViewTransform()
        {
            var testName       = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var binaryViewPath = FileHelper.GetOutputFile("first.idv", testName);
            var textDumpPath   = FileHelper.GetOutputFile("outData.txt", testName);
            // Requested like the two paths above but not used afterwards.
            var secondDumpPath = FileHelper.GetOutputFile("outData2.txt", testName);

            using (var env = EnvHelper.NewTestEnvironment())
            {
                var rows = new[]
                {
                    new ExampleA() { X = new float[] { 0, 1, 4 } },
                    new ExampleA() { X = new float[] { 2, 3, 7 } }
                };

                // Step 1: store the raw rows in binary format.
                IDataView source      = env.CreateStreamingDataView(rows);
                var       binarySaver = ComponentCreation.CreateSaver(env, "binary");
                using (var channel = env.Start("save"))
                {
                    using (var binaryStream = env.CreateOutputFile(binaryViewPath))
                    {
                        DataSaverUtils.SaveDataView(channel, binarySaver, source, binaryStream, true);
                    }

                    // Step 2: build the pipeline on a fresh view of the same rows.
                    source = env.CreateStreamingDataView(rows);
                    var pipeline = env.CreateTransform("Scaler{col=X1:X}", source);
                    pipeline = env.CreateTransform(string.Format("selecttag{{t=first s=second f={0}}}", binaryViewPath), pipeline);
                    pipeline = env.CreateTransform("Scaler{col=X1:X}", pipeline);
                    var merged = env.CreateTransform("append{t=first}", pipeline);

                    // Step 3: write every column of the merged view as text.
                    var textSaver     = env.CreateSaver("Text");
                    var columnIndices = new int[merged.Schema.Count];
                    for (int col = 0; col < columnIndices.Length; ++col)
                    {
                        columnIndices[col] = col;
                    }
                    using (var textStream = File.Create(textDumpPath))
                    {
                        textSaver.SaveData(textStream, merged, columnIndices);
                    }

                    // Step 4: the dump must contain exactly nine lines.
                    var written = File.ReadAllLines(textDumpPath);
                    if (written.Length == 0)
                    {
                        throw new Exception("Empty file.");
                    }
                    if (written.Length != 9)
                    {
                        throw new Exception("Some lines are missing.");
                    }
                }
            }
        }
Exemple #3
0
        // This test is being fixed as part of issue #1441.
        [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))]
        public void MatrixFactorizationInMemoryData()
        {
            // Build the in-memory matrix as a flat list of (column index, row index, value) elements.
            var elements  = new List<MatrixElement>();
            var columnEnd = _synthesizedMatrixFirstColumnIndex + _synthesizedMatrixColumnCount;
            var rowEnd    = _synthesizedMatrixFirstRowIndex + _synthesizedMatrixRowCount;

            for (uint column = _synthesizedMatrixFirstColumnIndex; column < columnEnd; ++column)
            {
                for (uint row = _synthesizedMatrixFirstRowIndex; row < rowEnd; ++row)
                {
                    var element = new MatrixElement()
                    {
                        MatrixColumnIndex = column,
                        MatrixRowIndex    = row,
                        Value             = (column + row) % 5
                    };
                    elements.Add(element);
                }
            }

            // Wrap the list into an IDataView so that ML.NET components can consume it.
            var trainingView = ComponentCreation.CreateDataView(Env, elements);

            // The trainer uses "Value" as the label, "MatrixColumnIndex" as the matrix's column index
            // and "MatrixRowIndex" as the matrix's row index.
            var mlContext = new MLContext(seed: 1, conc: 1);
            var trainer   = new MatrixFactorizationTrainer(
                mlContext, "Value", "MatrixColumnIndex", "MatrixRowIndex",
                advancedSettings: options =>
                {
                    options.NumIterations = 10;
                    options.NumThreads    = 1;  // To eliminate randomness, # of threads must be 1.
                    options.K             = 32;
                });

            // Train the matrix factorization model.
            var model = trainer.Fit(trainingView);

            // The trained model must expose the expected column names and key types.
            Assert.True(model.MatrixColumnIndexColumnName == "MatrixColumnIndex");
            Assert.True(model.MatrixRowIndexColumnName == "MatrixRowIndex");
            Assert.True(model.MatrixColumnIndexColumnType.IsKey);
            Assert.True(model.MatrixRowIndexColumnType.IsKey);

            var columnKeyType = model.MatrixColumnIndexColumnType.AsKey;
            Assert.True(columnKeyType.Min == _synthesizedMatrixFirstColumnIndex);
            Assert.True(columnKeyType.Count == _synthesizedMatrixColumnCount);

            var rowKeyType = model.MatrixRowIndexColumnType.AsKey;
            Assert.True(rowKeyType.Min == _synthesizedMatrixFirstRowIndex);
            Assert.True(rowKeyType.Count == _synthesizedMatrixRowCount);

            // Score the training set with the trained model.
            var scored = model.Transform(trainingView);

            // Compute regression metrics on the scored data.
            var metrics = mlContext.Regression.Evaluate(scored, label: "Value", score: "Score");

            // Naive check: the pipeline ran and the L2 metric stays below 0.1.
            Assert.True(metrics.L2 < 0.1);
        }
Exemple #4
0
        /// <summary>
        /// Builds a prediction engine from a serialized model: loads the predictor, then the
        /// transforms, from <paramref name="modelStream"/>, creates a default scorer on top of
        /// them and wraps the scorer into a value mapper.
        /// </summary>
        /// <param name="env">environment, must not be null</param>
        /// <param name="modelStream">stream holding the model, must not be null; it is read twice
        /// (predictor first, then transforms after seeking back to the initial position), so it
        /// has to support <see cref="Stream.Position"/> and <see cref="Stream.Seek"/></param>
        /// <param name="output">name of the output column</param>
        /// <param name="outputIsFloat">output is a float (true) or a vector of floats (false)</param>
        /// <param name="conc">number of concurrency threads</param>
        /// <param name="features">features name</param>
        public ValueMapperPredictionEngineFloat(IHostEnvironment env, Stream modelStream,
                                                string output   = "Probability", bool outputIsFloat = true, int conc = 1,
                                                string features = "Features")
        {
            _env = env;
            if (_env == null)
            {
                throw Contracts.Except("env must not be null");
            }
            if (modelStream == null)
            {
                // Fail with an explicit message instead of a NullReferenceException on Position below.
                throw _env.Except("modelStream must not be null");
            }

            // An empty input view is enough to let the transforms be loaded against a schema.
            var inputs = new FloatVectorInput[0];
            var view   = ComponentCreation.CreateStreamingDataView <FloatVectorInput>(_env, inputs);

            // Remember where the model starts: the stream is consumed once for the predictor
            // and must be rewound before the transforms are read from it.
            long modelPosition = modelStream.Position;

            _predictor = ComponentCreation.LoadPredictorOrNull(_env, modelStream);
            if (_predictor == null)
            {
                throw _env.Except("Unable to load a model.");
            }

            modelStream.Seek(modelPosition, SeekOrigin.Begin);
            _transforms = ComponentCreation.LoadTransforms(_env, modelStream, view);
            if (_transforms == null)
            {
                // Distinct message so that this failure is not confused with the predictor one.
                throw _env.Except("Unable to load the transforms.");
            }

            var data = _env.CreateExamples(_transforms, features);
            if (data == null)
            {
                throw _env.Except("Cannot create rows.");
            }

            var scorer = _env.CreateDefaultScorer(data, _predictor);
            if (scorer == null)
            {
                throw _env.Except("Cannot create a scorer.");
            }

            // No null check afterwards: a constructor call either returns an instance or throws.
            _valueMapper = new ValueMapperFromTransformFloat <VBuffer <float> >(_env,
                                                                                scorer, features, output, conc: conc);

            // Exactly one of the two mappers is set, depending on the requested output kind.
            if (outputIsFloat)
            {
                _mapper       = _valueMapper.GetMapper <VBuffer <float>, float>();
                _mapperVector = null;
            }
            else
            {
                _mapper       = null;
                _mapperVector = _valueMapper.GetMapper <VBuffer <float>, VBuffer <float> >();
            }
        }
Exemple #5
0
        /// <summary>
        /// Runs the estimator checks on a mean-replacement NAReplaceEstimator over scalar and
        /// vector columns of floats and doubles that contain NaN and infinite values.
        /// </summary>
        public void NAReplaceWorkout()
        {
            // Rows: regular values, all NaN, all -infinity, all +infinity, regular values.
            var rows = new[]
            {
                new TestClass()
                {
                    A = 1,
                    B = 3,
                    C = new float[] { 1, 2 },
                    D = new double[] { 3, 4 }
                },
                new TestClass()
                {
                    A = float.NaN,
                    B = double.NaN,
                    C = new float[] { float.NaN, float.NaN },
                    D = new double[] { double.NaN, double.NaN }
                },
                new TestClass()
                {
                    A = float.NegativeInfinity,
                    B = double.NegativeInfinity,
                    C = new float[] { float.NegativeInfinity, float.NegativeInfinity },
                    D = new double[] { double.NegativeInfinity, double.NegativeInfinity }
                },
                new TestClass()
                {
                    A = float.PositiveInfinity,
                    B = double.PositiveInfinity,
                    C = new float[] { float.PositiveInfinity, float.PositiveInfinity },
                    D = new double[] { double.PositiveInfinity, double.PositiveInfinity }
                },
                new TestClass()
                {
                    A = 2,
                    B = 1,
                    C = new float[] { 3, 4 },
                    D = new double[] { 5, 6 }
                }
            };

            var view = ComponentCreation.CreateDataView(Env, rows);

            // Every column is replaced using the Mean mode and written to an "NA"-prefixed output column.
            var meanMode  = NAReplaceTransform.ColumnInfo.ReplacementMode.Mean;
            var estimator = new NAReplaceEstimator(
                Env,
                new NAReplaceTransform.ColumnInfo("A", "NAA", meanMode),
                new NAReplaceTransform.ColumnInfo("B", "NAB", meanMode),
                new NAReplaceTransform.ColumnInfo("C", "NAC", meanMode),
                new NAReplaceTransform.ColumnInfo("D", "NAD", meanMode));

            TestEstimatorCore(estimator, view);
            Done();
        }
Exemple #6
0
        /// <summary>
        /// Checks a TensorFlowEstimator built on the matmul model against valid input and
        /// against inputs with wrong column names, wrong item types and wrong vector sizes.
        /// </summary>
        void TestSimpleCase()
        {
            var tensorFlowModelPath = "model_matmul/frozen_saved_model.pb";

            // Valid input: two rows, each with two vectors of four floats ("a" and "b").
            var validRows = new List<TestData>
            {
                new TestData()
                {
                    a = new[] { 1.0f, 2.0f, 3.0f, 4.0f },
                    b = new[] { 1.0f, 2.0f, 3.0f, 4.0f }
                },
                new TestData()
                {
                    a = new[] { 2.0f, 2.0f, 2.0f, 2.0f },
                    b = new[] { 3.0f, 3.0f, 3.0f, 3.0f }
                }
            };
            var validView = ComponentCreation.CreateDataView(Env, validRows);

            var pipe = new TensorFlowEstimator(Env, tensorFlowModelPath, new[] { "a", "b" }, new[] { "c" });

            // Invalid inputs: upper-case column names, string-typed columns, vectors of length 2.
            var wrongNamesView = ComponentCreation.CreateDataView(Env,
                new List<TestDataXY> { new TestDataXY() { A = new float[4], B = new float[4] } });
            var wrongTypesView = ComponentCreation.CreateDataView(Env,
                new List<TestDataDifferntType> { new TestDataDifferntType() { a = new string[4], b = new string[4] } });
            var wrongSizeView = ComponentCreation.CreateDataView(Env,
                new List<TestDataSize> { new TestDataSize() { a = new float[2], b = new float[2] } });

            TestEstimatorCore(pipe, validView, invalidInput: wrongNamesView);
            TestEstimatorCore(pipe, validView, invalidInput: wrongTypesView);

            // The schema computation is invoked on the wrong-size data first; fitting on that
            // data must then fail with one of the two exception types caught below.
            pipe.GetOutputSchema(SchemaShape.Create(wrongSizeView.Schema));
            try
            {
                pipe.Fit(wrongSizeView);
                // Reaching this line means Fit accepted the bad input: fail the test.
                Assert.False(true);
            }
            catch (ArgumentOutOfRangeException)
            {
                // Expected failure mode.
            }
            catch (InvalidOperationException)
            {
                // Expected failure mode.
            }
        }
 void TestSelectColumnsWithSameName()
 {
     var data = new[] { new TestClass()
                        {
                            A = 1, B = 2, C = 3,
                        }, new TestClass()
                        {
                            A = 4, B = 5, C = 6
                        } };
     var dataView    = ComponentCreation.CreateDataView(Env, data);
     var est         = new ColumnCopyingEstimator(Env, new[] { ("A", "A"), ("B", "B") });
        /// <summary>
        /// Fits a OneHotHashEncodingEstimator producing Bag, Ind, Key and Bin outputs and
        /// hands the transformed view to ValidateMetadata.
        /// </summary>
        public void TestMetadataPropagation()
        {
            var rows = new[]
            {
                new TestMeta()
                {
                    A = new string[] { "A", "B" },
                    B = "C",
                    C = new float[] { 1.0f, 2.0f },
                    D = 1.0f,
                    E = new string[] { "A", "D" },
                    F = "D"
                },
                new TestMeta()
                {
                    A = new string[] { "A", "B" },
                    B = "C",
                    C = new float[] { 3.0f, 4.0f },
                    D = -1.0f,
                    E = new string[] { "E", "A" },
                    F = "E"
                },
                new TestMeta()
                {
                    A = new string[] { "A", "B" },
                    B = "C",
                    C = new float[] { 5.0f, 6.0f },
                    D = 1.0f,
                    E = new string[] { "D", "E" },
                    F = "D"
                }
            };
            var view = ComponentCreation.CreateDataView(Env, rows);

            // Short aliases for the four output kinds used below.
            var bag = OneHotEncodingTransformer.OutputKind.Bag;
            var ind = OneHotEncodingTransformer.OutputKind.Ind;
            var key = OneHotEncodingTransformer.OutputKind.Key;
            var bin = OneHotEncodingTransformer.OutputKind.Bin;

            var estimator = new OneHotHashEncodingEstimator(
                Env,
                new OneHotHashEncodingEstimator.ColumnInfo("A", "CatA", bag, invertHash: -1),
                new OneHotHashEncodingEstimator.ColumnInfo("B", "CatB", bag, invertHash: -1),
                new OneHotHashEncodingEstimator.ColumnInfo("C", "CatC", bag, invertHash: -1),
                new OneHotHashEncodingEstimator.ColumnInfo("D", "CatD", bag, invertHash: -1),
                new OneHotHashEncodingEstimator.ColumnInfo("E", "CatE", ind, invertHash: -1),
                new OneHotHashEncodingEstimator.ColumnInfo("F", "CatF", ind, invertHash: -1),
                new OneHotHashEncodingEstimator.ColumnInfo("A", "CatG", key, invertHash: -1),
                new OneHotHashEncodingEstimator.ColumnInfo("B", "CatH", key, invertHash: -1),
                new OneHotHashEncodingEstimator.ColumnInfo("A", "CatI", bin, invertHash: -1),
                new OneHotHashEncodingEstimator.ColumnInfo("B", "CatJ", bin, invertHash: -1));

            var transformer = estimator.Fit(view);
            var encoded     = transformer.Transform(view);

            ValidateMetadata(encoded);
            Done();
        }
Exemple #9
0
 /// <summary>
 /// Registers a factory method under a component type name.
 /// </summary>
 /// <param name="name">Name of the component type.</param>
 /// <param name="factory">Factory method.</param>
 /// <param name="Overwrite">When true, an existing registration for <paramref name="name"/>
 /// is replaced; when false, an existing registration is left untouched.</param>
 public static void RegisterComponentType(String name, ComponentCreation factory, bool Overwrite = false)
 {
     if (Overwrite)
     {
         // Drop whatever is registered under this name so the Add below cannot collide.
         m_FactoryMethods.Remove(name);
     }
     else if (m_FactoryMethods.ContainsKey(name))
     {
         // Already registered and overwriting was not requested: keep the existing factory.
         return;
     }

     m_FactoryMethods.Add(name, factory);
 }
 void TestWorking()
 {
     var data = new[] { new TestClass()
                        {
                            A = 1, B = 2, C = 3,
                        }, new TestClass()
                        {
                            A = 4, B = 5, C = 6
                        } };
     var env         = new MLContext();
     var dataView    = ComponentCreation.CreateDataView(env, data);
     var est         = new ColumnCopyingEstimator(env, new[] { ("A", "D"), ("B", "E"), ("A", "F") });
Exemple #11
0
        /// <summary>
        /// Checks a DnnImageFeaturizerEstimator (ResNet18) against valid input and against
        /// inputs with a wrong column name, a wrong item type and a wrong vector size.
        /// </summary>
        void TestDnnImageFeaturizer()
        {
            // The scenario is only exercised on Windows; on any other OS the test returns immediately.
            var runningOnWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
            if (!runningOnWindows)
            {
                return;
            }

            var sample = getSampleArrayData();

            // Valid input: a single row carrying the sample vector in "data_0".
            var validView = ComponentCreation.CreateDataView(Env, new TestData[]
            {
                new TestData() { data_0 = sample }
            });

            var pipe = new DnnImageFeaturizerEstimator(
                Env,
                m => m.ModelSelector.ResNet18(m.Environment, m.InputColumn, m.OutputColumn),
                "data_0",
                "output_1");

            // Invalid inputs: a differently named column, a string-typed column and a vector of length 2.
            var wrongNamesView = ComponentCreation.CreateDataView(Env,
                new List<TestDataXY> { new TestDataXY() { A = new float[inputSize] } });
            var wrongTypesView = ComponentCreation.CreateDataView(Env,
                new List<TestDataDifferntType> { new TestDataDifferntType() { data_0 = new string[inputSize] } });
            var wrongSizeView = ComponentCreation.CreateDataView(Env,
                new List<TestDataSize> { new TestDataSize() { data_0 = new float[2] } });

            TestEstimatorCore(pipe, validView, invalidInput: wrongNamesView);
            TestEstimatorCore(pipe, validView, invalidInput: wrongTypesView);

            // The schema computation is invoked on the wrong-size data first; fitting on that
            // data must then fail with one of the two exception types caught below.
            pipe.GetOutputSchema(SchemaShape.Create(wrongSizeView.Schema));
            try
            {
                pipe.Fit(wrongSizeView);
                // Reaching this line means Fit accepted the bad input: fail the test.
                Assert.False(true);
            }
            catch (ArgumentOutOfRangeException)
            {
                // Expected failure mode.
            }
            catch (InvalidOperationException)
            {
                // Expected failure mode.
            }
        }
        /// <summary>
        /// Converts four columns to keys with a TermEstimator, maps the keys to binary vectors
        /// with a KeyToBinaryVectorEstimator and hands the result to ValidateMetadata.
        /// </summary>
        public void TestMetadataPropagation()
        {
            var rows = new[]
            {
                new TestMeta()
                {
                    A = new string[] { "A", "B" },
                    B = "C",
                    C = new int[] { 3, 5 },
                    D = 6
                },
                new TestMeta()
                {
                    A = new string[] { "A", "B" },
                    B = "C",
                    C = new int[] { 5, 3 },
                    D = 1
                },
                new TestMeta()
                {
                    A = new string[] { "A", "B" },
                    B = "C",
                    C = new int[] { 3, 5 },
                    D = 6
                }
            };

            var view = ComponentCreation.CreateDataView(Env, rows);

            // First stage: A..D become the key columns TA..TD (text key values requested for A and B).
            var termColumns = new[]
            {
                new TermTransform.ColumnInfo("A", "TA", textKeyValues: true),
                new TermTransform.ColumnInfo("B", "TB", textKeyValues: true),
                new TermTransform.ColumnInfo("C", "TC"),
                new TermTransform.ColumnInfo("D", "TD")
            };
            var termEstimator   = new TermEstimator(Env, termColumns);
            var termTransformer = termEstimator.Fit(view);
            view = termTransformer.Transform(view);

            // Second stage: the key columns TA..TD become the binary vectors CatA..CatD.
            var binaryEstimator = new KeyToBinaryVectorEstimator(
                Env,
                new KeyToBinaryVectorTransform.ColumnInfo("TA", "CatA"),
                new KeyToBinaryVectorTransform.ColumnInfo("TB", "CatB"),
                new KeyToBinaryVectorTransform.ColumnInfo("TC", "CatC"),
                new KeyToBinaryVectorTransform.ColumnInfo("TD", "CatD"));

            var binaryTransformer = binaryEstimator.Fit(view);
            var encoded           = binaryTransformer.Transform(view);

            ValidateMetadata(encoded);
            Done();
        }
Exemple #13
0
        /// <summary>
        /// Saves a view into a file using the "Binary" saver.
        /// </summary>
        /// <param name="host">environment used to create the saver, the channel and the output file</param>
        /// <param name="view">view to save</param>
        /// <param name="filename">output filename; it is expanded to a full path before the file is created</param>
        public static void ToIdv(IHostEnvironment host, IDataView view, string filename)
        {
            var binarySaver  = ComponentCreation.CreateSaver(host, "Binary");
            var absolutePath = Path.GetFullPath(filename);

            using (var channel = host.Start("ToIdv"))
            {
                // Log both the path as given and the resolved one.
                channel.Info(MessageSensitivity.None, "Saving data into file '{0}' or '{1}'.", filename, absolutePath);

                using (var output = host.CreateOutputFile(absolutePath))
                {
                    DataSaverUtils.SaveDataView(channel, binarySaver, view, output, true);
                }
            }
        }
        /// <summary>
        /// Runs the estimator checks on a WordTokenizingEstimator and verifies the tokens
        /// produced for a scalar text column and for a vector text column.
        /// </summary>
        public void WordTokenizeWorkout()
        {
            // Valid input: a single row with a sentence (A) and a vector of two strings (B).
            var validRows = new[]
            {
                new TestClass()
                {
                    A = "This is a good sentence.",
                    B = new string[] { "Much words", "Wow So Cool" }
                }
            };
            var validView = ComponentCreation.CreateDataView(Env, validRows);

            // Invalid input: the same column names, but numeric content.
            var invalidRows = new[]
            {
                new TestWrong()
                {
                    A = 1,
                    B = new float[] { 2, 3 }
                }
            };
            var invalidView = ComponentCreation.CreateDataView(Env, invalidRows);

            var tokenizerColumns = new[]
            {
                new WordTokenizingTransformer.ColumnInfo("A", "TokenizeA"),
                new WordTokenizingTransformer.ColumnInfo("B", "TokenizeB"),
            };
            var pipe = new WordTokenizingEstimator(Env, tokenizerColumns);

            TestEstimatorCore(pipe, validView, invalidInput: invalidView);

            // Fit the estimator on the valid data and apply the resulting transformer to that same data.
            var transformed = pipe.Fit(validView).Transform(validView);

            // Materialize the transformed rows as native objects and take the first one
            // (the only one, as the input has a single row).
            var nativeRows = new List<NativeResult>(transformed.AsEnumerable<NativeResult>(Env, false));
            var firstRow   = nativeRows[0];

            // Tokens expected for A: { "This", "is", "a", "good", "sentence." }.
            var expectedA = new[] { "This", "is", "a", "good", "sentence." };
            Assert.True(expectedA.Length == firstRow.TokenizeA.Length);
            for (int t = 0; t < expectedA.Length; ++t)
            {
                Assert.Equal(expectedA[t], firstRow.TokenizeA[t]);
            }

            // Tokens expected for B: { "Much", "words", "Wow", "So", "Cool" }. One may expect a 2-D array
            // { { "Much", "words" }, { "Wow", "So", "Cool" } } instead, but please note that ML.NET may
            // flatten all outputs if they are high-dimension tensors.
            var expectedB = new[] { "Much", "words", "Wow", "So", "Cool" };
            Assert.True(expectedB.Length == firstRow.TokenizeB.Length);
            for (int t = 0; t < expectedB.Length; ++t)
            {
                Assert.Equal(expectedB[t], firstRow.TokenizeB[t]);
            }

            Done();
        }
        /// <summary>
        /// Applies the matmul TensorFlow model to two rows of inputs "a" and "b" and checks
        /// the four values read from the output column of each row.
        /// </summary>
        public void TensorFlowTransformMatrixMultiplicationTest()
        {
            var modelPath = "model_matmul/frozen_saved_model.pb";

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Input: two rows, each with two vectors of four floats.
                var inputRows = new List<TestData>
                {
                    new TestData()
                    {
                        a = new[] { 1.0f, 2.0f, 3.0f, 4.0f },
                        b = new[] { 1.0f, 2.0f, 3.0f, 4.0f }
                    },
                    new TestData()
                    {
                        a = new[] { 2.0f, 2.0f, 2.0f, 2.0f },
                        b = new[] { 3.0f, 3.0f, 3.0f, 3.0f }
                    }
                };
                var inputView = ComponentCreation.CreateDataView(env, inputRows);

                var transform = TensorFlowTransform.Create(env, inputView, modelPath, "c", "a", "b");

                // Values expected in the output vector, row by row.
                var expectedFirstRow = new[]
                {
                    1.0 * 1.0 + 2.0 * 3.0,
                    1.0 * 2.0 + 2.0 * 4.0,
                    3.0 * 1.0 + 4.0 * 3.0,
                    3.0 * 2.0 + 4.0 * 4.0
                };
                var expectedSecondRow = new[]
                {
                    2.0 * 3.0 + 2.0 * 3.0,
                    2.0 * 3.0 + 2.0 * 3.0,
                    2.0 * 3.0 + 2.0 * 3.0,
                    2.0 * 3.0 + 2.0 * 3.0
                };

                using (var cursor = transform.GetRowCursor(column => true))
                {
                    // The output is read from the column at index 2.
                    var readProduct = cursor.GetGetter<VBuffer<float>>(2);

                    // First row.
                    Assert.True(cursor.MoveNext());
                    VBuffer<float> product = default;
                    readProduct(ref product);
                    for (int k = 0; k < expectedFirstRow.Length; ++k)
                    {
                        Assert.Equal(expectedFirstRow[k], product.Values[k]);
                    }

                    // Second row, read into a fresh buffer.
                    Assert.True(cursor.MoveNext());
                    product = default;
                    readProduct(ref product);
                    for (int k = 0; k < expectedSecondRow.Length; ++k)
                    {
                        Assert.Equal(expectedSecondRow[k], product.Values[k]);
                    }

                    // There is no third row.
                    Assert.False(cursor.MoveNext());
                }
            }
        }
        void TestWorking()
        {
            var data = new[] { new TestClass()
                               {
                                   A = 1, B = 2, C = 3,
                               }, new TestClass()
                               {
                                   A = 4, B = 5, C = 6
                               } };

            using (var env = new ConsoleEnvironment())
            {
                var dataView    = ComponentCreation.CreateDataView(env, data);
                var est         = new CopyColumnsEstimator(env, new[] { ("A", "D"), ("B", "E"), ("A", "F") });
        /// <summary>
        /// Creates a transform from its string definition, applied to <c>Source</c>, and adds it
        /// through the overload that takes the created transform.
        /// </summary>
        /// <param name="transform">string definition of the transform to create</param>
        public void AddTransform(string transform)
        {
            // Creating a transform from a string needs the environment.
            bool hasEnvironment = _env != null;
            if (!hasEnvironment)
            {
                throw Contracts.ExceptNotSupp("The class must be initialized with an envrionment to enable that functionality.");
            }

            var created = ComponentCreation.CreateTransform(_env, transform, Source);
            if (created != null)
            {
                AddTransform(created);
                return;
            }

            throw Contracts.ExceptNotSupp($"Unable to create transform '{transform}'.");
        }
        /// <summary>
        /// Fitting a KeepColumns estimator that asks for columns "D" and "G" on data built from
        /// rows that only set A, B and C must throw ArgumentOutOfRangeException.
        /// </summary>
        void TestSelectColumnsWithMissing()
        {
            var rows = new[]
            {
                new TestClass() { A = 1, B = 2, C = 3 },
                new TestClass() { A = 4, B = 5, C = 6 }
            };
            var view = ComponentCreation.CreateDataView(Env, rows);

            var keepMissingColumns = ColumnSelectingEstimator.KeepColumns(Env, "D", "G");

            Assert.Throws<ArgumentOutOfRangeException>(() => keepMissingColumns.Fit(view));
        }
Exemple #19
0
        /// <summary>
        /// Saves a data view to a text file through a saver created from "Text{...}" settings.
        /// </summary>
        /// <param name="host">environment used to create the saver, the channel and the output file</param>
        /// <param name="view">view to save</param>
        /// <param name="filename">output filename; converted to a full path before the file is created</param>
        /// <param name="sep">column separator; a tab character is written as "tab" in the saver settings</param>
        /// <param name="schema">sets the saver's schema option to "+" when true, "-" otherwise</param>
        public static void ToCsv(IHostEnvironment host, IDataView view, string filename, string sep = "\t", bool schema = true)
        {
            string separatorSetting = sep == "\t" ? "tab" : sep;
            string schemaSetting = schema ? "+" : "-";
            var saver = ComponentCreation.CreateSaver(host, $"Text{{sep={separatorSetting} header=+ schema={schemaSetting}}}");
            string fullPath = Path.GetFullPath(filename);

            using (var ch = host.Start("ToCsv"))
            {
                ch.Info(MessageSensitivity.None, "Saving data into file '{0}' or '{1}'.", filename, fullPath);
                using (var output = host.CreateOutputFile(fullPath))
                {
                    DataSaverUtils.SaveDataView(ch, saver, view, output, true);
                }
            }
        }
Exemple #20
0
        /// <summary>
        /// Runs the static-pipeline <c>OneHotHashEncoding</c> variants over breast-cancer.txt:
        /// validates the estimator with <c>TestEstimatorCore</c>, saves columns A-F of the
        /// transformed data to featurized.tsv and compares that file with <c>CheckEquality</c>.
        /// </summary>
        public void CategoricalHashStatic()
        {
            string inputPath = GetDataPath("breast-cancer.txt");
            var loader = TextLoader.CreateReader(Env, ctx => (
                ScalarString: ctx.LoadText(1),
                VectorString: ctx.LoadText(1, 4)));
            var source = loader.Read(inputPath);

            // Rows handed to TestEstimatorCore as the invalid input.
            var mismatchedRows = new[]
            {
                new TestClass() { A = "1", B = "2", C = "3" },
                new TestClass() { A = "4", B = "5", C = "6" }
            };
            var invalidView = ComponentCreation.CreateDataView(Env, mismatchedRows);

            var pipeline = source.MakeNewEstimator()
                .Append(row => (
                    row.ScalarString,
                    row.VectorString,
                    // Create a VarVector column
                    VarVectorString: row.ScalarString.TokenizeText()))
                .Append(row => (
                    A: row.ScalarString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashScalarOutputKind.Ind),
                    B: row.VectorString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashVectorOutputKind.Ind),
                    C: row.VectorString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashVectorOutputKind.Bag),
                    D: row.ScalarString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashScalarOutputKind.Bin),
                    E: row.VectorString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashVectorOutputKind.Bin),
                    F: row.VarVectorString.OneHotHashEncoding()));

            TestEstimatorCore(pipeline.AsDynamic, source.AsDynamic, invalidInput: invalidView);

            var featurizedPath = GetOutputPath("CategoricalHash", "featurized.tsv");

            using (var ch = Env.Start("save"))
            {
                var textSaver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
                var transformed = pipeline.Fit(source).Transform(source).AsDynamic;
                var taken = TakeFilter.Create(Env, transformed, 4);
                var selected = ColumnSelectingTransformer.CreateKeep(Env, taken, new[] { "A", "B", "C", "D", "E", "F" });
                using (var stream = File.Create(featurizedPath))
                {
                    DataSaverUtils.SaveDataView(ch, textSaver, selected, stream, keepHidden: true);
                }
            }

            CheckEquality("CategoricalHash", "featurized.tsv");
            Done();
        }
Exemple #21
0
            /// <summary>
            /// Loads the predictor and the transforms stored in <paramref name="modelName"/>,
            /// builds a default scorer on top of them and prepares a value mapper for it.
            /// </summary>
            /// <param name="modelName">Path of the model file; it is opened once for the predictor and once for the transforms.</param>
            /// <param name="features">Passed to <c>CreateExamples</c> together with the loaded transforms.</param>
            public ValueMapperExample(string modelName, string features)
            {
                _env = EnvHelper.NewTestEnvironment();

                // Dispose each stream as soon as its loader returns so the file handle is not leaked.
                // NOTE(review): assumes both loaders read the model eagerly — confirm.
                using (var predictorStream = File.OpenRead(modelName))
                {
                    _predictor = _env.LoadPredictorOrNull(predictorStream);
                }

                // An empty input array is enough to build the view the transforms are loaded onto.
                var inputs = new Input[0];
                var view = _env.CreateStreamingDataView<Input>(inputs);

                using (var transformStream = File.OpenRead(modelName))
                {
                    _transforms = ComponentCreation.LoadTransforms(_env, transformStream, view);
                }

                var data = _env.CreateExamples(_transforms, features);
                var scorer = _env.CreateDefaultScorer(data, _predictor);

                // "Features" and "Probability" are presumably the input and output column names — TODO confirm.
                _valueMapper = new ValueMapperFromTransformFloat<VBuffer<float>>(_env, scorer, "Features", "Probability");
                _mapper = _valueMapper.GetMapper<VBuffer<float>, float>();
            }
        public void NAIndicatorWorkout()
        {
            var data = new[] {
                new TestClass()
                {
                    A = 1, B = 3, C = new float[2] {
                        1, 2
                    }, D = new double[2] {
                        3, 4
                    }
                },
                new TestClass()
                {
                    A = float.NaN, B = double.NaN, C = new float[2] {
                        float.NaN, float.NaN
                    }, D = new double[2] {
                        double.NaN, double.NaN
                    }
                },
                new TestClass()
                {
                    A = float.NegativeInfinity, B = double.NegativeInfinity, C = new float[2] {
                        float.NegativeInfinity, float.NegativeInfinity
                    }, D = new double[2] {
                        double.NegativeInfinity, double.NegativeInfinity
                    }
                },
                new TestClass()
                {
                    A = float.PositiveInfinity, B = double.PositiveInfinity, C = new float[2] {
                        float.PositiveInfinity, float.PositiveInfinity,
                    }, D = new double[2] {
                        double.PositiveInfinity, double.PositiveInfinity
                    }
                },
                new TestClass()
                {
                    A = 2, B = 1, C = new float[2] {
                        3, 4
                    }, D = new double[2] {
                        5, 6
                    }
                },
            };

            var dataView = ComponentCreation.CreateDataView(Env, data);
            var pipe     = new MissingValueIndicatorEstimator(Env,
                                                              new (string input, string output)[] { ("A", "NAA"), ("B", "NAB"), ("C", "NAC"), ("D", "NAD") });
        /// <summary>
        /// Feeds two rows of four-element vectors "a" and "b" to the model_matmul TensorFlow model
        /// and checks the four values of output "c" for each row.
        /// </summary>
        [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // TensorFlow is 64-bit only
        public void TensorFlowTransformMatrixMultiplicationTest()
        {
            var modelLocation = "model_matmul/frozen_saved_model.pb";
            var mlContext = new MLContext(seed: 1, conc: 1);

            // Pipeline
            var firstRow = new TestData()
            {
                a = new[] { 1.0f, 2.0f, 3.0f, 4.0f },
                b = new[] { 1.0f, 2.0f, 3.0f, 4.0f }
            };
            var secondRow = new TestData()
            {
                a = new[] { 2.0f, 2.0f, 2.0f, 2.0f },
                b = new[] { 3.0f, 3.0f, 3.0f, 3.0f }
            };
            var input = ComponentCreation.CreateDataView(mlContext, new List<TestData>(new TestData[] { firstRow, secondRow }));
            var transformer = new TensorFlowTransformer(mlContext, modelLocation, new[] { "a", "b" }, new[] { "c" });
            var scored = transformer.Transform(input);

            // Expected values of "c", written out as the sums of products they come from.
            double[] expectedFirst =
            {
                1.0 * 1.0 + 2.0 * 3.0,
                1.0 * 2.0 + 2.0 * 4.0,
                3.0 * 1.0 + 4.0 * 3.0,
                3.0 * 2.0 + 4.0 * 4.0
            };
            double[] expectedSecond =
            {
                2.0 * 3.0 + 2.0 * 3.0,
                2.0 * 3.0 + 2.0 * 3.0,
                2.0 * 3.0 + 2.0 * 3.0,
                2.0 * 3.0 + 2.0 * 3.0
            };

            using (var cursor = scored.GetRowCursor(col => true))
            {
                var readC = cursor.GetGetter<VBuffer<float>>(2);

                Assert.True(cursor.MoveNext());
                VBuffer<float> product = default;
                readC(ref product);

                var values = product.GetValues();
                for (int i = 0; i < expectedFirst.Length; i++)
                {
                    Assert.Equal(expectedFirst[i], values[i]);
                }

                Assert.True(cursor.MoveNext());
                product = default;
                readC(ref product);

                values = product.GetValues();
                for (int i = 0; i < expectedSecond.Length; i++)
                {
                    Assert.Equal(expectedSecond[i], values[i]);
                }

                // Only two rows were supplied.
                Assert.False(cursor.MoveNext());
            }
        }
Exemple #24
0
        /// <summary>
        /// Builds a repeating series followed by larger values and runs <c>SsaSpikeEstimator</c>
        /// through <c>TestEstimatorCore</c>, once with wrongly-typed and once with wrongly-named invalid input.
        /// </summary>
        void TestSsaSpikeEstimator()
        {
            const int confidence = 95;
            const int pValueHistorySize = 10;
            const int seasonalitySize = 10;
            const int numberOfSeasonsInTraining = 5;
            const int maxTrainingSize = numberOfSeasonsInTraining * seasonalitySize;

            // The view is created over the still-empty list; rows are added to the list afterwards.
            var series = new List<Data>();
            var dataView = Env.CreateStreamingDataView(series);

            // numberOfSeasonsInTraining repetitions of the values 0 .. seasonalitySize - 1.
            for (int k = 0; k < numberOfSeasonsInTraining * seasonalitySize; k++)
            {
                series.Add(new Data(k % seasonalitySize));
            }

            // Followed by pValueHistorySize values that grow in steps of 100.
            for (int i = 0; i < pValueHistorySize; i++)
            {
                series.Add(new Data(i * 100));
            }

            var pipe = new SsaSpikeEstimator(Env, "Value", "Change",
                                             confidence, pValueHistorySize, maxTrainingSize, seasonalitySize);

            var wrongNameRows = new List<TestDataXY>
            {
                new TestDataXY() { A = new float[inputSize] }
            };
            var wrongTypeRows = new List<TestDataDifferntType>
            {
                new TestDataDifferntType() { data_0 = new string[inputSize] }
            };

            var invalidDataWrongNames = ComponentCreation.CreateDataView(Env, wrongNameRows);
            var invalidDataWrongTypes = ComponentCreation.CreateDataView(Env, wrongTypeRows);

            TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);
            TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);

            Done();
        }
 public void TextNormalizerWorkout()
 {
     var data = new[] { new TestClass()
                        {
                            A = "A 1, b. c! йЁ 24 ", B = new string[2] {
                                "~``ё 52ds й vc", "6ksj94 vd ё dakl Юds Ё q й"
                            }
                        },
                        new TestClass()
                        {
                            A = null, B = new string[2]  {
                                null, string.Empty
                            }
                        } };
     var dataView = ComponentCreation.CreateDataView(Env, data);
     var pipe     = new TextNormalizerEstimator(Env, columns: new[] { ("A", "NormA"), ("B", "NormB") });
        /// <summary>
        /// Scores a single row with the two-input ONNX model and checks the length and the
        /// sum of both outputs ("outa" and "outb"). Returns without testing on non-Windows platforms.
        /// </summary>
        [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // x86 output differs from Baseline
        public void OnnxModelMultiInput()
        {
            if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                return;
            }

            var modelFile = @"twoinput\twoinput.onnx";

            using (var env = new ConsoleEnvironment(seed: 1, conc: 1))
            {
                var samplevector = GetSampleArrayData();

                // NOTE(review): the view is created with the fixture's Env while the transform
                // below uses the local env — confirm this mix is intentional.
                var dataView = ComponentCreation.CreateDataView(Env,
                                                                new TestDataMulti[] {
                    new TestDataMulti()
                    {
                        ina = new float[] { 1, 2, 3, 4, 5 },
                        inb = new float[] { 1, 2, 3, 4, 5 }
                    }
                });

                var onnx = OnnxTransform.Create(env, dataView, modelFile,
                                                new[] { "ina", "inb" },
                                                new[] { "outa", "outb" });

                // Fail loudly if an output column is missing; otherwise the index would be left
                // at its default and the getters below would read an unrelated column.
                Assert.True(onnx.Schema.TryGetColumnIndex("outa", out int scoresa));
                Assert.True(onnx.Schema.TryGetColumnIndex("outb", out int scoresb));

                using (var curs = onnx.GetRowCursor(col => col == scoresa || col == scoresb))
                {
                    var getScoresa = curs.GetGetter<VBuffer<float>>(scoresa);
                    var getScoresb = curs.GetGetter<VBuffer<float>>(scoresb);
                    var buffera = default(VBuffer<float>);
                    var bufferb = default(VBuffer<float>);

                    while (curs.MoveNext())
                    {
                        getScoresa(ref buffera);
                        getScoresb(ref bufferb);
                        Assert.Equal(5, buffera.Length);
                        Assert.Equal(5, bufferb.Length);
                        Assert.Equal(0, buffera.GetValues().ToArray().Sum());
                        Assert.Equal(30, bufferb.GetValues().ToArray().Sum());
                    }
                }
            }
        }
 public void CharTokenizeWorkout()
 {
     var data = new[] { new TestClass()
                        {
                            A = "This is a good sentence.", B = new string[2] {
                                "Much words", "Wow So Cool"
                            }
                        } };
     var dataView    = ComponentCreation.CreateDataView(Env, data);
     var invalidData = new[] { new TestWrong()
                               {
                                   A = 1, B = new float[2] {
                                       2, 3
                                   }
                               } };
     var invalidDataView = ComponentCreation.CreateDataView(Env, invalidData);
     var pipe            = new TokenizingByCharactersEstimator(Env, columns: new[] { ("A", "TokenizeA"), ("B", "TokenizeB") });
Exemple #28
0
        /// <summary>
        /// Runs the static-pipeline <c>ReplaceWithMissingValues</c> variants over breast-cancer.txt:
        /// validates the estimator with <c>TestEstimatorCore</c>, saves columns A-D of the
        /// transformed data to featurized.tsv and compares that file with <c>CheckEquality</c>.
        /// </summary>
        public void NAReplaceStatic()
        {
            string inputPath = GetDataPath("breast-cancer.txt");
            var loader = TextLoader.CreateReader(Env, ctx => (
                ScalarFloat: ctx.LoadFloat(1),
                ScalarDouble: ctx.LoadDouble(1),
                VectorFloat: ctx.LoadFloat(1, 4),
                VectorDoulbe: ctx.LoadDouble(1, 4)));

            var source = loader.Read(new MultiFileSource(inputPath));

            // Single row handed to TestEstimatorCore as the invalid input.
            var mismatchedRows = new[]
            {
                new TestClass()
                {
                    A = 1,
                    B = 3,
                    C = new float[2] { 1, 2 },
                    D = new double[2] { 3, 4 }
                }
            };
            var invalidView = ComponentCreation.CreateDataView(Env, mismatchedRows);

            var pipeline = source.MakeNewEstimator()
                .Append(row => (
                    A: row.ScalarFloat.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Maximum),
                    B: row.ScalarDouble.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
                    C: row.VectorFloat.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
                    D: row.VectorDoulbe.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Minimum)));

            TestEstimatorCore(pipeline.AsDynamic, source.AsDynamic, invalidInput: invalidView);
            var featurizedPath = GetOutputPath("NAReplace", "featurized.tsv");

            using (var ch = Env.Start("save"))
            {
                var textSaver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
                var transformed = pipeline.Fit(source).Transform(source).AsDynamic;
                IDataView taken = TakeFilter.Create(Env, transformed, 4);
                IDataView selected = new ChooseColumnsTransform(Env, taken, "A", "B", "C", "D");
                using (var stream = File.Create(featurizedPath))
                {
                    DataSaverUtils.SaveDataView(ch, textSaver, selected, stream, keepHidden: true);
                }
            }

            CheckEquality("NAReplace", "featurized.tsv");
            Done();
        }
        /// <summary>
        /// Groups four rows by "Age" with <c>GroupTransform</c> and checks that the two resulting rows
        /// carry the user names and genders of the rows that share each age.
        /// </summary>
        public void GroupTest()
        {
            // The user names must match the values asserted below (Amy, Willy, Dori, Ariel).
            var data = new List<GroupExample> {
                new GroupExample {
                    Age = 18, UserName = "Amy", Gender = "Girl"
                },
                new GroupExample {
                    Age = 18, UserName = "Willy", Gender = "Boy"
                },
                new GroupExample {
                    Age = 20, UserName = "Dori", Gender = "Fish"
                },
                new GroupExample {
                    Age = 20, UserName = "Ariel", Gender = "Mermaid"
                }
            };
            var dataView = ComponentCreation.CreateDataView(Env, data);

            var groupTransform = new GroupTransform(Env, dataView, "Age", "UserName", "Gender");
            var grouped = ML.CreateEnumerable<UngroupExample>(groupTransform, false).ToList();

            // Expected content of grouped should contain two rows.
            // Age, UserName, Gender
            // 18,  {"Amy", "Willy"}, {"Girl", "Boy"}
            // 20,  {"Dori", "Ariel"}, {"Fish", "Mermaid"}
            // Note that "Age, UserName, Gender" is not a row; it just shows column names per row below it.
            Assert.Equal(2, grouped.Count);

            // grouped[0] is the first output row --- 18,  {"Amy", "Willy"}, {"Girl", "Boy"}
            Assert.Equal(18, grouped[0].Age);
            Assert.Equal(2, grouped[0].UserName.Length);
            Assert.Equal("Amy", grouped[0].UserName[0]);
            Assert.Equal("Willy", grouped[0].UserName[1]);
            Assert.Equal(2, grouped[0].Gender.Length);
            Assert.Equal("Girl", grouped[0].Gender[0]);
            Assert.Equal("Boy", grouped[0].Gender[1]);

            // grouped[1] is the second output row --- 20,  {"Dori", "Ariel"}, {"Fish", "Mermaid"}
            Assert.Equal(20, grouped[1].Age);
            // Check the UserName length before indexing into it, mirroring the checks on the first row.
            Assert.Equal(2, grouped[1].UserName.Length);
            Assert.Equal("Dori", grouped[1].UserName[0]);
            Assert.Equal("Ariel", grouped[1].UserName[1]);
            Assert.Equal(2, grouped[1].Gender.Length);
            Assert.Equal("Fish", grouped[1].Gender[0]);
            Assert.Equal("Mermaid", grouped[1].Gender[1]);
        }
Exemple #30
0
        /// <summary>
        /// Create a SplitTrainTestTransform transform.
        /// Validates the arguments, copies them into the instance fields, checks that a saver can be
        /// created from the saver settings and builds the internal pipeline with <c>AppendToPipeline</c>.
        /// </summary>
        /// <param name="env">Host environment forwarded to the base constructor.</param>
        /// <param name="args">Transform arguments; <c>PostProcess</c> is called on them before validation.</param>
        /// <param name="input">Input data view; it must not already contain the column named by <c>args.newColumn</c>.</param>
        public SplitTrainTestTransform(IHostEnvironment env, Arguments args, IDataView input)
            : base(env, RegistrationName, input)
        {
            Host.CheckValue(args, nameof(args));
            args.PostProcess();

            // Each message states the condition that is actually enforced.
            Host.CheckUserArg(args.poolRows >= 0, "poolRows must be >= 0");
            Host.CheckUserArg(!string.IsNullOrEmpty(args.newColumn), "newColumn cannot be empty");
            Host.CheckUserArg(args.ratios != null, "ratios cannot be null");
            Host.CheckUserArg(args.ratios.Length > 1, "Number of ratios must be > 1");
            Host.CheckUserArg(args.filename == null || args.tag != null || args.filename.Length == args.ratios.Length, "filenames must be either empty either an array of the same size as ratios");
            Host.CheckUserArg(args.tag == null || args.filename != null || args.tag.Length == args.ratios.Length, "tags must be either empty either an array of the same size as ratios");
            Host.CheckUserArg(!args.numThreads.HasValue || args.numThreads.Value > 0, "numThreads must be > 0.");

            var sum = args.fratios.Sum();
            Host.CheckUserArg(Math.Abs(sum - 1f) < 1e-5, "Sum of ratios must be 1.");

            // Only the presence of the column matters, so the index is discarded.
            Host.CheckUserArg(!input.Schema.TryGetColumnIndex(args.newColumn, out _), "newColumn must not exist in the input schema.");

            _newColumn = args.newColumn;
            _shuffleInput = args.shuffleInput;
            _poolRows = args.poolRows;
            _filenames = args.filename;
            _seed = args.seed;
            _seedShuffle = args.seedShuffle;
            _ratios = args.fratios;
            _cacheFile = args.cacheFile;
            _reuse = args.reuse;
            _tags = args.tag;

            var saveSettings = args.saverSettings as ICommandLineComponentFactory;
            Host.CheckValue(saveSettings, nameof(saveSettings));

            // Rebuild the "Name{settings}" form of the saver; an empty "{}" is removed from the string.
            _saverSettings = string.Format("{0}{{{1}}}", saveSettings.Name, saveSettings.GetSettingsString());
            _saverSettings = _saverSettings.Replace("{}", "");

            // The saver is created only to verify that the settings are usable.
            var saver = ComponentCreation.CreateSaver(Host, _saverSettings);
            if (saver == null)
            {
                throw Host.Except("Cannot parse '{0}'", _saverSettings);
            }

            _pipedTransform = AppendToPipeline(input);
        }