Example #1
        void CrossValidation()
        {
            var dataPath = GetDataPath(SentimentDataPath);

            var pipeline = new Legacy.LearningPipeline();
            var loader   = new TextLoader(dataPath).CreateFrom <SentimentData>();

            loader.Arguments.HasHeader = true;
            pipeline.Add(loader);
            pipeline.Add(MakeSentimentTextTransform());
            pipeline.Add(new FastTreeBinaryClassifier()
            {
                NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2
            });
            pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
            {
                PredictedLabelColumn = "PredictedLabel"
            });

            var cv               = new CrossValidator().CrossValidate <SentimentData, SentimentPrediction>(pipeline);
            var metrics          = cv.BinaryClassificationMetrics[0];
            var singlePrediction = cv.PredictorModels[0].Predict(new SentimentData()
            {
                SentimentText = "Not big fan of this."
            });

            Assert.True(singlePrediction.Sentiment);
        }
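The sentiment examples in this listing reference SentimentData, SentimentPrediction and a MakeSentimentTextTransform() helper that are not shown. A minimal sketch of what they might look like follows; the column ordinals, attribute names and featurizer settings are assumptions, not taken from the listing itself.

        public class SentimentData
        {
            [Column(ordinal: "0", name: "Label")]
            public float Sentiment;

            [Column(ordinal: "1")]
            public string SentimentText;
        }

        public class SentimentPrediction
        {
            [ColumnName("PredictedLabel")]
            public bool Sentiment;
        }

        // Hypothetical helper: turns the raw text column into the "Features" vector the trainer expects.
        private static TextFeaturizer MakeSentimentTextTransform()
        {
            return new TextFeaturizer("Features", "SentimentText")
            {
                KeepDiacritics   = false,
                KeepPunctuations = false,
                TextCase         = TextNormalizerTransformCaseNormalizationMode.Lower
            };
        }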
Example #2
        public void Evaluation()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentDataPath);
            var pipeline     = new Legacy.LearningPipeline();

            var loader = new TextLoader(dataPath).CreateFrom <SentimentData>();

            loader.Arguments.HasHeader = true;
            pipeline.Add(loader);
            pipeline.Add(MakeSentimentTextTransform());
            pipeline.Add(new FastTreeBinaryClassifier()
            {
                NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2
            });
            pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
            {
                PredictedLabelColumn = "PredictedLabel"
            });
            var model = pipeline.Train <SentimentData, SentimentPrediction>();
            var testLearningPipelineItem = new TextLoader(testDataPath).CreateFrom <SentimentData>();

            testLearningPipelineItem.Arguments.HasHeader = true;
            var evaluator = new BinaryClassificationEvaluator();
            var metrics   = evaluator.Evaluate(model, testLearningPipelineItem);
        }
        public async Task TrainSaveModelAndPredict()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentDataPath);
            var pipeline     = new Legacy.LearningPipeline();

            var loader = new TextLoader(dataPath).CreateFrom <SentimentData>();

            loader.Arguments.HasHeader = true;
            pipeline.Add(loader);
            pipeline.Add(MakeSentimentTextTransform());
            pipeline.Add(new FastTreeBinaryClassifier()
            {
                NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2
            });
            pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
            {
                PredictedLabelColumn = "PredictedLabel"
            });

            var model     = pipeline.Train <SentimentData, SentimentPrediction>();
            var modelName = "trainSaveAndPredictModel.zip";

            DeleteOutputPath(modelName);
            await model.WriteAsync(modelName);

            var loadedModel = await Legacy.PredictionModel.ReadAsync <SentimentData, SentimentPrediction>(modelName);

            var singlePrediction = loadedModel.Predict(new SentimentData()
            {
                SentimentText = "Not big fan of this."
            });

            Assert.True(singlePrediction.Sentiment);
        }
        void SimpleTrainAndPredict()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentDataPath);
            var pipeline     = new Legacy.LearningPipeline();

            var loader = new TextLoader(dataPath).CreateFrom <SentimentData>();

            loader.Arguments.HasHeader = true;
            pipeline.Add(loader);

            pipeline.Add(MakeSentimentTextTransform());

            pipeline.Add(new FastTreeBinaryClassifier()
            {
                NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2
            });

            pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
            {
                PredictedLabelColumn = "PredictedLabel"
            });
            var model            = pipeline.Train <SentimentData, SentimentPrediction>();
            var singlePrediction = model.Predict(new SentimentData()
            {
                SentimentText = "Not big fan of this."
            });

            Assert.True(singlePrediction.Sentiment);
        }
        public void TransformOnlyPipeline()
        {
            const string _dataPath = @"..\..\Data\breast-cancer.txt";
            var          pipeline  = new Legacy.LearningPipeline(seed: 1, conc: 1);

            pipeline.Add(new ML.Legacy.Data.TextLoader(_dataPath).CreateFrom <InputData>(useHeader: false));
            pipeline.Add(new CategoricalHashOneHotVectorizer("F1")
            {
                HashBits = 10, Seed = 314489979, OutputKind = CategoricalTransformOutputKind.Bag
            });
            var model           = pipeline.Train <InputData, TransformedData>();
            var predictionModel = model.Predict(new InputData()
            {
                F1 = "5"
            });

            Assert.NotNull(predictionModel);
            Assert.NotNull(predictionModel.TransformedF1);
            Assert.Equal(1024, predictionModel.TransformedF1.Length);

            for (int index = 0; index < 1024; index++)
            {
                if (index == 265)
                {
                    Assert.Equal(1, predictionModel.TransformedF1[index]);
                }
                else
                {
                    Assert.Equal(0, predictionModel.TransformedF1[index]);
                }
            }
        }
        public void TrainOneVersusAll()
        {
            string dataPath = GetDataPath("iris.txt");

            var pipeline = new Legacy.LearningPipeline(seed: 1, conc: 1);

            pipeline.Add(new TextLoader(dataPath).CreateFrom <IrisData>(useHeader: false));
            pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
                                                "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));

            pipeline.Add(OneVersusAll.With(new StochasticDualCoordinateAscentBinaryClassifier()));

            var model = pipeline.Train <IrisData, IrisPrediction>();

            var testData  = new TextLoader(dataPath).CreateFrom <IrisData>(useHeader: false);
            var evaluator = new ClassificationEvaluator();
            ClassificationMetrics metrics = evaluator.Evaluate(model, testData);

            CheckMetrics(metrics);

            var trainTest = new TrainTestEvaluator()
            {
                Kind = MacroUtilsTrainerKinds.SignatureMultiClassClassifierTrainer
            }.TrainTestEvaluate <IrisData, IrisPrediction>(pipeline, testData);

            CheckMetrics(trainTest.ClassificationMetrics);
        }
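The Iris examples assume IrisData and IrisPrediction classes along these lines; the column ordinals below are a guess based on how the fields are concatenated into "Features" above.

        public class IrisData
        {
            [Column("0")] public float Label;
            [Column("1")] public float SepalLength;
            [Column("2")] public float SepalWidth;
            [Column("3")] public float PetalLength;
            [Column("4")] public float PetalWidth;
        }

        public class IrisPrediction
        {
            // One score per class; read back as PredictedLabels[i] in the tests above.
            [ColumnName("Score")]
            public float[] PredictedLabels;
        }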
        public void WordEmbeddingsTest()
        {
            string dataPath = GetDataPath(@"small-sentiment-test.tsv");
            var    pipeline = new Legacy.LearningPipeline(0);

            pipeline.Add(new Legacy.Data.TextLoader(dataPath)
            {
                Arguments = new TextLoaderArguments
                {
                    Separator = new[] { '\t' },
                    HasHeader = false,
                    Column    = new[]
                    {
                        new TextLoaderColumn()
                        {
                            Name   = "Cat",
                            Source = new [] { new TextLoaderRange(0, 3) },
                            Type   = Legacy.Data.DataKind.TX
                        },
                    }
                }
            });

            var modelPath = GetDataPath(@"shortsentiment.emd");
            var embed     = new WordEmbeddings()
            {
                CustomLookupTable = modelPath
            };

            embed.AddColumn("Cat", "Cat");
            pipeline.Add(embed);
            var model = pipeline.Train <EmbeddingsData, EmbeddingsResult>();

            var subDir   = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "WordEmbeddings");
            var onnxPath = GetOutputPath(subDir, "WordEmbeddings.onnx");

            DeleteOutputPath(onnxPath);

            var onnxAsJsonPath = GetOutputPath(subDir, "WordEmbeddings.json");

            DeleteOutputPath(onnxAsJsonPath);

            OnnxConverter converter = new OnnxConverter()
            {
                Onnx   = onnxPath,
                Json   = onnxAsJsonPath,
                Domain = "Onnx"
            };

            converter.Convert(model);

            var fileText = File.ReadAllText(onnxAsJsonPath);

            fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\"");
            File.WriteAllText(onnxAsJsonPath, fileText);

            CheckEquality(subDir, "WordEmbeddings.json");
            Done();
        }
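This test and the later ONNX examples repeat the same convert-then-normalize steps. A possible shared helper (the name and signature are hypothetical, and the per-test InputsToDrop/OutputsToDrop settings are left out) would look roughly like this:

        private static void ConvertToOnnxAndStripVersion<TIn, TOut>(
            Legacy.PredictionModel<TIn, TOut> model, string onnxPath, string jsonPath)
            where TIn : class
            where TOut : class, new()
        {
            var converter = new OnnxConverter
            {
                Onnx   = onnxPath,
                Json   = jsonPath,
                Domain = "Onnx"
            };

            converter.Convert(model);

            // Replace the build-specific producer version so baseline comparisons stay stable.
            var fileText = File.ReadAllText(jsonPath);
            fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"",
                                     "\"producerVersion\": \"##VERSION##\"");
            File.WriteAllText(jsonPath, fileText);
        }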
Example #8
        public void PredictClusters()
        {
            int n        = 1000;
            int k        = 4;
            var rand     = new Random(1);
            var clusters = new ClusteringData[k];
            var data     = new ClusteringData[n];

            for (int i = 0; i < k; i++)
            {
                // Pick cluster centers as points on the unit circle, at an angle of 360*i/k degrees to the X axis.
                clusters[i] = new ClusteringData {
                    Points = new float[2] {
                        (float)Math.Cos(Math.PI * i * 2 / k), (float)Math.Sin(Math.PI * i * 2 / k)
                    }
                };
            }
            // Create data points by randomly picking a cluster and shifting the point slightly away from its center.
            for (int i = 0; i < n; i++)
            {
                var index = rand.Next(0, k);
                var shift = (rand.NextDouble() - 0.5) / 10;
                data[i] = new ClusteringData
                {
                    Points = new float[2]
                    {
                        (float)(clusters[index].Points[0] + shift),
                        (float)(clusters[index].Points[1] + shift)
                    }
                };
            }
            var pipeline = new Legacy.LearningPipeline(seed: 1, conc: 1);

            pipeline.Add(Legacy.Data.CollectionDataSource.Create(data));
            pipeline.Add(new Legacy.Trainers.KMeansPlusPlusClusterer()
            {
                K = k
            });
            var model = pipeline.Train <ClusteringData, ClusteringPrediction>();
            // Validate that the points used as cluster centers during data generation are assigned to different clusters.
            var labels = new HashSet <uint>();

            for (int i = 0; i < k; i++)
            {
                var scores = model.Predict(clusters[i]);
                Assert.True(!labels.Contains(scores.SelectedClusterId));
                labels.Add(scores.SelectedClusterId);
            }

            var evaluator = new Legacy.Models.ClusterEvaluator();
            var testData  = Legacy.Data.CollectionDataSource.Create(clusters);
            var metrics   = evaluator.Evaluate(model, testData);

            // Label is not specified, so NMI is NaN.
            Assert.Equal(metrics.Nmi, double.NaN);
            // CalculateDbi is false by default, so Dbi is 0.
            Assert.Equal(metrics.Dbi, (double)0.0);
            Assert.Equal(metrics.AvgMinScore, (double)0.0, 5);
        }
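PredictClusters builds its input in memory, so the data classes are not shown; a plausible sketch (the vector size and column names are assumed):

        public class ClusteringData
        {
            [ColumnName("Features"), VectorType(2)]
            public float[] Points;
        }

        public class ClusteringPrediction
        {
            [ColumnName("PredictedLabel")]
            public uint SelectedClusterId;

            [ColumnName("Score")]
            public float[] Distance;
        }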
        public void TrainAndPredictHousePriceModelTest()
        {
            string dataPath = GetDataPath("kc_house_data.csv");

            var pipeline = new Legacy.LearningPipeline();

            pipeline.Add(new TextLoader(dataPath).CreateFrom <HousePriceData>(useHeader: true, separator: ','));

            pipeline.Add(new ColumnConcatenator(outputColumn: "NumericalFeatures",
                                                "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15"));

            pipeline.Add(new ColumnConcatenator(outputColumn: "CategoryFeatures",
                                                "Bedrooms", "Bathrooms", "Floors", "Waterfront", "View", "Condition", "Grade", "YearBuilt", "YearRenovated", "Zipcode"));

            pipeline.Add(new CategoricalOneHotVectorizer("CategoryFeatures"));
            pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
                                                "NumericalFeatures", "CategoryFeatures"));
            pipeline.Add(new StochasticDualCoordinateAscentRegressor());

            var model = pipeline.Train <HousePriceData, HousePricePrediction>();

            HousePricePrediction prediction = model.Predict(new HousePriceData()
            {
                Bedrooms      = 3,
                Bathrooms     = 2,
                SqftLiving    = 1710,
                SqftLot       = 4697,
                Floors        = 1.5f,
                Waterfront    = 0,
                View          = 0,
                Condition     = 5,
                Grade         = 6,
                SqftAbove     = 1710,
                SqftBasement  = 0,
                YearBuilt     = 1941,
                YearRenovated = 0,
                Zipcode       = 98002,
                Lat           = 47.3048f,
                Long          = -122.218f,
                SqftLiving15  = 1030,
                SqftLot15     = 4705
            });

            Assert.InRange(prediction.Price, 260_000, 330_000);

            string testDataPath = GetDataPath("kc_house_test.csv");
            var    testData     = new TextLoader(testDataPath).CreateFrom <HousePriceData>(useHeader: true, separator: ',');

            var evaluator             = new RegressionEvaluator();
            RegressionMetrics metrics = evaluator.Evaluate(model, testData);

            Assert.InRange(metrics.L1, 85_000, 89_000);
            Assert.InRange(metrics.L2, 17_000_000_000, 19_000_000_000);
            Assert.InRange(metrics.Rms, 130_500, 135_000);
            Assert.InRange(metrics.LossFn, 17_000_000_000, 19_000_000_000);
            Assert.Equal(.8, metrics.RSquared, 1);
        }
Example #10
        public void KmeansTest()
        {
            string dataPath = GetDataPath(@"breast-cancer.txt");
            var    pipeline = new Legacy.LearningPipeline(0);

            pipeline.Add(new Legacy.Data.TextLoader(dataPath)
            {
                Arguments = new TextLoaderArguments
                {
                    Separator = new[] { '\t' },
                    HasHeader = true,
                    Column    = new[]
                    {
                        new TextLoaderColumn()
                        {
                            Name   = "Features",
                            Source = new [] { new TextLoaderRange(1, 9) },
                            Type   = Legacy.Data.DataKind.R4
                        },
                    }
                }
            });

            pipeline.Add(new KMeansPlusPlusClusterer()
            {
                K = 2, MaxIterations = 1, NumThreads = 1, InitAlgorithm = KMeansPlusPlusTrainerInitAlgorithm.Random
            });
            var model    = pipeline.Train <BreastNumericalColumns, BreastCancerClusterPrediction>();
            var subDir   = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "Cluster", "BreastCancer");
            var onnxPath = GetOutputPath(subDir, "Kmeans.onnx");

            DeleteOutputPath(onnxPath);

            var onnxAsJsonPath = GetOutputPath(subDir, "Kmeans.json");

            DeleteOutputPath(onnxAsJsonPath);

            OnnxConverter converter = new OnnxConverter()
            {
                Onnx   = onnxPath,
                Json   = onnxAsJsonPath,
                Domain = "Onnx"
            };

            converter.Convert(model);

            // Strip the version.
            var fileText = File.ReadAllText(onnxAsJsonPath);

            fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\"");
            File.WriteAllText(onnxAsJsonPath, fileText);

            CheckEquality(subDir, "Kmeans.json");
            Done();
        }
Example #11
        private Legacy.PredictionModel <IrisData, IrisPrediction> Train(string dataPath)
        {
            var pipeline = new Legacy.LearningPipeline();

            pipeline.Add(new Legacy.Data.TextLoader(dataPath).CreateFrom <IrisData>(useHeader: true));
            pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));

            pipeline.Add(new StochasticDualCoordinateAscentClassifier());

            return(pipeline.Train <IrisData, IrisPrediction>());
        }
        public void TensorFlowTransformMNISTConvPipelineTest()
        {
            var model_location = "mnist_model/frozen_saved_model.pb";
            var dataPath       = GetDataPath("Train-Tiny-28x28.txt");

            var pipeline = new Legacy.LearningPipeline(seed: 1);

            pipeline.Add(new Microsoft.ML.Legacy.Data.TextLoader(dataPath).CreateFrom <MNISTData>(useHeader: false));
            pipeline.Add(new Legacy.Transforms.ColumnCopier()
            {
                Column = new[] { new CopyColumnsTransformColumn()
                                 {
                                     Name = "reshape_input", Source = "Placeholder"
                                 } }
            });
            pipeline.Add(new TensorFlowScorer()
            {
                ModelFile     = model_location,
                OutputColumns = new[] { "Softmax", "dense/Relu" },
                InputColumns  = new[] { "Placeholder", "reshape_input" }
            });
            pipeline.Add(new Legacy.Transforms.ColumnConcatenator()
            {
                Column = new[] { new ConcatTransformColumn()
                                 {
                                     Name = "Features", Source = new[] { "Placeholder", "dense/Relu" }
                                 } }
            });
            pipeline.Add(new Legacy.Trainers.LogisticRegressionClassifier());
            TensorFlowUtils.Initialize();
            var model = pipeline.Train <MNISTData, MNISTPrediction>();

            var sample1 = new MNISTData()
            {
                Placeholder = new float[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 18, 18, 18, 126, 136, 175, 26,
                                            166, 255, 247, 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253, 253, 253, 253, 253,
                                            225, 172, 253, 242, 195, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 238, 253, 253, 253, 253, 253, 253, 253,
                                            253, 251, 93, 82, 82, 56, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 219, 253, 253, 253, 253, 253, 198,
                                            182, 247, 241, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 156, 107, 253, 253, 205, 11, 0,
                                            43, 154, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 1, 154, 253, 90, 0, 0, 0, 0, 0, 0,
                                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 139, 253, 190, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 190, 253, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 241, 225, 160, 108, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 240, 253, 253, 119, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 186, 253, 253, 150, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 93, 252, 253, 187, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249, 253, 249, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 130, 183, 253, 253, 207, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 148, 229, 253, 253, 253, 250, 182, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 114, 221, 253, 253, 253, 253, 201, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 66, 213, 253, 253, 253, 253, 198, 81, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 171, 219, 253, 253, 253, 253, 195, 80, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 172, 226, 253, 253, 253, 253, 244, 133, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, 253, 253, 253, 212, 135, 132, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
            };

            MNISTPrediction prediction = model.Predict(sample1);
        }
Example #13
        [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // TensorFlow is 64-bit only
        public void TensorFlowTransformCifarLearningPipelineTest()
        {
            var imageHeight    = 32;
            var imageWidth     = 32;
            var model_location = "cifar_model/frozen_model.pb";
            var dataFile       = GetDataPath("images/images.tsv");
            var imageFolder    = Path.GetDirectoryName(dataFile);

            var pipeline = new Legacy.LearningPipeline(seed: 1);

            pipeline.Add(new Microsoft.ML.Legacy.Data.TextLoader(dataFile).CreateFrom <CifarData>(useHeader: false));
            pipeline.Add(new ImageLoader(("ImagePath", "ImageReal"))
            {
                ImageFolder = imageFolder
            });
            // ... (the rest of this CIFAR example is not shown)
        }
        public void BooleanLabelPipeline()
        {
            var data = new BooleanLabelData[1];

            data[0] = new BooleanLabelData
            {
                Features = new float[] { 0.0f, 1.0f },
                Label    = false
            };
            var pipeline = new Legacy.LearningPipeline();

            pipeline.Add(CollectionDataSource.Create(data));
            pipeline.Add(new FastForestBinaryClassifier());
            var model = pipeline.Train <Data, Prediction>();
        }
        public void AppendPipeline()
        {
            var pipeline = new Legacy.LearningPipeline();

            pipeline.Append(new CategoricalOneHotVectorizer("String1", "String2"))
            .Append(new ColumnConcatenator(outputColumn: "Features", "String1", "String2", "Number1", "Number2"))
            .Append(new StochasticDualCoordinateAscentRegressor());
            Assert.NotNull(pipeline);
            Assert.Equal(3, pipeline.Count);

            pipeline.Remove(pipeline.ElementAt(2));
            Assert.Equal(2, pipeline.Count);

            pipeline.Append(new StochasticDualCoordinateAscentRegressor());
            Assert.Equal(3, pipeline.Count);
        }
        public void PredictNewsCluster()
        {
            string dataPath = GetDataPath(@"external/20newsgroups.txt");

            var pipeline = new Legacy.LearningPipeline(seed: 1, conc: 1);

            pipeline.Add(new Legacy.Data.TextLoader(dataPath).CreateFrom <NewsData>(useHeader: false, allowQuotedStrings: true, supportSparse: false));
            pipeline.Add(new ColumnConcatenator("AllText", "Subject", "Content"));
            pipeline.Add(new TextFeaturizer("Features", "AllText")
            {
                KeepDiacritics       = false,
                KeepPunctuations     = false,
                TextCase             = TextNormalizerTransformCaseNormalizationMode.Lower,
                StopWordsRemover     = new PredefinedStopWordsRemover(),
                VectorNormalizer     = TextTransformTextNormKind.L2,
                CharFeatureExtractor = new NGramNgramExtractor()
                {
                    NgramLength = 3, AllLengths = false
                },
                WordFeatureExtractor = new NGramNgramExtractor()
                {
                    NgramLength = 1, AllLengths = true
                }
            });

            pipeline.Add(new Legacy.Trainers.KMeansPlusPlusClusterer()
            {
                K = 20
            });
            var model     = pipeline.Train <NewsData, ClusteringPrediction>();
            var gunResult = model.Predict(new NewsData()
            {
                Subject = "Let's disscuss gun control", Content = @"The United States has 88.8 guns per 100 people, or about 270,000,000 guns, which is the highest total and per capita number in the world. 22% of Americans own one or more guns (35% of men and 12% of women). America's pervasive gun culture stems in part from its colonial history, revolutionary roots, frontier expansion, and the Second Amendment, which states: ""A well regulated militia,
                being necessary to the security of a free State,
                the right of the people to keep and bear Arms,
                shall not be infringed.""

Proponents of more gun control laws state that the Second Amendment was intended for militias; that gun violence would be reduced; that gun restrictions have always existed; and that a majority of Americans, including gun owners, support new gun restrictions. "
            });
            var puppiesResult = model.Predict(new NewsData()
            {
                Subject = "Studies Reveal Five Ways Dogs Show Us Their Love",
                Content = @"Let's face it: We all adore our dogs as if they were family and we tend to shower our dogs with affection in numerous ways. Perhaps you may buy your dog a favorite toy or stop by the dog bakery to order some great tasting doggy cookies, or perhaps you just love patting your dog in the evening in the way he most loves. But how do our dogs tell us they love us too?

Until the day your dog can talk, you'll never likely hear him pronounce ""I love you,"" and in the meantime, don't expect him to purchase you a Hallmark card or some balloons with those renowned romantic words printed on top. Also, don’t expect a box of chocolates or a bouquet of flowers from your dog when Valentine's day is around the corner. Sometimes it might feel like we're living an uneven relationship, but just because dogs don't communicate their love the way we do, doesn't mean they don't love us!"
            });
        }
        public void CanTrainProperties()
        {
            var pipeline = new Legacy.LearningPipeline();
            var data     = new List <IrisData>()
            {
                new IrisData {
                    SepalLength = 1f, SepalWidth = 1f, PetalLength = 0.3f, PetalWidth = 5.1f, Label = 1
                },
                new IrisData {
                    SepalLength = 1f, SepalWidth = 1f, PetalLength = 0.3f, PetalWidth = 5.1f, Label = 1
                },
                new IrisData {
                    SepalLength = 1.2f, SepalWidth = 0.5f, PetalLength = 0.3f, PetalWidth = 5.1f, Label = 0
                }
            };
            var collection = CollectionDataSource.Create(data);

            pipeline.Add(collection);
            pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
                                                "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
            pipeline.Add(new StochasticDualCoordinateAscentClassifier());
            var model = pipeline.Train <IrisData, IrisPredictionProperties>();

            IrisPredictionProperties prediction = model.Predict(new IrisData
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f,
            });

            pipeline   = new Legacy.LearningPipeline();
            collection = CollectionDataSource.Create(data.AsEnumerable());
            pipeline.Add(collection);
            pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
                                                "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
            pipeline.Add(new StochasticDualCoordinateAscentClassifier());
            model = pipeline.Train <IrisData, IrisPredictionProperties>();

            prediction = model.Predict(new IrisData
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f,
            });
        }
        void MultithreadedPrediction()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentDataPath);
            var pipeline     = new Legacy.LearningPipeline();

            var loader = new TextLoader(dataPath).CreateFrom <SentimentData>();

            loader.Arguments.HasHeader = true;
            pipeline.Add(loader);

            pipeline.Add(MakeSentimentTextTransform());

            pipeline.Add(new FastTreeBinaryClassifier()
            {
                NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2
            });

            pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
            {
                PredictedLabelColumn = "PredictedLabel"
            });
            var model       = pipeline.Train <SentimentData, SentimentPrediction>();
            var collection  = new List <SentimentData>();
            int numExamples = 100;

            for (int i = 0; i < numExamples; i++)
            {
                collection.Add(new SentimentData()
                {
                    SentimentText = "Let's predict this one!"
                });
            }

            Parallel.ForEach(collection, (input) =>
            {
                // We need this lock because the model itself is a stateful object and is probably not thread-safe.
                // See the comment at the top of the test.
                lock (model)
                {
                    var prediction = model.Predict(input);
                }
            });
        }
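The lock above serializes every Predict call. If the model has already been saved to disk (as in TrainSaveModelAndPredict), one lock-free alternative is to give each worker thread its own copy; a rough sketch with an assumed file name:

        void MultithreadedPredictionWithoutLock(List<SentimentData> collection)
        {
            Parallel.ForEach(
                collection,
                // Thread-local init: each worker loads a private copy of the saved model.
                () => Legacy.PredictionModel.ReadAsync<SentimentData, SentimentPrediction>("sentimentModel.zip").Result,
                (input, loopState, localModel) =>
                {
                    var prediction = localModel.Predict(input);
                    return localModel;
                },
                localModel => { /* nothing to clean up per thread */ });
        }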
Example #19
        [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // TensorFlow is 64-bit only
        public void TensorFlowTransformMNISTConvPipelineTest()
        {
            var model_location = "mnist_model/frozen_saved_model.pb";
            var dataPath       = GetDataPath(TestDatasets.mnistTiny28.trainFilename);

            var pipeline = new Legacy.LearningPipeline(seed: 1);

            pipeline.Add(new Microsoft.ML.Legacy.Data.TextLoader(dataPath).CreateFrom <MNISTData>(useHeader: false));
            pipeline.Add(new Legacy.Transforms.ColumnCopier()
            {
                Column = new[] { new ColumnsCopyingTransformerColumn()
                                 {
                                     Name = "reshape_input", Source = "Placeholder"
                                 } }
            });
            pipeline.Add(new TensorFlowScorer()
            {
                ModelLocation = model_location,
                OutputColumns = new[] { "Softmax", "dense/Relu" },
                InputColumns  = new[] { "Placeholder", "reshape_input" }
            });
            pipeline.Add(new Legacy.Transforms.ColumnConcatenator()
            {
                Column = new[] { new ColumnConcatenatingTransformerColumn()
                                 {
                                     Name = "Features", Source = new[] { "Placeholder", "dense/Relu" }
                                 } }
            });
            pipeline.Add(new Legacy.Transforms.LabelToFloatConverter()
            {
                LabelColumn = "Label"
            });
            pipeline.Add(new Legacy.Trainers.LogisticRegressionClassifier());

            var model = pipeline.Train <MNISTData, MNISTPrediction>();

            var sample1 = GetOneMNISTExample();

            MNISTPrediction prediction = model.Predict(sample1);
        }
        void Metacomponents()
        {
            var dataPath = GetDataPath(IrisDataPath);
            var pipeline = new Legacy.LearningPipeline(seed: 1, conc: 1);

            pipeline.Add(new TextLoader(dataPath).CreateFrom <IrisData>(useHeader: false));
            pipeline.Add(new Dictionarizer(new[] { "Label" }));
            pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
                                                "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));

            // This will throw an exception at training time if you specify anything other than a binary classifier.
            pipeline.Add(OneVersusAll.With(new StochasticDualCoordinateAscentBinaryClassifier()));

            var model = pipeline.Train <IrisData, IrisPrediction>();

            var testData  = new TextLoader(dataPath).CreateFrom <IrisData>(useHeader: false);
            var evaluator = new ClassificationEvaluator();
            ClassificationMetrics metrics = evaluator.Evaluate(model, testData);

            var prediction = model.Predict(new IrisData {
                PetalLength = 1, PetalWidth = 2, SepalLength = 1.4f, SepalWidth = 1.6f
            });
        }
Example #21
        public void MultiClassificationLRSaveModelToOnnxTest()
        {
            string dataPath = GetDataPath(@"breast-cancer.txt");
            var    pipeline = new Legacy.LearningPipeline();

            pipeline.Add(new Legacy.Data.TextLoader(dataPath)
            {
                Arguments = new TextLoaderArguments
                {
                    Separator = new[] { '\t' },
                    HasHeader = true,
                    Column    = new[]
                    {
                        new TextLoaderColumn()
                        {
                            Name   = "Label",
                            Source = new [] { new TextLoaderRange(0) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "Features",
                            Source = new [] { new TextLoaderRange(1, 9) },
                            Type   = Legacy.Data.DataKind.Num
                        }
                    }
                }
            });

            pipeline.Add(new Dictionarizer("Label"));
            pipeline.Add(new LogisticRegressionClassifier()
            {
                UseThreads = false
            });

            var model    = pipeline.Train <BreastCancerDataAllColumns, BreastCancerMCPrediction>();
            var subDir   = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "MultiClassClassification", "BreastCancer");
            var onnxPath = GetOutputPath(subDir, "MultiClassificationLRSaveModelToOnnxTest.onnx");

            DeleteOutputPath(onnxPath);

            var onnxAsJsonPath = GetOutputPath(subDir, "MultiClassificationLRSaveModelToOnnxTest.json");

            DeleteOutputPath(onnxAsJsonPath);

            OnnxConverter converter = new OnnxConverter()
            {
                InputsToDrop  = new[] { "Label" },
                OutputsToDrop = new[] { "Label", "Features" },
                Onnx          = onnxPath,
                Json          = onnxAsJsonPath,
                Domain        = "Onnx"
            };

            converter.Convert(model);

            // Strip the version.
            var fileText = File.ReadAllText(onnxAsJsonPath);

            fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\"");
            File.WriteAllText(onnxAsJsonPath, fileText);

            CheckEquality(subDir, "MultiClassificationLRSaveModelToOnnxTest.json");
            Done();
        }
Example #22
        public void KeyToVectorWithBagTest()
        {
            string dataPath = GetDataPath(@"breast-cancer.txt");
            var    pipeline = new Legacy.LearningPipeline();

            pipeline.Add(new Legacy.Data.TextLoader(dataPath)
            {
                Arguments = new TextLoaderArguments
                {
                    Separator = new[] { '\t' },
                    HasHeader = true,
                    Column    = new[]
                    {
                        new TextLoaderColumn()
                        {
                            Name   = "Label",
                            Source = new [] { new TextLoaderRange(0) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "F1",
                            Source = new [] { new TextLoaderRange(1, 1) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "F2",
                            Source = new [] { new TextLoaderRange(2, 2) },
                            Type   = Legacy.Data.DataKind.TX
                        }
                    }
                }
            });

            var vectorizer        = new CategoricalOneHotVectorizer();
            var categoricalColumn = new CategoricalTransformColumn()
            {
                OutputKind = CategoricalTransformOutputKind.Bag, Name = "F2", Source = "F2"
            };

            vectorizer.Column = new CategoricalTransformColumn[1] {
                categoricalColumn
            };
            pipeline.Add(vectorizer);
            pipeline.Add(new ColumnConcatenator("Features", "F1", "F2"));
            pipeline.Add(new FastTreeBinaryClassifier()
            {
                NumLeaves = 2, NumTrees = 1, MinDocumentsInLeafs = 2
            });

            var model    = pipeline.Train <BreastCancerData, BreastCancerPrediction>();
            var subDir   = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer");
            var onnxPath = GetOutputPath(subDir, "KeyToVectorBag.onnx");

            DeleteOutputPath(onnxPath);

            var onnxAsJsonPath = GetOutputPath(subDir, "KeyToVectorBag.json");

            DeleteOutputPath(onnxAsJsonPath);

            OnnxConverter converter = new OnnxConverter()
            {
                InputsToDrop  = new[] { "Label" },
                OutputsToDrop = new[] { "Label", "F1", "F2", "Features" },
                Onnx          = onnxPath,
                Json          = onnxAsJsonPath,
                Domain        = "Onnx"
            };

            converter.Convert(model);

            // Strip the version.
            var fileText = File.ReadAllText(onnxAsJsonPath);

            fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\"");
            File.WriteAllText(onnxAsJsonPath, fileText);

            CheckEquality(subDir, "KeyToVectorBag.json");
            Done();
        }
        public void TrainAndPredictIrisModelTest()
        {
            string dataPath = GetDataPath("iris.txt");

            var pipeline = new Legacy.LearningPipeline(seed: 1, conc: 1);

            pipeline.Add(new TextLoader(dataPath).CreateFrom <IrisData>(useHeader: false));
            pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
                                                "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));

            pipeline.Add(new StochasticDualCoordinateAscentClassifier());

            Legacy.PredictionModel <IrisData, IrisPrediction> model = pipeline.Train <IrisData, IrisPrediction>();

            IrisPrediction prediction = model.Predict(new IrisData()
            {
                SepalLength = 5.1f,
                SepalWidth  = 3.3f,
                PetalLength = 1.6f,
                PetalWidth  = 0.2f,
            });

            Assert.Equal(1, prediction.PredictedLabels[0], 2);
            Assert.Equal(0, prediction.PredictedLabels[1], 2);
            Assert.Equal(0, prediction.PredictedLabels[2], 2);

            prediction = model.Predict(new IrisData()
            {
                SepalLength = 6.4f,
                SepalWidth  = 3.1f,
                PetalLength = 5.5f,
                PetalWidth  = 2.2f,
            });

            Assert.Equal(0, prediction.PredictedLabels[0], 2);
            Assert.Equal(0, prediction.PredictedLabels[1], 2);
            Assert.Equal(1, prediction.PredictedLabels[2], 2);

            prediction = model.Predict(new IrisData()
            {
                SepalLength = 4.4f,
                SepalWidth  = 3.1f,
                PetalLength = 2.5f,
                PetalWidth  = 1.2f,
            });

            Assert.Equal(.2, prediction.PredictedLabels[0], 1);
            Assert.Equal(.8, prediction.PredictedLabels[1], 1);
            Assert.Equal(0, prediction.PredictedLabels[2], 2);

            // Note: Testing against the same data set as a simple way to test evaluation.
            // This isn't appropriate in real-world scenarios.
            string testDataPath = GetDataPath("iris.txt");
            var    testData     = new TextLoader(testDataPath).CreateFrom <IrisData>(useHeader: false);

            var evaluator = new ClassificationEvaluator();

            evaluator.OutputTopKAcc = 3;
            ClassificationMetrics metrics = evaluator.Evaluate(model, testData);

            Assert.Equal(.98, metrics.AccuracyMacro);
            Assert.Equal(.98, metrics.AccuracyMicro, 2);
            Assert.Equal(.06, metrics.LogLoss, 2);
            Assert.InRange(metrics.LogLossReduction, 94, 96);
            Assert.Equal(1, metrics.TopKAccuracy);

            Assert.Equal(3, metrics.PerClassLogLoss.Length);
            Assert.Equal(0, metrics.PerClassLogLoss[0], 1);
            Assert.Equal(.1, metrics.PerClassLogLoss[1], 1);
            Assert.Equal(.1, metrics.PerClassLogLoss[2], 1);

            ConfusionMatrix matrix = metrics.ConfusionMatrix;

            Assert.Equal(3, matrix.Order);
            Assert.Equal(3, matrix.ClassNames.Count);
            Assert.Equal("0", matrix.ClassNames[0]);
            Assert.Equal("1", matrix.ClassNames[1]);
            Assert.Equal("2", matrix.ClassNames[2]);

            Assert.Equal(50, matrix[0, 0]);
            Assert.Equal(50, matrix["0", "0"]);
            Assert.Equal(0, matrix[0, 1]);
            Assert.Equal(0, matrix["0", "1"]);
            Assert.Equal(0, matrix[0, 2]);
            Assert.Equal(0, matrix["0", "2"]);

            Assert.Equal(0, matrix[1, 0]);
            Assert.Equal(0, matrix["1", "0"]);
            Assert.Equal(48, matrix[1, 1]);
            Assert.Equal(48, matrix["1", "1"]);
            Assert.Equal(2, matrix[1, 2]);
            Assert.Equal(2, matrix["1", "2"]);

            Assert.Equal(0, matrix[2, 0]);
            Assert.Equal(0, matrix["2", "0"]);
            Assert.Equal(1, matrix[2, 1]);
            Assert.Equal(1, matrix["2", "1"]);
            Assert.Equal(49, matrix[2, 2]);
            Assert.Equal(49, matrix["2", "2"]);
        }
        public void TrainAndPredictIrisModelWithStringLabelTest()
        {
            string dataPath = GetDataPath("iris.data");

            var pipeline = new Legacy.LearningPipeline();

            pipeline.Add(new TextLoader(dataPath).CreateFrom <IrisDataWithStringLabel>(useHeader: false, separator: ','));

            pipeline.Add(new Dictionarizer("Label"));  // "IrisPlantType" is used as "Label" because of the column attribute name on the field.

            pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
                                                "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));

            pipeline.Add(new StochasticDualCoordinateAscentClassifier());

            var model = pipeline.Train <IrisDataWithStringLabel, IrisPrediction>();

            string[] scoreLabels;
            model.TryGetScoreLabelNames(out scoreLabels);

            Assert.NotNull(scoreLabels);
            Assert.Equal(3, scoreLabels.Length);
            Assert.Equal("Iris-setosa", scoreLabels[0]);
            Assert.Equal("Iris-versicolor", scoreLabels[1]);
            Assert.Equal("Iris-virginica", scoreLabels[2]);

            IrisPrediction prediction = model.Predict(new IrisDataWithStringLabel()
            {
                SepalLength = 5.1f,
                SepalWidth  = 3.3f,
                PetalLength = 1.6f,
                PetalWidth  = 0.2f,
            });

            Assert.Equal(1, prediction.PredictedLabels[0], 2);
            Assert.Equal(0, prediction.PredictedLabels[1], 2);
            Assert.Equal(0, prediction.PredictedLabels[2], 2);

            prediction = model.Predict(new IrisDataWithStringLabel()
            {
                SepalLength = 6.4f,
                SepalWidth  = 3.1f,
                PetalLength = 5.5f,
                PetalWidth  = 2.2f,
            });

            Assert.Equal(0, prediction.PredictedLabels[0], 2);
            Assert.Equal(0, prediction.PredictedLabels[1], 2);
            Assert.Equal(1, prediction.PredictedLabels[2], 2);

            prediction = model.Predict(new IrisDataWithStringLabel()
            {
                SepalLength = 4.4f,
                SepalWidth  = 3.1f,
                PetalLength = 2.5f,
                PetalWidth  = 1.2f,
            });

            Assert.Equal(.2, prediction.PredictedLabels[0], 1);
            Assert.Equal(.8, prediction.PredictedLabels[1], 1);
            Assert.Equal(0, prediction.PredictedLabels[2], 2);

            // Note: Testing against the same data set as a simple way to test evaluation.
            // This isn't appropriate in real-world scenarios.
            string testDataPath = GetDataPath("iris.data");
            var    testData     = new TextLoader(testDataPath).CreateFrom <IrisDataWithStringLabel>(useHeader: false, separator: ',');

            var evaluator = new ClassificationEvaluator();

            evaluator.OutputTopKAcc = 3;
            ClassificationMetrics metrics = evaluator.Evaluate(model, testData);

            Assert.Equal(.98, metrics.AccuracyMacro);
            Assert.Equal(.98, metrics.AccuracyMicro, 2);
            Assert.Equal(.06, metrics.LogLoss, 2);
            Assert.InRange(metrics.LogLossReduction, 94, 96);
            Assert.Equal(1, metrics.TopKAccuracy);

            Assert.Equal(3, metrics.PerClassLogLoss.Length);
            Assert.Equal(0, metrics.PerClassLogLoss[0], 1);
            Assert.Equal(.1, metrics.PerClassLogLoss[1], 1);
            Assert.Equal(.1, metrics.PerClassLogLoss[2], 1);

            ConfusionMatrix matrix = metrics.ConfusionMatrix;

            Assert.Equal(3, matrix.Order);
            Assert.Equal(3, matrix.ClassNames.Count);
            Assert.Equal("Iris-setosa", matrix.ClassNames[0]);
            Assert.Equal("Iris-versicolor", matrix.ClassNames[1]);
            Assert.Equal("Iris-virginica", matrix.ClassNames[2]);

            Assert.Equal(50, matrix[0, 0]);
            Assert.Equal(50, matrix["Iris-setosa", "Iris-setosa"]);
            Assert.Equal(0, matrix[0, 1]);
            Assert.Equal(0, matrix["Iris-setosa", "Iris-versicolor"]);
            Assert.Equal(0, matrix[0, 2]);
            Assert.Equal(0, matrix["Iris-setosa", "Iris-virginica"]);

            Assert.Equal(0, matrix[1, 0]);
            Assert.Equal(0, matrix["Iris-versicolor", "Iris-setosa"]);
            Assert.Equal(48, matrix[1, 1]);
            Assert.Equal(48, matrix["Iris-versicolor", "Iris-versicolor"]);
            Assert.Equal(2, matrix[1, 2]);
            Assert.Equal(2, matrix["Iris-versicolor", "Iris-virginica"]);

            Assert.Equal(0, matrix[2, 0]);
            Assert.Equal(0, matrix["Iris-virginica", "Iris-setosa"]);
            Assert.Equal(1, matrix[2, 1]);
            Assert.Equal(1, matrix["Iris-virginica", "Iris-versicolor"]);
            Assert.Equal(49, matrix[2, 2]);
            Assert.Equal(49, matrix["Iris-virginica", "Iris-virginica"]);
        }