Esempio n. 1
0
        public static void CreateModelPipeline(MLContext context, string outputModelPath = "occupancy_fastTreeTweedie.zip")
        {
            ConsoleWriteHeader("Training prediction model");
            IList <OccupancyTrainer> Data = GetOccupancyData();
            // Load the data
            var trainData = context.CreateDataView(Data);

            // Choosing regression algorithm
            var trainer = context.Regression.Trainers.FastTreeTweedie("Label", "Features");

            // Transform the data
            var pipeline = context.Transforms.Categorical.OneHotEncoding("Date")
                           .Append(context.Transforms.CopyColumns("Next", "Label"))
                           .Append(context.Transforms.Concatenate(outputColumn: "Features", "Date", "TotalRoom", "Prev", "Occupied"))
                           .Append(trainer);

            // Cross-Validate with single dataset
            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
            var crossValidateResults = context.Regression.CrossValidate(trainData, pipeline, numFolds: 10, labelColumn: "Label");

            PrintRegressionFoldsAverageMetrics(trainer.ToString(), crossValidateResults);

            // Create and train the model
            var model = pipeline.Fit(trainData);

            using (var file = File.OpenWrite(outputModelPath))
                model.SaveTo(context, file);
        }
Esempio n. 2
0
        public void LambdaTransformCreate()
        {
            var env = new MLContext(seed: 42);
            var data = ReadBreastCancerExamples();
            var idv = env.CreateDataView(data);

            var filter = LambdaTransform.CreateFilter<BreastCancerExample, object>(env, idv,
                (input, state) => input.Label == 0, null);

            Assert.Null(filter.GetRowCount());

            // test re-apply
            var applied = env.CreateDataView(data);
            applied = ApplyTransformUtils.ApplyAllTransformsToData(env, filter, applied);

            var saver = new TextSaver(env, new TextSaver.Arguments());
            Assert.True(applied.Schema.TryGetColumnIndex("Label", out int label));
            using (var fs = File.Create(GetOutputPath(OutputRelativePath, "lambda-output.tsv")))
                saver.SaveData(fs, applied, label);
        }
Esempio n. 3
0
        public void CursorChannelExposedInMapTransform()
        {
            var env = new MLContext(seed: 0);
            // Correct use of CursorChannel attribute.
            var data1 = Utils.CreateArray(10, new OneIChannelWithAttribute());
            var idv1 = env.CreateDataView(data1);
            Assert.Null(data1[0].Channel);

            var filter1 = LambdaTransform.CreateFilter<OneIChannelWithAttribute, object>(env, idv1,
                (input, state) =>
                {
                    Assert.NotNull(input.Channel);
                    return false;
                }, null);
            filter1.GetRowCursorForAllColumns().MoveNext();

            // Error case: non-IChannel field marked with attribute.
            var data2 = Utils.CreateArray(10, new OneStringWithAttribute());
            var idv2 = env.CreateDataView(data2);
            Assert.Null(data2[0].Channel);

            var filter2 = LambdaTransform.CreateFilter<OneStringWithAttribute, object>(env, idv2,
                (input, state) =>
                {
                    Assert.Null(input.Channel);
                    return false;
                }, null);
            try
            {
                filter2.GetRowCursorForAllColumns().MoveNext();
                Assert.True(false, "Throw an error if attribute is applied to a field that is not an IChannel.");
            }
            catch (InvalidOperationException ex)
            {
                Assert.True(ex.IsMarked());
            }

            // Error case: multiple fields marked with attributes.
            var data3 = Utils.CreateArray(10, new TwoIChannelsWithAttributes());
            var idv3 = env.CreateDataView(data3);
            Assert.Null(data3[0].ChannelOne);
            Assert.Null(data3[2].ChannelTwo);

            var filter3 = LambdaTransform.CreateFilter<TwoIChannelsWithAttributes, object>(env, idv3,
                (input, state) =>
                {
                    Assert.Null(input.ChannelOne);
                    Assert.Null(input.ChannelTwo);
                    return false;
                }, null);
            try
            {
                filter3.GetRowCursorForAllColumns().MoveNext();
                Assert.True(false, "Throw an error if attribute is applied to a field that is not an IChannel.");
            }
            catch (InvalidOperationException ex)
            {
                Assert.True(ex.IsMarked());
            }

            // Correct case: non-marked IChannel field is not touched.
            var example4 = new TwoIChannelsOnlyOneWithAttribute();
            Assert.Null(example4.ChannelTwo);
            Assert.Null(example4.ChannelOne);
            var idv4 = env.CreateDataView(Utils.CreateArray(10, example4));

            var filter4 = LambdaTransform.CreateFilter<TwoIChannelsOnlyOneWithAttribute, object>(env, idv4,
                (input, state) =>
                {
                    Assert.Null(input.ChannelOne);
                    Assert.NotNull(input.ChannelTwo);
                    return false;
                }, null);
            filter1.GetRowCursorForAllColumns().MoveNext();
        }
Esempio n. 4
0
        public void MetadataSupportInDataViewConstruction()
        {
            var data = ReadBreastCancerExamples();
            var autoSchema = SchemaDefinition.Create(typeof(BreastCancerExample));

            var mlContext = new MLContext(0);

            // Create Metadata.
            var kindFloat = "Testing float as metadata.";
            var valueFloat = 10;
            var coltypeFloat = NumberType.Float;
            var kindString = "Testing string as metadata.";
            var valueString = "Strings have value.";
            var kindStringArray = "Testing string array as metadata.";
            var valueStringArray = "I really have no idea what these features entail.".Split(' ');
            var kindFloatArray = "Testing float array as metadata.";
            var valueFloatArray = new float[] { 1, 17, 7, 19, 25, 0 };
            var kindVBuffer = "Testing VBuffer as metadata.";
            var valueVBuffer = new VBuffer<float>(4, new float[] { 4, 6, 89, 5 });

            var metaFloat = new MetadataInfo<float>(kindFloat, valueFloat, coltypeFloat);
            var metaString = new MetadataInfo<string>(kindString, valueString);

            // Add Metadata.
            var labelColumn = autoSchema[0];
            var labelColumnWithMetadata = new SchemaDefinition.Column(mlContext, labelColumn.MemberName, labelColumn.ColumnType,
                metadataInfos: new MetadataInfo[] { metaFloat, metaString });

            var featureColumnWithMetadata = autoSchema[1];
            featureColumnWithMetadata.AddMetadata(kindStringArray, valueStringArray);
            featureColumnWithMetadata.AddMetadata(kindFloatArray, valueFloatArray);
            featureColumnWithMetadata.AddMetadata(kindVBuffer, valueVBuffer);

            var mySchema = new SchemaDefinition { labelColumnWithMetadata, featureColumnWithMetadata };
            var idv = mlContext.CreateDataView(data, mySchema);

            Assert.True(idv.Schema[0].Metadata.Schema.Count == 2);
            Assert.True(idv.Schema[0].Metadata.Schema[0].Name == kindFloat);
            Assert.True(idv.Schema[0].Metadata.Schema[0].Type == coltypeFloat);
            Assert.True(idv.Schema[0].Metadata.Schema[1].Name == kindString);
            Assert.True(idv.Schema[0].Metadata.Schema[1].Type == TextType.Instance);

            Assert.True(idv.Schema[1].Metadata.Schema.Count == 3);
            Assert.True(idv.Schema[1].Metadata.Schema[0].Name == kindStringArray);
            Assert.True(idv.Schema[1].Metadata.Schema[0].Type is VectorType vectorType && vectorType.ItemType is TextType);
            Assert.Throws<ArgumentOutOfRangeException>(() => idv.Schema[1].Metadata.Schema[kindFloat]);

            float retrievedFloat = 0;
            idv.Schema[0].Metadata.GetValue(kindFloat, ref retrievedFloat);
            Assert.True(Math.Abs(retrievedFloat - valueFloat) < .000001);

            ReadOnlyMemory<char> retrievedReadOnlyMemory = new ReadOnlyMemory<char>();
            idv.Schema[0].Metadata.GetValue(kindString, ref retrievedReadOnlyMemory);
            Assert.True(retrievedReadOnlyMemory.Span.SequenceEqual(valueString.AsMemory().Span));

            VBuffer<ReadOnlyMemory<char>> retrievedReadOnlyMemoryVBuffer = new VBuffer<ReadOnlyMemory<char>>();
            idv.Schema[1].Metadata.GetValue(kindStringArray, ref retrievedReadOnlyMemoryVBuffer);
            Assert.True(retrievedReadOnlyMemoryVBuffer.DenseValues().Select((s, i) => s.ToString() == valueStringArray[i]).All(b => b));

            VBuffer<float> retrievedFloatVBuffer = new VBuffer<float>(1, new float[] { 2 });
            idv.Schema[1].Metadata.GetValue(kindFloatArray, ref retrievedFloatVBuffer);
            VBuffer<float> valueFloatVBuffer = new VBuffer<float>(valueFloatArray.Length, valueFloatArray);
            Assert.True(retrievedFloatVBuffer.Items().SequenceEqual(valueFloatVBuffer.Items()));

            VBuffer<float> retrievedVBuffer = new VBuffer<float>();
            idv.Schema[1].Metadata.GetValue(kindVBuffer, ref retrievedVBuffer);
            Assert.True(retrievedVBuffer.Items().SequenceEqual(valueVBuffer.Items()));

            var ex = Assert.Throws<InvalidOperationException>(() => idv.Schema[1].Metadata.GetValue(kindFloat, ref retrievedReadOnlyMemoryVBuffer));
            Assert.True(ex.IsMarked());
        }
        public static void IidSpikeDetectorPrediction()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var ml = new MLContext();

            // Generate sample series data with a spike
            const int Size = 10;
            var       data = new List <IidSpikeData>(Size);

            for (int i = 0; i < Size / 2; i++)
            {
                data.Add(new IidSpikeData(5));
            }
            // This is a spike
            data.Add(new IidSpikeData(10));
            for (int i = 0; i < Size / 2; i++)
            {
                data.Add(new IidSpikeData(5));
            }

            // Convert data to IDataView.
            var dataView = ml.CreateDataView(data);

            // Setup IidSpikeDetector arguments
            string outputColumnName = nameof(IidSpikePrediction.Prediction);
            string inputColumnName  = nameof(IidSpikeData.Value);
            var    args             = new IidSpikeDetector.Arguments()
            {
                Source              = inputColumnName,
                Name                = outputColumnName,
                Confidence          = 95,       // The confidence for spike detection in the range [0, 100]
                PvalueHistoryLength = Size / 4  // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes.
            };

            // The transformed model.
            ITransformer model = new IidSpikeEstimator(ml, args).Fit(dataView);

            // Create a time series prediction engine from the model.
            var engine = model.CreateTimeSeriesPredictionFunction <IidSpikeData, IidSpikePrediction>(ml);

            for (int index = 0; index < 5; index++)
            {
                // Anomaly spike detection.
                var prediction = engine.Predict(new IidSpikeData(5));
                Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 5, prediction.Prediction[0],
                                  prediction.Prediction[1], prediction.Prediction[2]);
            }

            // Spike.
            var spikePrediction = engine.Predict(new IidSpikeData(10));

            Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 10, spikePrediction.Prediction[0],
                              spikePrediction.Prediction[1], spikePrediction.Prediction[2]);

            // Checkpoint the model.
            var modelPath = "temp.zip";

            engine.CheckPoint(ml, modelPath);

            // Load the model.
            using (var file = File.OpenRead(modelPath))
                model = TransformerChain.LoadFrom(ml, file);

            for (int index = 0; index < 5; index++)
            {
                // Anomaly spike detection.
                var prediction = engine.Predict(new IidSpikeData(5));
                Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 5, prediction.Prediction[0],
                                  prediction.Prediction[1], prediction.Prediction[2]);
            }

            // Data Alert   Score   P-Value
            // 5      0       5.00    0.50
            // 5      0       5.00    0.50
            // 5      0       5.00    0.50
            // 5      0       5.00    0.50
            // 5      0       5.00    0.50
            // 10     1      10.00    0.00  <-- alert is on, predicted spike (check-point model)
            // 5      0       5.00    0.26  <-- load model from disk.
            // 5      0       5.00    0.26
            // 5      0       5.00    0.50
            // 5      0       5.00    0.50
            // 5      0       5.00    0.50
        }