/// <summary>
/// Trains an occupancy regression model with the FastTreeTweedie trainer,
/// reports 10-fold cross-validation metrics, and saves the fitted model to disk.
/// </summary>
/// <param name="context">The MLContext providing ML.NET operations.</param>
/// <param name="outputModelPath">Path where the trained model zip archive is written.</param>
public static void CreateModelPipeline(MLContext context, string outputModelPath = "occupancy_fastTreeTweedie.zip")
{
    ConsoleWriteHeader("Training prediction model");

    // Load the data
    IList<OccupancyTrainer> data = GetOccupancyData();
    var trainData = context.CreateDataView(data);

    // Choosing regression algorithm
    var trainer = context.Regression.Trainers.FastTreeTweedie("Label", "Features");

    // Transform the data: one-hot encode the date, use "Next" as the regression
    // label, and concatenate the predictive columns into a single "Features" vector.
    var pipeline = context.Transforms.Categorical.OneHotEncoding("Date")
        .Append(context.Transforms.CopyColumns("Next", "Label"))
        .Append(context.Transforms.Concatenate(outputColumn: "Features", "Date", "TotalRoom", "Prev", "Occupied"))
        .Append(trainer);

    // Cross-Validate with single dataset
    Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
    var crossValidateResults = context.Regression.CrossValidate(trainData, pipeline, numFolds: 10, labelColumn: "Label");
    PrintRegressionFoldsAverageMetrics(trainer.ToString(), crossValidateResults);

    // Create and train the model
    var model = pipeline.Fit(trainData);

    // BUGFIX: use File.Create rather than File.OpenWrite. OpenWrite does not
    // truncate an existing file, so saving a smaller model over a previously
    // larger file would leave trailing bytes and corrupt the zip archive.
    using (var file = File.Create(outputModelPath))
        model.SaveTo(context, file);
}
// Builds a lambda filter over the breast-cancer examples, checks that the row
// count is unknown up front, re-applies the transform to a fresh view of the
// same data, and saves the filtered output as a text file.
public void LambdaTransformCreate()
{
    var mlContext = new MLContext(seed: 42);
    var examples = ReadBreastCancerExamples();
    var dataView = mlContext.CreateDataView(examples);

    var labelFilter = LambdaTransform.CreateFilter<BreastCancerExample, object>(
        mlContext, dataView, (input, state) => input.Label == 0, null);

    // A lambda filter cannot report its row count without a full data pass.
    Assert.Null(labelFilter.GetRowCount());

    // test re-apply
    var reapplied = mlContext.CreateDataView(examples);
    reapplied = ApplyTransformUtils.ApplyAllTransformsToData(mlContext, labelFilter, reapplied);

    var textSaver = new TextSaver(mlContext, new TextSaver.Arguments());
    Assert.True(reapplied.Schema.TryGetColumnIndex("Label", out int labelColumnIndex));
    using (var outputStream = File.Create(GetOutputPath(OutputRelativePath, "lambda-output.tsv")))
        textSaver.SaveData(outputStream, reapplied, labelColumnIndex);
}
/// <summary>
/// Verifies the [CursorChannel] attribute contract in lambda filters:
/// an IChannel field with the attribute gets populated; a non-IChannel field
/// with the attribute raises a marked InvalidOperationException, as do multiple
/// attributed fields; an unattributed IChannel field is left untouched.
/// </summary>
public void CursorChannelExposedInMapTransform()
{
    var env = new MLContext(seed: 0);

    // Correct use of CursorChannel attribute.
    var data1 = Utils.CreateArray(10, new OneIChannelWithAttribute());
    var idv1 = env.CreateDataView(data1);
    Assert.Null(data1[0].Channel);

    var filter1 = LambdaTransform.CreateFilter<OneIChannelWithAttribute, object>(env, idv1,
        (input, state) =>
        {
            Assert.NotNull(input.Channel);
            return false;
        }, null);
    filter1.GetRowCursorForAllColumns().MoveNext();

    // Error case: non-IChannel field marked with attribute.
    var data2 = Utils.CreateArray(10, new OneStringWithAttribute());
    var idv2 = env.CreateDataView(data2);
    Assert.Null(data2[0].Channel);

    var filter2 = LambdaTransform.CreateFilter<OneStringWithAttribute, object>(env, idv2,
        (input, state) =>
        {
            Assert.Null(input.Channel);
            return false;
        }, null);

    try
    {
        filter2.GetRowCursorForAllColumns().MoveNext();
        Assert.True(false, "Throw an error if attribute is applied to a field that is not an IChannel.");
    }
    catch (InvalidOperationException ex)
    {
        Assert.True(ex.IsMarked());
    }

    // Error case: multiple fields marked with attributes.
    var data3 = Utils.CreateArray(10, new TwoIChannelsWithAttributes());
    var idv3 = env.CreateDataView(data3);
    Assert.Null(data3[0].ChannelOne);
    Assert.Null(data3[2].ChannelTwo);

    var filter3 = LambdaTransform.CreateFilter<TwoIChannelsWithAttributes, object>(env, idv3,
        (input, state) =>
        {
            Assert.Null(input.ChannelOne);
            Assert.Null(input.ChannelTwo);
            return false;
        }, null);

    try
    {
        filter3.GetRowCursorForAllColumns().MoveNext();
        Assert.True(false, "Throw an error if attribute is applied to a field that is not an IChannel.");
    }
    catch (InvalidOperationException ex)
    {
        Assert.True(ex.IsMarked());
    }

    // Correct case: non-marked IChannel field is not touched.
    var example4 = new TwoIChannelsOnlyOneWithAttribute();
    Assert.Null(example4.ChannelTwo);
    Assert.Null(example4.ChannelOne);
    var idv4 = env.CreateDataView(Utils.CreateArray(10, example4));

    var filter4 = LambdaTransform.CreateFilter<TwoIChannelsOnlyOneWithAttribute, object>(env, idv4,
        (input, state) =>
        {
            Assert.Null(input.ChannelOne);
            Assert.NotNull(input.ChannelTwo);
            return false;
        }, null);

    // BUGFIX: the original cursored filter1 here (copy-paste error), so the
    // assertions inside filter4's lambda were never executed and this case
    // was effectively untested.
    filter4.GetRowCursorForAllColumns().MoveNext();
}
// Verifies that metadata attached to SchemaDefinition columns (both via
// MetadataInfo objects and AddMetadata calls) is surfaced on the resulting
// IDataView's schema, that the stored values round-trip through GetValue,
// and that lookups with a wrong kind or mismatched type throw as expected.
public void MetadataSupportInDataViewConstruction()
{
    var data = ReadBreastCancerExamples();
    var autoSchema = SchemaDefinition.Create(typeof(BreastCancerExample));
    var mlContext = new MLContext(0);

    // Create Metadata.
    var kindFloat = "Testing float as metadata.";
    var valueFloat = 10;
    var coltypeFloat = NumberType.Float;
    var kindString = "Testing string as metadata.";
    var valueString = "Strings have value.";
    var kindStringArray = "Testing string array as metadata.";
    var valueStringArray = "I really have no idea what these features entail.".Split(' ');
    var kindFloatArray = "Testing float array as metadata.";
    var valueFloatArray = new float[] { 1, 17, 7, 19, 25, 0 };
    var kindVBuffer = "Testing VBuffer as metadata.";
    var valueVBuffer = new VBuffer<float>(4, new float[] { 4, 6, 89, 5 });

    var metaFloat = new MetadataInfo<float>(kindFloat, valueFloat, coltypeFloat);
    var metaString = new MetadataInfo<string>(kindString, valueString);

    // Add Metadata.
    // Label column (index 0) gets two metadata entries supplied at construction.
    var labelColumn = autoSchema[0];
    var labelColumnWithMetadata = new SchemaDefinition.Column(mlContext, labelColumn.MemberName, labelColumn.ColumnType,
        metadataInfos: new MetadataInfo[] { metaFloat, metaString });

    // Feature column (index 1) gets three metadata entries added incrementally.
    var featureColumnWithMetadata = autoSchema[1];
    featureColumnWithMetadata.AddMetadata(kindStringArray, valueStringArray);
    featureColumnWithMetadata.AddMetadata(kindFloatArray, valueFloatArray);
    featureColumnWithMetadata.AddMetadata(kindVBuffer, valueVBuffer);

    var mySchema = new SchemaDefinition { labelColumnWithMetadata, featureColumnWithMetadata };
    var idv = mlContext.CreateDataView(data, mySchema);

    // Label column: float + string metadata registered above, in order.
    Assert.True(idv.Schema[0].Metadata.Schema.Count == 2);
    Assert.True(idv.Schema[0].Metadata.Schema[0].Name == kindFloat);
    Assert.True(idv.Schema[0].Metadata.Schema[0].Type == coltypeFloat);
    Assert.True(idv.Schema[0].Metadata.Schema[1].Name == kindString);
    Assert.True(idv.Schema[0].Metadata.Schema[1].Type == TextType.Instance);

    // Feature column: three metadata entries; the string array is exposed
    // as a vector of text.
    Assert.True(idv.Schema[1].Metadata.Schema.Count == 3);
    Assert.True(idv.Schema[1].Metadata.Schema[0].Name == kindStringArray);
    Assert.True(idv.Schema[1].Metadata.Schema[0].Type is VectorType vectorType && vectorType.ItemType is TextType);

    // Looking up a metadata kind not present on this column throws.
    Assert.Throws<ArgumentOutOfRangeException>(() => idv.Schema[1].Metadata.Schema[kindFloat]);

    // Round-trip the float metadata value (tolerance for float comparison).
    float retrievedFloat = 0;
    idv.Schema[0].Metadata.GetValue(kindFloat, ref retrievedFloat);
    Assert.True(Math.Abs(retrievedFloat - valueFloat) < .000001);

    // Round-trip the string metadata value.
    ReadOnlyMemory<char> retrievedReadOnlyMemory = new ReadOnlyMemory<char>();
    idv.Schema[0].Metadata.GetValue(kindString, ref retrievedReadOnlyMemory);
    Assert.True(retrievedReadOnlyMemory.Span.SequenceEqual(valueString.AsMemory().Span));

    // Round-trip the string-array metadata, element by element.
    VBuffer<ReadOnlyMemory<char>> retrievedReadOnlyMemoryVBuffer = new VBuffer<ReadOnlyMemory<char>>();
    idv.Schema[1].Metadata.GetValue(kindStringArray, ref retrievedReadOnlyMemoryVBuffer);
    Assert.True(retrievedReadOnlyMemoryVBuffer.DenseValues().Select((s, i) => s.ToString() == valueStringArray[i]).All(b => b));

    // Round-trip the float-array metadata; the pre-sized destination buffer
    // is expected to be overwritten by GetValue.
    VBuffer<float> retrievedFloatVBuffer = new VBuffer<float>(1, new float[] { 2 });
    idv.Schema[1].Metadata.GetValue(kindFloatArray, ref retrievedFloatVBuffer);
    VBuffer<float> valueFloatVBuffer = new VBuffer<float>(valueFloatArray.Length, valueFloatArray);
    Assert.True(retrievedFloatVBuffer.Items().SequenceEqual(valueFloatVBuffer.Items()));

    // Round-trip the VBuffer metadata.
    VBuffer<float> retrievedVBuffer = new VBuffer<float>();
    idv.Schema[1].Metadata.GetValue(kindVBuffer, ref retrievedVBuffer);
    Assert.True(retrievedVBuffer.Items().SequenceEqual(valueVBuffer.Items()));

    // Requesting a metadata value with a mismatched destination type throws
    // a marked InvalidOperationException.
    var ex = Assert.Throws<InvalidOperationException>(() => idv.Schema[1].Metadata.GetValue(kindFloat, ref retrievedReadOnlyMemoryVBuffer));
    Assert.True(ex.IsMarked());
}
/// <summary>
/// Example of detecting spikes in an i.i.d. series with IidSpikeDetector,
/// including check-pointing the stateful model to disk and resuming
/// prediction from the reloaded model.
/// </summary>
public static void IidSpikeDetectorPrediction()
{
    // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
    // as well as the source of randomness.
    var ml = new MLContext();

    // Generate sample series data with a spike
    const int Size = 10;
    var data = new List<IidSpikeData>(Size);
    for (int i = 0; i < Size / 2; i++)
        data.Add(new IidSpikeData(5));
    // This is a spike
    data.Add(new IidSpikeData(10));
    for (int i = 0; i < Size / 2; i++)
        data.Add(new IidSpikeData(5));

    // Convert data to IDataView.
    var dataView = ml.CreateDataView(data);

    // Setup IidSpikeDetector arguments
    string outputColumnName = nameof(IidSpikePrediction.Prediction);
    string inputColumnName = nameof(IidSpikeData.Value);
    var args = new IidSpikeDetector.Arguments()
    {
        Source = inputColumnName,
        Name = outputColumnName,
        Confidence = 95, // The confidence for spike detection in the range [0, 100]
        PvalueHistoryLength = Size / 4 // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes.
    };

    // The transformed model.
    ITransformer model = new IidSpikeEstimator(ml, args).Fit(dataView);

    // Create a time series prediction engine from the model.
    var engine = model.CreateTimeSeriesPredictionFunction<IidSpikeData, IidSpikePrediction>(ml);
    for (int index = 0; index < 5; index++)
    {
        // Anomaly spike detection.
        var prediction = engine.Predict(new IidSpikeData(5));
        Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 5, prediction.Prediction[0],
            prediction.Prediction[1], prediction.Prediction[2]);
    }

    // Spike.
    var spikePrediction = engine.Predict(new IidSpikeData(10));
    Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 10, spikePrediction.Prediction[0],
        spikePrediction.Prediction[1], spikePrediction.Prediction[2]);

    // Checkpoint the model.
    var modelPath = "temp.zip";
    engine.CheckPoint(ml, modelPath);

    // Load the model.
    using (var file = File.OpenRead(modelPath))
        model = TransformerChain.LoadFrom(ml, file);

    // BUGFIX: recreate the prediction engine from the reloaded model. The
    // original kept predicting with the old engine, so the check-pointed
    // model loaded above was never actually used.
    engine = model.CreateTimeSeriesPredictionFunction<IidSpikeData, IidSpikePrediction>(ml);

    for (int index = 0; index < 5; index++)
    {
        // Anomaly spike detection, resumed from the check-pointed state.
        var prediction = engine.Predict(new IidSpikeData(5));
        Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 5, prediction.Prediction[0],
            prediction.Prediction[1], prediction.Prediction[2]);
    }

    // Data Alert Score P-Value
    // 5 0 5.00 0.50
    // 5 0 5.00 0.50
    // 5 0 5.00 0.50
    // 5 0 5.00 0.50
    // 5 0 5.00 0.50
    // 10 1 10.00 0.00 <-- alert is on, predicted spike (check-point model)
    // 5 0 5.00 0.26 <-- load model from disk.
    // 5 0 5.00 0.26
    // 5 0 5.00 0.50
    // 5 0 5.00 0.50
    // 5 0 5.00 0.50
}