// Trains an SSA change point detector on five seasons of a repeating 0..9 pattern followed by
// a ramp (0, 100, ..., 900), transforms the same data, and compares the leading prediction rows
// against recorded baseline values.
public void ChangePointDetectionWithSeasonality()
{
    using (var env = new ConsoleEnvironment(conc: 1))
    {
        const int ChangeHistorySize = 10;
        const int SeasonalitySize = 10;
        const int NumberOfSeasonsInTraining = 5;
        const int MaxTrainingSize = NumberOfSeasonsInTraining * SeasonalitySize;

        List<Data> data = new List<Data>();
        var dataView = env.CreateStreamingDataView(data);

        var args = new SsaChangePointDetector.Arguments()
        {
            Confidence = 95,
            Source = "Value",
            Name = "Change",
            ChangeHistoryLength = ChangeHistorySize,
            TrainingWindowSize = MaxTrainingSize,
            SeasonalWindowSize = SeasonalitySize
        };

        // Seasonal part: the values 0..9 repeated once per training season.
        for (int j = 0; j < NumberOfSeasonsInTraining; j++)
        {
            for (int i = 0; i < SeasonalitySize; i++)
            {
                data.Add(new Data(i));
            }
        }

        // Ramp appended after the seasonal part.
        for (int i = 0; i < ChangeHistorySize; i++)
        {
            data.Add(new Data(i * 100));
        }

        // Train
        var detector = new SsaChangePointEstimator(env, args).Fit(dataView);

        // Transform
        var output = detector.Transform(dataView);

        // Baseline values, four per row: alert, raw score, p-value score, martingale score.
        List<double> expectedValues = new List<double>()
        {
            0, -3.31410598754883, 0.5, 5.12000000000001E-08,
            0, 1.5700820684432983, 5.2001145245395008E-07, 0.012414560443710681,
            0, 1.2854313254356384, 0.28810801662678009, 0.02038940454467935,
            0, -1.0950627326965332, 0.36663890634019225, 0.026956459625565483
        };

        // Get predictions. foreach disposes the underlying enumerator even when an assertion
        // throws, which the previous hand-rolled MoveNext loop did not do.
        int index = 0;
        foreach (var row in output.AsEnumerable<Prediction>(env, true))
        {
            if (index >= expectedValues.Count)
            {
                break;
            }

            Assert.Equal(expectedValues[index++], row.Change[0], precision: 7); // Alert
            Assert.Equal(expectedValues[index++], row.Change[1], precision: 7); // Raw score
            Assert.Equal(expectedValues[index++], row.Change[2], precision: 7); // P-Value score
            Assert.Equal(expectedValues[index++], row.Change[3], precision: 7); // Martingale score
        }

        // Guard against a vacuous pass: every baseline value must have been compared.
        Assert.Equal(expectedValues.Count, index);
    }
}
// Runs the shared estimator checks (TestEstimatorCore) on an SsaChangePointEstimator, once with
// an invalid input whose column has the wrong type and once with an invalid input whose column
// has the wrong name.
public void TestSsaChangePointEstimator()
{
    int confidenceLevel = 95;
    int historyLength = 10;
    int seasonLength = 10;
    int seasonCount = 5;
    int trainingLength = seasonCount * seasonLength;

    List<Data> series = new List<Data>();
    var dataView = ML.Data.LoadFromEnumerable(series);

    // Seasonal part: seasonCount repetitions of 0 .. seasonLength - 1.
    for (int n = 0; n < trainingLength; n++)
    {
        series.Add(new Data(n % seasonLength));
    }

    // Ramp: 0, 100, 200, ...
    for (int step = 0; step < historyLength; step++)
    {
        series.Add(new Data(step * 100));
    }

    // Inputs the estimator is expected to reject.
    var wrongNameRows = new List<TestDataXY>
    {
        new TestDataXY() { A = new float[InputSize] }
    };
    var wrongTypeRows = new List<TestDataDifferentType>
    {
        new TestDataDifferentType() { data_0 = new string[InputSize] }
    };

    var pipe = new SsaChangePointEstimator(Env, "Change", confidenceLevel, historyLength, trainingLength, seasonLength, "Value");

    var invalidDataWrongNames = ML.Data.LoadFromEnumerable(wrongNameRows);
    var invalidDataWrongTypes = ML.Data.LoadFromEnumerable(wrongTypeRows);

    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);

    Done();
}
// Runs the shared estimator checks (TestEstimatorCore) on an SsaChangePointEstimator built with
// the streaming-data-view API, once per kind of invalid input (wrong column type, wrong column
// name).
void TestSsaChangePointEstimator()
{
    int confidenceLevel = 95;
    int historyLength = 10;
    int seasonLength = 10;
    int seasonCount = 5;
    int trainingLength = seasonCount * seasonLength;

    List<Data> series = new List<Data>();
    var dataView = Env.CreateStreamingDataView(series);

    // Seasonal part: seasonCount repetitions of 0 .. seasonLength - 1.
    for (int n = 0; n < trainingLength; n++)
    {
        series.Add(new Data(n % seasonLength));
    }

    // Ramp: 0, 100, 200, ...
    for (int step = 0; step < historyLength; step++)
    {
        series.Add(new Data(step * 100));
    }

    // Inputs the estimator is expected to reject.
    var wrongNameRows = new List<TestDataXY>
    {
        new TestDataXY() { A = new float[inputSize] }
    };
    var wrongTypeRows = new List<TestDataDifferntType>
    {
        new TestDataDifferntType() { data_0 = new string[inputSize] }
    };

    var pipe = new SsaChangePointEstimator(Env, "Value", "Change", confidenceLevel, historyLength, trainingLength, seasonLength);

    var invalidDataWrongNames = ComponentCreation.CreateDataView(Env, wrongNameRows);
    var invalidDataWrongTypes = ComponentCreation.CreateDataView(Env, wrongTypeRows);

    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes);
    TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames);

    Done();
}
// Fits an SSA change point detector on the first half of `source` and then feeds the second
// half through a prediction engine one point at a time.
//
// Returns an array with one slot per input element. Slots belonging to the training half keep
// their default value of 0; every later slot holds element `OutputNumber` of the prediction
// vector produced for that point. An empty input yields an empty array.
public IList<double> Execute(IList<double> source)
{
    var total = source.Count;
    if (total == 0)
    {
        return new double[0];
    }

    var result = new double[total];
    Contract.Assert(Context != null);

    // The first half of the series is the training set.
    var trainCount = total / 2;
    var trainingRows = source.Take(trainCount).Select(PredictionData.MakePredictionData);

    var ml = new MLContext();
    var dataView = ml.Data.LoadFromEnumerable(trainingRows);

    const string outputColumnName = "Prediction";
    const string inputColumnName = "Value";

    // All window sizes are derived from the size of the training half.
    var historyLength = trainCount / 4;
    var trainingWindow = trainCount / 2;
    var seasonalWindow = trainCount / 8;

    var args = new SsaChangePointDetector.Arguments()
    {
        Source = inputColumnName,
        Name = outputColumnName,
        Confidence = Confidence,             // Detection confidence in the range [0, 100].
        ChangeHistoryLength = historyLength, // Sliding window on p-values used for the martingale score.
        TrainingWindowSize = trainingWindow, // Number of leading points used for training.
        SeasonalWindowSize = seasonalWindow, // Upper bound on the largest relevant seasonality.
    };

    // Train the change point detector.
    ITransformer model = new SsaChangePointEstimator(ml, args).Fit(dataView);

    // Create a prediction engine from the model for feeding new data.
    var engine = model.CreateTimeSeriesPredictionFunction<PredictionData, ChangePointPrediction>(ml);

    // Score the second half of the series, one point at a time.
    for (var position = trainCount; position < total; position++)
    {
        var point = PredictionData.MakePredictionData(source[position]);
        result[position] = engine.Predict(point).Prediction[OutputNumber];
    }

    return result;
}
// Sample: builds a time series (a list whose i-th element is the value at the i-th time slot)
// and runs SsaChangePointDetector over it to find points where the data distribution changed.
// Unlike IidChangePointDetector, SsaChangePointDetector can account for temporal seasonality
// in the data.
public static void SsaChangePointDetectorTransform()
{
    // The ML context is the entry point for ML.NET operations. It can be used for exception
    // tracking and logging, and is the source of randomness.
    var ml = new MLContext();

    // Sample series: a recurring 0..4 pattern followed by a change in trend.
    const int seasonLength = 5;
    const int seasonCount = 3;
    const int trainingLength = seasonLength * seasonCount;

    var data = new List<SsaChangePointData>();
    for (int n = 0; n < trainingLength; n++)
    {
        data.Add(new SsaChangePointData(n % seasonLength));
    }

    // This is the change point: 0, 100, 200, 300, 400.
    for (int step = 0; step < seasonLength; step++)
    {
        data.Add(new SsaChangePointData(step * 100));
    }

    // Convert data to IDataView.
    var dataView = ml.CreateStreamingDataView(data);

    // Set up the SsaChangePointDetector arguments.
    var inputColumnName = nameof(SsaChangePointData.Value);
    var outputColumnName = nameof(ChangePointPrediction.Prediction);
    var args = new SsaChangePointDetector.Arguments()
    {
        Source = inputColumnName,
        Name = outputColumnName,
        Confidence = 95,                      // Detection confidence in the range [0, 100].
        ChangeHistoryLength = 8,              // Window for detecting a change in trend; shorter windows are more sensitive to spikes.
        TrainingWindowSize = trainingLength,  // Number of points from the beginning of the sequence used for training.
        SeasonalWindowSize = seasonLength + 1 // Upper bound on the largest relevant seasonality in the input series.
    };

    // Fit on the series, then transform the same series.
    var fitted = new SsaChangePointEstimator(ml, args).Fit(dataView);
    var transformedData = fitted.Transform(dataView);

    // Read the newly created column back as an IEnumerable of ChangePointPrediction.
    var predictions = transformedData.AsEnumerable<ChangePointPrediction>(ml, reuseRowObject: false);

    Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
    Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value");

    // Prediction rows come back in input order, so walk the input list alongside them.
    int rowIndex = 0;
    foreach (var row in predictions)
    {
        Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", data[rowIndex].Value, row.Prediction[0], row.Prediction[1], row.Prediction[2], row.Prediction[3]);
        rowIndex++;
    }
    Console.WriteLine("");

    // Prediction column obtained post-transformation.
    // Data    Alert    Score     P-Value    Martingale value
    // 0       0        -2.53     0.50       0.00
    // 1       0        -0.01     0.01       0.00
    // 2       0        0.76      0.14       0.00
    // 3       0        0.69      0.28       0.00
    // 4       0        1.44      0.18       0.00
    // 0       0        -1.84     0.17       0.00
    // 1       0        0.22      0.44       0.00
    // 2       0        0.20      0.45       0.00
    // 3       0        0.16      0.47       0.00
    // 4       0        1.33      0.18       0.00
    // 0       0        -1.79     0.07       0.00
    // 1       0        0.16      0.50       0.00
    // 2       0        0.09      0.50       0.00
    // 3       0        0.08      0.45       0.00
    // 4       0        1.31      0.12       0.00
    // 0       0        -1.79     0.07       0.00
    // 100     1        99.16     0.00       4031.94     <-- alert is on, predicted changepoint
    // 200     0        185.23    0.00       731260.87
    // 300     0        270.40    0.01       3578470.47
    // 400     0        357.11    0.03       45298370.86
}
// Sample: change point detection as in the transform sample, but showing how to train a model
// that runs predictions on streaming data, and how to persist the trained model and re-load it.
public static void SsaChangePointDetectorPrediction()
{
    // The ML context is the entry point for ML.NET operations. It can be used for exception
    // tracking and logging, and is the source of randomness.
    var ml = new MLContext();

    // Sample series with a recurring 0..4 pattern.
    const int seasonLength = 5;
    const int seasonCount = 3;
    const int trainingLength = seasonLength * seasonCount;

    var data = new List<SsaChangePointData>();
    for (int n = 0; n < trainingLength; n++)
    {
        data.Add(new SsaChangePointData(n % seasonLength));
    }

    // Convert data to IDataView.
    var dataView = ml.CreateStreamingDataView(data);

    // Set up the SsaChangePointDetector arguments.
    var inputColumnName = nameof(SsaChangePointData.Value);
    var outputColumnName = nameof(ChangePointPrediction.Prediction);
    var args = new SsaChangePointDetector.Arguments()
    {
        Source = inputColumnName,
        Name = outputColumnName,
        Confidence = 95,                      // Detection confidence in the range [0, 100].
        ChangeHistoryLength = 8,              // Window for detecting a change in trend; shorter windows are more sensitive to spikes.
        TrainingWindowSize = trainingLength,  // Number of points from the beginning of the sequence used for training.
        SeasonalWindowSize = seasonLength + 1 // Upper bound on the largest relevant seasonality in the input series.
    };

    // Train the change point detector.
    ITransformer model = new SsaChangePointEstimator(ml, args).Fit(dataView);

    // Create a prediction engine from the model for feeding new data.
    var engine = model.CreateTimeSeriesPredictionFunction<SsaChangePointData, ChangePointPrediction>(ml);

    // Shared row printer for every streamed point: input value, then the four prediction slots.
    Action<int, ChangePointPrediction> printRow = (value, prediction) =>
        Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]);

    // Start streaming new data points with no change point to the prediction engine.
    Console.WriteLine($"Output from ChangePoint predictions on new data:");
    Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value");
    for (int value = 0; value < 5; value++)
    {
        printRow(value, engine.Predict(new SsaChangePointData(value)));
    }

    // Now stream data points that reflect a change in trend: 100, 200, ..., 500.
    for (int value = 100; value <= 500; value += 100)
    {
        printRow(value, engine.Predict(new SsaChangePointData(value)));
    }

    // Now we demonstrate saving and loading the model.

    // Save the model that exists within the prediction engine.
    // The engine has been updating this model with every new data point.
    var modelPath = "model.zip";
    engine.CheckPoint(ml, modelPath);

    // Load the model.
    using (var file = File.OpenRead(modelPath))
    {
        model = TransformerChain.LoadFrom(ml, file);
    }

    // We must create a new prediction engine from the persisted model.
    engine = model.CreateTimeSeriesPredictionFunction<SsaChangePointData, ChangePointPrediction>(ml);

    // Run predictions on the loaded model.
    for (int value = 100; value <= 500; value += 100)
    {
        printRow(value, engine.Predict(new SsaChangePointData(value)));
    }

    // Output from ChangePoint predictions on new data:
    // Data    Alert    Score     P-Value    Martingale value
    // 0       0        -1.01     0.50       0.00
    // 1       0        -0.24     0.22       0.00
    // 2       0        -0.31     0.30       0.00
    // 3       0        0.44      0.01       0.00
    // 4       0        2.16      0.00       0.24
    // 100     0        86.23     0.00       2076098.24
    // 200     0        171.38    0.00       809668524.21
    // 300     1        256.83    0.01       22130423541.93    <-- alert is on, note that delay is expected
    // 400     0        326.55    0.04       241162710263.29
    // 500     0        364.82    0.08       597660527041.45   <-- saved to disk
    // 100     0        -58.58    0.15       1096021098844.34  <-- loaded from disk and running new predictions
    // 200     0        -41.24    0.20       97579154688.98
    // 300     0        -30.61    0.24       95319753.87
    // 400     0        58.87     0.38       14.24
    // 500     0        219.28    0.36       0.05
}
// Trains an SSA change point detector on five seasons of a repeating 0..9 pattern followed by
// a ramp (0, 100, ..., 900), transforms the same data, and compares the leading prediction rows
// against recorded baseline values. The baselines differ slightly under NETCOREAPP3_1.
public void ChangePointDetectionWithSeasonality()
{
    var env = new MLContext(1);
    const int changeHistorySize = 10;
    const int seasonalitySize = 10;
    const int numberOfSeasonsInTraining = 5;
    const int maxTrainingSize = numberOfSeasonsInTraining * seasonalitySize;

    List<Data> data = new List<Data>();
    var dataView = env.Data.LoadFromEnumerable(data);

    var args = new SsaChangePointDetector.Options()
    {
        Confidence = 95,
        Source = "Value",
        Name = "Change",
        ChangeHistoryLength = changeHistorySize,
        TrainingWindowSize = maxTrainingSize,
        SeasonalWindowSize = seasonalitySize
    };

    // Seasonal part: the values 0..9 repeated once per training season.
    for (int j = 0; j < numberOfSeasonsInTraining; j++)
    {
        for (int i = 0; i < seasonalitySize; i++)
        {
            data.Add(new Data(i));
        }
    }

    // Ramp appended after the seasonal part.
    for (int i = 0; i < changeHistorySize; i++)
    {
        data.Add(new Data(i * 100));
    }

    // Train
    var detector = new SsaChangePointEstimator(env, args).Fit(dataView);

    // Transform
    var output = detector.Transform(dataView);

    // Baseline values, four per row: alert, raw score, p-value score, martingale score.
    // [TEST_STABILITY]: dotnet core 3.1 generates slightly different result
#if NETCOREAPP3_1
    List<double> expectedValues = new List<double>()
    {
        0, -3.31410551071167, 0.5, 5.12000000000001E-08,
        0, 1.570083498954773, 5.2001145245395008E-07, 0.012414560443710681,
        0, 1.2854313850402832, 0.2881081472302483, 0.020389485008225454,
        0, -1.0950632095336914, 0.3666388047550645, 0.02695657272695535
    };
#else
    List<double> expectedValues = new List<double>()
    {
        0, -3.31410598754883, 0.5, 5.12000000000001E-08,
        0, 1.5700820684432983, 5.2001145245395008E-07, 0.012414560443710681,
        0, 1.2854313254356384, 0.28810801662678009, 0.02038940454467935,
        0, -1.0950627326965332, 0.36663890634019225, 0.026956459625565483
    };
#endif

    // Get predictions. foreach disposes the underlying enumerator even when an assertion
    // throws, which the previous hand-rolled MoveNext loop did not do.
    int index = 0;
    foreach (var row in env.Data.CreateEnumerable<Prediction>(output, true))
    {
        if (index >= expectedValues.Count)
        {
            break;
        }

        Assert.Equal(expectedValues[index++], row.Change[0], precision: 7); // Alert
        Assert.Equal(expectedValues[index++], row.Change[1], precision: 7); // Raw score
        Assert.Equal(expectedValues[index++], row.Change[2], precision: 7); // P-Value score
        Assert.Equal(expectedValues[index++], row.Change[3], precision: 7); // Martingale score
    }

    // Guard against a vacuous pass: every baseline value must have been compared.
    Assert.Equal(expectedValues.Count, index);
}
// Sample: builds a time series (a list whose i-th element is the value at the i-th time slot)
// and runs SsaChangePointDetector over it to find points where the data distribution changed.
public static void SsaChangePointDetectorTransform()
{
    // The ML context is the entry point for ML.NET operations. It can be used for exception
    // tracking and logging, and is the source of randomness.
    var ml = new MLContext();

    // Sample series with a single change: eight 5s followed by eight 7s.
    const int size = 16;
    const int half = size / 2;
    var data = new List<Data>(size);
    for (int n = 0; n < half; n++)
    {
        data.Add(new Data(5));
    }

    // This is the change point.
    for (int n = 0; n < half; n++)
    {
        data.Add(new Data(7));
    }

    // Convert data to IDataView.
    var dataView = ml.CreateStreamingDataView(data);

    // Set up the SsaChangePointDetector arguments.
    string outputColumnName = "Prediction";
    string inputColumnName = "Value";
    var args = new SsaChangePointDetector.Arguments()
    {
        Source = inputColumnName,
        Name = outputColumnName,
        Confidence = 95,                // Detection confidence in the range [0, 100].
        ChangeHistoryLength = size / 4, // Sliding window on p-values used for the martingale score.
        TrainingWindowSize = size / 2,  // Number of points from the beginning of the sequence used for training.
        SeasonalWindowSize = size / 8,  // Upper bound on the largest relevant seasonality in the input series.
    };

    // Fit on the series, then transform the same series.
    var fitted = new SsaChangePointEstimator(ml, args).Fit(dataView);
    var transformedData = fitted.Transform(dataView);

    // Read the newly created column back as an IEnumerable of ChangePointPrediction.
    var predictions = transformedData.AsEnumerable<ChangePointPrediction>(ml, reuseRowObject: false);

    Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
    Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value");

    // Prediction rows come back in input order, so walk the input list alongside them.
    int rowIndex = 0;
    foreach (var row in predictions)
    {
        Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", data[rowIndex].Value, row.Prediction[0], row.Prediction[1], row.Prediction[2], row.Prediction[3]);
        rowIndex++;
    }
    Console.WriteLine("");

    // Prediction column obtained post-transformation.
    // Data    Alert    Score    P-Value    Martingale value
    // 5       0        0.00     0.50       0.00
    // 5       0        0.00     0.50       0.00
    // 5       0        0.00     0.50       0.00
    // 5       0        0.00     0.50       0.00
    // 5       0        0.00     0.50       0.00
    // 5       0        0.00     0.50       0.00
    // 5       0        0.00     0.50       0.00
    // 5       0        0.00     0.50       0.00
    // 7       1        2.00     0.00       10298.67    <-- alert is on, predicted changepoint
    // 7       0        1.00     0.31       15741.58
    // 7       0        0.00     0.28       26487.48
    // 7       0        0.00     0.28       44569.02
    // 7       0        0.00     0.28       0.01
    // 7       0        0.00     0.38       0.01
    // 7       0        0.00     0.50       0.00
    // 7       0        0.00     0.50       0.00
}