/**
 * Computes the anomaly likelihood for a single incoming record and appends
 * that record to the score history kept by this instance.
 *
 * While fewer than probationaryPeriod records have been collected the method
 * answers 0.5. Afterwards the distribution parameters are (re)estimated from
 * the history whenever none exist yet or the iteration counter is a multiple
 * of reestimationPeriod; the record is then scored against them and the
 * returned value is one minus the likelihood reported for it.
 *
 * @param value         input (metric) value of the record
 * @param anomalyScore  current anomaly score of the record
 * @param timestamp     timestamp of the record
 * @return              the anomaly likelihood computed for this record
 */
public double AnomalyProbability(double value, double anomalyScore, DateTime timestamp)
{
    // NOTE(review): if DateTime is System.DateTime (a value type) this
    // comparison can never be true - confirm which type is in scope here.
    if (timestamp == null)
    {
        timestamp = new DateTime();
    }

    Sample currentSample = new Sample(timestamp, value, anomalyScore);

    // Default answer while still inside the probationary period.
    double result = 0.5;

    if (historicalScores.Count >= probationaryPeriod)
    {
        bool mustReestimate = distribution == null || iteration % reestimationPeriod == 0;
        if (mustReestimate)
        {
            AnomalyLikelihoodMetrics estimate =
                EstimateAnomalyLikelihoods(historicalScores, 10, claLearningPeriod);
            distribution = estimate.GetParams();
        }

        List<Sample> batch = new List<Sample> { currentSample };
        AnomalyLikelihoodMetrics updated = UpdateAnomalyLikelihoods(batch, distribution);

        // Carry the updated estimator state over to the next call.
        distribution = updated.GetParams();
        result = 1.0 - updated.GetLikelihoods()[0];
    }

    historicalScores.Add(currentSample);
    iteration += 1;

    return result;
}
/**
 * Returns a flag indicating whether the specified params are valid.
 *
 * The params are rejected when the reference itself is null, when the
 * distribution or the moving average is null, or when any of the
 * distribution's mean, variance or stdev is exactly zero.
 *
 * @param params    the <see cref="AnomalyParams"/> to validate; may be null
 * @return          true if the params passed every check, false otherwise
 */
public bool IsValidEstimatorParams(AnomalyParams @params)
{
    // A missing params object is simply "not valid": report it instead of
    // failing with a NullReferenceException on the first member access.
    if (@params == null)
    {
        return false;
    }

    Statistic stat = @params.Distribution();
    if (stat == null || @params.MovingAverage() == null)
    {
        return false;
    }

    return !(stat.mean == 0 || stat.variance == 0 || stat.stdev == 0);
}
/**
 * Compute updated probabilities for anomalyScores using the given params.
 *
 * Each incoming score is folded into the moving average carried by the
 * params, the resulting average is scored against the params' distribution,
 * and the likelihoods are filtered together with the historical likelihoods
 * stored in the params. The returned metrics carry a new params object with
 * the same distribution, the advanced moving average and the trailing
 * window of unfiltered likelihoods.
 *
 * @param anomalyScores a list of <see cref="Sample"/> records (date, value, score);
 *                      must contain at least one entry
 * @param params        the <see cref="AnomalyParams"/> obtained from the
 *                      <see cref="AnomalyLikelihoodMetrics"/> of a previous
 *                      EstimateAnomalyLikelihoods / UpdateAnomalyLikelihoods call
 * @return              the filtered likelihoods for the given scores, the averaged
 *                      records and the updated params
 * @throws ArgumentException if anomalyScores is empty or params fails
 *                           IsValidEstimatorParams
 */
public AnomalyLikelihoodMetrics UpdateAnomalyLikelihoods(List<Sample> anomalyScores, AnomalyParams @params)
{
    int anomalySize = anomalyScores.Count;

    if (LOG.IsDebugEnabled)
    {
        LOG.Debug("in updateAnomalyLikelihoods");
        LOG.Debug(string.Format("Number of anomaly scores: {0}", anomalySize));
        LOG.Debug(string.Format("First 20: {0}", anomalyScores.SubList(0, Math.Min(20, anomalySize))));
        LOG.Debug(string.Format("Params: {0}", @params));
    }

    if (anomalySize == 0)
    {
        throw new ArgumentException("Must have at least one anomaly score.");
    }

    if (!IsValidEstimatorParams(@params))
    {
        throw new ArgumentException("\"params\" is not a valid parameter structure");
    }

    // When no likelihood history exists yet, seed it with a single 1 and
    // rebuild the params around that seed.
    double[] histLikelihoods = @params.HistoricalLikelihoods();
    if (histLikelihoods == null || histLikelihoods.Length == 0)
    {
        histLikelihoods = new double[] { 1 };

        Parameters anomalyParameters = Parameters.Empty();
        anomalyParameters.SetParameterByKey(Parameters.KEY.ANOMALY_KEY_DIST, @params.Distribution());
        anomalyParameters.SetParameterByKey(Parameters.KEY.ANOMALY_KEY_MVG_AVG, @params.MovingAverage());
        anomalyParameters.SetParameterByKey(Parameters.KEY.ANOMALY_KEY_HIST_LIKE, histLikelihoods);
        @params = new AnomalyParams(anomalyParameters);
    }

    // Compute moving averages of these new scores using the previous values
    // as well as likelihood for these scores using the old estimator
    MovingAverage mvgAvg = (MovingAverage)@params.MovingAverage();
    List<double> historicalValues = mvgAvg.GetSlidingWindow();
    double total = mvgAvg.GetTotal();
    int windowSize = mvgAvg.GetWindowSize();

    // The distribution does not change while scoring, so look it up once.
    Statistic estimator = (Statistic)@params.Distribution();

    List<Sample> aggRecordList = new List<Sample>(anomalySize);
    double[] likelihoods = new double[anomalySize];
    int i = 0;
    foreach (Sample sample in anomalyScores)
    {
        MovingAverage.Calculation calc = MovingAverage.Compute(historicalValues, total, sample.score, windowSize);
        aggRecordList.Add(new Sample(sample.date, sample.value, calc.GetAverage()));
        total = calc.GetTotal();
        likelihoods[i++] = NormalProbability(calc.GetAverage(), estimator);
    }

    // Filter the likelihood values. First we prepend the historical likelihoods
    // to the current set. Then we filter the values. We peel off the likelihoods
    // to return and the last windowSize values to store for later.
    double[] likelihoods2 = ArrayUtils.Concat(histLikelihoods, likelihoods);
    double[] filteredLikelihoods = FilterLikelihoods(likelihoods2);
    likelihoods = Arrays.CopyOfRange(
        filteredLikelihoods,
        filteredLikelihoods.Length - likelihoods.Length,
        filteredLikelihoods.Length);

    // Keep the TRAILING window. The earlier CopyOf(array, length - window)
    // call kept the leading part instead (assuming Arrays.CopyOf follows
    // java.util.Arrays.copyOf semantics), which contradicts the intent stated
    // above and the slicing EstimateAnomalyLikelihoods uses for the same key.
    int keep = Math.Min(windowSize, likelihoods2.Length);
    double[] historicalLikelihoods = Arrays.CopyOfRange(
        likelihoods2,
        likelihoods2.Length - keep,
        likelihoods2.Length);

    // Update the estimator
    Parameters newAnomalyParameters = Parameters.Empty();
    newAnomalyParameters.SetParameterByKey(Parameters.KEY.ANOMALY_KEY_DIST, @params.Distribution());
    newAnomalyParameters.SetParameterByKey(Parameters.KEY.ANOMALY_KEY_MVG_AVG, new MovingAverage(historicalValues, total, windowSize));
    newAnomalyParameters.SetParameterByKey(Parameters.KEY.ANOMALY_KEY_HIST_LIKE, historicalLikelihoods);
    AnomalyParams newParams = new AnomalyParams(newAnomalyParameters);

    return new AnomalyLikelihoodMetrics(
        likelihoods,
        new AveragedAnomalyRecordList(aggRecordList, historicalValues, total),
        newParams);
}
/**
 * Given a series of anomaly scores, compute the likelihood for each score. This
 * function should be called once on a bunch of historical anomaly scores for an
 * initial estimate of the distribution. It should be called again every so often
 * (say every 50 records) to update the estimate.
 *
 * @param anomalyScores     the records to estimate from; must contain at least one entry
 * @param averagingWindow   window size handed to the moving average of the scores
 * @param skipRecords       number of leading averaged records left out of the
 *                          distribution estimate; when no more than this many
 *                          averaged records exist the null distribution is used
 * @return                  the filtered likelihoods, the averaged records and the
 *                          estimator params (distribution, moving average and the
 *                          trailing window of unfiltered likelihoods)
 * @throws ArgumentException if anomalyScores is empty
 */
public AnomalyLikelihoodMetrics EstimateAnomalyLikelihoods(List<Sample> anomalyScores, int averagingWindow, int skipRecords)
{
    if (anomalyScores.Count == 0)
    {
        throw new ArgumentException("Must have at least one anomaly score.");
    }

    // Compute averaged anomaly scores
    AveragedAnomalyRecordList records = AnomalyScoreMovingAverage(anomalyScores, averagingWindow);

    // Estimate the distribution of anomaly scores based on aggregated records
    Statistic distribution;
    if (records.AveragedRecords.Count <= skipRecords)
    {
        distribution = NullDistribution();
    }
    else
    {
        List<double> samples = records.GetMetrics();
        distribution = EstimateNormal(samples.Skip(skipRecords).ToArray(), true);

        // Taken from the Python documentation:
        // HACK ALERT! The CLA model currently does not handle constant metric
        // values very well (time of day encoder changes sometimes lead to
        // unstable SDR's even though the metric is constant). Until this is
        // resolved, we explicitly detect and handle completely flat metric
        // values by reporting them as not anomalous.
        samples = records.GetSamples();
        Statistic metricDistribution = EstimateNormal(samples.Skip(skipRecords).ToArray(), false);
        if (metricDistribution.variance < 1.5e-5)
        {
            distribution = NullDistribution();
        }
    }

    // Estimate likelihoods based on this distribution
    int i = 0;
    double[] likelihoods = new double[records.AveragedRecords.Count];
    foreach (Sample sample in records.AveragedRecords)
    {
        likelihoods[i++] = NormalProbability(sample.score, distribution);
    }

    // Filter likelihood values
    double[] filteredLikelihoods = FilterLikelihoods(likelihoods);

    int len = likelihoods.Length;

    // The stored history is the trailing window of the UNFILTERED likelihoods.
    double[] historicalLikelihoods = len > 0
        ? Arrays.CopyOfRange(likelihoods, len - Math.Min(averagingWindow, len), len)
        : new double[0];

    Parameters anomalyParameters = Parameters.Empty();
    anomalyParameters.SetParameterByKey(Parameters.KEY.ANOMALY_KEY_DIST, distribution);
    anomalyParameters.SetParameterByKey(Parameters.KEY.ANOMALY_KEY_MVG_AVG, new MovingAverage(records.HistoricalValues, records.Total, averagingWindow));
    anomalyParameters.SetParameterByKey(Parameters.KEY.ANOMALY_KEY_HIST_LIKE, historicalLikelihoods);
    AnomalyParams @params = new AnomalyParams(anomalyParameters);

    if (LOG.IsDebugEnabled)
    {
        // Join the values explicitly: formatting a double[] directly prints
        // only its type name. Take(20) also copes with fewer than 20 entries.
        LOG.Debug(string.Format(
            "Discovered params={0} Number of likelihoods:{1} First 20 likelihoods:{2}",
            @params,
            len,
            string.Join(", ", filteredLikelihoods.Take(20))));
    }

    return new AnomalyLikelihoodMetrics(filteredLikelihoods, records, @params);
}