/// <summary>
 /// <para>
 /// In time series data, seasonality (or periodicity) is the presence of variations that occur at specific regular intervals,
 /// such as weekly, monthly, or quarterly.
 /// </para>
 /// <para>
 /// This method detects this predictable interval (or period) by adopting techniques of fourier analysis.
 /// Assuming the input values have the same time interval (e.g., sensor data collected at every second ordered by timestamps),
 /// this method takes a list of time-series data, and returns the regular period for the input seasonal data,
 /// if a predictable fluctuation or pattern can be found that recurs or repeats over this period throughout the input values.
 /// </para>
 /// <para>
 /// Returns -1 if no such pattern is found, that is, the input values do not follow a seasonal fluctuation.
 /// </para>
 /// </summary>
 /// <param name="catalog">The detect seasonality catalog.</param>
 /// <param name="input">Input DataView.The data is an instance of <see cref="Microsoft.ML.IDataView"/>.</param>
 /// <param name="inputColumnName">Name of column to process. The column data must be <see cref="System.Double"/>.</param>
 /// <param name="seasonalityWindowSize">An upper bound on the number of values to be considered in the input values.
 /// When set to -1, use the whole input to fit model; when set to a positive integer, only the first windowSize number
 /// of values will be considered. Default value is -1.</param>
 /// <param name="randomnessThreshold"><a href ="https://en.wikipedia.org/wiki/Correlogram">Randomness threshold</a>
 /// that specifies how confidently the input values follow a predictable pattern recurring as seasonal data.
 /// The range is between [0, 1]. By default, it is set as 0.95.
 /// </param>
 /// <returns>The regular interval for the input as seasonal data, otherwise return -1.</returns>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[LocalizeRootCause](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSeasonality.cs)]
 /// ]]>
 /// </format>
 /// </example>
 public static int DetectSeasonality(
     this AnomalyDetectionCatalog catalog,
     IDataView input,
     string inputColumnName,
     int seasonalityWindowSize  = -1,
     double randomnessThreshold = 0.95)
 => new SeasonalityDetector().DetectSeasonality(
     CatalogUtils.GetEnvironment(catalog),
     input,
     inputColumnName,
     seasonalityWindowSize,
     randomnessThreshold);
        /// <summary>
        /// Create <see cref="RootCause"/>, which localizes root causes using decision tree algorithm.
        /// </summary>
        /// <param name="catalog">The anomaly detection catalog.</param>
        /// <param name="src">Root cause's input. The data is an instance of <see cref="Microsoft.ML.TimeSeries.RootCauseLocalizationInput"/>.</param>
        /// <param name="beta">Beta is a weight parameter for user to choose.
        /// It is used when score is calculated for each root cause item. The range of beta should be in [0,1].
        /// For a larger beta, root cause items which have a large difference between value and expected value will get a high score.
        /// For a small beta, root cause items which have a high relative change will get a low score.</param>
        /// <param name="rootCauseThreshold">A threshold to determine whether the point should be root cause. The range of this threshold should be in [0,1].
        /// If the point's delta is equal to or larger than rootCauseThreshold multiplied by anomaly dimension point's delta, this point is treated as a root cause. Different threshold will turn out different results. Users can choose the delta according to their data and requirments.</param>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[LocalizeRootCause](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.3, double rootCauseThreshold = 0.95)
        {
            List <RootCause> causes = LocalizeRootCauses(catalog, src, beta, rootCauseThreshold);

            if (causes?.Count > 0)
            {
                return(causes[0]);
            }
            else
            {
                return(null);
            }
        }
        /// <summary>
        /// Create <see cref="SrCnnEntireAnomalyDetector"/>, which detects timeseries anomalies for entire input using SRCNN algorithm.
        /// </summary>
        /// <param name="catalog">The AnomalyDetectionCatalog.</param>
        /// <param name="input">Input DataView.</param>
        /// <param name="outputColumnName">Name of the column resulting from data processing of <paramref name="inputColumnName"/>.
        /// The column data is a vector of <see cref="System.Double"/>. The length of this vector varies depending on <paramref name="detectMode"/>.</param>
        /// <param name="inputColumnName">Name of column to process. The column data must be <see cref="System.Double"/>.</param>
        /// <param name="threshold">The threshold to determine an anomaly. An anomaly is detected when the calculated SR raw score for a given point is more than the set threshold. This threshold must  fall between [0,1], and its default value is 0.3.</param>
        /// <param name="batchSize">Divide the input data into batches to fit srcnn model.
        /// When set to -1, use the whole input to fit model instead of batch by batch, when set to a positive integer, use this number as batch size.
        /// Must be -1 or a positive integer no less than 12. Default value is 1024.</param>
        /// <param name="sensitivity">Sensitivity of boundaries, only useful when srCnnDetectMode is AnomalyAndMargin. Must be in [0,100]. Default value is 99.</param>
        /// <param name="detectMode">An enum type of <see cref="SrCnnDetectMode"/>.
        /// When set to AnomalyOnly, the output vector would be a 3-element Double vector of (IsAnomaly, RawScore, Mag).
        /// When set to AnomalyAndExpectedValue, the output vector would be a 4-element Double vector of (IsAnomaly, RawScore, Mag, ExpectedValue).
        /// When set to AnomalyAndMargin, the output vector would be a 7-element Double vector of (IsAnomaly, AnomalyScore, Mag, ExpectedValue, BoundaryUnit, UpperBoundary, LowerBoundary).
        /// The RawScore is output by SR to determine whether a point is an anomaly or not, under AnomalyAndMargin mode, when a point is an anomaly, an AnomalyScore will be calculated according to sensitivity setting.
        /// Default value is AnomalyOnly.</param>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[DetectEntireAnomalyBySrCnn](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName,
                                                           double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyOnly)
        {
            var options = new SrCnnEntireAnomalyDetectorOptions()
            {
                Threshold   = threshold,
                BatchSize   = batchSize,
                Sensitivity = sensitivity,
                DetectMode  = detectMode,
            };

            return(DetectEntireAnomalyBySrCnn(catalog, input, outputColumnName, inputColumnName, options));
        }
Пример #4
0
        /// <summary>
        /// Create the ML context.
        /// </summary>
        /// <param name="seed">Random seed. Set to <c>null</c> for a non-deterministic environment.</param>
        public MLContext(int?seed = null)
        {
            _env = new LocalEnvironment(seed);
            _env.AddListener(ProcessMessage);

            BinaryClassification     = new BinaryClassificationCatalog(_env);
            MulticlassClassification = new MulticlassClassificationCatalog(_env);
            Regression       = new RegressionCatalog(_env);
            Clustering       = new ClusteringCatalog(_env);
            Ranking          = new RankingCatalog(_env);
            AnomalyDetection = new AnomalyDetectionCatalog(_env);
            Transforms       = new TransformsCatalog(_env);
            Model            = new ModelOperationsCatalog(_env);
            Data             = new DataOperationsCatalog(_env);
        }
Пример #5
0
        /// <summary>
        /// Create <see cref="RootCause"/>, which localizes root causes using decision tree algorithm.
        /// </summary>
        /// <param name="catalog">The anomaly detection catalog.</param>
        /// <param name="src">Root cause's input. The data is an instance of <see cref="Microsoft.ML.TimeSeries.RootCauseLocalizationInput"/>.</param>
        /// <param name="beta">Beta is a weight parameter for user to choose. It is used when score is calculated for each root cause item. The range of beta should be in [0,1]. For a larger beta, root cause point which has a large difference between value and expected value will get a high score. On the contrary, for a small beta, root cause items which has a high relative change will get a high score.</param>
        /// <param name="anomalyDeltaThreshold">A threshold to determine whether the point should be root cause. If the point's delta is equal to or larger than anomalyDeltaThreshold multiplies anomaly dimension point's delta, this point is treated as a root cause. Different threshold will turn out different result. Users can choose the delta according to their data and requirment. </param>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[LocalizeRootCause](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.5, double anomalyDeltaThreshold = 0.95)
        {
            IHostEnvironment host = CatalogUtils.GetEnvironment(catalog);

            //check the root cause input
            CheckRootCauseInput(host, src);

            //check beta
            host.CheckUserArg(beta >= 0 && beta <= 1, nameof(beta), "Must be in [0,1]");

            //find out the root cause
            RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, beta, anomalyDeltaThreshold);
            RootCause         dst      = analyzer.Analyze();

            return(dst);
        }
        /// <summary>
        /// Outputs an ordered list of <see cref="RootCause"/>s. The order corresponds to which prepared cause is most likely to be the root cause.
        /// </summary>
        /// <param name="catalog">The anomaly detection catalog.</param>
        /// <param name="src">Root cause's input. The data is an instance of <see cref="Microsoft.ML.TimeSeries.RootCauseLocalizationInput"/>.</param>
        /// <param name="beta">Beta is a weight parameter for user to choose. It is used when score is calculated for each root cause item. The range of beta should be in [0,1]. For a larger beta, root cause point which has a large difference between value and expected value will get a high score. On the contrary, for a small beta, root cause items which has a high relative change will get a high score.</param>
        /// <param name="rootCauseThreshold">A threshold to determine whether the point should be root cause. The range of this threshold should be in [0,1].
        /// If the point's delta is equal to or larger than rootCauseThreshold multiplied by anomaly dimension point's delta, this point is treated as a root cause. Different threshold will turn out different results. Users can choose the delta according to their data and requirments.</param>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[LocalizeRootCauseMultipleDimensions](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseMultipleDimensions.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static List <RootCause> LocalizeRootCauses(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.5, double rootCauseThreshold = 0.95)
        {
            IHostEnvironment host = CatalogUtils.GetEnvironment(catalog);

            //check the root cause input
            CheckRootCauseInput(host, src);

            //check parameters
            host.CheckUserArg(beta >= 0 && beta <= 1, nameof(beta), "Must be in [0,1]");
            host.CheckUserArg(rootCauseThreshold >= 0 && rootCauseThreshold <= 1, nameof(rootCauseThreshold), "Must be in [0,1]");

            //find out the possible causes
            RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, beta, rootCauseThreshold);

            return(analyzer.AnalyzePossibleCauses());
        }
Пример #7
0
 internal AnomalyDetectionTrainers(AnomalyDetectionCatalog catalog)
     : base(catalog)
 {
 }
 /// <summary>
 /// Create <see cref="SrCnnEntireAnomalyDetector"/>, which detects timeseries anomalies for entire input using SRCNN algorithm.
 /// </summary>
 /// <param name="catalog">The AnomalyDetectionCatalog.</param>
 /// <param name="input">Input DataView.</param>
 /// <param name="outputColumnName">Name of the column resulting from data processing of <paramref name="inputColumnName"/>.
 /// The column data is a vector of <see cref="System.Double"/>. The length of this vector varies depending on <paramref name="options.DetectMode"/>.</param>
 /// <param name="inputColumnName">Name of column to process. The column data must be <see cref="System.Double"/>.</param>
 /// <param name="options">Defines the settings of the load operation.</param>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[DetectEntireAnomalyBySrCnn](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs)]
 /// ]]>
 /// </format>
 /// </example>
 public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName, SrCnnEntireAnomalyDetectorOptions options)
 => new SrCnnEntireAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, outputColumnName, inputColumnName, options);
Пример #9
0
 /// <summary>
 /// Create <see cref="SrCnnEntireAnomalyDetector"/>, which detects timeseries anomalies for entire input using SRCNN algorithm.
 /// </summary>
 /// <param name="catalog">The AnomalyDetectionCatalog.</param>
 /// <param name="input">Input DataView.</param>
 /// <param name="outputColumnName">Name of the column resulting from data processing of <paramref name="inputColumnName"/>.
 /// The column data is a vector of <see cref="System.Double"/>. The length of this vector varies depending on <paramref name="detectMode"/>.</param>
 /// <param name="inputColumnName">Name of column to process. The column data must be <see cref="System.Double"/>.</param>
 /// <param name="threshold">The threshold to determine an anomaly. An anomaly is detected when the calculated SR raw score for a given point is more than the set threshold. This threshold must  fall between [0,1], and its default value is 0.3.</param>
 /// <param name="batchSize">Divide the input data into batches to fit srcnn model.
 /// When set to -1, use the whole input to fit model instead of batch by batch, when set to a positive integer, use this number as batch size.
 /// Must be -1 or a positive integer no less than 12. Default value is 1024.</param>
 /// <param name="sensitivity">Sensitivity of boundaries, only useful when srCnnDetectMode is AnomalyAndMargin. Must be in [0,100]. Default value is 99.</param>
 /// <param name="detectMode">An enum type of <see cref="SrCnnDetectMode"/>.
 /// When set to AnomalyOnly, the output vector would be a 3-element Double vector of (IsAnomaly, RawScore, Mag).
 /// When set to AnomalyAndExpectedValue, the output vector would be a 4-element Double vector of (IsAnomaly, RawScore, Mag, ExpectedValue).
 /// When set to AnomalyAndMargin, the output vector would be a 7-element Double vector of (IsAnomaly, AnomalyScore, Mag, ExpectedValue, BoundaryUnit, UpperBoundary, LowerBoundary).
 /// The RawScore is output by SR to determine whether a point is an anomaly or not, under AnomalyAndMargin mode, when a point is an anomaly, an AnomalyScore will be calculated according to sensitivity setting.
 /// Default value is AnomalyOnly.</param>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[DetectEntireAnomalyBySrCnn](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs)]
 /// ]]>
 /// </format>
 /// </example>
 public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName,
                                                    double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyOnly)
 => new SrCnnEntireAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, inputColumnName, outputColumnName, threshold, batchSize, sensitivity, detectMode);