/// <summary>
        /// Stores synthetic historic results in <paramref name="repository"/>: for every day
        /// number 1..31 one entry tagged marketplace "EU" and one tagged marketplace "NA",
        /// both keyed by <c>CreateDate(2018, 7, day)</c>.
        /// </summary>
        /// <param name="repository">Repository that receives the generated results.</param>
        private void FillRepositoryWithPreviousResults(IMetricsRepository repository)
        {
            // Wraps a Size metric and a Mean("sales") metric into a single analyzer context.
            AnalyzerContext BuildContext(double size, double meanSales)
            {
                var metricsByAnalyzer = new Dictionary<IAnalyzer<IMetric>, IMetric>
                {
                    { Initializers.Size(), new DoubleMetric(MetricEntity.Dataset, "*", "Size", size) },
                    { Initializers.Mean("sales"), new DoubleMetric(MetricEntity.Column, "sales", "Mean", meanSales) }
                };

                return new AnalyzerContext(metricsByAnalyzer);
            }

            // Result key for the given timestamp, tagged with a marketplace.
            ResultKey KeyFor(long timestamp, string marketplace)
            {
                var tags = new Dictionary<string, string>
                {
                    { "marketplace", marketplace }
                };

                return new ResultKey(timestamp, tags);
            }

            foreach (int day in Enumerable.Range(1, 31))
            {
                // EU size is a third of the day number (rounded down); NA size equals the day number.
                AnalyzerContext euContext = BuildContext(Math.Floor(day / 3.0), day * 7);
                AnalyzerContext naContext = BuildContext(day, day * 9);

                long timestamp = CreateDate(2018, 7, day);

                repository.Save(KeyFor(timestamp, "EU"), euContext);
                repository.Save(KeyFor(timestamp, "NA"), naContext);
            }
        }
 // Test method; the visible part starts building a metrics dictionary whose first entry maps
 // the Size analyzer (no filter) to a DoubleMetric wrapping Try<double>.From(() => 5.0).
 // NOTE(review): this snippet is truncated in this view - the dictionary initializer and the
 // method body are never closed, so the remaining entries and the assertions are not visible.
 // NOTE(review): the DoubleMetric arguments here are ("Size", "*") while other code in this
 // file passes ("*", "Size") - confirm which order is intended.
 public void save_should_ignore_failed_result_metrics_when_saving()
 {
     Dictionary <IAnalyzer <IMetric>, IMetric> metrics = new Dictionary <IAnalyzer <IMetric>, IMetric>
     {
         {
             Initializers.Size(Option <string> .None),
             new DoubleMetric(MetricEntity.Column, "Size", "*", Try <double> .From(() => 5.0))
         },
 // Test method; the visible part creates a sample ArgumentException and starts building an
 // AnalyzerContext whose first entry maps the Size analyzer (no filter) to a DoubleMetric
 // wrapping Try<double>.From(() => 5.0).
 // NOTE(review): this snippet is truncated in this view - the dictionary initializer, the
 // constructor call and the method body are never closed; sampleException is not used in the
 // visible part.
 public void analysis_results_serialization_with_mixed_Values_should_fail()
 {
     ArgumentException sampleException = new ArgumentException("Some");
     AnalyzerContext   analyzerContextWithMixedValues = new AnalyzerContext(
         new Dictionary <IAnalyzer <IMetric>, IMetric>
     {
         {
             Initializers.Size(Option <string> .None),
             new DoubleMetric(MetricEntity.Column, "Size", "*", Try <double> .From(() => 5.0))
         },
        /// <summary>
        /// Runs a verification suite over <paramref name="data"/> that combines
        /// <paramref name="otherCheck"/>, the additional analyzers and two anomaly checks
        /// (one on Size, one on the mean of "sales"), then saves or appends the outcome to
        /// <paramref name="repository"/>.
        /// </summary>
        /// <param name="data">Data the suite is run on.</param>
        /// <param name="repository">Repository used by the suite and for storing this run.</param>
        /// <param name="otherCheck">Additional check added to the suite.</param>
        /// <param name="additionalRequiredAnalyzers">Analyzers required in addition to the checks.</param>
        /// <returns>The result of running the suite.</returns>
        private VerificationResult CreateAnomalyChecksAndRunEverything(
            DataFrame data,
            IMetricsRepository repository,
            Check otherCheck,
            IEnumerable<IAnalyzer<IMetric>> additionalRequiredAnalyzers)
        {
            // The new data point comes from the EU marketplace, so the anomaly checks only
            // compare against history carrying the EU tag.
            var euOnly = new Dictionary<string, string>
            {
                { "marketplace", "EU" }
            };

            // History window handed to both anomaly check configurations.
            var windowStart = CreateDate(2018, 1, 1);
            var windowEnd = CreateDate(2018, 8, 1);

            // Size check: reported at Error level.
            var sizeConfig = new AnomalyCheckConfig(
                CheckLevel.Error,
                "Size only increases",
                euOnly,
                windowStart,
                windowEnd);
            var sizeStrategy = new AbsoluteChangeStrategy(0);

            // Mean sales check: reported at Warning level.
            var meanSalesConfig = new AnomalyCheckConfig(
                CheckLevel.Warning,
                "Sales mean within 2 standard deviations",
                euOnly,
                windowStart,
                windowEnd);
            var meanSalesStrategy = new OnlineNormalStrategy(
                upperDeviationFactor: 2,
                lowerDeviationFactor: Option<double>.None,
                ignoreAnomalies: false);

            // Key under which the metrics of this run are stored.
            var runTags = new Dictionary<string, string>
            {
                { "marketplace", "EU" }
            };
            var runKey = new ResultKey(CreateDate(2018, 8, 1), runTags);

            var suiteWithRepository = new VerificationSuite()
                .OnData(data)
                .AddCheck(otherCheck)
                .AddRequiredAnalyzers(additionalRequiredAnalyzers)
                .UseRepository(repository);

            var suiteWithAnomalyChecks = suiteWithRepository
                .AddAnomalyCheck(sizeStrategy, Initializers.Size(), sizeConfig)
                .AddAnomalyCheck(meanSalesStrategy, Initializers.Mean("sales"), meanSalesConfig);

            // The new data point is saved in the repository once everything is calculated.
            return suiteWithAnomalyChecks
                .SaveOrAppendResult(runKey)
                .Run();
        }
Пример #5
0
        /// <summary>
        /// Computes the metrics of <paramref name="analyzers"/> on <paramref name="data"/>.
        /// Results already stored under the reuse key of <paramref name="metricsRepositoryOptions"/>
        /// are loaded instead of recomputed, analyzers whose preconditions fail are reported as
        /// precondition-failure metrics, and the remaining analyzers are run as scanning or
        /// grouping analyzers.
        /// </summary>
        /// <param name="data">Data to analyze.</param>
        /// <param name="analyzers">Analyzers to run; an empty sequence yields an empty context.</param>
        /// <param name="aggregateWith">Optional state loader passed on to the analyzer runs.</param>
        /// <param name="saveStatesWith">Optional state persister passed on to the analyzer runs.</param>
        /// <param name="storageLevelOfGroupedDataForMultiplePasses">Storage level passed on to the grouping analyzer runs.</param>
        /// <param name="metricsRepositoryOptions">Optional repository settings for reusing and saving results.</param>
        /// <param name="fileOutputOptions">Optional settings for writing JSON output files.</param>
        /// <returns>The combined context of reused, failed-precondition, scanning and grouping metrics.</returns>
        public static AnalyzerContext DoAnalysisRun
        (
            DataFrame data,
            IEnumerable<IAnalyzer<IMetric>> analyzers,
            Option<IStateLoader> aggregateWith,
            Option<IStatePersister> saveStatesWith,
            StorageLevel storageLevelOfGroupedDataForMultiplePasses,
            AnalysisRunnerRepositoryOptions metricsRepositoryOptions = default,
            AnalysisRunnerFileOutputOptions fileOutputOptions = default)
        {
            // Materialize once so the caller's sequence is enumerated a single time.
            IAnalyzer<IMetric>[] requestedAnalyzers = analyzers.ToArray();

            if (requestedAnalyzers.Length == 0)
            {
                return AnalyzerContext.Empty();
            }

            // Previously stored results are only loaded when both a repository and a reuse key are set.
            AnalyzerContext resultComputedPreviously = (metricsRepositoryOptions?.metricRepository.HasValue,
                                                        metricsRepositoryOptions?.reuseExistingResultsForKey.HasValue) switch
            {
                (true, true) => metricsRepositoryOptions?.metricRepository.Value
                    .LoadByKey(metricsRepositoryOptions.reuseExistingResultsForKey.Value)
                    .GetOrElse(AnalyzerContext.Empty()),
                _ => AnalyzerContext.Empty()
            };

            IEnumerable<IAnalyzer<IMetric>> analyzersAlreadyRan =
                resultComputedPreviously.MetricMap.Keys.AsEnumerable();

            // Except() is a set difference, so the result is already free of duplicates.
            IEnumerable<IAnalyzer<IMetric>> analyzersToRun =
                requestedAnalyzers.Except(analyzersAlreadyRan).ToList();

            // Fetch the schema once instead of once per analyzer and per enumeration.
            var schema = data.Schema();

            // The sequences below are materialized because each of them is consumed several times;
            // left deferred, every enumeration would re-run the precondition checks.
            IEnumerable<IAnalyzer<IMetric>> passedAnalyzers = analyzersToRun
                .Where(analyzer => !FindFirstFailing(schema, analyzer.Preconditions()).HasValue)
                .ToList();

            IEnumerable<IAnalyzer<IMetric>> failedAnalyzers = analyzersToRun.Except(passedAnalyzers).ToList();

            AnalyzerContext preconditionFailures = ComputePreconditionFailureMetrics(failedAnalyzers, schema);

            IEnumerable<IGroupingAnalyzer<IMetric>> groupingAnalyzers =
                passedAnalyzers.OfType<IGroupingAnalyzer<IMetric>>().ToList();

            IEnumerable<IAnalyzer<IMetric>> allScanningAnalyzers =
                passedAnalyzers.Except(groupingAnalyzers).ToList();

            AnalyzerContext nonGroupedMetrics =
                RunScanningAnalyzers(data, allScanningAnalyzers, aggregateWith, saveStatesWith);

            // Row count taken from the Size metric, if one was computed by the scanning pass.
            // NOTE(review): Success.Value is read without checking that the metric succeeded -
            // confirm what happens for a failed Size metric.
            Option<double> numRowsOfData = nonGroupedMetrics.Metric(Initializers.Size()).Select(metric =>
            {
                if (metric is DoubleMetric dm)
                {
                    return dm.Value.Success.Value;
                }

                return 0;
            });

            AnalyzerContext groupedMetrics = AnalyzerContext.Empty();

            // Analyzers sharing the same grouping columns and the same filter are run together.
            // The key has to compare by value: a key holding the IOrderedEnumerable returned by
            // OrderBy compares by reference, which puts every analyzer into a group of its own.
            (string Columns, bool HasFilter, string Filter) GroupingKey(IGroupingAnalyzer<IMetric> analyzer)
            {
                Option<string> filter = GetFilterCondition(analyzer);
                string sortedColumns =
                    string.Join("\u0000", analyzer.GroupingColumns().OrderBy(columnName => columnName));

                return (sortedColumns, filter.HasValue, filter.HasValue ? filter.Value : string.Empty);
            }

            foreach (var analyzerGroup in groupingAnalyzers.GroupBy(analyzer => GroupingKey(analyzer)))
            {
                // Every member of the group has equal columns and filter, so the first one
                // supplies the values handed to the runner.
                IGroupingAnalyzer<IMetric> representative = analyzerGroup.First();
                List<string> groupingColumns =
                    representative.GroupingColumns().OrderBy(columnName => columnName).ToList();
                Option<string> filterCondition = GetFilterCondition(representative);

                (long numRows, AnalyzerContext metrics) =
                    RunGroupingAnalyzers(data,
                                         groupingColumns,
                                         filterCondition, analyzerGroup, aggregateWith, saveStatesWith,
                                         storageLevelOfGroupedDataForMultiplePasses, numRowsOfData);

                groupedMetrics += metrics;

                // Reuse the row count of the first grouping run when no Size metric provided one.
                if (!numRowsOfData.HasValue)
                {
                    numRowsOfData = new Option<double>(numRows);
                }
            }

            AnalyzerContext resultingAnalyzerContext =
                resultComputedPreviously + preconditionFailures + nonGroupedMetrics +
                groupedMetrics; //TODO: add kllMetrics

            if (metricsRepositoryOptions != null)
            {
                SaveOrAppendResultsIfNecessary(resultingAnalyzerContext,
                                               metricsRepositoryOptions.metricRepository,
                                               metricsRepositoryOptions.saveOrAppendResultsWithKey);
            }

            SaveJsonOutputsToFilesystemIfNecessary(fileOutputOptions, resultingAnalyzerContext);

            return resultingAnalyzerContext;
        }
 /// <summary>
 /// Builds an <see cref="Analysis"/> containing four analyzers: Size (no filter),
 /// Distinctness of "item" (no filter), Completeness of "att1" and Uniqueness of
 /// "att1" together with "att2".
 /// </summary>
 /// <returns>The configured analysis.</returns>
 private static Analysis CreateAnalysis()
 {
     string[] distinctnessColumns = { "item" };
     string[] uniquenessColumns = { "att1", "att2" };

     return new Analysis()
         .AddAnalyzer(Initializers.Size(Option<string>.None))
         .AddAnalyzer(Initializers.Distinctness(distinctnessColumns, Option<string>.None))
         .AddAnalyzer(Initializers.Completeness("att1"))
         .AddAnalyzer(Initializers.Uniqueness(uniquenessColumns));
 }