public static void RunAnalysesParallel(AFElement element, List<AFTime> aftimes, int analysesThreadCount, ConcurrentQueue<List<AFValue>> dataQueue)
        {
            // for each analysis configured for the element
            Parallel.ForEach(element.Analyses, new ParallelOptions() { MaxDegreeOfParallelism = analysesThreadCount }, (afAnalysis) =>
            {
                TimeSpan evaluationTime;
                int evaluationErrorsCount;
                var analysisRunner = new AnalysisRunner(Configuration.EnableAnalysisErrorOutput);

                var results = analysisRunner.Run(afAnalysis, aftimes, out evaluationTime, out evaluationErrorsCount);

                string prettyElementName = element.Parent != null ? string.Format("{0}\\{1}", element.Parent.Name, element.Name) : element.Name;
                var stats = new StatisticsInfo()
                {
                    AnalyseName = afAnalysis.Name,
                    Duration = evaluationTime,
                    ElementName = prettyElementName,
                    EvaluationsCount = aftimes.Count,
                    EvaluationsErrorCount = evaluationErrorsCount
                };

                // add the per-analysis statistics to the shared statistics queue
                Statistics.StatisticsQueue.Add(stats);

                // forward the evaluation results to the data queue, if one was provided
                if (dataQueue != null)
                {
                    dataQueue.Enqueue(results);
                }

            });
        }
        [Fact]
        public void should_parse_commit_information_from_file_correctly()
        {
            AnalysisRunner runner = new AnalysisRunner(input_path);

            runner.run();

            // xUnit convention: the expected value comes first, the actual value second.
            Assert.Equal(2, runner.CommitInfos.Count);

            Assert.Equal("5016e80", runner.CommitInfos[0].Hash);
            Assert.Equal(DateTime.Parse("2016-07-08"), runner.CommitInfos[0].AddTime);
            Assert.Equal(DateTime.Parse("2016-07-08"), runner.CommitInfos[0].CommitTime);
            Assert.Equal(new List<string> { "wangjian", "shengqi", "jijie" }, runner.CommitInfos[0].Devs);
            Assert.Equal("1919", runner.CommitInfos[0].StoryNumber);
            Assert.Equal("Change comments length from 255 to 500 characters for Upload teq feature, and fix some ie issue for text-area.", runner.CommitInfos[0].Comment);
            Assert.Equal(new HashSet<string>(), runner.CommitInfos[0].TestFileList);

            Assert.Equal("870b57c", runner.CommitInfos[1].Hash);
            Assert.Equal(DateTime.Parse("2016-07-08"), runner.CommitInfos[1].AddTime);
            Assert.Equal(DateTime.Parse("2016-07-08"), runner.CommitInfos[1].CommitTime);
            Assert.Equal(new List<string> { "naijia" }, runner.CommitInfos[1].Devs);
            Assert.Null(runner.CommitInfos[1].StoryNumber);
            Assert.Equal("Handle the case of no publications nor subscriptions in Tiger and MyData database when dropping existing publications and subscriptions", runner.CommitInfos[1].Comment);
            Assert.Equal(new HashSet<string>(), runner.CommitInfos[1].TestFileList);
        }
Example #3
        public void should_execute_incremental_metrics_example()
        {
            DataFrame dataSetDE = LoadIncrementalMetricsData(
                new[] { new object[] { 1, "ManufacturerA", "DE" }, new object[] { 2, "ManufacturerB", "DE" },
                        new object[] { 2, "ManufacturerC", "DE" } });

            DataFrame dataSetUS = LoadIncrementalMetricsData(
                new[]
            {
                new object[] { 3, "ManufacturerD", "US" }, new object[] { 4, "ManufacturerE", "US" },
                new object[] { 5, "ManufacturerF", "US" }
            });

            DataFrame dataSetCN = LoadIncrementalMetricsData(
                new[] { new object[] { 6, "ManufacturerG", "CN" }, new object[] { 7, "ManufacturerH", "CN" }, });

            // We initialize a new check for the following data fields
            var check = new Check(CheckLevel.Warning, "generic check")
                        .IsComplete("manufacturerName")
                        .ContainsURL("manufacturerName", val => val == 0.0)
                        .IsContainedIn("countryCode", new[] { "DE", "US", "CN" });


            // We create a new Analysis instance with the corresponding RequiredAnalyzers defined in the check
            Analysis analysis = new Analysis(check.RequiredAnalyzers());

            // We create a new in-memory state provider for each countryCode defined in the dataset
            InMemoryStateProvider deStates = new InMemoryStateProvider();
            InMemoryStateProvider usStates = new InMemoryStateProvider();
            InMemoryStateProvider cnStates = new InMemoryStateProvider();

            // These calls store the resulting metrics in a separate state provider for each dataset
            AnalysisRunner.Run(dataSetDE, analysis, saveStatesWith: deStates);
            AnalysisRunner.Run(dataSetUS, analysis, saveStatesWith: usStates);
            AnalysisRunner.Run(dataSetCN, analysis, saveStatesWith: cnStates);

            // Next, we can compute the metrics for the whole table from the partition states.
            // This just aggregates the previously calculated metrics; it doesn't perform any computation on the data.
            AnalyzerContext tableMetrics = AnalysisRunner.RunOnAggregatedStates(dataSetDE.Schema(), analysis,
                                                                                new[] { deStates, usStates, cnStates });

            // Let's now assume that a single partition changes. We only need to recompute the state of this
            // partition in order to update the metrics for the whole table.
            DataFrame updatedUsManufacturers = LoadIncrementalMetricsData(new[]
            {
                new object[] { 3, "ManufacturerDNew", "US" }, new object[] { 4, null, "US" },
                new object[] { 5, "ManufacturerFNew http://clickme.com", "US" },
            });

            // Recompute the state of the changed partition
            InMemoryStateProvider updatedUsStates = new InMemoryStateProvider();

            AnalysisRunner.Run(updatedUsManufacturers, analysis, updatedUsStates);

            // Recompute the metrics for the whole table from the partition states, now using the
            // updated US states. We do not need to touch the old data!
            AnalyzerContext updatedTableMetrics = AnalysisRunner.RunOnAggregatedStates(dataSetDE.Schema(), analysis,
                                                                                       new[] { deStates, updatedUsStates, cnStates });
        }
Example #4
        public ServiceRegistry()
        {
            _logger         = new UnityLogger();
            CheckerFactory  = new CheckerFactory(Logger);
            ExceptionLogger = new ExceptionLogger(Logger);
            ConfigFetcher   = new ConfigFetcher(Logger);
            AnalysisRunner  = new AnalysisRunner(Logger, CheckerFactory, ExceptionLogger);
        }
        /// <summary>
        /// Runs all check groups and returns the verification result. Metrics are computed from aggregated states, and the verification result includes all metrics generated during the run.
        /// </summary>
        /// <param name="schema">schema of the tabular data on which the checks should be verified</param>
        /// <param name="checks">a sequence of check objects to be executed</param>
        /// <param name="stateLoaders">loaders from which we retrieve the states to aggregate</param>
        /// <param name="requiredAnalysis">can be used to enforce the calculation of some metrics, regardless of whether there are constraints on them (optional)</param>
        /// <param name="saveStatesWith">persist resulting states for the configured analyzers (optional)</param>
        /// <param name="metricsRepository">repository used to persist and load metrics (optional)</param>
        /// <param name="saveOrAppendResultsWithKey">key under which results are saved to, or appended in, the metrics repository (optional)</param>
        /// <returns>Result for every check, including the overall status, a detailed status for each constraint and all metrics produced</returns>
        public VerificationResult RunOnAggregatedStates(
            StructType schema,
            IEnumerable<Check> checks,
            IEnumerable<IStateLoader> stateLoaders,
            Analysis requiredAnalysis,
            Option<IStatePersister> saveStatesWith,
            Option<IMetricsRepository> metricsRepository,
            Option<ResultKey> saveOrAppendResultsWithKey)
        {
            Analysis analysis = requiredAnalysis.AddAnalyzers(checks.SelectMany(check => check.RequiredAnalyzers()));

            AnalyzerContext analysisResults = AnalysisRunner.RunOnAggregatedStates(
                schema,
                analysis,
                stateLoaders,
                saveStatesWith,
                metricsRepository,
                saveOrAppendResultsWithKey,
                new StorageLevel());

            return Evaluate(checks, analysisResults);
        }
        /// <summary>
        /// Runs all check groups and returns the verification result. The verification result includes all metrics computed during the run.
        /// </summary>
        /// <param name="data">tabular data on which the checks should be verified</param>
        /// <param name="checks">a sequence of check objects to be executed</param>
        /// <param name="requiredAnalyzers">can be used to enforce the calculation of some metrics, regardless of whether there are constraints on them (optional)</param>
        /// <param name="aggregateWith">loader from which we retrieve initial states to aggregate (optional)</param>
        /// <param name="saveStatesWith">persist resulting states for the configured analyzers (optional)</param>
        /// <param name="metricsRepositoryOptions">options related to the MetricsRepository</param>
        /// <param name="fileOutputOptions">options related to file output using a SparkSession</param>
        /// <returns>Result for every check, including the overall status, a detailed status for each constraint and all metrics produced</returns>
        internal VerificationResult DoVerificationRun(
            DataFrame data,
            IEnumerable<Check> checks,
            IEnumerable<IAnalyzer<IMetric>> requiredAnalyzers,
            Option<IStateLoader> aggregateWith = default,
            Option<IStatePersister> saveStatesWith = default,
            VerificationMetricsRepositoryOptions metricsRepositoryOptions = default,
            VerificationFileOutputOptions fileOutputOptions = default)
        {
            IEnumerable<IAnalyzer<IMetric>> analyzers =
                requiredAnalyzers.Concat(checks.SelectMany(check => check.RequiredAnalyzers()));

            AnalysisRunnerRepositoryOptions options = new AnalysisRunnerRepositoryOptions(
                metricsRepositoryOptions.metricRepository,
                metricsRepositoryOptions.reuseExistingResultsForKey,
                Option<ResultKey>.None,
                metricsRepositoryOptions.failIfResultsForReusingMissing);

            AnalyzerContext analysisResults = AnalysisRunner.DoAnalysisRun(
                data,
                analyzers,
                aggregateWith,
                saveStatesWith,
                new StorageLevel(),
                options);

            VerificationResult verificationResult = Evaluate(checks, analysisResults);

            AnalyzerContext analyzerContext = new AnalyzerContext(verificationResult.Metrics);

            SaveOrAppendResultsIfNecessary(analyzerContext,
                                           metricsRepositoryOptions.metricRepository,
                                           metricsRepositoryOptions.saveOrAppendResultsWithKey);

            SaveJsonOutputsToFilesystemIfNecessary(fileOutputOptions, verificationResult);

            return verificationResult;
        }