/// <summary>
/// Evaluates every analysis configured on <paramref name="element"/> in parallel, publishes one
/// statistics record per analysis and, when a queue is supplied, forwards the produced values to it.
/// </summary>
/// <param name="element">Element whose <c>Analyses</c> collection is iterated.</param>
/// <param name="aftimes">Times handed to the runner for each analysis; its count is reported as the evaluations count.</param>
/// <param name="analysesThreadCount">Value used as <c>MaxDegreeOfParallelism</c> for the parallel loop.</param>
/// <param name="dataQueue">Queue receiving the results of each analysis; when <c>null</c> the results are not enqueued.</param>
public static void RunAnalysesParallel(AFElement element, List<AFTime> aftimes, int analysesThreadCount, ConcurrentQueue<List<AFValue>> dataQueue)
{
    var parallelOptions = new ParallelOptions();
    parallelOptions.MaxDegreeOfParallelism = analysesThreadCount;

    // One loop body invocation per analysis configured on the element.
    Parallel.ForEach(element.Analyses, parallelOptions, analysis =>
    {
        TimeSpan elapsed;
        int errorCount;

        var runner = new AnalysisRunner(Configuration.EnableAnalysisErrorOutput);
        var values = runner.Run(analysis, aftimes, out elapsed, out errorCount);

        // Prefix the element name with its parent's name when a parent exists.
        string displayName;
        if (element.Parent != null)
        {
            displayName = string.Format("{0}\\{1}", element.Parent.Name, element.Name);
        }
        else
        {
            displayName = element.Name;
        }

        var statistics = new StatisticsInfo();
        statistics.AnalyseName = analysis.Name;
        statistics.Duration = elapsed;
        statistics.ElementName = displayName;
        statistics.EvaluationsCount = aftimes.Count;
        statistics.EvaluationsErrorCount = errorCount;

        // Publish the statistics for this analysis.
        Statistics.StatisticsQueue.Add(statistics);

        // Forward the computed values only when the caller provided a queue.
        if (dataQueue != null)
        {
            dataQueue.Enqueue(values);
        }
    });
}
/// <summary>
/// Runs the analysis over the file at <c>input_path</c> and checks that exactly two commits are
/// parsed, each with the expected hash, dates, developers, story number, comment and test files.
/// </summary>
public void should_parse_commit_information_from_file_correctly()
{
    AnalysisRunner runner = new AnalysisRunner(input_path);

    runner.run();

    // Assert.Equal takes (expected, actual); keeping that order makes failure messages
    // label the two values correctly.
    Assert.Equal(2, runner.CommitInfos.Count);

    var firstCommit = runner.CommitInfos[0];
    Assert.Equal("5016e80", firstCommit.Hash);
    Assert.Equal(DateTime.Parse("2016-07-08"), firstCommit.AddTime);
    Assert.Equal(DateTime.Parse("2016-07-08"), firstCommit.CommitTime);
    Assert.Equal(new List<string> { "wangjian", "shengqi", "jijie" }, firstCommit.Devs);
    Assert.Equal("1919", firstCommit.StoryNumber);
    Assert.Equal("Change comments length from 255 to 500 characters for Upload teq feature, and fix some ie issue for text-area.", firstCommit.Comment);
    Assert.Equal(new HashSet<string>(), firstCommit.TestFileList);

    var secondCommit = runner.CommitInfos[1];
    Assert.Equal("870b57c", secondCommit.Hash);
    Assert.Equal(DateTime.Parse("2016-07-08"), secondCommit.AddTime);
    Assert.Equal(DateTime.Parse("2016-07-08"), secondCommit.CommitTime);
    Assert.Equal(new List<string> { "naijia" }, secondCommit.Devs);
    // The second commit carries no story number.
    Assert.Null(secondCommit.StoryNumber);
    Assert.Equal("Handle the case of no publications nor subscriptions in Tiger and MyData database when dropping existing publications and subscriptions", secondCommit.Comment);
    Assert.Equal(new HashSet<string>(), secondCommit.TestFileList);
}
/// <summary>
/// Example of incremental metrics computation: states are computed and stored per partition
/// (one data set per country code), table-wide metrics are derived from those stored states,
/// and after one partition changes only that partition's state is recomputed before the
/// table-wide metrics are derived again.
/// </summary>
public void should_execute_incremental_metrics_example()
{
    DataFrame dataSetDE = LoadIncrementalMetricsData(
        new[]
        {
            new object[] { 1, "ManufacturerA", "DE" },
            new object[] { 2, "ManufacturerB", "DE" },
            new object[] { 2, "ManufacturerC", "DE" }
        });

    DataFrame dataSetUS = LoadIncrementalMetricsData(
        new[]
        {
            new object[] { 3, "ManufacturerD", "US" },
            new object[] { 4, "ManufacturerE", "US" },
            new object[] { 5, "ManufacturerF", "US" }
        });

    DataFrame dataSetCN = LoadIncrementalMetricsData(
        new[]
        {
            new object[] { 6, "ManufacturerG", "CN" },
            new object[] { 7, "ManufacturerH", "CN" },
        });

    // Define the check whose constraints determine which analyzers are required.
    var check = new Check(CheckLevel.Warning, "generic check")
        .IsComplete("manufacturerName")
        .ContainsURL("manufacturerName", val => val == 0.0)
        .IsContainedIn("countryCode", new[] { "DE", "US", "CN" });

    // Build an analysis from the analyzers the check requires.
    Analysis analysis = new Analysis(check.RequiredAnalyzers());

    // One in-memory state provider per partition (country code).
    InMemoryStateProvider deStates = new InMemoryStateProvider();
    InMemoryStateProvider usStates = new InMemoryStateProvider();
    InMemoryStateProvider cnStates = new InMemoryStateProvider();

    // Run the analysis on each partition, saving its states in the matching provider.
    AnalysisRunner.Run(dataSetDE, analysis, saveStatesWith: deStates);
    AnalysisRunner.Run(dataSetUS, analysis, saveStatesWith: usStates);
    AnalysisRunner.Run(dataSetCN, analysis, saveStatesWith: cnStates);

    // Compute the metrics for the whole table from the stored partition states.
    AnalyzerContext tableMetrics = AnalysisRunner.RunOnAggregatedStates(
        dataSetDE.Schema(),
        analysis,
        new[] { deStates, usStates, cnStates });

    // Now assume a single partition changes: only the state of that partition needs to be
    // recomputed in order to update the metrics for the whole table.
    DataFrame updatedUsManufacturers = LoadIncrementalMetricsData(
        new[]
        {
            new object[] { 3, "ManufacturerDNew", "US" },
            new object[] { 4, null, "US" },
            new object[] { 5, "ManufacturerFNew http://clickme.com", "US" },
        });

    // Recompute the state of the changed partition. The provider is passed by name, like
    // the three runs above, so it is used to save states rather than bound positionally.
    InMemoryStateProvider updatedUsStates = new InMemoryStateProvider();
    AnalysisRunner.Run(updatedUsManufacturers, analysis, saveStatesWith: updatedUsStates);

    // Recompute the table metrics from the states, using the refreshed US state in place of
    // the stale one. The DE and CN data are not touched again.
    AnalyzerContext updatedTableMetrics = AnalysisRunner.RunOnAggregatedStates(
        dataSetDE.Schema(),
        analysis,
        new[] { deStates, updatedUsStates, cnStates });
}
/// <summary>
/// Creates the registry and wires up its services: a <c>UnityLogger</c> is created first,
/// then the checker factory, exception logger and config fetcher are built on <c>Logger</c>,
/// and finally the analysis runner is composed from the logger, checker factory and
/// exception logger.
/// </summary>
public ServiceRegistry()
{
    // The backing logger must exist before anything reads Logger.
    // NOTE(review): assumes Logger exposes _logger — confirm against the property definition.
    _logger = new UnityLogger();

    CheckerFactory = new CheckerFactory(Logger);
    ExceptionLogger = new ExceptionLogger(Logger);
    ConfigFetcher = new ConfigFetcher(Logger);

    // Built last because it consumes the services created above.
    AnalysisRunner = new AnalysisRunner(Logger, CheckerFactory, ExceptionLogger);
}
/// <summary>
/// Runs all check groups against metrics computed from aggregated states and returns the
/// verification result, which includes all the metrics generated during the run.
/// </summary>
/// <param name="schema">Schema of the tabular data on which the checks should be verified.</param>
/// <param name="checks">The sequence of check objects to be executed.</param>
/// <param name="stateLoaders">Loaders from which the states to aggregate are retrieved.</param>
/// <param name="requiredAnalysis">Analysis whose analyzers are always included, regardless of whether any constraint needs them; the analyzers required by <paramref name="checks"/> are added to it.</param>
/// <param name="saveStatesWith">Persists the resulting states for the configured analyzers (optional).</param>
/// <param name="metricsRepository">Optional metrics repository, forwarded unchanged to <c>AnalysisRunner.RunOnAggregatedStates</c>.</param>
/// <param name="saveOrAppendResultsWithKey">Optional result key, forwarded unchanged to <c>AnalysisRunner.RunOnAggregatedStates</c>.</param>
/// <returns>Result for every check, including the overall status, the detailed status of each constraint and all metrics produced.</returns>
public VerificationResult RunOnAggregatedStates(
    StructType schema,
    IEnumerable<Check> checks,
    IEnumerable<IStateLoader> stateLoaders,
    Analysis requiredAnalysis,
    Option<IStatePersister> saveStatesWith,
    Option<IMetricsRepository> metricsRepository,
    Option<ResultKey> saveOrAppendResultsWithKey)
{
    // Collect the analyzers each check depends on and add them to the caller's analysis.
    var analyzersFromChecks = checks.SelectMany(check => check.RequiredAnalyzers());
    Analysis combinedAnalysis = requiredAnalysis.AddAnalyzers(analyzersFromChecks);

    AnalyzerContext aggregatedMetrics = AnalysisRunner.RunOnAggregatedStates(
        schema,
        combinedAnalysis,
        stateLoaders,
        saveStatesWith,
        metricsRepository,
        saveOrAppendResultsWithKey,
        new StorageLevel());

    return Evaluate(checks, aggregatedMetrics);
}
/// <summary>
/// Runs all check groups on the given data and returns the verification result, which
/// includes all the metrics computed during the run.
/// </summary>
/// <param name="data">Tabular data on which the checks should be verified.</param>
/// <param name="checks">The sequence of check objects to be executed.</param>
/// <param name="requiredAnalyzers">Analyzers that are always run, regardless of whether any constraint needs them; the analyzers required by <paramref name="checks"/> are appended.</param>
/// <param name="aggregateWith">Loader from which initial states to aggregate are retrieved (optional).</param>
/// <param name="saveStatesWith">Persists the resulting states for the configured analyzers (optional).</param>
/// <param name="metricsRepositoryOptions">Options related to the metrics repository.</param>
/// <param name="fileOutputOptions">Options related to file output using a SparkSession.</param>
/// <returns>Result for every check, including the overall status, the detailed status of each constraint and all metrics produced.</returns>
internal VerificationResult DoVerificationRun(
    DataFrame data,
    IEnumerable<Check> checks,
    IEnumerable<IAnalyzer<IMetric>> requiredAnalyzers,
    Option<IStateLoader> aggregateWith = default,
    Option<IStatePersister> saveStatesWith = default,
    VerificationMetricsRepositoryOptions metricsRepositoryOptions = default,
    VerificationFileOutputOptions fileOutputOptions = default)
{
    // Explicitly required analyzers come first, followed by those the checks depend on.
    var analyzersFromChecks = checks.SelectMany(check => check.RequiredAnalyzers());
    IEnumerable<IAnalyzer<IMetric>> allAnalyzers = requiredAnalyzers.Concat(analyzersFromChecks);

    // NOTE(review): metricsRepositoryOptions defaults to `default`; if its type is a class
    // that is null and the member accesses below would throw — confirm against callers.
    // No result key is handed to the runner here; saving with the configured key happens
    // after evaluation, further down.
    AnalysisRunnerRepositoryOptions runnerOptions = new AnalysisRunnerRepositoryOptions(
        metricsRepositoryOptions.metricRepository,
        metricsRepositoryOptions.reuseExistingResultsForKey,
        Option<ResultKey>.None,
        metricsRepositoryOptions.failIfResultsForReusingMissing);

    AnalyzerContext runMetrics = AnalysisRunner.DoAnalysisRun(
        data,
        allAnalyzers,
        aggregateWith,
        saveStatesWith,
        new StorageLevel(),
        runnerOptions);

    VerificationResult outcome = Evaluate(checks, runMetrics);

    // Save the metrics carried by the verification result, then write any file outputs.
    AnalyzerContext metricsToSave = new AnalyzerContext(outcome.Metrics);

    SaveOrAppendResultsIfNecessary(
        metricsToSave,
        metricsRepositoryOptions.metricRepository,
        metricsRepositoryOptions.saveOrAppendResultsWithKey);

    SaveJsonOutputsToFilesystemIfNecessary(fileOutputOptions, outcome);

    return outcome;
}