Ejemplo n.º 1
0
        /// <summary>
        /// Binds the control to <paramref name="databaseObject"/>: clears the existing DQE graphs, pushes any
        /// evaluations found for the catalogue into the evaluation track bar and registers the
        /// validation / DQE commands with the common functionality.
        /// </summary>
        /// <param name="activator">Activator handed to the base class, the periodicity chart and the commands</param>
        /// <param name="databaseObject">Catalogue whose evaluations are looked up</param>
        public override void SetDatabaseObject(IActivateItems activator, Catalogue databaseObject)
        {
            base.SetDatabaseObject(activator, databaseObject);
            timePeriodicityChart1.SetItemActivator(activator);

            // start from a clean slate, graphs from a previous object must not linger
            ClearDQEGraphs();

            DQERepository repo;

            try
            {
                repo = new DQERepository(databaseObject.CatalogueRepository);
            }
            catch (Exception)
            {
                // the repository could not be created (e.g. no DQE server), carry on without evaluations
                repo = null;
            }

            if (repo != null)
            {
                // hand every evaluation recorded for this catalogue to the track bar
                evaluationTrackBar1.Evaluations = repo.GetAllEvaluationsFor(databaseObject).ToArray();
            }

            var configureValidationRules = new ExecuteCommandConfigureCatalogueValidationRules(activator).SetTarget(databaseObject);
            CommonFunctionality.Add(configureValidationRules);

            var runDqe = new ExecuteCommandRunDQEOnCatalogue(activator, databaseObject);
            CommonFunctionality.Add(runDqe, "Run Data Quality Engine...");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates the DQE repository, fetches the DQE / cache periodicity data if it has not been fetched yet and
        /// reports a failure for every catalogue in <c>CataloguesMissingDQERuns</c>.
        /// </summary>
        /// <param name="notifier">Receives the check results; a repository creation failure is reported and ends the check</param>
        public void Check(ICheckNotifier notifier)
        {
            try
            {
                dqeRepository = new DQERepository((ICatalogueRepository)_loadProgress.Repository);
            }
            catch (Exception ex)
            {
                // nothing else can be checked without the repository, report and stop
                var noServer = new CheckEventArgs(
                    "Could not contact DQE server to check the 'real' coverage of the datasets linked to this LoadProgress, possibly because there isn't a DQE server yet.  You should create one in ManageExternalServers",
                    CheckResult.Fail, ex);

                notifier.OnCheckPerformed(noServer);
                return;
            }

            // only fetch data that has not been populated already
            if (CataloguesPeriodictiyData == null)
            {
                FetchDataFromDQE(notifier);
            }

            if (CachePeriodictiyData == null)
            {
                FetchCacheData(notifier);
            }

            foreach (Catalogue neverEvaluated in CataloguesMissingDQERuns)
            {
                var message =
                    "Catalogue '" + neverEvaluated +
                    "' is associated with the load '" + _loadMetadata +
                    "' but has never had a DQE run executed on it, you should configure some basic validation on it and choose a time periodicity column and execute a DQE run on it.";

                notifier.OnCheckPerformed(new CheckEventArgs(message, CheckResult.Fail));
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Returns the evaluation cached for <paramref name="c"/>, asking the DQE repository for the most recent
        /// one (and caching the answer, which is null when there is no repository) on a cache miss.
        /// </summary>
        private Evaluation GetEvaluation(Catalogue c)
        {
            if (EvaluationCache.TryGetValue(c, out Evaluation cached))
            {
                return cached;
            }

            // cache miss: look it up once and remember the result, even when it is null
            Evaluation fetched = DQERepository?.GetMostRecentEvaluationFor(c);
            EvaluationCache.Add(c, fetched);

            return fetched;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Validates the current row of <paramref name="r"/> and folds the outcome into
        /// <paramref name="periodicity"/> and <paramref name="states"/> for the given data load run ID.
        /// </summary>
        /// <param name="dqeRepository">Used to convert the time periodicity value to a nullable DateTime</param>
        /// <param name="dataLoadRunIDOfCurrentRecord">Key under which the row's results are recorded</param>
        /// <param name="r">Reader whose current row is validated</param>
        /// <param name="periodicity">Hypercube incremented when a time periodicity field is configured and the row has a date</param>
        /// <param name="states">Row and column level counters updated for the run ID</param>
        /// <exception cref="Exception">The time periodicity value could not be cast to a date time</exception>
        private void ProcessRecord(DQERepository dqeRepository, int dataLoadRunIDOfCurrentRecord, DbDataReader r, PeriodicityCubesOverTime periodicity, DQEStateOverDataLoadRunId states)
        {
            // the dictionaries must have an entry for this run ID before anything is counted
            states.AddKeyToDictionaries(dataLoadRunIDOfCurrentRecord, _validator, _queryBuilder);

            // validate the row; failures are accumulated into the per run ID dictionary
            Consequence? worstConsequence;
            var failuresForThisRun = states.ColumnValidationFailuresByDataLoadRunID[dataLoadRunIDOfCurrentRecord];

            _validator.ValidateVerboseAdditive(r, failuresForThisRun, out worstConsequence);

            if (_timePeriodicityField != null)
            {
                DateTime? recordDate;

                try
                {
                    recordDate = dqeRepository.ObjectToNullableDateTime(r[_timePeriodicityField]);
                }
                catch (InvalidCastException e)
                {
                    throw new Exception("Found value " + r[_timePeriodicityField] + " of type " + r[_timePeriodicityField].GetType().Name + " in your time periodicity field which was not a valid date time, make sure your time periodicity field is a datetime datatype", e);
                }

                // rows without a date do not contribute to the hypercube
                if (recordDate.HasValue)
                {
                    periodicity.IncrementHyperCube(recordDate.Value.Year, recordDate.Value.Month, worstConsequence);
                }
            }

            // per column bookkeeping: every column starts out counted as correct
            foreach (var columnState in states.AllColumnStates[dataLoadRunIDOfCurrentRecord])
            {
                columnState.CountCorrect++;

                if (r[columnState.TargetProperty] == DBNull.Value)
                {
                    columnState.CountDBNull++;
                }
            }

            // per row bookkeeping: either the row passed or we tally its worst consequence
            if (worstConsequence.HasValue)
            {
                states.WorstConsequencesByDataLoadRunID[dataLoadRunIDOfCurrentRecord][worstConsequence.Value]++;
            }
            else
            {
                states.RowsPassingValidationByDataLoadRunID[dataLoadRunIDOfCurrentRecord]++;
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// An evaluation that has just been created has no periodicity table for the "ALL" category.
        /// </summary>
        public void TestGetPeriodicityForDataTableForEvaluation_EmptyEvaluation(bool pivot)
        {
            var catalogue  = new Catalogue(CatalogueRepository, "MyCata");
            var evaluation = new Evaluation(new DQERepository(CatalogueRepository), catalogue);

            var table = PeriodicityState.GetPeriodicityForDataTableForEvaluation(evaluation, "ALL", pivot);

            Assert.IsNull(table);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Starts a new evaluation of <paramref name="c"/> by inserting a record (CatalogueID plus the current
        /// time as DateOfEvaluation) through <paramref name="dqeRepository"/> and hydrating this instance from it.
        /// </summary>
        /// <param name="dqeRepository">Repository the evaluation is inserted into</param>
        /// <param name="c">Catalogue being evaluated</param>
        internal Evaluation(DQERepository dqeRepository, ICatalogue c)
        {
            DQERepository = dqeRepository;
            Catalogue     = c;

            var initialValues = new Dictionary <string, object>();
            initialValues.Add("CatalogueID", c.ID);
            initialValues.Add("DateOfEvaluation", DateTime.Now);

            dqeRepository.InsertAndHydrate(this, initialValues);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// An evaluation that has just been created yields an empty (but non null) periodicity count dictionary.
        /// </summary>
        public void GetPeriodicityCountsForEvaluation_EmptyEvaluation()
        {
            var catalogue  = new Catalogue(CatalogueRepository, "MyCata");
            var evaluation = new Evaluation(new DQERepository(CatalogueRepository), catalogue);

            var counts = PeriodicityState.GetPeriodicityCountsForEvaluation(evaluation, true);

            Assert.IsNotNull(counts);
            Assert.IsEmpty(counts);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Captures the load progress together with its load metadata and cache progress, and tries to create a
        /// DQE repository for it. The repository field is null when creation throws <see cref="NotSupportedException"/>.
        /// </summary>
        /// <param name="loadProgress">The load progress being reported on</param>
        public LoadProgressSummaryReport(LoadProgress loadProgress)
        {
            _loadProgress  = loadProgress;
            _loadMetadata  = _loadProgress.LoadMetadata;
            _cacheProgress = _loadProgress.CacheProgress;

            DQERepository repo = null;

            try
            {
                repo = new DQERepository(loadProgress.CatalogueRepository);
            }
            catch (NotSupportedException)
            {
                // repository creation is not supported here; the report runs without one
            }

            dqeRepository = repo;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Hydrates an existing evaluation from a database record and resolves the Catalogue it refers to.
        /// </summary>
        /// <param name="repository">Repository passed to the base class and used to look up the Catalogue</param>
        /// <param name="r">Reader from which the DateOfEvaluation and CatalogueID columns are read</param>
        /// <exception cref="Exception">The referenced Catalogue could not be retrieved (original error is the inner exception)</exception>
        internal Evaluation(DQERepository repository, DbDataReader r) : base(repository, r)
        {
            DQERepository = repository;

            string dateText = r["DateOfEvaluation"].ToString();
            DateOfEvaluation = DateTime.Parse(dateText);

            string catalogueIdText = r["CatalogueID"].ToString();
            CatalogueID = int.Parse(catalogueIdText);

            try
            {
                Catalogue = DQERepository.CatalogueRepository.GetObjectByID <Catalogue>(CatalogueID);
            }
            catch (Exception e)
            {
                // wrap the failure so the caller can see which evaluation / catalogue was involved
                var message = "Could not create a DataQualityEngine.Evaluation for Evaluation with ID " + ID + " because it is a report of an old Catalogue that has been deleted or otherwise does not exist/could not be retrieved (CatalogueID was:" + CatalogueID + ").  See inner exception for full details";

                throw new Exception(message, e);
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Hydrates an existing annotation from a database record.
        /// </summary>
        /// <param name="repository">Repository passed to the base class and stored on the instance</param>
        /// <param name="r">Reader from which the annotation columns are read</param>
        public DQEGraphAnnotation(DQERepository repository, DbDataReader r) : base(repository, r)
        {
            Repository = repository;

            // most columns are read through their string representation
            string ReadAsString(string column) => r[column].ToString();

            // coordinates of the annotation
            StartX = double.Parse(ReadAsString("StartX"));
            StartY = double.Parse(ReadAsString("StartY"));
            EndX   = double.Parse(ReadAsString("EndX"));
            EndY   = double.Parse(ReadAsString("EndY"));

            Text          = ReadAsString("Text");
            Evaluation_ID = int.Parse(ReadAsString("Evaluation_ID"));
            Username      = ReadAsString("Username");

            // these two are cast directly rather than parsed from text
            CreationDate  = (DateTime)r["CreationDate"];
            PivotCategory = (string)r["PivotCategory"];

            AnnotationIsForGraph = (DQEGraphType)Enum.Parse(typeof(DQEGraphType), ReadAsString("AnnotationIsForGraph"));
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Rebuilds the raceway chart: collects the periodicity counts of the most recent evaluation of every
        /// catalogue in the collection (null when a catalogue has no evaluation) and hands them to the render area.
        /// When the DQE repository cannot be created the error is shown on the smiley and nothing is drawn.
        /// </summary>
        public void GenerateChart()
        {
            ragSmiley1.Reset();
            ragSmiley1.SetVisible(false);

            var allCatalogues = _collection.GetCatalogues();
            var cataloguesToAdd = new Dictionary <Catalogue, Dictionary <DateTime, ArchivalPeriodicityCount> >();

            DQERepository dqeRepository;

            try
            {
                dqeRepository = new DQERepository(_activator.RepositoryLocator.CatalogueRepository);
            }
            catch (NotSupportedException e)
            {
                // surface the problem to the user instead of drawing an empty chart
                ragSmiley1.SetVisible(true);
                ragSmiley1.Fatal(e);
                return;
            }

            foreach (var cata in allCatalogues.OrderBy(c => c.Name))
            {
                var latest = dqeRepository.GetMostRecentEvaluationFor(cata);

                // catalogues that have never been evaluated are still added, with a null dictionary
                Dictionary <DateTime, ArchivalPeriodicityCount> counts =
                    latest == null ? null : PeriodicityState.GetPeriodicityCountsForEvaluation(latest);

                cataloguesToAdd.Add(cata, counts);
            }

            // every month seen in any of the datasets
            var buckets = GetBuckets(cataloguesToAdd);

            racewayRenderArea.AddTracks(_activator, cataloguesToAdd, buckets, _collection.IgnoreRows);
            racewayRenderArea.Refresh();

            this.Invalidate();
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Registers the "$PropertyName" replacement tokens for Catalogue / TableInfo and for
        /// CatalogueItem / ColumnInfo, adds the DQE replacements and tries to create the DQE repository
        /// (left null when creation throws <see cref="NotSupportedException"/>).
        /// </summary>
        /// <param name="repositoryLocator">Supplies the catalogue repository used to create the DQE repository</param>
        public CustomMetadataReport(IRDMPPlatformRepositoryServiceLocator repositoryLocator)
        {
            // ---- Catalogue level tokens (work in root or in a $foreach Catalogue block) ----

            // one token per Catalogue property
            foreach (var catalogueProperty in typeof(Catalogue).GetProperties())
            {
                Replacements.Add("$" + catalogueProperty.Name, catalogue => catalogueProperty.GetValue(catalogue));
            }

            // one token per TableInfo property, unless Catalogue already claimed the name
            foreach (var tableProperty in typeof(TableInfo).GetProperties())
            {
                Replacements.TryAdd("$" + tableProperty.Name, catalogue =>
                {
                    if (GetTable(catalogue) == null)
                    {
                        return null;
                    }

                    return tableProperty.GetValue(GetTable(catalogue));
                });
            }

            AddDQEReplacements();

            // ---- CatalogueItem level tokens (only work in a $foreach CatalogueItem block) ----

            // one token per CatalogueItem property
            foreach (var itemProperty in typeof(CatalogueItem).GetProperties())
            {
                ReplacementsCatalogueItem.Add("$" + itemProperty.Name, item => itemProperty.GetValue(item));
            }

            // one token per ColumnInfo property, unless CatalogueItem already claimed the name
            foreach (var columnProperty in typeof(ColumnInfo).GetProperties())
            {
                ReplacementsCatalogueItem.TryAdd("$" + columnProperty.Name, item =>
                {
                    if (item.ColumnInfo_ID == null)
                    {
                        return null;
                    }

                    return columnProperty.GetValue(item.ColumnInfo);
                });
            }

            try
            {
                DQERepository = new DQERepository(repositoryLocator.CatalogueRepository);
            }
            catch (NotSupportedException)
            {
                // no usable DQE repository; the report runs without one
                DQERepository = null;
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Creates a new annotation record through <paramref name="repository"/> and hydrates this instance from it.
        /// The current environment user name and the current time are stored as Username and CreationDate.
        /// </summary>
        /// <param name="repository">Repository the annotation is inserted into</param>
        /// <param name="startX">X coordinate where the annotation starts</param>
        /// <param name="startY">Y coordinate where the annotation starts</param>
        /// <param name="endX">X coordinate where the annotation ends</param>
        /// <param name="endY">Y coordinate where the annotation ends</param>
        /// <param name="text">Annotation text</param>
        /// <param name="evaluation">Evaluation whose ID is stored as Evaluation_ID</param>
        /// <param name="annotationIsForGraphType">Graph type stored as AnnotationIsForGraph</param>
        /// <param name="pivotCategory">Pivot category stored with the annotation</param>
        public DQEGraphAnnotation(DQERepository repository, double startX, double startY, double endX, double endY, string text, Evaluation evaluation, DQEGraphType annotationIsForGraphType, string pivotCategory)
        {
            Repository = repository;

            var currentUser = Environment.UserName;

            var columnValues = new Dictionary <string, object>
            {
                ["StartX"]               = startX,
                ["StartY"]               = startY,
                ["EndX"]                 = endX,
                ["EndY"]                 = endY,
                ["Text"]                 = text,
                ["Evaluation_ID"]        = evaluation.ID,
                ["Username"]             = currentUser,
                ["CreationDate"]         = DateTime.Now,
                ["AnnotationIsForGraph"] = annotationIsForGraphType,
                ["PivotCategory"]        = pivotCategory
            };

            Repository.InsertAndHydrate(this, columnValues);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Creates an annotation against a fresh evaluation, checks the values it was created with, then
        /// confirms that a saved text change is what a fresh copy fetched from the repository contains.
        /// The catalogue is always deleted afterwards.
        /// </summary>
        public void TestCreatingOne()
        {
            Catalogue c = new Catalogue(CatalogueRepository, "FrankyMicky");


            try
            {
                var        dqeRepo    = new DQERepository(CatalogueRepository);
                Evaluation evaluation = new Evaluation(dqeRepo, c);

                var annotation = new DQEGraphAnnotation(dqeRepo, 1, 2, 3, 4, "Fishesfly", evaluation, DQEGraphType.TimePeriodicityGraph, "ALL");

                //expected value goes first so that failure messages report expected/actual the right way round
                Assert.AreEqual(1, annotation.StartX);
                Assert.AreEqual(2, annotation.StartY);
                Assert.AreEqual(3, annotation.EndX);
                Assert.AreEqual(4, annotation.EndY);
                Assert.AreEqual(DQEGraphType.TimePeriodicityGraph, annotation.AnnotationIsForGraph);

                //should have been created just now (3 seconds of slack are allowed)
                Assert.IsTrue(annotation.CreationDate <= DateTime.Now.AddSeconds(3));
                //certainly shouldn't be before yesterday!
                Assert.IsTrue(annotation.CreationDate > DateTime.Now.AddDays(-1));

                //text should match
                Assert.AreEqual("Fishesfly", annotation.Text);

                annotation.Text = "flibble";
                annotation.SaveToDatabase();

                //change the in-memory copy only, the database should still hold the saved value
                annotation.Text = "";

                //new copy is flibble
                Assert.AreEqual("flibble", dqeRepo.GetObjectByID <DQEGraphAnnotation>(annotation.ID).Text);

                annotation.DeleteInDatabase();
            }
            finally
            {
                c.DeleteInDatabase();
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Looks up the most recent DQE evaluation of <paramref name="catalogue"/> and delegates to the
        /// evaluation based overload. Returns <c>Unknown()</c> when the lookup throws or there is no evaluation.
        /// </summary>
        /// <param name="catalogue">Catalogue whose most recent evaluation is used</param>
        /// <param name="discardOutliers">Passed through to the evaluation based overload</param>
        /// <param name="accurateAsOf">Date of the evaluation used, or null when the result is unknown</param>
        public Tuple <DateTime?, DateTime?> GetMachineReadableTimespanIfKnownOf(Catalogue catalogue, bool discardOutliers, out DateTime?accurateAsOf)
        {
            accurateAsOf = null;

            Evaluation latest;

            try
            {
                latest = new DQERepository(catalogue.CatalogueRepository).GetMostRecentEvaluationFor(catalogue);
            }
            catch (Exception)
            {
                // any failure to reach the DQE results simply means the timespan is unknown
                return Unknown();
            }

            if (latest == null)
            {
                return Unknown();
            }

            accurateAsOf = latest.DateOfEvaluation;

            return GetMachineReadableTimespanIfKnownOf(latest, discardOutliers);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Generates bulk test data, runs a CatalogueConstraintReport over it and checks either that a cancelled
        /// run reported exactly one OperationCanceledException, or that a completed run produced an evaluation
        /// whose row counts add up and which was logged under "DQE".
        /// </summary>
        /// <param name="testCancellingValiationEarly">True to cancel the token before the report is generated</param>
        public void ValidateBulkTestData(bool testCancellingValiationEarly)
        {
            var numberOfRecordsToGenerate = 10000;
            var startTime = DateTime.Now;

            var testData = new BulkTestsData(CatalogueRepository, DiscoveredDatabaseICanCreateRandomTablesIn, numberOfRecordsToGenerate);

            testData.SetupTestData();
            testData.ImportAsCatalogue();

            var dqeRepository = new DQERepository(CatalogueRepository);

            //there shouldn't be any lingering results in the database
            Assert.IsNull(dqeRepository.GetMostRecentEvaluationFor(_catalogue));

            //configure validation rules
            testData.catalogue.ValidatorXML = bulkTestDataValidation;

            //pick the dtCreated column as the time periodicity field
            var timePeriodicityColumn = testData.catalogue.GetAllExtractionInformation(ExtractionCategory.Any).Single(e => e.GetRuntimeName().Equals("dtCreated"));

            testData.catalogue.TimeCoverage_ExtractionInformation_ID = timePeriodicityColumn.ID;

            //the report must pass its own checks before it is run
            var report = new CatalogueConstraintReport(testData.catalogue, SpecialFieldNames.DataLoadRunID);

            report.Check(new ThrowImmediatelyCheckNotifier());

            var cancellation = new CancellationTokenSource();

            if (testCancellingValiationEarly)
            {
                cancellation.Cancel();
            }

            var listener = new ToMemoryDataLoadEventListener(false);

            report.GenerateReport(testData.catalogue, listener, cancellation.Token);

            var messages = listener.EventsReceivedBySender[report];

            if (testCancellingValiationEarly)
            {
                //a cancelled run reports exactly one cancellation and nothing more is checked
                Assert.IsTrue(messages.Count(m => m.Exception is OperationCanceledException) == 1);
                testData.Destroy();
                testData.DeleteCatalogue();
                return;
            }

            //all messages must have null exceptions
            Assert.IsTrue(messages.All(m => m.Exception == null));


            //get the results now
            var results = dqeRepository.GetMostRecentEvaluationFor(testData.catalogue);

            Assert.IsNotNull(results);

            //the sum of all consequences across all data load run ids should be the record count
            Assert.AreEqual(10000, results.RowStates.Sum(r => r.Missing + r.Invalid + r.Wrong + r.Correct));

            //there should be at least 5 data load run ids (around 12 is typical - see BulkTestData - fewer is theoretically possible but astronomically unlikely)
            Assert.GreaterOrEqual(results.RowStates.Count(), 5);

            //there should be lots of column results too
            Assert.GreaterOrEqual(results.ColumnStates.Count(), 5);

            //did it log?
            var logManager = new LogManager(CatalogueRepository.GetServerDefaults().GetDefaultFor(PermissableDefaults.LiveLoggingServer_ID));
            var log        = logManager.GetArchivalDataLoadInfos("DQE").FirstOrDefault();

            Assert.IsNotNull(log);
            Assert.GreaterOrEqual(log.StartTime, startTime);
            Assert.AreEqual(0, log.Errors.Count);
            Assert.AreEqual(numberOfRecordsToGenerate, log.TableLoadInfos.Single().Inserts);

            testData.Destroy();

            testData.DeleteCatalogue();
        }
Ejemplo n.º 17
0
        /// <inheritdoc/>
        public string GetHumanReadableTimepsanIfKnownOf(Catalogue catalogue, bool discardOutliers)
        {
            DataTable periodicity;

            try
            {
                Evaluation latest = new DQERepository(catalogue.CatalogueRepository).GetMostRecentEvaluationFor(catalogue);

                if (latest == null)
                {
                    return "Unknown";
                }

                periodicity = PeriodicityState.GetPeriodicityForDataTableForEvaluation(latest, "ALL", false);
            }
            catch (Exception e)
            {
                // failures are reported in the returned text rather than thrown
                return "Unknown:" + e.Message;
            }

            // fewer than two rows is treated as unknown
            if (periodicity == null || periodicity.Rows.Count < 2)
            {
                return "Unknown";
            }

            // with -1 as threshold every row (count >= 0) qualifies
            int discardThreshold;

            if (discardOutliers)
            {
                discardThreshold = GetDiscardThreshold(periodicity);
            }
            else
            {
                discardThreshold = -1;
            }

            // first row (from the top) whose record count exceeds the threshold
            string minMonth = null;

            foreach (DataRow row in periodicity.Rows)
            {
                if (Convert.ToInt32(row["CountOfRecords"]) > discardThreshold)
                {
                    minMonth = row[1].ToString();
                    break;
                }
            }

            // last row (from the bottom) whose record count exceeds the threshold
            string maxMonth = null;

            for (int index = periodicity.Rows.Count - 1; index >= 0; index--)
            {
                DataRow row = periodicity.Rows[index];

                if (Convert.ToInt32(row["CountOfRecords"]) > discardThreshold)
                {
                    maxMonth = row[1].ToString();
                    break;
                }
            }

            if (minMonth == null || maxMonth == null)
            {
                return "All Values Below Threshold";
            }

            // a single qualifying month is reported on its own, otherwise as a range
            return maxMonth == minMonth ? minMonth : minMonth + " To " + maxMonth;
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Works out the first and last month of the most recent DQE evaluation of <paramref name="catalogue"/>
        /// whose record count exceeds the discard threshold. Returns <c>Unknown()</c> when the evaluation cannot
        /// be fetched, does not exist, has fewer than two periodicity rows or has no row above the threshold.
        /// </summary>
        /// <param name="catalogue">Catalogue whose most recent evaluation is used</param>
        /// <param name="discardOutliers">True to use <c>GetDiscardThreshold</c>, false to accept every row</param>
        /// <param name="accurateAsOf">Date of the evaluation used, or null when no evaluation was found</param>
        public Tuple <DateTime?, DateTime?> GetMachineReadableTimepsanIfKnownOf(Catalogue catalogue, bool discardOutliers, out DateTime?accurateAsOf)
        {
            accurateAsOf = null;

            Evaluation latest;

            try
            {
                latest = new DQERepository(catalogue.CatalogueRepository).GetMostRecentEvaluationFor(catalogue);
            }
            catch (Exception)
            {
                // any failure to reach the DQE results simply means the timespan is unknown
                return Unknown();
            }

            if (latest == null)
            {
                return Unknown();
            }

            accurateAsOf = latest.DateOfEvaluation;

            DataTable periodicity = PeriodicityState.GetPeriodicityForDataTableForEvaluation(latest, "ALL", false);

            // fewer than two rows is treated as unknown
            if (periodicity == null || periodicity.Rows.Count < 2)
            {
                return Unknown();
            }

            // with -1 as threshold every row (count >= 0) qualifies
            int discardThreshold;

            if (discardOutliers)
            {
                discardThreshold = GetDiscardThreshold(periodicity);
            }
            else
            {
                discardThreshold = -1;
            }

            // first row (from the top) whose record count exceeds the threshold
            DateTime? minMonth = null;

            foreach (DataRow row in periodicity.Rows)
            {
                if (Convert.ToInt32(row["CountOfRecords"]) > discardThreshold)
                {
                    minMonth = DateTime.Parse(row[1].ToString());
                    break;
                }
            }

            // last row (from the bottom) whose record count exceeds the threshold
            DateTime? maxMonth = null;

            for (int index = periodicity.Rows.Count - 1; index >= 0; index--)
            {
                DataRow row = periodicity.Rows[index];

                if (Convert.ToInt32(row["CountOfRecords"]) > discardThreshold)
                {
                    maxMonth = DateTime.Parse(row[1].ToString());
                    break;
                }
            }

            if (minMonth == null || maxMonth == null)
            {
                return Unknown();
            }

            // when both ends are the same month the lower bound is used for both items
            DateTime? upperBound = maxMonth == minMonth ? minMonth : maxMonth;

            return Tuple.Create(minMonth, upperBound);
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Runs the DQE over <paramref name="c"/>: executes the query builder's SQL, validates every record
        /// (always under the "ALL" category and additionally under the record's pivot value when one is
        /// configured), then commits the accumulated results as a new <see cref="Evaluation"/> inside a
        /// transaction on the DQE repository.
        /// </summary>
        /// <remarks>
        /// Exceptions are not rethrown: they are reported through the listeners as an Error ("Fatal Crash") or,
        /// for <see cref="OperationCanceledException"/>, as a Warning ("DQE Execution Cancelled"). The table load
        /// infos of the database logger are finalized in all cases.
        /// </remarks>
        /// <param name="c">Catalogue to evaluate</param>
        /// <param name="listener">Receives progress and notifications alongside the logging database listener</param>
        /// <param name="cancellationToken">Checked while fetching the data and before each record is processed</param>
        public override void GenerateReport(ICatalogue c, IDataLoadEventListener listener, CancellationToken cancellationToken)
        {
            SetupLogging(c.CatalogueRepository);

            var toDatabaseLogger = new ToLoggingDatabaseDataLoadEventListener(this, _logManager, _loggingTask, "DQE evaluation of " + c);

            //every event raised below goes through the forker, which is built from the caller's listener and the database logger
            var forker = new ForkDataLoadEventListener(listener, toDatabaseLogger);

            try
            {
                _catalogue = c;
                var dqeRepository = new DQERepository(c.CatalogueRepository);

                //the "ALL" category always exists, pivot specific categories are added as they are encountered
                byPivotCategoryCubesOverTime.Add("ALL", new PeriodicityCubesOverTime("ALL"));
                byPivotRowStatesOverDataLoadRunId.Add("ALL", new DQEStateOverDataLoadRunId("ALL"));

                Check(new FromDataLoadEventListenerToCheckNotifier(forker));

                var sw = Stopwatch.StartNew();
                using (var con = _server.GetConnection())
                {
                    con.Open();

                    var cmd = _server.GetCommand(_queryBuilder.SQL, con);
                    cmd.CommandTimeout = 500000;

                    //start the query asynchronously but block here until it returns or the token is cancelled
                    var t = cmd.ExecuteReaderAsync(cancellationToken);
                    t.Wait(cancellationToken);

                    if (cancellationToken.IsCancellationRequested)
                    {
                        throw new OperationCanceledException("User cancelled DQE while fetching data");
                    }

                    var r = t.Result;

                    int progress = 0;

                    while (r.Read())
                    {
                        cancellationToken.ThrowIfCancellationRequested();

                        progress++;
                        int dataLoadRunIDOfCurrentRecord = 0;
                        //to start with assume we will pass the results for the 'unknown batch' (where data load run ID is null or not available)

                        //if the DataReader is likely to have a data load run ID column
                        if (_containsDataLoadID)
                        {
                            //get data load run id
                            int?runID = dqeRepository.ObjectToNullableInt(r[_dataLoadRunFieldName]);

                            //if it has a value use it (otherwise it is null so use 0 - ugh I know, it's a primary key constraint issue)
                            if (runID != null)
                            {
                                dataLoadRunIDOfCurrentRecord = (int)runID;
                            }
                        }

                        string pivotValue = null;

                        //if the user has a pivot category configured
                        if (_pivotCategory != null)
                        {
                            pivotValue = GetStringValueForPivotField(r[_pivotCategory], forker);

                            //warn only once about null/empty pivot values; the record that triggers the warning is counted under "ALL" only
                            if (!haveComplainedAboutNullCategories && string.IsNullOrWhiteSpace(pivotValue))
                            {
                                forker.OnNotify(this,
                                                new NotifyEventArgs(ProgressEventType.Warning,
                                                                    "Found a null/empty value for pivot category '" + _pivotCategory +
                                                                    "', this record will ONLY be recorded under ALL and not it's specific category, you will not be warned of further nulls because there are likely to be many if there are any"));
                                haveComplainedAboutNullCategories = true;
                                pivotValue = null;
                            }
                        }

                        //always increase the "ALL" category
                        ProcessRecord(dqeRepository, dataLoadRunIDOfCurrentRecord, r,
                                      byPivotCategoryCubesOverTime["ALL"], byPivotRowStatesOverDataLoadRunId["ALL"]);

                        //if there is a value in the current record for the pivot column
                        if (pivotValue != null)
                        {
                            //if it is a pivot value we have not seen before
                            if (!byPivotCategoryCubesOverTime.ContainsKey(pivotValue))
                            {
                                //we will need to expand the dictionaries, but refuse to grow beyond the configured maximum
                                if (byPivotCategoryCubesOverTime.Keys.Count > MaximumPivotValues)
                                {
                                    throw new OverflowException(
                                              "Encountered more than " + MaximumPivotValues + " values for the pivot column " + _pivotCategory +
                                              " this will result in crazy space usage since it is a multiplicative scale of DQE tesseracts");
                                }

                                //expand both the time periodicity and the state results
                                byPivotRowStatesOverDataLoadRunId.Add(pivotValue,
                                                                      new DQEStateOverDataLoadRunId(pivotValue));
                                byPivotCategoryCubesOverTime.Add(pivotValue, new PeriodicityCubesOverTime(pivotValue));
                            }

                            //now we are sure that the dictionaries have the category field we can increment it
                            ProcessRecord(dqeRepository, dataLoadRunIDOfCurrentRecord, r,
                                          byPivotCategoryCubesOverTime[pivotValue], byPivotRowStatesOverDataLoadRunId[pivotValue]);
                        }

                        //report progress every 5000 records
                        if (progress % 5000 == 0)
                        {
                            forker.OnProgress(this,
                                              new ProgressEventArgs("Processing " + _catalogue,
                                                                    new ProgressMeasurement(progress, ProgressType.Records), sw.Elapsed));
                        }
                    }
                    //final value
                    forker.OnProgress(this,
                                      new ProgressEventArgs("Processing " + _catalogue,
                                                            new ProgressMeasurement(progress, ProgressType.Records), sw.Elapsed));
                    con.Close();
                }
                sw.Stop();

                foreach (var state in byPivotRowStatesOverDataLoadRunId.Values)
                {
                    state.CalculateFinalValues();
                }

                //now commit results
                using (var con = dqeRepository.BeginNewTransactedConnection())
                {
                    try
                    {
                        //record that an evaluation of this catalogue took place
                        Evaluation evaluation = new Evaluation(dqeRepository, _catalogue);

                        foreach (var state in byPivotRowStatesOverDataLoadRunId.Values)
                        {
                            state.CommitToDatabase(evaluation, _catalogue, con.Connection, con.Transaction);
                        }

                        //periodicity is only committed when a time periodicity field is configured
                        if (_timePeriodicityField != null)
                        {
                            foreach (PeriodicityCubesOverTime periodicity in byPivotCategoryCubesOverTime.Values)
                            {
                                periodicity.CommitToDatabase(evaluation);
                            }
                        }

                        con.ManagedTransaction.CommitAndCloseConnection();
                    }
                    catch (Exception)
                    {
                        //abandon the transaction, then let the outer handler report the failure
                        con.ManagedTransaction.AbandonAndCloseConnection();
                        throw;
                    }
                }

                forker.OnNotify(this,
                                new NotifyEventArgs(ProgressEventType.Information,
                                                    "CatalogueConstraintReport completed successfully  and committed results to DQE server"));
            }
            catch (Exception e)
            {
                //nothing is rethrown from here: cancellation becomes a warning, anything else an error
                if (!(e is OperationCanceledException))
                {
                    forker.OnNotify(this, new NotifyEventArgs(ProgressEventType.Error, "Fatal Crash", e));
                }
                else
                {
                    forker.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, "DQE Execution Cancelled", e));
                }
            }
            finally
            {
                toDatabaseLogger.FinalizeTableLoadInfos();
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Runs the pre-flight checks for a DQE run against <c>_catalogue</c>, reporting each outcome
        /// through <paramref name="notifier"/>.
        /// </summary>
        /// <remarks>
        /// Besides reporting, this method populates the state used by the rest of the class:
        /// <c>_validator</c>, <c>_server</c>, <c>_queryBuilder</c>, <c>_containsDataLoadID</c>,
        /// <c>_pivotCategory</c> and <c>_timePeriodicityField</c>.
        /// Some failures stop the checks early (missing catalogue, logging setup, missing or invalid
        /// ValidatorXML, unresolvable server); others are reported and the checks carry on.
        /// </remarks>
        /// <param name="notifier">Receives one <see cref="CheckEventArgs"/> per check performed.</param>
        public override void Check(ICheckNotifier notifier)
        {
            //nothing can be checked without a catalogue
            if (_catalogue == null)
            {
                notifier.OnCheckPerformed(new CheckEventArgs("Catalogue has not been set, either use the constructor with Catalogue parameter or use the blank constructor and call CatalogueSupportsReport instead", CheckResult.Fail));
                return;
            }

            //can we build a DQE repository (i.e. is there a DQE reporting server)?
            //NOTE(review): a failure here is reported but does not stop the remaining checks
            try
            {
                var dqeRepository = new DQERepository(_catalogue.CatalogueRepository);
                notifier.OnCheckPerformed(new CheckEventArgs("Found DQE reporting server " + dqeRepository.DiscoveredServer.Name, CheckResult.Success));
            }
            catch (Exception e)
            {
                notifier.OnCheckPerformed(
                    new CheckEventArgs(
                        "Failed to create DQE Repository, possibly there is no DataQualityEngine Reporting Server (ExternalDatabaseServer).  You will need to create/set one in CatalogueManager by using 'Locations=>Manage External Servers...'",
                        CheckResult.Fail, e));
            }

            //logging of DQE runs must be available
            try
            {
                SetupLogging(_catalogue.CatalogueRepository);
            }
            catch (Exception e)
            {
                notifier.OnCheckPerformed(new CheckEventArgs("Failed to setup logging of DQE runs", CheckResult.Fail, e));
                return;
            }

            //there is XML
            if (string.IsNullOrWhiteSpace(_catalogue.ValidatorXML))
            {
                notifier.OnCheckPerformed(new CheckEventArgs("There is no ValidatorXML specified for the Catalogue " + _catalogue + ", you must configure validation rules", CheckResult.Fail));
                return;
            }
            notifier.OnCheckPerformed(new CheckEventArgs("Found ValidatorXML specified for the Catalogue " + _catalogue + ":" + Environment.NewLine + _catalogue.ValidatorXML, CheckResult.Success));

            //the XML deserializes into a Validator
            try
            {
                _validator = Validator.LoadFromXml(_catalogue.ValidatorXML);
            }
            catch (Exception e)
            {
                notifier.OnCheckPerformed(new CheckEventArgs("ValidatorXML for Catalogue " + _catalogue + " could not be deserialized into a Validator", CheckResult.Fail, e));
                return;
            }

            notifier.OnCheckPerformed(new CheckEventArgs("Deserialized validation XML successfully", CheckResult.Success));

            //there is a server
            try
            {
                _server = _catalogue.GetDistinctLiveDatabaseServer(DataAccessContext.InternalDataProcessing, true);
            }
            catch (Exception e)
            {
                notifier.OnCheckPerformed(new CheckEventArgs("Could not get connection to Catalogue " + _catalogue, CheckResult.Fail, e));
                return;
            }
            notifier.OnCheckPerformed(new CheckEventArgs("Found connection string for Catalogue " + _catalogue, CheckResult.Success));

            //we can connect to the server
            //NOTE(review): a connection failure is reported but the remaining checks still run
            try
            {
                _server.TestConnection();
            }
            catch (Exception e)
            {
                notifier.OnCheckPerformed(new CheckEventArgs("Could not connect to server for Catalogue " + _catalogue, CheckResult.Fail, e));
            }

            //there is extraction SQL
            try
            {
                _queryBuilder = new QueryBuilder("", "");
                _queryBuilder.AddColumnRange(_catalogue.GetAllExtractionInformation(ExtractionCategory.Any));

                //every occurrence of a runtime name after the first one is a duplicate
                var duplicates = _queryBuilder.SelectColumns.GroupBy(c => c.IColumn.GetRuntimeName()).SelectMany(grp => grp.Skip(1)).ToArray();

                if (duplicates.Any())
                {
                    foreach (QueryTimeColumn column in duplicates)
                    {
                        notifier.OnCheckPerformed(
                            new CheckEventArgs(
                                "The column name " + column.IColumn.GetRuntimeName() +
                                " is duplicated in the SELECT command, column names must be unique!  Most likely you have 2+ columns with the same name (from different tables) or duplicate named CatalogueItem/Aliases for the same underlying ColumnInfo",
                                CheckResult.Fail));
                    }
                }

                notifier.OnCheckPerformed(new CheckEventArgs("Query Builder decided the extraction SQL was:" + Environment.NewLine + _queryBuilder.SQL, CheckResult.Success));

                SetupAdditionalValidationRules(notifier);
            }
            catch (Exception e)
            {
                //NOTE(review): execution continues after this failure and the code below still reads _queryBuilder
                notifier.OnCheckPerformed(new CheckEventArgs("Failed to generate extraction SQL", CheckResult.Fail, e));
            }

            //for each thing we are about to try and validate
            foreach (ItemValidator itemValidator in _validator.ItemValidators)
            {
                //is there a column in the query builder that matches it
                if (
                    //there isnt!
                    !_queryBuilder.SelectColumns.Any(
                        c => c.IColumn.GetRuntimeName().Equals(itemValidator.TargetProperty)))
                {
                    notifier.OnCheckPerformed(
                        new CheckEventArgs(
                            "Could not find a column in the extraction SQL that would match TargetProperty " +
                            itemValidator.TargetProperty, CheckResult.Fail));
                }
                else
                {
                    //there is that is good
                    notifier.OnCheckPerformed(
                        new CheckEventArgs("Found column in query builder columns which matches TargetProperty " +
                                           itemValidator.TargetProperty, CheckResult.Success));
                }
            }

            //is the data load run column among the selected columns? (absence is only a warning)
            _containsDataLoadID =
                _queryBuilder.SelectColumns.Any(
                    c => c.IColumn.GetRuntimeName().Equals(_dataLoadRunFieldName));

            if (_containsDataLoadID)
            {
                notifier.OnCheckPerformed(
                    new CheckEventArgs(
                        "Found " + _dataLoadRunFieldName + " field in ExtractionInformation",
                        CheckResult.Success));
            }
            else
            {
                notifier.OnCheckPerformed(
                    new CheckEventArgs(
                        "Did not find ExtractionInformation for a column called " + _dataLoadRunFieldName +
                        ", this will prevent you from viewing the resulting report subdivided by data load batch (make sure you have this column and that it is marked as extractable)",
                        CheckResult.Warning));
            }


            //pivot category is optional, without one _pivotCategory is left untouched
            if (_catalogue.PivotCategory_ExtractionInformation_ID == null)
            {
                notifier.OnCheckPerformed(
                    new CheckEventArgs(
                        "Catalogue does not have a pivot category so all records will appear as PivotCategory 'ALL'",
                        CheckResult.Warning));
            }
            else
            {
                _pivotCategory = _catalogue.PivotCategory_ExtractionInformation.GetRuntimeName();
                notifier.OnCheckPerformed(
                    new CheckEventArgs(
                        "Found time Pivot Category field " + _pivotCategory + " so we will be able to generate a categorised tesseract (evaluation, periodicity, consequence, pivot category)",
                        CheckResult.Success));
            }

            //table valued functions in the catalogue's tables are reported as a failure
            var tblValuedFunctions = _catalogue.GetTableInfoList(true).Where(t => t.IsTableValuedFunction).ToArray();

            if (tblValuedFunctions.Any())
            {
                //the closing bracket balances the one opened before the list of tables
                notifier.OnCheckPerformed(
                    new CheckEventArgs(
                        "Catalogue contains 1+ table valued function in it's TableInfos (" +
                        string.Join(",", tblValuedFunctions.Select(t => t.ToString())) + ")", CheckResult.Fail));
            }

            //a time coverage field is required
            if (_catalogue.TimeCoverage_ExtractionInformation_ID == null)
            {
                notifier.OnCheckPerformed(
                    new CheckEventArgs(
                        "Catalogue does not have a time coverage field set",
                        CheckResult.Fail));
            }
            else
            {
                var periodicityExtractionInformation = _catalogue.TimeCoverage_ExtractionInformation;

                _timePeriodicityField = periodicityExtractionInformation.GetRuntimeName();
                notifier.OnCheckPerformed(
                    new CheckEventArgs(
                        "Found time coverage field " + _timePeriodicityField,
                        CheckResult.Success));

                //case-insensitive, culture-independent search for 'date' in the declared column type
                if (periodicityExtractionInformation.ColumnInfo.Data_type.IndexOf("date", StringComparison.OrdinalIgnoreCase) < 0)
                {
                    notifier.OnCheckPerformed(
                        new CheckEventArgs(
                            "Time periodicity field " + _timePeriodicityField + " was of type " +
                            periodicityExtractionInformation.ColumnInfo.Data_type +
                            " (expected the type name to contain the word 'date' - ignoring caps).  It is possible (but unlikely) that you have dealt with this by applying a transform to the underlying ColumnInfo as part of the ExtractionInformation, if so you can ignore this message.",
                            CheckResult.Warning));
                }
                else
                {
                    notifier.OnCheckPerformed(
                        new CheckEventArgs(
                            "Time periodicity field " + _timePeriodicityField + " is a legit date!",
                            CheckResult.Success));
                }
            }
        }