/// <summary>
/// Runs the analysis built by <c>CreateAnalysis()</c> over the DataFrame returned by
/// <c>FixtureSupport.GetDFFull(session)</c> and passes the resulting context, together with
/// the repository from <c>CreateRepository()</c>, to <paramref name="func"/>.
/// </summary>
/// <param name="session">Spark session handed to the fixture helper.</param>
/// <param name="func">Callback receiving the analyzer context and the metrics repository.</param>
private static void Evaluate(SparkSession session, Action <AnalyzerContext, IMetricsRepository> func)
        {
            DataFrame fixtureData = FixtureSupport.GetDFFull(session);

            // The run is given neither a state loader nor a state persister.
            var analysis = CreateAnalysis();
            Option<IStateLoader> noStateLoader = Option<IStateLoader>.None;
            Option<IStatePersister> noStatePersister = Option<IStatePersister>.None;
            AnalyzerContext analyzerContext = analysis.Run(fixtureData, noStateLoader, noStatePersister);

            IMetricsRepository metricsRepository = CreateRepository();

            func.Invoke(analyzerContext, metricsRepository);
        }
Example #2
0
        support_saving_data_with_different_tags_and_returning_DataFrame_with_them() =>
        Evaluate(_session, (context, repository) =>
        {
            // Store the same analyzer context twice, each time under a key with a different tag set.
            var firstTags = new Dictionary<string, string>(REGION_EU_AND_DATASET_NAME);
            repository.Save(new ResultKey(DATE_ONE, firstTags), context);

            var secondTags = new Dictionary<string, string>(REGION_NA_AND_DATASET_VERSION);
            repository.Save(new ResultKey(DATE_TWO, secondTags), context);

            // Load everything back without any filter.
            DataFrame actual = repository
                .Load()
                .GetSuccessMetricsAsDataFrame(_session, Enumerable.Empty<string>());

            // Columns: the four metric columns, the dataset date, then one column per tag key.
            var expectedSchema = new StructType(new List<StructField>
            {
                new StructField("entity", new StringType()),
                new StructField("instance", new StringType()),
                new StructField("name", new StringType()),
                new StructField("value", new DoubleType()),
                new StructField("dataset_date", new LongType()),
                new StructField("region", new StringType()),
                new StructField("dataset_name", new StringType()),
                new StructField("dataset_version", new StringType())
            });

            // A tag that is absent from a key is expected as null in that key's rows.
            object[][] expectedValues =
            {
                // Rows of the first saved result
                new object[] { "Dataset", "*", "Size", 4.0, DATE_ONE, "EU", "Some", null },
                new object[] { "Column", "att1", "Completeness", 1.0, DATE_ONE, "EU", "Some", null },
                new object[] { "Column", "item", "Distinctness", 1.0, DATE_ONE, "EU", "Some", null },
                new object[] { "Multicolumn", "att1,att2", "Uniqueness", 0.25, DATE_ONE, "EU", "Some", null },
                // Rows of the second saved result
                new object[] { "Dataset", "*", "Size", 4.0, DATE_TWO, "NA", null, "2.0" },
                new object[] { "Column", "att1", "Completeness", 1.0, DATE_TWO, "NA", null, "2.0" },
                new object[] { "Column", "item", "Distinctness", 1.0, DATE_TWO, "NA", null, "2.0" },
                new object[] { "Multicolumn", "att1,att2", "Uniqueness", 0.25, DATE_TWO, "NA", null, "2.0" }
            };
            List<GenericRow> expectedRows = expectedValues
                .Select(values => new GenericRow(values))
                .ToList();

            DataFrame expected = _session.CreateDataFrame(expectedRows, expectedSchema);

            FixtureSupport.AssertSameRows(actual, expected, Option<ITestOutputHelper>.None);
        });
        // Saves one analyzer context under two result keys (DATE_ONE/REGION_EU and DATE_TWO/REGION_NA),
        // loads with After(DATE_ONE) and compares the success-metrics DataFrame with the expected rows.
        // The expected rows include those keyed by DATE_ONE, so the test expects After(DATE_ONE)
        // to keep results saved under DATE_ONE itself.
        public void save_and_retrieve_AnalyzerResults() =>
        Evaluate(_session, (context, repository) =>
        {
            var euTags = new Dictionary<string, string>(REGION_EU);
            repository.Save(new ResultKey(DATE_ONE, euTags), context);

            var naTags = new Dictionary<string, string>(REGION_NA);
            repository.Save(new ResultKey(DATE_TWO, naTags), context);

            DataFrame actual = repository
                .Load()
                .After(DATE_ONE)
                .GetSuccessMetricsAsDataFrame(_session, Enumerable.Empty<string>());

            var expectedSchema = new StructType(new List<StructField>
            {
                new StructField("entity", new StringType()),
                new StructField("instance", new StringType()),
                new StructField("name", new StringType()),
                new StructField("value", new DoubleType()),
                new StructField("dataset_date", new LongType()),
                new StructField("region", new StringType())
            });

            object[][] expectedValues =
            {
                // Rows of the result saved under DATE_ONE
                new object[] { "Dataset", "*", "Size", 4.0, DATE_ONE, "EU" },
                new object[] { "Column", "att1", "Completeness", 1.0, DATE_ONE, "EU" },
                new object[] { "Column", "item", "Distinctness", 1.0, DATE_ONE, "EU" },
                new object[] { "Multicolumn", "att1,att2", "Uniqueness", 0.25, DATE_ONE, "EU" },
                // Rows of the result saved under DATE_TWO
                new object[] { "Dataset", "*", "Size", 4.0, DATE_TWO, "NA" },
                new object[] { "Column", "att1", "Completeness", 1.0, DATE_TWO, "NA" },
                new object[] { "Column", "item", "Distinctness", 1.0, DATE_TWO, "NA" },
                new object[] { "Multicolumn", "att1,att2", "Uniqueness", 0.25, DATE_TWO, "NA" }
            };
            List<GenericRow> expectedRows = expectedValues
                .Select(values => new GenericRow(values))
                .ToList();

            DataFrame expected = _session.CreateDataFrame(expectedRows, expectedSchema);

            FixtureSupport.AssertSameRows(actual, expected, Option<ITestOutputHelper>.None);
        });
        // Saves one analyzer context under two result keys (DATE_ONE/REGION_EU and DATE_TWO/REGION_NA),
        // loads both results restricted to the Completeness("att1") and Uniqueness("att1", "att2")
        // analyzers, and checks that only those metrics appear in the success-metrics DataFrame.
        public void only_include_specifics_metrics_in_loaded_AnalysisResults_if_requested() =>
        Evaluate(_session, (context, repository) =>
        {
            repository.Save(new ResultKey(DATE_ONE, new Dictionary <string, string>(REGION_EU)), context);
            repository.Save(new ResultKey(DATE_TWO, new Dictionary <string, string>(REGION_NA)), context);

            // The date filter starts at DATE_ONE so that both saved results are loaded; the expected
            // rows below contain the EU result saved under DATE_ONE as well as the NA result.
            DataFrame analysisResultsAsDataFrame = repository.Load()
                                                   .After(DATE_ONE)
                                                   .ForAnalyzers(new List <IAnalyzer <IMetric> >
            {
                Initializers.Completeness("att1"), Initializers.Uniqueness(new[] { "att1", "att2" })
            })
                                                   .GetSuccessMetricsAsDataFrame(_session, Enumerable.Empty <string>());

            List <GenericRow> elements = new List <GenericRow>
            {
                // Result saved under DATE_ONE with the EU region tag
                new GenericRow(new object[] { "Column", "att1", "Completeness", 1.0, DATE_ONE, "EU" }),
                new GenericRow(new object[] { "Multicolumn", "att1,att2", "Uniqueness", 0.25, DATE_ONE, "EU" }),
                // Result saved under DATE_TWO with the NA region tag: each row carries the date of
                // the key it was saved with.
                new GenericRow(new object[] { "Column", "att1", "Completeness", 1.0, DATE_TWO, "NA" }),
                new GenericRow(new object[] { "Multicolumn", "att1,att2", "Uniqueness", 0.25, DATE_TWO, "NA" })
            };

            StructType schema = new StructType(
                new List <StructField>
            {
                new StructField("entity", new StringType()),
                new StructField("instance", new StringType()),
                new StructField("name", new StringType()),
                new StructField("value", new DoubleType()),
                new StructField("dataset_date", new LongType()),
                new StructField("region", new StringType())
            });

            DataFrame df = _session.CreateDataFrame(elements, schema);

            FixtureSupport.AssertSameRows(analysisResultsAsDataFrame, df, new Option <ITestOutputHelper>(_helper));
        });