Example #1
0
        /// <summary>
        /// Runs the "integrity checks" verification suite against the business data set.
        /// </summary>
        /// <param name="data">Data frame holding the business records to verify.</param>
        /// <returns>The result produced by running the verification suite.</returns>
        private static VerificationResult BusinessDataTests(DataFrame data)
        {
            // NOTE(review): '|' has the lowest precedence in a regex, so this pattern is an
            // alternation of "^<time>" and "-<time>$" rather than one anchored
            // "<time>-<time>" range. Left unchanged on purpose; confirm the intended format.
            Regex timeMatching = new Regex("^([0-1]?[0-9]|2[0-3]):[0-5]?[0-9]|-([0-1]?[0-9]|2[0-3]):[0-5]?[0-9]$");

            Check integrityChecks = new Check(CheckLevel.Error, "integrity checks")
                .IsUnique("business_id")
                .AreComplete(new[] { "business_id", "name", "address", "city", "state", "postal_code" })
                .IsComplete("stars")
                // Geographic bounds: latitude spans -90..90 and longitude spans -180..180 degrees.
                .IsContainedIn("latitude", -90, 90)
                .IsContainedIn("longitude", -180, 180)
                .IsContainedIn("stars", 0, 5)
                // Each opening-hours column is checked against the time pattern; the assertion
                // requires the reported value to reach 0.50 (0.40 for Sunday).
                .HasPattern("hours.Monday", timeMatching, value => value >= .50)
                .HasPattern("hours.Tuesday", timeMatching, value => value >= .50)
                .HasPattern("hours.Wednesday", timeMatching, value => value >= .50)
                .HasPattern("hours.Thursday", timeMatching, value => value >= .50)
                .HasPattern("hours.Friday", timeMatching, value => value >= .50)
                .HasPattern("hours.Saturday", timeMatching, value => value >= .50)
                .HasPattern("hours.Sunday", timeMatching, value => value >= .40);

            VerificationResult verificationResult = new VerificationSuite()
                .OnData(data)
                .AddCheck(integrityChecks)
                .Run();

            verificationResult.Debug();
            return verificationResult;
        }
Example #2
0
        /// <summary>
        /// Reads "data/inventory.json" through a Spark session, shows it, and runs an
        /// error-level "integrity checks" check plus a warning-level "distribution checks"
        /// check against it, then calls Debug() on the result.
        /// </summary>
        public static void ExecuteSimpleVerificationSuiteWithExternalFile()
        {
            var session = SparkSession.Builder().GetOrCreate();
            var inventory = session.Read().Json("data/inventory.json");

            inventory.Show();

            // The suite builder is created before the checks, as in the fluent one-liner form.
            var suite = new VerificationSuite().OnData(inventory);

            var integrity = new Check(CheckLevel.Error, "integrity checks")
                .HasSize(size => size == 5)
                .IsComplete("id")
                .IsUnique("id")
                .IsComplete("productName")
                .IsContainedIn("priority", new[] { "high", "low" })
                .IsNonNegative("numViews");

            var distribution = new Check(CheckLevel.Warning, "distribution checks")
                .ContainsURL("description", ratio => ratio >= .5);

            VerificationResult outcome = suite
                .AddCheck(integrity)
                .AddCheck(distribution)
                .Run();

            outcome.Debug();
        }
Example #3
0
        /// <summary>
        /// Runs two checks against the reviews data set: an error-level "integrity checks"
        /// check and a warning-level "semantic checks" check.
        /// </summary>
        /// <param name="data">Data frame holding the review records to verify.</param>
        /// <returns>The result produced by running the verification suite.</returns>
        private static VerificationResult ReviewsDataTests(DataFrame data)
        {
            // The suite builder is created before the checks, as in the fluent one-liner form.
            var suite = new VerificationSuite().OnData(data);

            var integrity = new Check(CheckLevel.Error, "integrity checks")
                .IsUnique("review_id")
                .AreComplete(new[] { "review_id", "user_id", "business_id" })
                .AreComplete(new[] { "stars", "useful", "funny", "cool" })
                .IsComplete("date")
                .IsContainedIn("stars", 0, 5);

            // Every assertion in this check requires the reported value to be at most 0.2.
            var semantic = new Check(CheckLevel.Warning, "semantic checks")
                .ContainsURL("text", ratio => ratio <= .2)
                .ContainsCreditCardNumber("text", ratio => ratio <= .2)
                .ContainsEmail("text", ratio => ratio <= .2)
                .HasMin("useful", minimum => minimum <= .2)
                .HasMin("funny", minimum => minimum <= .2)
                .HasMin("cool", minimum => minimum <= .2);

            VerificationResult outcome = suite
                .AddCheck(integrity)
                .AddCheck(semantic)
                .Run();

            outcome.Debug();
            return outcome;
        }
Example #4
0
        /// <summary>
        /// Builds a five-row in-memory data frame (id, productName, description, priority,
        /// numViews) and runs an error-level "integrity checks" check plus a warning-level
        /// "distribution checks" check against it, then calls Debug() on the result.
        /// </summary>
        public static void ExecuteSimpleVerificationSuite()
        {
            var spark = SparkSession.Builder().GetOrCreate();

            var rows = new List<GenericRow>
            {
                new GenericRow(new object[] { 1, "Thingy A", "awesome thing.", "high", 0 }),
                new GenericRow(new object[] { 2, "Thingy B", "available at http://thingb.com", null, 0 }),
                new GenericRow(new object[] { 3, null, null, "low", 5 }),
                new GenericRow(new object[] { 4, "Thingy D", "checkout https://thingd.ca", "low", 10 }),
                new GenericRow(new object[] { 5, "Thingy E", null, "high", 12 })
            };

            var schema = new StructType(new List<StructField>
            {
                new StructField("id", new IntegerType()),
                new StructField("productName", new StringType()),
                new StructField("description", new StringType()),
                new StructField("priority", new StringType()),
                new StructField("numViews", new IntegerType()),
            });

            var data = spark.CreateDataFrame(rows, schema);

            var integrityChecks = new Check(CheckLevel.Error, "integrity checks")
                .HasSize(value => value == 5)
                .IsComplete("id")
                .IsUnique("id")
                .IsComplete("productName")
                .IsContainedIn("priority", new[] { "high", "low" })
                .IsNonNegative("numViews");

            // Threshold comparison instead of exact floating-point equality: '== .5' only
            // passes when the computed value is bit-for-bit 0.5. '>= .5' also matches the
            // external-file variant of this suite.
            var distributionChecks = new Check(CheckLevel.Warning, "distribution checks")
                .ContainsURL("description", value => value >= .5);

            var verificationResult = new VerificationSuite()
                .OnData(data)
                .AddCheck(integrityChecks)
                .AddCheck(distributionChecks)
                .Run();

            verificationResult.Debug();
        }