Ejemplo n.º 1
0
        /// <summary>
        /// Command-line entry point. Uses the first argument as a verb to decide which group
        /// of option types to parse, then forwards the parsed options to the matching sample.
        /// </summary>
        /// <param name="args">Raw command-line arguments; <c>args[0]</c> is the verb.</param>
        public static void Main(string[] args)
        {
            // Command line argument parser we use doesn't support more than 15 options, therefore
            // we split the verbs across several ParseArguments calls.
            if (args.Length == 0)
            {
                // No verb supplied: report it and terminate the process with exit code -1.
                Console.WriteLine("Invalid number of arguments supplied");
                Environment.Exit(-1);
            }

            // Each branch maps a successful parse to a sample call and any parse errors to 1.
            // The value returned by MapResult is not used by Main.
            switch (args[0])
            {
            // Image redaction is parsed on its own with a single options type.
            case "redactImage":
                Parser.Default.ParseArguments <RedactFromImageOptions>(args).MapResult(
                    (RedactFromImageOptions options) => RedactSamples.RedactFromImage(
                        options.ProjectId,
                        options.ImageFromPath,
                        options.ImageToPath),
                    errs => 1);
                break;

            // Second verb group: risk analysis on quasi-identifiers, date shifting,
            // info type listing and storage inspection.
            case "kAnonymity":
            case "lDiversity":
            case "deidDateShift":
            case "kMap":
            case "listInfoTypes":
            case "inspectBigQuery":
            case "inspectDataStore":
            case "inspectGcs":
                Parser.Default.ParseArguments <
                    KAnonymityOptions,
                    LDiversityOptions,
                    DeidDateShiftOptions,
                    KMapOptions,
                    ListInfoTypesOptions,
                    InspectBigQueryOptions,
                    InspectDatastoreOptions,
                    InspectGcsOptions>(args).MapResult(
                    (KAnonymityOptions opts) => RiskAnalysis.KAnonymity(
                        opts.CallingProjectId,
                        opts.TableProjectId,
                        opts.DatasetId,
                        opts.TableId,
                        opts.TopicId,
                        opts.SubscriptionId,
                        DlpSamplesUtils.ParseQuasiIds(opts.QuasiIdColumns)),
                    (LDiversityOptions opts) => RiskAnalysis.LDiversity(
                        opts.CallingProjectId,
                        opts.TableProjectId,
                        opts.DatasetId,
                        opts.TableId,
                        opts.TopicId,
                        opts.SubscriptionId,
                        DlpSamplesUtils.ParseQuasiIds(opts.QuasiIdColumns),
                        opts.SensitiveAttribute),
                    (KMapOptions opts) => RiskAnalysis.KMap(
                        opts.CallingProjectId,
                        opts.TableProjectId,
                        opts.DatasetId,
                        opts.TableId,
                        opts.TopicId,
                        opts.SubscriptionId,
                        DlpSamplesUtils.ParseQuasiIds(opts.QuasiIdColumns),
                        DlpSamplesUtils.ParseInfoTypes(opts.InfoTypes),
                        opts.RegionCode),
                    (DeidDateShiftOptions opts) => DeIdentify.DeidDateShift(
                        opts.ProjectId,
                        opts.InputCsvFile,
                        opts.OutputCsvFile,
                        opts.LowerBoundDays,
                        opts.UpperBoundDays,
                        opts.DateFields,
                        opts.ContextFieldId,
                        opts.KeyName,
                        opts.WrappedKey),
                    (ListInfoTypesOptions opts) => Metadata.ListInfoTypes(
                        opts.LanguageCode,
                        opts.Filter),
                    // The CLI flag is the negative form (NoIncludeQuote), so it is inverted
                    // before being passed to the inspect samples.
                    (InspectBigQueryOptions opts) => InspectSamples.InspectBigQuery(
                        opts.ProjectId,
                        opts.MinLikelihood,
                        opts.MaxFindings,
                        !opts.NoIncludeQuote,
                        DlpSamplesUtils.ParseIdentifyingFields(opts.IdentifyingFields),
                        DlpSamplesUtils.ParseInfoTypes(opts.InfoTypes),
                        DlpSamplesUtils.ParseCustomInfoTypes(opts.CustomDictionary, opts.CustomRegexes),
                        opts.DatasetId,
                        opts.TableId),
                    (InspectDatastoreOptions opts) => InspectSamples.InspectCloudDataStore(
                        opts.ProjectId,
                        opts.MinLikelihood,
                        opts.MaxFindings,
                        !opts.NoIncludeQuote,
                        opts.KindName,
                        opts.NamespaceId,
                        DlpSamplesUtils.ParseInfoTypes(opts.InfoTypes),
                        DlpSamplesUtils.ParseCustomInfoTypes(opts.CustomDictionary, opts.CustomRegexes),
                        opts.DatasetId,
                        opts.TableId),
                    (InspectGcsOptions opts) => InspectSamples.InspectGCS(
                        opts.ProjectId,
                        opts.MinLikelihood,
                        opts.MaxFindings,
                        !opts.NoIncludeQuote,
                        DlpSamplesUtils.ParseInfoTypes(opts.InfoTypes),
                        DlpSamplesUtils.ParseCustomInfoTypes(opts.CustomDictionary, opts.CustomRegexes),
                        opts.BucketName,
                        opts.TopicId,
                        opts.SubscriptionId),
                    errs => 1);
                break;

            // Every other first argument is parsed against the remaining option types.
            default:
                Parser.Default.ParseArguments <
                    InspectStringOptions,
                    InspectFileOptions,
                    CreateTemplateOptions,
                    ListTemplatesOptions,
                    DeleteTemplatesOptions,
                    DeidMaskOptions,
                    DeidFpeOptions,
                    ReidFpeOptions,
                    ListJobsOptions,
                    DeleteJobOptions,
                    CreateJobTriggerOptions,
                    ListJobTriggersOptions,
                    DeleteJobTriggerOptions,
                    NumericalStatsOptions,
                    CategoricalStatsOptions>(args)
                .MapResult(
                    (InspectStringOptions opts) => InspectSamples.InspectString(
                        opts.ProjectId,
                        opts.Value,
                        opts.MinLikelihood,
                        opts.MaxFindings,
                        !opts.NoIncludeQuote,
                        DlpSamplesUtils.ParseInfoTypes(opts.InfoTypes),
                        DlpSamplesUtils.ParseCustomInfoTypes(opts.CustomDictionary, opts.CustomRegexes)),
                    (InspectFileOptions opts) => InspectSamples.InspectFile(
                        opts.ProjectId,
                        opts.File,
                        opts.MinLikelihood,
                        opts.MaxFindings,
                        !opts.NoIncludeQuote,
                        DlpSamplesUtils.ParseInfoTypes(opts.InfoTypes),
                        DlpSamplesUtils.ParseCustomInfoTypes(opts.CustomDictionary, opts.CustomRegexes)),
                    (CreateTemplateOptions opts) => InspectTemplates.CreateInspectTemplate(
                        opts.ProjectId,
                        opts.TemplateId,
                        opts.DisplayName,
                        opts.Description,
                        opts.MinLikelihood,
                        opts.MaxFindings,
                        !opts.NoIncludeQuote),
                    (ListTemplatesOptions opts) => InspectTemplates.ListInspectTemplate(opts.ProjectId),
                    (DeleteTemplatesOptions opts) => InspectTemplates.DeleteInspectTemplate(opts.ProjectId, opts.TemplateName),
                    (DeidMaskOptions opts) => DeIdentify.DeidMask(
                        opts.ProjectId,
                        opts.Value,
                        opts.InfoTypes,
                        opts.Mask,
                        opts.Num,
                        opts.Reverse),
                    (DeidFpeOptions opts) => DeIdentify.DeidFpe(
                        opts.ProjectId,
                        opts.Value,
                        opts.KeyName,
                        opts.WrappedKeyFile,
                        opts.Alphabet),
                    (ReidFpeOptions opts) => DeIdentify.ReidFpe(
                        opts.ProjectId,
                        opts.Value,
                        opts.KeyName,
                        opts.WrappedKeyFile,
                        opts.Alphabet),
                    (ListJobsOptions opts) => Jobs.ListJobs(
                        opts.ProjectId,
                        opts.Filter,
                        opts.JobType),
                    (DeleteJobOptions opts) => Jobs.DeleteJob(opts.JobName),
                    (CreateJobTriggerOptions opts) => JobTriggers.CreateJobTrigger(
                        opts.ProjectId,
                        opts.BucketName,
                        opts.MinLikelihood,
                        opts.MaxFindings,
                        opts.AutoPopulateTimespan,
                        opts.ScanPeriod,
                        DlpSamplesUtils.ParseInfoTypes(opts.InfoTypes),
                        opts.TriggerId,
                        opts.DisplayName,
                        opts.Description
                        ),
                    (ListJobTriggersOptions opts) => JobTriggers.ListJobTriggers(
                        opts.ProjectId
                        ),
                    (DeleteJobTriggerOptions opts) => JobTriggers.DeleteJobTrigger(
                        opts.TriggerName
                        ),
                    (NumericalStatsOptions opts) => RiskAnalysis.NumericalStats(
                        opts.CallingProjectId,
                        opts.TableProjectId,
                        opts.DatasetId,
                        opts.TableId,
                        opts.TopicId,
                        opts.SubscriptionId,
                        opts.ColumnName
                        ),
                    (CategoricalStatsOptions opts) => RiskAnalysis.CategoricalStats(
                        opts.CallingProjectId,
                        opts.TableProjectId,
                        opts.DatasetId,
                        opts.TableId,
                        opts.TopicId,
                        opts.SubscriptionId,
                        opts.ColumnName
                        ),
                    errs => 1);
                break;
            }
        }
Ejemplo n.º 2
0
        // [END dlp_l_diversity]

        // [START dlp_k_map]
        /// <summary>
        /// Submits a k-map estimation risk analysis job for a BigQuery table, waits for the
        /// job's Pub/Sub notification, and prints the resulting histogram to the console.
        /// </summary>
        /// <param name="callingProjectId">Project used as the job parent and in the Pub/Sub topic and subscription names.</param>
        /// <param name="tableProjectId">Project ID of the BigQuery source table.</param>
        /// <param name="datasetId">Dataset ID of the BigQuery source table.</param>
        /// <param name="tableId">Table ID of the BigQuery source table.</param>
        /// <param name="topicId">Pub/Sub topic (under <paramref name="callingProjectId"/>) the job publishes to.</param>
        /// <param name="subscriptionId">Pub/Sub subscription (under <paramref name="callingProjectId"/>) that is listened on.</param>
        /// <param name="quasiIds">Quasi-identifier fields; paired positionally with <paramref name="infoTypes"/>.</param>
        /// <param name="infoTypes">Info types; paired positionally with <paramref name="quasiIds"/>.</param>
        /// <param name="regionCode">Value assigned to the estimation config's <c>RegionCode</c>.</param>
        /// <returns>Always <c>0</c> once the results have been printed.</returns>
        /// <exception cref="TimeoutException">No notification for the submitted job arrived within ten minutes.</exception>
        public static object KMap(
            string callingProjectId,
            string tableProjectId,
            string datasetId,
            string tableId,
            string topicId,
            string subscriptionId,
            IEnumerable <FieldId> quasiIds,
            IEnumerable <InfoType> infoTypes,
            string regionCode)
        {
            DlpServiceClient dlp = DlpServiceClient.Create();

            // Construct + submit the job
            var kmapEstimationConfig = new KMapEstimationConfig
            {
                QuasiIds =
                {
                    // Zip pairs the two sequences by position and stops at the shorter one,
                    // so surplus entries in either sequence are ignored.
                    quasiIds.Zip(
                        infoTypes,
                        (field, infoType) => new TaggedField
                        {
                            Field    = field,
                            InfoType = infoType
                        })
                },
                RegionCode = regionCode
            };

            var config = new RiskAnalysisJobConfig()
            {
                PrivacyMetric = new PrivacyMetric
                {
                    KMapEstimationConfig = kmapEstimationConfig
                },
                SourceTable = new BigQueryTable
                {
                    ProjectId = tableProjectId,
                    DatasetId = datasetId,
                    TableId   = tableId
                },
                Actions =
                {
                    new Google.Cloud.Dlp.V2.Action
                    {
                        PubSub = new PublishToPubSub
                        {
                            Topic = $"projects/{callingProjectId}/topics/{topicId}"
                        }
                    }
                }
            };

            var submittedJob = dlp.CreateDlpJob(
                new CreateDlpJobRequest
                {
                    ParentAsProjectName = new Google.Cloud.Dlp.V2.ProjectName(callingProjectId),
                    RiskJob             = config
                });

            // Listen to pub/sub for the job
            var subscriptionName = new SubscriptionName(
                callingProjectId,
                subscriptionId);
            SubscriberClient subscriber = SubscriberClient.Create(
                subscriptionName, new[] { SubscriberServiceApiClient.Create() });

            // SubscriberClient runs your message handle function on multiple
            // threads to maximize throughput.
            var done = new ManualResetEventSlim(false);

            subscriber.StartAsync((PubsubMessage message, CancellationToken cancel) =>
            {
                // A message without the attribute is treated like a message for another
                // job (Nack) instead of throwing from the indexer inside the handler.
                string notifiedJobName;
                bool isSubmittedJob =
                    message.Attributes.TryGetValue("DlpJobName", out notifiedJobName) &&
                    notifiedJobName == submittedJob.Name;
                if (!isSubmittedJob)
                {
                    return Task.FromResult(SubscriberClient.Reply.Nack);
                }

                Thread.Sleep(500); // Wait for DLP API results to become consistent
                done.Set();
                return Task.FromResult(SubscriberClient.Reply.Ack);
            });

            // 10 minute timeout; may not work for large jobs
            bool notified = done.Wait(TimeSpan.FromMinutes(10));
            subscriber.StopAsync(CancellationToken.None).Wait();

            // Without a notification the job results cannot be assumed to exist, so fail
            // with a clear error instead of reading results of an unfinished job.
            if (!notified)
            {
                throw new TimeoutException(
                    $"Timed out waiting for a Pub/Sub notification for DLP job '{submittedJob.Name}'.");
            }

            // Process results
            var resultJob = dlp.GetDlpJob(new GetDlpJobRequest
            {
                DlpJobName = DlpJobName.Parse(submittedJob.Name)
            });

            var result = resultJob.RiskDetails.KMapEstimationResult;

            for (int histogramIdx = 0; histogramIdx < result.KMapEstimationHistogram.Count; histogramIdx++)
            {
                var histogramValue = result.KMapEstimationHistogram[histogramIdx];
                Console.WriteLine($"Bucket {histogramIdx}");
                Console.WriteLine($"  Anonymity range: [{histogramValue.MinAnonymity}, {histogramValue.MaxAnonymity}].");
                Console.WriteLine($"  Size: {histogramValue.BucketSize}");

                foreach (var datapoint in histogramValue.BucketValues)
                {
                    // 'UnpackValue(x)' is a prettier version of 'x.toString()'
                    Console.WriteLine($"    Values: [{String.Join(',', datapoint.QuasiIdsValues.Select(x => DlpSamplesUtils.UnpackValue(x)))}]");
                    Console.WriteLine($"    Estimated k-map anonymity: {datapoint.EstimatedAnonymity}");
                }
            }

            return 0;
        }
Ejemplo n.º 3
0
        // [START dlp_numerical_stats]
        /// <summary>
        /// Submits a numerical statistics risk analysis job for one column of a BigQuery
        /// table, waits for the job's Pub/Sub notification, and prints the value range and
        /// quantile values to the console.
        /// </summary>
        /// <param name="callingProjectId">Project used as the job parent and in the Pub/Sub topic and subscription names.</param>
        /// <param name="tableProjectId">Project ID of the BigQuery source table.</param>
        /// <param name="datasetId">Dataset ID of the BigQuery source table.</param>
        /// <param name="tableId">Table ID of the BigQuery source table.</param>
        /// <param name="topicId">Pub/Sub topic (under <paramref name="callingProjectId"/>) the job publishes to.</param>
        /// <param name="subscriptionId">Pub/Sub subscription (under <paramref name="callingProjectId"/>) that is listened on.</param>
        /// <param name="columnName">Name of the field the statistics are computed for.</param>
        /// <returns>Always <c>0</c> once the results have been printed.</returns>
        /// <exception cref="TimeoutException">No notification for the submitted job arrived within ten minutes.</exception>
        public static object NumericalStats(
            string callingProjectId,
            string tableProjectId,
            string datasetId,
            string tableId,
            string topicId,
            string subscriptionId,
            string columnName)
        {
            DlpServiceClient dlp = DlpServiceClient.Create();

            // Construct + submit the job
            var config = new RiskAnalysisJobConfig
            {
                PrivacyMetric = new PrivacyMetric
                {
                    NumericalStatsConfig = new NumericalStatsConfig
                    {
                        Field = new FieldId { Name = columnName }
                    }
                },
                SourceTable = new BigQueryTable
                {
                    ProjectId = tableProjectId,
                    DatasetId = datasetId,
                    TableId   = tableId
                },
                Actions =
                {
                    new Google.Cloud.Dlp.V2.Action
                    {
                        PubSub = new PublishToPubSub
                        {
                            Topic = $"projects/{callingProjectId}/topics/{topicId}"
                        }
                    }
                }
            };

            var submittedJob = dlp.CreateDlpJob(
                new CreateDlpJobRequest
                {
                    ParentAsProjectName = new Google.Cloud.Dlp.V2.ProjectName(callingProjectId),
                    RiskJob             = config
                });

            // Listen to pub/sub for the job
            var subscriptionName        = new SubscriptionName(callingProjectId, subscriptionId);
            SubscriberClient subscriber = SubscriberClient.Create(
                subscriptionName, new[] { SubscriberServiceApiClient.Create() });

            // SubscriberClient runs your message handle function on multiple
            // threads to maximize throughput.
            var done = new ManualResetEventSlim(false);

            subscriber.StartAsync((PubsubMessage message, CancellationToken cancel) =>
            {
                // A message without the attribute is treated like a message for another
                // job (Nack) instead of throwing from the indexer inside the handler.
                string notifiedJobName;
                bool isSubmittedJob =
                    message.Attributes.TryGetValue("DlpJobName", out notifiedJobName) &&
                    notifiedJobName == submittedJob.Name;
                if (!isSubmittedJob)
                {
                    return Task.FromResult(SubscriberClient.Reply.Nack);
                }

                Thread.Sleep(500); // Wait for DLP API results to become consistent
                done.Set();
                return Task.FromResult(SubscriberClient.Reply.Ack);
            });

            // 10 minute timeout; may not work for large jobs
            bool notified = done.Wait(TimeSpan.FromMinutes(10));
            subscriber.StopAsync(CancellationToken.None).Wait();

            // Without a notification the job results cannot be assumed to exist, so fail
            // with a clear error instead of reading results of an unfinished job.
            if (!notified)
            {
                throw new TimeoutException(
                    $"Timed out waiting for a Pub/Sub notification for DLP job '{submittedJob.Name}'.");
            }

            // Process results
            var resultJob = dlp.GetDlpJob(
                new GetDlpJobRequest
                {
                    DlpJobName = DlpJobName.Parse(submittedJob.Name)
                });

            var result = resultJob.RiskDetails.NumericalStatsResult;

            // 'UnpackValue(x)' is a prettier version of 'x.toString()'
            Console.WriteLine($"Value Range: [{DlpSamplesUtils.UnpackValue(result.MinValue)}, {DlpSamplesUtils.UnpackValue(result.MaxValue)}]");
            string lastValue = string.Empty;

            for (int quantile = 0; quantile < result.QuantileValues.Count; quantile++)
            {
                string currentValue = DlpSamplesUtils.UnpackValue(result.QuantileValues[quantile]);

                // Only print a quantile when its value differs from the previous one, so
                // runs of identical values collapse into a single line.
                if (lastValue != currentValue)
                {
                    Console.WriteLine($"Value at {quantile + 1}% quantile: {currentValue}");
                }
                lastValue = currentValue;
            }

            return 0;
        }
Ejemplo n.º 4
0
        // [END dlp_k_anonymity]

        // [START dlp_l_diversity]
        /// <summary>
        /// Submits an l-diversity risk analysis job for a BigQuery table, waits for the
        /// job's Pub/Sub notification, and prints the sensitive value frequency histogram
        /// to the console.
        /// </summary>
        /// <param name="callingProjectId">Project used as the job parent and in the Pub/Sub topic and subscription names.</param>
        /// <param name="tableProjectId">Project ID of the BigQuery source table.</param>
        /// <param name="datasetId">Dataset ID of the BigQuery source table.</param>
        /// <param name="tableId">Table ID of the BigQuery source table.</param>
        /// <param name="topicId">Pub/Sub topic (under <paramref name="callingProjectId"/>) the job publishes to.</param>
        /// <param name="subscriptionId">Pub/Sub subscription (under <paramref name="callingProjectId"/>) that is listened on.</param>
        /// <param name="quasiIds">Quasi-identifier fields added to the l-diversity config.</param>
        /// <param name="sensitiveAttribute">Name of the field used as the sensitive attribute.</param>
        /// <returns>Always <c>0</c> once the results have been printed.</returns>
        /// <exception cref="TimeoutException">No notification for the submitted job arrived within ten minutes.</exception>
        public static object LDiversity(
            string callingProjectId,
            string tableProjectId,
            string datasetId,
            string tableId,
            string topicId,
            string subscriptionId,
            IEnumerable <FieldId> quasiIds,
            string sensitiveAttribute)
        {
            DlpServiceClient dlp = DlpServiceClient.Create();

            // Construct + submit the job
            var ldiversityConfig = new LDiversityConfig
            {
                SensitiveAttribute = new FieldId { Name = sensitiveAttribute },
                QuasiIds = { quasiIds }
            };

            var config = new RiskAnalysisJobConfig
            {
                PrivacyMetric = new PrivacyMetric
                {
                    LDiversityConfig = ldiversityConfig
                },
                SourceTable = new BigQueryTable
                {
                    ProjectId = tableProjectId,
                    DatasetId = datasetId,
                    TableId   = tableId
                },
                Actions =
                {
                    new Google.Cloud.Dlp.V2.Action
                    {
                        PubSub = new PublishToPubSub
                        {
                            Topic = $"projects/{callingProjectId}/topics/{topicId}"
                        }
                    }
                }
            };

            var submittedJob = dlp.CreateDlpJob(
                new CreateDlpJobRequest
                {
                    ParentAsProjectName = new Google.Cloud.Dlp.V2.ProjectName(callingProjectId),
                    RiskJob             = config
                });

            // Listen to pub/sub for the job
            var subscriptionName        = new SubscriptionName(callingProjectId, subscriptionId);
            SubscriberClient subscriber = SubscriberClient.Create(
                subscriptionName, new[] { SubscriberServiceApiClient.Create() });

            // SubscriberClient runs your message handle function on multiple
            // threads to maximize throughput.
            var done = new ManualResetEventSlim(false);

            subscriber.StartAsync((PubsubMessage message, CancellationToken cancel) =>
            {
                // A message without the attribute is treated like a message for another
                // job (Nack) instead of throwing from the indexer inside the handler.
                string notifiedJobName;
                bool isSubmittedJob =
                    message.Attributes.TryGetValue("DlpJobName", out notifiedJobName) &&
                    notifiedJobName == submittedJob.Name;
                if (!isSubmittedJob)
                {
                    return Task.FromResult(SubscriberClient.Reply.Nack);
                }

                Thread.Sleep(500); // Wait for DLP API results to become consistent
                done.Set();
                return Task.FromResult(SubscriberClient.Reply.Ack);
            });

            // 10 minute timeout; may not work for large jobs
            bool notified = done.Wait(TimeSpan.FromMinutes(10));
            subscriber.StopAsync(CancellationToken.None).Wait();

            // Without a notification the job results cannot be assumed to exist, so fail
            // with a clear error instead of reading results of an unfinished job.
            if (!notified)
            {
                throw new TimeoutException(
                    $"Timed out waiting for a Pub/Sub notification for DLP job '{submittedJob.Name}'.");
            }

            // Process results
            var resultJob = dlp.GetDlpJob(
                new GetDlpJobRequest
                {
                    DlpJobName = DlpJobName.Parse(submittedJob.Name)
                });

            var result = resultJob.RiskDetails.LDiversityResult;

            for (int bucketIdx = 0; bucketIdx < result.SensitiveValueFrequencyHistogramBuckets.Count; bucketIdx++)
            {
                var bucket = result.SensitiveValueFrequencyHistogramBuckets[bucketIdx];
                Console.WriteLine($"Bucket {bucketIdx}");
                Console.WriteLine($"  Bucket size range: [{bucket.SensitiveValueFrequencyLowerBound}, {bucket.SensitiveValueFrequencyUpperBound}].");
                Console.WriteLine($"  {bucket.BucketSize} unique value(s) total.");

                foreach (var bucketValue in bucket.BucketValues)
                {
                    // 'UnpackValue(x)' is a prettier version of 'x.toString()'
                    Console.WriteLine($"    Quasi-ID values: [{String.Join(',', bucketValue.QuasiIdsValues.Select(x => DlpSamplesUtils.UnpackValue(x)))}]");
                    Console.WriteLine($"    Class size: {bucketValue.EquivalenceClassSize}");

                    foreach (var topValue in bucketValue.TopSensitiveValues)
                    {
                        Console.WriteLine($"    Sensitive value {DlpSamplesUtils.UnpackValue(topValue.Value)} occurs {topValue.Count} time(s).");
                    }
                }
            }

            return 0;
        }
        // [END dlp_numerical_stats]

        // [START dlp_categorical_stats]
        /// <summary>
        /// Submits a categorical statistics risk analysis job for one column of a BigQuery
        /// table, waits for the job's Pub/Sub notification, and prints the value frequency
        /// histogram to the console.
        /// </summary>
        /// <param name="callingProjectId">Project used as the job parent and in the Pub/Sub topic and subscription names.</param>
        /// <param name="tableProjectId">Project ID of the BigQuery source table.</param>
        /// <param name="datasetId">Dataset ID of the BigQuery source table.</param>
        /// <param name="tableId">Table ID of the BigQuery source table.</param>
        /// <param name="topicId">Pub/Sub topic (under <paramref name="callingProjectId"/>) the job publishes to.</param>
        /// <param name="subscriptionId">Pub/Sub subscription (under <paramref name="callingProjectId"/>) that is listened on.</param>
        /// <param name="columnName">Name of the field the statistics are computed for.</param>
        /// <returns>Always <c>0</c> once the results have been printed.</returns>
        /// <exception cref="TimeoutException">No notification for the submitted job arrived within ten minutes.</exception>
        public static object CategoricalStats(
            string callingProjectId,
            string tableProjectId,
            string datasetId,
            string tableId,
            string topicId,
            string subscriptionId,
            string columnName)
        {
            DlpServiceClient dlp = DlpServiceClient.Create();

            // Construct + submit the job
            RiskAnalysisJobConfig config = new RiskAnalysisJobConfig
            {
                PrivacyMetric = new PrivacyMetric
                {
                    CategoricalStatsConfig = new CategoricalStatsConfig()
                    {
                        Field = new FieldId { Name = columnName }
                    }
                },
                SourceTable = new BigQueryTable
                {
                    ProjectId = tableProjectId,
                    DatasetId = datasetId,
                    TableId   = tableId
                },
                Actions =
                {
                    new Google.Cloud.Dlp.V2.Action
                    {
                        PubSub = new PublishToPubSub
                        {
                            Topic = $"projects/{callingProjectId}/topics/{topicId}"
                        }
                    }
                }
            };

            var submittedJob = dlp.CreateDlpJob(new CreateDlpJobRequest
            {
                ParentAsProjectName = new ProjectName(callingProjectId),
                RiskJob             = config
            });

            // Listen to pub/sub for the job
            var subscriptionName        = new SubscriptionName(callingProjectId, subscriptionId);
            SubscriberClient subscriber = SubscriberClient.CreateAsync(
                subscriptionName).Result;

            // SubscriberClient runs your message handle function on multiple
            // threads to maximize throughput.
            var done = new ManualResetEventSlim(false);

            subscriber.StartAsync((PubsubMessage message, CancellationToken cancel) =>
            {
                // A message without the attribute is treated like a message for another
                // job (Nack) instead of throwing from the indexer inside the handler.
                string notifiedJobName;
                bool isSubmittedJob =
                    message.Attributes.TryGetValue("DlpJobName", out notifiedJobName) &&
                    notifiedJobName == submittedJob.Name;
                if (!isSubmittedJob)
                {
                    return Task.FromResult(SubscriberClient.Reply.Nack);
                }

                Thread.Sleep(500); // Wait for DLP API results to become consistent
                done.Set();
                return Task.FromResult(SubscriberClient.Reply.Ack);
            });

            // 10 minute timeout; may not work for large jobs
            bool notified = done.Wait(TimeSpan.FromMinutes(10));
            subscriber.StopAsync(CancellationToken.None).Wait();

            // Without a notification the job results cannot be assumed to exist, so fail
            // with a clear error instead of reading results of an unfinished job.
            if (!notified)
            {
                throw new TimeoutException(
                    $"Timed out waiting for a Pub/Sub notification for DLP job '{submittedJob.Name}'.");
            }

            // Process results
            var resultJob = dlp.GetDlpJob(new GetDlpJobRequest
            {
                DlpJobName = DlpJobName.Parse(submittedJob.Name)
            });

            var result = resultJob.RiskDetails.CategoricalStatsResult;

            for (int bucketIdx = 0; bucketIdx < result.ValueFrequencyHistogramBuckets.Count; bucketIdx++)
            {
                var bucket = result.ValueFrequencyHistogramBuckets[bucketIdx];
                Console.WriteLine($"Bucket {bucketIdx}");
                Console.WriteLine($"  Most common value occurs {bucket.ValueFrequencyUpperBound} time(s).");
                Console.WriteLine($"  Least common value occurs {bucket.ValueFrequencyLowerBound} time(s).");
                Console.WriteLine($"  {bucket.BucketSize} unique value(s) total.");

                foreach (var bucketValue in bucket.BucketValues)
                {
                    // 'UnpackValue(x)' is a prettier version of 'x.toString()'
                    Console.WriteLine($"  Value {DlpSamplesUtils.UnpackValue(bucketValue.Value)} occurs {bucketValue.Count} time(s).");
                }
            }

            return 0;
        }