// [END dlp_l_diversity]
// [START dlp_k_map]
/// <summary>
/// Submits a k-map estimation risk analysis job for a BigQuery table, waits for the
/// Pub/Sub message announcing that job, and prints the resulting k-map estimation
/// histogram to the console.
/// </summary>
/// <param name="callingProjectId">Project used as the parent of the DLP job and as the owner of the Pub/Sub topic and subscription.</param>
/// <param name="tableProjectId">Project that contains the BigQuery table to analyze.</param>
/// <param name="datasetId">BigQuery dataset that contains the table.</param>
/// <param name="tableId">BigQuery table to analyze.</param>
/// <param name="topicId">Pub/Sub topic (in <paramref name="callingProjectId"/>) the job publishes its notification to.</param>
/// <param name="subscriptionId">Pub/Sub subscription (in <paramref name="callingProjectId"/>) used to receive the notification.</param>
/// <param name="quasiIds">Quasi-identifier fields; paired positionally with <paramref name="infoTypes"/>.</param>
/// <param name="infoTypes">Info types tagged onto the quasi-identifiers. Pairing uses Zip, so surplus elements of the longer sequence are ignored.</param>
/// <param name="regionCode">Region code passed to the k-map estimation config.</param>
/// <returns>Always <c>0</c>.</returns>
/// <exception cref="TimeoutException">No notification arrived within 10 minutes and the job has no k-map results.</exception>
/// <exception cref="InvalidOperationException">The notification arrived but the job has no k-map results.</exception>
public static object KMap(
    string callingProjectId,
    string tableProjectId,
    string datasetId,
    string tableId,
    string topicId,
    string subscriptionId,
    IEnumerable<FieldId> quasiIds,
    IEnumerable<InfoType> infoTypes,
    string regionCode)
{
    var dlp = DlpServiceClient.Create();

    // Construct + submit the job
    var kmapEstimationConfig = new KMapEstimationConfig
    {
        QuasiIds =
        {
            quasiIds.Zip(
                infoTypes,
                (field, infoType) => new TaggedField
                {
                    Field = field,
                    InfoType = infoType
                })
        },
        RegionCode = regionCode
    };

    var config = new RiskAnalysisJobConfig()
    {
        PrivacyMetric = new PrivacyMetric
        {
            KMapEstimationConfig = kmapEstimationConfig
        },
        SourceTable = new BigQueryTable
        {
            ProjectId = tableProjectId,
            DatasetId = datasetId,
            TableId = tableId
        },
        Actions =
        {
            new Google.Cloud.Dlp.V2.Action
            {
                PubSub = new PublishToPubSub
                {
                    Topic = $"projects/{callingProjectId}/topics/{topicId}"
                }
            }
        }
    };

    var submittedJob = dlp.CreateDlpJob(
        new CreateDlpJobRequest
        {
            ParentAsProjectName = new ProjectName(callingProjectId),
            RiskJob = config
        });

    // Listen to pub/sub for the job
    var subscriptionName = new SubscriptionName(callingProjectId, subscriptionId);
    var subscriber = SubscriberClient.CreateAsync(subscriptionName).Result;

    // The subscriber may run this handler on multiple threads to maximize
    // throughput, so completion is signalled through a thread-safe event.
    var done = new ManualResetEventSlim(false);
    subscriber.StartAsync((PubsubMessage message, CancellationToken cancel) =>
    {
        // TryGetValue instead of the indexer: a message without the attribute
        // is simply not ours and is handed back like any other foreign message.
        string jobName;
        if (message.Attributes.TryGetValue("DlpJobName", out jobName)
            && jobName == submittedJob.Name)
        {
            Thread.Sleep(500); // Wait for DLP API results to become consistent
            done.Set();
            return Task.FromResult(SubscriberClient.Reply.Ack);
        }

        return Task.FromResult(SubscriberClient.Reply.Nack);
    });

    // 10 minute timeout; may not work for large jobs
    var notified = done.Wait(TimeSpan.FromMinutes(10));
    subscriber.StopAsync(CancellationToken.None).Wait();

    // Process results
    var resultJob = dlp.GetDlpJob(
        new GetDlpJobRequest
        {
            DlpJobName = DlpJobName.Parse(submittedJob.Name)
        });

    // The results are only dereferenced once we know they exist; otherwise
    // report why they are missing instead of failing with a null reference.
    var result = resultJob.RiskDetails?.KMapEstimationResult;
    if (result == null)
    {
        if (!notified)
        {
            throw new TimeoutException(
                $"Timed out waiting for DLP job {submittedJob.Name} to finish.");
        }

        throw new InvalidOperationException(
            $"DLP job {submittedJob.Name} did not produce k-map estimation results.");
    }

    for (var histogramIdx = 0; histogramIdx < result.KMapEstimationHistogram.Count; histogramIdx++)
    {
        var histogramValue = result.KMapEstimationHistogram[histogramIdx];
        Console.WriteLine($"Bucket {histogramIdx}");
        Console.WriteLine($" Anonymity range: [{histogramValue.MinAnonymity}, {histogramValue.MaxAnonymity}].");
        Console.WriteLine($" Size: {histogramValue.BucketSize}");

        foreach (var datapoint in histogramValue.BucketValues)
        {
            // 'UnpackValue(x)' is a prettier version of 'x.toString()'
            Console.WriteLine($" Values: [{String.Join(',', datapoint.QuasiIdsValues.Select(x => DlpSamplesUtils.UnpackValue(x)))}]");
            Console.WriteLine($" Estimated k-map anonymity: {datapoint.EstimatedAnonymity}");
        }
    }

    return 0;
}
// [START dlp_numerical_stats]
/// <summary>
/// Submits a numerical-stats risk analysis job for one column of a BigQuery table,
/// waits for the Pub/Sub message announcing that job, and prints the value range
/// and the distinct quantile values to the console.
/// </summary>
/// <param name="callingProjectId">Project used as the parent of the DLP job and as the owner of the Pub/Sub topic and subscription.</param>
/// <param name="tableProjectId">Project that contains the BigQuery table to analyze.</param>
/// <param name="datasetId">BigQuery dataset that contains the table.</param>
/// <param name="tableId">BigQuery table to analyze.</param>
/// <param name="topicId">Pub/Sub topic (in <paramref name="callingProjectId"/>) the job publishes its notification to.</param>
/// <param name="subscriptionId">Pub/Sub subscription (in <paramref name="callingProjectId"/>) used to receive the notification.</param>
/// <param name="columnName">Name of the column whose numerical statistics are computed.</param>
/// <returns>Always <c>0</c>.</returns>
/// <exception cref="TimeoutException">No notification arrived within 10 minutes and the job has no numerical-stats results.</exception>
/// <exception cref="InvalidOperationException">The notification arrived but the job has no numerical-stats results.</exception>
public static object NumericalStats(
    string callingProjectId,
    string tableProjectId,
    string datasetId,
    string tableId,
    string topicId,
    string subscriptionId,
    string columnName)
{
    var dlp = DlpServiceClient.Create();

    // Construct + submit the job
    var config = new RiskAnalysisJobConfig
    {
        PrivacyMetric = new PrivacyMetric
        {
            NumericalStatsConfig = new NumericalStatsConfig
            {
                Field = new FieldId { Name = columnName }
            }
        },
        SourceTable = new BigQueryTable
        {
            ProjectId = tableProjectId,
            DatasetId = datasetId,
            TableId = tableId
        },
        Actions =
        {
            new Google.Cloud.Dlp.V2.Action
            {
                PubSub = new PublishToPubSub
                {
                    Topic = $"projects/{callingProjectId}/topics/{topicId}"
                }
            }
        }
    };

    var submittedJob = dlp.CreateDlpJob(
        new CreateDlpJobRequest
        {
            ParentAsProjectName = new ProjectName(callingProjectId),
            RiskJob = config
        });

    // Listen to pub/sub for the job
    var subscriptionName = new SubscriptionName(callingProjectId, subscriptionId);
    var subscriber = SubscriberClient.CreateAsync(subscriptionName).Result;

    // The subscriber may run this handler on multiple threads to maximize
    // throughput, so completion is signalled through a thread-safe event.
    var done = new ManualResetEventSlim(false);
    subscriber.StartAsync((PubsubMessage message, CancellationToken cancel) =>
    {
        // TryGetValue instead of the indexer: a message without the attribute
        // is simply not ours and is handed back like any other foreign message.
        string jobName;
        if (message.Attributes.TryGetValue("DlpJobName", out jobName)
            && jobName == submittedJob.Name)
        {
            Thread.Sleep(500); // Wait for DLP API results to become consistent
            done.Set();
            return Task.FromResult(SubscriberClient.Reply.Ack);
        }

        return Task.FromResult(SubscriberClient.Reply.Nack);
    });

    // 10 minute timeout; may not work for large jobs
    var notified = done.Wait(TimeSpan.FromMinutes(10));
    subscriber.StopAsync(CancellationToken.None).Wait();

    // Process results
    var resultJob = dlp.GetDlpJob(
        new GetDlpJobRequest
        {
            DlpJobName = DlpJobName.Parse(submittedJob.Name)
        });

    // The results are only dereferenced once we know they exist; otherwise
    // report why they are missing instead of failing with a null reference.
    var result = resultJob.RiskDetails?.NumericalStatsResult;
    if (result == null)
    {
        if (!notified)
        {
            throw new TimeoutException(
                $"Timed out waiting for DLP job {submittedJob.Name} to finish.");
        }

        throw new InvalidOperationException(
            $"DLP job {submittedJob.Name} did not produce numerical stats results.");
    }

    // 'UnpackValue(x)' is a prettier version of 'x.toString()'
    Console.WriteLine($"Value Range: [{DlpSamplesUtils.UnpackValue(result.MinValue)}, {DlpSamplesUtils.UnpackValue(result.MaxValue)}]");

    // Only print a quantile when its value differs from the previous one,
    // so runs of identical values are reported once.
    var lastValue = string.Empty;
    for (var quantile = 0; quantile < result.QuantileValues.Count; quantile++)
    {
        var currentValue = DlpSamplesUtils.UnpackValue(result.QuantileValues[quantile]);
        if (lastValue != currentValue)
        {
            Console.WriteLine($"Value at {quantile + 1}% quantile: {currentValue}");
        }

        lastValue = currentValue;
    }

    return 0;
}
// [END dlp_k_anonymity]
// [START dlp_l_diversity]
/// <summary>
/// Submits an l-diversity risk analysis job for a BigQuery table, waits for the
/// Pub/Sub message announcing that job, and prints the sensitive-value frequency
/// histogram to the console.
/// </summary>
/// <param name="callingProjectId">Project used as the parent of the DLP job and as the owner of the Pub/Sub topic and subscription.</param>
/// <param name="tableProjectId">Project that contains the BigQuery table to analyze.</param>
/// <param name="datasetId">BigQuery dataset that contains the table.</param>
/// <param name="tableId">BigQuery table to analyze.</param>
/// <param name="topicId">Pub/Sub topic (in <paramref name="callingProjectId"/>) the job publishes its notification to.</param>
/// <param name="subscriptionId">Pub/Sub subscription (in <paramref name="callingProjectId"/>) used to receive the notification.</param>
/// <param name="quasiIds">Quasi-identifier fields passed to the l-diversity config.</param>
/// <param name="sensitiveAttribute">Name of the column used as the sensitive attribute.</param>
/// <returns>Always <c>0</c>.</returns>
/// <exception cref="TimeoutException">No notification arrived within 10 minutes and the job has no l-diversity results.</exception>
/// <exception cref="InvalidOperationException">The notification arrived but the job has no l-diversity results.</exception>
public static object LDiversity(
    string callingProjectId,
    string tableProjectId,
    string datasetId,
    string tableId,
    string topicId,
    string subscriptionId,
    IEnumerable<FieldId> quasiIds,
    string sensitiveAttribute)
{
    var dlp = DlpServiceClient.Create();

    // Construct + submit the job
    var ldiversityConfig = new LDiversityConfig
    {
        SensitiveAttribute = new FieldId { Name = sensitiveAttribute },
        QuasiIds = { quasiIds }
    };

    var config = new RiskAnalysisJobConfig
    {
        PrivacyMetric = new PrivacyMetric
        {
            LDiversityConfig = ldiversityConfig
        },
        SourceTable = new BigQueryTable
        {
            ProjectId = tableProjectId,
            DatasetId = datasetId,
            TableId = tableId
        },
        Actions =
        {
            new Google.Cloud.Dlp.V2.Action
            {
                PubSub = new PublishToPubSub
                {
                    Topic = $"projects/{callingProjectId}/topics/{topicId}"
                }
            }
        }
    };

    var submittedJob = dlp.CreateDlpJob(
        new CreateDlpJobRequest
        {
            ParentAsProjectName = new ProjectName(callingProjectId),
            RiskJob = config
        });

    // Listen to pub/sub for the job
    var subscriptionName = new SubscriptionName(callingProjectId, subscriptionId);
    var subscriber = SubscriberClient.CreateAsync(subscriptionName).Result;

    // The subscriber may run this handler on multiple threads to maximize
    // throughput, so completion is signalled through a thread-safe event.
    var done = new ManualResetEventSlim(false);
    subscriber.StartAsync((PubsubMessage message, CancellationToken cancel) =>
    {
        // TryGetValue instead of the indexer: a message without the attribute
        // is simply not ours and is handed back like any other foreign message.
        string jobName;
        if (message.Attributes.TryGetValue("DlpJobName", out jobName)
            && jobName == submittedJob.Name)
        {
            Thread.Sleep(500); // Wait for DLP API results to become consistent
            done.Set();
            return Task.FromResult(SubscriberClient.Reply.Ack);
        }

        return Task.FromResult(SubscriberClient.Reply.Nack);
    });

    // 10 minute timeout; may not work for large jobs
    var notified = done.Wait(TimeSpan.FromMinutes(10));
    subscriber.StopAsync(CancellationToken.None).Wait();

    // Process results
    var resultJob = dlp.GetDlpJob(
        new GetDlpJobRequest
        {
            DlpJobName = DlpJobName.Parse(submittedJob.Name)
        });

    // The results are only dereferenced once we know they exist; otherwise
    // report why they are missing instead of failing with a null reference.
    var result = resultJob.RiskDetails?.LDiversityResult;
    if (result == null)
    {
        if (!notified)
        {
            throw new TimeoutException(
                $"Timed out waiting for DLP job {submittedJob.Name} to finish.");
        }

        throw new InvalidOperationException(
            $"DLP job {submittedJob.Name} did not produce l-diversity results.");
    }

    for (var bucketIdx = 0; bucketIdx < result.SensitiveValueFrequencyHistogramBuckets.Count; bucketIdx++)
    {
        var bucket = result.SensitiveValueFrequencyHistogramBuckets[bucketIdx];
        Console.WriteLine($"Bucket {bucketIdx}");
        Console.WriteLine($" Bucket size range: [{bucket.SensitiveValueFrequencyLowerBound}, {bucket.SensitiveValueFrequencyUpperBound}].");
        Console.WriteLine($" {bucket.BucketSize} unique value(s) total.");

        foreach (var bucketValue in bucket.BucketValues)
        {
            // 'UnpackValue(x)' is a prettier version of 'x.toString()'
            Console.WriteLine($" Quasi-ID values: [{String.Join(',', bucketValue.QuasiIdsValues.Select(x => DlpSamplesUtils.UnpackValue(x)))}]");
            Console.WriteLine($" Class size: {bucketValue.EquivalenceClassSize}");

            foreach (var topValue in bucketValue.TopSensitiveValues)
            {
                Console.WriteLine($" Sensitive value {DlpSamplesUtils.UnpackValue(topValue.Value)} occurs {topValue.Count} time(s).");
            }
        }
    }

    return 0;
}
/// <summary>
/// Program entry point. Uses the first command line argument to decide which
/// group of verb option types the arguments are parsed against, then runs the
/// sample mapped to the parsed options. The value produced by the mapping
/// (the sample's return value, or 1 on a parse error) is discarded.
/// </summary>
/// <param name="args">Command line arguments; the first one selects the sample.</param>
public static void Main(string[] args)
{
    // The process is terminated with exit code -1 when no verb is supplied.
    if (args.Length == 0)
    {
        Console.WriteLine("Invalid number of arguments supplied");
        Environment.Exit(-1);
    }

    var command = args[0];
    var parser = Parser.Default;

    // Command line argument parser we use doesn't support more than 15 options,
    // therefore the verbs are split into separate parse calls.
    switch (command)
    {
        case "redactImage":
            parser.ParseArguments<RedactFromImageOptions>(args).MapResult(
                (RedactFromImageOptions o) => RedactSamples.RedactFromImage(
                    o.ProjectId,
                    o.ImageFromPath,
                    o.ImageToPath),
                errors => 1);
            break;

        case "kAnonymity":
        case "lDiversity":
        case "deidDateShift":
        case "kMap":
        case "listInfoTypes":
        case "inspectBigQuery":
        case "inspectDataStore":
        case "inspectGcs":
            parser.ParseArguments<
                KAnonymityOptions,
                LDiversityOptions,
                DeidDateShiftOptions,
                KMapOptions,
                ListInfoTypesOptions,
                InspectBigQueryOptions,
                InspectDatastoreOptions,
                InspectGcsOptions>(args)
                .MapResult(
                    (KAnonymityOptions o) => RiskAnalysis.KAnonymity(
                        o.CallingProjectId,
                        o.TableProjectId,
                        o.DatasetId,
                        o.TableId,
                        o.TopicId,
                        o.SubscriptionId,
                        DlpSamplesUtils.ParseQuasiIds(o.QuasiIdColumns)),
                    (LDiversityOptions o) => RiskAnalysis.LDiversity(
                        o.CallingProjectId,
                        o.TableProjectId,
                        o.DatasetId,
                        o.TableId,
                        o.TopicId,
                        o.SubscriptionId,
                        DlpSamplesUtils.ParseQuasiIds(o.QuasiIdColumns),
                        o.SensitiveAttribute),
                    (KMapOptions o) => RiskAnalysis.KMap(
                        o.CallingProjectId,
                        o.TableProjectId,
                        o.DatasetId,
                        o.TableId,
                        o.TopicId,
                        o.SubscriptionId,
                        DlpSamplesUtils.ParseQuasiIds(o.QuasiIdColumns),
                        DlpSamplesUtils.ParseInfoTypes(o.InfoTypes),
                        o.RegionCode),
                    (DeidDateShiftOptions o) => DeIdentify.DeidDateShift(
                        o.ProjectId,
                        o.InputCsvFile,
                        o.OutputCsvFile,
                        o.LowerBoundDays,
                        o.UpperBoundDays,
                        o.DateFields,
                        o.ContextFieldId,
                        o.KeyName,
                        o.WrappedKey),
                    (ListInfoTypesOptions o) => Metadata.ListInfoTypes(
                        o.LanguageCode,
                        o.Filter),
                    (InspectBigQueryOptions o) => InspectSamples.InspectBigQuery(
                        o.ProjectId,
                        o.MinLikelihood,
                        o.MaxFindings,
                        !o.NoIncludeQuote,
                        DlpSamplesUtils.ParseIdentifyingFields(o.IdentifyingFields),
                        DlpSamplesUtils.ParseInfoTypes(o.InfoTypes),
                        DlpSamplesUtils.ParseCustomInfoTypes(o.CustomDictionary, o.CustomRegexes),
                        o.DatasetId,
                        o.TableId),
                    (InspectDatastoreOptions o) => InspectSamples.InspectCloudDataStore(
                        o.ProjectId,
                        o.MinLikelihood,
                        o.MaxFindings,
                        !o.NoIncludeQuote,
                        o.KindName,
                        o.NamespaceId,
                        DlpSamplesUtils.ParseInfoTypes(o.InfoTypes),
                        DlpSamplesUtils.ParseCustomInfoTypes(o.CustomDictionary, o.CustomRegexes),
                        o.DatasetId,
                        o.TableId),
                    (InspectGcsOptions o) => InspectSamples.InspectGCS(
                        o.ProjectId,
                        o.MinLikelihood,
                        o.MaxFindings,
                        !o.NoIncludeQuote,
                        DlpSamplesUtils.ParseInfoTypes(o.InfoTypes),
                        DlpSamplesUtils.ParseCustomInfoTypes(o.CustomDictionary, o.CustomRegexes),
                        o.BucketName,
                        o.TopicId,
                        o.SubscriptionId),
                    errors => 1);
            break;

        default:
            // Every other first argument is parsed against the remaining verbs.
            parser.ParseArguments<
                InspectStringOptions,
                InspectFileOptions,
                CreateTemplateOptions,
                ListTemplatesOptions,
                DeleteTemplatesOptions,
                DeidMaskOptions,
                DeidFpeOptions,
                ReidFpeOptions,
                ListJobsOptions,
                DeleteJobOptions,
                CreateJobTriggerOptions,
                ListJobTriggersOptions,
                DeleteJobTriggerOptions,
                NumericalStatsOptions,
                CategoricalStatsOptions>(args)
                .MapResult(
                    (InspectStringOptions o) => InspectSamples.InspectString(
                        o.ProjectId,
                        o.Value,
                        o.MinLikelihood,
                        o.MaxFindings,
                        !o.NoIncludeQuote,
                        DlpSamplesUtils.ParseInfoTypes(o.InfoTypes),
                        DlpSamplesUtils.ParseCustomInfoTypes(o.CustomDictionary, o.CustomRegexes)),
                    (InspectFileOptions o) => InspectSamples.InspectFile(
                        o.ProjectId,
                        o.File,
                        o.MinLikelihood,
                        o.MaxFindings,
                        !o.NoIncludeQuote,
                        DlpSamplesUtils.ParseInfoTypes(o.InfoTypes),
                        DlpSamplesUtils.ParseCustomInfoTypes(o.CustomDictionary, o.CustomRegexes)),
                    (CreateTemplateOptions o) => InspectTemplates.CreateInspectTemplate(
                        o.ProjectId,
                        o.TemplateId,
                        o.DisplayName,
                        o.Description,
                        o.MinLikelihood,
                        o.MaxFindings,
                        !o.NoIncludeQuote),
                    (ListTemplatesOptions o) => InspectTemplates.ListInspectTemplate(o.ProjectId),
                    (DeleteTemplatesOptions o) => InspectTemplates.DeleteInspectTemplate(
                        o.ProjectId,
                        o.TemplateName),
                    (DeidMaskOptions o) => DeIdentify.DeidMask(
                        o.ProjectId,
                        o.Value,
                        DlpSamplesUtils.ParseInfoTypes(o.InfoTypes),
                        o.Mask,
                        o.Num,
                        o.Reverse),
                    (DeidFpeOptions o) => DeIdentify.DeidFpe(
                        o.ProjectId,
                        o.Value,
                        DlpSamplesUtils.ParseInfoTypes(o.InfoTypes),
                        o.KeyName,
                        o.WrappedKeyFile,
                        o.Alphabet),
                    (ReidFpeOptions o) => DeIdentify.ReidFpe(
                        o.ProjectId,
                        o.Value,
                        o.KeyName,
                        o.WrappedKeyFile,
                        o.Alphabet),
                    (ListJobsOptions o) => Jobs.ListJobs(
                        o.ProjectId,
                        o.Filter,
                        o.JobType),
                    (DeleteJobOptions o) => Jobs.DeleteJob(o.JobName),
                    (CreateJobTriggerOptions o) => JobTriggers.CreateJobTrigger(
                        o.ProjectId,
                        o.BucketName,
                        o.MinLikelihood,
                        o.MaxFindings,
                        o.AutoPopulateTimespan,
                        o.ScanPeriod,
                        DlpSamplesUtils.ParseInfoTypes(o.InfoTypes),
                        o.TriggerId,
                        o.DisplayName,
                        o.Description),
                    (ListJobTriggersOptions o) => JobTriggers.ListJobTriggers(o.ProjectId),
                    (DeleteJobTriggerOptions o) => JobTriggers.DeleteJobTrigger(o.TriggerName),
                    (NumericalStatsOptions o) => RiskAnalysis.NumericalStats(
                        o.CallingProjectId,
                        o.TableProjectId,
                        o.DatasetId,
                        o.TableId,
                        o.TopicId,
                        o.SubscriptionId,
                        o.ColumnName),
                    (CategoricalStatsOptions o) => RiskAnalysis.CategoricalStats(
                        o.CallingProjectId,
                        o.TableProjectId,
                        o.DatasetId,
                        o.TableId,
                        o.TopicId,
                        o.SubscriptionId,
                        o.ColumnName),
                    errors => 1);
            break;
    }
}