/// <summary>Snippet for CreateDlpJob</summary>
public void CreateDlpJob_RequestObject()
{
    // Snippet: CreateDlpJob(CreateDlpJobRequest,CallSettings)
    // Create client
    var client = DlpServiceClient.Create();

    // Initialize request argument(s)
    var createRequest = new CreateDlpJobRequest();
    createRequest.ParentAsProjectName = new ProjectName("[PROJECT]");

    // Make the request
    DlpJob createdJob = client.CreateDlpJob(createRequest);
    // End snippet
}
/// <summary>
/// Creates a categorical-stats risk analysis job through the API, then (with retries)
/// runs the "listJobs" and "deleteJob" commands and checks their console output.
/// </summary>
public void TestJobs()
{
    // Create job.
    DlpServiceClient client = DlpServiceClient.Create();

    var privacyMetric = new PrivacyMetric()
    {
        CategoricalStatsConfig = new PrivacyMetric.Types.CategoricalStatsConfig()
        {
            Field = new FieldId() { Name = "zip_code" }
        }
    };
    var sourceTable = new BigQueryTable()
    {
        ProjectId = "bigquery-public-data",
        DatasetId = "san_francisco",
        TableId = "bikeshare_trips"
    };
    var createRequest = new CreateDlpJobRequest()
    {
        ParentAsProjectName = new ProjectName(ProjectId),
        RiskJob = new RiskAnalysisJobConfig()
        {
            PrivacyMetric = privacyMetric,
            SourceTable = sourceTable
        }
    };
    client.CreateDlpJob(createRequest);

    // Pattern used both to assert on the listing and to extract a job name from it.
    Regex jobNamePattern = new Regex("projects/.*/dlpJobs/r-\\d+");

    // Retry on every exception, including assertion failures raised below.
    _retryRobot.ShouldRetry = exception => true;
    _retryRobot.Eventually(() =>
    {
        // List jobs.
        ConsoleOutput listing = _dlp.Run("listJobs", CallingProjectId, "state=DONE", "RiskAnalysisJob");
        Assert.Matches(jobNamePattern, listing.Stdout);

        // Delete created job.
        // NOTE(review): this deletes the first job name matched in the listing,
        // which is presumably (but not necessarily) the job created above.
        string matchedJobName = jobNamePattern.Match(listing.Stdout).Value;
        ConsoleOutput deletion = _dlp.Run("deleteJob", matchedJobName);
        Assert.Contains($"Successfully deleted job {matchedJobName}", deletion.Stdout);
    });
}
// [END dlp_inspect_datastore]

// [START dlp_inspect_gcs]
/// <summary>
/// Creates a DLP inspect job over the <c>*.txt</c> objects of a Cloud Storage bucket,
/// waits up to one minute for the job's Pub/Sub notification, then prints
/// the processed/estimated byte counts and the info types found.
/// </summary>
/// <param name="projectId">Project used as the job parent and to build the subscription name.</param>
/// <param name="minLikelihood">Name of a <c>Likelihood</c> enum member; parsed with <c>Enum.Parse</c>.</param>
/// <param name="maxFindings">Value assigned to <c>FindingLimits.MaxFindingsPerRequest</c>.</param>
/// <param name="includeQuote">Value assigned to <c>InspectConfig.IncludeQuote</c>.</param>
/// <param name="infoTypes">Info types to inspect for.</param>
/// <param name="bucketName">Bucket whose <c>gs://{bucketName}/*.txt</c> objects are scanned.</param>
/// <param name="topicId">
/// Value passed unchanged as the Pub/Sub action topic.
/// NOTE(review): the risk-analysis samples pass "projects/{project}/topics/{topic}" here;
/// confirm callers supply the form the API expects.
/// </param>
/// <param name="subscriptionId">Subscription (within <paramref name="projectId"/>) to listen on.</param>
/// <returns>Always <c>0</c>; a timeout is reported on the console only.</returns>
public static object InspectGCS(
    string projectId,
    string minLikelihood,
    int maxFindings,
    bool includeQuote,
    IEnumerable<InfoType> infoTypes,
    string bucketName,
    string topicId,
    string subscriptionId)
{
    var inspectJob = new InspectJobConfig
    {
        StorageConfig = new StorageConfig
        {
            CloudStorageOptions = new CloudStorageOptions
            {
                FileSet = new CloudStorageOptions.Types.FileSet { Url = $"gs://{bucketName}/*.txt" },
                BytesLimitPerFile = 1073741824 // 2^30 bytes
            },
        },
        InspectConfig = new InspectConfig
        {
            InfoTypes = { infoTypes },
            ExcludeInfoTypes = false,
            IncludeQuote = includeQuote,
            Limits = new FindingLimits
            {
                MaxFindingsPerRequest = maxFindings
            },
            MinLikelihood = (Likelihood)System.Enum.Parse(typeof(Likelihood), minLikelihood)
        },
        Actions =
        {
            new Google.Cloud.Dlp.V2.Action
            {
                // Send results to Pub/Sub topic
                PubSub = new Google.Cloud.Dlp.V2.Action.Types.PublishToPubSub
                {
                    Topic = topicId,
                }
            }
        }
    };

    // Issue Create Dlp Job Request
    DlpServiceClient client = DlpServiceClient.Create();
    var request = new CreateDlpJobRequest
    {
        InspectJob = inspectJob,
        ParentAsProjectName = new Google.Cloud.Dlp.V2.ProjectName(projectId),
    };

    // We need created job name
    var dlpJob = client.CreateDlpJob(request);

    // Get a pub/sub subscription and listen for DLP results
    var fireEvent = new ManualResetEventSlim();

    var subscriptionName = new SubscriptionName(projectId, subscriptionId);
    var subscriberClient = SubscriberServiceApiClient.Create();
    var subscriber = SubscriberClient.Create(subscriptionName, new[] { subscriberClient });
    subscriber.StartAsync(
        (pubSubMessage, cancellationToken) =>
        {
            // Acknowledge only the notification for our job. Messages that lack the
            // attribute are declined instead of throwing from the indexer.
            string notifiedJobName;
            bool isOurJob =
                pubSubMessage.Attributes.TryGetValue("DlpJobName", out notifiedJobName)
                && notifiedJobName == dlpJob.Name;
            if (isOurJob)
            {
                fireEvent.Set();
                return Task.FromResult(SubscriberClient.Reply.Ack);
            }

            return Task.FromResult(SubscriberClient.Reply.Nack);
        });

    // We block here until the message handler signals that the job's notification arrived.
    bool notified = fireEvent.Wait(TimeSpan.FromMinutes(1));

    // Stop listening whether or not the notification arrived, so the subscriber
    // started by StartAsync is not left running after a timeout.
    subscriber.StopAsync(CancellationToken.None).Wait();

    if (!notified)
    {
        Console.WriteLine("Error: The wait failed on timeout");
        return 0;
    }

    // Now we can inspect full job results. dlpJob.Name is already a full resource
    // name, so it is parsed rather than used as a job id under projectId.
    var job = client.GetDlpJob(new GetDlpJobRequest { DlpJobName = DlpJobName.Parse(dlpJob.Name) });

    // Inspect Job details
    Console.WriteLine($"Processed bytes: {job.InspectDetails.Result.ProcessedBytes}");
    Console.WriteLine($"Total estimated bytes: {job.InspectDetails.Result.TotalEstimatedBytes}");
    var stats = job.InspectDetails.Result.InfoTypeStats;
    Console.WriteLine("Found stats:");
    foreach (var stat in stats)
    {
        Console.WriteLine($"{stat.InfoType.Name}");
    }

    return 0;
}
// [END dlp_inspect_bigquery]

// [START dlp_inspect_datastore]
/// <summary>
/// Creates a DLP inspect job over a Datastore kind, saving findings to a BigQuery table,
/// and, if <c>EnsureJobFinishes</c> reports success, prints rows read back from that table.
/// </summary>
/// <param name="projectId">Project used for the job parent, the Datastore partition and the output table.</param>
/// <param name="minLikelihood">Name of a <c>Likelihood</c> enum member; parsed with <c>Enum.Parse</c>.</param>
/// <param name="maxFindings">Value assigned to <c>FindingLimits.MaxFindingsPerRequest</c>.</param>
/// <param name="includeQuote">Value assigned to <c>InspectConfig.IncludeQuote</c>.</param>
/// <param name="kindName">Datastore kind to inspect.</param>
/// <param name="namespaceId">Datastore namespace of the partition to inspect.</param>
/// <param name="infoTypes">Info types to inspect for.</param>
/// <param name="datasetId">BigQuery dataset that receives the findings.</param>
/// <param name="tableId">BigQuery table that receives the findings.</param>
/// <returns>Always <c>0</c>.</returns>
public static object InspectCloudDataStore(
    string projectId,
    string minLikelihood,
    int maxFindings,
    bool includeQuote,
    string kindName,
    string namespaceId,
    IEnumerable<InfoType> infoTypes,
    string datasetId,
    string tableId)
{
    // Where to look: one Datastore kind, restricted to the last year.
    var storage = new StorageConfig
    {
        DatastoreOptions = new DatastoreOptions
        {
            Kind = new KindExpression { Name = kindName },
            PartitionId = new PartitionId
            {
                NamespaceId = namespaceId,
                ProjectId = projectId,
            }
        },
        TimespanConfig = new StorageConfig.Types.TimespanConfig
        {
            StartTime = Timestamp.FromDateTime(System.DateTime.UtcNow.AddYears(-1)),
            EndTime = Timestamp.FromDateTime(System.DateTime.UtcNow)
        }
    };

    // What to look for.
    var inspection = new InspectConfig
    {
        InfoTypes = { infoTypes },
        Limits = new FindingLimits
        {
            MaxFindingsPerRequest = maxFindings
        },
        ExcludeInfoTypes = false,
        IncludeQuote = includeQuote,
        MinLikelihood = (Likelihood)System.Enum.Parse(typeof(Likelihood), minLikelihood)
    };

    // Save results in BigQuery Table
    var saveToBigQuery = new Google.Cloud.Dlp.V2.Action
    {
        SaveFindings = new Google.Cloud.Dlp.V2.Action.Types.SaveFindings
        {
            OutputConfig = new OutputStorageConfig
            {
                Table = new Google.Cloud.Dlp.V2.BigQueryTable
                {
                    ProjectId = projectId,
                    DatasetId = datasetId,
                    TableId = tableId
                }
            }
        },
    };

    var jobConfig = new InspectJobConfig
    {
        StorageConfig = storage,
        InspectConfig = inspection,
        Actions = { saveToBigQuery }
    };

    // Issue Create Dlp Job Request
    DlpServiceClient dlpClient = DlpServiceClient.Create();
    var createRequest = new CreateDlpJobRequest
    {
        InspectJob = jobConfig,
        ParentAsProjectName = new Google.Cloud.Dlp.V2.ProjectName(projectId),
    };

    // We need created job name
    var createdJobName = dlpClient.CreateDlpJob(createRequest).Name;

    // Make sure the job finishes before inspecting the results.
    // Alternatively, we can inspect results opportunistically, but
    // for testing purposes, we want consistent outcome
    if (!EnsureJobFinishes(projectId, createdJobName))
    {
        return 0;
    }

    var bigQuery = BigQueryClient.Create(projectId);
    var findingsTable = bigQuery.GetTable(datasetId, tableId);

    // List rows starting at index 0 using a page size of 10.
    Console.WriteLine("DLP v2 Results:");
    var listOptions = new ListRowsOptions { StartIndex = 0, PageSize = 10 };
    foreach (var row in findingsTable.ListRows(listOptions))
    {
        // NOTE(review): the row is indexed with an empty field name; confirm which
        // column was intended here.
        Console.WriteLine($"\t {row[""]}");
    }

    return 0;
}
// [END dlp_l_diversity]

// [START dlp_k_map]
/// <summary>
/// Submits a k-map estimation risk analysis job for a BigQuery table, waits up to ten
/// minutes for its Pub/Sub notification, then prints the estimation histogram.
/// </summary>
/// <param name="callingProjectId">Project used as the job parent and for the topic and subscription.</param>
/// <param name="tableProjectId">Project that owns the BigQuery table.</param>
/// <param name="datasetId">BigQuery dataset of the source table.</param>
/// <param name="tableId">BigQuery table to analyze.</param>
/// <param name="topicId">Topic id; expanded to "projects/{callingProjectId}/topics/{topicId}".</param>
/// <param name="subscriptionId">Subscription (within <paramref name="callingProjectId"/>) to listen on.</param>
/// <param name="quasiIds">Quasi-identifier fields; paired positionally with <paramref name="infoTypes"/>.</param>
/// <param name="infoTypes">Info type tagged onto the quasi-identifier at the same position.</param>
/// <param name="regionCode">Value assigned to <c>KMapEstimationConfig.RegionCode</c>.</param>
/// <returns>Always <c>0</c>; a timeout is reported on the console only.</returns>
public static object KMap(
    string callingProjectId,
    string tableProjectId,
    string datasetId,
    string tableId,
    string topicId,
    string subscriptionId,
    IEnumerable<FieldId> quasiIds,
    IEnumerable<InfoType> infoTypes,
    string regionCode)
{
    DlpServiceClient dlp = DlpServiceClient.Create();

    // Construct + submit the job
    var kmapEstimationConfig = new KMapEstimationConfig
    {
        QuasiIds =
        {
            // Zip stops at the shorter sequence, so unpaired trailing items are ignored.
            quasiIds.Zip(
                infoTypes,
                (field, infoType) => new TaggedField
                {
                    Field = field,
                    InfoType = infoType
                })
        },
        RegionCode = regionCode
    };

    var config = new RiskAnalysisJobConfig()
    {
        PrivacyMetric = new PrivacyMetric
        {
            KMapEstimationConfig = kmapEstimationConfig
        },
        SourceTable = new BigQueryTable
        {
            ProjectId = tableProjectId,
            DatasetId = datasetId,
            TableId = tableId
        },
        Actions =
        {
            new Google.Cloud.Dlp.V2.Action
            {
                PubSub = new PublishToPubSub
                {
                    Topic = $"projects/{callingProjectId}/topics/{topicId}"
                }
            }
        }
    };

    var submittedJob = dlp.CreateDlpJob(
        new CreateDlpJobRequest
        {
            ParentAsProjectName = new Google.Cloud.Dlp.V2.ProjectName(callingProjectId),
            RiskJob = config
        });

    // Listen to pub/sub for the job
    var subscriptionName = new SubscriptionName(
        callingProjectId,
        subscriptionId);
    SubscriberClient subscriber = SubscriberClient.Create(
        subscriptionName,
        new[] { SubscriberServiceApiClient.Create() });

    // SubscriberClient runs your message handle function on multiple
    // threads to maximize throughput.
    var done = new ManualResetEventSlim(false);
    subscriber.StartAsync((PubsubMessage message, CancellationToken cancel) =>
    {
        // Messages without the attribute are declined rather than throwing from the indexer.
        string notifiedJobName;
        if (message.Attributes.TryGetValue("DlpJobName", out notifiedJobName)
            && notifiedJobName == submittedJob.Name)
        {
            Thread.Sleep(500); // Wait for DLP API results to become consistent
            done.Set();
            return Task.FromResult(SubscriberClient.Reply.Ack);
        }

        return Task.FromResult(SubscriberClient.Reply.Nack);
    });

    // 10 minute timeout; may not work for large jobs
    bool jobCompleted = done.Wait(TimeSpan.FromMinutes(10));
    subscriber.StopAsync(CancellationToken.None).Wait();

    if (!jobCompleted)
    {
        // No notification arrived, so the job's results cannot be assumed to exist yet.
        Console.WriteLine("Error: The wait failed on timeout");
        return 0;
    }

    // Process results
    var resultJob = dlp.GetDlpJob(new GetDlpJobRequest
    {
        DlpJobName = DlpJobName.Parse(submittedJob.Name)
    });

    var result = resultJob.RiskDetails.KMapEstimationResult;

    for (int histogramIdx = 0; histogramIdx < result.KMapEstimationHistogram.Count; histogramIdx++)
    {
        var histogramValue = result.KMapEstimationHistogram[histogramIdx];
        Console.WriteLine($"Bucket {histogramIdx}");
        Console.WriteLine($" Anonymity range: [{histogramValue.MinAnonymity}, {histogramValue.MaxAnonymity}].");
        Console.WriteLine($" Size: {histogramValue.BucketSize}");

        foreach (var datapoint in histogramValue.BucketValues)
        {
            // 'UnpackValue(x)' is a prettier version of 'x.toString()'
            Console.WriteLine($" Values: [{String.Join(',', datapoint.QuasiIdsValues.Select(x => DlpSamplesUtils.UnpackValue(x)))}]");
            Console.WriteLine($" Estimated k-map anonymity: {datapoint.EstimatedAnonymity}");
        }
    }

    return 0;
}
// [END dlp_k_anonymity]

// [START dlp_l_diversity]
/// <summary>
/// Submits an l-diversity risk analysis job for a BigQuery table, waits up to ten
/// minutes for its Pub/Sub notification, then prints the sensitive-value histogram.
/// </summary>
/// <param name="callingProjectId">Project used as the job parent and for the topic and subscription.</param>
/// <param name="tableProjectId">Project that owns the BigQuery table.</param>
/// <param name="datasetId">BigQuery dataset of the source table.</param>
/// <param name="tableId">BigQuery table to analyze.</param>
/// <param name="topicId">Topic id; expanded to "projects/{callingProjectId}/topics/{topicId}".</param>
/// <param name="subscriptionId">Subscription (within <paramref name="callingProjectId"/>) to listen on.</param>
/// <param name="quasiIds">Quasi-identifier fields for the analysis.</param>
/// <param name="sensitiveAttribute">Name of the column used as the sensitive attribute.</param>
/// <returns>Always <c>0</c>; a timeout is reported on the console only.</returns>
public static object LDiversity(
    string callingProjectId,
    string tableProjectId,
    string datasetId,
    string tableId,
    string topicId,
    string subscriptionId,
    IEnumerable<FieldId> quasiIds,
    string sensitiveAttribute)
{
    DlpServiceClient dlp = DlpServiceClient.Create();

    // Construct + submit the job
    var ldiversityConfig = new LDiversityConfig
    {
        SensitiveAttribute = new FieldId { Name = sensitiveAttribute },
        QuasiIds = { quasiIds }
    };

    var config = new RiskAnalysisJobConfig
    {
        PrivacyMetric = new PrivacyMetric
        {
            LDiversityConfig = ldiversityConfig
        },
        SourceTable = new BigQueryTable
        {
            ProjectId = tableProjectId,
            DatasetId = datasetId,
            TableId = tableId
        },
        Actions =
        {
            new Google.Cloud.Dlp.V2.Action
            {
                PubSub = new PublishToPubSub
                {
                    Topic = $"projects/{callingProjectId}/topics/{topicId}"
                }
            }
        }
    };

    var submittedJob = dlp.CreateDlpJob(
        new CreateDlpJobRequest
        {
            ParentAsProjectName = new Google.Cloud.Dlp.V2.ProjectName(callingProjectId),
            RiskJob = config
        });

    // Listen to pub/sub for the job
    var subscriptionName = new SubscriptionName(callingProjectId, subscriptionId);
    SubscriberClient subscriber = SubscriberClient.Create(
        subscriptionName,
        new[] { SubscriberServiceApiClient.Create() });

    // SubscriberClient runs your message handle function on multiple
    // threads to maximize throughput.
    var done = new ManualResetEventSlim(false);
    subscriber.StartAsync((PubsubMessage message, CancellationToken cancel) =>
    {
        // Messages without the attribute are declined rather than throwing from the indexer.
        string notifiedJobName;
        if (message.Attributes.TryGetValue("DlpJobName", out notifiedJobName)
            && notifiedJobName == submittedJob.Name)
        {
            Thread.Sleep(500); // Wait for DLP API results to become consistent
            done.Set();
            return Task.FromResult(SubscriberClient.Reply.Ack);
        }

        return Task.FromResult(SubscriberClient.Reply.Nack);
    });

    // 10 minute timeout; may not work for large jobs
    bool jobCompleted = done.Wait(TimeSpan.FromMinutes(10));
    subscriber.StopAsync(CancellationToken.None).Wait();

    if (!jobCompleted)
    {
        // No notification arrived, so the job's results cannot be assumed to exist yet.
        Console.WriteLine("Error: The wait failed on timeout");
        return 0;
    }

    // Process results
    var resultJob = dlp.GetDlpJob(
        new GetDlpJobRequest
        {
            DlpJobName = DlpJobName.Parse(submittedJob.Name)
        });

    var result = resultJob.RiskDetails.LDiversityResult;

    for (int bucketIdx = 0; bucketIdx < result.SensitiveValueFrequencyHistogramBuckets.Count; bucketIdx++)
    {
        var bucket = result.SensitiveValueFrequencyHistogramBuckets[bucketIdx];
        Console.WriteLine($"Bucket {bucketIdx}");
        Console.WriteLine($" Bucket size range: [{bucket.SensitiveValueFrequencyLowerBound}, {bucket.SensitiveValueFrequencyUpperBound}].");
        Console.WriteLine($" {bucket.BucketSize} unique value(s) total.");

        foreach (var bucketValue in bucket.BucketValues)
        {
            // 'UnpackValue(x)' is a prettier version of 'x.toString()'
            Console.WriteLine($" Quasi-ID values: [{String.Join(',', bucketValue.QuasiIdsValues.Select(x => DlpSamplesUtils.UnpackValue(x)))}]");
            Console.WriteLine($" Class size: {bucketValue.EquivalenceClassSize}");

            foreach (var topValue in bucketValue.TopSensitiveValues)
            {
                Console.WriteLine($" Sensitive value {DlpSamplesUtils.UnpackValue(topValue.Value)} occurs {topValue.Count} time(s).");
            }
        }
    }

    return 0;
}
// [START dlp_numerical_stats]
/// <summary>
/// Submits a numerical-stats risk analysis job for one column of a BigQuery table, waits
/// up to ten minutes for its Pub/Sub notification, then prints the value range and the
/// quantile values (skipping consecutive repeats).
/// </summary>
/// <param name="callingProjectId">Project used as the job parent and for the topic and subscription.</param>
/// <param name="tableProjectId">Project that owns the BigQuery table.</param>
/// <param name="datasetId">BigQuery dataset of the source table.</param>
/// <param name="tableId">BigQuery table to analyze.</param>
/// <param name="topicId">Topic id; expanded to "projects/{callingProjectId}/topics/{topicId}".</param>
/// <param name="subscriptionId">Subscription (within <paramref name="callingProjectId"/>) to listen on.</param>
/// <param name="columnName">Name of the column to compute statistics for.</param>
/// <returns>Always <c>0</c>; a timeout is reported on the console only.</returns>
public static object NumericalStats(
    string callingProjectId,
    string tableProjectId,
    string datasetId,
    string tableId,
    string topicId,
    string subscriptionId,
    string columnName)
{
    DlpServiceClient dlp = DlpServiceClient.Create();

    // Construct + submit the job
    var config = new RiskAnalysisJobConfig
    {
        PrivacyMetric = new PrivacyMetric
        {
            NumericalStatsConfig = new NumericalStatsConfig
            {
                Field = new FieldId { Name = columnName }
            }
        },
        SourceTable = new BigQueryTable
        {
            ProjectId = tableProjectId,
            DatasetId = datasetId,
            TableId = tableId
        },
        Actions =
        {
            new Google.Cloud.Dlp.V2.Action
            {
                PubSub = new PublishToPubSub
                {
                    Topic = $"projects/{callingProjectId}/topics/{topicId}"
                }
            }
        }
    };

    var submittedJob = dlp.CreateDlpJob(
        new CreateDlpJobRequest
        {
            ParentAsProjectName = new Google.Cloud.Dlp.V2.ProjectName(callingProjectId),
            RiskJob = config
        });

    // Listen to pub/sub for the job
    var subscriptionName = new SubscriptionName(callingProjectId, subscriptionId);
    SubscriberClient subscriber = SubscriberClient.Create(
        subscriptionName,
        new[] { SubscriberServiceApiClient.Create() });

    // SubscriberClient runs your message handle function on multiple
    // threads to maximize throughput.
    var done = new ManualResetEventSlim(false);
    subscriber.StartAsync((PubsubMessage message, CancellationToken cancel) =>
    {
        // Messages without the attribute are declined rather than throwing from the indexer.
        string notifiedJobName;
        if (message.Attributes.TryGetValue("DlpJobName", out notifiedJobName)
            && notifiedJobName == submittedJob.Name)
        {
            Thread.Sleep(500); // Wait for DLP API results to become consistent
            done.Set();
            return Task.FromResult(SubscriberClient.Reply.Ack);
        }

        return Task.FromResult(SubscriberClient.Reply.Nack);
    });

    // 10 minute timeout; may not work for large jobs
    bool jobCompleted = done.Wait(TimeSpan.FromMinutes(10));
    subscriber.StopAsync(CancellationToken.None).Wait();

    if (!jobCompleted)
    {
        // No notification arrived, so the job's results cannot be assumed to exist yet.
        Console.WriteLine("Error: The wait failed on timeout");
        return 0;
    }

    // Process results
    var resultJob = dlp.GetDlpJob(
        new GetDlpJobRequest
        {
            DlpJobName = DlpJobName.Parse(submittedJob.Name)
        });

    var result = resultJob.RiskDetails.NumericalStatsResult;

    // 'UnpackValue(x)' is a prettier version of 'x.toString()'
    Console.WriteLine($"Value Range: [{DlpSamplesUtils.UnpackValue(result.MinValue)}, {DlpSamplesUtils.UnpackValue(result.MaxValue)}]");

    // Print a quantile only when its value differs from the previous quantile's value.
    string lastValue = string.Empty;
    for (int quantile = 0; quantile < result.QuantileValues.Count; quantile++)
    {
        string currentValue = DlpSamplesUtils.UnpackValue(result.QuantileValues[quantile]);
        if (lastValue != currentValue)
        {
            Console.WriteLine($"Value at {quantile + 1}% quantile: {currentValue}");
        }

        lastValue = currentValue;
    }

    return 0;
}
// [END dlp_numerical_stats]

// [START dlp_categorical_stats]
/// <summary>
/// Submits a categorical-stats risk analysis job for one column of a BigQuery table, waits
/// up to ten minutes for its Pub/Sub notification, then prints the value-frequency histogram.
/// </summary>
/// <param name="callingProjectId">Project used as the job parent and for the topic and subscription.</param>
/// <param name="tableProjectId">Project that owns the BigQuery table.</param>
/// <param name="datasetId">BigQuery dataset of the source table.</param>
/// <param name="tableId">BigQuery table to analyze.</param>
/// <param name="topicId">Topic id; expanded to "projects/{callingProjectId}/topics/{topicId}".</param>
/// <param name="subscriptionId">Subscription (within <paramref name="callingProjectId"/>) to listen on.</param>
/// <param name="columnName">Name of the column to compute statistics for.</param>
/// <returns>Always <c>0</c>; a timeout is reported on the console only.</returns>
public static object CategoricalStats(
    string callingProjectId,
    string tableProjectId,
    string datasetId,
    string tableId,
    string topicId,
    string subscriptionId,
    string columnName)
{
    DlpServiceClient dlp = DlpServiceClient.Create();

    // Construct + submit the job
    RiskAnalysisJobConfig config = new RiskAnalysisJobConfig
    {
        PrivacyMetric = new PrivacyMetric
        {
            CategoricalStatsConfig = new CategoricalStatsConfig()
            {
                Field = new FieldId { Name = columnName }
            }
        },
        SourceTable = new BigQueryTable
        {
            ProjectId = tableProjectId,
            DatasetId = datasetId,
            TableId = tableId
        },
        Actions =
        {
            new Google.Cloud.Dlp.V2.Action
            {
                PubSub = new PublishToPubSub
                {
                    Topic = $"projects/{callingProjectId}/topics/{topicId}"
                }
            }
        }
    };

    var submittedJob = dlp.CreateDlpJob(new CreateDlpJobRequest
    {
        ParentAsProjectName = new ProjectName(callingProjectId),
        RiskJob = config
    });

    // Listen to pub/sub for the job
    var subscriptionName = new SubscriptionName(callingProjectId, subscriptionId);
    SubscriberClient subscriber = SubscriberClient.CreateAsync(
        subscriptionName).Result;

    // SubscriberClient runs your message handle function on multiple
    // threads to maximize throughput.
    var done = new ManualResetEventSlim(false);
    subscriber.StartAsync((PubsubMessage message, CancellationToken cancel) =>
    {
        // Messages without the attribute are declined rather than throwing from the indexer.
        string notifiedJobName;
        if (message.Attributes.TryGetValue("DlpJobName", out notifiedJobName)
            && notifiedJobName == submittedJob.Name)
        {
            Thread.Sleep(500); // Wait for DLP API results to become consistent
            done.Set();
            return Task.FromResult(SubscriberClient.Reply.Ack);
        }

        return Task.FromResult(SubscriberClient.Reply.Nack);
    });

    // 10 minute timeout; may not work for large jobs
    bool jobCompleted = done.Wait(TimeSpan.FromMinutes(10));
    subscriber.StopAsync(CancellationToken.None).Wait();

    if (!jobCompleted)
    {
        // No notification arrived, so the job's results cannot be assumed to exist yet.
        Console.WriteLine("Error: The wait failed on timeout");
        return 0;
    }

    // Process results
    var resultJob = dlp.GetDlpJob(new GetDlpJobRequest
    {
        DlpJobName = DlpJobName.Parse(submittedJob.Name)
    });

    var result = resultJob.RiskDetails.CategoricalStatsResult;

    for (int bucketIdx = 0; bucketIdx < result.ValueFrequencyHistogramBuckets.Count; bucketIdx++)
    {
        var bucket = result.ValueFrequencyHistogramBuckets[bucketIdx];
        Console.WriteLine($"Bucket {bucketIdx}");
        Console.WriteLine($" Most common value occurs {bucket.ValueFrequencyUpperBound} time(s).");
        Console.WriteLine($" Least common value occurs {bucket.ValueFrequencyLowerBound} time(s).");
        Console.WriteLine($" {bucket.BucketSize} unique value(s) total.");

        foreach (var bucketValue in bucket.BucketValues)
        {
            // 'UnpackValue(x)' is a prettier version of 'x.toString()'
            Console.WriteLine($" Value {DlpSamplesUtils.UnpackValue(bucketValue.Value)} occurs {bucketValue.Count} time(s).");
        }
    }

    return 0;
}