/// <summary>Snippet for AsyncBatchAnnotateFilesAsync</summary>
public async Task AsyncBatchAnnotateFilesAsync()
{
    // Snippet: AsyncBatchAnnotateFilesAsync(IEnumerable<AsyncAnnotateFileRequest>, CallSettings)
    // Additional: AsyncBatchAnnotateFilesAsync(IEnumerable<AsyncAnnotateFileRequest>, CancellationToken)
    // Build the client
    ImageAnnotatorClient annotator = await ImageAnnotatorClient.CreateAsync();

    // Prepare the request argument(s): a batch holding one default-constructed request
    IEnumerable<AsyncAnnotateFileRequest> batch = new[] { new AsyncAnnotateFileRequest() };

    // Start the long-running operation
    Operation<AsyncBatchAnnotateFilesResponse, OperationMetadata> started =
        await annotator.AsyncBatchAnnotateFilesAsync(batch);

    // Keep polling until the operation has completed, then read its result
    Operation<AsyncBatchAnnotateFilesResponse, OperationMetadata> finished =
        await started.PollUntilCompletedAsync();
    AsyncBatchAnnotateFilesResponse finalResult = finished.Result;

    // Alternatively, remember the operation name so the operation can be looked up again later
    string storedName = started.Name;
    Operation<AsyncBatchAnnotateFilesResponse, OperationMetadata> polled =
        await annotator.PollOnceAsyncBatchAnnotateFilesAsync(storedName);

    // The result is only available once the polled operation reports completion
    if (polled.IsCompleted)
    {
        AsyncBatchAnnotateFilesResponse polledResult = polled.Result;
    }
    // End snippet
}
/// <summary>
/// Uploads the PDF at <paramref name="path"/> to the "sinProcesar/" folder of the OCR bucket and
/// runs an asynchronous document-text-detection job on it, with the JSON output written under
/// "procesados/" in the same bucket.
/// </summary>
/// <param name="path">
/// Local path of the PDF. Everything after the last backslash is used as the object name.
/// </param>
/// <param name="credential">Credential used to create the Cloud Storage client.</param>
/// <returns>
/// A task that completes once the annotation operation has finished, or once a failure has been logged.
/// </returns>
/// <remarks>
/// Failures are not propagated to the caller; they are written to standard error instead.
/// </remarks>
private static async Task GetDataAsync(string path, GoogleCredential credential)
{
    try
    {
        // Ordinal char search. With no backslash LastIndexOf yields -1, so the whole path is kept.
        string strFileName = path.Substring(path.LastIndexOf('\\') + 1);
        string sourceObjectName = "sinProcesar/" + strFileName;

        StorageClient storageClient = await StorageClient.CreateAsync(credential);
        storageClient.Service.HttpClient.Timeout = new TimeSpan(0, 10, 0);
        var bucket = await storageClient.GetBucketAsync("bucket ocr");

        // Stream the file straight to Cloud Storage. Copying it into a MemoryStream first left that
        // stream positioned at its end, and the FileStream itself was never disposed.
        using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read))
        {
            await storageClient.UploadObjectAsync(bucket.Name, sourceObjectName, "application/pdf", fs);
        }

        var asyncRequest = new AsyncAnnotateFileRequest
        {
            InputConfig = new InputConfig
            {
                GcsSource = new GcsSource { Uri = $"gs://{bucket.Name}/sinProcesar/{strFileName}" },
                MimeType = "application/pdf"
            },
            OutputConfig = new OutputConfig
            {
                BatchSize = 2,
                GcsDestination = new GcsDestination
                {
                    // Output prefix is the file name up to its first '.'.
                    Uri = $"gs://{bucket.Name}/procesados/{strFileName.Split('.')[0]}"
                }
            }
        };
        asyncRequest.Features.Add(new Feature { Type = Feature.Types.Type.DocumentTextDetection });

        List<AsyncAnnotateFileRequest> requests = new List<AsyncAnnotateFileRequest> { asyncRequest };

        // NOTE(review): the Vision client reads a hard-coded key file rather than using
        // the 'credential' argument - confirm whether that is intentional.
        var clientBuilder = new ImageAnnotatorClientBuilder
        {
            CredentialsPath = @"D:\Google_API_key_TFM.json"
        };
        var client = await clientBuilder.BuildAsync();

        // Await the long-running operation instead of blocking a thread on it.
        var operation = await client.AsyncBatchAnnotateFilesAsync(requests);
        var completed = await operation.PollUntilCompletedAsync();
        if (completed.IsFaulted)
        {
            // Route a failed operation into the handler below rather than ignoring it.
            throw completed.Exception;
        }
    }
    catch (Exception e)
    {
        // Still best-effort (nothing is rethrown), but the failure is no longer silent.
        Console.Error.WriteLine($"GetDataAsync failed for '{path}': {e}");
    }
}
// [START vision_text_detection_pdf_gcs]
/// <summary>
/// Runs asynchronous document text detection on a PDF stored in Cloud Storage, lists the
/// output objects, and prints the full text of the first page of the first JSON output file.
/// </summary>
/// <param name="gcsSourceUri">URI of the input PDF, passed as the GCS source of the request.</param>
/// <param name="gcsDestinationBucketName">Bucket that receives the JSON output.</param>
/// <param name="gcsDestinationPrefixName">Object-name prefix for the output inside that bucket.</param>
/// <returns>Always 0 (boxed).</returns>
/// <exception cref="InvalidOperationException">
/// No object whose name contains ".json" exists under the destination prefix.
/// </exception>
private static object DetectDocument(string gcsSourceUri, string gcsDestinationBucketName, string gcsDestinationPrefixName)
{
    var client = ImageAnnotatorClient.Create();

    var asyncRequest = new AsyncAnnotateFileRequest
    {
        InputConfig = new InputConfig
        {
            GcsSource = new GcsSource { Uri = gcsSourceUri },
            // Supported mime_types are: 'application/pdf' and 'image/tiff'
            MimeType = "application/pdf"
        },
        OutputConfig = new OutputConfig
        {
            // How many pages should be grouped into each json output file.
            BatchSize = 2,
            GcsDestination = new GcsDestination
            {
                Uri = $"gs://{gcsDestinationBucketName}/{gcsDestinationPrefixName}"
            }
        }
    };
    asyncRequest.Features.Add(new Feature { Type = Feature.Types.Type.DocumentTextDetection });

    List<AsyncAnnotateFileRequest> requests = new List<AsyncAnnotateFileRequest> { asyncRequest };

    var operation = client.AsyncBatchAnnotateFiles(requests);

    Console.WriteLine("Waiting for the operation to finish");
    operation.PollUntilCompleted();

    // Once the request has completed and the output has been
    // written to GCS, we can list all the output files.
    var storageClient = StorageClient.Create();

    // List objects with the given prefix. The listing is used twice below (printing, then
    // selecting the JSON file), so materialise it once instead of enumerating the query twice.
    var blobList = storageClient.ListObjects(gcsDestinationBucketName, gcsDestinationPrefixName).ToList();
    Console.WriteLine("Output files:");
    foreach (var blob in blobList)
    {
        Console.WriteLine(blob.Name);
    }

    // Process the first output file from GCS.
    // Select the first JSON file from the objects in the list.
    var output = blobList.FirstOrDefault(x => x.Name.Contains(".json"));
    if (output == null)
    {
        // Same exception type First() raised, with a message that names the cause.
        throw new InvalidOperationException(
            $"No JSON output found under gs://{gcsDestinationBucketName}/{gcsDestinationPrefixName}");
    }

    string jsonString;
    using (var stream = new MemoryStream())
    {
        storageClient.DownloadObject(output, stream);
        jsonString = System.Text.Encoding.UTF8.GetString(stream.ToArray());
    }

    var response = JsonParser.Default.Parse<AnnotateFileResponse>(jsonString);

    // The actual response for the first page of the input file.
    var firstPageResponses = response.Responses[0];
    var annotation = firstPageResponses.FullTextAnnotation;

    // Here we print the full text from the first page.
    // The response contains more information:
    // annotation/pages/blocks/paragraphs/words/symbols
    // including confidence scores and bounding boxes
    // '?.' guards against a missing annotation, which previously caused a NullReferenceException.
    Console.WriteLine($"Full text: \n {annotation?.Text}");

    return 0;
}
/// <summary>
/// Uploads a PDF to a newly created bucket, runs document text detection on it, and returns
/// the number of recognised characters, not counting spaces and line feeds.
/// </summary>
/// <param name="nomeDoArquivo">Object name under which the PDF is uploaded.</param>
/// <param name="arquivo">Stream holding the PDF content.</param>
/// <returns>Total count of characters other than ' ' and '\n' across all annotated pages.</returns>
/// <exception cref="InvalidOperationException">
/// The operation produced no object whose name contains ".json".
/// </exception>
public int Executar(string nomeDoArquivo, MemoryStream arquivo)
{
    var bucketName = BucketName.RandomName();
    var outputPrefix = "";
    var gcsSourceURI = $"gs://{bucketName}/{nomeDoArquivo}";

    StorageClient storage = StorageClient.Create();
    storage.CreateBucket(_projectId, bucketName);

    // The try starts only after the bucket exists, so cleanup never targets a bucket that was
    // not created. From here on the cleanup runs even when a later step throws.
    try
    {
        // NOTE(review): presumably the upload reads from arquivo's current position - confirm
        // that callers pass a rewound stream.
        storage.UploadObject(bucketName, nomeDoArquivo, "application/pdf", arquivo);

        var client = ImageAnnotatorClient.Create();
        var asyncRequest = new AsyncAnnotateFileRequest
        {
            InputConfig = new InputConfig
            {
                GcsSource = new GcsSource { Uri = gcsSourceURI },
                MimeType = "application/pdf"
            },
            OutputConfig = new OutputConfig
            {
                // How many pages should be grouped into each json output file.
                BatchSize = 100,
                GcsDestination = new GcsDestination { Uri = $"gs://{bucketName}/{outputPrefix}" }
            }
        };
        asyncRequest.Features.Add(new Feature { Type = Feature.Types.Type.DocumentTextDetection });

        List<AsyncAnnotateFileRequest> requests = new List<AsyncAnnotateFileRequest> { asyncRequest };
        var operation = client.AsyncBatchAnnotateFiles(requests);
        operation.PollUntilCompleted();

        // Each JSON file holds at most BatchSize pages, so a long document yields several files.
        // Count all of them rather than only the first.
        var jsonOutputs = storage.ListObjects(bucketName, outputPrefix)
            .Where(x => x.Name.Contains(".json"))
            .ToList();
        if (jsonOutputs.Count == 0)
        {
            // Same exception type the previous First() call raised on an empty listing.
            throw new InvalidOperationException($"No JSON output found in bucket '{bucketName}'.");
        }

        int total = 0;
        foreach (var output in jsonOutputs)
        {
            string jsonString;
            using (var stream = new MemoryStream())
            {
                storage.DownloadObject(output, stream);
                jsonString = System.Text.Encoding.UTF8.GetString(stream.ToArray());
            }

            var response = JsonParser.Default.Parse<AnnotateFileResponse>(jsonString);
            foreach (var pageResponse in response.Responses)
            {
                // Skip pages without a text annotation; dereferencing it used to throw.
                var annotation = pageResponse?.FullTextAnnotation;
                if (annotation == null)
                {
                    continue;
                }

                // Equivalent to replacing '\n' with ' ', splitting on ' ' and summing the piece lengths.
                total += annotation.Text.Count(c => c != ' ' && c != '\n');
            }
        }

        return total;
    }
    finally
    {
        RemoverArquivos(bucketName);
    }
}