/// <summary>
/// Starts the asynchronous detection of text in a document. Amazon Textract can detect
/// lines of text and the words that make up a line of text.
///
/// <para>
/// Amazon Textract can detect text in document images and PDF files that are stored in
/// an Amazon S3 bucket. Use <a>DocumentLocation</a> to specify the bucket name and the
/// file name of the document image.
/// </para>
///
/// <para>
/// <code>StartDocumentTextDetection</code> returns a job identifier (<code>JobId</code>)
/// that you use to get the results of the operation. When text detection is finished,
/// Amazon Textract publishes a completion status to the Amazon Simple Notification
/// Service (Amazon SNS) topic that you specify in <code>NotificationChannel</code>.
/// To get the results of the text detection operation, first check that the status value
/// published to the Amazon SNS topic is <code>SUCCEEDED</code>. If so, call
/// <a>GetDocumentTextDetection</a>, and pass the job identifier (<code>JobId</code>)
/// from the initial call to <code>StartDocumentTextDetection</code>.
/// </para>
///
/// <para>
/// For more information, see Document Text Detection in the Amazon Textract Developer
/// Guide.
/// </para>
/// </summary>
/// <param name="request">Container for the necessary parameters to execute the StartDocumentTextDetection service method.</param>
///
/// <returns>The response from the StartDocumentTextDetection service method, as returned by Textract.</returns>
/// <exception cref="Amazon.Textract.Model.AccessDeniedException">
/// You aren't authorized to perform the action.
/// </exception>
/// <exception cref="Amazon.Textract.Model.BadDocumentException">
/// Amazon Textract isn't able to read the document.
/// </exception>
/// <exception cref="Amazon.Textract.Model.DocumentTooLargeException">
/// The document can't be processed because it's too large. The maximum document size
/// for synchronous operations is 5 MB. The maximum document size for asynchronous
/// operations is 500 MB for PDF format files.
/// </exception>
/// <exception cref="Amazon.Textract.Model.IdempotentParameterMismatchException">
/// A <code>ClientRequestToken</code> input parameter was reused with an operation, but
/// at least one of the other input parameters is different from the previous call to
/// the operation.
/// </exception>
/// <exception cref="Amazon.Textract.Model.InternalServerErrorException">
/// Amazon Textract experienced a service issue. Try your call again.
/// </exception>
/// <exception cref="Amazon.Textract.Model.InvalidParameterException">
/// An input parameter violated a constraint. For example, in synchronous operations,
/// an <code>InvalidParameterException</code> exception occurs when neither of the
/// <code>S3Object</code> or <code>Bytes</code> values are supplied in the
/// <code>Document</code> request parameter. Validate your parameter before calling
/// the API operation again.
/// </exception>
/// <exception cref="Amazon.Textract.Model.InvalidS3ObjectException">
/// Amazon Textract is unable to access the S3 object that's specified in the request.
/// </exception>
/// <exception cref="Amazon.Textract.Model.LimitExceededException">
/// An Amazon Textract service limit was exceeded. For example, if you start too many
/// asynchronous jobs concurrently, calls to start operations
/// (<code>StartDocumentTextDetection</code>, for example) raise a
/// LimitExceededException exception (HTTP status code: 400) until the number of
/// concurrently running jobs is below the Amazon Textract service limit.
/// </exception>
/// <exception cref="Amazon.Textract.Model.ProvisionedThroughputExceededException">
/// The number of requests exceeded your throughput limit. If you want to increase this
/// limit, contact Amazon Textract.
/// </exception>
/// <exception cref="Amazon.Textract.Model.ThrottlingException">
/// Amazon Textract is temporarily unable to process the request. Try your call again.
/// </exception>
/// <exception cref="Amazon.Textract.Model.UnsupportedDocumentException">
/// The format of the input document isn't supported. Amazon Textract supports documents
/// that are .png or .jpg format.
/// </exception>
/// <seealso href="http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/StartDocumentTextDetection">REST API Reference for StartDocumentTextDetection Operation</seealso>
public virtual StartDocumentTextDetectionResponse StartDocumentTextDetection(StartDocumentTextDetectionRequest request)
{
    // Pair the request with the marshaller/unmarshaller singletons for this operation.
    var invokeOptions = new InvokeOptions
    {
        RequestMarshaller = StartDocumentTextDetectionRequestMarshaller.Instance,
        ResponseUnmarshaller = StartDocumentTextDetectionResponseUnmarshaller.Instance
    };

    return Invoke<StartDocumentTextDetectionResponse>(request, invokeOptions);
}
/// <summary>
/// Starts a document text detection job for an S3 object and returns the job identifier.
/// </summary>
/// <param name="bucketName">Value assigned to <c>Bucket</c> on the request's <c>S3Object</c>.</param>
/// <param name="key">Value assigned to <c>Name</c> on the request's <c>S3Object</c>.</param>
/// <returns>A task whose result is the <c>JobId</c> of the start response.</returns>
public async Task<string> StartDocumentTextDetection(string bucketName, string key)
{
    // Build the whole request in one initializer expression.
    var detectionRequest = new StartDocumentTextDetectionRequest
    {
        DocumentLocation = new DocumentLocation
        {
            S3Object = new S3Object
            {
                Bucket = bucketName,
                Name = key
            }
        }
    };

    var startResponse = await this.textract.StartDocumentTextDetectionAsync(detectionRequest);
    return startResponse.JobId;
}
/// <summary>
/// Starts a document text detection job for an S3 object, waits for the job to
/// complete, and returns the blocks from all of the job's result responses.
/// </summary>
/// <param name="bucketName">Value assigned to <c>Bucket</c> on the request's <c>S3Object</c>.</param>
/// <param name="key">Value assigned to <c>Name</c> on the request's <c>S3Object</c>.</param>
/// <returns>
/// A task whose result is the flattened sequence of <c>Blocks</c> taken from every
/// response returned by <c>GetJobResults</c>.
/// </returns>
public async Task<IEnumerable<Block>> GetDocumentBlocks(string bucketName, string key)
{
    var startRequest = new StartDocumentTextDetectionRequest();
    startRequest.DocumentLocation = new DocumentLocation
    {
        S3Object = new S3Object { Bucket = bucketName, Name = key }
    };

    var startResponse = await _textract.StartDocumentTextDetectionAsync(startRequest);
    var jobId = startResponse.JobId;

    // Wait until the job is complete. Normally this would be separated out into a
    // distributed event or a scheduler rather than waiting inline.
    // NOTE(review): the call is not awaited; this presumes WaitForJobCompletion is
    // synchronous — confirm against its definition.
    WaitForJobCompletion(jobId);

    // Fetch the detection results and flatten them into a single sequence of blocks.
    return GetJobResults(jobId).SelectMany(resultPage => resultPage.Blocks);
}
/// <summary>
/// Starts the asynchronous detection of text in a document. Amazon Textract can detect
/// lines of text and the words that make up a line of text.
///
/// <para>
/// <code>StartDocumentTextDetection</code> can analyze text in documents that are in
/// JPG, PNG, and PDF format. The documents are stored in an Amazon S3 bucket. Use
/// <a>DocumentLocation</a> to specify the bucket name and file name of the document.
/// </para>
///
/// <para>
/// <code>StartDocumentTextDetection</code> returns a job identifier (<code>JobId</code>)
/// that you use to get the results of the operation. When text detection is finished,
/// Amazon Textract publishes a completion status to the Amazon Simple Notification
/// Service (Amazon SNS) topic that you specify in <code>NotificationChannel</code>.
/// To get the results of the text detection operation, first check that the status value
/// published to the Amazon SNS topic is <code>SUCCEEDED</code>. If so, call
/// <a>GetDocumentTextDetection</a>, and pass the job identifier (<code>JobId</code>)
/// from the initial call to <code>StartDocumentTextDetection</code>.
/// </para>
///
/// <para>
/// For more information, see <a href="https://docs.aws.amazon.com/textract/latest/dg/how-it-works-detecting.html">Document
/// Text Detection</a>.
/// </para>
/// </summary>
/// <param name="request">Container for the necessary parameters to execute the StartDocumentTextDetection service method.</param>
/// <param name="cancellationToken">
/// A cancellation token that can be used by other objects or threads to receive notice of cancellation.
/// </param>
///
/// <returns>The response from the StartDocumentTextDetection service method, as returned by Textract.</returns>
/// <exception cref="Amazon.Textract.Model.AccessDeniedException">
/// You aren't authorized to perform the action.
/// </exception>
/// <exception cref="Amazon.Textract.Model.BadDocumentException">
/// Amazon Textract isn't able to read the document.
/// </exception>
/// <exception cref="Amazon.Textract.Model.DocumentTooLargeException">
/// The document can't be processed because it's too large. The maximum document size
/// for synchronous operations is 5 MB. The maximum document size for asynchronous
/// operations is 500 MB for PDF format files.
/// </exception>
/// <exception cref="Amazon.Textract.Model.IdempotentParameterMismatchException">
/// A <code>ClientRequestToken</code> input parameter was reused with an operation, but
/// at least one of the other input parameters is different from the previous call to
/// the operation.
/// </exception>
/// <exception cref="Amazon.Textract.Model.InternalServerErrorException">
/// Amazon Textract experienced a service issue. Try your call again.
/// </exception>
/// <exception cref="Amazon.Textract.Model.InvalidParameterException">
/// An input parameter violated a constraint. For example, in synchronous operations,
/// an <code>InvalidParameterException</code> exception occurs when neither of the
/// <code>S3Object</code> or <code>Bytes</code> values are supplied in the
/// <code>Document</code> request parameter. Validate your parameter before calling
/// the API operation again.
/// </exception>
/// <exception cref="Amazon.Textract.Model.InvalidS3ObjectException">
/// Amazon Textract is unable to access the S3 object that's specified in the request.
/// </exception>
/// <exception cref="Amazon.Textract.Model.LimitExceededException">
/// An Amazon Textract service limit was exceeded. For example, if you start too many
/// asynchronous jobs concurrently, calls to start operations
/// (<code>StartDocumentTextDetection</code>, for example) raise a
/// LimitExceededException exception (HTTP status code: 400) until the number of
/// concurrently running jobs is below the Amazon Textract service limit.
/// </exception>
/// <exception cref="Amazon.Textract.Model.ProvisionedThroughputExceededException">
/// The number of requests exceeded your throughput limit. If you want to increase this
/// limit, contact Amazon Textract.
/// </exception>
/// <exception cref="Amazon.Textract.Model.ThrottlingException">
/// Amazon Textract is temporarily unable to process the request. Try your call again.
/// </exception>
/// <exception cref="Amazon.Textract.Model.UnsupportedDocumentException">
/// The format of the input document isn't supported. Amazon Textract supports documents
/// that are .png or .jpg format.
/// </exception>
/// <seealso href="http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/StartDocumentTextDetection">REST API Reference for StartDocumentTextDetection Operation</seealso>
public virtual Task<StartDocumentTextDetectionResponse> StartDocumentTextDetectionAsync(StartDocumentTextDetectionRequest request, System.Threading.CancellationToken cancellationToken = default(CancellationToken))
{
    // Pair the request with the marshaller/unmarshaller singletons for this operation.
    var invokeOptions = new InvokeOptions
    {
        RequestMarshaller = StartDocumentTextDetectionRequestMarshaller.Instance,
        ResponseUnmarshaller = StartDocumentTextDetectionResponseUnmarshaller.Instance
    };

    // The task is returned directly (no async/await here), with the token forwarded.
    return InvokeAsync<StartDocumentTextDetectionResponse>(request, invokeOptions, cancellationToken);
}