/// <summary>
/// Analyzes an input document for relationships between detected items.
///
/// <para>
/// The types of information returned are as follows:
/// </para>
/// <ul>
/// <li>
/// <para>
/// Words and lines that are related to nearby lines and words. The related information
/// is returned in two <a>Block</a> objects each of type <code>KEY_VALUE_SET</code>: a
/// KEY Block object and a VALUE Block object. For example, <i>Name: Ana Silva Carolina</i>
/// contains a key and value. <i>Name:</i> is the key. <i>Ana Silva Carolina</i> is the
/// value.
/// </para>
/// </li>
/// <li>
/// <para>
/// Table and table cell data. A TABLE Block object contains information about a detected
/// table. A CELL Block object is returned for each cell in a table.
/// </para>
/// </li>
/// <li>
/// <para>
/// Selectable elements such as checkboxes and radio buttons. A SELECTION_ELEMENT Block
/// object contains information about a selectable element.
/// </para>
/// </li>
/// <li>
/// <para>
/// Lines and words of text. A LINE Block object contains one or more WORD Block objects.
/// </para>
/// </li>
/// </ul>
/// <para>
/// You can choose which type of analysis to perform by specifying the <code>FeatureTypes</code>
/// list.
/// </para>
///
/// <para>
/// The output is returned in a list of <code>BLOCK</code> objects.
/// </para>
///
/// <para>
/// <code>AnalyzeDocument</code> is a synchronous operation. To analyze documents asynchronously,
/// use <a>StartDocumentAnalysis</a>.
/// </para>
///
/// <para>
/// For more information, see <a href="https://docs.aws.amazon.com/textract/latest/dg/how-it-works-analyzing.html">Document
/// Text Analysis</a>.
/// </para>
/// </summary>
/// <param name="request">Container for the necessary parameters to execute the AnalyzeDocument service method.</param>
/// <param name="cancellationToken">
/// A cancellation token that can be used by other objects or threads to receive notice of cancellation.
/// </param>
///
/// <returns>The response from the AnalyzeDocument service method, as returned by Textract.</returns>
/// <exception cref="Amazon.Textract.Model.AccessDeniedException">
/// You aren't authorized to perform the action.
/// </exception>
/// <exception cref="Amazon.Textract.Model.BadDocumentException">
/// Amazon Textract isn't able to read the document.
/// </exception>
/// <exception cref="Amazon.Textract.Model.DocumentTooLargeException">
/// The document can't be processed because it's too large. The maximum document size
/// for synchronous operations is 5 MB. The maximum document size for asynchronous operations
/// is 500 MB for PDF format files.
/// </exception>
/// <exception cref="Amazon.Textract.Model.InternalServerErrorException">
/// Amazon Textract experienced a service issue. Try your call again.
/// </exception>
/// <exception cref="Amazon.Textract.Model.InvalidParameterException">
/// An input parameter violated a constraint. For example, in synchronous operations,
/// an <code>InvalidParameterException</code> exception occurs when neither of the <code>S3Object</code>
/// or <code>Bytes</code> values are supplied in the <code>Document</code> request parameter.
/// Validate your parameter before calling the API operation again.
/// </exception>
/// <exception cref="Amazon.Textract.Model.InvalidS3ObjectException">
/// Amazon Textract is unable to access the S3 object that's specified in the request.
/// </exception>
/// <exception cref="Amazon.Textract.Model.ProvisionedThroughputExceededException">
/// The number of requests exceeded your throughput limit. If you want to increase this
/// limit, contact Amazon Textract.
/// </exception>
/// <exception cref="Amazon.Textract.Model.ThrottlingException">
/// Amazon Textract is temporarily unable to process the request. Try your call again.
/// </exception>
/// <exception cref="Amazon.Textract.Model.UnsupportedDocumentException">
/// The format of the input document isn't supported. Amazon Textract supports documents
/// that are .png or .jpg format.
/// </exception>
/// <seealso href="http://docs.aws.amazon.com/goto/WebAPI/textract-2018-06-27/AnalyzeDocument">REST API Reference for AnalyzeDocument Operation</seealso>
public virtual Task<AnalyzeDocumentResponse> AnalyzeDocumentAsync(AnalyzeDocumentRequest request, System.Threading.CancellationToken cancellationToken = default(CancellationToken))
{
    // Pair the request with the marshaller/unmarshaller singletons for this operation.
    var invokeOptions = new InvokeOptions
    {
        RequestMarshaller = AnalyzeDocumentRequestMarshaller.Instance,
        ResponseUnmarshaller = AnalyzeDocumentResponseUnmarshaller.Instance
    };

    return InvokeAsync<AnalyzeDocumentResponse>(request, invokeOptions, cancellationToken);
}
/// <summary>
/// Synchronous counterpart of the AnalyzeDocument call: hands the request to
/// <c>Invoke</c> together with the AnalyzeDocument request marshaller and response
/// unmarshaller and returns the resulting response.
/// </summary>
/// <param name="request">Container for the parameters of the AnalyzeDocument call.</param>
/// <returns>The <c>AnalyzeDocumentResponse</c> produced by <c>Invoke</c>.</returns>
internal virtual AnalyzeDocumentResponse AnalyzeDocument(AnalyzeDocumentRequest request)
{
    // Pair the request with the marshaller/unmarshaller singletons for this operation.
    var invokeOptions = new InvokeOptions
    {
        RequestMarshaller = AnalyzeDocumentRequestMarshaller.Instance,
        ResponseUnmarshaller = AnalyzeDocumentResponseUnmarshaller.Instance
    };

    return Invoke<AnalyzeDocumentResponse>(request, invokeOptions);
}
/// <summary>
/// Reads a local document, runs a Textract FORMS analysis on it and prints every
/// key/value pair reported by <c>Get_kv_relationship</c> to the console.
/// </summary>
/// <param name="LocalEmploymentFile">Path of the document file to read and analyze.</param>
public static void Get_kv_map(string LocalEmploymentFile)
{
    List<Block> blocks;

    // The in-memory copy of the file and the service client are both disposable;
    // release them as soon as the (synchronous) call has returned.
    using (MemoryStream stream = new MemoryStream(File.ReadAllBytes(LocalEmploymentFile)))
    using (AmazonTextractClient abcdclient = new AmazonTextractClient())
    {
        AnalyzeDocumentRequest analyzeDocumentRequest = new AnalyzeDocumentRequest
        {
            Document = new Document
            {
                Bytes = stream
            },
            FeatureTypes = new List<string>
            {
                FeatureType.FORMS
            }
        };

        // Get the text blocks
        blocks = abcdclient.AnalyzeDocument(analyzeDocumentRequest).Blocks;
    }

    // Split the KEY_VALUE_SET blocks into keys and values; block_map keeps every block.
    List<Block> key_map = new List<Block>();
    List<Block> value_map = new List<Block>();
    List<Block> block_map = new List<Block>();
    foreach (Block block in blocks)
    {
        block_map.Add(block);

        if (block.BlockType != BlockType.KEY_VALUE_SET)
        {
            continue;
        }

        if (block.EntityTypes.Contains("KEY"))
        {
            key_map.Add(block);
        }
        else
        {
            value_map.Add(block);
        }
    }

    // Get Key Value relationship
    var getKeyValueRelationship = Get_kv_relationship(key_map, value_map, block_map);

    foreach (KeyValuePair<string, string> kvp in getKeyValueRelationship)
    {
        Console.WriteLine(" {0} : {1}", kvp.Key, kvp.Value);
    }
}
/// <summary>
/// Copies <paramref name="file"/> into memory and submits it to
/// <c>textractClient.AnalyzeDocumentAsync</c> with the requested feature types.
/// </summary>
/// <remarks>
/// Despite its name, this method issues an <c>AnalyzeDocumentRequest</c> through
/// <c>AnalyzeDocumentAsync</c> and returns the analysis response directly.
/// </remarks>
/// <param name="file">Stream holding the document; it is read from its current position.</param>
/// <param name="featureTypes">Feature types to pass on in the request.</param>
/// <returns>The response returned by <c>AnalyzeDocumentAsync</c>.</returns>
private async Task<AnalyzeDocumentResponse> StartDocumentAnalysis(Stream file, List<string> featureTypes)
{
    // Keep the buffer alive until the call has completed, then release it.
    using (var memoryStream = new MemoryStream())
    {
        // Copy asynchronously so a slow source stream does not block the calling thread.
        await file.CopyToAsync(memoryStream);

        var request = new AnalyzeDocumentRequest
        {
            Document = new Document
            {
                Bytes = memoryStream
            },
            FeatureTypes = featureTypes
        };

        return await this.textractClient.AnalyzeDocumentAsync(request);
    }
}
/// <summary>
/// Builds an <c>AnalyzeDocumentRequest</c> from the <c>AnalyzeDocumentDetails</c> and
/// <c>OpcRequestId</c> members, sends it through <c>client</c>, writes the result and
/// finishes processing. Any exception raised along the way is handed to
/// <c>TerminatingErrorDuringExecution</c>.
/// </summary>
protected override void ProcessRecord()
{
    base.ProcessRecord();

    try
    {
        var analyzeRequest = new AnalyzeDocumentRequest
        {
            AnalyzeDocumentDetails = AnalyzeDocumentDetails,
            OpcRequestId = OpcRequestId
        };

        // This override is synchronous, so the awaitable returned by the client is
        // waited on in place.
        response = client.AnalyzeDocument(analyzeRequest).GetAwaiter().GetResult();

        WriteOutput(response, response.AnalyzeDocumentResult);
        FinishProcessing(response);
    }
    catch (Exception ex)
    {
        TerminatingErrorDuringExecution(ex);
    }
}
/// <summary>
/// Sends the image at <paramref name="filePathName"/> to Amazon Textract and reads the
/// harvest fields (specie, fur type, trophy rating, quick-kill bonus, integrity bonus,
/// score) from the detected text lines, selecting each line by its position in the image.
/// </summary>
/// <remarks>
/// The expected positions are pixel offsets defined against a 1920x1080 reference and
/// converted to the relative coordinates used by the bounding boxes. Image dimensions
/// are clamped to 2560x1080 before the conversion.
/// </remarks>
/// <param name="filePathName">Path of the image file to analyze.</param>
/// <returns>
/// A <c>Harvest</c> holding every field that could be read; fields that could not be
/// read keep their defaults (empty string / 0). <c>RequiresCheck</c> is <c>true</c>
/// when at least one field could not be located or parsed, or when the response status
/// was not HTTP 200.
/// </returns>
public async Task<Harvest> Recognize(string filePathName)
{
    Logger.LogDebug(">> Recognize");

    int imageWidth = 0;
    int imageHeight = 0;
    Amazon.Textract.Model.AnalyzeDocumentResponse result;

    // The client and the image buffer are disposable. Both stay alive until the request
    // has completed instead of relying on a closed stream still being readable.
    using (var client = new AmazonTextractClient(
        _configuration.GetSection("AccessKey").Value,
        _configuration.GetSection("SecretAccessKey").Value,
        Amazon.RegionEndpoint.GetBySystemName(_configuration.GetSection("Region").Value)))
    using (MemoryStream memoryStream = new MemoryStream())
    {
        using (Image image = Image.FromFile(filePathName))
        {
            imageWidth = image.Width >= 2560 ? 2560 : image.Width;
            imageHeight = image.Height > 1080 ? 1080 : image.Height;

            image.Save(memoryStream, image.RawFormat);
        }

        var request = new AnalyzeDocumentRequest()
        {
            Document = new Amazon.Textract.Model.Document()
            {
                Bytes = memoryStream
            },
            FeatureTypes = new List<string>() { "TABLES" }
        };

        result = await client.AnalyzeDocumentAsync(request);
    }

    var specie = "";
    var furType = "";
    float trophyRating = 0;
    float quickKill = 0;
    float integrity = 0;
    float score = 0;
    bool ocrParseFailure = false;

    if (result.HttpStatusCode != System.Net.HttpStatusCode.OK)
    {
        // Nothing was recognized, so the empty harvest must not look like a clean read.
        ocrParseFailure = true;
    }
    else
    {
        try
        {
            Logger.LogTrace(System.Text.Json.JsonSerializer.Serialize(result));

            float specieMinLeft = RelativeX(100f, imageWidth);
            float specieMinTop = RelativeY(150f, imageHeight);
            float specieMaxTop = RelativeY(230f, imageHeight);

            float furTypeMinLeft = RelativeX(300f, imageWidth);
            float furTypeMinTop = RelativeY(295f, imageHeight);

            float trophyRatingMinLeft = RelativeX(300f, imageWidth);
            float trophyRatingMinTop = RelativeY(500f, imageHeight);

            float quickKillMinLeft = RelativeX(300f, imageWidth);
            float quickKillMinTop = RelativeY(550f, imageHeight);

            float integrityMinLeft = RelativeX(300f, imageWidth);
            float integrityMinTop = RelativeY(585f, imageHeight);

            float scoreMinLeft = RelativeX(475f, imageWidth);
            float scoreMinTop = RelativeY(680f, imageHeight);
            float scoreMaxRight = RelativeX(600f, imageWidth);

            // Every field is searched among the same candidates: LINE blocks whose right
            // edge lies left of scoreMaxRight. Compute that list once.
            List<Amazon.Textract.Model.Block> lines = result.Blocks.FindAll(b =>
                b.BlockType.Value == "LINE"
                && ((b.Geometry.BoundingBox.Left + b.Geometry.BoundingBox.Width) < scoreMaxRight));

            ocrParseFailure |= !TryReadField(() =>
                specie = FindLineText(lines, "specie", b =>
                    b.Geometry.BoundingBox.Left > specieMinLeft
                    && b.Geometry.BoundingBox.Top > specieMinTop
                    && specieMaxTop > b.Geometry.BoundingBox.Top));

            ocrParseFailure |= !TryReadField(() =>
                furType = FindLineText(lines, "fur type", b =>
                    (b.Geometry.BoundingBox.Left > furTypeMinLeft)
                    && (b.Geometry.BoundingBox.Top > furTypeMinTop)));

            ocrParseFailure |= !TryReadField(() =>
                trophyRating = ParseInvariantFloat(FindLineText(lines, "trophy rating", b =>
                    (b.Geometry.BoundingBox.Left > trophyRatingMinLeft)
                    && (b.Geometry.BoundingBox.Top > trophyRatingMinTop))));

            ocrParseFailure |= !TryReadField(() =>
                quickKill = ParseInvariantFloat(FindLineText(lines, "quick kill", b =>
                    (b.Geometry.BoundingBox.Left > quickKillMinLeft)
                    && (b.Geometry.BoundingBox.Top > quickKillMinTop)).Replace("%", "")));

            ocrParseFailure |= !TryReadField(() =>
                integrity = ParseInvariantFloat(FindLineText(lines, "integrity", b =>
                    (b.Geometry.BoundingBox.Left > integrityMinLeft)
                    && (b.Geometry.BoundingBox.Top > integrityMinTop)).Replace("%", "")));

            ocrParseFailure |= !TryReadField(() =>
                score = ParseInvariantFloat(FindLineText(lines, "score", b =>
                    (b.Geometry.BoundingBox.Left > scoreMinLeft)
                    && (b.Geometry.BoundingBox.Top > scoreMinTop))));
        }
        catch (Exception e)
        {
            ocrParseFailure = true;
            Logger.LogError(e);
        }
    }

    return new Harvest()
    {
        FurType = furType,
        IntegrityBonus = integrity,
        QuickKillBonus = quickKill,
        Score = score,
        Specie = specie,
        TrophyRating = trophyRating,
        RequiresCheck = ocrParseFailure
    };
}

/// <summary>
/// Converts a horizontal pixel offset defined on a 1920-pixel-wide reference into a
/// fraction of <paramref name="imageWidth"/>. The centering offset uses integer division.
/// </summary>
private static float RelativeX(float referencePixel, int imageWidth)
{
    return (referencePixel + ((imageWidth - 1920) / 2)) / imageWidth;
}

/// <summary>
/// Converts a vertical pixel offset defined on a 1080-pixel-high reference into a
/// fraction of <paramref name="imageHeight"/>. The centering offset uses integer division.
/// </summary>
private static float RelativeY(float referencePixel, int imageHeight)
{
    return (referencePixel + ((imageHeight - 1080) / 2)) / imageHeight;
}

/// <summary>
/// Returns the text of the first line accepted by <paramref name="isMatch"/>; throws an
/// <see cref="InvalidOperationException"/> naming the field when no line matches.
/// </summary>
private static string FindLineText(
    List<Amazon.Textract.Model.Block> lines,
    string fieldName,
    Predicate<Amazon.Textract.Model.Block> isMatch)
{
    var line = lines.Find(isMatch);
    if (line == null)
    {
        throw new InvalidOperationException($"No text line found at the expected position of '{fieldName}'.");
    }

    return line.Text;
}

/// <summary>
/// Parses OCR text as a float using the invariant culture, so the result does not depend
/// on the regional settings of the machine running the code.
/// </summary>
private static float ParseInvariantFloat(string text)
{
    return float.Parse(text, System.Globalization.CultureInfo.InvariantCulture);
}

/// <summary>
/// Runs <paramref name="read"/>; logs any exception and reports whether it succeeded, so
/// one unreadable field does not prevent the remaining fields from being read.
/// </summary>
private bool TryReadField(Action read)
{
    try
    {
        read();
        return true;
    }
    catch (Exception e)
    {
        Logger.LogError(e);
        return false;
    }
}