Beispiel #1
0
 /// <summary>
 /// Writes a debug dump of every block in <paramref name="response"/> to the console.
 /// Each block is wrapped in &lt;block&gt; markers and shown as "Id:BlockType:Text";
 /// KEY_VALUE_SET, TABLE and CELL blocks additionally get a nested section
 /// produced by PrintBlock.
 /// </summary>
 /// <param name="response">Analysis response whose Blocks are printed.</param>
 public void PrintDebug(GetDocumentAnalysisResponse response)
 {
     // Block types that receive the extra nested PrintBlock dump,
     // checked in the same order as the tag names are listed.
     string[] detailedTypes = { "KEY_VALUE_SET", "TABLE", "CELL" };

     foreach (var block in response.Blocks)
     {
         Console.WriteLine("<block>");
         Console.WriteLine(block.Id + ":" + block.BlockType + ":" + block.Text);

         foreach (string tag in detailedTypes)
         {
             if (block.BlockType == tag)
             {
                 Console.WriteLine(" <" + tag + ">");
                 PrintBlock(block);
                 Console.WriteLine(" </" + tag + ">");
                 break;
             }
         }

         Console.WriteLine("</block>");
     }
 }
Beispiel #2
0
        // Gets the result of a PDF analysis request.
        //
        // Polls GetDocumentAnalysis for the job identified by response.JobId until
        // the job reaches a terminal status, then returns the last response.
        //   - "SUCCEEDED" / "PARTIAL_SUCCESS": the response is returned to the caller.
        //   - "FAILED": a System.InvalidOperationException carrying the service's
        //     StatusMessage is thrown (previously this case polled forever).
        private async Task <GetDocumentAnalysisResponse> GetDocAnalysisResponse(StartDocumentAnalysisResponse response)
        {
            // Build the request from the job id handed back by the start call.
            var request = new GetDocumentAnalysisRequest();

            request.JobId = response.JobId;

            GetDocumentAnalysisResponse res = await this.textractClient.GetDocumentAnalysisAsync(request);

            int attempt = 0;

            // Analysis can take over 15 sec., so keep re-querying every 200 ms
            // until the service reports a terminal status.
            while (res.JobStatus != "SUCCEEDED" && res.JobStatus != "PARTIAL_SUCCESS")
            {
                if (res.JobStatus == "FAILED")
                {
                    // A failed job never becomes SUCCEEDED; stop instead of looping endlessly.
                    throw new System.InvalidOperationException(
                        "Textract document analysis job " + request.JobId + " failed: " + res.StatusMessage);
                }

                await Task.Delay(200);

                res = await this.textractClient.GetDocumentAnalysisAsync(request);

                attempt++;
                System.Diagnostics.Debug.WriteLine("Trying again.... " + res.JobStatus + " Attempt " + attempt);
            }

            return(res);
        }
Beispiel #3
0
        /// <summary>
        /// Creates a document from a document-analysis response: initialises
        /// <c>Pages</c> to an empty list, then parses the response's blocks.
        /// </summary>
        /// <param name="response">Analysis response supplying the blocks to parse.</param>
        public TextractDocument(GetDocumentAnalysisResponse response)
        {
            this.Pages = new List <Page>();

            // Parse receives both the per-page block lists and the flat block list.
            var blocksGroupedByPage = ParseDocumentPagesAndBlockMap(response);
            Parse(blocksGroupedByPage, response.Blocks);
        }
Beispiel #4
0
        /// <summary>
        /// Unmarshalls the JSON service response held by <paramref name="context"/>
        /// into a <see cref="GetDocumentAnalysisResponse"/>.
        /// </summary>
        /// <param name="context">JSON unmarshaller context positioned at the response.</param>
        /// <returns>The populated response object.</returns>
        public override AmazonWebServiceResponse Unmarshall(JsonUnmarshallerContext context)
        {
            var result = new GetDocumentAnalysisResponse();

            context.Read();
            int depth = context.CurrentDepth;

            // Each token read at the starting depth is matched against the known
            // member names; at most one branch handles it, unmatched tokens are skipped.
            while (context.ReadAtDepth(depth))
            {
                if (context.TestExpression("AnalyzeDocumentModelVersion", depth))
                {
                    result.AnalyzeDocumentModelVersion = StringUnmarshaller.Instance.Unmarshall(context);
                }
                else if (context.TestExpression("Blocks", depth))
                {
                    var blockList = new ListUnmarshaller <Block, BlockUnmarshaller>(BlockUnmarshaller.Instance);
                    result.Blocks = blockList.Unmarshall(context);
                }
                else if (context.TestExpression("DocumentMetadata", depth))
                {
                    result.DocumentMetadata = DocumentMetadataUnmarshaller.Instance.Unmarshall(context);
                }
                else if (context.TestExpression("JobStatus", depth))
                {
                    result.JobStatus = StringUnmarshaller.Instance.Unmarshall(context);
                }
                else if (context.TestExpression("NextToken", depth))
                {
                    result.NextToken = StringUnmarshaller.Instance.Unmarshall(context);
                }
                else if (context.TestExpression("StatusMessage", depth))
                {
                    result.StatusMessage = StringUnmarshaller.Instance.Unmarshall(context);
                }
                else if (context.TestExpression("Warnings", depth))
                {
                    var warningList = new ListUnmarshaller <Warning, WarningUnmarshaller>(WarningUnmarshaller.Instance);
                    result.Warnings = warningList.Unmarshall(context);
                }
            }

            return(result);
        }
        /// <summary>
        /// Collects the text of every block whose BlockType equals "LINE",
        /// in the order the blocks appear in the response, one entry per line.
        /// </summary>
        /// <param name="response">Analysis response whose Blocks are scanned.</param>
        /// <returns>The LINE texts, each terminated by a newline.</returns>
        public string GetRawText(GetDocumentAnalysisResponse response)
        {
            var rawText = new StringBuilder();

            foreach (var block in response.Blocks)
            {
                // Skip everything that is not a LINE block.
                if (block.BlockType != "LINE")
                {
                    continue;
                }

                rawText.AppendLine(block.Text);
            }

            return(rawText.ToString());
        }
        // Builds a tab-separated "Key<TAB>Value" listing of all form fields.
        // The result can be stored as a .tsv file; a tab is used as the separator
        // because ',' or ';' may appear inside a key or value.
        public string CreateKeyValueText(GetDocumentAnalysisResponse response)
        {
            const string separator = "\t";
            var output = new StringBuilder();

            // Header row.
            output.AppendLine("Key" + separator + "Value");

            var document = new TextractDocument(response);

            // One row per form field, page by page.
            foreach (var page in document.Pages)
            {
                foreach (var field in page.Form.Fields)
                {
                    output.AppendLine(field.Key + separator + field.Value);
                }
            }

            return(output.ToString());
        }
Beispiel #7
0
        /// <summary>
        /// Splits the flat block list of <paramref name="response"/> into one list
        /// per page. A new group starts at every block whose BlockType is PAGE and
        /// that PAGE block is the group's first element. Blocks that appear before
        /// the first PAGE block (if any) form a group of their own.
        /// </summary>
        /// <param name="response">Analysis response whose Blocks are grouped.</param>
        /// <returns>
        /// The non-empty groups in document order; an empty list when the response
        /// contains no blocks.
        /// </returns>
        private List <List <Block> > ParseDocumentPagesAndBlockMap(GetDocumentAnalysisResponse response)
        {
            var allPageBlocks = new List <List <Block> >();
            var pageBlocks    = new List <Block>();

            foreach (var block in response.Blocks)
            {
                if (block.BlockType == BlockType.PAGE)
                {
                    // A PAGE block closes the group collected so far (if it has content)...
                    if (pageBlocks.Count > 0)
                    {
                        allPageBlocks.Add(pageBlocks);
                    }

                    // ...and opens the next one, with the PAGE block itself as first entry.
                    pageBlocks = new List <Block> {
                        block
                    };
                }
                else
                {
                    pageBlocks.Add(block);
                }
            }

            // Flush the last group; skip it when nothing was collected so that an
            // empty response does not produce a spurious empty page.
            if (pageBlocks.Count > 0)
            {
                allPageBlocks.Add(pageBlocks);
            }

            return(allPageBlocks);
        }