/// <summary>
/// Writes a debug dump of every block in <paramref name="response"/> to the console.
/// </summary>
/// <remarks>
/// Each block is wrapped in <c>&lt;block&gt;</c> tags and printed as <c>Id:BlockType:Text</c>.
/// Blocks of type KEY_VALUE_SET, TABLE or CELL additionally get a nested section,
/// named after the block type, whose content is produced by <c>PrintBlock</c>.
/// </remarks>
/// <param name="response">The analysis response whose blocks are printed.</param>
public void PrintDebug(GetDocumentAnalysisResponse response)
{
    // Block types that get a nested section; checked in this order, first match wins.
    string[] nestedSectionTypes = { "KEY_VALUE_SET", "TABLE", "CELL" };

    foreach (var block in response.Blocks)
    {
        Console.WriteLine("<block>");
        Console.WriteLine(block.Id + ":" + block.BlockType + ":" + block.Text);

        string sectionTag = null;
        foreach (string candidate in nestedSectionTypes)
        {
            if (block.BlockType == candidate)
            {
                sectionTag = candidate;
                break;
            }
        }

        if (sectionTag != null)
        {
            Console.WriteLine(" <" + sectionTag + ">");
            PrintBlock(block);
            Console.WriteLine(" </" + sectionTag + ">");
        }

        Console.WriteLine("</block>");
    }
}
/// <summary>
/// Gets the result of a document analysis request, polling until the job
/// leaves the in-progress state.
/// </summary>
/// <remarks>
/// Only the response of the final poll is returned; this method does not follow
/// <c>NextToken</c>, so callers needing every result page must paginate themselves.
/// </remarks>
/// <param name="response">The start-analysis response carrying the job id to poll.</param>
/// <returns>
/// The analysis response once the job status is SUCCEEDED or PARTIAL_SUCCESS.
/// For PARTIAL_SUCCESS the caller should inspect <c>JobStatus</c> and <c>Warnings</c>.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// The job finished with status FAILED.
/// </exception>
private async Task<GetDocumentAnalysisResponse> GetDocAnalysisResponse(StartDocumentAnalysisResponse response)
{
    // Short wait between polls; a whole analysis can take well over 15 seconds,
    // so expect many iterations before the job completes.
    const int PollDelayMilliseconds = 200;

    // Get the job id from the start-analysis response.
    var request = new GetDocumentAnalysisRequest { JobId = response.JobId };

    GetDocumentAnalysisResponse result = await this.textractClient.GetDocumentAnalysisAsync(request);
    int attempt = 0;

    while (result.JobStatus != "SUCCEEDED")
    {
        // FAILED is terminal: polling again would never reach SUCCEEDED,
        // so surface the failure instead of spinning forever.
        if (result.JobStatus == "FAILED")
        {
            throw new System.InvalidOperationException(
                "Textract document analysis job " + request.JobId + " failed: " + result.StatusMessage);
        }

        // PARTIAL_SUCCESS is terminal as well; hand back what is available.
        if (result.JobStatus == "PARTIAL_SUCCESS")
        {
            break;
        }

        await Task.Delay(PollDelayMilliseconds);
        result = await this.textractClient.GetDocumentAnalysisAsync(request);
        attempt++;
        System.Diagnostics.Debug.WriteLine("Trying again.... " + result.JobStatus + " Attempt " + attempt);
    }

    return result;
}
/// <summary>
/// Builds a document from a Textract analysis response.
/// </summary>
/// <remarks>
/// Starts with an empty <c>Pages</c> list, groups the response blocks per page via
/// <c>ParseDocumentPagesAndBlockMap</c>, and passes those groups together with the
/// full block list to <c>Parse</c>.
/// </remarks>
/// <param name="response">The analysis response to parse.</param>
public TextractDocument(GetDocumentAnalysisResponse response)
{
    // Pages must exist before Parse runs.
    this.Pages = new List<Page>();

    Parse(ParseDocumentPagesAndBlockMap(response), response.Blocks);
}
/// <summary>
/// Unmarshalls the JSON response from the service into a
/// <c>GetDocumentAnalysisResponse</c> instance.
/// </summary>
/// <param name="context">The JSON unmarshaller context positioned at the response.</param>
/// <returns>The populated response object.</returns>
public override AmazonWebServiceResponse Unmarshall(JsonUnmarshallerContext context)
{
    var result = new GetDocumentAnalysisResponse();

    context.Read();
    int depth = context.CurrentDepth;

    // Walk the members at the response's top level; each recognised member is
    // handed to its matching unmarshaller, anything else is skipped.
    while (context.ReadAtDepth(depth))
    {
        if (context.TestExpression("AnalyzeDocumentModelVersion", depth))
        {
            result.AnalyzeDocumentModelVersion = StringUnmarshaller.Instance.Unmarshall(context);
        }
        else if (context.TestExpression("Blocks", depth))
        {
            var blockList = new ListUnmarshaller<Block, BlockUnmarshaller>(BlockUnmarshaller.Instance);
            result.Blocks = blockList.Unmarshall(context);
        }
        else if (context.TestExpression("DocumentMetadata", depth))
        {
            result.DocumentMetadata = DocumentMetadataUnmarshaller.Instance.Unmarshall(context);
        }
        else if (context.TestExpression("JobStatus", depth))
        {
            result.JobStatus = StringUnmarshaller.Instance.Unmarshall(context);
        }
        else if (context.TestExpression("NextToken", depth))
        {
            result.NextToken = StringUnmarshaller.Instance.Unmarshall(context);
        }
        else if (context.TestExpression("StatusMessage", depth))
        {
            result.StatusMessage = StringUnmarshaller.Instance.Unmarshall(context);
        }
        else if (context.TestExpression("Warnings", depth))
        {
            var warningList = new ListUnmarshaller<Warning, WarningUnmarshaller>(WarningUnmarshaller.Instance);
            result.Warnings = warningList.Unmarshall(context);
        }
    }

    return result;
}
/// <summary>
/// Collects the text of every LINE block in the response, one line of output per block.
/// </summary>
/// <param name="response">The analysis response to read blocks from.</param>
/// <returns>The LINE block texts in response order, each followed by a line terminator.</returns>
public string GetRawText(GetDocumentAnalysisResponse response)
{
    var rawText = new StringBuilder();

    foreach (var block in response.Blocks)
    {
        // Other block types are ignored here.
        if (block.BlockType != "LINE")
        {
            continue;
        }

        rawText.AppendLine(block.Text);
    }

    return rawText.ToString();
}
/// <summary>
/// Renders the form fields of the analysed document as tab-separated text
/// with a "Key"/"Value" header row.
/// </summary>
/// <remarks>
/// The result can be stored as a .tsv file. A tab is used as the separator because
/// "," and ";" can appear inside keys and values.
/// </remarks>
/// <param name="response">The analysis response to extract form fields from.</param>
/// <returns>The header row followed by one "key&lt;TAB&gt;value" row per form field.</returns>
public string CreateKeyValueText(GetDocumentAnalysisResponse response)
{
    const string Separator = "\t";

    var output = new StringBuilder();
    output.AppendLine("Key" + Separator + "Value");

    var document = new TextractDocument(response);
    foreach (var page in document.Pages)
    {
        foreach (var field in page.Form.Fields)
        {
            output.AppendLine(field.Key + Separator + field.Value);
        }
    }

    return output.ToString();
}
/// <summary>
/// Splits the response's flat block list into one block group per page.
/// </summary>
/// <remarks>
/// A new group starts at every PAGE block and contains that PAGE block followed by
/// all blocks up to the next PAGE block. Blocks that appear before the first PAGE
/// block form a group of their own. Grouping relies only on PAGE blocks, not on
/// <c>DocumentMetadata.Pages</c>.
/// </remarks>
/// <param name="response">The analysis response whose blocks are grouped.</param>
/// <returns>
/// The block groups in response order; empty when the response contains no blocks.
/// </returns>
private List<List<Block>> ParseDocumentPagesAndBlockMap(GetDocumentAnalysisResponse response)
{
    var allPageBlocks = new List<List<Block>>();
    var pageBlocks = new List<Block>();

    foreach (var block in response.Blocks)
    {
        if (block.BlockType == BlockType.PAGE)
        {
            // A PAGE block closes the group collected so far (if any) and opens a new one.
            if (pageBlocks.Count > 0)
            {
                allPageBlocks.Add(pageBlocks);
            }

            pageBlocks = new List<Block> { block };
        }
        else
        {
            pageBlocks.Add(block);
        }
    }

    // Flush the last group. Checking Count (rather than null, which can never be
    // the case here) avoids emitting an empty group for a response without blocks.
    if (pageBlocks.Count > 0)
    {
        allPageBlocks.Add(pageBlocks);
    }

    return allPageBlocks;
}