// Runs the blockset-detection steps for one page. Inputs are pulled with
// FromCache (IdentifyTablesData, ProcessImageData, HeaderFooterData,
// ProcessPdfTextData) and the outcome is handed to StoreCache<BlocksetData>.
// Trailing numbers are the original step labels; a numbered step that appears
// only inside a comment is switched off in this pipeline.
void FindBlocksets(PipelineInputPdf.PipelineInputPdfPage page)
{
    var cachedShapesStage = page
        .FromCache<IdentifyTablesData>()
        .ParseBlock<SetIdentifyTablesCompatibility>()
        .FromCache<ProcessImageData>()
        .ParseBlock<SetProcessImageCompatibility>()
        .ParseBlock<BasicFirstPageStats>()                  // 2
        .ParseBlock<RemoveOverlapedImages2>();              // 3

    var headerFooterStage = cachedShapesStage
        .FromCache<HeaderFooterData>()
        .ParseBlock<RemoveImageLineFromHeaderFooter>();

    var textStage = headerFooterStage
        .FromCache<ProcessPdfTextData>()                    // 4
        .ParseBlock<FilterHeaderFooter>()
        .ParseBlock<RemoveSmallFonts>()                     // 5
        .ParseBlock<MergeTableText>()                       // 6
        // off: .ParseBlock<HighlightTextTable>()           // 7
        .ParseBlock<RemoveTableText>()                      // 8
        // off: .ParseBlock<ReplaceCharacters>()            // 9
        .ParseBlock<GroupLines>();                          // 10
        // off: .ParseBlock<RemoveTableDotChar>()           // 11
        // off: .ParseBlock<RemoveHeaderImage>()            // 12

    var initialBlocksetStage = textStage
        .ParseBlock<FindInitialBlocksetWithRewind>()        // 13
        // off: .Show(Color.Gray)
        .ParseBlock<BreakColumnsLight>()                    // 14
        .ParseBlock<AddTableSpace>()                        // 15
        .ParseBlock<RemoveTableOverImage>()                 // 16
        .ParseBlock<RemoveImageTexts>()                     // 17
        .ParseBlock<AddImageSpace>();                       // 18
        // off: .Validate<RemoveFooter>().ShowErrors(p => p.Show(Color.Purple))
        // off: .ParseBlock<RemoveFooter>()                 // 19

    var layoutStage = initialBlocksetStage
        .ParseBlock<AddTableHorizontalLines2>()             // 20(b)
        .ParseBlock<RemoveBackgroundNonText>()              // 21
        .ParseBlock<BreakColumnsRewrite>()                  // 22
        .ParseBlock<BreakInlineElements>()                  // 23
        .ParseBlock<ResizeBlocksets>()                      // 24
        .ParseBlock<ResizeBlocksetMagins>()                 // 25
        .ParseBlock<OrderBlocksets>()                       // 26
        .ParseBlock<OrganizePageLayout>()                   // 27
        .ParseBlock<MergeSequentialLayout>()                // 28
        .ParseBlock<ResizeSequentialLayout>();              // 29

    layoutStage.StoreCache<BlocksetData>();
}
// Parses one page from the PDF (tables, images, text), applies the block steps
// below in order, and finishes with Show(Color.Orange) / ShowLine(Color.Black).
// Each Validate<T>().ShowErrors(...) call passes its error callback a value on
// which Show is invoked with the given colour.
public static void ProcessPage2(PipelineInputPdf.PipelineInputPdfPage page)
{
    var tablesAndImagesStage = page
        .ParsePdf<PreProcessTables>()
        .ParseBlock<IdentifyTables>()
        .ParsePdf<PreProcessImages>()
        .Validate<RemoveOverlapedImages>().ShowErrors(p => p.Show(Color.Red))
        .ParseBlock<RemoveOverlapedImages>();

    var groupedTextStage = tablesAndImagesStage
        .ParsePdf<ProcessPdfText>()
        // off: .Validate<MergeTableText>().ShowErrors(p => p.Show(Color.Blue))
        .ParseBlock<MergeTableText>()
        // off: .Validate<HighlightTextTable>().ShowErrors(p => p.Show(Color.Green))
        .ParseBlock<HighlightTextTable>()
        .ParseBlock<RemoveTableText>()
        .ParseBlock<GroupLines>()
        .Show(Color.Orange);

    var columnsStage = groupedTextStage
        .Validate<RemoveHeaderImage>().ShowErrors(p => p.Show(Color.Purple))
        .ParseBlock<RemoveHeaderImage>()
        .ParseBlock<FindInitialBlocksetWithRewind>()
        .ParseBlock<BreakColumns>()
        .Validate<RemoveFooter>().ShowErrors(p => p.Show(Color.Purple))
        .ParseBlock<RemoveFooter>();

    var orderedStage = columnsStage
        .ParseBlock<AddTableSpace>()
        .ParseBlock<AddImageSpace>()
        .ParseBlock<BreakInlineElements>()
        .ParseBlock<ResizeBlocksets>()
        .Validate<ResizeBlocksets>().ShowErrors(p => p.Show(Color.Red))
        .ParseBlock<OrderBlocksets>();

    orderedStage
        .Show(Color.Orange)
        .ShowLine(Color.Black);
}
// Rebuilds the page blocks starting from cached data (IdentifyTablesData,
// ProcessImageData, HeaderFooterData, BlocksetData, ProcessPdfTextData), runs
// the block steps below in order, and hands the outcome to
// StoreCache<FinalBlockResultData> before the final Show / ShowLine calls.
// Trailing numbers are the original step labels; a step that appears only
// inside a comment is switched off in this pipeline.
void RetrieveBlocks(PipelineInputPdf.PipelineInputPdfPage page)
{
    var cachedLayoutStage = page
        .FromCache<IdentifyTablesData>()
        .ParseBlock<SetIdentifyTablesCompatibility>()
        .FromCache<ProcessImageData>()
        .ParseBlock<SetProcessImageCompatibility>()
        // off: .ParseBlock<RemoveOverlapedImages2>()       // 3
        .FromCache<HeaderFooterData>()
        .FromCache<BlocksetData>()
        .Show(Color.Gray);

    var filteredTextStage = cachedLayoutStage
        .FromCache<ProcessPdfTextData>()
        .ParseBlock<RemoveImageLineFromHeaderFooter>()
        .ParseBlock<FilterHeaderFooter>()
        // off: .ParseBlock<RemoveSmallFonts>()             // 5
        .ParseBlock<FindDouIdMateria>()                     // 5
        .Validate<RemoveBlockHidden>().ShowErrors(p => p.Show(Color.Green));
        // off: .ParseBlock<RemoveBlockHidden>()
        //      Open question: is it treated as part of the table? In principle, yes.

    var groupedTextStage = filteredTextStage
        .ParseBlock<MergeTableText>()                       // 6
        .ParseBlock<HighlightTextTable>()                   // 7
        // Open question: does the text inside the table need to be stored?
        .ParseBlock<RemoveTableText>()                      // 8
        .ParseBlock<ReplaceCharacters>()                    // 9
        .ParseBlock<GroupLines>()                           // 10
        .ParseBlock<RemoveTableDotChar>();                  // 11

    var spacedBlocksStage = groupedTextStage
        .ParseBlock<FindInitialBlocksetWithBlockInfo>()     // 13(b)
        .ParseBlock<AddTableSpace>()                        // 15
        .ParseBlock<RemoveTableOverImage>()                 // 16
        .ParseBlock<RemoveImageTexts>()                     // 17
        .ParseBlock<AddImageSpace>()                        // 18
        .ParseBlock<RemoveBackgroundNonText>();             // 21

    // REPLACE 1: Merge text with text
    // REPLACE 2: Break text with image/table
    var finalBlocksStage = spacedBlocksStage
        .ParseBlock<BreakInlineElements>()                  // 23
        .Show(Color.Yellow)
        .ParseBlock<OrderBlocksetsWithBlockInfo>()
        .ParseBlock<ResizeBlocksetsWithBlockInfo>();

    finalBlocksStage
        .StoreCache<FinalBlockResultData>()
        .Show(Color.Red)
        .ShowLine(Color.Black);
    // off: .PrintWarnings();
}
// Pulls HeaderFooterData and ProcessPdfTextData with FromCache, validates and
// then applies ShowTextHeaderFooter, and ends with Show(Color.Yellow).
// Validation errors are passed to Show(Color.PaleVioletRed).
void ShowColors(PipelineInputPdf.PipelineInputPdfPage page)
{
    var cachedTextStage = page
        .FromCache<HeaderFooterData>()
        .FromCache<ProcessPdfTextData>();

    var checkedStage = cachedTextStage
        .Validate<ShowTextHeaderFooter>()
        .ShowErrors(p => p.Show(Color.PaleVioletRed));

    checkedStage
        .ParseBlock<ShowTextHeaderFooter>()
        .Show(Color.Yellow);
}
// Pulls IdentifyTablesData, ProcessImageData and ProcessPdfTextData with
// FromCache, applies the FindDouHeaderFooter block, and hands the outcome to
// StoreCache<HeaderFooterData>.
void FindMargins(PipelineInputPdf.PipelineInputPdfPage page)
{
    var cachedInputsStage = page
        .FromCache<IdentifyTablesData>()
        .FromCache<ProcessImageData>()
        .FromCache<ProcessPdfTextData>();

    var headerFooterStage = cachedInputsStage.ParseBlock<FindDouHeaderFooter>();

    headerFooterStage.StoreCache<HeaderFooterData>();
}
// Runs the three ParsePdf steps in order, calling Show with a different colour
// after each one: tables -> blue, images -> orange, text -> yellow.
// Ends with ShowLine(Color.Black).
void Flow(PipelineInputPdf.PipelineInputPdfPage page)
{
    var tablesShownStage = page
        .ParsePdf<PreProcessTables>()
        .Show(Color.Blue);

    var imagesShownStage = tablesShownStage
        .ParsePdf<PreProcessImages>()
        .Show(Color.Orange);

    var textShownStage = imagesShownStage
        .ParsePdf<ProcessPdfText>()
        .Show(Color.Yellow);

    textShownStage.ShowLine(Color.Black);
}
// Pulls three cached data sets with FromCache and calls Show after each one:
// ProcessPdfTextData -> black, ProcessImageData -> yellow,
// IdentifyTablesData -> orange.
void ShowColors(PipelineInputPdf.PipelineInputPdfPage page)
{
    var textShownStage = page
        .FromCache<ProcessPdfTextData>()
        .Show(Color.Black);

    var imagesShownStage = textShownStage
        .FromCache<ProcessImageData>()
        .Show(Color.Yellow);

    imagesShownStage
        .FromCache<IdentifyTablesData>()
        .Show(Color.Orange);
}
// Pulls BlocksetData with FromCache, calls Show(Color.Orange) and
// ShowLine(Color.Green), applies the CheckOverlap block, then validates
// CheckOverlap and ValidatePositiveCoordinates. Errors from either validation
// are passed to Show(Color.Red).
void ShowColors(PipelineInputPdf.PipelineInputPdfPage page)
{
    var blocksetShownStage = page
        .FromCache<BlocksetData>()
        .Show(Color.Orange)
        .ShowLine(Color.Green);

    var overlapCheckedStage = blocksetShownStage
        .ParseBlock<CheckOverlap>()
        .Validate<CheckOverlap>()
        .ShowErrors(p => p.Show(Color.Red));

    overlapCheckedStage
        .Validate<ValidatePositiveCoordinates>()
        .ShowErrors(p => p.Show(Color.Red));
    // off: .PrintWarnings();
}
// Parses tables, images and text from the PDF page and calls StoreCache after
// each part: IdentifyTablesData, then ProcessImageData, then
// ProcessPdfTextData.
void InitialCache(PipelineInputPdf.PipelineInputPdfPage page)
{
    var tablesStoredStage = page
        .ParsePdf<PreProcessTables>()
        .ParseBlock<IdentifyTables>()
        .ParseBlock<SetIdentifyTablesCompatibility>()
        .StoreCache<IdentifyTablesData>();

    var imagesStoredStage = tablesStoredStage
        .ParsePdf<PreProcessImages>()
        .StoreCache<ProcessImageData>();

    imagesStoredStage
        .ParsePdf<ProcessPdfText>()
        .StoreCache<ProcessPdfTextData>();
}
// Runs the complete page pipeline straight from the PDF (no FromCache calls):
// ParsePdf for tables, images and text, then the numbered block steps 1-30,
// with Show / ShowLine calls in between, two final validations, and
// PrintWarnings at the end. Trailing numbers are the original step labels; a
// call that appears only inside a comment is switched off.
void ProcessFull(PipelineInputPdf.PipelineInputPdfPage page)
{
    var tablesAndImagesStage = page
        .ParsePdf<PreProcessTables>()
        .ParseBlock<IdentifyTables>()                       // 1
        .ParsePdf<PreProcessImages>()
        .ParseBlock<BasicFirstPageStats>()                  // 2
        // off: .Validate<RemoveOverlapedImages>().ShowErrors(p => p.Show(Color.Blue))
        .ParseBlock<RemoveOverlapedImages>();               // 3

    var groupedTextStage = tablesAndImagesStage
        .ParsePdf<ProcessPdfText>()                         // 4
        // off: .Validate<RemoveSmallFonts>().ShowErrors(p => p.ShowText(Color.Green))
        .ParseBlock<RemoveSmallFonts>()                     // 5
        // off: .Validate<MergeTableText>().ShowErrors(p => p.Show(Color.Blue))
        .ParseBlock<MergeTableText>()                       // 6
        // off: .Validate<HighlightTextTable>().ShowErrors(p => p.Show(Color.Green))
        .ParseBlock<HighlightTextTable>()                   // 7
        .ParseBlock<RemoveTableText>()                      // 8
        .ParseBlock<ReplaceCharacters>()                    // 9
        .ParseBlock<GroupLines>()                           // 10
        .ParseBlock<RemoveTableDotChar>()                   // 11
        .Show(Color.Yellow);

    var initialBlocksetStage = groupedTextStage
        .Validate<RemoveHeaderImage>().ShowErrors(p => p.Show(Color.Purple))
        .ParseBlock<RemoveHeaderImage>()                    // 12
        .ParseBlock<FindInitialBlocksetWithRewind>()        // 13
        .Show(Color.Gray);

    var spacedBlocksStage = initialBlocksetStage
        .ParseBlock<BreakColumnsLight>()                    // 14
        // off: .ParseBlock<BreakColumns>()
        .ParseBlock<AddTableSpace>()                        // 15
        .ParseBlock<RemoveTableOverImage>()                 // 16
        .ParseBlock<RemoveImageTexts>()                     // 17
        .ParseBlock<AddImageSpace>()                        // 18
        .Validate<RemoveFooter>().ShowErrors(p => p.Show(Color.Purple))
        .ParseBlock<RemoveFooter>();                        // 19

    var layoutStage = spacedBlocksStage
        .ParseBlock<AddTableHorizontalLines>()              // 20
        .ParseBlock<RemoveBackgroundNonText>()              // 21
        .ParseBlock<BreakColumnsRewrite>()                  // 22
        .ParseBlock<BreakInlineElements>()                  // 23
        .ParseBlock<ResizeBlocksets>()                      // 24
        .ParseBlock<ResizeBlocksetMagins>()                 // 25
        .ParseBlock<OrderBlocksets>()                       // 26
        .ParseBlock<OrganizePageLayout>()                   // 27
        .ParseBlock<MergeSequentialLayout>()                // 28
        .ParseBlock<ResizeSequentialLayout>()               // 29
        .Show(Color.Orange)
        .ShowLine(Color.Black);

    var checkedStage = layoutStage
        .ParseBlock<CheckOverlap>()                         // 30
        .Validate<CheckOverlap>().ShowErrors(p => p.Show(Color.Red))
        .Validate<ValidatePositiveCoordinates>().ShowErrors(p => p.Show(Color.Red));

    checkedStage.PrintWarnings();
}
// Parses tables, images and text from the PDF page, calling StoreCache after
// each part (IdentifyTablesData, ProcessImageData, ProcessPdfTextData).
// Show is called with a different colour along the way: red after the table
// pre-processing, then orange, yellow and black after each StoreCache.
void InitialCache(PipelineInputPdf.PipelineInputPdfPage page)
{
    var rawTablesStage = page
        .ParsePdf<PreProcessTables>()
        .Show(Color.Red);

    var tablesStoredStage = rawTablesStage
        .ParseBlock<IdentifyTables>()
        .ParseBlock<SetIdentifyTablesCompatibility>()
        .StoreCache<IdentifyTablesData>()
        .Show(Color.Orange);

    var imagesStoredStage = tablesStoredStage
        .ParsePdf<PreProcessImages>()
        .StoreCache<ProcessImageData>()
        .Show(Color.Yellow);

    imagesStoredStage
        .ParsePdf<ProcessPdfText>()
        .StoreCache<ProcessPdfTextData>()
        .Show(Color.Black);
}
// Requests FinalBlockResultData for the page through FromCache.
// Nothing further is chained on the call; its result is not used here.
void GetLines(PipelineInputPdf.PipelineInputPdfPage page)
{
    page.FromCache<FinalBlockResultData>();
}