Пример #1
0
        /// <summary>
        /// Runs the block-set chain for one page: four <c>FromCache</c> reads
        /// (IdentifyTablesData, ProcessImageData, HeaderFooterData, ProcessPdfTextData),
        /// the <c>ParseBlock</c> stages listed below in this exact order, and a final
        /// <c>StoreCache</c> of BlocksetData.
        /// </summary>
        /// <remarks>
        /// The trailing numbers look like stage indices; the disabled stages (7, 9, 11, 12, 19)
        /// are kept as comments, so the numbering of the active stages has gaps.
        /// NOTE(review): stages are presumably order-dependent - confirm before reordering.
        /// </remarks>
        /// <param name="page">Page object on which the fluent chain is invoked.</param>
        void FindBlocksets(PipelineInputPdf.PipelineInputPdfPage page)
        {
            page
            // Table and image inputs come from cache, each followed by a "compatibility" stage.
            .FromCache <IdentifyTablesData>()
            .ParseBlock <SetIdentifyTablesCompatibility>()
            .FromCache <ProcessImageData>()
            .ParseBlock <SetProcessImageCompatibility>()
            .ParseBlock <BasicFirstPageStats>()                 // 2
            .ParseBlock <RemoveOverlapedImages2>()              // 3

            // Header/footer data is read before the page text.
            .FromCache <HeaderFooterData>()
            .ParseBlock <RemoveImageLineFromHeaderFooter>()

            // Page text is read from cache, then filtered and merged.
            .FromCache <ProcessPdfTextData>()                 // 4
            .ParseBlock <FilterHeaderFooter>()
            .ParseBlock <RemoveSmallFonts>()                  // 5
            .ParseBlock <MergeTableText>()                    // 6

            // Stage 7 is disabled.
            //.ParseBlock<HighlightTextTable>()         // 7

            .ParseBlock <RemoveTableText>()                   // 8

            // Stage 9 is disabled.
            //.ParseBlock<ReplaceCharacters>()          // 9

            .ParseBlock <GroupLines>()                        // 10

            // Stages 11 and 12 are disabled.
            //.ParseBlock<RemoveTableDotChar>()         // 11

            //.ParseBlock<RemoveHeaderImage>()          // 12

            .ParseBlock <FindInitialBlocksetWithRewind>()         // 13

            // Disabled debug display call.
            //.Show(Color.Gray)

            .ParseBlock <BreakColumnsLight>()                 // 14
            .ParseBlock <AddTableSpace>()                     // 15
            .ParseBlock <RemoveTableOverImage>()              // 16
            .ParseBlock <RemoveImageTexts>()                  // 17

            .ParseBlock <AddImageSpace>()                     // 18

            // Stage 19 (RemoveFooter) and its validation are disabled.
            //                .Validate<RemoveFooter>().ShowErrors(p => p.Show(Color.Purple))
            //            .ParseBlock<RemoveFooter>()               // 19

            // Uses the "2" variant of AddTableHorizontalLines, marked 20(b).
            .ParseBlock <AddTableHorizontalLines2>()           // 20(b)

            .ParseBlock <RemoveBackgroundNonText>()            // 21
            .ParseBlock <BreakColumnsRewrite>()                // 22

            .ParseBlock <BreakInlineElements>()                // 23
            .ParseBlock <ResizeBlocksets>()                    // 24
            .ParseBlock <ResizeBlocksetMagins>()               // 25
            .ParseBlock <OrderBlocksets>()                     // 26

            .ParseBlock <OrganizePageLayout>()                 // 27
            .ParseBlock <MergeSequentialLayout>()              // 28
            .ParseBlock <ResizeSequentialLayout>()             // 29

            // The chain ends by storing its result under BlocksetData.
            .StoreCache <BlocksetData>();
        }
Пример #2
0
 /// <summary>
 /// Runs a complete chain on one page starting from <c>ParsePdf</c> calls (no cache reads),
 /// with <c>Validate</c>/<c>ShowErrors</c> checks and <c>Show</c> calls interleaved between
 /// the <c>ParseBlock</c> stages.
 /// </summary>
 /// <param name="page">Page object on which the fluent chain is invoked.</param>
 public static void ProcessPage2(PipelineInputPdf.PipelineInputPdfPage page)
 {
     // Phase 1: tables and images.
     var tablesAndImages = page
         .ParsePdf<PreProcessTables>()
         .ParseBlock<IdentifyTables>()
         .ParsePdf<PreProcessImages>()
         .Validate<RemoveOverlapedImages>().ShowErrors(target => target.Show(Color.Red))
         .ParseBlock<RemoveOverlapedImages>();

     // Phase 2: page text, table text handling and line grouping.
     var groupedLines = tablesAndImages
         .ParsePdf<ProcessPdfText>()
         //.Validate<MergeTableText>().ShowErrors(p => p.Show(Color.Blue))
         .ParseBlock<MergeTableText>()
         //.Validate<HighlightTextTable>().ShowErrors(p => p.Show(Color.Green))
         .ParseBlock<HighlightTextTable>()
         .ParseBlock<RemoveTableText>()
         .ParseBlock<GroupLines>()
         .Show(Color.Orange);

     // Phase 3: header image, initial block sets, columns and footer.
     var initialBlocksets = groupedLines
         .Validate<RemoveHeaderImage>().ShowErrors(target => target.Show(Color.Purple))
         .ParseBlock<RemoveHeaderImage>()
         .ParseBlock<FindInitialBlocksetWithRewind>()
         .ParseBlock<BreakColumns>()
         .Validate<RemoveFooter>().ShowErrors(target => target.Show(Color.Purple))
         .ParseBlock<RemoveFooter>();

     // Phase 4: spacing, resizing, ordering and the final display calls.
     // Note that ResizeBlocksets is validated after it has been applied.
     initialBlocksets
         .ParseBlock<AddTableSpace>()
         .ParseBlock<AddImageSpace>()
         .ParseBlock<BreakInlineElements>()
         .ParseBlock<ResizeBlocksets>()
         .Validate<ResizeBlocksets>().ShowErrors(target => target.Show(Color.Red))
         .ParseBlock<OrderBlocksets>()
         .Show(Color.Orange)
         .ShowLine(Color.Black);
 }
Пример #3
0
        /// <summary>
        /// Runs the block retrieval chain for one page: reads five cache entries
        /// (IdentifyTablesData, ProcessImageData, HeaderFooterData, BlocksetData,
        /// ProcessPdfTextData), applies the <c>ParseBlock</c> stages below in order,
        /// calls <c>StoreCache</c> for FinalBlockResultData and finishes with
        /// <c>Show</c>/<c>ShowLine</c> calls.
        /// </summary>
        /// <remarks>
        /// The trailing numbers look like stage indices; "13(b)" marks a variant stage.
        /// NOTE(review): stages are presumably order-dependent - confirm before reordering.
        /// </remarks>
        /// <param name="page">Page object on which the fluent chain is invoked.</param>
        void RetrieveBlocks(PipelineInputPdf.PipelineInputPdfPage page)
        {
            page
            // Cached inputs; table and image data are each followed by a "compatibility" stage.
            .FromCache <IdentifyTablesData>()
            .ParseBlock <SetIdentifyTablesCompatibility>()
            .FromCache <ProcessImageData>()
            .ParseBlock <SetProcessImageCompatibility>()
            //.ParseBlock<RemoveOverlapedImages2>()      // 3
            .FromCache <HeaderFooterData>()
            .FromCache <BlocksetData>()
            // Show is called right after BlocksetData is read and before the text is read.
            .Show(Color.Gray)
            .FromCache <ProcessPdfTextData>()

            .ParseBlock <RemoveImageLineFromHeaderFooter>()
            .ParseBlock <FilterHeaderFooter>()

            // RemoveSmallFonts is disabled; FindDouIdMateria carries the same index (5).
            //.ParseBlock<RemoveSmallFonts>()           // 5
            .ParseBlock <FindDouIdMateria>()                  // 5
            // RemoveBlockHidden is only validated here; its ParseBlock call is disabled.
            .Validate <RemoveBlockHidden>().ShowErrors(p => p.Show(Color.Green))
            //.ParseBlock<RemoveBlockHidden>()

            // Should this be treated as part of the table? In principle, yes.
            .ParseBlock <MergeTableText>()                    // 6

            .ParseBlock <HighlightTextTable>()                // 7

            // Does the text inside the table need to be stored?
            .ParseBlock <RemoveTableText>()                   // 8

            .ParseBlock <ReplaceCharacters>()                 // 9
            .ParseBlock <GroupLines>()                        // 10
            .ParseBlock <RemoveTableDotChar>()                // 11

            // Uses the "WithBlockInfo" variant of FindInitialBlockset, marked 13(b).
            .ParseBlock <FindInitialBlocksetWithBlockInfo>()  // 13(b)

            .ParseBlock <AddTableSpace>()                     // 15
            .ParseBlock <RemoveTableOverImage>()              // 16
            .ParseBlock <RemoveImageTexts>()                  // 17
            .ParseBlock <AddImageSpace>()                     // 18

            .ParseBlock <RemoveBackgroundNonText>()           // 21

            // REPLACE 1: Merge text with text
            // REPLACE 2: Break text with image/table

            .ParseBlock <BreakInlineElements>()               // 23
            .Show(Color.Yellow)

            // "WithBlockInfo" variants of the ordering and resizing stages.
            .ParseBlock <OrderBlocksetsWithBlockInfo>()
            .ParseBlock <ResizeBlocksetsWithBlockInfo>()

            // The result is stored before the final Show/ShowLine calls.
            .StoreCache <FinalBlockResultData>()

            .Show(Color.Red)
            .ShowLine(Color.Black);

            //.PrintWarnings();
        }
Пример #4
0
 /// <summary>
 /// Reads HeaderFooterData and ProcessPdfTextData via <c>FromCache</c>, validates and then
 /// applies <c>ShowTextHeaderFooter</c>, and finally calls <c>Show</c> with yellow.
 /// </summary>
 /// <param name="page">Page object on which the fluent chain is invoked.</param>
 void ShowColors(PipelineInputPdf.PipelineInputPdfPage page)
 {
     // Cache reads first.
     var cachedInputs = page
         .FromCache<HeaderFooterData>()
         .FromCache<ProcessPdfTextData>();

     // Validation errors are shown in pale violet red, the final result in yellow.
     cachedInputs
         .Validate<ShowTextHeaderFooter>().ShowErrors(target => target.Show(Color.PaleVioletRed))
         .ParseBlock<ShowTextHeaderFooter>()
         .Show(Color.Yellow);
 }
Пример #5
0
 /// <summary>
 /// Reads IdentifyTablesData, ProcessImageData and ProcessPdfTextData via <c>FromCache</c>,
 /// applies <c>FindDouHeaderFooter</c> and calls <c>StoreCache</c> for HeaderFooterData.
 /// </summary>
 /// <param name="page">Page object on which the fluent chain is invoked.</param>
 void FindMargins(PipelineInputPdf.PipelineInputPdfPage page)
 {
     // The three cache reads, in the original order.
     var cachedInputs = page
         .FromCache<IdentifyTablesData>()
         .FromCache<ProcessImageData>()
         .FromCache<ProcessPdfTextData>();

     // Single parse stage followed by the cache write.
     cachedInputs
         .ParseBlock<FindDouHeaderFooter>()
         .StoreCache<HeaderFooterData>();
 }
Пример #6
0
 /// <summary>
 /// Calls the three <c>ParsePdf</c> stages (tables, images, text) in order, with a
 /// <c>Show</c> call after each one and a final <c>ShowLine</c> call.
 /// </summary>
 /// <param name="page">Page object on which the fluent chain is invoked.</param>
 void Flow(PipelineInputPdf.PipelineInputPdfPage page)
 {
     // Tables, shown in blue.
     var afterTables = page
         .ParsePdf<PreProcessTables>()
         .Show(Color.Blue);

     // Images, shown in orange.
     var afterImages = afterTables
         .ParsePdf<PreProcessImages>()
         .Show(Color.Orange);

     // Text, shown in yellow, then ShowLine in black.
     afterImages
         .ParsePdf<ProcessPdfText>()
         .Show(Color.Yellow)
         .ShowLine(Color.Black);
 }
Пример #7
0
 /// <summary>
 /// Reads three cache entries in order (ProcessPdfTextData, ProcessImageData,
 /// IdentifyTablesData) and calls <c>Show</c> with a different color after each read.
 /// </summary>
 /// <param name="page">Page object on which the fluent chain is invoked.</param>
 void ShowColors(PipelineInputPdf.PipelineInputPdfPage page)
 {
     // Text entry, shown in black.
     var afterText = page
         .FromCache<ProcessPdfTextData>()
         .Show(Color.Black);

     // Image entry, shown in yellow.
     var afterImages = afterText
         .FromCache<ProcessImageData>()
         .Show(Color.Yellow);

     // Table entry, shown in orange.
     afterImages
         .FromCache<IdentifyTablesData>()
         .Show(Color.Orange);
 }
Пример #8
0
        /// <summary>
        /// Reads BlocksetData via <c>FromCache</c>, calls <c>Show</c> and <c>ShowLine</c>,
        /// applies <c>CheckOverlap</c>, then validates <c>CheckOverlap</c> and
        /// <c>ValidatePositiveCoordinates</c>, showing validation errors in red.
        /// </summary>
        /// <param name="page">Page object on which the fluent chain is invoked.</param>
        void ShowColors(PipelineInputPdf.PipelineInputPdfPage page)
        {
            // Cache read plus the two display calls.
            var displayedBlocksets = page
                .FromCache<BlocksetData>()
                .Show(Color.Orange)
                .ShowLine(Color.Green);

            // CheckOverlap is applied first and validated afterwards.
            displayedBlocksets
                .ParseBlock<CheckOverlap>()
                .Validate<CheckOverlap>().ShowErrors(target => target.Show(Color.Red))
                .Validate<ValidatePositiveCoordinates>().ShowErrors(target => target.Show(Color.Red));
            //.PrintWarnings();
        }
Пример #9
0
 /// <summary>
 /// Runs the table, image and text <c>ParsePdf</c> stages in order and calls
 /// <c>StoreCache</c> after each one (IdentifyTablesData, ProcessImageData,
 /// ProcessPdfTextData).
 /// </summary>
 /// <param name="page">Page object on which the fluent chain is invoked.</param>
 void InitialCache(PipelineInputPdf.PipelineInputPdfPage page)
 {
     // Tables: parse, identify, apply the compatibility stage, then store.
     var afterTables = page
         .ParsePdf<PreProcessTables>()
         .ParseBlock<IdentifyTables>()
         .ParseBlock<SetIdentifyTablesCompatibility>()
         .StoreCache<IdentifyTablesData>();

     // Images: parse, then store.
     var afterImages = afterTables
         .ParsePdf<PreProcessImages>()
         .StoreCache<ProcessImageData>();

     // Text: parse, then store.
     afterImages
         .ParsePdf<ProcessPdfText>()
         .StoreCache<ProcessPdfTextData>();
 }
Пример #10
0
        /// <summary>
        /// Runs the complete chain on one page without any cache reads: three
        /// <c>ParsePdf</c> calls (tables, images, text) interleaved with the numbered
        /// <c>ParseBlock</c> stages 1-30, several <c>Show</c>/<c>Validate</c> calls, and a
        /// final <c>PrintWarnings</c> call.
        /// </summary>
        /// <remarks>
        /// The trailing numbers look like stage indices. The right-aligned comment lines are
        /// disabled <c>Validate</c> calls (and one disabled <c>BreakColumns</c> stage).
        /// NOTE(review): stages are presumably order-dependent - confirm before reordering.
        /// </remarks>
        /// <param name="page">Page object on which the fluent chain is invoked.</param>
        void ProcessFull(PipelineInputPdf.PipelineInputPdfPage page)
        {
            // Tables and images.
            page.ParsePdf <PreProcessTables>()
            .ParseBlock <IdentifyTables>()                    // 1
            .ParsePdf <PreProcessImages>()
            .ParseBlock <BasicFirstPageStats>()               // 2
                                                              //.Validate<RemoveOverlapedImages>().ShowErrors(p => p.Show(Color.Blue))
            .ParseBlock <RemoveOverlapedImages>()             // 3
            // Page text and table text handling.
            .ParsePdf <ProcessPdfText>()                      // 4
                                                              //.Validate<RemoveSmallFonts>().ShowErrors(p => p.ShowText(Color.Green))
            .ParseBlock <RemoveSmallFonts>()                  // 5
                                                              //.Validate<MergeTableText>().ShowErrors(p => p.Show(Color.Blue))
            .ParseBlock <MergeTableText>()                    // 6
                                                              //.Validate<HighlightTextTable>().ShowErrors(p => p.Show(Color.Green))
            .ParseBlock <HighlightTextTable>()                // 7
            .ParseBlock <RemoveTableText>()                   // 8
            .ParseBlock <ReplaceCharacters>()                 // 9
            .ParseBlock <GroupLines>()                        // 10
            .ParseBlock <RemoveTableDotChar>()                // 11
            .Show(Color.Yellow)
            // RemoveHeaderImage is validated (errors shown in purple) before it is applied.
            .Validate <RemoveHeaderImage>().ShowErrors(p => p.Show(Color.Purple))
            .ParseBlock <RemoveHeaderImage>()               // 12
            .ParseBlock <FindInitialBlocksetWithRewind>()   // 13
            .Show(Color.Gray)
            .ParseBlock <BreakColumnsLight>()               // 14
                                                            //.ParseBlock<BreakColumns>()
            .ParseBlock <AddTableSpace>()                   // 15
            .ParseBlock <RemoveTableOverImage>()            // 16
            .ParseBlock <RemoveImageTexts>()                // 17
            .ParseBlock <AddImageSpace>()                   // 18
            // RemoveFooter is validated (errors shown in purple) before it is applied.
            .Validate <RemoveFooter>().ShowErrors(p => p.Show(Color.Purple))
            .ParseBlock <RemoveFooter>()                    // 19
            .ParseBlock <AddTableHorizontalLines>()         // 20
            .ParseBlock <RemoveBackgroundNonText>()         // 21
            .ParseBlock <BreakColumnsRewrite>()             // 22

            .ParseBlock <BreakInlineElements>()             // 23
            .ParseBlock <ResizeBlocksets>()                 // 24
            .ParseBlock <ResizeBlocksetMagins>()            // 25
            .ParseBlock <OrderBlocksets>()                  // 26

            .ParseBlock <OrganizePageLayout>()              // 27
            .ParseBlock <MergeSequentialLayout>()           // 28
            .ParseBlock <ResizeSequentialLayout>()          // 29
            .Show(Color.Orange)
            .ShowLine(Color.Black)

            // CheckOverlap is applied first and validated afterwards.
            .ParseBlock <CheckOverlap>()                      // 30

            .Validate <CheckOverlap>().ShowErrors(p => p.Show(Color.Red))
            .Validate <ValidatePositiveCoordinates>().ShowErrors(p => p.Show(Color.Red))
            .PrintWarnings();
        }
Пример #11
0
 /// <summary>
 /// Runs the table, image and text <c>ParsePdf</c> stages in order, calls
 /// <c>StoreCache</c> after each one, and interleaves <c>Show</c> calls in red, orange,
 /// yellow and black.
 /// </summary>
 /// <param name="page">Page object on which the fluent chain is invoked.</param>
 void InitialCache(PipelineInputPdf.PipelineInputPdfPage page)
 {
     // Tables: shown in red right after parsing, in orange after the cache write.
     var afterTables = page
         .ParsePdf<PreProcessTables>()
         .Show(Color.Red)
         .ParseBlock<IdentifyTables>()
         .ParseBlock<SetIdentifyTablesCompatibility>()
         .StoreCache<IdentifyTablesData>()
         .Show(Color.Orange);

     // Images: parsed, stored, shown in yellow.
     var afterImages = afterTables
         .ParsePdf<PreProcessImages>()
         .StoreCache<ProcessImageData>()
         .Show(Color.Yellow);

     // Text: parsed, stored, shown in black.
     afterImages
         .ParsePdf<ProcessPdfText>()
         .StoreCache<ProcessPdfTextData>()
         .Show(Color.Black);
 }
Пример #12
0
 /// <summary>
 /// Calls <c>FromCache</c> for FinalBlockResultData on the page and discards the
 /// returned value.
 /// </summary>
 /// <remarks>
 /// NOTE(review): no stage consumes the result here; presumably the call is made for
 /// its side effect on the page - confirm against <c>FromCache</c>.
 /// </remarks>
 /// <param name="page">Page object on which <c>FromCache</c> is invoked.</param>
 void GetLines(PipelineInputPdf.PipelineInputPdfPage page)
 {
     page
         .FromCache<FinalBlockResultData>();
 }