Beispiel #1
0
        /// <summary>
        /// Runs the conversion chain for one input and writes the resulting articles as XML.
        /// </summary>
        /// <remarks>
        /// Stages, in order: text lines, <c>TextStructure</c>, <c>TextSegment</c> (flat, then
        /// passed through <c>CreateTreeSegments</c>), <c>Conteudo</c>. Every <c>Log</c> stage is
        /// given a file path under <paramref name="outputfolder"/> prefixed with
        /// <paramref name="basename"/>.
        /// </remarks>
        /// <param name="basename">Name passed to <c>GetTextLines</c> and used as prefix of the output file names.</param>
        /// <param name="inputfolder">Passed through to <c>GetTextLines</c>.</param>
        /// <param name="outputfolder">Passed through to <c>GetTextLines</c>; also the folder for log files and XML.</param>
        public static void Process(string basename, string inputfolder, string outputfolder)
        {
            // NOTE(review): presumably makes the reader keep going after errors - confirm.
            PdfReaderException.ContinueOnException();

            Execution.Pipeline pipeline;

            var lines = GetTextLines(basename, inputfolder, outputfolder, out pipeline)
                .Log<AnalyzeLines>($"{outputfolder}/{basename}-lines.txt");

            var structures = lines
                .ConvertText<CreateStructures, TextStructure>()
                .Log<AnalyzeStructuresCentral>($"{outputfolder}/{basename}-central.txt");
                //.PrintAnalytics($"bin/{basename}-print-analytics.txt")

            var segments = structures
                .ConvertText<CreateTextSegments, TextSegment>()
                .Log<AnalyzeSegmentTitles>($"{outputfolder}/{basename}-tree.txt")
                .Log<AnalyzeSegmentStats>($"{outputfolder}/{basename}-segments-stats.txt")
                .Log<AnalyzeSegments2>($"{outputfolder}/{basename}-segments.csv");

            var contents = segments
                .ConvertText<CreateTreeSegments, TextSegment>()
                .ConvertText<TransformConteudo, Conteudo>()
                .ToList();

            // Build the articles and write them as XML.
            var articleBuilder = new TransformArtigo();
            var articles = articleBuilder.Create(contents);
            articleBuilder.CreateXML(articles, outputfolder, basename);

            pipeline.Done();
        }
Beispiel #2
0
        /// <summary>
        /// Configures the virtual file system, runs the conversion chain for one input and
        /// writes the resulting articles as JSON.
        /// </summary>
        /// <param name="virtualFS">File system implementation handed to <c>VirtualFS.ConfigureFileSystem</c>.</param>
        /// <param name="basename">Name passed to <c>GetTextLines</c> and used in the log and output paths.</param>
        /// <param name="inputfolder">Passed through to <c>GetTextLines</c>.</param>
        /// <param name="tmpfolder">Passed to <c>GetTextLines</c>; also the root of the segment-titles log path.</param>
        /// <param name="outputfolder">Root of the JSON output location (<c>{outputfolder}/{basename}</c>).</param>
        public static void RunCreateArtigosJson(IVirtualFS virtualFS, string basename, string inputfolder, string tmpfolder, string outputfolder)
        {
            VirtualFS.ConfigureFileSystem(virtualFS);

            // NOTE(review): presumably makes the reader keep going after errors - confirm.
            PdfReaderException.ContinueOnException();

            var pipeline = new Pipeline();

            // tmpfolder (not outputfolder) is what GetTextLines receives here.
            var indexedLines = GetTextLines(pipeline, basename, inputfolder, tmpfolder)
                .ConvertText<CreateTextLineIndex, TextLine>()
                .ConvertText<PreCreateStructures, TextLine2>();

            var structures = indexedLines
                .ConvertText<CreateStructures2, TextStructure>()
                .ConvertText<PreCreateTextSegments, TextStructureAgg>()
                .ConvertText<AggregateStructures, TextStructure>();

            var segmentTree = structures
                .ConvertText<CreateTextSegments, TextSegment>()
                .ConvertText<CreateTreeSegments, TextSegment>()
                .Log<AnalyzeSegmentTitles>($"{tmpfolder}/{basename}/segment-titles-tree.txt");

            var contents = segmentTree
                .ConvertText<TransformConteudo, Conteudo>()
                .ToList();

            // Build the articles and write them as JSON.
            var articleBuilder = new TransformArtigo();
            var articles = articleBuilder.Create(contents);
            articleBuilder.CreateJson(articles, $"{outputfolder}/{basename}", basename);
        }
        /// <summary>
        /// Converts the context's <c>TextSegment</c> pipeline text into articles, writes them
        /// as XML and registers the output path pattern on the context under "artigosGN".
        /// </summary>
        public void Process()
        {
            var contents = _context.GetPipelineText<TextSegment>()
                .ConvertText<TransformConteudo2, Conteudo>()
                .ToList();

            var articleBuilder = new TransformArtigo();
            var articles = articleBuilder.Create(contents);

            articleBuilder.CreateXML(articles, $"{_context.OutputFolder}/{_context.Basename}/artigos", _context.Basename);

            // The doubled braces produce a literal "{0}" placeholder in the registered path.
            _context.AddOutput("artigosGN", $"{_context.OutputFolder}/{_context.Basename}/artigos/{_context.Basename}-artigo{{0}}.xml");
        }
Beispiel #4
0
        /// <summary>
        /// Runs the conversion chain for one input, writes the resulting articles as XML and,
        /// when layout or overlap validation reports pages, calls <c>ExtractPages</c> for them.
        /// </summary>
        /// <remarks>
        /// Every <c>Log</c> stage is given a file path under <paramref name="outputfolder"/>
        /// prefixed with <paramref name="basename"/>.
        /// </remarks>
        /// <param name="basename">Name passed to <c>GetTextLines</c> and used as prefix of the output file names.</param>
        /// <param name="inputfolder">Passed through to <c>GetTextLines</c>.</param>
        /// <param name="outputfolder">Passed through to <c>GetTextLines</c>; also the folder for logs, XML and extracted pages.</param>
        public static void Process(string basename, string inputfolder, string outputfolder)
        {
            // NOTE(review): presumably makes the reader keep going after errors - confirm.
            PdfReaderException.ContinueOnException();

            var conteudos = GetTextLines(basename, inputfolder, outputfolder, out Execution.Pipeline pipeline)
                            .ConvertText <CreateTextLineIndex, TextLine>()
                            .Log <AnalyzeLines>($"{outputfolder}/{basename}-lines.txt")
                            // File name fixed: it previously contained a stray ';' ("-;lines-center-right.txt").
                            .Log <AnalyzeLinesCenterRight>($"{outputfolder}/{basename}-lines-center-right.txt")
                            .ConvertText <CreateStructures, TextStructure>()
                            .Log <AnalyzeStructuresCentral>($"{outputfolder}/{basename}-central.txt")
                            //.PrintAnalytics($"bin/{basename}-print-analytics.txt")
                            .ConvertText <CreateTextSegments, TextSegment>()
                            .Log <AnalyzeSegmentTitles>($"{outputfolder}/{basename}-tree.txt")
                            .Log <AnalyzeSegmentStats>($"{outputfolder}/{basename}-segments-stats.txt")
                            .Log <AnalyzeSegments2>($"{outputfolder}/{basename}-segments.csv")
                            .ConvertText <CreateTreeSegments, TextSegment>()
                            .Log <AnalyzeTreeStructure>($"{outputfolder}/{basename}-tree-hier.txt")
                            .ConvertText <TransformConteudo, Conteudo>()
                            .ToList();

            // Build the articles and write them as XML.
            var createArticle = new TransformArtigo();
            var artigos       = createArticle.Create(conteudos);

            createArticle.CreateXML(artigos, outputfolder, basename);

            // The footer result is not read here; the call is kept in case Calculate has side effects.
            pipeline.Statistics.Calculate <ValidateFooter, StatsPageFooter>();

            var layout  = (ValidateLayout)pipeline.Statistics.Calculate <ValidateLayout, StatsPageLayout>();
            var overlap = (ValidateOverlap)pipeline.Statistics.Calculate <ValidateOverlap, StatsBlocksOverlapped>();

            // Merge the pages reported by both validators: unique and in ascending order.
            var pagesLayout  = layout.GetPageErrors().ToList();
            var pagesOverlap = overlap.GetPageErrors().ToList();
            var pages        = pagesLayout.Concat(pagesOverlap).Distinct().OrderBy(t => t).ToList();

            if (pages.Count > 0)
            {
                ExtractPages($"{outputfolder}/{basename}-output", $"{outputfolder}/{basename}-parser-errors", pages);
            }

            pipeline.Done();
        }
Beispiel #5
0
        /// <summary>
        /// Prints a banner, runs the example helpers for <paramref name="basename"/>, runs the
        /// conversion chain with log files under "bin", writes the articles as XML and then
        /// invokes the XML validator on "bin".
        /// </summary>
        /// <param name="basename">Name passed to the example helpers and the pipeline; also the prefix of the log file names.</param>
        public static void ProcessStats(string basename)
        {
            Console.WriteLine();
            Console.WriteLine("Program4 - Processing with hierarchy");
            Console.WriteLine();

            Examples.FollowText(basename);
            Examples.ShowHeaderFooter(basename);
            ExamplesWork.PrintAllSteps(basename, "bin", "bin");

            // NOTE(review): presumably makes the reader keep going after errors - confirm.
            PdfReaderException.ContinueOnException();

            var conteudos = GetTextLinesWithPipelineBlockset(basename, out Execution.Pipeline pipeline)
                            .Log <AnalyzeLines>($"bin/{basename}-lines.txt")
                            .ConvertText <CreateStructures, TextStructure>()
                            .Log <AnalyzeStructuresCentral>($"bin/{basename}-central.txt")
                            //.PrintAnalytics($"bin/{basename}-print-analytics.txt")
                            .ConvertText <CreateTextSegments, TextSegment>()
                            .Log <AnalyzeSegmentTitles>($"bin/{basename}-tree.txt")
                            .Log <AnalyzeSegmentStats>($"bin/{basename}-segments-stats.txt")
                            .Log <AnalyzeSegments2>($"bin/{basename}-segments.csv")
                            .ConvertText <CreateTreeSegments, TextSegment>()
                            .ConvertText <TransformConteudo, Conteudo>()
                            .ToList();

            // The footer result is not read here; the call is kept in case Calculate has side effects.
            pipeline.Statistics.Calculate <ValidateFooter, StatsPageFooter>();

            // Build the articles and write them as XML.
            var createArticle = new TransformArtigo();
            var artigos       = createArticle.Create(conteudos);

            createArticle.CreateXML(artigos, basename);

            var validator = new ProgramValidatorXML();

            validator.ValidateArticle("bin");
        }
Beispiel #6
0
        /// <summary>
        /// Creates the Log, XMLs and PDF-Errors folders under <paramref name="outputfolder"/>,
        /// runs the conversion chain for one input, writes the articles as XML, extracts the
        /// parser-warnings PDF and finally invokes the XML validator on the output folder.
        /// </summary>
        /// <remarks>
        /// The full paths of the three folders are stored in <c>logDir</c>, <c>xmlDir</c> and
        /// <c>errors</c>, which are declared outside this method.
        /// </remarks>
        /// <param name="basename">Name passed to <c>GetTextLines</c> and used as prefix of the output file names.</param>
        /// <param name="inputfolder">Passed through to <c>GetTextLines</c>.</param>
        /// <param name="outputfolder">Passed through to <c>GetTextLines</c>; parent of the created folders and target of validation.</param>
        public static void Process(string basename, string inputfolder, string outputfolder)
        {
            // CreateDirectory also succeeds when the folder already exists.
            logDir = Directory.CreateDirectory($"{outputfolder}/Log").FullName;
            xmlDir = Directory.CreateDirectory($"{outputfolder}/XMLs").FullName;
            errors = Directory.CreateDirectory($"{outputfolder}/PDF-Errors").FullName;

            // NOTE(review): presumably makes the reader keep going after errors - confirm.
            PdfReaderException.ContinueOnException();

            //ExamplesWork.PrintAllSteps(basename, inputfolder, outputfolder);

            Execution.Pipeline pipeline;

            var indexedLines = GetTextLines(basename, inputfolder, outputfolder, out pipeline)
                .Log<AnalyzeLines>($"{logDir}/{basename}-lines.txt")
                .ConvertText<CreateTextLineIndex, TextLine>();

            var structures = indexedLines
                .ConvertText<CreateStructures, TextStructure>()
                .Log<AnalyzeStructuresCentral>($"{logDir}/{basename}-central.txt")
                .ShowPdf<ShowStructureCentral>($"{logDir}/{basename}-show-central.pdf")
                .PrintAnalytics($"{logDir}/{basename}-print-analytics.txt");

            var segments = structures
                .ConvertText<CreateTextSegments, TextSegment>()
                .Log<AnalyzeSegmentTitles>($"{logDir}/{basename}-tree.txt")
                .Log<AnalyzeSegmentStats>($"{logDir}/{basename}-segments-stats.txt")
                .Log<AnalyzeSegments2>($"{logDir}/{basename}-segments.csv");

            var segmentTree = segments
                .ConvertText<CreateTreeSegments, TextSegment>()
                .Log<AnalyzeTreeStructure>($"{logDir}/{basename}-tree-data.txt");

            var contents = segmentTree
                .ConvertText<TransformConteudo, Conteudo>()
                .ConvertText<AggregateAnexo, Conteudo>()
                .ConvertText<AggregateSingularBody, Conteudo>()
                .ToList();

            // Build the articles and write them as XML.
            var articleBuilder = new TransformArtigo();
            var articles = articleBuilder.Create(contents);
            articleBuilder.CreateXML(articles, xmlDir, basename);

            pipeline.ExtractOutput<ShowParserWarnings>($"{errors}/{basename}-parser-errors.pdf");
            pipeline.Done();

            var xmlValidator = new ProgramValidatorXML();
            xmlValidator.ValidateArticle(outputfolder);
        }