/// <summary>
/// Runs the text pipeline for a single document, converts the result into
/// <c>Conteudo</c> items and hands the derived articles to the XML writer.
/// </summary>
/// <param name="basename">Document base name; used to compose the log file names and forwarded to <c>CreateXML</c>.</param>
/// <param name="inputfolder">Input location forwarded to <c>GetTextLines</c>.</param>
/// <param name="outputfolder">Folder used for every log file path and forwarded to <c>CreateXML</c>.</param>
public static void Process(string basename, string inputfolder, string outputfolder)
{
    PdfReaderException.ContinueOnException();

    Execution.Pipeline pipeline;

    // Stage 1: raw text lines.
    var lines = GetTextLines(basename, inputfolder, outputfolder, out pipeline)
        .Log<AnalyzeLines>($"{outputfolder}/{basename}-lines.txt");

    // Stage 2: structures.
    // (A PrintAnalytics step targeting $"bin/{basename}-print-analytics.txt" is disabled here.)
    var structures = lines
        .ConvertText<CreateStructures, TextStructure>()
        .Log<AnalyzeStructuresCentral>($"{outputfolder}/{basename}-central.txt");

    // Stage 3: flat segments, logged three ways.
    var segments = structures
        .ConvertText<CreateTextSegments, TextSegment>()
        .Log<AnalyzeSegmentTitles>($"{outputfolder}/{basename}-tree.txt")
        .Log<AnalyzeSegmentStats>($"{outputfolder}/{basename}-segments-stats.txt")
        .Log<AnalyzeSegments2>($"{outputfolder}/{basename}-segments.csv");

    // Stage 4: tree segments, then the final content objects.
    var conteudos = segments
        .ConvertText<CreateTreeSegments, TextSegment>()
        .ConvertText<TransformConteudo, Conteudo>()
        .ToList();

    // Build the articles and emit them as XML.
    var articleBuilder = new TransformArtigo();
    var artigos = articleBuilder.Create(conteudos);
    articleBuilder.CreateXML(artigos, outputfolder, basename);

    pipeline.Done();
}
/// <summary>
/// Configures the virtual file system, runs the text pipeline for one document
/// and hands the derived articles to the XML writer.
/// </summary>
/// <param name="virtualFS">File system implementation passed to <c>VirtualFS.ConfigureFileSystem</c>.</param>
/// <param name="basename">Document base name; used in the log path and the XML output location.</param>
/// <param name="inputfolder">Input location forwarded to <c>GetTextLines</c>.</param>
/// <param name="tmpfolder">Temporary folder forwarded to <c>GetTextLines</c> and used for the segment-titles log.</param>
/// <param name="outputfolder">Root folder; articles are written to <c>{outputfolder}/{basename}</c>.</param>
public static void RunCreateArtigos(IVirtualFS virtualFS, string basename, string inputfolder, string tmpfolder, string outputfolder)
{
    VirtualFS.ConfigureFileSystem(virtualFS);
    PdfReaderException.ContinueOnException();

    // NOTE(review): pipeline.Done() is never invoked in this method; confirm
    // whether this pipeline instance needs explicit completion.
    var pipeline = new Pipeline();

    // Text lines are read with the temp folder as the working location.
    var indexedLines = GetTextLines(pipeline, basename, inputfolder, tmpfolder)
        .ConvertText<CreateTextLineIndex, TextLine>();

    // Lines -> structures -> aggregated structures.
    var structures = indexedLines
        .ConvertText<PreCreateStructures, TextLine2>()
        .ConvertText<CreateStructures2, TextStructure>()
        .ConvertText<PreCreateTextSegments, TextStructureAgg>()
        .ConvertText<AggregateStructures, TextStructure>();

    // Structures -> segments -> tree segments (logged).
    var treeSegments = structures
        .ConvertText<CreateTextSegments, TextSegment>()
        .ConvertText<CreateTreeSegments, TextSegment>()
        .Log<AnalyzeSegmentTitles>($"{tmpfolder}/{basename}/segment-titles-tree.txt");

    var conteudo = treeSegments
        .ConvertText<TransformConteudo, Conteudo>()
        .ToList();

    var articleBuilder = new TransformArtigo();
    var artigos = articleBuilder.Create(conteudo);
    articleBuilder.CreateXML(artigos, $"{outputfolder}/{basename}", basename);
}
/// <summary>
/// Converts the context's <c>TextSegment</c> pipeline text into <c>Conteudo</c> items,
/// passes the derived articles to the XML writer, and registers the output
/// file pattern on the context under the key <c>artigosGN</c>.
/// </summary>
public void Process()
{
    var conteudo = _context
        .GetPipelineText<TextSegment>()
        .ConvertText<TransformConteudo2, Conteudo>()
        .ToList();

    var articleBuilder = new TransformArtigo();
    var artigos = articleBuilder.Create(conteudo);

    // Articles go to "<OutputFolder>/<Basename>/artigos".
    string artigosFolder = $"{_context.OutputFolder}/{_context.Basename}/artigos";
    articleBuilder.CreateXML(artigos, artigosFolder, _context.Basename);

    // "{{0}}" emits a literal "{0}" placeholder in the registered pattern.
    string outputPattern = $"{artigosFolder}/{_context.Basename}-artigo{{0}}.xml";
    _context.AddOutput("artigosGN", outputPattern);
}
/// <summary>
/// Runs the text pipeline for a single document, hands the derived articles to
/// the XML writer, then runs the footer, layout and overlap validations and
/// extracts the pages reported by the layout/overlap validators.
/// </summary>
/// <param name="basename">Document base name; used to compose log, output and error file names.</param>
/// <param name="inputfolder">Input location forwarded to <c>GetTextLines</c>.</param>
/// <param name="outputfolder">Folder used for every log/extraction path and forwarded to <c>CreateXML</c>.</param>
public static void Process(string basename, string inputfolder, string outputfolder)
{
    PdfReaderException.ContinueOnException();

    var conteudos = GetTextLines(basename, inputfolder, outputfolder, out Execution.Pipeline pipeline)
        .ConvertText<CreateTextLineIndex, TextLine>()
        .Log<AnalyzeLines>($"{outputfolder}/{basename}-lines.txt")
        // File name follows the same "{basename}-<name>" pattern as the other logs.
        .Log<AnalyzeLinesCenterRight>($"{outputfolder}/{basename}-lines-center-right.txt")
        .ConvertText<CreateStructures, TextStructure>()
        .Log<AnalyzeStructuresCentral>($"{outputfolder}/{basename}-central.txt")
        .ConvertText<CreateTextSegments, TextSegment>()
        .Log<AnalyzeSegmentTitles>($"{outputfolder}/{basename}-tree.txt")
        .Log<AnalyzeSegmentStats>($"{outputfolder}/{basename}-segments-stats.txt")
        .Log<AnalyzeSegments2>($"{outputfolder}/{basename}-segments.csv")
        .ConvertText<CreateTreeSegments, TextSegment>()
        .Log<AnalyzeTreeStructure>($"{outputfolder}/{basename}-tree-hier.txt")
        .ConvertText<TransformConteudo, Conteudo>()
        .ToList();

    // Create XML
    var createArticle = new TransformArtigo();
    var artigos = createArticle.Create(conteudos);
    createArticle.CreateXML(artigos, outputfolder, basename);

    // The footer validation result is not consumed here; the call is kept
    // because any side effects of Calculate are not visible from this method.
    pipeline.Statistics.Calculate<ValidateFooter, StatsPageFooter>();

    var layout = (ValidateLayout)pipeline.Statistics.Calculate<ValidateLayout, StatsPageLayout>();
    var overlap = (ValidateOverlap)pipeline.Statistics.Calculate<ValidateOverlap, StatsBlocksOverlapped>();

    var pagesLayout = layout.GetPageErrors().ToList();
    var pagesOverlap = overlap.GetPageErrors().ToList();

    // Merge both error lists into one sorted list without duplicates.
    var pages = pagesLayout
        .Concat(pagesOverlap)
        .Distinct()
        .OrderBy(t => t)
        .ToList();

    if (pages.Count > 0)
    {
        ExtractPages($"{outputfolder}/{basename}-output", $"{outputfolder}/{basename}-parser-errors", pages);
    }

    pipeline.Done();
}
/// <summary>
/// Prints a banner, runs the example/diagnostic helpers for the document, runs
/// the text pipeline with logs under <c>bin</c>, hands the derived articles to
/// the XML writer and finally validates the articles in <c>bin</c>.
/// </summary>
/// <param name="basename">Document base name; used to compose the log file names and forwarded to the helpers and to <c>CreateXML</c>.</param>
public static void ProcessStats(string basename)
{
    Console.WriteLine();
    Console.WriteLine("Program4 - Processing with hierarchy");
    Console.WriteLine();

    Examples.FollowText(basename);
    Examples.ShowHeaderFooter(basename);

    ExamplesWork.PrintAllSteps(basename, "bin", "bin");

    PdfReaderException.ContinueOnException();

    var conteudos = GetTextLinesWithPipelineBlockset(basename, out Execution.Pipeline pipeline)
        .Log<AnalyzeLines>($"bin/{basename}-lines.txt")
        .ConvertText<CreateStructures, TextStructure>()
        .Log<AnalyzeStructuresCentral>($"bin/{basename}-central.txt")
        .ConvertText<CreateTextSegments, TextSegment>()
        .Log<AnalyzeSegmentTitles>($"bin/{basename}-tree.txt")
        .Log<AnalyzeSegmentStats>($"bin/{basename}-segments-stats.txt")
        .Log<AnalyzeSegments2>($"bin/{basename}-segments.csv")
        .ConvertText<CreateTreeSegments, TextSegment>()
        .ConvertText<TransformConteudo, Conteudo>()
        .ToList();

    // The footer validation result is not consumed here; the call is kept
    // because any side effects of Calculate are not visible from this method.
    pipeline.Statistics.Calculate<ValidateFooter, StatsPageFooter>();

    // Create XML
    var createArticle = new TransformArtigo();
    var artigos = createArticle.Create(conteudos);
    createArticle.CreateXML(artigos, basename);

    var validator = new ProgramValidatorXML();
    validator.ValidateArticle("bin");
}
/// <summary>
/// Creates the <c>Log</c>, <c>XMLs</c> and <c>PDF-Errors</c> sub-folders under
/// <paramref name="outputfolder"/>, runs the text pipeline for one document,
/// hands the derived articles to the XML writer, extracts the parser-warnings
/// PDF and validates the articles.
/// </summary>
/// <param name="basename">Document base name; used to compose log, XML and error file names.</param>
/// <param name="inputfolder">Input location forwarded to <c>GetTextLines</c>.</param>
/// <param name="outputfolder">Root output folder; also forwarded to <c>GetTextLines</c> and <c>ValidateArticle</c>.</param>
public static void Process(string basename, string inputfolder, string outputfolder)
{
    // Output sub-folders; their full paths are stored in the class-level fields.
    logDir = Directory.CreateDirectory($"{outputfolder}/Log").FullName;
    xmlDir = Directory.CreateDirectory($"{outputfolder}/XMLs").FullName;
    errors = Directory.CreateDirectory($"{outputfolder}/PDF-Errors").FullName;

    PdfReaderException.ContinueOnException();

    // (A call to ExamplesWork.PrintAllSteps(basename, inputfolder, outputfolder) is disabled here.)

    Execution.Pipeline pipeline;

    // Stage 1: text lines, logged and then indexed.
    var lines = GetTextLines(basename, inputfolder, outputfolder, out pipeline)
        .Log<AnalyzeLines>($"{logDir}/{basename}-lines.txt")
        .ConvertText<CreateTextLineIndex, TextLine>();

    // Stage 2: structures, with log, PDF rendering and analytics.
    var structures = lines
        .ConvertText<CreateStructures, TextStructure>()
        .Log<AnalyzeStructuresCentral>($"{logDir}/{basename}-central.txt")
        .ShowPdf<ShowStructureCentral>($"{logDir}/{basename}-show-central.pdf")
        .PrintAnalytics($"{logDir}/{basename}-print-analytics.txt");

    // Stage 3: flat segments, logged three ways.
    var segments = structures
        .ConvertText<CreateTextSegments, TextSegment>()
        .Log<AnalyzeSegmentTitles>($"{logDir}/{basename}-tree.txt")
        .Log<AnalyzeSegmentStats>($"{logDir}/{basename}-segments-stats.txt")
        .Log<AnalyzeSegments2>($"{logDir}/{basename}-segments.csv");

    // Stage 4: tree segments.
    var treeSegments = segments
        .ConvertText<CreateTreeSegments, TextSegment>()
        .Log<AnalyzeTreeStructure>($"{logDir}/{basename}-tree-data.txt");

    // Stage 5: content objects, followed by the two aggregation passes.
    var conteudos = treeSegments
        .ConvertText<TransformConteudo, Conteudo>()
        .ConvertText<AggregateAnexo, Conteudo>()
        .ConvertText<AggregateSingularBody, Conteudo>()
        .ToList();

    // Build the articles and emit them as XML into the XMLs folder.
    var articleBuilder = new TransformArtigo();
    var artigos = articleBuilder.Create(conteudos);
    articleBuilder.CreateXML(artigos, xmlDir, basename);

    pipeline.ExtractOutput<ShowParserWarnings>($"{errors}/{basename}-parser-errors.pdf");
    pipeline.Done();

    var xmlValidator = new ProgramValidatorXML();
    xmlValidator.ValidateArticle(outputfolder);
}