/// <summary>
/// Opens the PDF named by <paramref name="filename"/> through <c>VirtualFS.OpenPdfReader</c>,
/// initializes it with <paramref name="factory"/> and registers this instance as
/// <c>PipelineInputPdf.DebugCurrent</c>.
/// </summary>
/// <param name="filename">Name of the PDF to read; also stored as the input name.</param>
/// <param name="factory">Factory handed to <c>InitDocument</c> and kept for later use. Must not be null.</param>
/// <param name="cache">Optional cache; when supplied it is sized to the document's page count and stored.</param>
/// <exception cref="ArgumentNullException"><paramref name="factory"/> is null.</exception>
public PipelineInputPdf(string filename, PipelineFactory factory, PipelineInputCache<IProcessBlockData> cache = null)
{
    if (factory == null)
    {
        throw new ArgumentNullException(nameof(factory));
    }

    // The document is initialized before any field of this instance is assigned.
    var document = new PdfDocument(VirtualFS.OpenPdfReader(filename));
    InitDocument(document, factory);

    _input = filename;
    _pdfDocument = document;
    _documentFactory = factory;

    if (cache != null)
    {
        int pageCount = document.GetNumberOfPages();
        cache.SetSize(pageCount);
        _cache = cache;
    }

    PipelineInputPdf.DebugCurrent = this;
    PdfReaderException.ClearContext();
}
/// <summary>
/// Configures the virtual file system, runs the text-structure conversion chain over the
/// lines returned by <c>GetTextLines</c>, writing the log and PDF artifacts under
/// <c>{outputfolder}/{basename}</c>, and finally extracts the parser warnings PDF.
/// </summary>
/// <param name="virtualFS">File system implementation passed to <c>VirtualFS.ConfigureFileSystem</c>.</param>
/// <param name="basename">Base name used to build every output path.</param>
/// <param name="inputfolder">Input folder forwarded to <c>GetTextLines</c>.</param>
/// <param name="outputfolder">Root folder for the generated artifacts.</param>
public static void RunParserPDF(IVirtualFS virtualFS, string basename, string inputfolder, string outputfolder)
{
    VirtualFS.ConfigureFileSystem(virtualFS);
    PdfReaderException.ContinueOnException();

    var pipeline = new Pipeline();
    var textLines = GetTextLines(pipeline, basename, inputfolder, outputfolder);

    // The result is not read afterwards; ToList() is what enumerates the chain.
    var segments = textLines
        .Log<AnalyzeLines>($"{outputfolder}/{basename}/lines.txt")
        .ConvertText<CreateTextLineIndex, TextLine>()
        .ConvertText<PreCreateStructures, TextLine2>()
        .ConvertText<CreateStructures2, TextStructure>()
        .ConvertText<PreCreateTextSegments, TextStructureAgg>()
        .ConvertText<AggregateStructures, TextStructure>()
        .ShowPdf<ShowStructureCentral>($"{outputfolder}/{basename}/show-central.pdf")
        .Log<AnalyzeStructures>($"{outputfolder}/{basename}/struct.txt")
        .Log<AnalyzeStructuresCentral>($"{outputfolder}/{basename}/central.txt")
        .ConvertText<CreateTextSegments, TextSegment>()
        .ConvertText<CreateTreeSegments, TextSegment>()
        .Log<AnalyzeSegmentTitles>($"{outputfolder}/{basename}/segment-titles-tree.txt")
        .Log<AnalyzeTreeStructure>(Console.Out)
        .ToList();

    pipeline.ExtractOutput<ShowParserWarnings>($"{outputfolder}/{basename}/parser-errors.pdf");
}
/// <summary>
/// Configures the virtual file system, converts the text lines of the input into
/// <c>Conteudo</c> items and writes the resulting articles as JSON under
/// <c>{outputfolder}/{basename}</c>.
/// </summary>
/// <param name="virtualFS">File system implementation passed to <c>VirtualFS.ConfigureFileSystem</c>.</param>
/// <param name="basename">Base name used for paths and passed to <c>CreateJson</c>.</param>
/// <param name="inputfolder">Input folder forwarded to <c>GetTextLines</c>.</param>
/// <param name="tmpfolder">Folder forwarded to <c>GetTextLines</c> and used for the segment-titles log.</param>
/// <param name="outputfolder">Root folder of the JSON output.</param>
public static void RunCreateArtigosJson(IVirtualFS virtualFS, string basename, string inputfolder, string tmpfolder, string outputfolder)
{
    VirtualFS.ConfigureFileSystem(virtualFS);
    PdfReaderException.ContinueOnException();

    var pipeline = new Pipeline();

    // use temp folder
    var textLines = GetTextLines(pipeline, basename, inputfolder, tmpfolder);

    var conteudo = textLines
        .ConvertText<CreateTextLineIndex, TextLine>()
        .ConvertText<PreCreateStructures, TextLine2>()
        .ConvertText<CreateStructures2, TextStructure>()
        .ConvertText<PreCreateTextSegments, TextStructureAgg>()
        .ConvertText<AggregateStructures, TextStructure>()
        .ConvertText<CreateTextSegments, TextSegment>()
        .ConvertText<CreateTreeSegments, TextSegment>()
        .Log<AnalyzeSegmentTitles>($"{tmpfolder}/{basename}/segment-titles-tree.txt")
        .ConvertText<TransformConteudo, Conteudo>()
        .ToList();

    var articleBuilder = new TransformArtigo();
    var artigos = articleBuilder.Create(conteudo);
    articleBuilder.CreateJson(artigos, $"{outputfolder}/{basename}", basename);
}
/// <summary>
/// Logs through <typeparamref name="TL"/> into a writer opened on <paramref name="filename"/>.
/// The writer is obtained from the factory, which is given a delegate that opens it
/// via <c>VirtualFS.OpenStreamWriter</c>.
/// </summary>
/// <param name="filename">Name of the log file to open.</param>
/// <returns>The result of the writer-based <c>Log</c> overload.</returns>
public PipelineText<TT> Log<TL>(string filename)
    where TL : ILogStructure<TT>, new()
{
    var writer = _factory.CreateInstance<TextWriter>(() => VirtualFS.OpenStreamWriter(filename));

    return Log<TL>(writer);
}
/// <summary>
/// Copies the pages listed in <paramref name="pageNumbers"/> from the input PDF
/// into a new PDF written to <paramref name="outfile"/>.
/// </summary>
/// <param name="outfile">Name of the PDF to write.</param>
/// <param name="pageNumbers">Page numbers passed to <c>CopyPagesTo</c>.</param>
public void ExtractPages(string outfile, IList<int> pageNumbers)
{
    using (var source = new PdfDocument(VirtualFS.OpenPdfReader(_input)))
    {
        using (var target = new PdfDocument(VirtualFS.OpenPdfWriter(outfile)))
        {
            source.CopyPagesTo(pageNumbers, target);
        }
    }
}
/// <summary>
/// Adds the directory at <paramref name="absolutepath"/> to the virtual file system
/// unless <c>DirectoryExists</c> already reports it.
/// </summary>
/// <param name="absolutepath">Path of the directory; split into its parent path and last segment.</param>
public void CreateDirectoryIfNotExists(string absolutepath)
{
    if (DirectoryExists(absolutepath))
    {
        return;
    }

    // The new entry is registered under the parent path, with metadata built from the last path segment.
    string parentPath = Path.GetDirectoryName(absolutepath);
    var metadata = MakeNewDirectoryMetadata(Path.GetFileName(absolutepath));

    VirtualFS.AddDirectory(parentPath, metadata);
}
/// <summary>
/// Writes <paramref name="content"/> to <paramref name="filepath"/> through the virtual
/// file system and then records the file as an output under <paramref name="name"/>.
/// </summary>
/// <param name="name">Name passed to <c>AddOutput</c>.</param>
/// <param name="filepath">Path of the file to write.</param>
/// <param name="content">Text to write.</param>
public void WriteFile(string name, string filepath, string content)
{
    using (var stream = VirtualFS.OpenWrite(filepath))
    {
        using (var textWriter = new StreamWriter(stream))
        {
            textWriter.Write(content);
        }
    }

    // Registered only after both the writer and the stream have been closed.
    AddOutput(name, filepath);
}
/// <summary>
/// Logs through <typeparamref name="TL"/> into a writer opened on <paramref name="filename"/>.
/// The writer is handed to the tracker before logging starts.
/// </summary>
/// <param name="filename">Name of the log file to open via <c>VirtualFS.OpenStreamWriter</c>.</param>
/// <returns>The result of the writer-based <c>Log</c> overload.</returns>
public PipelineText<TT> Log<TL>(string filename)
    where TL : class, ILogStructure<TT>
{
    var writer = VirtualFS.OpenStreamWriter(filename);
    _tracker.TrackInstance(writer);

    return Log<TL>(writer);
}
/// <summary>
/// Copies the inclusive page range <paramref name="start"/>..<paramref name="end"/> from
/// the input PDF into a new PDF written to <paramref name="outfile"/>.
/// </summary>
/// <param name="outfile">Name of the PDF to write.</param>
/// <param name="start">First page number of the range.</param>
/// <param name="end">Last page number of the range (inclusive).</param>
public void Extract(string outfile, int start, int end)
{
    // Both ends are included, hence the "+ 1".
    int pageCount = end - start + 1;
    IList<int> range = Enumerable.Range(start, pageCount).ToList();

    using (var source = new PdfDocument(VirtualFS.OpenPdfReader(_input)))
    {
        using (var target = new PdfDocument(VirtualFS.OpenPdfWriter(outfile)))
        {
            source.CopyPagesTo(range, target);
        }
    }
}
/// <summary>
/// Opens the PDF referenced by <c>result.Metadata.PDFURI</c> and copies a page from it
/// into a new PDF written to <c>tempPath</c>.
/// </summary>
/// <param name="result">Highlight whose metadata names the source PDF and whose page number is looked up.</param>
/// <returns>The value of <c>tempPath</c>.</returns>
private static string ExtracPage(HighlightObject result)
{
    // The source document is now disposed deterministically; previously it was left open.
    using (var pdfInput = new PdfDocument(VirtualFS.OpenPdfReader(result.Metadata.PDFURI)))
    {
        // The looked-up page itself is not used; the call is kept so that any failure
        // GetPage raises for result.PageNumber still surfaces before the output is opened.
        pdfInput.GetPage(result.PageNumber);

        using (var pdfOutput = new PdfDocument(VirtualFS.OpenPdfWriter(tempPath)))
        {
            // NOTE(review): this always copies page 1, not result.PageNumber — confirm whether that is intended.
            pdfInput.CopyPagesTo(1, 1, pdfOutput);
        }
    }

    return tempPath;
}
/// <summary>
/// Stores the supplied collaborators in the corresponding private fields.
/// </summary>
/// <param name="logStream">Stored in <c>_logStream</c>.</param>
/// <param name="virtualFS">Stored in <c>_virtualFS</c>.</param>
/// <param name="quartusTools">Stored in <c>_quartusTools</c>.</param>
/// <param name="runtimeConfiguration">Stored in <c>_runtimeConfiguration</c>.</param>
public PostRun(
    ILogStream logStream,
    VirtualFS virtualFS,
    QuartusProjectTools quartusTools,
    RuntimeConfiguration runtimeConfiguration)
{
    this._runtimeConfiguration = runtimeConfiguration;
    this._quartusTools = quartusTools;
    this._virtualFS = virtualFS;
    this._logStream = logStream;
}
/// <summary>
/// Opens the PDF named by <paramref name="filename"/> through <c>VirtualFS.OpenPdfReader</c>
/// and registers this instance as <c>PipelineInputPdf.DebugCurrent</c>.
/// </summary>
/// <param name="filename">Name of the PDF to read; also stored as the input name.</param>
public PipelineInputPdf(string filename)
{
    var document = new PdfDocument(VirtualFS.OpenPdfReader(filename));

    _input = filename;
    _pdfDocument = document;

    PipelineInputPdf.DebugCurrent = this;
    PdfReaderException.ClearContext();
}
/// <summary>
/// Adds the given counts to <c>GlobalStats</c>, computes the precision percentage
/// <c>(1 - error / docs) * 100</c>, writes the report to
/// <c>{logDir}/ArticlePrecision.txt</c> and appends it to <c>GlobalStats.text</c>.
/// </summary>
/// <param name="docs">Number of articles processed.</param>
/// <param name="error">Number of articles with errors.</param>
void CalculatePrecision(int docs, int error)
{
    GlobalStats.errors += error;
    GlobalStats.total += docs;

    float result = (1.0f - ((float)error / (float)docs)) * 100;

    string report = $"Article precision: {result.ToString("00.00")}% \nArticles processed: {docs} \nArticles With Error: {error}";
    string reportPath = $"{logDir}/ArticlePrecision.txt";

    VirtualFS.FileWriteAllText(reportPath, report);

    GlobalStats.text += $" \n\n{currentName} - {report}";
}
/// <summary>
/// Computes the overall precision percentage from the totals accumulated in
/// <c>GlobalStats</c> and writes a report, followed by <c>GlobalStats.text</c>,
/// to <paramref name="filename"/>.
/// </summary>
/// <param name="filename">Path of the report file to write.</param>
/// <returns>The precision percentage, <c>(1 - errors / total) * 100</c>.</returns>
public static float CreateFinalStats(string filename)
{
    int docs = GlobalStats.total;
    int error = GlobalStats.errors;

    float result = (1.0f - ((float)error / (float)docs)) * 100;

    string report = $"Article precision: {result.ToString("00.00")}% \nArticles processed: {docs} \nArticles With Error: {error} \n\n{GlobalStats.text}";
    VirtualFS.FileWriteAllText(filename, report);

    return result;
}
/// <summary>
/// Configures the virtual file system and runs every parser stage, in order, against a
/// single <c>StageContext</c> built from the given names.
/// </summary>
/// <param name="virtualFS">File system implementation passed to <c>VirtualFS.ConfigureFileSystem</c>.</param>
/// <param name="basename">Base name given to the stage context.</param>
/// <param name="inputfolder">Input folder given to the stage context.</param>
/// <param name="outputfolder">Output folder given to the stage context.</param>
/// <returns>The context's <c>FileListOutput</c>.</returns>
public static Dictionary<string, string> RunParserPDF(IVirtualFS virtualFS, string basename, string inputfolder, string outputfolder)
{
    VirtualFS.ConfigureFileSystem(virtualFS);
    PdfReaderException.ContinueOnException();

    using (var context = new ParserStages.StageContext(basename, inputfolder, outputfolder))
    {
        //var dbg0 = new ParserStages.StageDbgFlow(context);
        //dbg0.Process();

        // Each stage is constructed immediately before it runs.
        new ParserStages.StageExtractHeaderDOU(context).Process();
        new ParserStages.StagePdfInput(context).Process();
        new ParserStages.StagePageMargins(context).Process();
        new ParserStages.StageBlocksets(context).Process();
        new ParserStages.StageRetrieveBlocks(context).Process();
        new ParserStages.StageConvertText(context).Process();

        new ParserStages.StageConvertStructure(context).Process();
        //stageText2.ProcessWithConfiguration($"{outputfolder}/{basename}/{basename}-tree.config");

        new ParserStages.StageConvertStructText(context).Process();
        new ParserStages.StageConvertTree(context).Process();
        new ParserStages.StageConvertContent(context).Process();
        new ParserStages.StageConvertArtigoGN(context).Process();

        // These values are not read afterwards; the lookups are kept as in the original flow.
        string stage3Output = context.GetOutput("stage3");
        string treeOutput = context.GetOutput("tree");

        return context.FileListOutput;
    }
}
/// <summary>
/// Disposes any previously opened output document, then opens a new one that reads
/// from the input PDF and writes to <paramref name="outfile"/>.
/// </summary>
/// <param name="outfile">Name of the PDF to write; also stored as the output name.</param>
/// <returns>This instance, so calls can be chained.</returns>
public PipelineInputPdf Output(string outfile)
{
    if (_pdfOutput != null)
    {
        ((IDisposable)_pdfOutput).Dispose();
    }

    var reader = VirtualFS.OpenPdfReader(_input);
    var writer = VirtualFS.OpenPdfWriter(outfile);
    var document = new PdfDocument(reader, writer);

    _output = outfile;
    _pdfOutput = document;

    return this;
}
/// <summary>
/// Configures the virtual file system and runs a pipeline that reads
/// <c>{basename}.pdf</c>, applies <c>ProcessPdfText</c> to every page with orange
/// line highlighting, and writes <c>{basename}-follow-text-output.pdf</c>.
/// </summary>
/// <param name="virtualFS">File system implementation passed to <c>VirtualFS.ConfigureFileSystem</c>.</param>
/// <param name="basename">Base name of the input and output PDF files.</param>
public static void FollowText(IVirtualFS virtualFS, string basename)
{
    VirtualFS.ConfigureFileSystem(virtualFS);

    var pipeline = new Execution.Pipeline();

    var input = pipeline.Input($"{basename}.pdf");
    var output = input.Output($"{basename}-follow-text-output.pdf");

    output.AllPages(page =>
        page.ParsePdf<ProcessPdfText>()
            .ShowLine(Color.Orange));

    pipeline.Done();
}
/// <summary>
/// Validates every <c>*.xml</c> file under <c>{folder}/XMLs</c>, counting each one, and
/// then reports the precision through <c>CalculatePrecision</c>.
/// </summary>
/// <param name="folder">Folder that contains the <c>XMLs</c> subfolder; <c>Log</c> and <c>XML-Errors</c> subfolders are requested under it.</param>
public void ValidateArticle(string folder)
{
    currentName = VirtualFS.GetDirectoryName(folder);
    logDir = VirtualFS.GetDirectoryCreateDirectory($"{folder}/Log");
    XMLErrorsDir = VirtualFS.GetDirectoryCreateDirectory($"{folder}/XML-Errors");

    string xmlFolder = folder + "/XMLs";

    foreach (var xmlFile in VirtualFS.DirectoryInfoEnumerateFiles(xmlFolder, "*.xml"))
    {
        DocumentsCount++;
        Validate(xmlFile);
    }

    CalculatePrecision(DocumentsCount, DocumentsCountWithError);
}
/// <summary>
/// Configures a fresh <c>VirtualFS</c>, runs only the header-extraction stage for
/// <paramref name="basename"/> using the fixed folders <c>"input"</c> and <c>"output"</c>,
/// and returns the string form of the context's file list.
/// </summary>
/// <param name="basename">Base name given to the stage context.</param>
/// <returns>The result of <c>context.FileListOutput.ToString()</c>.</returns>
public static string ExtractHeader(string basename)
{
    VirtualFS.ConfigureFileSystem(new VirtualFS());
    PdfReaderException.ContinueOnException();

    using (var context = new ParserStages.StageContext(basename, "input", "output"))
    {
        new ParserStages.StageExtractHeaderDOU(context).Process();

        // NOTE(review): this returns whatever ToString() yields for FileListOutput — confirm that is the intended value.
        string fileList = context.FileListOutput.ToString();
        return fileList;
    }
}
/// <summary>
/// Lazily passes <paramref name="stream"/> through unchanged while logging every item
/// to its own file. The file name is <paramref name="pattern"/> formatted with the id
/// the logger creates for the item.
/// </summary>
/// <param name="pattern">Composite format string; placeholder <c>{0}</c> receives the item id.</param>
/// <param name="stream">Items to log and forward.</param>
/// <returns>The same items, each yielded after its log file has been written and closed.</returns>
IEnumerable<TT> PipelineTextLogFile<TL>(string pattern, IEnumerable<TT> stream)
    where TL : class, ILogMultipleStructure<TT>
{
    TL logger = _factory.CreateGlobalInstance<TL>();

    foreach (var item in stream)
    {
        string id = logger.CreateId(item);
        string path = String.Format(pattern, id);

        using (var output = VirtualFS.OpenWrite(path))
        {
            logger.Log(id, output, item);
        }

        yield return item;
    }
}
/// <summary>
/// Reads the <c>ID</c> query-string value, looks up the matching unfulfilled activation
/// request and, when the user is not yet approved, approves the user, creates the root
/// folder, marks the request fulfilled and shows the success panel.
/// Shows the failure panel when no matching request is found. Any exception, including
/// the "Invalid request." one raised for a missing ID, is reported through the master page.
/// </summary>
private void GetIDAndActivate()
{
    try
    {
        _ID = Request.QueryString["ID"];
        if (string.IsNullOrEmpty(_ID))
        {
            throw new Exception("Invalid request.");
        }

        tblActivationRequest request = GoProGoDC.ProfileDC.tblActivationRequests
            .Where(req => req.ActivationID.ToString() == _ID && !req.IsFulfilled)
            .SingleOrDefault<tblActivationRequest>();

        if (request == null || request.IsFulfilled)
        {
            pnlFailure.Visible = true;
            return;
        }

        MembershipUser member = Membership.GetUser(request.tblProfile.UserID);
        if (member.IsApproved)
        {
            // Already approved: nothing is changed and neither panel is shown.
            return;
        }

        //Approved user account first
        member.IsApproved = true;
        Membership.UpdateUser(member);

        //Following line will submit changes in DB for creating file
        tblFileInformation rootFolder = VirtualFS.CreateFolder(member.UserName, request.tblProfile.ID, null);
        request.tblProfile.RootFolderID = rootFolder.ID;
        request.IsFulfilled = true;

        //Following line will submit changes to update Profile
        GoProGoDC.ProfileDC.SubmitChanges(ConflictMode.FailOnFirstConflict);
        pnlSuccess.Visible = true;
    }
    catch (Exception ex)
    {
        ((PublicMaster)this.Master).ShowMessage(ex.Message, MessageType.Error);
    }
}
/// <summary>
/// Copies the pages reported as errors by the PDF log, in ascending order, from the
/// input PDF into a new PDF written to <paramref name="outputfile"/>.
/// Nothing is written when there are no error pages.
/// </summary>
/// <param name="outputfile">Name of the PDF to write.</param>
/// <returns>The number of error pages.</returns>
public int SaveErrors(string outputfile)
{
    string inputfile = this._input;
    var pagesWithErrors = _pdfLog.GetErrors().OrderBy(page => page).ToList();

    if (pagesWithErrors.Count > 0)
    {
        using (var source = new PdfDocument(VirtualFS.OpenPdfReader(inputfile)))
        {
            using (var target = new PdfDocument(VirtualFS.OpenPdfWriter(outputfile)))
            {
                source.CopyPagesTo(pagesWithErrors, target);
            }
        }
    }

    return pagesWithErrors.Count;
}
/// <summary>
/// Copies the given pages, in ascending order, from the PDF stored in <c>_output</c>
/// into a new PDF written to <paramref name="outputfile"/>.
/// Nothing is written when <paramref name="pages"/> is empty.
/// </summary>
/// <param name="outputfile">Name of the PDF to write.</param>
/// <param name="pages">Page numbers to copy.</param>
/// <returns>The number of pages in <paramref name="pages"/>.</returns>
public int ExtractOutputPages(string outputfile, IEnumerable<int> pages)
{
    var sortedPages = pages.OrderBy(page => page).ToList();

    if (sortedPages.Count > 0)
    {
        // The source here is the previously produced output file, not the original input.
        using (var source = new PdfDocument(VirtualFS.OpenPdfReader(_output)))
        {
            using (var target = new PdfDocument(VirtualFS.OpenPdfWriter(outputfile)))
            {
                source.CopyPagesTo(sortedPages, target);
            }
        }
    }

    return sortedPages.Count;
}
/// <summary>
/// Manual smoke test: formats and mounts a volatile disk, creates a few directories and
/// files, exercises write/read on one file, remounts the disk and prints the node tree.
/// Any exception is reported on the console instead of propagating.
/// </summary>
static void TestVirtualFileSystem()
{
    try
    {
        var random = new Random();

        var disk = new VolatileDisk(1);
        //PersistentDisk disk = new PersistentDisk(1, "disk1");
        disk.TurnOn();

        var fileSystem = new VirtualFS();
        fileSystem.Format(disk);
        fileSystem.Mount(disk, "/");

        VirtualNode rootNode = fileSystem.RootNode;

        VirtualNode firstDir = rootNode.CreateDirectoryNode("dir1");
        firstDir.CreateDirectoryNode("foo");
        firstDir.CreateDirectoryNode("bar");

        VirtualNode secondDir = rootNode.CreateDirectoryNode("dir2");
        secondDir.CreateDirectoryNode("hah!");

        VirtualNode testFile = firstDir.CreateFileNode("file1");
        TestFileWriteRead(testFile, random, 0, 100);   // 1 sector
        TestFileWriteRead(testFile, random, 0, 500);   // 2 sectors
        TestFileWriteRead(testFile, random, 250, 500); // 3 sectors

        secondDir.CreateFileNode("another");

        // Remount before printing the node tree.
        fileSystem.Unmount("/");
        fileSystem.Mount(disk, "/");
        RecursivelyPrintNodes(fileSystem.RootNode);

        disk.TurnOff();
    }
    catch (Exception ex)
    {
        Console.WriteLine("VFS test failed: " + ex.Message);
        Console.WriteLine(ex.StackTrace);
    }
}
/// <summary>
/// Serializes <paramref name="artigo"/> as indented JSON and writes it to
/// <c>{doc}.json</c> through the virtual file system.
/// </summary>
/// <param name="artigo">Article to serialize.</param>
/// <param name="doc">Output path without the <c>.json</c> extension.</param>
public void Write(Artigo artigo, string doc)
{
    // TODO: fix it
    // Rollback to previous name
    //string finalURL = ProcessName(artigos.FirstOrDefault(), doc);
    string finalURL = doc;

    var jsonSettings = new JsonSerializerSettings();
    jsonSettings.Formatting = Formatting.Indented;

    // The target stream is opened before serialization starts.
    using (Stream output = VirtualFS.OpenWrite($"{finalURL}.json"))
    {
        string json = JsonConvert.SerializeObject(artigo, jsonSettings);

        using (var textWriter = new StreamWriter(output))
        {
            textWriter.Write(json);
        }
    }
}
/// <summary>
/// Copies every page of the input PDF that the PDF log does not report as an error
/// into a new PDF written to <paramref name="outputfile"/>.
/// Nothing is written when every page is an error page.
/// </summary>
/// <param name="outputfile">Name of the PDF to write.</param>
public void SaveOk(string outputfile)
{
    string inputfile = this._input;
    var pagesWithErrors = _pdfLog.GetErrors().OrderBy(page => page).ToList();

    using (var source = new PdfDocument(VirtualFS.OpenPdfReader(inputfile)))
    {
        int pageCount = source.GetNumberOfPages();

        // Page numbers start at 1; keep those that are not in the error list.
        var goodPages = Enumerable.Range(1, pageCount).Except(pagesWithErrors).ToList();

        if (goodPages.Count > 0)
        {
            using (var target = new PdfDocument(VirtualFS.OpenPdfWriter(outputfile)))
            {
                source.CopyPagesTo(goodPages, target);
            }
        }
    }
}
/// <summary>
/// Opens a PDF document that reads from <paramref name="filename"/> and writes to
/// <paramref name="outputname"/>, both through the virtual file system.
/// </summary>
/// <param name="filename">Name of the PDF to read.</param>
/// <param name="outputname">Name of the PDF to write.</param>
public PipelineDebugContext(string filename, string outputname)
{
    var reader = VirtualFS.OpenPdfReader(filename);
    var writer = VirtualFS.OpenPdfWriter(outputname);

    _pdf = new PdfDocument(reader, writer);
}
/// <summary>
/// Reports whether <c>VirtualFS.GetDirectory</c> returns a non-null result for
/// <paramref name="absolutepath"/>.
/// </summary>
/// <param name="absolutepath">Path of the directory to look up.</param>
/// <returns><c>true</c> when the lookup result is not null; otherwise <c>false</c>.</returns>
public bool DirectoryExists(string absolutepath)
{
    var directory = VirtualFS.GetDirectory(absolutepath);
    return directory != null;
}
/// <summary>
/// Manual smoke test: formats and mounts a volatile disk, builds a small tree of
/// directories and files, then exercises write/read, rename, move, remount, file
/// deletion and directory deletion, printing the node tree after each step.
/// Any exception is reported on the console instead of propagating.
/// </summary>
static void TestVirtualFileSystem()
{
    try
    {
        var random = new Random();

        var disk = new VolatileDisk(1);
        //PersistentDisk disk = new PersistentDisk(1, "disk1");
        disk.TurnOn();

        var fileSystem = new VirtualFS();
        fileSystem.Format(disk);
        fileSystem.Mount(disk, "/");

        // Build the initial tree.
        VirtualNode rootNode = fileSystem.RootNode;
        VirtualNode firstDir = rootNode.CreateDirectoryNode("dir1");
        VirtualNode secondDir = rootNode.CreateDirectoryNode("dir2");
        secondDir.CreateDirectoryNode("dir3");
        firstDir.CreateDirectoryNode("dir4");
        secondDir.CreateDirectoryNode("dir5");

        VirtualNode file1 = firstDir.CreateFileNode("file1");
        VirtualNode file2 = firstDir.CreateFileNode("file2");
        VirtualNode file3 = secondDir.CreateFileNode("file3");
        VirtualNode file4 = secondDir.CreateFileNode("file4");

        TestFileWriteRead(file1, random, 0, 100);   // 1 sector
        TestFileWriteRead(file1, random, 42, 77);
        TestFileWriteRead(file1, random, 0, 500);   // 2 sectors
        TestFileWriteRead(file1, random, 250, 500); // 3 sectors
        TestFileWriteRead(file1, random, 275, 700); // 4 sectors
        RecursivelyPrintNodes(fileSystem.RootNode);

        Console.WriteLine("Rename!");
        firstDir = fileSystem.RootNode.GetChild("dir1");
        firstDir.Rename("newdir1");
        RecursivelyPrintNodes(fileSystem.RootNode);

        // Move something
        Console.WriteLine("Move!");
        secondDir = fileSystem.RootNode.GetChild("dir2");
        firstDir.Move(secondDir);
        RecursivelyPrintNodes(fileSystem.RootNode);

        // Make sure disk is correct
        Console.WriteLine("Unmount/mount!");
        fileSystem.Unmount("/");
        fileSystem.Mount(disk, "/");
        RecursivelyPrintNodes(fileSystem.RootNode);

        // Delete
        Console.WriteLine("Delete file!");
        VirtualNode doomedFile = fileSystem.RootNode.CreateFileNode("file6");
        doomedFile.Write(0, CreateTestBytes(random, 1000));
        Console.WriteLine("File before deleting!");
        RecursivelyPrintNodes(fileSystem.RootNode);
        doomedFile.Delete();
        Console.WriteLine("File after deleting!");
        RecursivelyPrintNodes(fileSystem.RootNode);

        // Delete directory
        Console.WriteLine("Delete Directory!");
        VirtualNode parentDir = fileSystem.RootNode.GetChild("dir2");
        VirtualNode doomedDir = parentDir.GetChild("newdir1");
        //deleteDir2.Delete();
        doomedDir.Delete();
        Console.WriteLine("...After deleting Directory!");
        RecursivelyPrintNodes(fileSystem.RootNode);

        disk.TurnOff();
        Console.WriteLine("TestVirtualFileSystem success!");
    }
    catch (Exception ex)
    {
        Console.WriteLine("VFS test failed: " + ex.Message);
        Console.WriteLine(ex.StackTrace);
    }
}
/// <summary>
/// Stores the supplied collaborators in the corresponding private fields.
/// </summary>
/// <param name="logStream">Stored in <c>_logStream</c>.</param>
/// <param name="virtualFS">Stored in <c>_virtualFS</c>.</param>
public PostRunExample(ILogStream logStream, VirtualFS virtualFS)
{
    this._virtualFS = virtualFS;
    this._logStream = logStream;
}