/// <summary>
/// Reads every "*.pdf" file in the slips folder, builds a <c>SlipData</c> from each file
/// that yields more than 40 text lines, prints the first issue of any slip that reports
/// issues, then prints pay date, tax period number and net pay for the slips whose
/// <c>Year()</c> is 2018. Finally waits for a line of console input before exiting.
/// </summary>
/// <remarks>
/// Exceptions raised while reading or parsing a file are not handled here; they propagate
/// and end the run, so a broken file is never silently skipped.
/// </remarks>
static void Main()
{
    const string SlipFolder = @"c:\temp\slips";

    // Files that yield this many text lines or fewer are skipped.
    // NOTE(review): presumably such files are not complete payslips - confirm.
    const int MinimumLineCount = 40;

    // Only slips from this year are listed in the summary output.
    const int ReportYear = 2018;

    var allSlips = new List<SlipData>();

    foreach (var filename in Directory.GetFiles(SlipFolder, "*.pdf"))
    {
        List<string> content = PDFWrapper.ReadFile(filename);
        if (content.Count <= MinimumLineCount)
        {
            continue;
        }

        var slipData = new SlipData(content);
        if (slipData.IssuesPresent())
        {
            // Only the first recorded issue is shown for each slip.
            Console.WriteLine($"{slipData.PayDate} : {slipData.Issues[0]}");
        }

        allSlips.Add(slipData);
    }

    foreach (var slip in allSlips)
    {
        if (slip.Year() == ReportYear)
        {
            Console.WriteLine($"{slip.PayDate} {slip.TaxPeriodNumber} {slip.NetPay}");
        }
    }

    // Keep the console window open until the user presses Enter.
    _ = Console.ReadLine();
}
/// <summary>
/// Extracts the text of a PDF file as a list of lines.
/// </summary>
/// <param name="filename">Path of the PDF file; it is opened read-only.</param>
/// <returns>
/// One string per run of fragments sharing exactly the same Y value, ordered by Y
/// descending. Within a line, fragment texts are concatenated in ascending X order.
/// The list is empty when no text fragment was found.
/// </returns>
/// <remarks>
/// Exceptions thrown while opening or reading the file propagate unchanged, with their
/// original stack trace.
/// </remarks>
public static List<string> ReadFile(string filename)
{
    var fragments = new List<PDFWrapper>();

    // Collect the positioned text fragments of every page.
    using (var pdfFile = PdfReader.Open(filename, PdfDocumentOpenMode.ReadOnly))
    {
        foreach (var page in pdfFile.Pages)
        {
            // Fresh fragment per page, so a "Tj" seen before any "Td" still has a target.
            var fragment = new PDFWrapper();

            foreach (var element in ContentReader.ReadContent(page))
            {
                if (element is COperator op)
                {
                    switch (op.Name)
                    {
                        case "Td":
                            // A "Td" operator starts a new fragment and supplies its position.
                            fragment = new PDFWrapper();
                            fragment.AddPos(op);
                            break;

                        case "Tj":
                            // A "Tj" operator supplies text for the current fragment.
                            // NOTE(review): the fragment is appended on every "Tj", so several
                            // "Tj" operators after one "Td" add the same instance repeatedly -
                            // confirm this is intended.
                            fragment.AddText(op);
                            fragments.Add(fragment);
                            break;
                    }
                }
            }
        }
    }

    // Merge the fragments into lines: top of page first, left to right within a line.
    var lines = new List<string>();
    double previousY = 0.0;
    string currentLine = "";

    foreach (var fragment in fragments.OrderByDescending(frag => frag.Y).ThenBy(frag => frag.X))
    {
        // A change in Y (exact comparison) closes the line built so far.
        if (previousY != fragment.Y && currentLine != "")
        {
            lines.Add(currentLine);
            currentLine = "";
        }

        currentLine += fragment.ToString();
        previousY = fragment.Y;
    }

    // Flush the last line, which no following Y change closed.
    if (currentLine != "")
    {
        lines.Add(currentLine);
    }

    return lines;
}