/// <summary>
/// Builds an iCalendar (VCALENDAR) text document from raw schedule input.
/// </summary>
/// <param name="rawInput">Raw schedule text; checked by <c>ValidateInput</c> and then parsed by <c>PdfParser.GetDaysList</c>.</param>
/// <returns>The calendar text, with one VEVENT section per lesson text found in the parsed days.</returns>
public static string GenerateCalendar(string rawInput)
{
    ValidateInput(rawInput);

    // Flatten every day's lesson texts into calendar events up front.
    var calendarEvents = PdfParser.GetDaysList(rawInput)
        .SelectMany(day => day.GetLessonTexts())
        .Select(lessonText => lessonText.ToCalendarEvent())
        .ToList();

    var output = new StringBuilder();
    output.AppendLine("BEGIN:VCALENDAR")
          .AppendLine("VERSION:2.0")
          .AppendLine("PRODID:Schedule_generated_with_itext7")
          .AppendLine("CALSCALE:GREGORIAN");

    foreach (var calendarEvent in calendarEvents)
    {
        output.AppendLine("BEGIN:VEVENT");
        output.AppendLine("DTSTAMP:" + FormatDateTime(calendarEvent.TimeStamp));
        output.AppendLine("DTSTART:" + FormatDateTime(calendarEvent.Start));
        output.AppendLine("DTEND:" + FormatDateTime(calendarEvent.End));
        output.AppendLine("SUMMARY:" + calendarEvent.Summary);
        output.AppendLine("DESCRIPTION:" + calendarEvent.Description);
        output.AppendLine("LOCATION:" + calendarEvent.Location);
        output.AppendLine("UID:" + calendarEvent.Uid);
        output.AppendLine("END:VEVENT");
    }

    output.AppendLine("END:VCALENDAR");
    return output.ToString();
}
/// <summary>
/// Reads the textual content of a ".pdf" or ".txt" file.
/// </summary>
/// <param name="file">The file to read. A null value is tolerated: it is logged and an empty string is returned.</param>
/// <returns>
/// The extracted text, or an empty string when the extension is not supported, no content
/// stream is available, or reading fails (the failure is logged as a warning).
/// </returns>
internal static string GetFileContent(IFileInfo file)
{
    try
    {
        string extension = Path.GetExtension(file.FileName);

        // Extensions are matched case-insensitively so that "REPORT.PDF" is handled like "report.pdf".
        bool isPdf = string.Equals(extension, ".pdf", StringComparison.OrdinalIgnoreCase);
        bool isText = string.Equals(extension, ".txt", StringComparison.OrdinalIgnoreCase);

        if (isPdf || isText)
        {
            // The content stream is disposed on every path, including the PDF one.
            using (var fileContent = FileManager.Instance.GetFileContent(file))
            {
                if (fileContent != null)
                {
                    if (isPdf)
                    {
                        return PdfParser.ReadPdfFile(fileContent);
                    }

                    using (var reader = new StreamReader(fileContent, Encoding.UTF8))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Deliberately best-effort: an unreadable (or null) file is skipped, not fatal.
        string filename = file == null ? "unknown filename. IFileInfo is null." : file.FileName;
        Log.Logger.WarnFormat("Ignoring file [{0}]. Failed reading content. Error: {1}", filename, ex.Message);
    }

    return "";
}
/// <summary>
/// Creates a parser for the given PDF and shows the parsed content in the file-content text box.
/// </summary>
/// <param name="filePath">Path of the PDF file handed to the <c>PdfParser</c> constructor.</param>
public void HandleOpenPdf(string filePath)
{
    // The parser is stored in the 'parser' member rather than a local variable.
    parser = new PdfParser(filePath);
    tbFileContent.Text = parser.GetFileContent();
}
/// <summary>
/// Reads the text found inside a fixed rectangular region of the given PDF file.
/// </summary>
/// <param name="file">PDF file passed on to <c>GetStringValueFromRegion</c>.</param>
/// <returns>The string the parser extracts from the region.</returns>
private static string GetToolCode(string file)
{
    // Hard-coded region; presumably where the tool code is printed on the page - TODO confirm.
    var toolCodeRegion = new iTextSharp.text.Rectangle(296, 722, 323, 728);
    IPdfParser regionParser = new PdfParser();
    return regionParser.GetStringValueFromRegion(file, toolCodeRegion);
}
// Checks that PdfParser.GetDaysList applied to ExampleRawInput yields _exampleDaysList.
public void GetDaysListTest()
{
    var actualDays = PdfParser.GetDaysList(ExampleRawInput);

    Assert.AreEqual(_exampleDaysList, actualDays);
}
// Checks that the leading "Data Zajęć: <date> <weekday>" part is extracted from a day string
// that starts with it.
public void ExtractDateFromDayStringItemSuccessTest()
{
    const string dayStringItem = "Data Zajęć: 2019-10-04 piątek 17:30 19:00 2h00m dr Name Surname Advanced Math Wyk W/1/Web F Toronto Egzamin 19:15 20:45 2h00m doc. dr John Smiths Modern History of Poland Wyk W/1/Web F Praga Egzamin";
    const string expectedDate = "Data Zajęć: 2019-10-04 piątek";

    var extractedDate = PdfParser.ExtractDateFromDayStringItem(dayStringItem);

    Assert.AreEqual(expectedDate, extractedDate);
}
// Checks that the text extracted from PdfFiles/1.pdf equals the known content of that file.
public void GetText()
{
    var pdfParser = new PdfParser();

    var text = pdfParser.GetText("PdfFiles/1.pdf");

    // Expected value goes first, actual second, so a failure message reports them correctly.
    Assert.AreEqual("atish kumar dipongkor", text);
}
// Checks that raw text whose words are separated by spaces and a line feed is split into
// the individual words.
public void RawTextToWordsTest()
{
    var words = new[] { "Some", "example", "input", "text" };
    // Two words per line, the lines separated by a single '\n'.
    var rawText = $"{words[0]} {words[1]}\n{words[2]} {words[3]}";

    var actualWords = PdfParser.RawTextToWords(rawText);

    Assert.AreEqual(words, actualWords);
}
// Checks that a day string in which the date part is preceded by a space is rejected with a
// ParsingException carrying the IndexOfMatchedItemNotZero message.
public void ExtractDateFromStringItemNotSuccessfulTest2()
{
    // Note the leading space before "Data Zajęć:".
    const string dayStringItem = " Data Zajęć: 2019-10-04 piątek 17:30 19:00 2h00m dr Name Surname Advanced Math Wyk W/1/Web F Toronto Egzamin 19:15 20:45 2h00m doc. dr John Smiths Modern History of Poland Wyk W/1/Web F Praga Egzamin";

    var thrown = Assert.Throws<ParsingException>(
        () => PdfParser.ExtractDateFromDayStringItem(dayStringItem));

    Assert.That(thrown.Message, Is.EqualTo(ParsingException.IndexOfMatchedItemNotZero));
}
// Checks that the age distribution parsed from the given resource has the expected keys,
// and the expected value for every key.
public void Parse_age_distribution_correctly(string file)
{
    var pdfText = LoadResource(file);
    var expectedDictionary = GetExpectedAgeDistributionForFile(file);

    var distribution = PdfParser.ParseDistributionByAge(pdfText);

    // The actual value is the receiver and the expected value the argument, so a failure
    // message labels both sides correctly (and matches the per-key assertion below).
    distribution.Keys.ShouldBe(expectedDictionary.Keys);
    foreach (var key in expectedDictionary.Keys)
    {
        distribution[key].ShouldBe(expectedDictionary[key], $"{key} does not match");
    }
}
// Returns the field information of the PDF form "<formid>.pdf" located under pdfroot,
// or null when that file does not exist.
public List<Dictionary<string, object>> GetAllForminfo(string formid)
{
    var pdfPath = Path.Combine(pdfroot, formid + ".pdf");
    if (!System.IO.File.Exists(pdfPath))
    {
        return null;
    }

    return new PdfParser(pdfPath).GetFieldsInfo();
}
// Returns the value of PdfParser.GetPageNumber() for "<formid>.pdf" located under pdfroot,
// or -1 when that file does not exist.
public int GetAllForms(string formid)
{
    var pdfPath = Path.Combine(pdfroot, formid + ".pdf");

    return System.IO.File.Exists(pdfPath)
        ? new PdfParser(pdfPath).GetPageNumber()
        : -1;
}
// Checks that four day strings are converted into a sequence equal to _exampleDaysList.
public void DayStringsToDayItemsTest()
{
    var dayStrings = new List<string>();
    dayStrings.Add("Data Zajęć: 2019-10-04 piątek 17:30 19:00 2h00m dr Name Surname Advanced Math Wyk W/1/Web F Toronto Egzamin 19:15 20:45 2h00m doc. dr John Smiths Modern History of Poland Wyk W/1/Web F Praga Egzamin");
    dayStrings.Add("Data Zajęć: 2019-10-05 sobota 8:00 9:30 2h00m mgr Marry Smiths-Blue Chemistry Cw 12 Blue 2/IEN F Sztokholm Zaliczenie ocena 11:20 14:30 4h00m doc. dr John Black Physics Wyk W/2/W F Toronto Egzamin");
    dayStrings.Add("Data Zajęć: 2019-10-06 niedziela 8:00 9:30 2h00m mgr Jack Green Wzorce projektowe Lab lab/WebN F Montreal Zaliczenie ocena 9:40 11:10 2h00m mgr inż. Thomas Orange Biology and Geography Lab lab15/2/WebN F Los Angeles Zaliczenie ocena 14:40 16:10 2h00m dr George White Geology Lab lab51/22/WebN F San Francisco Zaliczenie ocena 16:20 17:50 2h00m mgr Richard White Basic Mathematics Lab lab22/22/WebN F Chicago Zaliczenie ocena 18:00 19:30 2h00m mgr David Smith Civil Engineering Lab lab3/3/WebN F Nowy Jork Zaliczenie ocena 19:40 21:10 2h00m mgr Jacob Brown Advanced Mathematics Konw konw/2/WebN F Toronto Zaliczenie ocena");
    dayStrings.Add("Data Zajęć: 2019-10-18 piątek 17:30 19:00 2h00m dr Jacob Brown Advanced Physics Wyk W/2/WebN F Toronto Egzamin 19:15 20:45 2h00m dr inż. Thomas Blue Geology Konw konw/2/WebN F Praga Zaliczenie ocena");

    var actualDays = PdfParser.DayStringsToDayItems(dayStrings);

    // Element-wise comparison against the shared fixture.
    Assert.True(_exampleDaysList.SequenceEqual(actualDays));
}
// Produces a single-page PDF for the requested form page, filled with the stored form data,
// and returns its bytes as an in-memory stream.
//
// formid: name of the form; the source document is "<formid>.pdf" under pdfroot.
// pageid: 1-based page number inside the source document.
//
// Returns null when there is no stored form data, the page number is out of range, the page
// cannot be split into its own PDF, or the split page contains no form fields.
public ActionResult<Stream> GetFile(string formid, int pageid)
{
    var db = new DbHelper();
    var jsonObj = db.LoadFormData(formid, pageid);

    // LoadFormData may yield null (GetPage in this class handles that case too).
    if (jsonObj == null || jsonObj.Count < 1)
    {
        return null;
    }

    var pdffile = Path.Combine(pdfroot, formid + ".pdf");
    var pdfDoc = new PdfParser(pdffile);
    if (pageid < 1 || pageid > pdfDoc.GetPageNumber())
    {
        return null;
    }

    var newpdf = Path.Combine(pdfroot, string.Format("{0}.{1}.pdf", formid, pageid));
    var downpdfname = string.Format("myform.{0}.{1}.pdf", formid, pageid);
    var filledPdfPath = Path.Combine(pdfroot, downpdfname);

    // Without a freshly split page nothing was filled in during this call; reading
    // filledPdfPath anyway would either throw or serve a stale file from an earlier request.
    if (!pdfDoc.PageToNewPdf(pageid, newpdf))
    {
        return null;
    }

    var pdfDocForm = new PdfParser(newpdf);
    if (pdfDocForm.GetFieldsInfo().Count < 1)
    {
        return null;
    }

    // Write the stored values into the form fields and save the result as filledPdfPath.
    pdfDocForm.SetFieldsValues(jsonObj, filledPdfPath);

    var dataBytes = System.IO.File.ReadAllBytes(filledPdfPath);
    return new MemoryStream(dataBytes);
}
// Checks that a flat list of words (table header followed by four days) is grouped into one
// string per day, each starting with "Data Zajęć:".
public void WordsToStringDayItemsTest()
{
    var expectedOutput = new List<string>
    {
        "Data Zajęć: 2019-10-04 piątek 17:30 19:00 2h00m dr Name Surname Advanced Math Wyk W/1/Web F Toronto Egzamin 19:15 20:45 2h00m doc. dr John Smiths Modern History of Poland Wyk W/1/Web F Praga Egzamin",
        "Data Zajęć: 2019-10-05 sobota 8:00 9:30 2h00m mgr Marry Smiths-Blue Chemistry Cw 12 Blue 2/IEN F Sztokholm Zaliczenie ocena 11:20 14:30 4h00m doc. dr John Black Physics Wyk W/2/W F Toronto Egzamin",
        "Data Zajęć: 2019-10-06 niedziela 8:00 9:30 2h00m mgr Jack Green Wzorce projektowe Lab lab/WebN F Montreal Zaliczenie ocena 9:40 11:10 2h00m mgr inż. Thomas Orange Biology and Geography Lab lab15/2/WebN F Los Angeles Zaliczenie ocena 14:40 16:10 2h00m dr George White Geology Lab lab51/22/WebN F San Francisco Zaliczenie ocena 16:20 17:50 2h00m mgr Richard White Basic Mathematics Lab lab22/22/WebN F Chicago Zaliczenie ocena 18:00 19:30 2h00m mgr David Smith Civil Engineering Lab lab3/3/WebN F Nowy Jork Zaliczenie ocena 19:40 21:10 2h00m mgr Jacob Brown Advanced Mathematics Konw konw/2/WebN F Toronto Zaliczenie ocena",
        "Data Zajęć: 2019-10-18 piątek 17:30 19:00 2h00m dr Jacob Brown Advanced Physics Wyk W/2/WebN F Toronto Egzamin 19:15 20:45 2h00m dr inż. Thomas Blue Geology Konw konw/2/WebN F Praga Zaliczenie ocena"
    };

    // The input text is one space-separated line. It is assembled from adjacent literals
    // (header, then one literal per day, each ending in a space) so that no literal contains
    // a raw line break, which would be invalid in a regular string and would corrupt a word.
    var wordsInput = (
        "Czas od Czas do Liczba godzin Prowadzący Przedmiot Forma zaj. Grupy Sala Forma zaliczenia Uwagi " +
        "Data Zajęć: 2019-10-04 piątek 17:30 19:00 2h00m dr Name Surname Advanced Math Wyk W/1/Web F Toronto Egzamin 19:15 20:45 2h00m doc. dr John Smiths Modern History of Poland Wyk W/1/Web F Praga Egzamin " +
        "Data Zajęć: 2019-10-05 sobota 8:00 9:30 2h00m mgr Marry Smiths-Blue Chemistry Cw 12 Blue 2/IEN F Sztokholm Zaliczenie ocena 11:20 14:30 4h00m doc. dr John Black Physics Wyk W/2/W F Toronto Egzamin " +
        "Data Zajęć: 2019-10-06 niedziela 8:00 9:30 2h00m mgr Jack Green Wzorce projektowe Lab lab/WebN F Montreal Zaliczenie ocena 9:40 11:10 2h00m mgr inż. Thomas Orange Biology and Geography Lab lab15/2/WebN F Los Angeles Zaliczenie ocena 14:40 16:10 2h00m dr George White Geology Lab lab51/22/WebN F San Francisco Zaliczenie ocena 16:20 17:50 2h00m mgr Richard White Basic Mathematics Lab lab22/22/WebN F Chicago Zaliczenie ocena 18:00 19:30 2h00m mgr David Smith Civil Engineering Lab lab3/3/WebN F Nowy Jork Zaliczenie ocena 19:40 21:10 2h00m mgr Jacob Brown Advanced Mathematics Konw konw/2/WebN F Toronto Zaliczenie ocena " +
        "Data Zajęć: 2019-10-18 piątek 17:30 19:00 2h00m dr Jacob Brown Advanced Physics Wyk W/2/WebN F Toronto Egzamin 19:15 20:45 2h00m dr inż. Thomas Blue Geology Konw konw/2/WebN F Praga Zaliczenie ocena "
    ).Split(" ");

    var result = PdfParser.WordsToStringDayItems(wordsInput);

    Assert.AreEqual(expectedOutput, result);
}
/// <summary>
/// Extracts the sales-order PDF attachments from the configured Outlook folder, parses each
/// of them and returns one list item per attachment.
/// </summary>
/// <returns>The parsed sales orders, in the order the attachments were returned.</returns>
/// <exception cref="System.InvalidOperationException">No customer whose name contains "E2V" exists.</exception>
private async Task<List<SalesOrderListItemViewModel>> GetSalesOrdersAsync()
{
    // Task.Run accepts an async delegate and unwraps it, so each parse can be awaited
    // instead of blocking a thread-pool thread on .Result.
    return await Task.Run(async () =>
    {
        var salesOrderAttachments = OutlookDataProvider.ExtractSalesOrderPdfs(
            Settings.Default.NewOrderFolderName,
            Path.GetTempPath());

        string orderExpr, buyerExpr, deliveryExpr, drawingExpr;
        byte[] logoBytes;
        using (var cpe = new CPEUnitOfWork())
        {
            var customer = cpe.Customers.GetAll().FirstOrDefault(c => c.Name.Contains("E2V"));
            if (customer == null)
            {
                // Fail with a descriptive error instead of a NullReferenceException below.
                throw new System.InvalidOperationException("No customer with a name containing 'E2V' was found.");
            }

            orderExpr = customer.OrderNumberRegex;
            buyerExpr = customer.BuyerRegex;
            deliveryExpr = customer.DeliveryDateRegex;
            drawingExpr = customer.DrawingNumberRegex;
            logoBytes = customer.LogoBLOB;
        }

        var salesOrders = new List<SalesOrderListItemViewModel>();
        foreach (var attachment in salesOrderAttachments)
        {
            var detail = await PdfParser.ParseSalesOrderAsync(
                attachment.FileName,
                attachment.MailId,
                orderExpr,
                deliveryExpr,
                buyerExpr,
                drawingExpr);

            salesOrders.Add(new SalesOrderListItemViewModel
            {
                Buyer = detail.Buyer,
                DeliveryDate = detail.DeliveryDate,
                DrawingNumber = detail.DrawingNumber,
                OrderNumber = detail.OrderNumber,
                CompanyLogoBytes = logoBytes,
                FileName = detail.FileName,
                MailId = detail.MailId
            });
        }

        return salesOrders;
    });
}
/// <summary>
/// Copies one page of a PDF into a new file named
/// "&lt;output name&gt; - &lt;5-digit page number&gt;.pdf" next to the requested output file.
/// </summary>
/// <param name="args">args[0]: input PDF, args[1]: output file (used for directory and base name), args[2]: page number.</param>
static void Main(string[] args)
{
    if (args == null || args.Length < 3)
    {
        // Report the problem instead of failing with an IndexOutOfRangeException.
        Console.Error.WriteLine("Usage: <inputFile> <outputFile> <pageNumber>");
        Environment.ExitCode = 1;
        return;
    }

    string inputFile = args[0];
    string outputFile = args[1];
    int pageNumber = Int32.Parse(args[2]);

    // GetDirectoryName returns null for a root path; fall back to a relative target then.
    string targetDirectory = Path.GetDirectoryName(outputFile) ?? "";
    string target = Path.Combine(
        targetDirectory,
        Path.GetFileNameWithoutExtension(outputFile) + " - " + pageNumber.ToString("00000") + ".pdf");

    using (var inputStream = new FileStream(inputFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    {
        var pdfParser = PdfParser.Parse(inputStream);
        using (Stream s = new FileStream(target, FileMode.Create, FileAccess.Write))
        {
            // Exactly one merger per output stream (a second one used to be created on the same stream).
            var pdfMerger = new PdfMerger(s);
            pdfMerger.Add(pdfParser, new int[] { pageNumber });
            pdfMerger.Finish();
        }
    }
}
/// <summary>
/// Command-line entry point: encodes or decodes the given text using the PDF at the book
/// path, and prints the outcome.
/// </summary>
/// <param name="args">Command-line arguments, checked by <c>ArgsParser.Validate</c>.</param>
/// <returns>0 when the arguments are rejected, 1 otherwise.</returns>
static int Main(string[] args)
{
    var argsParser = new ArgsParser();
    if (!argsParser.Validate(args))
    {
        return 0;
    }

    var pdfReader = new PdfReader(argsParser.GetBookPath());
    var pdfParser = new PdfParser(pdfReader);
    var result = "No action has been executed";

    if (argsParser.GetMode() == ArgsParser.Mode.Encode)
    {
        try
        {
            var encoder = new Encoder(pdfParser);
            result = string.Format("Encoded text: {0}", encoder.Encode(argsParser.GetText()));
        }
        catch (EncodeException encodeException)
        {
            // Encoding failures carry their own user-facing message.
            result = encodeException.Message;
        }
        catch (Exception)
        {
            result = "Couldn't encode this string. ";
        }
    }

    if (argsParser.GetMode() == ArgsParser.Mode.Decode)
    {
        try
        {
            var decoder = new Decoder(pdfParser);
            result = string.Format("Decoded text: {0}", decoder.Decode(argsParser.GetText()));
        }
        catch (Exception)
        {
            result = "Couldn't decode this string.";
        }
    }

    Console.WriteLine(result);
    return 1;
}
/// <summary>
/// Extracts the text of the uploaded PDF and parses the daily statistics out of it.
/// </summary>
/// <param name="request">Command carrying the uploaded file.</param>
/// <param name="cancellationToken">Not used by this handler.</param>
/// <returns>
/// A successful result with the parsed statistics, or a failure result whose message is
/// "&lt;file name&gt; -- &lt;exception message&gt;" when anything throws.
/// </returns>
public async Task<Result<DailyPdfStats>> Handle(ParsePdfCommand request, CancellationToken cancellationToken)
{
    // The body is synchronous; awaiting a completed task keeps the method formally async.
    await Task.FromResult(true);

    // Concatenates the extracted text of every page of the uploaded file.
    string ReadPdfText()
    {
        var text = new StringBuilder();
        using (var stream = request.File.OpenReadStream())
        {
            var doc = new PdfDocument(stream);
            foreach (PdfPageBase page in doc.Pages)
            {
                text.Append(page.ExtractText());
            }
        }

        return text.ToString();
    }

    try
    {
        var stats = new DailyPdfStats { FileName = request.File.FileName };
        var pdfContents = ReadPdfText();

        stats.NumberInfected = PdfParser.ParseNumberInfected(pdfContents);
        stats.NumberDeceased = PdfParser.ParseNumberDeceased(pdfContents);
        stats.NumberCured = PdfParser.ParseNumberCured(pdfContents);
        stats.AverageAge = PdfParser.ParseAverageAge(pdfContents);
        stats.DistributionByAge = PdfParser.ParseDistributionByAge(pdfContents);

        // The publication date is derived from the file name, not from the PDF text.
        var publishedOn = PdfParser.TryParsePublishedDate(request.File.FileName);
        stats.ParsedOn = new DateTimeOffset(publishedOn).ToUnixTimeSeconds();
        stats.ParsedOnString = publishedOn.ToString("yyyy-MM-dd");

        return Result.Ok(stats);
    }
    catch (Exception e)
    {
        return Result.Failure<DailyPdfStats>($"{request.File.FileName} -- {e.Message}");
    }
}
/// <summary>
/// Writes an output PDF that consists of the input PDF appended to itself a given number of times.
/// </summary>
/// <param name="args">args[0]: input PDF, args[1]: output PDF, args[2]: number of copies to merge.</param>
static void Main(string[] args)
{
    var inputFile = args[0];
    var outputFile = args[1];
    var iterations = Int32.Parse(args[2]);

    // Merge the generated pdfs
    using (var outputStream = new FileStream(outputFile, FileMode.Create, FileAccess.Write))
    {
        var merger = new PdfMerger(outputStream);

        for (var pass = 0; pass < iterations; pass++)
        {
            // The input file is reopened and parsed again for every copy.
            using (var inputStream = new FileStream(inputFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            {
                merger.Add(PdfParser.Parse(inputStream), null);
            }
        }

        merger.Finish();
    }
}
/// <summary>
/// Locates a foundation's financial statement for a year, downloads it if it is not already
/// on disk, and extracts the balance sheet total from it.
/// </summary>
/// <param name="foundation">Foundation identifier passed to the crawler.</param>
/// <param name="shortName">Short name used in the local file name "&lt;shortName&gt;_&lt;year&gt;.pdf".</param>
/// <param name="year">Year of the financial statement.</param>
/// <param name="host">Optional host passed to the crawler.</param>
/// <returns>
/// A result with <c>Success = true</c>, the statement URL and the balance sheet total; or a
/// result with <c>Success = false</c> when no statement URL is found or parsing throws.
/// </returns>
private static async Task<FoundationResult> ProcessFoundationAsync(string foundation, string shortName, int year, string host = null)
{
    // Path.Combine avoids the doubled separator that concatenating onto BaseDirectory
    // produced, and does not hard-code the Windows backslash.
    var downloadPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Data", $"{shortName}_{year}.pdf");

    var result = Crawler.FindFinancialStatement(foundation, year, host).ToArray();

    // Prefer a link whose URI contains "rechnung"; otherwise take the first hit.
    var jahresRechnungUrl = result.FirstOrDefault(r => r.AbsoluteUri.Contains("rechnung")) ?? result.FirstOrDefault();
    if (jahresRechnungUrl == null)
    {
        return new FoundationResult { Success = false };
    }

    // Don't download the file if we already have it
    if (!File.Exists(downloadPath))
    {
        await Downloader.DownloadAsync(jahresRechnungUrl.AbsoluteUri, downloadPath);
    }

    try
    {
        // A missing total (null) is reported as the default value.
        var balanceSheetTotal = PdfParser.FindTotalActiva(downloadPath, year).GetValueOrDefault();
        return new FoundationResult
        {
            FinancialStatementUrl = jahresRechnungUrl,
            BalanceSheetTotal = balanceSheetTotal,
            Success = true
        };
    }
    catch (Exception ex)
    {
        Console.WriteLine("ProcessFoundation: {0}", ex);
        return new FoundationResult { Success = false };
    }
}
// Builds 3-grams for every PDF in the "Pdf" directory and checks the dimensions of the
// resulting term-frequency matrix.
public void CreateTfIdf()
{
    var tfIdf = new DocumentTermFrequency();
    var nGram = new Ngram();
    var pdfParser = new PdfParser();
    var reports = new Dictionary<string, List<string>>();

    foreach (var file in Directory.EnumerateFiles("Pdf", "*.pdf"))
    {
        var fileName = Path.GetFileNameWithoutExtension(file);
        var contents = pdfParser.GetText(file);
        reports[fileName] = nGram.Create(contents, 3);
    }

    var result = tfIdf.Create(reports);

    // Expected value first, actual second, so failure messages report them correctly.
    Assert.AreEqual(2, result.GetLength(0));
    Assert.AreEqual(7, result.GetLength(1));
}
// Returns the field data for one page of the form "<formid>.pdf" located under pdfroot.
//
// Unless forcegenerate is set, stored form data is returned if present, otherwise the stored
// form configuration if present. Failing that (or when forcegenerate is set), the page is
// split into "<formid>.<pageid>.pdf", its field information is read, saved through DbHelper
// and returned.
//
// Returns null when pageid is outside 1..page count or the page cannot be split out.
public List<Dictionary<string, object>> GetPage(string formid, int pageid, bool forcegenerate = false)
{
    if (!forcegenerate)
    {
        var lookupDb = new DbHelper();
        var stored = lookupDb.LoadFormData(formid, pageid) ?? lookupDb.LoadFormConfig(formid, pageid);
        if (stored != null)
        {
            return stored;
        }
    }

    var sourcePdf = new PdfParser(Path.Combine(pdfroot, formid + ".pdf"));
    if (pageid > sourcePdf.GetPageNumber() || pageid < 1)
    {
        return null;
    }

    var pagePdfPath = Path.Combine(pdfroot, string.Format("{0}.{1}.pdf", formid, pageid));
    if (!sourcePdf.PageToNewPdf(pageid, pagePdfPath))
    {
        return null;
    }

    var fields = new PdfParser(pagePdfPath).GetFieldsInfo();
    new DbHelper().SavePdfFormData(formid, pageid, fields);
    return fields;
}
// Checks that a day string holding six lessons is split into one string per lesson, with the
// leading date part removed. Every lesson except the last keeps its trailing space.
public void ExtractLessonStringsFromDayStringItemsTest()
{
    const string dayStringItem =
        "Data Zajęć: 2019-10-06 niedziela 8:00 9:30 2h00m mgr Jack Green Wzorce projektowe Lab lab/WebN F Montreal Zaliczenie ocena "
        + "9:40 11:10 2h00m mgr inż. Thomas Orange Biology and Geography Lab lab15/2/WebN F Los Angeles Zaliczenie ocena "
        + "14:40 16:10 2h00m dr George White Geology Lab lab51/22/WebN F San Francisco Zaliczenie ocena "
        + "16:20 17:50 2h00m mgr Richard White Basic Mathematics Lab lab22/22/WebN F Chicago Zaliczenie ocena "
        + "18:00 19:30 2h00m mgr David Smith Civil Engineering Lab lab3/3/WebN F Nowy Jork Zaliczenie ocena "
        + "19:40 21:10 2h00m mgr Jacob Brown Advanced Mathematics Konw konw/2/WebN F Toronto Zaliczenie ocena";

    var expectedLessons = new List<string>();
    expectedLessons.Add("8:00 9:30 2h00m mgr Jack Green Wzorce projektowe Lab lab/WebN F Montreal Zaliczenie ocena ");
    expectedLessons.Add("9:40 11:10 2h00m mgr inż. Thomas Orange Biology and Geography Lab lab15/2/WebN F Los Angeles Zaliczenie ocena ");
    expectedLessons.Add("14:40 16:10 2h00m dr George White Geology Lab lab51/22/WebN F San Francisco Zaliczenie ocena ");
    expectedLessons.Add("16:20 17:50 2h00m mgr Richard White Basic Mathematics Lab lab22/22/WebN F Chicago Zaliczenie ocena ");
    expectedLessons.Add("18:00 19:30 2h00m mgr David Smith Civil Engineering Lab lab3/3/WebN F Nowy Jork Zaliczenie ocena ");
    expectedLessons.Add("19:40 21:10 2h00m mgr Jacob Brown Advanced Mathematics Konw konw/2/WebN F Toronto Zaliczenie ocena");

    var actualLessons = PdfParser.ExtractLessonStringsFromDayStringItem(dayStringItem);

    Assert.AreEqual(expectedLessons, actualLessons);
}
/// <summary>
/// Creates the parser that handles the given file type.
/// </summary>
/// <param name="fileType">File type to create a parser for.</param>
/// <returns>A new <c>DocxParser</c>, <c>OdtParser</c> or <c>PdfParser</c>.</returns>
/// <exception cref="NotSupportedException">The file type is not Docx, Odt or Pdf.</exception>
private IParser GetParser(FileExtension fileType)
{
    switch (fileType)
    {
        case FileExtension.Docx:
            return new DocxParser();
        case FileExtension.Odt:
            return new OdtParser();
        case FileExtension.Pdf:
            return new PdfParser();
        default:
            // A specific exception type with the same message; handlers that catch
            // Exception keep working.
            throw new NotSupportedException("Unknown file type");
    }
}
// Builds a minimal one-page PDF document in memory (header, content stream, page, page tree,
// catalog, xref table, trailer) and checks that PdfParser reads the version and can walk
// Root -> Pages -> Kids -> Page -> Contents.
public void TestPdfParser()
{
    var pdfText = new StringBuilder();
    // Addresses are collected by addStream/addObject; the first entry is a 0 placeholder
    // that is skipped when the xref table is written.
    var objectAddresses = new List<int> { 0 };

    pdfText.AppendLine("%PDF-1.4");
    pdfText.AppendLine("%ту╧╙");

    var page1ContentId = addStream(pdfText, objectAddresses, $"Hello World");
    var page1Id = addObject(pdfText, objectAddresses, $"<</Type/Page /Parent ~1~ 0 R/Contents {page1ContentId} 0 R>>");
    var pagesId = addObject(pdfText, objectAddresses, $"<</Type/Pages /Kids[{page1Id} 0 R] /Count 1>>");
    var catalogId = addObject(pdfText, objectAddresses, $"<</Type/Catalog /Pages {pagesId} 0 R>>");

    // Cross-reference table: one line per recorded address.
    var xrefAddress = pdfText.Length;
    pdfText.AppendLine("xref");
    pdfText.AppendLine($"0 {objectAddresses.Count}");
    pdfText.AppendLine("0000000000 65535 f");
    foreach (var address in objectAddresses)
    {
        if (address != 0)
        {
            pdfText.AppendLine($"{address:0000000000} 00000 n");
        }
    }

    pdfText.AppendLine($"trailer<</Size {objectAddresses.Count}/Root {catalogId} 0 R>>");
    pdfText.AppendLine("startxref");
    pdfText.AppendLine(xrefAddress.ToString());
    pdfText.AppendLine("%%EOF");

    // One byte per character: each char is cast to byte, keeping only its low 8 bits.
    var byteString = pdfText.ToString();
    var bytes = new byte[byteString.Length];
    for (var i = 0; i < byteString.Length; i++)
    {
        bytes[i] = (byte)byteString[i];
    }

    var pdfParser = new PdfParser(bytes);
    Assert.AreEqual("1.4", pdfParser.PdfVersion);

    var trailerEntries = pdfParser.Tokeniser.TrailerEntries;
    var catalog = (DictionaryToken)trailerEntries["Root"];
    Assert.AreEqual("Catalog", ((NameToken)catalog["Type"]).Value);

    var pageTree = (DictionaryToken)catalog["Pages"];
    Assert.AreEqual("Pages", ((NameToken)pageTree["Type"]).Value);

    foreach (var kid in (ArrayToken)pageTree["Kids"])
    {
        var pageToken = (DictionaryToken)kid;
        Assert.AreEqual("Page", ((NameToken)pageToken["Type"]).Value);

        // The cast is the check: it throws if Contents is not a DictionaryToken.
        var contentsToken = (DictionaryToken)pageToken["Contents"];
    }
}
// Checks that the average age parsed from the given resource equals the expected value.
public void Parse_average_age_correctly(string file, string expectedMap)
{
    var averageAge = PdfParser.ParseAverageAge(LoadResource(file));

    averageAge.ShouldBe(expectedMap);
}
/// <summary>
/// Creates an engine from its two collaborators. The constructor is private.
/// </summary>
/// <param name="parser">Stored in <c>_parser</c>.</param>
/// <param name="extractPdf">Stored in <c>_extractPdf</c>.</param>
private PdfOcrEngine(PdfParser parser, ExtractPdf extractPdf)
{
    _extractPdf = extractPdf;
    _parser = parser;
}
/// <summary>
/// Creates a decoder that works on the given PDF parser.
/// </summary>
/// <param name="PdfParser">
/// Parser stored in the member of the same name; the parameter shares its name with the
/// type, hence the explicit <c>this.</c> on the assignment.
/// </param>
public Decoder(PdfParser PdfParser) { this.PdfParser = PdfParser; }
/// <summary>
/// Moves to the next or previous pdf file and displays it: the rendered document in the web
/// browser, one tab per page with the extracted text, an "Info" tab and a "Bytes" tab.
/// </summary>
/// <remarks>
/// Directories are scanned lazily: files already shown are kept in <c>allFiles</c>, files
/// found but not yet shown in <c>files</c>, and directories still to be read in <c>dirs</c>.
/// </remarks>
/// <param name="isNext">true to move forward, false to move backward.</param>
private void navigate(bool isNext)
{
    pdfRefRunTrace.Clear();
    BackStatusBarItem.Visibility = Visibility.Collapsed;

    // Check if the user has changed file or directory; if so restart the scan from there.
    if (FileTextBox.Text != "" && fileString != FileTextBox.Text)
    {
        var fileInfo = new FileInfo(FileTextBox.Text);
        if (!fileInfo.Exists)
        {
            MessageBox.Show($"Could not find file '{FileTextBox.Text}'.", "Pdf file not found");
            return;
        }
        fileString = FileTextBox.Text;
        directoryString = fileInfo.DirectoryName;
        DirectoryTextBox.Text = directoryString;
        files.Clear();
        dirs.Clear();
        allFiles.Clear();
        currentFileIndex = 0;
        dirs.Push(new DirectoryInfo(directoryString));
        // The scan has to locate the entered file before anything is queued for display.
        isShowStartFile = true;
    }
    else if (DirectoryTextBox.Text != "" && directoryString != DirectoryTextBox.Text)
    {
        var directoryInfo = new DirectoryInfo(DirectoryTextBox.Text);
        if (!directoryInfo.Exists)
        {
            MessageBox.Show($"Could not find directory '{DirectoryTextBox.Text}'.", "Directory not found");
            return;
        }
        directoryString = DirectoryTextBox.Text;
        files.Clear();
        dirs.Clear();
        allFiles.Clear();
        currentFileIndex = 0;
        dirs.Push(directoryInfo);
    }

    var haveAllFilesBeenFound = false;

    // Step through the already shown files only when there are any. With an empty allFiles
    // list the index arithmetic below used to produce -1 (or 0 on an empty list) and the
    // lookup further down threw; the scanning branch handles the empty case properly.
    var canStepThroughShownFiles = allFiles.Count > 0
        && (currentFileIndex < allFiles.Count - 1 || !isNext || (files.Count == 0 && dirs.Count == 0));

    if (canStepThroughShownFiles)
    {
        // Show already read files, wrapping around at both ends.
        if (isNext)
        {
            currentFileIndex++;
            if (currentFileIndex >= allFiles.Count)
            {
                currentFileIndex = 0;
            }
        }
        else
        {
            currentFileIndex--;
            if (currentFileIndex < 0)
            {
                currentFileIndex = allFiles.Count - 1;
            }
        }
    }
    else
    {
        while (files.Count == 0 && !haveAllFilesBeenFound)
        {
            if (dirs.Count == 0)
            {
                if (allFiles.Count == 0)
                {
                    MessageBox.Show($"There are no pdf files in '{directoryString}' and its subdirectories.", "No pdf file found");
                    return;
                }
                else
                {
                    // Everything has been scanned: wrap around to the first file.
                    currentFileIndex = 0;
                    haveAllFilesBeenFound = true;
                }
            }
            else
            {
                // Read next directory. Subdirectories are pushed in descending name order
                // so that they are popped in ascending order.
                var dir = dirs.Pop();
                foreach (var subDir in dir.GetDirectories().OrderByDescending(d => d.Name))
                {
                    dirs.Push(subDir);
                }
                foreach (var subfile in dir.GetFiles("*.pdf"))
                {
                    if (isShowStartFile)
                    {
                        if (subfile.FullName == fileString)
                        {
                            isShowStartFile = false;
                            files.Enqueue(subfile);
                        }
                        else
                        {
                            // Files found before the start file go straight to the shown list.
                            allFiles.Add(subfile);
                        }
                    }
                    else
                    {
                        files.Enqueue(subfile);
                    }
                }
            }
        }
        if (!haveAllFilesBeenFound)
        {
            currentFileIndex = allFiles.Count;
            allFiles.Add(files.Dequeue());
        }
    }

    var file = allFiles[currentFileIndex];
    FileTextBox.Text = file.FullName;
    fileString = FileTextBox.Text;
    PagesTabControl.Items.Clear();

    PdfParser pdfParser;
    try
    {
        pdfParser = new PdfParser(file.FullName, "|", streamBuffer, stringBuilder);
    }
    catch (Exception ex)
    {
        // Parsing failed completely: show the exception (and the tokeniser buffer, if
        // available) in a single tab and stop.
        var pageTabItem = new TabItem { Header = "E_xception" };
        var bytes = "";
        if (ex is PdfException pdfException)
        {
            bytes = Environment.NewLine + Environment.NewLine + pdfException.Tokeniser.ShowBufferContent();
        }
        var textBox = new TextBox
        {
            Text = ex.ToDetailString() + bytes,
            VerticalScrollBarVisibility = ScrollBarVisibility.Auto,
            IsReadOnly = true
        };
        pageTabItem.Content = textBox;
        PagesTabControl.Items.Add(pageTabItem);
        PagesTabControl.SelectedIndex = 0;
        return;
    }

    try
    {
        PdfWebBrowser.Visibility = Visibility.Visible;
        PdfTextBox.Visibility = Visibility.Collapsed;
        var fileUri = new Uri(new Uri("file://"), file.FullName);
        PdfWebBrowser.Navigate(fileUri);
    }
    catch (Exception ex)
    {
        // The browser could not show the file: display the error text instead.
        PdfWebBrowser.Visibility = Visibility.Collapsed;
        PdfTextBox.Visibility = Visibility.Visible;
        PdfTextBox.Text = ex.ToDetailString();
    }

    var pageIndex = 0;
    foreach (var page in pdfParser.Pages)
    {
        //todo: How to deal with pdf documents having more than 20 pages ?
        if (pageIndex > 20)
        {
            break;
        }
        var hasException = false;
        // In a WPF header a leading '_' marks the next character as access key; only the
        // single-digit page tabs get one.
        var underline = "";
        if (pageIndex < 10)
        {
            underline = "_";
        }
        var pageTabItem = new TabItem { Header = underline + pageIndex++ };

        stringBuilder.Clear();
        var isFirstContent = true;
        foreach (var content in page.Contents)
        {
            if (isFirstContent)
            {
                isFirstContent = false;
            }
            else
            {
                // Separator line between two contents of the same page.
                stringBuilder.AppendLine(new string('-', 80));
            }
            stringBuilder.AppendLine(content.Text);
            if (content.Exception != null)
            {
                hasException = true;
                stringBuilder.AppendLine(new string('+', 80));
                stringBuilder.AppendLine(content.Exception);
                stringBuilder.AppendLine(new string('+', 80));
            }
            if (content.Error != null)
            {
                hasException = true;
                stringBuilder.AppendLine(new string('+', 80));
                stringBuilder.AppendLine(content.Error);
                stringBuilder.AppendLine(new string('+', 80));
            }
        }
        if (page.Exception != null)
        {
            hasException = true;
            stringBuilder.AppendLine(new string('+', 80));
            stringBuilder.AppendLine(page.Exception);
            stringBuilder.AppendLine(new string('+', 80));
        }

        var textBox = new TextBox
        {
            Text = stringBuilder.ToString(),
            VerticalScrollBarVisibility = ScrollBarVisibility.Auto,
            HorizontalScrollBarVisibility = ScrollBarVisibility.Auto,
            IsReadOnly = true
        };
        if (hasException)
        {
            // Highlight tabs that contain an error.
            pageTabItem.Background = Brushes.Khaki;
        }
        pageTabItem.Content = textBox;
        PagesTabControl.Items.Add(pageTabItem);
    }

    // Info tab: version, document info/id, page count, fonts and metadata.
    var infoTabItem = new TabItem { Header = "_Info" };
    var tokeniser = pdfParser.Tokeniser;
    var infotext = "PDF Version: " + tokeniser.PdfVersion;
    if (tokeniser.DocumentInfo != null)
    {
        infotext += Environment.NewLine + Environment.NewLine + "Document Info: " + tokeniser.DocumentInfo;
    }
    if (tokeniser.DocumentID != null)
    {
        infotext += Environment.NewLine + Environment.NewLine + "Document ID: " + tokeniser.DocumentID;
    }
    infotext += Environment.NewLine + Environment.NewLine + "Pages: " + tokeniser.Pages.Count;
    infotext += Environment.NewLine + Environment.NewLine + "Fonts: ";
    foreach (var objectId_Token in tokeniser.Tokens)
    {
        if (objectId_Token.Value is DictionaryToken objectDictionaryToken)
        {
            if (objectDictionaryToken.Type == "Font")
            {
                var pdfFont = (PdfFont)objectDictionaryToken.PdfObject!;
                infotext += Environment.NewLine + Environment.NewLine + pdfFont.Name + objectDictionaryToken.ToString();
                if (pdfFont.ToUnicodeHeader != null)
                {
                    infotext += Environment.NewLine + "ToUnicodeHeader: " + pdfFont.ToUnicodeHeader;
                }
                if (pdfFont.CMap != null)
                {
                    foreach (var code_char in pdfFont.CMap)
                    {
                        infotext += Environment.NewLine + $"{code_char.Key}: '{code_char.Value}'";
                    }
                }
                if (pdfFont.Exception != null)
                {
                    infotext += Environment.NewLine + new string('+', 80);
                    infotext += Environment.NewLine + pdfFont.Exception;
                    infotext += Environment.NewLine + new string('+', 80);
                    infoTabItem.Background = Brushes.Khaki;
                }
                infotext += Environment.NewLine;
            }
        }
    }
    if (tokeniser.Metadata != null)
    {
        infotext += Environment.NewLine + Environment.NewLine + "Meta data: " + tokeniser.Metadata;
    }
    var textBoxInfo = new TextBox
    {
        Text = infotext,
        VerticalScrollBarVisibility = ScrollBarVisibility.Auto,
        HorizontalScrollBarVisibility = ScrollBarVisibility.Auto,
        IsReadOnly = true
    };
    infoTabItem.Content = textBoxInfo;
    PagesTabControl.Items.Add(infoTabItem);

    // Bytes tab: the file source as presented by PdfSourceRichTextBox.
    var bytesTabItem = new TabItem { Header = "_Bytes" };
    var pdfSourceRichTextBox = new PdfSourceRichTextBox(pdfParser.Tokeniser, stringBuilder, this);
    bytesTabItem.Content = pdfSourceRichTextBox;
    PagesTabControl.Items.Add(bytesTabItem);
    PagesTabControl.SelectedIndex = 0;
}