/// <summary>
/// Opens the PDF at <paramref name="input_path"/> and hands the content of every
/// page to <c>ProcessElements</c> through a shared <c>ElementReader</c>.
/// </summary>
/// <param name="input_path">Path of the PDF document to read.</param>
/// <remarks>
/// A <c>PDFNetException</c> is not rethrown; its message is appended to
/// <c>ConsoleLog</c>. The document and the reader are released, and
/// <c>PDFNet.Terminate()</c> is called, on every exit path.
/// </remarks>
public void RunPdfTron(string input_path)
{
    PDFNet.Initialize();
    try
    {
        // 'using' releases the document and the reader even when page processing throws.
        using (PDFDoc doc = new PDFDoc(input_path))
        {
            doc.InitSecurityHandler();

            using (ElementReader page_reader = new ElementReader())
            {
                // Read every page.
                for (PageIterator itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())
                {
                    page_reader.Begin(itr.Current());
                    ProcessElements(page_reader);
                    page_reader.End();
                }
            }
        }
    }
    catch (PDFNetException e)
    {
        ConsoleLog += e.Message;
    }
    finally
    {
        // Pair every Initialize() with a Terminate(), including on unexpected exceptions.
        PDFNet.Terminate();
    }
}
/// <summary>
/// Forwards the navigation event to the navigation helper and, when the user
/// navigates back while a document is loaded, asks whether possible changes
/// should be saved. Calls <c>PDFNet.Terminate()</c> afterwards.
/// </summary>
/// <param name="e">Navigation event data; its <c>NavigationMode</c> decides whether the prompt is shown.</param>
/// <remarks>
/// This is <c>async void</c> because it overrides a <c>void</c> framework method;
/// an exception thrown after the first <c>await</c> cannot be observed by the caller.
/// </remarks>
protected override async void OnNavigatedFrom(NavigationEventArgs e)
{
    this.navigationHelper.OnNavigatedFrom(e);

    try
    {
        if (e.NavigationMode == NavigationMode.Back && pdfDoc != null)
        {
            MessageDialog dialog = new MessageDialog("Sollen mögliche Änderungen gespeichert werden?", "Warnung");

            // "Ja" saves over the existing document; "Nein" just dismisses the dialog.
            dialog.Commands.Add(new UICommand("Ja", command => { OverwriteOldDocument(); }));
            dialog.Commands.Add(new UICommand("Nein"));

            await dialog.ShowAsync();
        }
    }
    finally
    {
        // Terminate even if showing the dialog or saving fails.
        PDFNet.Terminate();
    }
}
/// <summary>
/// Application entry point. Runs the one-call conversion sample followed by the
/// line-by-line conversion sample, reporting any failure on the console.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    const string simpleInput = "simple-word_2007.docx";
    const string simpleOutput = "simple-word_2007.pdf";
    const string flexibleInput = "the_rime_of_the_ancient_mariner.docx";
    const string flexibleOutput = "the_rime_of_the_ancient_mariner.pdf";

    PDFNet.Initialize();

    try
    {
        // Start with the single-call conversion method.
        SimpleConvert(simpleInput, simpleOutput);

        // Follow with the more flexible, step-wise conversion API.
        FlexibleConvert(flexibleInput, flexibleOutput);
    }
    catch (pdftron.Common.PDFNetException pdfError)
    {
        Console.WriteLine(pdfError.Message);
    }
    catch (Exception unexpected)
    {
        Console.WriteLine(string.Concat("Unrecognized Exception: ", unexpected.Message));
    }

    PDFNet.Terminate();
    Console.WriteLine("Done.");
}
/// <summary>
/// Extracts text from page 1 of the PDF at <paramref name="input_path"/> and
/// appends the results to <c>ConsoleLog</c>. Which extractions run is controlled by
/// the <c>example1_basic</c> … <c>example5_low_level</c> flags.
/// </summary>
/// <param name="input_path">Path of the PDF document to read.</param>
/// <remarks>
/// A <c>PDFNetException</c> is not rethrown; its message is appended to
/// <c>ConsoleLog</c>. If page 1 does not exist the method logs "Page not found."
/// and returns without running the low-level example. The document, extractor and
/// reader are released, and <c>PDFNet.Terminate()</c> is called, on every exit path.
/// </remarks>
public void ReadAdvanced(string input_path)
{
    PDFNet.Initialize();
    try
    {
        try
        {
            using (PDFDoc doc = new PDFDoc(input_path))
            {
                doc.InitSecurityHandler();

                Page page = doc.GetPage(1);
                if (page == null)
                {
                    ConsoleLog += "Page not found.";
                    // The enclosing using/finally still close the document and terminate PDFNet.
                    return;
                }

                using (TextExtractor txt = new TextExtractor())
                {
                    txt.Begin(page); // Read the page.
                    // Other options you may want to consider...
                    // txt.Begin(page, null, TextExtractor.ProcessingFlags.e_no_dup_remove);
                    // txt.Begin(page, null, TextExtractor.ProcessingFlags.e_remove_hidden_text);

                    // Example 1. Get all text on the page in a single string.
                    // Words will be separated with space or new line characters.
                    if (example1_basic)
                    {
                        ConsoleLog += "Word Count: " + txt.GetWordCount();
                        ConsoleLog += "\n\n- GetAsText --------------------------\n" + txt.GetAsText();
                        ConsoleLog += "-----------------------------------------------------------";
                    }

                    // Example 2. Get XML logical structure for the page.
                    if (example2_xml)
                    {
                        string text = txt.GetAsXML(
                            TextExtractor.XMLOutputFlags.e_words_as_elements |
                            TextExtractor.XMLOutputFlags.e_output_bbox |
                            TextExtractor.XMLOutputFlags.e_output_style_info);
                        ConsoleLog += "\n\n- GetAsXML --------------------------\n" + text;
                        ConsoleLog += "-----------------------------------------------------------";
                    }

                    // Example 3. Extract words one by one.
                    if (example3_wordlist)
                    {
                        for (TextExtractor.Line line = txt.GetFirstLine(); line.IsValid(); line = line.GetNextLine())
                        {
                            for (TextExtractor.Word word = line.GetFirstWord(); word.IsValid(); word = word.GetNextWord())
                            {
                                ConsoleLog += word.GetString();
                            }
                        }
                        ConsoleLog += "-----------------------------------------------------------";
                    }

                    // Example 4. A more advanced text extraction example.
                    // The output is an XML-like structure containing flows, paragraphs,
                    // lines and words, as well as style and positioning information.
                    if (example4_advanced)
                    {
                        AppendStructuredText(txt);
                    }
                }
            }

            ConsoleLog += "Done.";
        }
        catch (PDFNetException e)
        {
            ConsoleLog += e.Message;
        }

        // Example 5. Sample code showing how to use low-level text extraction APIs.
        if (example5_low_level)
        {
            ExtractLowLevelText(input_path);
        }
    }
    finally
    {
        // Pair every Initialize() with a Terminate(), including on early return.
        PDFNet.Terminate();
    }
}

/// <summary>
/// Appends a Flow / Para / Line / Word markup of the extracted page to <c>ConsoleLog</c>.
/// </summary>
/// <param name="txt">Extractor on which <c>Begin</c> has already been called.</param>
private void AppendStructuredText(TextExtractor txt)
{
    int cur_flow_id = -1, cur_para_id = -1;

    // For each line on the page...
    for (TextExtractor.Line line = txt.GetFirstLine(); line.IsValid(); line = line.GetNextLine())
    {
        if (line.GetNumWords() == 0)
        {
            continue;
        }

        if (cur_flow_id != line.GetFlowID())
        {
            // Close the previous flow (and its open paragraph) before starting a new one.
            if (cur_flow_id != -1)
            {
                if (cur_para_id != -1)
                {
                    cur_para_id = -1;
                    ConsoleLog += "</Para>";
                }
                ConsoleLog += "</Flow>";
            }
            cur_flow_id = line.GetFlowID();
            ConsoleLog += "<Flow id=\"" + cur_flow_id + "\">";
        }

        if (cur_para_id != line.GetParagraphID())
        {
            if (cur_para_id != -1)
            {
                ConsoleLog += "</Para>";
            }
            cur_para_id = line.GetParagraphID();
            ConsoleLog += "<Para id=\"" + cur_para_id + "\">";
        }

        Rect bbox = line.GetBBox();
        TextExtractor.Style line_style = line.GetStyle();

        // The opening tag goes to ConsoleLog like the matching "</Line>", with the
        // attribute quote closed. Coordinate order (y1, y2, x1, x2) is kept as written.
        ConsoleLog += "<Line box=\"" + bbox.y1 + "," + bbox.y2 + "," + bbox.x1 + "," + bbox.x2 + "\"";
        // NOTE(review): PrintStyle is assumed to emit attributes inside the open tag,
        // mirroring how the word branch below uses it — confirm against PrintStyle.
        PrintStyle(line_style);
        ConsoleLog += ">";

        // For each word in the line...
        for (TextExtractor.Word word = line.GetFirstWord(); word.IsValid(); word = word.GetNextWord())
        {
            // Skip empty words before emitting anything so no dangling "<Word" is left behind.
            if (word.GetStringLen() == 0)
            {
                continue;
            }

            // Output the bounding box for the word.
            bbox = word.GetBBox();
            ConsoleLog += string.Format("<Word box=\"{0}, {1}, {2}, {3}\"", bbox.x1, bbox.y1, bbox.x2, bbox.y2);

            // If the word style is different from the parent style, output the new style.
            TextExtractor.Style s = word.GetStyle();
            if (s != line_style)
            {
                PrintStyle(s);
            }

            ConsoleLog += ">\n" + word.GetString();
            ConsoleLog += "</Word>";
        }

        ConsoleLog += "</Line>";
    }

    // Close whatever is still open after the last line.
    if (cur_flow_id != -1)
    {
        if (cur_para_id != -1)
        {
            ConsoleLog += "</Para>";
        }
        ConsoleLog += "</Flow>";
    }
}

/// <summary>
/// Runs the low-level extraction sample: dumps the text of the first page the
/// iterator yields, then reads three fixed rectangles from page 1. Output and any
/// <c>PDFNetException</c> message are appended to <c>ConsoleLog</c>.
/// </summary>
/// <param name="input_path">Path of the PDF document to read.</param>
private void ExtractLowLevelText(string input_path)
{
    try
    {
        LowLevelTextExtractUtils util = new LowLevelTextExtractUtils();

        using (PDFDoc doc = new PDFDoc(input_path))
        {
            doc.InitSecurityHandler();

            using (ElementReader reader = new ElementReader())
            {
                // Example 1. Extract all text content from the document.
                // Only the page the iterator starts on is read; the loop over
                // all pages was disabled in the original code.
                PageIterator itr = doc.GetPageIterator();
                reader.Begin(itr.Current());
                LowLevelTextExtractUtils u = new LowLevelTextExtractUtils();
                u.DumpAllText(reader);
                ConsoleLog += u.ConsoleLog;
                reader.End();

                // Example 2. Extract text based on the selection rectangle.
                ConsoleLog += "----------------------------------------------------";
                ConsoleLog += "Extract text based on the selection rectangle.";
                ConsoleLog += "----------------------------------------------------";

                Page first_page = doc.GetPage(1);
                string field1 = util.ReadTextFromRect(first_page, new Rect(27, 392, 563, 534), reader);
                string field2 = util.ReadTextFromRect(first_page, new Rect(28, 551, 106, 623), reader);
                string field3 = util.ReadTextFromRect(first_page, new Rect(208, 550, 387, 621), reader);

                ConsoleLog += "Field 1: " + field1;
                ConsoleLog += "Field 2: " + field2;
                ConsoleLog += "Field 3: " + field3;
            }
        }

        ConsoleLog += "Done.";
    }
    catch (PDFNetException e)
    {
        ConsoleLog += e.Message;
    }
}
/// <summary>
/// Session-ending event handler: calls <c>PDFNet.Terminate()</c>.
/// </summary>
/// <param name="sender">Event source; not read.</param>
/// <param name="e">Session-ending event data; not read.</param>
private void Current_SessionEnding(object sender, SessionEndingCancelEventArgs e)
{
    // Neither argument is consulted; the handler only shuts PDFNet down.
    PDFNet.Terminate();
}