/// <summary>
/// Extracts the text of one PDF page and appends it, line by line, to a text file.
/// A non-empty line shorter than 54 characters whose last character is not
/// punctuation and whose first character is not a digit is treated as a title:
/// it is echoed to the console and written with a "Title - - - - - " prefix.
/// A dashed separator line is appended after the page.
/// </summary>
/// <param name="reader">Open reader on the PDF file to extract from.</param>
/// <param name="page">Number of the page whose text is extracted.</param>
/// <param name="its">Extraction strategy passed to <c>PdfTextExtractor.GetTextFromPage</c>.</param>
/// <param name="outPath">Path of the text file that is appended to.</param>
private static void WriteInfile(PdfReader reader, int page, ITextExtractionStrategy its, string outPath)
{
    string strText = PdfTextExtractor.GetTextFromPage(reader, page, its);

    // NOTE(review): this round-trip through Encoding.Default can only leave the text
    // unchanged or replace characters the default code page cannot represent.
    // Kept as-is to preserve existing output; confirm whether it is still wanted.
    strText = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(strText)));

    // Open the output once (append mode) instead of once per line.
    using (System.IO.StreamWriter file = new System.IO.StreamWriter(outPath, true))
    {
        foreach (string line in strText.Split('\n'))
        {
            // Length including the historical "\0" terminator; still printed in the console trace.
            int index = line.Length + 1;

            // The emptiness check comes first: the previous code indexed the character
            // before the terminator unconditionally and threw on empty lines.
            bool isTitle = line.Length > 0
                && index < 55
                && !char.IsPunctuation(line[line.Length - 1])
                && !char.IsDigit(line[0]);

            if (isTitle)
            {
                Console.WriteLine("TITLE = " + line + " " + index);
                file.Write("Title - - - - - ");
                file.WriteLine(line + "\n");
            }
            else
            {
                file.WriteLine(line);
            }
        }

        file.WriteLine("- - - - - - - - - - - - - - - - - - - - - - - - - - - ");
    }
}
/// <summary>
/// Attaches two region-filtered <c>LocationTextExtractionStrategy</c> listeners to one
/// <c>FilteredEventListener</c>, processes page 1 of "test.pdf" glyph by glyph, and
/// checks the text each region collected.
/// </summary>
public virtual void TestWithMultiFilteredRenderListener()
{
    PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "test.pdf"));
    FilteredEventListener listener = new FilteredEventListener();

    // First region of interest.
    TextRegionEventFilter firstFilter = new TextRegionEventFilter(new Rectangle(122f, 678.9f, 22f, 12f));
    ITextExtractionStrategy region1Listener =
        listener.AttachEventListener(new LocationTextExtractionStrategy(), firstFilter);

    // Second region of interest.
    TextRegionEventFilter secondFilter = new TextRegionEventFilter(new Rectangle(156f, 678.9f, 13f, 12f));
    ITextExtractionStrategy region2Listener =
        listener.AttachEventListener(new LocationTextExtractionStrategy(), secondFilter);

    // Wrap in a GlyphEventListener so the filters see one event per glyph.
    PdfCanvasProcessor parser = new PdfCanvasProcessor(new GlyphEventListener(listener));
    parser.ProcessPageContent(pdfDocument.GetPage(1));

    NUnit.Framework.Assert.AreEqual("Your", region1Listener.GetResultantText());
    NUnit.Framework.Assert.AreEqual("dju", region2Listener.GetResultantText());
}
/// <summary>Extract text from a specified page using an extraction strategy.</summary>
/// <param name="page">the page for the text to be extracted from</param>
/// <param name="strategy">the strategy that receives the page content events and produces the text</param>
/// <returns>the text reported by <paramref name="strategy"/> after the page has been processed</returns>
public static String GetTextFromPage(PdfPage page, ITextExtractionStrategy strategy)
{
    // Feed the page content through a processor bound to the strategy, then read the strategy back.
    new PdfCanvasProcessor(strategy).ProcessPageContent(page);
    return strategy.GetResultantText();
}
/// <summary>
/// Routes text render events to a per-MCID extraction strategy, creating a
/// <c>LocationTextExtractionStrategy</c> the first time an MCID is seen.
/// Events of any other type, and text events whose MCID is -1, are ignored.
/// </summary>
/// <param name="data">the event payload; cast to <c>TextRenderInfo</c> for text events</param>
/// <param name="type">the kind of event being reported</param>
public virtual void EventOccurred(IEventData data, EventType type)
{
    if (type != EventType.RENDER_TEXT)
    {
        return;
    }

    TextRenderInfo renderInfo = (TextRenderInfo)data;
    int mcid = renderInfo.GetMcid();
    if (mcid == -1)
    {
        return;
    }

    ITextExtractionStrategy perMcidStrategy = this.contentByMcid.Get(mcid);
    if (perMcidStrategy == null)
    {
        perMcidStrategy = new LocationTextExtractionStrategy();
        this.contentByMcid.Put(mcid, perMcidStrategy);
    }

    perMcidStrategy.EventOccurred(data, type);
}
/// <summary>
/// Attaches two region-filtered <c>LocationTextExtractionStrategy</c> listeners to one
/// <c>MultiFilteredRenderListener</c>, processes page 1 of "test.pdf" glyph by glyph, and
/// checks the text each region collected.
/// </summary>
virtual public void TestWithMultiFilteredRenderListener()
{
    PdfReader pdfReader = TestResourceUtils.GetResourceAsPdfReader(TEST_RESOURCES_PATH, "test.pdf");
    PdfReaderContentParser parser = new PdfReaderContentParser(pdfReader);
    MultiFilteredRenderListener listener = new MultiFilteredRenderListener();

    // Both regions share the same vertical band; only the horizontal extent differs.
    float bandY1 = 841.9f - 151;
    float bandY2 = 841.9f - 163;

    RegionTextRenderFilter firstFilter = new RegionTextRenderFilter(new Rectangle(122f, bandY1, 144f, bandY2));
    ITextExtractionStrategy region1Listener =
        listener.AttachRenderListener(new LocationTextExtractionStrategy(), firstFilter);

    RegionTextRenderFilter secondFilter = new RegionTextRenderFilter(new Rectangle(156f, bandY1, 169f, bandY2));
    ITextExtractionStrategy region2Listener =
        listener.AttachRenderListener(new LocationTextExtractionStrategy(), secondFilter);

    // Wrap in a GlyphRenderListener so the filters see one event per glyph.
    parser.ProcessContent(1, new GlyphRenderListener(listener));

    Assert.AreEqual("Your", region1Listener.GetResultantText());
    Assert.AreEqual("dju", region2Listener.GetResultantText());
}
/// <summary>
/// Extracts the text of every page of a PDF file into an in-memory UTF-8 stream.
/// Each page's text is followed by a line of asterisks.
/// </summary>
/// <param name="filename">Path of the PDF file to read.</param>
/// <param name="textExtractionStrategy">
/// Strategy passed to <c>PdfTextExtractor.GetTextFromPage</c> for every page.
/// NOTE(review): the same instance is reused for all pages; confirm the strategy does not
/// accumulate text across pages.
/// </param>
/// <returns>A stream positioned at offset 0 that contains the extracted text.</returns>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
public static MemoryStream ExtractPdfText(string filename, ITextExtractionStrategy textExtractionStrategy)
{
    if (!File.Exists(filename))
    {
        throw new FileNotFoundException("File: [" + filename + "] does not exist.");
    }

    MemoryStream textStream = new MemoryStream();

    // leaveOpen = true so disposing the writer flushes it without closing the stream we return.
    using (StreamWriter output = new StreamWriter(textStream, Encoding.UTF8, 1024, true))
    using (PdfReader pdfReader = new PdfReader(filename))
    {
        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            string rawText = PdfTextExtractor.GetTextFromPage(pdfReader, page, textExtractionStrategy);
            byte[] defaultBytes = Encoding.Default.GetBytes(rawText);
            byte[] utf8Bytes = Encoding.Convert(Encoding.Default, Encoding.UTF8, defaultBytes);

            output.WriteLine(Encoding.UTF8.GetString(utf8Bytes));
            output.WriteLine("*********************************************************************************************");
        }
    }

    textStream.Seek(0, SeekOrigin.Begin);
    return textStream;
}
/// <summary>Extract text from a specified page using an extraction strategy.</summary>
/// <remarks>
/// The additional content operators are handed to the <c>PdfCanvasProcessor</c> that
/// processes the page, so they can influence how the PDF instructions are handled.
/// </remarks>
/// <param name="page">the page for the text to be extracted from</param>
/// <param name="strategy">the strategy to use for extracting text</param>
/// <param name="additionalContentOperators">
/// a map of custom <see cref="IContentOperator"/>s for rendering instructions
/// </param>
/// <returns>the text reported by <paramref name="strategy"/> after the page has been processed</returns>
public static String GetTextFromPage(PdfPage page, ITextExtractionStrategy strategy, IDictionary<String, IContentOperator> additionalContentOperators)
{
    PdfCanvasProcessor processor = new PdfCanvasProcessor(strategy, additionalContentOperators);
    processor.ProcessPageContent(page);

    String extracted = strategy.GetResultantText();
    return extracted;
}
/// <summary>
/// Opens the given PDF file and prepares a simple text extraction strategy for it.
/// </summary>
/// <param name="filename">Path of the PDF file to open.</param>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
public PdfSearcher(string filename)
{
    // Fail fast before touching any PDF classes.
    if (!File.Exists(filename))
    {
        throw new FileNotFoundException($"Did not find {filename}");
    }

    Pdf = new PdfReader(filename);
    PdfDocument = new PdfDocument(Pdf);
    textExtractionStrategy = new SimpleTextExtractionStrategy();
}
/// <summary>
/// Opens <paramref name="sourceFileName"/> and builds a location-based text extraction
/// strategy restricted to the rectangle described by the four numeric arguments.
/// </summary>
/// <param name="x1">First argument passed to the <c>Rectangle</c> constructor.</param>
/// <param name="y1">Second argument passed to the <c>Rectangle</c> constructor.</param>
/// <param name="x2">Third argument passed to the <c>Rectangle</c> constructor.</param>
/// <param name="y2">Fourth argument passed to the <c>Rectangle</c> constructor.</param>
/// <param name="sourceFileName">Path of the PDF file to open.</param>
public SetRectangle(int x1, int y1, int x2, int y2, string sourceFileName)
{
    // Remember the raw inputs.
    this.x1 = x1;
    this.y1 = y1;
    this.x2 = x2;
    this.y2 = y2;
    sn = sourceFileName;

    TextRegionEventFilter regionFilter = new TextRegionEventFilter(new Rectangle(x1, y1, x2, y2));

    pdfReader = new PdfReader(sourceFileName);
    pdfDoc = new PdfDocument(pdfReader);
    strategy = new FilteredTextEventListener(new LocationTextExtractionStrategy(), regionFilter);
}
/// <summary>
/// Reads a PDF file page by page and writes the extracted text of each page, followed by
/// a separator line of asterisks, into a UTF-8 encoded memory stream.
/// </summary>
/// <param name="filename">Path of the PDF file to read.</param>
/// <param name="textExtractionStrategy">
/// Strategy handed to <c>PdfTextExtractor.GetTextFromPage</c> for each page.
/// NOTE(review): one instance serves every page; verify it does not carry text over
/// from earlier pages.
/// </param>
/// <returns>The memory stream holding the text, rewound to its beginning.</returns>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
public static MemoryStream ExtractPdfText(string filename, ITextExtractionStrategy textExtractionStrategy)
{
    bool fileIsPresent = File.Exists(filename);
    if (!fileIsPresent)
    {
        throw new FileNotFoundException("File: [" + filename + "] does not exist.");
    }

    var textStream = new MemoryStream();

    // The writer is told to leave the stream open because the stream is the return value.
    using (var output = new StreamWriter(textStream, Encoding.UTF8, 1024, true))
    {
        using (var pdfReader = new PdfReader(filename))
        {
            var page = 1;
            while (page <= pdfReader.NumberOfPages)
            {
                var pageText = PdfTextExtractor.GetTextFromPage(pdfReader, page, textExtractionStrategy);
                var converted = Encoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(pageText));
                var text = Encoding.UTF8.GetString(converted);

                output.WriteLine(text);
                output.WriteLine("*********************************************************************************************");
                page++;
            }
        }
    }

    textStream.Seek(0, SeekOrigin.Begin);
    return textStream;
}
/// <summary>
/// Creates a listener around the given extraction strategy. The strategy is passed to the
/// base constructor and also kept in the <c>deleg</c> field of this class.
/// </summary>
/// <param name="deleg">the text extraction strategy to delegate to</param>
public GlyphTextRenderListener(ITextExtractionStrategy deleg)
    : base(deleg)
{
    this.deleg = deleg;
}
/// <summary>
/// Extract text from a specified page using an extraction strategy.
/// Also allows registration of custom content operators.
/// </summary>
/// <param name="reader">the reader to extract text from</param>
/// <param name="pageNumber">the page to extract text from</param>
/// <param name="strategy">the strategy to use for extracting text</param>
/// <param name="additionalContentOperators">a dictionary of custom IContentOperators for rendering instructions</param>
/// <returns>the text reported by the strategy returned from <c>ProcessContent</c></returns>
public static String GetTextFromPage(PdfReader reader, int pageNumber, ITextExtractionStrategy strategy, IDictionary<string, IContentOperator> additionalContentOperators)
{
    PdfReaderContentParser contentParser = new PdfReaderContentParser(reader);
    ITextExtractionStrategy processed = contentParser.ProcessContent(pageNumber, strategy, additionalContentOperators);
    return processed.GetResultantText();
}
/// <summary>
/// Creates the strategy and stores the supplied extraction strategy in the
/// <c>textextractionstrategy</c> field.
/// </summary>
/// <param name="strategy">the text extraction strategy to keep</param>
public LimitedTextStrategy2(ITextExtractionStrategy strategy)
{
    textextractionstrategy = strategy;
}
/// <summary>
/// Constructs a <see cref="GlyphTextEventListener"/> instance from an
/// <see cref="ITextExtractionStrategy"/> delegate, to which the expanded text events
/// for each glyph will be passed on.
/// </summary>
/// <param name="delegate_">delegate to pass the expanded glyph render events to.</param>
public GlyphTextEventListener(ITextExtractionStrategy delegate_)
    : base(delegate_)
{
    // All state is held by the base class.
}
/// <summary>
/// Creates a filtered listener. The delegate and filters are passed to the base
/// constructor, and the delegate is also kept in the <c>deleg</c> field of this class.
/// </summary>
/// <param name="deleg">the delegate that will receive filtered text operations</param>
/// <param name="filters">the filter(s) to apply</param>
public FilteredTextRenderListener(ITextExtractionStrategy deleg, RenderFilter[] filters)
    : base(deleg, filters)
{
    this.deleg = deleg;
}
/// <summary>
/// Constructs a <see cref="FilteredTextEventListener"/> instance with an
/// <see cref="ITextExtractionStrategy"/> delegate.
/// </summary>
/// <param name="delegate_">
/// a delegate that will be called when all the corresponding filters for an event pass
/// </param>
/// <param name="filterSet">
/// filters attached to the delegate that will be tested before passing an event on to the delegate
/// </param>
public FilteredTextEventListener(ITextExtractionStrategy delegate_, params IEventFilter[] filterSet)
    : base(delegate_, filterSet)
{
    // All state is held by the base class.
}
/// <summary>
/// Reads the text of a document page by page into <c>doc.DocBodyDic</c> (page number to text).
/// Paths containing "pdf" are read with iTextSharp; paths containing "doc" are read
/// through Word automation, with up to two attempts.
/// </summary>
/// <param name="pdfFile">Path of the file to read.</param>
/// <param name="doc">Document object whose <c>DocBodyDic</c> receives the page texts.</param>
/// <param name="pages">Set to the page count of the PDF (PDF branch only).</param>
/// <returns>
/// <c>true</c> when the file was read; <c>false</c> when the file type is not recognised or
/// reading failed. Failures are reported on the console rather than thrown.
/// </returns>
public bool ReadPdf(string pdfFile, ref Documents doc, ref int pages)
{
    bool success = false;
    // Characters stripped from both ends of a page's text (32 = space, 160 = no-break space).
    char[] trimChars = { '\r', '\n', '\t', (char)32, (char)160 };

    try
    {
        if (pdfFile.ToLower().Contains("pdf"))
        {
            PdfReader r = new PdfReader(pdfFile);
            try
            {
                pages = r.NumberOfPages;
                PdfReaderContentParser parser = new PdfReaderContentParser(r);
                for (int i = 1; i <= pages; i++)
                {
                    ITextExtractionStrategy st = parser.ProcessContent<SimpleTextExtractionStrategy>(i, new SimpleTextExtractionStrategy());
                    string text = st.GetResultantText().Trim(trimChars);
                    if (string.IsNullOrEmpty(text))
                    {
                        // Second attempt with the default extractor when the first yields nothing.
                        text = PdfTextExtractor.GetTextFromPage(r, i).Trim(trimChars);
                    }

                    if (!string.IsNullOrEmpty(text))
                    {
                        doc.DocBodyDic.Add(i, text);
                    }
                }
            }
            finally
            {
                // Close the reader even when extraction throws.
                r.Close();
            }

            success = true;
        }
        else if (pdfFile.ToLower().Contains("doc"))
        {
            MsWord.Application newApp = null;
            MsWord.Document msdoc = null;
            try
            {
                int retry = 2;
                while (retry > 0)
                {
                    try
                    {
                        // Each attempt starts from a fresh Word instance; the previous one
                        // (if any) was quit in the finally block below.
                        newApp = new MsWord.Application();
                        System.Threading.Thread.Sleep(1000);
                        msdoc = newApp.Documents.Open(pdfFile);
                        System.Threading.Thread.Sleep(1000);

                        object nothing = Missing.Value;
                        MsWord.WdStatistic stat = MsWord.WdStatistic.wdStatisticPages;
                        int num = msdoc.ComputeStatistics(stat, ref nothing);
                        for (int i = 1; i <= num; i++)
                        {
                            // Pages stored by an earlier, partially successful attempt are kept.
                            if (doc.DocBodyDic.ContainsKey(i))
                            {
                                continue;
                            }

                            object objWhat = MsWord.WdGoToItem.wdGoToPage;
                            object objWhich = MsWord.WdGoToDirection.wdGoToAbsolute;
                            object objPage = (object)i;
                            MsWord.Range range1 = msdoc.GoTo(ref objWhat, ref objWhich, ref objPage, ref nothing);
                            MsWord.Range range2 = range1.GoToNext(MsWord.WdGoToItem.wdGoToPage);
                            object objStart = range1.Start;
                            object objEnd = range2.Start;
                            if (range1.Start == range2.Start)
                            {
                                // No following page: read to the end of the document.
                                objEnd = msdoc.Characters.Count;
                            }

                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine("DEBUG: Path: {0}, {1}-{2}........", pdfFile, objStart, objEnd);
                            Console.ResetColor();

                            if ((int)objStart <= (int)objEnd)
                            {
                                string innerText = msdoc.Range(ref objStart, ref objEnd).Text;
                                doc.DocBodyDic.Add(i, innerText);
                            }
                        }

                        success = true;
                        break;
                    }
                    catch (Exception ex)
                    {
                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine("Retry to read word {0}, Exception: {1}..", pdfFile, ex.ToString());
                        Console.ResetColor();
                        System.Threading.Thread.Sleep(1000);
                        retry--;
                    }
                    finally
                    {
                        if (newApp != null)
                        {
                            newApp.NormalTemplate.Saved = true;
                            if (msdoc != null)
                            {
                                msdoc.Close(false);
                            }

                            newApp.Quit();
                        }

                        // Drop the references so a retry never touches a closed document
                        // or an application that has already been quit.
                        msdoc = null;
                        newApp = null;
                    }
                }
            }
            catch (Exception e)
            {
                // Cleanup of the Word objects failed; report it instead of hiding it.
                Console.WriteLine("Failed to release Word for {0}, Exception: {1}", pdfFile, e.ToString());
            }
        }
    }
    catch (Exception ex)
    {
        // Reading is best-effort: report the failure and return the current result.
        Console.WriteLine("Failed to read {0}, Exception: {1}", pdfFile, ex.ToString());
    }

    return success;
}
/// <summary>Extract text from a specified page using an extraction strategy.</summary>
/// <param name="reader">the reader to extract text from</param>
/// <param name="pageNumber">the page to extract text from</param>
/// <param name="strategy">the strategy to use for extracting text</param>
/// <returns>the text reported by the strategy returned from <c>ProcessContent</c></returns>
public static String GetTextFromPage(PdfReader reader, int pageNumber, ITextExtractionStrategy strategy)
{
    ITextExtractionStrategy processed = new PdfReaderContentParser(reader).ProcessContent(pageNumber, strategy);
    return processed.GetResultantText();
}
/// <summary>
/// Runs the content of one page through the given extraction strategy and returns the
/// resulting text.
/// </summary>
/// <param name="reader">the reader to extract text from</param>
/// <param name="pageNumber">the page to extract text from</param>
/// <param name="strategy">the strategy to use for extracting text</param>
/// <returns>the extracted text</returns>
public static String GetTextFromPage(PdfReader reader, int pageNumber, ITextExtractionStrategy strategy)
{
    var contentParser = new PdfReaderContentParser(reader);
    var populatedStrategy = contentParser.ProcessContent(pageNumber, strategy);
    String extractedText = populatedStrategy.GetResultantText();
    return extractedText;
}
/// <summary>
/// Command-line entry point: validates the input PDF and output directory, then splits the
/// PDF into separate documents whenever the key captured by the user-supplied regular
/// expression changes from one page to the next.
/// </summary>
/// <param name="args">Command-line arguments, parsed into <c>Options</c>.</param>
/// <returns>
/// 0 on success; 1 if the input file does not exist; 2 if it is not a ".pdf" file;
/// 3 if the output directory does not exist; 4 if no split type was selected.
/// </returns>
static int Main(string[] args)
{
    #region var declaration
    bool verbose = false;
    bool datedFileNames = false;
    bool showRegexMatches = false;
    string inputFilename = "";
    bool inputFileExists;
    string extension;
    bool isPDF = false;
    string outputDirname = "";
    bool outputDirExsists = false;
    uint splitType = 0x0;
    string keySplitRegex = "";
    #endregion

    #region arg intake
    // Parse the arguments and copy the option values into the locals above.
    var result = Parser.Default.ParseArguments<Options>(args);
    result.WithParsed<Options>(o =>
    {
        inputFilename = o.inputFilename;
        outputDirname = o.outputDirname;
        if (o.verbose)
        {
            verbose = true;
        }
        if (o.datedFileNames)
        {
            datedFileNames = true;
        }
        if (o.showRegexMatches)
        {
            showRegexMatches = true;
        }
        if (!String.IsNullOrEmpty(o.keySplitRegex))
        {
            splitType = splitType | 0x1;
            keySplitRegex = o.keySplitRegex;
        }
    });
    #endregion

    #region verify input
    // input file location
    if (verbose)
    {
        Console.WriteLine("Input File:\t" + inputFilename);
    }
    inputFileExists = File.Exists(inputFilename);
    if (verbose)
    {
        Console.WriteLine(inputFileExists ? "File exists:\tTrue" : "File exists:\tFalse");
    }
    if (!inputFileExists)
    {
        if (verbose)
        {
            Console.WriteLine("Input File does not exsist; Exiting with error code 1.");
        }
#if DEBUG
        Console.ReadKey();
#endif
        return 1;
    }

    // input file format
    extension = System.IO.Path.GetExtension(inputFilename).ToLower();
    if (verbose)
    {
        Console.WriteLine("File format:\t" + extension);
    }
    isPDF = string.Equals(extension, ".pdf");
    if (verbose)
    {
        Console.WriteLine(isPDF ? "Correct Format:\tTrue" : "Correct Format:\tFalse");
    }
    if (!isPDF)
    {
        if (verbose)
        {
            Console.WriteLine("Input File is not a PDF; Exiting with error code 2.");
        }
#if DEBUG
        Console.ReadKey();
#endif
        return 2;
    }

    // output directory existence
    if (verbose)
    {
        Console.WriteLine("Output to:\t" + outputDirname);
    }
    outputDirExsists = Directory.Exists(outputDirname);
    if (verbose)
    {
        Console.WriteLine(outputDirExsists ? "Output valid:\tTrue" : "Output valid:\tFalse");
    }
    if (!outputDirExsists)
    {
        if (verbose)
        {
            Console.WriteLine("Output dir does not exsist; Exiting with error code 3.");
        }
#if DEBUG
        Console.ReadKey();
#endif
        return 3;
    }
    #endregion

    // Split
    switch (splitType)
    {
        case 0x1: // key match
        {
            if (verbose)
            {
                Console.WriteLine("split type:\tKey");
            }
            if (verbose)
            {
                Console.WriteLine("Key regex:\t" + keySplitRegex);
            }

            Regex regex = new Regex(keySplitRegex, RegexOptions.Compiled | RegexOptions.Multiline);
            PdfReader reader = new PdfReader(inputFilename);
            try
            {
                PdfReaderContentParser parser = new PdfReaderContentParser(reader);
                string regexKeyMatch = "";
                int docPageStart = 1;
                string newDocName = "";

                for (int page = 1; page <= reader.NumberOfPages; page++)
                {
                    if (showRegexMatches)
                    {
                        Console.WriteLine("Page: " + page);
                    }

                    ITextExtractionStrategy strategy = parser.ProcessContent(page, new SimpleTextExtractionStrategy());
                    int matchCount = 0;
                    Match match = regex.Match(strategy.GetResultantText());

                    // NOTE(review): only the first match on each page is examined; the
                    // NextMatch() call below suggests a loop over all matches may have been
                    // intended — confirm before changing.
                    if (showRegexMatches)
                    {
                        Console.WriteLine("Match: " + (++matchCount));
                    }

                    for (int x = 1; x <= 2; x++)
                    {
                        Group group = match.Groups[x];
                        if (showRegexMatches)
                        {
                            Console.WriteLine("Group " + x + " = '" + group + "'");
                        }

                        CaptureCollection cc = group.Captures;
                        for (int y = 0; y < cc.Count; y++)
                        {
                            Capture capture = cc[y];
                            string captureS = capture.ToString();
                            if (!string.Equals(captureS, regexKeyMatch))
                            {
                                // The key changed: write out the pages collected for the previous key.
                                if (page > 1)
                                {
                                    // Path.Combine inserts the directory separator when
                                    // outputDirname does not already end with one.
                                    ExtractPages(inputFilename, System.IO.Path.Combine(outputDirname, newDocName), docPageStart, (page - 1));
                                }

                                // Start tracking the new key.
                                regexKeyMatch = captureS;
                                if (datedFileNames)
                                {
                                    newDocName = DateTime.Now.ToString("yyyyMMdd") + "_" + captureS + ".pdf";
                                }
                                else
                                {
                                    newDocName = captureS + ".pdf";
                                }
                                docPageStart = page;
                                if (verbose)
                                {
                                    System.Console.WriteLine("New document at page:\t" + docPageStart);
                                }
                            }

                            if (showRegexMatches)
                            {
                                System.Console.WriteLine("Capture " + y + " = '" + capture + "', Position=" + capture.Index);
                            }
                        }
                    }

                    match = match.NextMatch();
                }

                // NOTE(review): the pages belonging to the last key are never passed to
                // ExtractPages after the loop ends — verify whether that is intended.
            }
            finally
            {
                // Release the input file even if extraction throws.
                reader.Close();
            }

            break;
        }

        default:
            if (verbose)
            {
                Console.WriteLine("No valid split type selected; Exiting with error code 4.");
            }
#if DEBUG
            Console.ReadKey();
#endif
            return 4;
    }

#if DEBUG
    Console.ReadKey();
#endif
    return 0;
}
/// <summary>
/// Creates a filtered listener from a delegate and any number of filters. Both are passed
/// to the base constructor, and the delegate is also kept in the <c>deleg</c> field.
/// </summary>
/// <param name="deleg">the delegate that will receive filtered text operations</param>
/// <param name="filters">the filter(s) to apply</param>
public FilteredTextRenderListener(ITextExtractionStrategy deleg, params RenderFilter[] filters)
    : base(deleg, filters)
{
    this.deleg = deleg;
}
/// <summary>
/// Walks every page of the PDF at <c>PdfPath</c>, classifies the text found in the
/// page-number area of each page, and writes the collected information to a text file.
/// </summary>
/// <returns>
/// A summary string (document info, elapsed time, error count) on completion; the
/// exception message if processing threw; or an "Invalid export file path" message when the
/// export path check fails.
/// </returns>
public String ExportData()
{
    DocInfo docInfo = new DocInfo();

    try
    {
        if (!ExportFilePath.isFilePathOK(".txt"))
        {
            return "Invalid export file path: " + ExportFilePath;
        }

        BeforeProcessing();

        using (var pdfReader = new PdfReader(PdfPath))
        {
            // Used further down to look for images on pages with no recognisable text.
            var parser = new PdfReaderContentParser(pdfReader);

            // Does page 1 carry "for official use only" in the probed area?
            ITextExtractionStrategy officialTextRectangle = MakeRectangle(70, 1, 375, 120);
            string officialText = PdfTextExtractor.GetTextFromPage(pdfReader, 1, officialTextRectangle);
            officialText = Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(officialText)));
            bool hasOfficialUse = officialText.ToString().ToUpper().Contains("FOROFFICIALUSEONLY");

            for (Int32 currentPage = 1; currentPage <= pdfReader.NumberOfPages; currentPage++)
            {
                PageInfo currentPageInfo = new PageInfo() { PageNum = currentPage };

                // Choose the page-number rectangle from the page dimensions.
                var pageSize = pdfReader.GetPageSize(currentPage);
                float height = pageSize.Height;
                float width = pageSize.Width;
                bool heightInRange = height > 785 && height < 802;

                ITextExtractionStrategy rectangleStrategy;
                if (heightInRange && width > 1215 && width < 1230)
                {
                    rectangleStrategy = MakeRectangle(450, 1, 450, 70);
                }
                else if (heightInRange && width > 608 && width < 617)
                {
                    rectangleStrategy = MakeRectangle(190, 1, 255, 74);
                }
                else
                {
                    // Unsupported page size: log it and skip the page entirely.
                    myLogger.Log("Page # " + currentPage.ToString() + " not 8.5 x 11 or 11 x 17");
                    continue;
                }

                string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, currentPage, rectangleStrategy);
                currentText = Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(currentText)));
                if (hasOfficialUse)
                {
                    currentText = OfficialUseRegex.Replace(currentText, "").Trim();
                }

                // Look for a "work package index" heading on the page.
                ITextExtractionStrategy workPackageIndexStrategy = MakeRectangle(60, 600, 160, 50);
                string WPI = PdfTextExtractor.GetTextFromPage(pdfReader, currentPage, workPackageIndexStrategy);
                WPI = Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(WPI)));
                if (WPI.ToUpper().Contains("WORKPACKAGEINDEX"))
                {
                    currentPageInfo.HasWpIndex = true;
                }

                // Classify the page-number text; the first matching rule wins.
                if (NumDashNumRegex.IsMatch(currentText))
                {
                    // #-#
                    currentPageInfo.PageNumText = NumDashNumRegex.Match(currentText).Value.Trim();
                    currentPageInfo.IsWP = true;
                }
                else if (NumDashNumBlankRegex.IsMatch(currentText))
                {
                    // #-#/blank
                    currentPageInfo.PageNumText = NumDashNumBlankRegex.Match(currentText).Value.Trim();
                    currentPageInfo.IsDashBlank = true;
                    currentPageInfo.IsWP = true;
                }
                else if (romanNumRegex.IsMatch(currentText.ToUpper().Trim()))
                {
                    currentPageInfo.PageNumText = romanNumRegex.Match(currentText.ToUpper().Trim()).Value.Trim();

                    // A lone "C" or "D" is recorded as a letter page rather than a roman numeral.
                    string upperPageNum = currentPageInfo.PageNumText.ToUpper();
                    if (String.Equals(upperPageNum, "C") || String.Equals(upperPageNum, "D"))
                    {
                        currentPageInfo.PageNumText = currentPageInfo.PageNumText.ToLower();
                        currentPageInfo.IsLetter = true;
                    }
                    else
                    {
                        currentPageInfo.IsRoman = true;
                    }
                }
                else if (LetterRegex.IsMatch(currentText.Trim()))
                {
                    currentPageInfo.PageNumText = LetterRegex.Match(currentText).Value.Trim();
                    currentPageInfo.IsLetter = true;
                }
                else
                {
                    // Check whether the whole page is empty: no images and no text anywhere.
                    ImageRenderListener listener = new ImageRenderListener();
                    parser.ProcessContent(currentPage, listener);

                    ITextExtractionStrategy currentTextRectangle = MakeRectangle(1, 1, 1000000, 1000000);
                    String checkText = PdfTextExtractor.GetTextFromPage(pdfReader, currentPage, currentTextRectangle);
                    checkText = Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(checkText)));

                    if ((listener.Images.Count <= 0) && String.IsNullOrWhiteSpace(checkText))
                    {
                        currentPageInfo.IsWholePageEmpty = true;
                        currentPageInfo.IsPageNumAreaBlank = true;
                    }
                    else if (String.IsNullOrWhiteSpace(currentText))
                    {
                        currentPageInfo.IsPageNumAreaBlank = true;
                    }
                    else if (indexRegex.IsMatch(currentText.Trim()))
                    {
                        currentPageInfo.PageNumText = indexRegex.Match(currentText).Value.Trim();
                        currentPageInfo.IsIndex = true;
                    }
                    else
                    {
                        currentPageInfo.PageNumText = currentText;
                        currentPageInfo.IsMisc = true;
                    }
                }

                // Stop on cancellation; the page being processed is not recorded.
                if (Bw.CancellationPending)
                {
                    myLogger.Log("Processing cancelled at dwg #: " + currentPage.ToString());
                    break;
                }

                Bw.ReportProgress(Utils.GetPercentage(currentPage, pdfReader.NumberOfPages));
                docInfo.Pages.Add(currentPageInfo);
            }
        }

        WriteDocInfoToTextFile(docInfo);
    }
    catch (System.Exception se)
    {
        return se.Message;
    }
    finally
    {
        AfterProcessing();
    }

    return String.Concat(
        docInfo.ToString(),
        Environment.NewLine,
        "Processing completed in ",
        timer.Elapsed.TotalSeconds.PrintTimeFromSeconds(),
        Environment.NewLine,
        myLogger.ErrorCount.ToString(),
        " errors found.");
}
/// <summary>Extract text from a specified page using an extraction strategy.</summary>
/// <remarks>
/// Delegates to the three-argument <c>GetTextFromPage</c> overload with an empty map of
/// additional content operators.
/// </remarks>
/// <param name="page">the page for the text to be extracted from</param>
/// <param name="strategy">the strategy to use for extracting text</param>
/// <returns>the extracted text</returns>
public static String GetTextFromPage(PdfPage page, ITextExtractionStrategy strategy)
{
    Dictionary<String, IContentOperator> noExtraOperators = new Dictionary<String, IContentOperator>();
    return GetTextFromPage(page, strategy, noExtraOperators);
}