/// <summary>
/// Assigns newly constructed table objects to the glyph-shape, bounds,
/// kerning and advance fields, and sets the tag code to DefineFont2.
/// </summary>
protected void Init()
{
    glyphShapesTable = new GlyphShapesTable();
    fontBoundsTable = new RectCollection();
    fontKerningTable = new KerningRecordCollection();
    fontAdvanceTable = new ShortCollection();

    // The tag code field stores the enum member as its integer value.
    var fontTag = TagCodeEnum.DefineFont2;
    _tagCode = (int)fontTag;
}
/// <summary>
/// Copies the player count, weapon mode and lives from <c>levelSettings</c>
/// and selects the rectangle collection that corresponds to the player count.
/// </summary>
private void LoadSettings()
{
    NumberOfPlayers = levelSettings.PlayerCount;
    LevelWeaponMode = levelSettings.LevelWeaponMode;
    PlayerLivesCount = levelSettings.Lives;

    // Read the value back once, exactly as the original switch did.
    var playerCount = NumberOfPlayers;

    if (playerCount == 1)
    {
        rectCollection = NoSplit;
    }
    else if (playerCount == 2)
    {
        rectCollection = TwoWaySplit;
    }
    else
    {
        // Any other player count falls back to the four-way layout.
        rectCollection = FourWaySplit;
    }
}
/// <summary>
/// Application entry point: initializes PDFNet, verifies that the OCR module
/// is available, then runs six OCR examples in sequence. Each example is
/// wrapped in its own try/catch so a PDFNetException in one is printed and
/// does not prevent the following examples from running.
/// </summary>
static void Main(string[] args)
{
    // PDFNet has to be initialized before any other SDK call; the original
    // sample notes that calling Initialize() more than once is harmless.
    PDFNet.Initialize();

    // Optional: tell the SDK where to look for the OCR module.
    PDFNet.AddResourceSearchPath("../../../../../Lib/");

    if (!OCRModule.IsModuleAvailable())
    {
        string[] notice =
        {
            "",
            "Unable to run OCRTest: PDFTron SDK OCR module not available.",
            "---------------------------------------------------------------",
            "The OCR module is an optional add-on, available for download",
            "at http://www.pdftron.com/. If you have already downloaded this",
            "module, ensure that the SDK is able to find the required files",
            "using the PDFNet.AddResourceSearchPath() function.",
            ""
        };

        foreach (string line in notice)
        {
            Console.WriteLine(line);
        }

        return;
    }

    // Folders (relative to the working directory) for test inputs and results.
    string inputDir = "../../TestFiles/OCR/";
    string outputDir = "../../TestFiles/Output/";

    // Every example saves with the same flag.
    SDFDoc.SaveOptions saveFlags = SDFDoc.SaveOptions.e_remove_unused;

    //--------------------------------------------------------------------------------
    // Example 1) OCR a single image into a new PDF.
    try
    {
        // A) Start from an empty destination document.
        using (PDFDoc pdf = new PDFDoc())
        {
            // B) English is the only requested language.
            OCROptions ocrOptions = new OCROptions();
            ocrOptions.AddLang("eng");

            // C) Run OCR on the .png.
            string sourceImage = inputDir + "psychomachia_excerpt.png";
            OCRModule.ImageToPDF(pdf, sourceImage, ocrOptions);

            // D) Write the result out for inspection.
            string target = outputDir + "psychomachia_excerpt.pdf";
            pdf.Save(target, saveFlags);

            Console.WriteLine("Example 1: psychomachia_excerpt.png");
        }
    }
    catch (PDFNetException ex)
    {
        Console.WriteLine(ex.Message);
    }

    //--------------------------------------------------------------------------------
    // Example 2) OCR an image using several target languages.
    try
    {
        // A) Start from an empty destination document.
        using (PDFDoc pdf = new PDFDoc())
        {
            // B) Request Russian and German; per the SDK sample notes, English
            //    is always considered as a secondary language.
            OCROptions ocrOptions = new OCROptions();
            ocrOptions.AddLang("rus");
            ocrOptions.AddLang("deu");

            // C) Run OCR on the .jpg.
            string sourceImage = inputDir + "multi_lang.jpg";
            OCRModule.ImageToPDF(pdf, sourceImage, ocrOptions);

            // D) Write the result out for inspection.
            string target = outputDir + "multi_lang.pdf";
            pdf.Save(target, saveFlags);

            Console.WriteLine("Example 2: multi_lang.jpg");
        }
    }
    catch (PDFNetException ex)
    {
        Console.WriteLine(ex.Message);
    }

    //--------------------------------------------------------------------------------
    // Example 3) OCR an existing PDF in German while ignoring a zone that
    //            covers a sidebar image.
    try
    {
        // A) Open the source document.
        using (PDFDoc pdf = new PDFDoc(inputDir + "german_kids_song.pdf"))
        {
            // B) Single language plus one ignore zone on page 1.
            OCROptions ocrOptions = new OCROptions();
            ocrOptions.AddLang("deu");

            RectCollection sidebarZones = new RectCollection();
            sidebarZones.AddRect(1768, 680, 2056, 3044);
            ocrOptions.AddIgnoreZonesForPage(sidebarZones, 1);

            // C) Run OCR on the document itself.
            OCRModule.ProcessPDF(pdf, ocrOptions);

            // D) Write the result out for inspection.
            string target = outputDir + "german_kids_song.pdf";
            pdf.Save(target, saveFlags);

            Console.WriteLine("Example 3: german_kids_song.pdf");
        }
    }
    catch (PDFNetException ex)
    {
        Console.WriteLine(ex.Message);
    }

    //--------------------------------------------------------------------------------
    // Example 4) OCR a multipage tiff with text/ignore zones given per page.
    try
    {
        // A) Start from an empty destination document.
        using (PDFDoc pdf = new PDFDoc())
        {
            // B) Single language plus per-page zones. One RectCollection is
            //    reused and cleared after each registration.
            OCROptions ocrOptions = new OCROptions();
            ocrOptions.AddLang("eng");

            RectCollection pageZones = new RectCollection();

            // Pages 1 and 2: ignore the signature box.
            pageZones.AddRect(1492, 56, 2236, 432);
            ocrOptions.AddIgnoreZonesForPage(pageZones, 1);
            pageZones.Clear();

            pageZones.AddRect(1492, 56, 2236, 432);
            ocrOptions.AddIgnoreZonesForPage(pageZones, 2);
            pageZones.Clear();

            // Page 3 combines ignore and text zones to focus on one area.
            // Ignore zones are applied first, so the arrows are removed
            // before part of the diagram is selected.
            pageZones.AddRect(992, 1276, 1368, 1372);
            ocrOptions.AddIgnoreZonesForPage(pageZones, 3);
            pageZones.Clear();

            // Horizontal BUFFER ZONE sign.
            pageZones.AddRect(900, 2384, 1236, 2480);

            // Right vertical BUFFER ZONE sign.
            pageZones.AddRect(1960, 1976, 2016, 2296);

            // Lot No.
            pageZones.AddRect(696, 1028, 1196, 1128);

            // Part of the plan inside the BUFFER ZONE.
            pageZones.AddRect(428, 1484, 1784, 2344);
            pageZones.AddRect(948, 1288, 1672, 1476);
            ocrOptions.AddTextZonesForPage(pageZones, 3);

            // C) Run OCR on the .tif.
            string sourceImage = inputDir + "bc_environment_protection.tif";
            OCRModule.ImageToPDF(pdf, sourceImage, ocrOptions);

            // D) Write the result out for inspection.
            string target = outputDir + "bc_environment_protection.pdf";
            pdf.Save(target, saveFlags);

            Console.WriteLine("Example 4: bc_environment_protection.tif");
        }
    }
    catch (PDFNetException ex)
    {
        Console.WriteLine(ex.Message);
    }

    //--------------------------------------------------------------------------------
    // Example 5) Alternative workflow: extract the OCR result as JSON, leave
    //            room for post-processing (e.g. removing words not in a
    //            dictionary or filtering out special characters), then apply
    //            the JSON back to the source PDF.
    try
    {
        // A) Open the source document.
        using (PDFDoc pdf = new PDFDoc(inputDir + "zero_value_test_no_text.pdf"))
        {
            // B) English only.
            OCROptions ocrOptions = new OCROptions();
            ocrOptions.AddLang("eng");

            // C) Run OCR and keep the result as JSON.
            string ocrJson = OCRModule.GetOCRJsonFromPDF(pdf, ocrOptions);

            // D) A post-processing step would go here; this sample only
            //    prints a status message.
            Console.WriteLine("Have OCR result JSON, re-applying to PDF");

            // E) Apply the (potentially modified) JSON to the PDF.
            OCRModule.ApplyOCRJsonToPDF(pdf, ocrJson);

            // F) Write the result out for inspection.
            string target = outputDir + "zero_value_test_no_text.pdf";
            pdf.Save(target, saveFlags);

            Console.WriteLine("Example 5: extracting and applying OCR JSON from zero_value_test_no_text.pdf");
        }
    }
    catch (PDFNetException ex)
    {
        Console.WriteLine(ex.Message);
    }

    //--------------------------------------------------------------------------------
    // Example 6) Same post-processing workflow, but with the OCR result in
    //            XML format (similar to the one used by TextExtractor).
    try
    {
        // A) Start from an empty destination document.
        using (PDFDoc pdf = new PDFDoc())
        {
            // B) English only.
            OCROptions ocrOptions = new OCROptions();
            ocrOptions.AddLang("eng");

            // C) Run OCR on the .tif and keep the result as XML. The source
            //    image is converted into the PDF as part of this call, and
            //    that PDF is reused below to receive the hidden text layer.
            string sourceImage = inputDir + "physics.tif";
            string ocrXml = OCRModule.GetOCRXmlFromImage(pdf, sourceImage, ocrOptions);

            // D) A post-processing step would go here; this sample only
            //    prints a status message.
            Console.WriteLine("Have OCR result XML, re-applying to PDF");

            // E) Apply the (potentially modified) XML to the PDF.
            OCRModule.ApplyOCRXmlToPDF(pdf, ocrXml);

            // F) Write the result out for inspection.
            string target = outputDir + "physics.pdf";
            pdf.Save(target, saveFlags);

            Console.WriteLine("Example 6: extracting and applying OCR XML from physics.tif");
        }
    }
    catch (PDFNetException ex)
    {
        Console.WriteLine(ex.Message);
    }
}
/// <summary>
/// Scans the bitmap row by row, groups its non-white pixels into rectangles,
/// and returns one <c>RectObject</c> per resulting rectangle.
/// </summary>
/// <param name="bitmap">
/// Image to scan. It is wrapped in an <c>UnsafeBitmap</c> for pixel access
/// during the scan and afterwards passed to <c>CopyImage</c> for each box.
/// </param>
/// <returns>
/// A <c>RectCollection</c> whose <c>Rectangles</c> list holds, for every box,
/// its X, Y, Width, Height, its index in detection order, and the bitmap
/// returned by <c>CopyImage</c>.
/// </returns>
private static RectCollection buildBoxes(Bitmap bitmap)
{
    // Height of a freshly created box; new boxes are five times this wide.
    // Also used as the vertical slack when picking candidate boxes per row.
    const int gapHeight = 40;

    List<CRectangle> boxes = new List<CRectangle>();

    UnsafeBitmap bmp = new UnsafeBitmap(bitmap);
    bmp.LockBitmap();
    try
    {
        for (int y = 0; y < bitmap.Height; y++)
        {
            // Only boxes whose bottom edge (plus the gap) reaches this row
            // are considered while scanning the row.
            List<CRectangle> candidates = new List<CRectangle>();
            foreach (var box in boxes)
            {
                if (box.Y + box.Height + gapHeight + 1 >= y)
                {
                    candidates.Add(box);
                }
            }

            for (int x = 0; x < bitmap.Width; x++)
            {
                var pix = bmp.GetPixel(x, y);

                // Pure white pixels are background.
                if (pix.red == 255 && pix.green == 255 && pix.blue == 255)
                {
                    continue;
                }

                // Candidate boxes that claim this pixel. A box is grown by
                // one row or one column when the pixel lies directly below
                // or directly right of its current area.
                // NOTE(review): ShouldContain/Contains are defined elsewhere;
                // their exact geometry is not visible here.
                List<CRectangle> hits = new List<CRectangle>();
                foreach (var rectangle in candidates)
                {
                    if (!rectangle.ShouldContain(x, y))
                    {
                        continue;
                    }

                    if (rectangle.Contains(x, y))
                    {
                        hits.Add(rectangle);
                    }
                    else if (rectangle.Contains(x, y - 1))
                    {
                        hits.Add(rectangle);
                        rectangle.Height = rectangle.Height + 1;
                    }
                    else if (rectangle.Contains(x - 1, y))
                    {
                        hits.Add(rectangle);
                        rectangle.Width = rectangle.Width + 1;
                    }
                }

                if (hits.Count == 0)
                {
                    // No existing box claims the pixel: start a new one here.
                    var created = new CRectangle(x, y, gapHeight * 5, gapHeight);
                    boxes.Add(created);
                    candidates.Add(created);
                }

                if (hits.Count > 1)
                {
                    // Several boxes claim the same pixel: replace them all
                    // with their union.
                    var merged = hits[0];
                    for (int index = hits.Count - 1; index >= 0; index--)
                    {
                        merged = CRectangle.Union(merged, hits[index]);
                        boxes.Remove(hits[index]);
                        candidates.Remove(hits[index]);
                    }

                    boxes.Add(merged);
                    candidates.Add(merged);
                }
            }
        }
    }
    finally
    {
        // Release the pixel wrapper even if the scan throws, and always
        // before CopyImage touches the bitmap below.
        bmp.Dispose();
    }

    Console.WriteLine($"{boxes.Count} boxes");

    RectCollection col = new RectCollection();
    for (int index = 0; index < boxes.Count; index++)
    {
        var cRectangle = boxes[index];
        var cropped = CopyImage(bitmap, cRectangle);
        col.Rectangles.Add(new RectObject()
        {
            X = cRectangle.X,
            Y = cRectangle.Y,
            Width = cRectangle.Width,
            Height = cRectangle.Height,
            Index = index,
            Bitmap = cropped
        });
    }

    return col;
}