Exemple #1
0
        /// <summary>
        /// Runs a <c>PDFHocr</c> extraction pass over <paramref name="document"/> and hands the
        /// collected pages to <c>GetHocrFromPageList</c> together with <paramref name="outputfile"/>.
        /// </summary>
        /// <param name="document">The PDF document to process.</param>
        /// <param name="outputfile">Output path forwarded to <c>GetHocrFromPageList</c>.</param>
        /// <param name="useWords">Value assigned to <c>getHOCRByWords</c> on the extractor.</param>
        /// <returns><c>true</c> when every step completed; <c>false</c> if any exception was thrown.</returns>
        internal static bool CreateHocrFileFromPDF(PDDocument document, string outputfile, bool useWords)
        {
            try
            {
                PDFHocr extractor = new PDFHocr();
                extractor.getHOCRByWords = useWords;
                extractor.setSortByPosition(true);
                extractor.setStartPage(0);
                extractor.setEndPage(document.getNumberOfPages());

                PDFHelper.DisplayTrialPopupIfNecessary();
                if (PDFHelper.AddStamp)
                {
                    // NOTE(review): presumably the trial-mode page limit - confirm.
                    extractor.setEndPage(3);
                }

                // The written text itself is discarded; only the lists filled during the pass are used.
                extractor.writeText(document, new OutputStreamWriter(new ByteArrayOutputStream()));

                // Lines still sitting in lineList after the pass are moved into one more page.
                if (extractor.lineList != null && extractor.lineList.Count > 0)
                {
                    HocrPageModel trailingPage = new HocrPageModel();
                    trailingPage.Lines.AddRange(extractor.SortLineList(extractor.lineList));
                    extractor.pageList.Add(trailingPage);
                    extractor.lineList.Clear();
                }

                extractor.GetHocrFromPageList(extractor.pageList, outputfile);
                return true;
            }
            catch (Exception)
            {
                // Any failure is reported to the caller only through the boolean result.
                return false;
            }
        }
Exemple #2
0
        /// <summary>
        /// Runs a <c>PDFHocr</c> extraction pass over <paramref name="document"/> and returns the
        /// page models it collected.
        /// </summary>
        /// <param name="document">The PDF document to process.</param>
        /// <returns>
        /// The extractor's <c>pageList</c>, or <c>null</c> if any exception was thrown along the way.
        /// </returns>
        internal static List<HocrPageModel> GetPageWordDetails(PDDocument document)
        {
            try
            {
                PDFHocr extractor = new PDFHocr();
                extractor.setSortByPosition(true);
                extractor.setStartPage(0);
                extractor.setEndPage(document.getNumberOfPages());

                // The written text is discarded; only the lists filled during the pass are used.
                Writer sink = new OutputStreamWriter(new ByteArrayOutputStream());

                PDFHelper.DisplayTrialPopupIfNecessary();
                if (PDFHelper.AddStamp)
                {
                    // NOTE(review): presumably the trial-mode page limit - confirm.
                    extractor.setEndPage(3);
                }

                extractor.writeText(document, sink);

                // Lines still sitting in lineList after the pass are moved into one more page.
                if (extractor.lineList != null && extractor.lineList.Count > 0)
                {
                    HocrPageModel trailingPage = new HocrPageModel();
                    trailingPage.Lines.AddRange(extractor.SortLineList(extractor.lineList));
                    extractor.pageList.Add(trailingPage);
                    extractor.lineList.Clear();
                }

                return extractor.pageList;
            }
            catch (Exception)
            {
                // Failure is signalled to the caller with a null result.
                return null;
            }
        }
Exemple #3
0
        /// <summary>
        /// Writes one page as an XHTML hOCR document to <paramref name="fileName"/>.
        /// </summary>
        /// <param name="page">Page whose lines (and their words) are rendered.</param>
        /// <param name="fileName">Path of the file to create.</param>
        /// <param name="pageNumber">Value emitted as <c>ppageno</c> in the page's title attribute.</param>
        /// <returns><c>true</c> when the file was written; <c>false</c> if any exception was thrown.</returns>
        /// <remarks>
        /// All content is computed before the file is opened, so a failure while rendering
        /// lines or computing the bounding box no longer leaves a truncated document on disk.
        /// </remarks>
        private bool CreateHocrPage(HocrPageModel page, string fileName, int pageNumber)
        {
            try
            {
                List<string> renderedLines = new List<string>();
                foreach (HocrLineModel line in page.Lines)
                {
                    renderedLines.Add(this.GetLineWithWords(line));
                }

                // The bbox is the largest XCord1 / YCord1 over all words on the page.
                // Enumerable.Max can throw on an empty sequence (no lines, or a line without
                // words), so evaluate it here, before any output file exists.
                var maxX = page.Lines.Max(l => l.Words.Max(w => w.XCord1));
                var maxY = page.Lines.Max(l => l.Words.Max(w => w.YCord1));
                string pageDiv = string.Format("\t\t <div class=\"ocr_page\" title=\"bbox 0 0 {0} {1}; ppageno {2}\">", maxX, maxY, pageNumber);

                using (StreamWriter streamWriter = new StreamWriter(fileName))
                {
                    streamWriter.WriteLine("<!DOCTYPE html  PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"  \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\" > ");
                    streamWriter.WriteLine("<html xmlns=\"http://www.w3.org/1999/xhtml\">");
                    streamWriter.WriteLine("\t<head>");
                    streamWriter.WriteLine("\t\t <title>OCR Output</title>");
                    streamWriter.WriteLine("\t</head>");
                    streamWriter.WriteLine("\t<body>");
                    streamWriter.WriteLine(pageDiv);
                    foreach (string renderedLine in renderedLines)
                    {
                        streamWriter.WriteLine(renderedLine);
                    }
                    streamWriter.WriteLine("\t\t</div>");
                    streamWriter.WriteLine("\t</body>");
                    streamWriter.WriteLine("</html>");
                }

                return true;
            }
            catch (Exception)
            {
                // Failures are reported to the caller only through the boolean result.
                return false;
            }
        }
Exemple #4
0
 /// <summary>
 /// Moves the lines currently held in <c>lineList</c> into a new page appended to
 /// <c>pageList</c>, then empties <c>lineList</c>. Does nothing when <c>lineList</c>
 /// is null or empty.
 /// </summary>
 private void AddToPageList()
 {
     // Nothing pending: leave pageList untouched.
     if (this.lineList == null || this.lineList.Count <= 0)
     {
         return;
     }

     HocrPageModel pendingPage = new HocrPageModel();
     pendingPage.Lines.AddRange(this.SortLineList(this.lineList));
     this.pageList.Add(pendingPage);
     this.lineList.Clear();
 }