Beispiel #1
0
        /// <summary>
        /// Generates a machine-readable PDF from a non-machine-readable PDF file by calling
        /// <c>ConvertPDFToMachineReadable</c> on the Doctools REST wrapper.
        /// </summary>
        /// <param name="DTUrl">Doctools URL handed to the connection broker.</param>
        /// <param name="DTPort">Doctools port handed to the connection broker.</param>
        /// <param name="InputFilePath">
        /// Path of the PDF to convert. It is passed as both the first and the second argument of the
        /// REST call; presumably the second argument is the output path, which would mean the file
        /// is converted in place - TODO confirm against <c>_100DTRestCalls</c>.
        /// </param>
        /// <param name="Language">Language value forwarded unchanged to the REST call.</param>
        /// <returns>The HTTP status code of the response, as text (for example "OK").</returns>
        public static String OCRPdf(String DTUrl, int DTPort, String InputFilePath, String Language)
        {
            DoctoolsConnectionBroker rbroker      = new DoctoolsConnectionBroker(DTUrl, DTPort);
            _100DTRestCalls          apicallsRest = new _100DTRestCalls(rbroker);

            // Dispose the response once the status has been read; an undisposed HttpWebResponse
            // keeps its connection allocated.
            using (HttpWebResponse Resp = apicallsRest.ConvertPDFToMachineReadable(InputFilePath, InputFilePath, Language))
            {
                return Resp.StatusCode.ToString();
            }
        }
Beispiel #2
0
        /// <summary>
        /// Splits a PDF into multiple PDFs, treating pages that contain no text items as separators.
        /// </summary>
        /// <remarks>
        /// The method first asks the Doctools service for a machine-readable copy of the input
        /// (named after the input file with an "_MR" suffix before the extension), then requests
        /// the text structure of that copy and deserializes it. Consecutive pages that have at
        /// least one child item form a range "first-last", which is handed to <c>SplitPdf</c>.
        /// A page with zero child items closes the current range and is not part of any range.
        /// Leading or consecutive blank pages are skipped instead of producing an empty range.
        /// </remarks>
        /// <param name="DTUrl">Doctools URL handed to the connection broker and to <c>SplitPdf</c>.</param>
        /// <param name="DTPort">Doctools port handed to the connection broker and to <c>SplitPdf</c>.</param>
        /// <param name="InputFilePath">Path of the PDF to split.</param>
        /// <param name="Language">Language value forwarded to the machine-readable conversion call.</param>
        /// <param name="OutputFolder">
        /// Folder for the temporary machine-readable file and the value passed to <c>SplitPdf</c> as
        /// output root. When null or empty, the directory of <paramref name="InputFilePath"/> is used.
        /// A trailing directory separator is appended when missing.
        /// </param>
        /// <param name="DeleteTempFiles">When true, the temporary machine-readable file is deleted after its text has been read.</param>
        /// <returns>A newline-separated log of the steps performed and the page ranges identified.</returns>
        public static String SplitPdfsWithBlankPages(String DTUrl, int DTPort, String InputFilePath, String Language, String OutputFolder, Boolean DeleteTempFiles)
        {
            // Depth argument forwarded to GetTextInMachineReadablePDF.
            const String Depth = "2";
            // TODO: add option to automatically generate files in another folder

            StringBuilder            Output       = new StringBuilder();
            DoctoolsConnectionBroker rbroker      = new DoctoolsConnectionBroker(DTUrl, DTPort);
            _100DTRestCalls          apicallsRest = new _100DTRestCalls(rbroker);

            String OutputRootPath = Path.GetDirectoryName(InputFilePath) + "\\";

            if (!String.IsNullOrEmpty(OutputFolder))
            {
                OutputRootPath = OutputFolder;

                // File names are appended by plain concatenation below, so the folder must end
                // with a separator; otherwise folder and file name would run together.
                char lastChar = OutputRootPath[OutputRootPath.Length - 1];
                if (lastChar != Path.DirectorySeparatorChar && lastChar != Path.AltDirectorySeparatorChar)
                {
                    OutputRootPath += Path.DirectorySeparatorChar;
                }
            }

            String TempMRFile = OutputRootPath + Path.GetFileNameWithoutExtension(InputFilePath) + "_MR" + Path.GetExtension(InputFilePath);

            Output.Append("Machine Readable Filename: ").Append(TempMRFile).Append("\n");

            using (HttpWebResponse Resp = apicallsRest.ConvertPDFToMachineReadable(InputFilePath, TempMRFile, Language))
            {
                Output.Append("MR Call Response: ").Append(Resp.StatusCode.ToString()).Append("\n");
            }

            // TODO: check the response status before continuing

            String responseString1;

            using (HttpWebResponse Resp1 = apicallsRest.GetTextInMachineReadablePDF(TempMRFile, Depth))
            {
                using (Stream stream = Resp1.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
                {
                    responseString1 = reader.ReadToEnd();
                }

                Output.Append("Pdf To Text Call Response: ").Append(Resp1.StatusCode.ToString()).Append("\n");
            }

            IQBotAPILibrary.JsonObjects.Doctools.PDFToText.Response r = JsonConvert.DeserializeObject <IQBotAPILibrary.JsonObjects.Doctools.PDFToText.Response>(responseString1);

            Output.Append("Response Successfully Parsed").Append("\n");

            if (DeleteTempFiles)
            {
                File.Delete(TempMRFile);
            }

            // Nothing to split when the service returned no page list.
            if (r == null || r.children == null)
            {
                Output.Append("No Pages Found In Response").Append("\n");
                return(Output.ToString());
            }

            // Page numbers of the range currently being collected.
            List <int> PagesInRange = new List <int>();

            foreach (var item in r.children)
            {
                if (item.children.Count > 0)
                {
                    PagesInRange.Add(item.pageNumber);
                    continue;
                }

                // Blank page: close the current range, if it holds any pages.
                if (PagesInRange.Count > 0)
                {
                    SplitIdentifiedRange(Output, DTUrl, DTPort, InputFilePath, PagesInRange, OutputRootPath);
                    PagesInRange.Clear();
                }
            }

            // The document ended on a non-blank page: emit the trailing range.
            if (PagesInRange.Count > 0)
            {
                SplitIdentifiedRange(Output, DTUrl, DTPort, InputFilePath, PagesInRange, OutputRootPath);
            }

            return(Output.ToString());
        }

        // Logs the range "first-last" spanned by Pages (must be non-empty) and passes it to SplitPdf.
        private static void SplitIdentifiedRange(StringBuilder Output, String DTUrl, int DTPort, String InputFilePath, List <int> Pages, String OutputRootPath)
        {
            String Range = Pages[0] + "-" + Pages[Pages.Count - 1];

            Output.Append("Range Identified: ").Append(Range).Append("\n");
            SplitPdf(DTUrl, DTPort, InputFilePath, Range, OutputRootPath);
        }