コード例 #1
0
        /// <summary>
        /// Splits PDF files page by page and writes a PDF and/or XML file per page,
        /// or hands the input over to <c>ManualProcess</c> when <paramref name="auto"/> is false.
        /// </summary>
        /// <remarks>
        /// In automatic mode every file matching <paramref name="pattern"/> in the directory of
        /// <paramref name="inputPath"/> is enumerated; files rejected by <c>IsValidPdf</c> are skipped.
        /// For each page the first non-blank text line is expected to look like
        /// "Document Type: Document Number". Pages whose type is not "Tax Invoice" or
        /// "Credit Note" (including pages with no text or no colon in the first line) are written
        /// to the "Errors" directory under the current directory with an "ERROR_" file name.
        /// When the <c>manualXMLonly</c> field is set, only an XML file is created, using the
        /// supplied <paramref name="doctype"/> and <paramref name="docnum"/>.
        /// Only <see cref="NullReferenceException"/> is caught and logged; other exceptions propagate.
        /// </remarks>
        /// <param name="inputPath">Path whose containing directory is scanned for input files (automatic mode), or the input passed to <c>ManualProcess</c>.</param>
        /// <param name="outputPath">Output location; appended verbatim to the current directory in automatic mode, passed as-is to <c>CreateXML</c> in manual-XML mode.</param>
        /// <param name="pattern">Search pattern for <see cref="Directory.EnumerateFiles(string, string)"/>, e.g. "*.pdf".</param>
        /// <param name="auto">True for automatic processing; false to delegate to <c>ManualProcess</c>.</param>
        /// <param name="doctype">Document type used by manual processing and manual-XML mode.</param>
        /// <param name="docnum">Document number used by manual processing and manual-XML mode.</param>
        /// <returns>
        /// The page count of the last valid PDF that was processed, or 0 when nothing was processed
        /// (including manual mode). Note that this is not a sum across files.
        /// </returns>
        public int SplitAndSave(string inputPath, string outputPath, string pattern, bool auto, string doctype = "", string docnum = "")
        {
            int pageCount = 0;

            try
            {
                // Manual processing of invalid files: nothing is split here.
                if (!auto)
                {
                    ManualProcess(inputPath, doctype, docnum);
                    return pageCount;
                }

                // Automatic processing: pick up every file matching the search pattern (e.g. *.pdf).
                files = Directory.EnumerateFiles(Path.GetDirectoryName(inputPath), pattern);
                foreach (var file in files)
                {
                    if (!IsValidPdf(file))
                    {
                        continue;
                    }

                    using (PdfReader reader = new PdfReader(file))
                    {
                        for (int pagenumber = 1; pagenumber <= reader.NumberOfPages; pagenumber++)
                        {
                            // Time/date stamp that becomes part of the generated file names.
                            string timestamp = DateTime.Now.ToString("HHmmss_ddMMyyyy");

                            // Page text split into its non-blank lines.
                            string pageText = PdfTextExtractor.GetTextFromPage(reader, pagenumber);
                            string[] lines = pageText.Split('\n')
                                                     .Where(line => !string.IsNullOrWhiteSpace(line))
                                                     .ToArray();

                            if (manualXMLonly)
                            {
                                // Manual XML mode: type and number come from the caller, not from the page.
                                // NOTE(review): a page without text reaches CreateXML with an empty array here
                                // instead of throwing IndexOutOfRangeException as before - confirm CreateXML copes.
                                doctype = doctype.Replace(" ", "_");
                                CreateXML(doctype, docnum, timestamp, lines, outputPath);
                                continue;
                            }

                            // The first non-blank line carries "<doc type>: <doc number>".
                            // A page without any text yields an empty header and is treated as invalid
                            // rather than failing on lines[0].
                            string header = lines.Length > 0 ? lines[0] : string.Empty;

                            string docType;
                            string docNumber;

                            index = header.IndexOf(':');
                            if (index >= 0)
                            {
                                docType = header.Substring(0, index).Trim();
                                docNumber = header.Substring(index + 1).Trim();
                            }
                            else
                            {
                                // No colon: Substring(0, -1) would throw and abort the whole batch.
                                // Keep the text as an (unrecognized) type so the page goes down the error path.
                                docType = header.Trim();
                                docNumber = string.Empty;
                            }

                            string finalOutputPath;
                            string filenameWithoutExt;

                            if (docType != "Tax Invoice" && docType != "Credit Note")
                            {
                                // Unrecognized document: store it under Errors with an ERROR_ name.
                                filenameWithoutExt = "ERROR_" + timestamp;
                                finalOutputPath = CreateDirectories(Environment.CurrentDirectory + "\\Errors", true);
                                docType = "";
                            }
                            else
                            {
                                // Naming convention: type, number and timestamp joined with underscores.
                                filenameWithoutExt = (docType + " " + docNumber + " " + timestamp).Replace(" ", "_");
                                finalOutputPath = CreateDirectories(Environment.CurrentDirectory + outputPath + "\\" + docType, false);
                            }

                            // Write the single-page PDF and its accompanying XML.
                            CreatePDF(reader, pagenumber, finalOutputPath, filenameWithoutExt);
                            CreateXML(docType, docNumber, timestamp, lines, finalOutputPath);
                        }

                        // Deliberately overwritten per file: the result reflects the last processed PDF.
                        pageCount = reader.NumberOfPages;
                    }
                }
            }
            catch (NullReferenceException ex)
            {
                ErrorLogging.Call_Log(ex, false);
            }

            return pageCount;
        }