예제 #1
0
        private static void Main(string[] args)
        {
            VirtTerm vt = new VirtTerm( );

            Workers work = new Workers( );

            work.options = "";

            work.iDoc = 1;

            work.DefForeColor = Console.ForegroundColor;

            string prodVer  = FileVersionInfo.GetVersionInfo(Assembly.GetExecutingAssembly( ).Location).ProductVersion;
            string subject  = "";
            string author   = "";
            string keywords = "";

            // obtained from downloading samples with internet speed at 15Mbps
            float [] fac1 = new float[] { 0.0f, 0.015f, 0.922f, 0.934f, 0.945f, 0.962f, 0.999f, 1.0f };

            DateTime last = DateTime.Now;

            if (args.Length < 2)
            {
                vt.PrintText("\n  ConvertWeb2Pdf version " + prodVer);
                vt.PrintText("\n\n  Usage: ConvertWeb2Pdf <Yellow,>[options]</> <file.txt> <out.pdf>");
                vt.PrintText("\n\n  Options:");
                vt.PrintText("\n  <Yellow,>-nc</>, Do not compress pdf");
                vt.PrintText("\n  <Yellow,>-dj</>, Disable javascripts");
                vt.PrintText("\n  <Yellow,>-ni</>, Do not load images");
                vt.PrintText("\n  <Yellow,>-lq</>, Draft quality");
                vt.PrintText("\n  <Yellow,>-gs</>, Grayscale printing");
                vt.PrintText("\n  <Yellow,>-ab</>, Abort after failure ");
                vt.PrintText("\n  <Yellow,>-ti:</>, Timeout in ms (default is infinity) ");
                vt.PrintText("\n  <Yellow,>-su:</>, Metadata: Pdf subject. Use <Yellow,>^</> as a space placeholder (e.g. <Yellow,>-su:Pdf^test</> becomes \"Pdf test\")");
                vt.PrintText("\n  <Yellow,>-ar:</>, Metadata: Pdf author. Use <Yellow,>^</> as a space placeholder. Use <Yellow,>?</> as a Computer/User placeholder");
                vt.PrintText("\n  <Yellow,>-kw:</>, Metadata: Pdf keywords. Comma-delimited items. Use <Yellow,>^</> as a space placeholder");
                vt.PrintText("\n\n  This software is published under BSD (3-Clause) License.");
                vt.PrintText("\n\n  PDFsharp and MigraDoc are published by empira Software GmbH under MIT License.");
                vt.PrintText("\n  <BrightWhite,><http://www.pdfsharp.net/>");
                vt.PrintText("\n\n  Wkhtmltopdf is published under LGPLv3 License.");
                vt.PrintText("\n  <BrightWhite,><https://wkhtmltopdf.org/>");
                vt.PrintText("\n\n  If you experience bugs or want to request new features please visit");
                vt.PrintText("\n  <BrightWhite,><https://github.com/rbsriobr//ConvertWeb2Pdf/issues>");
                vt.PrintText("\n\n  Copyright 2017 Ricardo Santos\n");
                return;
            }

            work.Print(-2, fac1[0], 2);

            for (int k = 0; k < args.Length - 2; k++)
            {
                if (args[k].ToLower( ) == "-nc")
                {
                    work.options += " --no-pdf-compression ";
                }
                else if (args[k].ToLower( ) == "-dj")
                {
                    work.options += " --disable-javascript ";
                }
                else if (args[k].ToLower( ) == "-ni")
                {
                    work.options += " --no-images ";
                }
                else if (args[k].ToLower( ) == "-lq")
                {
                    work.options += " --lowquality ";
                }
                else if (args[k].ToLower( ) == "-gs")
                {
                    work.options += " --grayscale ";
                }
                else if (args[k].ToLower( ) == "-ab")
                {
                    work.abortafter = Workers._abortafter.yes;
                }
                else if (args[k].ToLower( ).Substring(0, 4) == "-ti:")
                {
                    int res;
                    if (Int32.TryParse(args[k].ToLower( ).Substring(4), out res))
                    {
                        if (res < 0)
                        {
                            res = -1;
                        }
                        work.timeout = res;
                    }
                }
                else if (args[k].ToLower( ).Substring(0, 4) == "-su:")
                {
                    subject = args[k].ToLower( ).Substring(4);
                    subject = subject.Replace("^", " ");
                }
                else if (args[k].ToLower( ).Substring(0, 4) == "-kw:")
                {
                    keywords = args[k].ToLower( ).Substring(4);
                    keywords = keywords.Replace("^", " ");
                }
                else if (args[k].ToLower( ) == "-ar:?")
                {
                    author = Environment.MachineName + "/" + Environment.UserName;
                    author = author.Replace("^", " ");
                }
                else if (args[k].ToLower( ).Substring(0, 4) == "-ar:")
                {
                    author = args[k].ToLower( ).Substring(4);
                    author = author.Replace("^", " ");
                }
            }

            string InFilename  = args[args.Length - 2];
            string OutFilename = args[args.Length - 1];

            OutFilename = OutFilename.Replace(" ", "_").Replace(".pdf", "");

            if (!File.Exists(InFilename))
            {
                vt.PrintText("\n\n <Red,>File not found:</> " + InFilename);
                return;
            }

            try
            {
                String Line;

                StreamReader inFile = new StreamReader(InFilename);

                work.docInfoList.Add(new DocInfo(-1, -1, OutFilename + "0.pdf", OutFilename, ""));

                List <string> Urilist = new List <string> ( );

                int j = 1, h;

                Uri test = null;

                while ((Line = inFile.ReadLine( )) != null)
                {
                    Line = RemoveEscEtc(Line);

                    if (String.IsNullOrEmpty(Line))
                    {
                        j++;
                        continue;
                    }

                    if ((h = Urilist.IndexOf(Line)) != -1)
                    {
                        vt.PrintText("\r <Yellow,>Duplicate URL found in file.</> Lines: " + j.ToString( ) + ", " + (h + 1).ToString( ));
                        j++;
                        continue;
                    }
                    if (!Uri.TryCreate(Line, UriKind.Absolute, out test))
                    {
                        vt.PrintText("\r <Yellow,>Invalid URL.</> Line: " + j.ToString( ));
                        j++;
                        continue;
                    }

                    j++;

                    Urilist.Add(Line);

                    work.docInfoList.Add(new DocInfo(-1, -1, OutFilename + work.docInfoList.Count.ToString( ) + ".pdf", Line, ""));
                }

                Urilist.Clear( );

                inFile.Close( );

                work.elemArray = new Object[(work.docInfoList.Count * 2) + 2];

                string now     = DateTime.Now.ToString( );
                string docname = "ConvertWeb2Pdf - " + now;

                now = now.Replace("/", "").Replace("\\", "").Replace(":", "").Replace(" ", "-");

                Directory.CreateDirectory(now);

                work.elemArray[0] = new XElement("p", new XElement("br"), docname);
                work.elemArray[1] = new XElement("hr");

                work.exeFileName = Path.GetDirectoryName(System.Reflection.Assembly.GetEntryAssembly( ).Location);
                work.exeFileName = work.exeFileName + "\\" + "wkhtmltopdf.exe";

                work.Print(work.iDoc, fac1[1], 2);

                work.fac = fac1[2];

                Directory.SetCurrentDirectory(Directory.GetCurrentDirectory( ) + "\\" + now + "\\");

                work.curDir = Directory.GetCurrentDirectory( );

                Console.CursorVisible = false;

                ////////////////////
                //	Download web  //
                ////////////////////

                Thread thread1 = new Thread(new ThreadStart(work.DoWork));

                thread1.Start( );

                Thread thread2 = new Thread(new ThreadStart(work.DoWork));

                thread2.Start( );

                Thread thread3 = new Thread(new ThreadStart(work.DoWork));

                thread3.Start( );

                Thread thread4 = new Thread(new ThreadStart(work.DoWork));

                thread4.Start( );

                bool stop = false;

                while (!stop)
                {
                    stop  = thread1.Join(250);
                    stop &= thread2.Join(250);
                    stop &= thread3.Join(250);
                    stop &= thread4.Join(250);

                    work.Print(work.iDoc, fac1[2], 2);
                }

                if ((work.abortafter == Workers._abortafter.yes) || (work.abortafter == Workers._abortafter.all))
                {
                    return;
                }

                //////////////////////////////////////////////////////
                //	Test of corrupted pdf files and get page count  //
                //////////////////////////////////////////////////////

                int pagOffset = 1;

                for (int idoc = 0; idoc < work.docInfoList.Count; idoc++)
                {
                    j = 0;

                    if (work.docInfoList.ElementAt(idoc).Exists)
                    {
                        while (true)
                        {
                            try
                            {
                                using (PdfDocument inputDocument = PdfReader.Open(work.docInfoList.ElementAt(idoc).DocFileName, PdfDocumentOpenMode.Import))
                                {
                                    work.docInfoList.ElementAt(idoc).NPags     = inputDocument.PageCount;
                                    work.docInfoList.ElementAt(idoc).PagOffset = pagOffset;
                                    pagOffset += inputDocument.PageCount;
                                }
                            }
                            catch (Exception ex)
                            {
                                if (++j > 2)
                                {
                                    vt.PrintText("\n\n <Red,>File deleteted: file seems corrupted.");
                                    try
                                    {
                                        File.Delete(work.docInfoList.ElementAt(idoc).DocFileName);
                                    }
                                    catch (System.IO.IOException)
                                    { }
                                    break;
                                }
                                vt.PrintText("\n\n <Yellow,>Exception on opening file</> <" + work.docInfoList.ElementAt(idoc).DocFileName +
                                             ">: " + ex.Message + " " + j.ToString( ) + " of 3 attempts.");
                                Thread.Sleep(1500);
                                continue;
                            }
                            break;
                        }
                        if (j > 0 && j < 3)
                        {
                            vt.PrintText("\n\n <Green,>Ok:</> " + (j + 1).ToString( ) + " of 3 attempts.");
                            j = 0;
                        }
                    }
                }

                ////////////////////////////////////////
                //	Create a html file with contents  //
                ////////////////////////////////////////

                for (int idoc = 1; idoc < work.docInfoList.Count; idoc++)
                {
                    if (work.docInfoList.ElementAt(idoc).Exists)
                    {
                        work.elemArray[idoc * 2] = new XElement("p",
                                                                new XElement("a",
                                                                             new XAttribute("href", work.docInfoList.ElementAt(idoc).Url),
                                                                             idoc.ToString( ) + " - " + work.docInfoList.ElementAt(idoc).DocTitle));
                    }
                    else
                    {
                        work.elemArray[idoc * 2] = new XElement("p",
                                                                new XElement("font",
                                                                             new XAttribute("color", "red"),
                                                                             idoc.ToString( ) + " Failed to download !"));
                    }
                    work.elemArray[(idoc * 2) + 1] = new XElement("p",
                                                                  new XAttribute("style", "text-indent: 2em"),
                                                                  new XElement("small", "\t\t" + work.docInfoList.ElementAt(idoc).Url));
                }

                var xDocument = new XDocument(
                    new XDocumentType("html", null, null, null),
                    new XElement("html",
                                 new XElement("head",
                                              new XElement("title", docname),
                                              new XElement("meta",
                                                           new XAttribute("content", "text/html; charset=UTF-8"),
                                                           new XAttribute("http-equiv", "content-type"))),
                                 new XElement("body", work.elemArray)
                                 ));

                var settings = new XmlWriterSettings
                {
                    OmitXmlDeclaration = true,
                    Indent             = true,
                    IndentChars        = "\t"
                };

                using (var writer = XmlWriter.Create(OutFilename + "0.html", settings))
                {
                    xDocument.WriteTo(writer);
                }

                work.totalbytessec = 0;

                work.Print(work.iDoc, fac1[3], 2);

                ///////////////////////////
                //	Convert html to pdf  //
                ///////////////////////////

                System.Diagnostics.Process pProcess = new System.Diagnostics.Process( );

                //pProcess.ErrorDataReceived += pProcess_ErrorDataReceived;

                pProcess.StartInfo.CreateNoWindow   = true;
                pProcess.StartInfo.UseShellExecute  = false;
                pProcess.StartInfo.FileName         = work.exeFileName;
                pProcess.StartInfo.WorkingDirectory = work.curDir;

                pProcess.StartInfo.Arguments = work.options + " \"" + OutFilename + "0.html" + "\" \"" + OutFilename + "0.pdf";

                pProcess.Start( );

                if (work.timeout >= 0)
                {
                    bool state = pProcess.WaitForExit(work.timeout);
                    if (!pProcess.HasExited)
                    {
                        pProcess.Kill( );
                    }
                    if (!state)
                    {
                        try
                        {
                            File.Delete(OutFilename + "0.pdf");
                        }
                        catch (System.IO.IOException)
                        { }
                        vt.PrintText("\n\n <Yellow,>Process killed:</> the file <" + OutFilename + "0.pdf> was not created from <" +
                                     OutFilename + "0.html> because it exeeded timeout.");
                    }
                }
                else
                {
                    pProcess.WaitForExit( );
                }

                work.Print(work.iDoc, fac1[4], 2);

                //////////////////
                //	Merge pdfs  //
                //////////////////

                int doc0len = 0;

                using (PdfDocument outputDocument = new PdfDocument( ))
                {
                    for (int idoc = 0; idoc < work.docInfoList.Count; idoc++)
                    {
                        if (work.docInfoList.ElementAt(idoc).Exists)
                        {
                            using (PdfDocument inputDocument = PdfReader.Open(OutFilename + idoc.ToString( ) + ".pdf", PdfDocumentOpenMode.Import))
                            {
                                if (idoc == 0)
                                {
                                    doc0len = inputDocument.PageCount;
                                }
                                for (int ipag = 0; ipag < inputDocument.PageCount; ipag++)
                                {
                                    AddPage(outputDocument, inputDocument.Pages[ipag]);
                                }
                            }
                        }
                    }
                    outputDocument.Save("temp.pdf");
                    outputDocument.Close( );
                }

                ////////////////////////
                //	Update pdf links  //
                ////////////////////////

                Dictionary <string, int> Url2PagOffset = work.docInfoList.ToDictionary(p => p.Url).ToDictionary(i => i.Key, i => i.Value.PagOffset);

                work.Print(work.iDoc, fac1[5], 2);

                using (PdfDocument pdfDoc = PdfReader.Open("temp.pdf", PdfDocumentOpenMode.Modify))
                {
                    UpdateDoc(pdfDoc, Url2PagOffset, "Contents", doc0len);

                    if (pdfDoc.Pages.Count > 0)
                    {
                        pdfDoc.Info.Creator  = "ConvertWeb2Pdf vs " + prodVer;
                        pdfDoc.Info.Subject  = subject;
                        pdfDoc.Info.Keywords = keywords;
                        pdfDoc.Info.Author   = author;

                        pdfDoc.Save(OutFilename + ".pdf");
                        pdfDoc.Close( );
                    }
                }

                work.Print(work.iDoc, fac1[6], 2);

                //////////////////////////////
                //	Delete temporary files  //
                //////////////////////////////

                try
                {
                    for (int idoc = 0; idoc < work.docInfoList.Count; idoc++)
                    {
                        File.Delete(OutFilename + idoc.ToString( ) + ".pdf");
                    }

                    File.Delete("temp.pdf");
                    File.Delete(OutFilename + "0.html");
                }
                catch (System.IO.IOException)
                { }
            }
            catch (PdfSharp.Pdf.IO.PdfReaderException ex)
            {
                vt.PrintText("\n\n <Red,>PdfSharp.Pdf.IO.PdfReaderException:</> " + ex.Message);
            }
            catch (Exception ex)
            {
                vt.PrintText("\n\n <Red,>Exception:</> " + ex.Message);
            }

            work.Print(work.iDoc, fac1[7], 2);

            Console.CursorVisible = true;
        }