Example #1
0
        /// <summary>
        /// Removes <c>span</c> elements whose second CSS class resolves to a near-zero width.
        /// The width is read from the page stylesheet at
        /// <c>destDir\OEBPS\css\&lt;html file name&gt;.css</c>; a span is dropped when that
        /// value, rounded to the nearest integer, lies between -4 and 4 inclusive.
        /// </summary>
        /// <param name="doc">Parsed HTML document to clean up; it is modified in place.</param>
        /// <param name="destDir">Directory containing the source *.html file and the OEBPS\css folder.</param>
        /// <returns>
        /// <c>null</c> when the document has no page container or the container is empty;
        /// otherwise the same <paramref name="doc"/> instance (possibly with spans removed).
        /// </returns>
        private static HtmlDocument RemoveEmptySpans(HtmlDocument doc, string destDir)
        {
            const double MaxEmptyWidth = 4;

            var pageContainer = doc.DocumentNode.SelectSingleNode("//div[contains(@id, 'page-container')]");
            if (pageContainer == null || string.IsNullOrEmpty(pageContainer.InnerHtml))
            {
                return null;
            }

            var spans = doc.DocumentNode.SelectNodes("//span");
            if (spans == null)
            {
                return doc;
            }

            // Without the source *.html file the stylesheet name cannot be derived,
            // so there is nothing to evaluate; leave the document untouched.
            var html = new DirectoryInfo(destDir).GetFiles("*.html").FirstOrDefault();
            if (html == null)
            {
                return doc;
            }

            // The stylesheet path does not depend on the span, so build it once.
            string cssPath = destDir + "\\OEBPS\\css\\" + Path.GetFileNameWithoutExtension(html.Name) + ".css";

            // Iterate over a copy because nodes are removed from the document while looping.
            foreach (var span in spans.ToList())
            {
                string classAttribute = span.GetAttributeValue("class", null);
                if (classAttribute == null || classAttribute.IndexOf('_') < 0)
                {
                    continue;
                }

                string[] classNames = classAttribute.Split(' ');
                if (classNames.Length < 2)
                {
                    continue;
                }

                // NOTE(review): the parsing below expects the style text to hold at least two
                // ';'-separated declarations, the second being "name:<number>p..." - confirm
                // against CssParserold.GetElementStyle.
                string style = CssParserold.GetElementStyle(cssPath, classNames[1]);
                if (string.IsNullOrEmpty(style))
                {
                    continue;
                }

                string[] declarations = style.Split(';');
                if (declarations.Length < 2)
                {
                    continue;
                }

                string[] nameAndValue = declarations[1].Split(':');
                if (nameAndValue.Length < 2)
                {
                    continue;
                }

                // Keep only the text before the first 'p' (e.g. the "px" unit suffix).
                string numberText = nameAndValue[1].Split('p')[0];

                // CSS numbers always use '.' as the decimal separator, so parse culture-invariantly;
                // malformed values are skipped instead of aborting the whole clean-up.
                double width;
                if (!double.TryParse(
                        numberText,
                        System.Globalization.NumberStyles.Float,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out width))
                {
                    continue;
                }

                // Math.Round uses round-half-to-even, the same rule as Convert.ToInt32(double),
                // but cannot overflow on very large values.
                double rounded = Math.Round(width);
                if (rounded >= -MaxEmptyWidth && rounded <= MaxEmptyWidth)
                {
                    span.Remove();
                }
            }

            return doc;
        }
Example #2
0
/// <summary>
/// Turns the HTML export in <paramref name="destDir"/> into one XHTML file per page.
/// The first *.html file is used as a template: EPUB namespace attributes, a viewport meta
/// tag and an XML declaration are added, scripts and viewer-only elements are removed, and
/// stylesheet links are re-pointed to <c>css/</c>. Every *.PAGE file is then placed into the
/// template's page container and saved as <c>OEBPS\pageNNNN.xhtml</c>. Finally the *.PAGE
/// files and the template are deleted and the stylesheets under <c>OEBPS\css</c> are patched.
/// </summary>
/// <param name="destDir">
/// Directory holding the *.html template, the *.PAGE files and the OEBPS folder.
/// Nothing happens when it does not exist or contains no *.html file.
/// </param>
        public static void ChangeHtmltoXhtml(string destDir)
        {
            if (!Directory.Exists(destDir))
            {
                return;
            }

            // FirstOrDefault so that a directory without a template is a no-op rather than an
            // InvalidOperationException ("Sequence contains no elements").
            var html = new DirectoryInfo(destDir).GetFiles("*.html").FirstOrDefault();
            if (html == null)
            {
                return;
            }

            string baseName     = Path.GetFileNameWithoutExtension(html.Name);
            string cssDir       = destDir + "\\OEBPS\\css\\";
            string pageCssPath  = cssDir + baseName + ".css";
            string baseCssPath  = cssDir + "base.min.css";
            string fancyCssPath = cssDir + "fancy.min.css";

            var doc = new HtmlDocument();
            doc.OptionWriteEmptyNodes = true;
            doc.Load(html.FullName, Encoding.UTF8);

            var htmlAdd = doc.DocumentNode.SelectSingleNode("//html");
            if (htmlAdd != null)
            {
                htmlAdd.SetAttributeValue("xmlns:epub", "http://www.idpf.org/2007/ops");
                htmlAdd.SetAttributeValue("xml:lang", "en-US");
            }

            // Page size comes from the "w0"/"h0" rules of the page stylesheet; only the text
            // before the first '.' of the value is kept.
            // NOTE(review): assumes GetElementStyle returns text shaped like "name:value;" -
            // a different shape makes the indexing below throw; confirm against CssParserold.
            string widthP     = CssParserold.GetElementStyle(pageCssPath, "w0");
            var    pageWidth  = widthP.Split(':')[1].Split(';')[0].Split('.')[0];
            string heightP    = CssParserold.GetElementStyle(pageCssPath, "h0");
            var    pageHeight = heightP.Split(':')[1].Split(';')[0].Split('.')[0];

            var metaInfo = doc.CreateElement("meta");
            metaInfo.SetAttributeValue("name", "viewport");
            metaInfo.SetAttributeValue("content", "width=" + pageWidth + ", height=" + pageHeight);

            var head = doc.DocumentNode.SelectSingleNode("//head");
            if (head != null)
            {
                head.PrependChild(metaInfo);
            }

            var xmlNode = HtmlNode.CreateNode("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
            doc.DocumentNode.PrependChild(xmlNode);

            // Remove all script tags; ToList() snapshots the query before nodes are detached.
            doc.DocumentNode.Descendants().Where(n => n.Name == "script").ToList().ForEach(n => n.Remove());

            // The fancy stylesheet is dropped both from the markup and from disk. This must
            // happen before the hrefs are prefixed below, because the XPath matches the raw href.
            var fancyLink = doc.DocumentNode.SelectSingleNode("//link[@href='fancy.min.css']");
            if (fancyLink != null)
            {
                fancyLink.Remove();
            }
            if (File.Exists(fancyCssPath))
            {
                File.Delete(fancyCssPath);
            }

            // Remaining stylesheet links are re-pointed to the css/ folder.
            foreach (var link in doc.DocumentNode.Descendants().Where(n => n.Name == "link").ToList())
            {
                link.SetAttributeValue("href", "css/" + link.GetAttributeValue("href", null));
            }

            // Elements that are stripped from the template when present.
            string[] unwantedXPaths =
            {
                "//div[@id='outline']",
                "//div[@id='sidebar']",
                "//div[@class='loading-indicator']",
                "//meta[@http-equiv='X-UA-Compatible']",
                "//meta[@name='generator']"
            };
            foreach (var xpath in unwantedXPaths)
            {
                var unwanted = doc.DocumentNode.SelectSingleNode(xpath);
                if (unwanted != null)
                {
                    unwanted.Remove();
                }
            }

            var pageFiles = new DirectoryInfo(destDir).GetFiles("*.PAGE");

            // The container node is the same for every page, so look it up once.
            var container = doc.DocumentNode.SelectSingleNode("//div[@id='page-container']");
            if (container != null)
            {
                foreach (var pageFile in pageFiles)
                {
                    var pageDoc = new HtmlDocument();
                    pageDoc.Load(pageFile.FullName, Encoding.UTF8);

                    var imgs = pageDoc.DocumentNode.SelectNodes("//img");
                    if (imgs != null)
                    {
                        foreach (var img in imgs.ToList())
                        {
                            img.SetAttributeValue("src", "images/" + img.GetAttributeValue("src", null));
                        }
                    }

                    container.RemoveAllChildren();
                    container.AppendChild(pageDoc.DocumentNode);

                    // NOTE(review): ElementsFlags is a static table on HtmlNode, so this setting
                    // presumably outlives this call and also applies to pages loaded in later
                    // iterations; it is deliberately left at its original position.
                    if (HtmlNode.ElementsFlags.ContainsKey("img"))
                    {
                        HtmlNode.ElementsFlags["img"] = HtmlElementFlag.Closed;
                    }

                    // The page number is what remains of the page file name once the
                    // template's base name is stripped from it.
                    int pageCounter = Int32.Parse(Path.GetFileNameWithoutExtension(pageFile.Name).Replace(baseName, ""));
                    string newFileName = "page" + pageCounter.ToString().PadLeft(4, '0') + ".xhtml";

                    var title = doc.DocumentNode.SelectSingleNode("//title");
                    if (title != null)
                    {
                        title.InnerHtml = newFileName;
                    }

                    SaveTargetNameHtmlFile(destDir + "\\OEBPS\\" + newFileName, doc);
                }
            }

            // The sources are removed whether or not a page container was found.
            foreach (var pageFile in pageFiles)
            {
                File.Delete(pageFile.FullName);
            }

            if (File.Exists(html.FullName))
            {
                File.Delete(html.FullName);
            }

            // Font URLs in the page stylesheet are rewritten to point at ../fonts/.
            var cssText = File.ReadAllText(pageCssPath);
            File.WriteAllText(pageCssPath, cssText.Replace("src:url(f", "src:url(../fonts/f"));

            // base.min.css: drop the listed rules and every unicode-bidi override,
            // then write the result back once.
            var baseCssText = File.ReadAllText(baseCssPath);
            baseCssText = CssParser.RemoveCssClassByName(baseCssText, "media screen");
            baseCssText = CssParser.RemoveCssClassByName(baseCssText, "media print");
            baseCssText = CssParser.RemoveCssClassByName(baseCssText, "sidebar");
            baseCssText = baseCssText.Replace("unicode-bidi:bidi-override;", "").Replace("unicode-bidi:bidi-override", "");
            File.WriteAllText(baseCssPath, baseCssText);
        }