/// <summary>
/// Removes <c>span</c> elements whose second CSS class resolves to a negligible width
/// (a value that rounds to the range -4..4).
/// </summary>
/// <param name="doc">Parsed HTML document to clean up; matching spans are removed from it in place.</param>
/// <param name="destDir">
/// Directory that contains the source <c>.html</c> file and the <c>OEBPS\css</c> folder holding
/// the stylesheet named after that file.
/// </param>
/// <returns>
/// The same <paramref name="doc"/> instance, or <c>null</c> when the document has no div whose id
/// contains "page-container" or that div has no inner HTML.
/// </returns>
private static HtmlDocument RemoveEmptySpans(HtmlDocument doc, string destDir)
{
    var pageContainer = doc.DocumentNode.SelectSingleNode("//div[contains(@id, 'page-container')]");
    if (pageContainer == null || string.IsNullOrEmpty(pageContainer.InnerHtml))
    {
        return null;
    }

    var spans = doc.DocumentNode.SelectNodes("//span");
    if (spans == null)
    {
        return doc;
    }

    // FirstOrDefault rather than First: without a source .html file there is no stylesheet
    // name to derive, so the document is returned untouched instead of throwing.
    var html = new DirectoryInfo(destDir).GetFiles("*.html").FirstOrDefault();
    if (html == null)
    {
        return doc;
    }

    // The stylesheet path does not depend on the span, so it is built once.
    string cssPath = destDir + "\\OEBPS\\css\\" + Path.GetFileNameWithoutExtension(html.Name) + ".css";

    // Many spans share a class; remember the verdict per class so the stylesheet is queried
    // once per distinct class. Assumes GetElementStyle returns the same text for the same
    // (file, class) pair during this call - TODO confirm against CssParserold.
    var removableByClass = new Dictionary<string, bool>();

    // Iterate over a snapshot because nodes are removed from the document while looping.
    foreach (var span in spans.ToList())
    {
        var classAttribute = span.GetAttributeValue("class", null);
        if (classAttribute == null || !classAttribute.Contains('_'))
        {
            continue;
        }

        var classNames = classAttribute.Split(' ');
        if (classNames.Length < 2)
        {
            continue;
        }

        // Only the second class token is inspected, as in the original implementation.
        string spacerClass = classNames[1];

        bool removable;
        if (!removableByClass.TryGetValue(spacerClass, out removable))
        {
            removable = IsNegligibleSpanWidth(CssParserold.GetElementStyle(cssPath, spacerClass));
            removableByClass[spacerClass] = removable;
        }

        if (removable)
        {
            span.Remove();
        }
    }

    return doc;
}

/// <summary>
/// Decides whether a style string describes a width that rounds to the range -4..4.
/// </summary>
/// <param name="style">
/// Style text as returned by <c>CssParserold.GetElementStyle</c>. The numeric value is read from
/// the second ';'-separated declaration, after its ':' and before the first 'p' (the position
/// the original parsing relied on - TODO confirm the exact format against CssParserold).
/// </param>
/// <returns><c>true</c> when a value could be parsed and is negligible; otherwise <c>false</c>.</returns>
private static bool IsNegligibleSpanWidth(string style)
{
    if (string.IsNullOrEmpty(style))
    {
        return false;
    }

    var declarations = style.Split(';');
    if (declarations.Length < 2)
    {
        return false;
    }

    var nameAndValue = declarations[1].Split(':');
    if (nameAndValue.Length < 2)
    {
        return false;
    }

    // Drop the unit suffix by cutting at the first 'p'.
    string number = nameAndValue[1].Split('p')[0];

    // CSS numbers always use '.' as the decimal separator, so parse with the invariant culture;
    // the current culture would misread "3.5" on machines configured for ',' decimals.
    double width;
    if (!double.TryParse(
            number,
            System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture,
            out width))
    {
        return false;
    }

    // Math.Round uses the same round-half-to-even rule as Convert.ToInt32 but cannot overflow
    // on huge or NaN values (NaN simply fails the comparison).
    return Math.Abs(Math.Round(width)) <= 4;
}
/// <summary>
/// Uses the first <c>.html</c> file in <paramref name="destDir"/> as a template, embeds each
/// <c>.PAGE</c> fragment into its page container and saves the result as
/// <c>OEBPS\pageNNNN.xhtml</c>. Afterwards the <c>.PAGE</c> files and the template are deleted
/// and the stylesheets under <c>OEBPS\css</c> are patched.
/// </summary>
/// <param name="destDir">
/// Working directory holding the <c>.html</c> template, the <c>.PAGE</c> fragments and the
/// <c>OEBPS</c> folder. Nothing happens when the directory does not exist or contains no
/// <c>.html</c> file.
/// </param>
public static void ChangeHtmltoXhtml(string destDir)
{
    if (!Directory.Exists(destDir))
    {
        return;
    }

    // FirstOrDefault rather than First: First throws on an empty result, which made the
    // null check that followed it unreachable.
    var html = new DirectoryInfo(destDir).GetFiles("*.html").FirstOrDefault();
    if (html == null)
    {
        return;
    }

    string baseName = Path.GetFileNameWithoutExtension(html.Name);
    string cssDir = destDir + "\\OEBPS\\css\\";
    string pageCssPath = cssDir + baseName + ".css";
    string baseCssPath = cssDir + "base.min.css";
    string fancyCssPath = cssDir + "fancy.min.css";

    var doc = new HtmlDocument();
    doc.OptionWriteEmptyNodes = true;
    doc.Load(html.FullName, Encoding.UTF8);

    var htmlElement = doc.DocumentNode.SelectSingleNode("//html");
    if (htmlElement != null)
    {
        htmlElement.SetAttributeValue("xmlns:epub", "http://www.idpf.org/2007/ops");
        htmlElement.SetAttributeValue("xml:lang", "en-US");
    }

    // Page size comes from the "w0" / "h0" rules of the page stylesheet: the text between the
    // first ':' and the next ';', truncated at the first '.'.
    string widthStyle = CssParserold.GetElementStyle(pageCssPath, "w0");
    var pageWidth = widthStyle.Split(':')[1].Split(';')[0].Split('.')[0];
    string heightStyle = CssParserold.GetElementStyle(pageCssPath, "h0");
    var pageHeight = heightStyle.Split(':')[1].Split(';')[0].Split('.')[0];

    var viewportMeta = doc.CreateElement("meta");
    viewportMeta.SetAttributeValue("name", "viewport");
    viewportMeta.SetAttributeValue("content", "width=" + pageWidth + ", height=" + pageHeight);

    var head = doc.DocumentNode.SelectSingleNode("//head");
    if (head != null)
    {
        head.PrependChild(viewportMeta);
    }

    var xmlDeclaration = HtmlNode.CreateNode("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    doc.DocumentNode.PrependChild(xmlDeclaration);

    // Remove every script element; ToList snapshots the query before the tree is mutated.
    foreach (var script in doc.DocumentNode.Descendants().Where(n => n.Name == "script").ToList())
    {
        script.Remove();
    }

    // The fancy stylesheet link must be removed before the hrefs are prefixed below,
    // otherwise this XPath would no longer match it.
    var fancyLink = doc.DocumentNode.SelectSingleNode("//link[@href='fancy.min.css']");
    if (fancyLink != null)
    {
        fancyLink.Remove();
    }

    if (File.Exists(fancyCssPath))
    {
        File.Delete(fancyCssPath);
    }

    // Point the remaining stylesheet links at the css/ sub-folder.
    foreach (var link in doc.DocumentNode.Descendants().Where(n => n.Name == "link").ToList())
    {
        link.SetAttributeValue("href", "css/" + link.GetAttributeValue("href", null));
    }

    // Viewer chrome and metadata that are dropped from the template (first match of each only).
    string[] obsoleteNodeXPaths =
    {
        "//div[@id='outline']",
        "//div[@id='sidebar']",
        "//div[@class='loading-indicator']",
        "//meta[@http-equiv='X-UA-Compatible']",
        "//meta[@name='generator']"
    };
    foreach (var xpath in obsoleteNodeXPaths)
    {
        var obsoleteNode = doc.DocumentNode.SelectSingleNode(xpath);
        if (obsoleteNode != null)
        {
            obsoleteNode.Remove();
        }
    }

    var pageFiles = new DirectoryInfo(destDir).GetFiles("*.PAGE");

    // The container belongs to the template and is reused for every page, so look it up once.
    var container = doc.DocumentNode.SelectSingleNode("//div[@id='page-container']");
    var title = doc.DocumentNode.SelectSingleNode("//title");

    if (container != null)
    {
        foreach (var pageFile in pageFiles)
        {
            var pageDoc = new HtmlDocument();
            pageDoc.Load(pageFile.FullName, Encoding.UTF8);

            var images = pageDoc.DocumentNode.SelectNodes("//img");
            if (images != null)
            {
                foreach (var image in images)
                {
                    image.SetAttributeValue("src", "images/" + image.GetAttributeValue("src", null));
                }
            }

            // Replace the previous page's content with the current fragment.
            container.RemoveAllChildren();
            container.AppendChild(pageDoc.DocumentNode);

            // NOTE(review): this static flag is only switched after the first fragment has
            // already been loaded, so the first page is parsed under the previous setting -
            // confirm that this ordering is intended.
            if (HtmlNode.ElementsFlags.ContainsKey("img"))
            {
                HtmlNode.ElementsFlags["img"] = HtmlElementFlag.Closed;
            }

            // The page number is what remains of the fragment's file name once the template's
            // base name is stripped; an unexpected name still raises FormatException.
            string pageNumberText = Path.GetFileNameWithoutExtension(pageFile.Name).Replace(baseName, "");
            int pageNumber = int.Parse(pageNumberText, System.Globalization.CultureInfo.InvariantCulture);
            string newFileName = "page"
                + pageNumber.ToString(System.Globalization.CultureInfo.InvariantCulture).PadLeft(4, '0')
                + ".xhtml";

            if (title != null)
            {
                title.InnerHtml = newFileName;
            }

            SaveTargetNameHtmlFile(destDir + "\\OEBPS\\" + newFileName, doc);
        }
    }

    // The sources are deleted unconditionally, exactly as before.
    foreach (var pageFile in pageFiles)
    {
        File.Delete(pageFile.FullName);
    }

    if (File.Exists(html.FullName))
    {
        File.Delete(html.FullName);
    }

    // Font URLs in the page stylesheet are redirected to the ../fonts/ folder.
    var pageCssText = File.ReadAllText(pageCssPath);
    File.WriteAllText(pageCssPath, pageCssText.Replace("src:url(f", "src:url(../fonts/f"));

    // All base stylesheet edits are applied in memory and written once; the original wrote the
    // file, read it straight back and wrote it again with the same end result.
    var baseCssText = File.ReadAllText(baseCssPath);
    baseCssText = CssParser.RemoveCssClassByName(baseCssText, "media screen");
    baseCssText = CssParser.RemoveCssClassByName(baseCssText, "media print");
    baseCssText = CssParser.RemoveCssClassByName(baseCssText, "sidebar");
    baseCssText = baseCssText
        .Replace("unicode-bidi:bidi-override;", "")
        .Replace("unicode-bidi:bidi-override", "");
    File.WriteAllText(baseCssPath, baseCssText);
}