Пример #1
0
        /// <summary>
        /// Runs the whole conversion for one directory: resolves the version text,
        /// normalizes the root folder path, then calls the parse, index and
        /// book-building steps in that order.
        /// </summary>
        /// <param name="dir">Directory to process; it is converted to a full path.</param>
        /// <param name="ver">
        /// Version text. When null, <c>FindVer(dir)</c> is tried and, if that also
        /// yields null, the current UTC date in short format is used.
        /// </param>
        public static void ProcessDir(string dir, string ver = null)
        {
            // NOTE(review): ToShortDateString() formats with the current culture,
            // so the fallback text differs between machines - confirm this is acceptable.
            ver      = ver ?? FindVer(dir) ?? DateTime.UtcNow.ToShortDateString();
            DVersion = ver;
            Console.WriteLine($"version: {ver}");

            // The substitution is stored back into TOP, so a later call finds no
            // "%VER%" placeholder left to replace.
            TOP = TOP.Replace("%VER%", ver);

            // Keep the root path terminated with a separator. The comparison is
            // ordinal because a path separator is not linguistic text.
            RootFolder = Path.GetFullPath(dir);
            if (!RootFolder.EndsWith(Path.DirectorySeparatorChar.ToString(), StringComparison.Ordinal))
            {
                RootFolder += Path.DirectorySeparatorChar;
            }

            ParseDir(RootFolder);
            Console.WriteLine("\n");

            IndexDir(RootFolder);
            SaveIndex(CombineDir(RootFolder, "Index.hhk"));
            Console.WriteLine("\n");

            BooksFromDir(RootFolder);
            Console.WriteLine("\n");
        }
Пример #2
0
        // Changing HTMLs

        /// <summary>
        /// Rewrites one HTML file in place: rebuilds the "top" block from the
        /// <c>TOP</c> template, removes the tools and sub-navigation blocks,
        /// replaces the arrow icons with plain characters, prunes script tags,
        /// turns links whose target is not found into plain text, shortens the
        /// title and strips blank lines. The resulting title is recorded in
        /// <c>Titles</c> under the file path.
        /// </summary>
        /// <param name="html">Path of the HTML file; it is loaded and saved back as UTF-8.</param>
        /// <param name="img">Text substituted for the <c>%IMG%</c> placeholder of <c>TOP</c>.</param>
        /// <param name="jqry">Value written into the <c>src</c> attribute of the inserted script tag.</param>
        static void Parse(string html, string img, string jqry)
        {
            var doc = new HtmlDocument();

            doc.Load(html, Encoding.UTF8);
            var root = doc.DocumentNode;

            Console.WriteLine($"parsing {html}");

            // change TOP: the copyright block is taken out of the page and its
            // markup is embedded into the TOP template instead.
            var del  = root.SelectSingleNode("//div[@id=\"copyright\"]");
            var cprt = del?.InnerHtml; // null when the block is absent; Replace then just drops "%TEXT%"

            del?.Remove();
            var top = TOP.Replace("%TEXT%", cprt).Replace("%IMG%", img);

            del = root.SelectSingleNode("//div[@id=\"top\"]");
            del?.ParentNode.ReplaceChild(
                HtmlNode.CreateNode(top), del);

            // delete ISSUES
            del = root.SelectSingleNode("//div[@id=\"tools\"]");
            del?.Remove();

            // delete RIGHT TREE, CHM has its own
            del = root.SelectSingleNode("//div[@class=\"subnav-helper\"]");
            del?.Remove();
            del = root.SelectSingleNode("//div[@class=\"subnav\"]");
            del?.Remove();

            // replace ARROWS (no need to load a font for the symbols "<" and ">")
            // <i class="fa fa-angle-left" aria-hidden="true"></i>
            var upd = root.SelectSingleNode("//i[@class=\"fa fa-angle-left\"]");

            upd?.ParentNode.ReplaceChild(
                HtmlNode.CreateNode("<b>&lt;</b>"), upd);
            // <i class="fa fa-angle-right" aria-hidden="true"></i>
            upd = root.SelectSingleNode("//i[@class=\"fa fa-angle-right\"]");
            upd?.ParentNode.ReplaceChild(
                HtmlNode.CreateNode("<b>&gt;</b>"), upd);

            var dir = Path.GetDirectoryName(html);

            // SCRIPTs: delete all except jQuery & listanchors.
            // A script matches when its inline text (lower-cased, spaces removed)
            // followed by its src attribute contains the marker. The src value is
            // compared as written, i.e. case-sensitively.
            bool ContainsMarker(HtmlNode hn, string what)
            {
                var txt  = (hn.InnerText ?? "").ToLowerInvariant().Replace(" ", "");
                var attr = hn.GetAttributeValue("src", "");

                return (txt + attr).Contains(what);
            }

            // SelectNodes returns null when nothing matches, hence the fallback.
            var scripts = (root.SelectNodes("//script")
                           ?? Enumerable.Empty<HtmlNode>()).ToArray();
            var delScripts = scripts;

            // Materialized once: the result is counted, indexed and used in Except
            // while the document is being modified in between.
            var anchors = scripts.Where(n => ContainsMarker(n, "listanchors")).ToArray();

            if (anchors.Length == 2)
            {
                // Exactly two listanchors scripts: put the jQuery reference in
                // front of the first one and keep both.
                var fst = anchors[0];
                var htm = $"<script type=\"text/javascript\" src=\"{jqry}\"/>\n";
                fst.ParentNode.InsertBefore(HtmlNode.CreateNode(htm), fst);
                delScripts = scripts.Except(anchors).ToArray();
            }
            // delete others (every script when the expected pair was not found)
            foreach (var scr in delScripts)
            {
                scr.Remove();
            }

            // replace hrefs that are not in-page anchors and whose target does not
            // exist with their text. A page without any href makes SelectNodes
            // return null, so fall back to an empty sequence.
            var linked = (root.SelectNodes("//*[@href]")
                          ?? Enumerable.Empty<HtmlNode>()).ToArray();
            foreach (var nod in linked)
            {
                var href = nod.Attributes["href"].Value.Trim().ToLowerInvariant();

                // An empty href has no first character to inspect; it points at
                // the current document, so it is left alone like "#..." anchors.
                if (href.Length == 0 || href[0] == '#')
                {
                    continue;
                }
                if (!FileExists(dir, href))
                {
                    nod.ParentNode.ReplaceChild(
                        HtmlNode.CreateNode(nod.InnerText), nod);
                }
            }

            // remove "- D programming language" from title
            const string Dlang = "- d programming language";

            upd = root.SelectSingleNode("//title");
            if ((upd?.InnerText ?? "").EndsWith(Dlang, StringComparison.OrdinalIgnoreCase))
            {
                var ih = upd.InnerHtml;
                upd.InnerHtml = ih.Substring(0, ih.Length - Dlang.Length).Trim();
            }

            // Without a <title> the path relative to the root folder is used.
            Titles[html] = upd?.InnerText ?? html.Replace(RootFolder, "");

            // remove empty lines
            root.InnerHtml = Regex.Replace(root.InnerHtml, @"^\s+$[\r\n]*", "", RegexOptions.Multiline);
            doc.Save(html, Encoding.UTF8);

            // bonus:
            // at this point our HTMLs take 15Mb on disk instead of 85Mb (for v2.088.0)
        }