/// <summary>
/// Reads a bookmarks HTML file, walks every element of the parsed document and
/// writes a simplified XML rendering of it to the file "bm_all.xml".
/// </summary>
/// <remarks>
/// Element mapping:
/// H3 opens a &lt;directory name="..."&gt; element;
/// HR becomes &lt;separator/&gt;;
/// DT becomes &lt;link name="..." url="..."/&gt; using the first A element found
/// under it (skipped when no A is found or its href is null);
/// DD becomes &lt;desc&gt; holding the trimmed data of its first text child
/// (skipped when that text is empty after trimming).
/// A directory is closed when the end-of-list heuristic at the bottom of the loop fires.
/// NOTE(review): tag names are compared exactly as spelled here ("p" is lowercase,
/// the others uppercase) — presumably this mirrors the casing the parser reports
/// for an exported bookmarks file; confirm before normalizing.
/// </remarks>
/// <param name="filename">Path of the bookmarks HTML file to read.</param>
public static void extract_bookmarks1(string filename)
{
    // One nesting level of output indentation. The original code removed two
    // characters per closed directory (Substring(2)), so a level is two spaces;
    // pushing and popping now always use this same unit.
    const string indentUnit = "  ";

    string html = System.IO.File.ReadAllText(filename);
    afh.HTML.HTMLDocument doc = afh.HTML.HTMLDocument.Parse(html);

    using (System.IO.StreamWriter sw = new System.IO.StreamWriter("bm_all.xml"))
    {
        sw.WriteLine("<?xml version=\"1.0\" encoding=\"utf-8\"?>");
        sw.WriteLine("<bookmarks>");

        string indent = indentUnit;
        // Number of <directory> elements currently open; used to keep the output balanced.
        int openDirectories = 0;

        foreach (afh.HTML.HTMLElement elem in doc.enumAllElements(false))
        {
            switch (elem.tagName)
            {
                case "H3":
                    {
                        sw.Write(indent);
                        sw.WriteLine("<directory name=\"{0}\">",
                            afh.Text.TextUtils.EscapeXml(elem.innerText));
                        indent += indentUnit;
                        openDirectories++;
                        break;
                    }
                case "HR":
                    {
                        sw.Write(indent);
                        sw.WriteLine("<separator/>");
                        break;
                    }
                case "DT":
                    {
                        string name = null;
                        string url = null;
                        // Only the first A element is used.
                        foreach (afh.HTML.HTMLElement a in elem.enumElementsByTagName("A", true))
                        {
                            name = a.innerText;
                            url = a.getAttribute("href", false);
                            break;
                        }
                        if (url != null)
                        {
                            sw.Write(indent);
                            sw.WriteLine("<link name=\"{0}\" url=\"{1}\"/>",
                                afh.Text.TextUtils.EscapeXml(name),
                                afh.Text.TextUtils.EscapeXml(url));
                        }
                        break;
                    }
                case "DD":
                    {
                        string text = "";
                        // Only the first text child is used as the description.
                        foreach (afh.HTML.HTMLNode node in elem.childNodes)
                        {
                            afh.HTML.HTMLTextNode textNode = node as afh.HTML.HTMLTextNode;
                            if (textNode == null)
                            {
                                continue;
                            }
                            // Trim before the emptiness test so whitespace-only text
                            // does not produce an empty <desc></desc>.
                            text = textNode.data.Trim();
                            break;
                        }
                        if (text != "")
                        {
                            sw.Write(indent);
                            sw.WriteLine("<desc>{0}</desc>", afh.Text.TextUtils.EscapeXml(text));
                        }
                        break;
                    }
            }

            // End-of-directory detection: only the last child of its parent can end a list.
            afh.HTML.HTMLElement parent = elem.parentNode;
            if (parent != null && parent.lastChild == elem)
            {
                afh.HTML.HTMLElement directory = null;

                // DL>last or DL>last-child:DD>last
                if (elem.tagName == "p")
                {
                    if (elem.childNodes.Count == 0)
                    {
                        directory = parent;
                    }
                }
                else if (parent.tagName == "p")
                {
                    if (parent.parentNode != null && parent.parentNode.lastChild == parent)
                    {
                        directory = parent.parentNode;
                    }
                }

                if (directory != null)
                {
                    // A trailing DD belongs to the enclosing list, so step out of it.
                    if (directory.tagName == "DD" && directory.parentNode != null)
                    {
                        directory = directory.parentNode;
                    }

                    bool closesNestedList =
                        directory.tagName == "DL"
                        && directory.parentNode != null
                        && directory.parentNode.nodeType == afh.HTML.nodeType.ELEMENT_NODE;

                    // Never close more directories than were opened: that would emit an
                    // unmatched </directory> and underflow the indent string.
                    if (closesNestedList && openDirectories > 0)
                    {
                        openDirectories--;
                        indent = indent.Substring(indentUnit.Length);
                        sw.Write(indent);
                        sw.WriteLine("</directory>");
                    }
                }
            }
        }

        // If the heuristic above missed the end of a directory, close it here so the
        // output remains well-formed XML.
        while (openDirectories > 0)
        {
            openDirectories--;
            indent = indent.Substring(indentUnit.Length);
            sw.Write(indent);
            sw.WriteLine("</directory>");
        }

        sw.WriteLine("</bookmarks>");
    }
}
/// <summary>
/// Parses the bookmarks.html exported by Firefox and builds the
/// corresponding BookmarkTree.
/// </summary>
/// <remarks>
/// Element mapping:
/// H3 creates a BookmarkDir under the current directory and makes it current;
/// HR adds a BookmarkSep;
/// DT adds a BookmarkLink built from the first A element found under it
/// (skipped when there is none);
/// DD sets the description of the most recently created directory or link
/// from the trimmed data of its first text child (ignored when empty).
/// NOTE(review): tag names are compared exactly as spelled here ("p" is lowercase,
/// the others uppercase) — presumably this mirrors the casing the parser reports
/// for an exported bookmarks file; confirm before normalizing.
/// </remarks>
/// <param name="document">The HTMLDocument holding the contents of the bookmarks.html exported by Firefox.</param>
/// <returns>The generated BookmarkTree.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="document"/> is null.</exception>
public static BookmarkTree CreateFromBookmarkHtml(afh.HTML.HTMLDocument document)
{
    if (document == null)
    {
        throw new System.ArgumentNullException("document");
    }

    BookmarkTree ret = new BookmarkTree();
    // Directory that receives newly created nodes.
    BookmarkDir currentDir = ret.RootNode;
    // Most recently created node; a following DD describes this node.
    BookmarkNode currentNode = ret.RootNode;

    foreach (afh.HTML.HTMLElement elem in document.enumAllElements(false))
    {
        switch (elem.tagName)
        {
            case "H3":
                {
                    BookmarkDir newDir = new BookmarkDir();
                    newDir.name = elem.innerText;
                    newDir.ctime = BookmarkNode.UnixTime2DateTime(elem.getAttribute("add_date", false));
                    newDir.utime = BookmarkNode.UnixTime2DateTime(elem.getAttribute("last_modified", false));
                    currentDir.Nodes.Add(newDir);
                    currentDir = newDir;
                    currentNode = newDir;
                    break;
                }
            case "HR":
                {
                    currentNode = new BookmarkSep();
                    currentDir.Nodes.Add(currentNode);
                    break;
                }
            case "DT":
                {
                    // Take the first A element, or null when there is none. A plain loop is
                    // used instead of First() so an entry without a link is skipped rather
                    // than risking an exception on an empty sequence.
                    afh.HTML.HTMLElement a = null;
                    foreach (afh.HTML.HTMLElement candidate in elem.enumElementsByTagName("A", true))
                    {
                        a = candidate;
                        break;
                    }
                    if (a == null)
                    {
                        break;
                    }

                    BookmarkLink newNode = new BookmarkLink();
                    newNode.name = a.innerText;
                    newNode.url = a.getAttribute("href", false);
                    newNode.ctime = BookmarkNode.UnixTime2DateTime(a.getAttribute("add_date", false));
                    newNode.utime = BookmarkNode.UnixTime2DateTime(a.getAttribute("last_modified", false));
                    newNode.charset = a.getAttribute("last_charset", false);
                    currentDir.Nodes.Add(newNode);
                    currentNode = newNode;
                    break;
                }
            case "DD":
                {
                    string text = "";
                    // Only the first text child is used as the description.
                    foreach (afh.HTML.HTMLNode node in elem.childNodes)
                    {
                        afh.HTML.HTMLTextNode textNode = node as afh.HTML.HTMLTextNode;
                        if (textNode == null)
                        {
                            continue;
                        }
                        text = textNode.data.Trim();
                        break;
                    }

                    if (text != "")
                    {
                        // Only directories and links carry a description; other node
                        // kinds (e.g. separators) are left untouched.
                        BookmarkDir describedDir = currentNode as BookmarkDir;
                        BookmarkLink describedLink = currentNode as BookmarkLink;
                        if (describedDir != null)
                        {
                            describedDir.description = text;
                        }
                        else if (describedLink != null)
                        {
                            describedLink.description = text;
                        }
                    }
                    break;
                }
        }

        // Directory-end detection (original author's note: is this really sufficient?).
        // Only the last child of its parent can end a list.
        afh.HTML.HTMLElement parent = elem.parentNode;
        if (parent != null && parent.lastChild == elem)
        {
            afh.HTML.HTMLElement directory = null;

            // DL>last or DL>last-child:DD>last
            if (elem.tagName == "p")
            {
                if (elem.childNodes.Count == 0)
                {
                    directory = parent;
                }
            }
            else if (parent.tagName == "p")
            {
                if (parent.parentNode != null && parent.parentNode.lastChild == parent)
                {
                    directory = parent.parentNode;
                }
            }

            if (directory != null)
            {
                // A trailing DD belongs to the enclosing list, so step out of it.
                if (directory.tagName == "DD" && directory.parentNode != null)
                {
                    directory = directory.parentNode;
                }

                bool closesNestedList =
                    directory.tagName == "DL"
                    && directory.parentNode != null
                    && directory.parentNode.nodeType == afh.HTML.nodeType.ELEMENT_NODE;

                // Go to the outer directory, but never climb above the root.
                if (closesNestedList && currentDir != ret.RootNode)
                {
                    currentDir = (BookmarkDir)currentDir.Parent;
                }
            }
        }
    }

    return ret;
}