Пример #1
0
        /// <summary>
        /// Reads the HTML file at <paramref name="filename"/>, parses it with
        /// <c>afh.HTML.HTMLDocument.Parse</c>, and writes an XML rendition of the
        /// bookmark structure to the relative path <c>bm_all.xml</c>.
        /// </summary>
        /// <remarks>
        /// Element mapping: <c>H3</c> opens a <c>&lt;directory&gt;</c>; <c>HR</c> becomes
        /// <c>&lt;separator/&gt;</c>; a <c>DT</c> whose first <c>A</c> element yields a non-null
        /// <c>href</c> becomes <c>&lt;link&gt;</c>; the first text node directly under a <c>DD</c>
        /// becomes <c>&lt;desc&gt;</c> when it is not empty.
        /// A directory is closed when the end of its enclosing <c>DL</c> is detected. Any
        /// directory still open when the document ends is closed before <c>&lt;/bookmarks&gt;</c>
        /// so that the opening and closing tags stay balanced.
        /// </remarks>
        /// <param name="filename">Path of the HTML file to read.</param>
        public static void extract_bookmarks1(string filename)
        {
            // One nesting level of indentation; the root level is indented by one unit.
            const string indentUnit = "  ";

            string html = System.IO.File.ReadAllText(filename);

            afh.HTML.HTMLDocument doc = afh.HTML.HTMLDocument.Parse(html);

            using (System.IO.StreamWriter sw = new System.IO.StreamWriter("bm_all.xml"))
            {
                sw.WriteLine("<?xml version=\"1.0\" encoding=\"utf-8\"?>");
                sw.WriteLine("<bookmarks>");

                // The current indent doubles as the directory depth counter:
                // its length exceeds one unit exactly when a <directory> is open.
                string indent = indentUnit;
                foreach (afh.HTML.HTMLElement elem in doc.enumAllElements(false))
                {
                    switch (elem.tagName)
                    {
                    case "H3":
                        // Folder heading: open a directory and indent its contents.
                        sw.Write(indent);
                        sw.WriteLine("<directory name=\"{0}\">",
                                     afh.Text.TextUtils.EscapeXml(elem.innerText)
                                     );
                        indent += indentUnit;
                        break;

                    case "HR":
                        sw.Write(indent);
                        sw.WriteLine("<separator/>");
                        break;

                    case "DT":
                        // Only the first A element found under this DT is considered.
                        string name = null;
                        string url  = null;
                        foreach (afh.HTML.HTMLElement a in elem.enumElementsByTagName("A", true))
                        {
                            name = a.innerText;
                            url  = a.getAttribute("href", false);
                            break;
                        }
                        if (url == null)
                        {
                            // No anchor, or an anchor without a usable href: nothing to emit.
                            break;
                        }

                        sw.Write(indent);
                        sw.WriteLine("<link name=\"{0}\" url=\"{1}\"/>",
                                     afh.Text.TextUtils.EscapeXml(name),
                                     afh.Text.TextUtils.EscapeXml(url)
                                     );
                        break;

                    case "DD":
                        // The description is the first text node that is a direct child of the DD.
                        string text = "";
                        foreach (afh.HTML.HTMLNode node in elem.childNodes)
                        {
                            afh.HTML.HTMLTextNode textNode = node as afh.HTML.HTMLTextNode;
                            if (textNode == null)
                            {
                                continue;
                            }

                            text = textNode.data;
                            break;
                        }
                        if (text != "")
                        {
                            sw.Write(indent);
                            sw.WriteLine("<desc>{0}</desc>",
                                         afh.Text.TextUtils.EscapeXml(text.Trim())
                                         );
                        }
                        break;
                    }

                    // Directory-end detection: only the last child of its parent can end a list.
                    if (elem.parentNode.lastChild == elem)
                    {
                        afh.HTML.HTMLElement directory = null;
                        // DL>last or DL>last-child:DD>last
                        // NOTE(review): "p" is compared in lowercase while the other tag names are
                        // uppercase; presumably the parser preserves the source casing -- confirm.
                        if (elem.tagName == "p")
                        {
                            // An empty trailing <p>: its parent is the candidate list.
                            if (elem.childNodes.Count == 0)
                            {
                                directory = elem.parentNode;
                            }
                        }
                        else if (elem.parentNode.tagName == "p")
                        {
                            // Last element inside a <p> that is itself the last child:
                            // the <p>'s parent is the candidate list.
                            afh.HTML.HTMLElement p = elem.parentNode;
                            if (p.parentNode != null && p.parentNode.lastChild == p)
                            {
                                directory = p.parentNode;
                            }
                        }

                        if (directory != null)
                        {
                            // When the candidate is a DD, the list is one level further up.
                            if (directory.tagName == "DD" && directory.parentNode != null)
                            {
                                directory = directory.parentNode;
                            }
                            if (directory.tagName == "DL" && directory.parentNode != null && directory.parentNode.nodeType == afh.HTML.nodeType.ELEMENT_NODE)
                            {
                                // Close a directory only when one is actually open. Without this
                                // guard an unmatched list end would emit a stray </directory> and
                                // eventually make Substring throw on an empty indent.
                                if (indent.Length > indentUnit.Length)
                                {
                                    indent = indent.Substring(indentUnit.Length);
                                    sw.Write(indent);
                                    sw.WriteLine("</directory>");
                                }
                            }
                        }
                    }
                }

                // Close directories whose end was never detected so the XML stays well-formed.
                while (indent.Length > indentUnit.Length)
                {
                    indent = indent.Substring(indentUnit.Length);
                    sw.Write(indent);
                    sw.WriteLine("</directory>");
                }

                sw.WriteLine("</bookmarks>");
            }
        }
Пример #2
0
        /// <summary>
        /// Parses the bookmarks.html exported by Firefox and builds the
        /// BookmarkTree that corresponds to its contents.
        /// </summary>
        /// <remarks>
        /// Element mapping: <c>H3</c> creates a <c>BookmarkDir</c> and makes it the current
        /// directory; <c>HR</c> adds a <c>BookmarkSep</c>; a <c>DT</c> containing an <c>A</c>
        /// element adds a <c>BookmarkLink</c>; the first text node directly under a <c>DD</c>
        /// becomes the description of the most recently added directory or link.
        /// </remarks>
        /// <param name="document">An HTMLDocument holding the contents of the bookmarks.html exported by Firefox.</param>
        /// <returns>The generated BookmarkTree.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="document"/> is null.</exception>
        public static BookmarkTree CreateFromBookmarkHtml(afh.HTML.HTMLDocument document)
        {
            if (document == null)
            {
                throw new System.ArgumentNullException("document");
            }

            BookmarkTree ret         = new BookmarkTree();
            BookmarkDir  currentDir  = ret.RootNode; // directory that receives new nodes
            BookmarkNode currentNode = ret.RootNode; // node a following DD description belongs to

            foreach (afh.HTML.HTMLElement elem in document.enumAllElements(false))
            {
                switch (elem.tagName)
                {
                case "H3": {
                    // Folder heading: create the directory and descend into it.
                    BookmarkDir newDir = new BookmarkDir();
                    newDir.name  = elem.innerText;
                    newDir.ctime = BookmarkNode.UnixTime2DateTime(elem.getAttribute("add_date", false));
                    newDir.utime = BookmarkNode.UnixTime2DateTime(elem.getAttribute("last_modified", false));

                    currentDir.Nodes.Add(newDir);
                    currentDir  = newDir;
                    currentNode = newDir;
                    break;
                }

                case "HR":
                    currentDir.Nodes.Add(currentNode = new BookmarkSep());
                    break;

                case "DT": {
                    // Take the first A element, or null when there is none. An explicit loop
                    // is used so that a DT without an anchor is skipped instead of raising an
                    // exception from an empty sequence.
                    afh.HTML.HTMLElement a = null;
                    foreach (afh.HTML.HTMLElement candidate in elem.enumElementsByTagName("A", true))
                    {
                        a = candidate;
                        break;
                    }
                    if (a == null)
                    {
                        break;
                    }

                    BookmarkLink newNode = new BookmarkLink();
                    newNode.name    = a.innerText;
                    newNode.url     = a.getAttribute("href", false);
                    newNode.ctime   = BookmarkNode.UnixTime2DateTime(a.getAttribute("add_date", false));
                    newNode.utime   = BookmarkNode.UnixTime2DateTime(a.getAttribute("last_modified", false));
                    newNode.charset = a.getAttribute("last_charset", false);

                    currentDir.Nodes.Add(newNode);
                    currentNode = newNode;
                    break;
                }

                case "DD":
                    // The description is the first text node that is a direct child of the DD.
                    string text = "";
                    foreach (afh.HTML.HTMLNode node in elem.childNodes)
                    {
                        afh.HTML.HTMLTextNode textNode = node as afh.HTML.HTMLTextNode;
                        if (textNode == null)
                        {
                            continue;
                        }

                        text = textNode.data.Trim();
                        break;
                    }
                    if (text == "")
                    {
                        break;
                    }

                    // Attach the text to the most recent node when it is a directory or a
                    // link; for any other node type (e.g. a separator) it is dropped.
                    BookmarkDir isDir = currentNode as BookmarkDir;
                    if (isDir != null)
                    {
                        isDir.description = text;
                        break;
                    }

                    BookmarkLink isLnk = currentNode as BookmarkLink;
                    if (isLnk != null)
                    {
                        isLnk.description = text;
                    }
                    break;
                }

                // Directory-end detection (original author's note: is this really sufficient?)
                // Only the last child of its parent can end a list.
                if (elem.parentNode.lastChild == elem)
                {
                    afh.HTML.HTMLElement directory = null;
                    // DL>last or DL>last-child:DD>last
                    // NOTE(review): "p" is compared in lowercase while the other tag names are
                    // uppercase; presumably the parser preserves the source casing -- confirm.
                    if (elem.tagName == "p")
                    {
                        // An empty trailing <p>: its parent is the candidate list.
                        if (elem.childNodes.Count == 0)
                        {
                            directory = elem.parentNode;
                        }
                    }
                    else if (elem.parentNode.tagName == "p")
                    {
                        // Last element inside a <p> that is itself the last child:
                        // the <p>'s parent is the candidate list.
                        afh.HTML.HTMLElement p = elem.parentNode;
                        if (p.parentNode != null && p.parentNode.lastChild == p)
                        {
                            directory = p.parentNode;
                        }
                    }

                    if (directory != null)
                    {
                        // When the candidate is a DD, the list is one level further up.
                        if (directory.tagName == "DD" && directory.parentNode != null)
                        {
                            directory = directory.parentNode;
                        }
                        if (directory.tagName == "DL" && directory.parentNode != null && directory.parentNode.nodeType == afh.HTML.nodeType.ELEMENT_NODE)
                        {
                            // Go to the outer directory, but never above the root.
                            if (currentDir != ret.RootNode)
                            {
                                currentDir = (BookmarkDir)currentDir.Parent;
                            }
                        }
                    }
                }
            }

            return(ret);
        }