PrependChild() public method

Adds the specified node to the beginning of the list of children of this node.
public PrependChild ( HtmlNode newChild ) : HtmlNode
newChild HtmlNode The node to add. May not be null.
Returns: HtmlNode — the node that was added.
    //=========================================================================================
    // When the HTML has no backing file of its own, relative addresses for images and links
    // cannot be resolved, so a <base href> pointing at the directory of the currently open
    // file is injected to make them resolvable.
    //
    // html    : the HTML markup to process.
    // returns : the re-serialized HTML with a <base href> element added when none with a
    //           non-empty href was present; the original html unchanged if anything fails.
    //=========================================================================================
    private string RelativeToAbsolute(string html)
    {
        try
        {
            // Normalize the current file path to forward slashes before deriving its directory.
            string strBaseUrl = strCurFileFullPath.Replace("\\", "/");

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            // Look for an existing <base> element that carries a non-empty href.
            // A <base> without any href attribute (e.g. <base target="...">) must not throw:
            // previously that raised a NullReferenceException which the catch below swallowed,
            // so the document was returned without any base href at all.
            bool isExistBaseHref = false;
            foreach (var nodeBase in doc.DocumentNode.Descendants("base"))
            {
                var hrefAttribute = nodeBase.Attributes["href"];
                if (hrefAttribute != null && !string.IsNullOrEmpty(hrefAttribute.Value))
                {
                    isExistBaseHref = true;
                    break;
                }
            }

            // If no base href is specified, point it at the directory of the currently open
            // file so that relative paths are resolved against that directory.
            if (!isExistBaseHref)
            {
                string basedir = System.IO.Path.GetDirectoryName(strBaseUrl);
                HtmlAgilityPack.HtmlNode baseNode = HtmlAgilityPack.HtmlNode.CreateNode("<base href=''>");
                baseNode.Attributes["href"].Value = basedir + "\\";

                // Prefer <head>; fall back to <html>; with neither present there is no choice
                // but to put the element at the very top of the document.
                HtmlAgilityPack.HtmlNode nodeHead = doc.DocumentNode.SelectSingleNode("/html/head");
                HtmlAgilityPack.HtmlNode nodeHtml = doc.DocumentNode.SelectSingleNode("/html");
                if (nodeHead != null)
                {
                    nodeHead.PrependChild(baseNode);
                }
                else if (nodeHtml != null)
                {
                    nodeHtml.PrependChild(baseNode);
                }
                else
                {
                    doc.DocumentNode.PrependChild(baseNode);
                }
            }

            using (StringWriter writer = new StringWriter())
            {
                doc.Save(writer);
                return writer.ToString();
            }
        }
        catch (Exception ex)
        {
            // Best effort: on any failure fall through and hand back the input untouched,
            // but leave a trace so the failure is not completely invisible while debugging.
            System.Diagnostics.Debug.WriteLine(ex.ToString());
        }

        return html;
    }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses the "#content" element of the response into <c>patternObject</c> (title,
        /// categories, pattern links and relation types, cleaned content) and writes the
        /// object as JSON to the file named by <c>Pattern.GetFileName</c>.
        /// </summary>
        /// <param name="response">The crawled page response to parse.</param>
        /// <exception cref="InvalidOperationException">
        /// The content has no element with id "firstHeading" or "mw-content-text".
        /// </exception>
        public override void Parse(Response response)
        {
            // Load only the #content element of the page into its own HtmlAgilityPack document.
            HtmlDocument contentDocument = new HtmlDocument();
            contentDocument.LoadHtml(response.Css("#content").First().OuterHtml);
            HtmlAgilityPack.HtmlNode bodyNode = contentDocument.DocumentNode;

            HtmlAgilityPack.HtmlNode headingNode = bodyNode.SelectSingleNode("//*[@id=\"firstHeading\"]");
            HtmlAgilityPack.HtmlNode contentNode = bodyNode.SelectSingleNode("//*[@id=\"mw-content-text\"]");
            if (headingNode == null || contentNode == null)
            {
                throw new InvalidOperationException(
                    "The page content is missing the 'firstHeading' or 'mw-content-text' element.");
            }

            // Remove the table of contents to save space and later client-side processing time.
            // Note: XPath expressions starting with "//" search the whole document, not just
            // the node they are invoked on.
            HtmlAgilityPack.HtmlNode tocNode = contentNode.SelectSingleNode("//*[@id=\"toc\"]");
            if (tocNode != null)
            {
                tocNode.Remove();
            }

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var commentNodes = contentNode.SelectNodes("//comment()");
            if (commentNodes != null)
            {
                foreach (var commentNode in commentNodes)
                {
                    commentNode.Remove();
                }
            }

            contentNode.PrependChild(headingNode);

            // Set the title only after comments were stripped, so it reflects the cleaned heading.
            patternObject.Title = headingNode.InnerHtml;

            var links = contentNode.SelectNodes("//a/@href");
            if (links != null)
            {
                foreach (var link in links)
                {
                    string href = link.Attributes["href"].Value;

                    // Skip red links (the target page doesn't exist).
                    if (href.Contains("redlink=1"))
                    {
                        continue;
                    }

                    // Skip links that point back to this page (ignoring any #fragment).
                    if (href.Split('#').First() == response.FinalUrl)
                    {
                        continue;
                    }

                    if (link.Ancestors().Any(node => node.Id == "catlinks"))
                    {
                        // Inside the "category links" part of the page: record the category,
                        // except for the link to the "Categories" special page itself.
                        if (link.InnerText != "Categories")
                        {
                            patternObject.Categories.Add(link.InnerText);
                        }
                    }
                    else
                    {
                        // Assume a normal text-body link; make sure it is known.
                        patternObject.CreateOrGetPatternLink(link.InnerText);
                    }

                    // Add relation info if this link sits under the "Relations" h2 section.
                    // Each heading is looked up once instead of repeatedly.
                    HtmlAgilityPack.HtmlNode sectionHeading = GetNodeReleventPageHeading(link, "h2");
                    if (sectionHeading != null && sectionHeading.InnerText == "Relations")
                    {
                        HtmlAgilityPack.HtmlNode relationHeading = GetNodeReleventPageHeading(link, "h3");

                        // Without an h3 there is no relation name to record for this link.
                        if (relationHeading != null)
                        {
                            string relationName = relationHeading.InnerText;

                            // If the relevant h4 starts after the relevant h3, assume it is a
                            // "with x" sub-category of the relation (as in "Can Instantiate").
                            HtmlAgilityPack.HtmlNode subHeading = GetNodeReleventPageHeading(link, "h4");
                            if (subHeading != null &&
                                relationHeading.InnerStartIndex < subHeading.InnerStartIndex)
                            {
                                relationName = relationHeading.InnerText + " " + subHeading.InnerText;
                            }

                            patternObject.CreateOrGetPatternLink(link.InnerText).Type.Add(relationName);
                        }
                    }
                }
            }

            // Store a cleaned copy of the content HTML for the JSON data.
            patternObject.Content = ProcessPageContentToString(contentNode);

            string json = JsonConvert.SerializeObject(patternObject);

            File.WriteAllText(Pattern.GetFileName(patternObject.Title), json);
        }