Beispiel #1
0
        /// <summary>
        /// Feeds the given text to a <c>ParseHTML</c> instance and hands the value of
        /// every HREF and SRC attribute it finds to <c>ProcessLink</c>.
        /// </summary>
        /// <param name="page">the text assigned to the parser's <c>Source</c></param>
        private void ProcessPage(string page)
        {
            ParseHTML parser = new ParseHTML();
            parser.Source = page;

            while (true)
            {
                if (parser.Eof())
                {
                    break;
                }

                char current = parser.Parse();
                if (current != 0)
                {
                    // Only a 0 result is acted upon (presumably the parser's
                    // "tag encountered" marker -- confirm against ParseHTML).
                    continue;
                }

                // HREF is handled before SRC; each is read from its own GetTag() call.
                Attribute href = parser.GetTag()["HREF"];
                if (href != null)
                {
                    ProcessLink(href.Value);
                }

                Attribute src = parser.GetTag()["SRC"];
                if (src != null)
                {
                    ProcessLink(src.Value);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Walks the supplied HTML with <c>ParseHTML</c> and forwards the HREF and SRC
        /// attribute values of each tag to <c>ProcessLink</c>. When <c>spider.Flag</c>
        /// is 1, the "IMG" entry of the tag is additionally resolved against <c>uri</c>,
        /// offered to <c>spider.addIMG</c> and, when that returns true, passed to
        /// <c>getpage</c>.
        /// </summary>
        /// <param name="page">the HTML document to parse</param>
        private void ProcessPage(string page)
        {
            ParseHTML parse = new ParseHTML();

            parse.Source = page; // page is the HTML document to parse
            while (!parse.eof()) // loop over all text and tags contained in the HTML document
            {
                char ch = parse.Parse();
                // Parse returns the characters of the document that are not part of an
                // HTML tag; when it encounters a tag it returns 0 instead.
                // Once a tag has been encountered, it is inspected through parse.get().
                if (ch == 0)
                {
                    Attribute a = parse.get()["HREF"];
                    if (a != null)
                    {
                        ProcessLink(a.Value);           // extract the value of the HREF attribute
                    }
                    a = parse.get()["SRC"];
                    if (a != null)
                    {
                        ProcessLink(a.Value);           // extract the value of the SRC attribute
                    }
                    if (spider.Flag == 1)
                    {
                        // NOTE(review): this looks up an entry called "IMG" and then requires
                        // its Name to be "src" or "href" -- confirm that this is the intended
                        // way to detect image links.
                        a = parse.get()["IMG"];

                        // Ordinal, case-insensitive comparison: attribute names are identifiers,
                        // so the result must not depend on the current culture, and a null
                        // Name simply fails the check instead of throwing.
                        if (a != null
                            && (string.Equals(a.Name, "src", StringComparison.OrdinalIgnoreCase)
                                || string.Equals(a.Name, "href", StringComparison.OrdinalIgnoreCase)))
                        {
                            // Build the target from the uri member combined with the attribute value.
                            Uri url = new Uri(uri, a.Value);
                            if (spider.addIMG(url))
                            {
                                getpage(url);
                            }
                        }
                    }
                }
            }
        }
        /// <summary>
        /// Runs the given text through <c>ParseHTML</c> and reports the HREF and SRC
        /// attribute values it encounters to <c>ProcessLink</c>.
        /// </summary>
        /// <param name="page">the text handed to the parser as its <c>Source</c></param>
        private void ProcessPage(string page)
        {
            ParseHTML html = new ParseHTML();
            html.Source = page;

            while (!html.Eof())
            {
                // Anything other than a 0 result is ignored (0 presumably signals
                // that a tag was read -- confirm against ParseHTML).
                if (html.Parse() != 0)
                {
                    continue;
                }

                // Link-style attribute first ...
                Attribute link = html.GetTag()["HREF"];
                if (link != null)
                {
                    ProcessLink(link.Value);
                }

                // ... then the source-style attribute, fetched from a fresh GetTag() call.
                Attribute source = html.GetTag()["SRC"];
                if (source != null)
                {
                    ProcessLink(source.Value);
                }
            }
        }