示例#1
0
 /// <summary>
 /// Collects the URLs found in the HREF and SRC attributes of the tags in an HTML page.
 /// </summary>
 /// <param name="Page">HTML source text handed to the parser.</param>
 /// <param name="HostAbsolutePath">Passed to <c>GetPath</c> together with every accepted URL.</param>
 /// <param name="MaxLen">Maximum length of a returned URL; a value below 1 disables the length check.</param>
 /// <param name="FindUrlLevel">When 2, every non-empty URL is accepted; otherwise only URLs starting with "http://" or "https://".</param>
 /// <returns>
 /// The accepted URLs in the order they were encountered (HREF before SRC within one tag),
 /// each lower-cased and then passed through <c>GetPath</c>; <c>null</c> when no URL was accepted.
 /// </returns>
 public static string[] GetHTMLUrls(string Page, string HostAbsolutePath, int MaxLen, int FindUrlLevel)
 {
     ArrayList list = new ArrayList();
     ParseHTML ehtml = new ParseHTML();
     ehtml.Source = Page;
     while (!ehtml.Eof())
     {
         // NOTE(review): a '\0' result presumably signals that a tag was parsed - confirm against ParseHTML.
         if (ehtml.Parse() != '\0')
         {
             continue;
         }

         Shove.HTML.HtmlParse.Attribute attribute = ehtml.GetTag()["HREF"];
         if (attribute != null)
         {
             // Invariant lower-casing plus ordinal prefix checks keep the filters independent of the
             // current culture (e.g. under tr-TR "MAILTO" lower-cases to "ma\u0131lto" and would slip through).
             // NOTE(review): lower-casing the whole URL also alters case-sensitive paths and query
             // strings; kept for compatibility - confirm callers rely on it.
             string str = attribute.Value.Trim().ToLowerInvariant();
             bool isNavigable = (str.Length != 0)
                 && !str.StartsWith("mailto", System.StringComparison.Ordinal)
                 && !str.StartsWith("#", System.StringComparison.Ordinal);
             bool levelAccepted = (FindUrlLevel == 2)
                 || str.StartsWith("http://", System.StringComparison.Ordinal)
                 || str.StartsWith("https://", System.StringComparison.Ordinal);
             if (isNavigable && levelAccepted)
             {
                 str = GetPath(str, HostAbsolutePath);
                 if ((MaxLen < 1) || (str.Length <= MaxLen))
                 {
                     list.Add(str);
                 }
             }
         }

         attribute = ehtml.GetTag()["SRC"];
         if (attribute != null)
         {
             // SRC values get the same scheme filter as HREF but no mailto/fragment exclusion.
             string str2 = attribute.Value.Trim().ToLowerInvariant();
             bool levelAccepted = (FindUrlLevel == 2)
                 || str2.StartsWith("http://", System.StringComparison.Ordinal)
                 || str2.StartsWith("https://", System.StringComparison.Ordinal);
             if ((str2.Length != 0) && levelAccepted)
             {
                 str2 = GetPath(str2, HostAbsolutePath);
                 if ((MaxLen < 1) || (str2.Length <= MaxLen))
                 {
                     list.Add(str2);
                 }
             }
         }
     }

     // Callers receive null, not an empty array, when nothing was collected.
     if (list.Count == 0)
     {
         return null;
     }

     // Only strings are ever added above, so a typed copy is safe.
     return (string[])list.ToArray(typeof(string));
 }
示例#2
0
        /// <summary>
        /// Gets all link addresses contained in an HTML page, taken from the HREF and SRC
        /// attributes of its tags.
        /// </summary>
        /// <param name="page">HTML source text handed to the parser.</param>
        /// <param name="hostAbsolutePath">Passed to <c>ReBuildUrl</c> together with every accepted URL.</param>
        /// <param name="maxLen">Maximum length of a returned URL; a value below 1 disables the length check.</param>
        /// <param name="findUrlLevel">When 2, every non-empty URL is accepted; otherwise only URLs starting with "http://" or "https://".</param>
        /// <returns>
        /// The accepted URLs in the order they were encountered (HREF before SRC within one tag),
        /// each lower-cased and then passed through <c>ReBuildUrl</c>; <c>null</c> when no URL was accepted.
        /// </returns>
        public static string[] GetHTMLUrls(string page, string hostAbsolutePath, int maxLen, int findUrlLevel)
        {
            ArrayList urls = new ArrayList();

            HtmlParse.ParseHTML parse = new HtmlParse.ParseHTML();
            HtmlParse.Attribute a;

            parse.Source = page;
            while (!parse.Eof())
            {
                // NOTE(review): a 0 result presumably signals that a tag was parsed - confirm against ParseHTML.
                char ch = parse.Parse();
                if (ch != 0)
                {
                    continue;
                }

                a = parse.GetTag()["HREF"];
                if (a != null)
                {
                    // Invariant lower-casing keeps the ordinal prefix checks below correct in every
                    // culture (e.g. under tr-TR "MAILTO" lower-cases to "ma\u0131lto" and would slip through).
                    // NOTE(review): lower-casing the whole URL also alters case-sensitive paths and query
                    // strings; kept for compatibility - confirm callers rely on it.
                    string href = a.Value.Trim().ToLowerInvariant();
                    bool isNavigable = (href.Length != 0)
                        && !href.StartsWith("mailto", StringComparison.Ordinal)
                        && !href.StartsWith("#", StringComparison.Ordinal);
                    bool levelAccepted = (findUrlLevel == 2)
                        || href.StartsWith("http://", StringComparison.Ordinal)
                        || href.StartsWith("https://", StringComparison.Ordinal);
                    if (isNavigable && levelAccepted)
                    {
                        href = ReBuildUrl(href, hostAbsolutePath);
                        if ((maxLen < 1) || (href.Length <= maxLen))
                        {
                            urls.Add(href);
                        }
                    }
                }

                a = parse.GetTag()["SRC"];
                if (a != null)
                {
                    // SRC values get the same scheme filter as HREF but no mailto/fragment exclusion.
                    string src = a.Value.Trim().ToLowerInvariant();
                    bool levelAccepted = (findUrlLevel == 2)
                        || src.StartsWith("http://", StringComparison.Ordinal)
                        || src.StartsWith("https://", StringComparison.Ordinal);
                    if ((src.Length != 0) && levelAccepted)
                    {
                        src = ReBuildUrl(src, hostAbsolutePath);
                        if ((maxLen < 1) || (src.Length <= maxLen))
                        {
                            urls.Add(src);
                        }
                    }
                }
            }

            // Callers receive null, not an empty array, when nothing was collected.
            if (urls.Count == 0)
            {
                return null;
            }

            // Only strings are ever added above, so a typed copy is safe.
            return (string[])urls.ToArray(typeof(string));
        }