示例#1
0
        /// <summary>
        /// Collects links from a page and from every external script it references
        /// that lives on the same domain as the page. When nothing is collected,
        /// the result of <c>GetLinksFromRss</c> is returned instead.
        /// </summary>
        /// <param name="sContent">Markup of the page to scan.</param>
        /// <param name="sUrl">
        /// URL of the page. Used to resolve each script's <c>src</c> value and to
        /// determine the domain that scripts must share to be followed.
        /// </param>
        /// <param name="lisDes">
        /// Not read or written here; forwarded by reference to <c>GetLinksFromRss</c>
        /// on the fallback path only.
        /// </param>
        /// <returns>
        /// The dictionary filled by <c>smethod_0</c> when it holds at least one entry;
        /// otherwise whatever <c>GetLinksFromRss</c> returns.
        /// </returns>
        public static Dictionary <string, string> GetLinks(string sContent, string sUrl, ref Dictionary <string, string> lisDes)
        {
            Dictionary <string, string> dictionary = new Dictionary <string, string>();

            // First pass: links found directly in the page content.
            smethod_0(sContent, sUrl, ref dictionary);

            string          pageDomain = CRegex.GetDomain(sUrl);
            MatchCollection scriptTags = new Regex("<script[^>]+src\\s*=\\s*(?:'(?<src>[^']+)'|\"(?<src>[^\"]+)\"|(?<src>[^>\\s]+))\\s*[^>]*>", RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.IgnoreCase).Matches(sContent);

            // Walk the script tags from last to first, exactly as before, so the order
            // in which entries reach the dictionary is unchanged.
            for (int i = scriptTags.Count - 1; i >= 0; i--)
            {
                string scriptUrl = CRegex.GetUrl(sUrl, scriptTags[i].Groups["src"].Value);

                // Host names are not linguistic text: compare them ordinally and
                // case-insensitively. The former ToLower()/CompareTo pair depended on the
                // current culture and could reject identical hosts (e.g. "I" vs "i" under tr-TR).
                if (!string.Equals(pageDomain, CRegex.GetDomain(scriptUrl), System.StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Skip scripts that could not be downloaded (null or empty body).
                string scriptBody = CSocket.GetHtmlByUrl(scriptUrl);
                if (string.IsNullOrEmpty(scriptBody))
                {
                    continue;
                }

                smethod_0(scriptBody, scriptUrl, ref dictionary);
            }

            // Nothing found in the page or its scripts: fall back to the RSS-based extraction.
            if (dictionary.Count == 0)
            {
                return(GetLinksFromRss(sContent, sUrl, ref lisDes));
            }
            return(dictionary);
        }
示例#2
0
        /// <summary>
        /// Collects links from a page and from every external script it references
        /// that lives on the same domain as the page. When nothing is collected,
        /// the result of <c>GetLinksFromRss</c> is returned instead.
        /// </summary>
        /// <param name="sContent">Markup of the page to scan.</param>
        /// <param name="sUrl">
        /// URL of the page. Used to resolve each script's <c>src</c> value and to
        /// determine the domain that scripts must share to be followed.
        /// </param>
        /// <param name="lisDes">
        /// Not read or written here; forwarded by reference to <c>GetLinksFromRss</c>
        /// on the fallback path only.
        /// </param>
        /// <returns>
        /// The dictionary filled by <c>_GetLinks</c> when it holds at least one entry;
        /// otherwise whatever <c>GetLinksFromRss</c> returns.
        /// </returns>
        public static Dictionary <string, string> GetLinks(string sContent, string sUrl, ref Dictionary <string, string> lisDes)
        {
            Dictionary <string, string> lisA = new Dictionary <string, string>();

            // First pass: links found directly in the page content.
            _GetLinks(sContent, sUrl, ref lisA);

            string domain = CRegex.GetDomain(sUrl);

            // Harvest links produced by externally referenced scripts.
            Regex           re  = new Regex(@"<script[^>]+src\s*=\s*(?:'(?<src>[^']+)'|""(?<src>[^""]+)""|(?<src>[^>\s]+))\s*[^>]*>", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase);
            MatchCollection mcs = re.Matches(sContent);

            // Walk the script tags from last to first, exactly as before, so the order
            // in which entries reach the dictionary is unchanged.
            for (int i = mcs.Count - 1; i >= 0; i--)
            {
                string subUrl = CRegex.GetUrl(sUrl, mcs[i].Groups["src"].Value);

                // Only scripts on the page's own domain are followed. Host names are not
                // linguistic text, so compare ordinally and case-insensitively; the former
                // ToLower()/CompareTo pair depended on the current culture and could reject
                // identical hosts (e.g. "I" vs "i" under tr-TR).
                if (!string.Equals(domain, CRegex.GetDomain(subUrl), System.StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Skip scripts that could not be downloaded (null or empty body).
                string subContent = CSocket.GetHtmlByUrl(subUrl);
                if (string.IsNullOrEmpty(subContent))
                {
                    continue;
                }

                _GetLinks(subContent, subUrl, ref lisA);
            }

            // Nothing found in the page or its scripts: fall back to the RSS-based extraction.
            if (lisA.Count == 0)
            {
                return(GetLinksFromRss(sContent, sUrl, ref lisDes));
            }

            return(lisA);
        }