示例#1
0
        /// <summary>
        /// Downloads the resource at <paramref name="sUrl"/> and returns its body decoded with
        /// <paramref name="sEncode"/>, retrying after any failure.
        /// </summary>
        /// <param name="sUrl">Absolute URL to request.</param>
        /// <param name="sEncode">Encoding used to decode the response body.</param>
        /// <param name="iMaxRetry">Maximum number of retries permitted after a failed attempt.</param>
        /// <param name="iCurrentRetry">Number of retries already consumed; callers normally pass 0.</param>
        /// <returns>
        /// The response body of the first successful attempt, or <see cref="string.Empty"/> when
        /// every attempt fails.
        /// </returns>
        private static string GetHtml(string sUrl, System.Text.Encoding sEncode, int iMaxRetry, int iCurrentRetry)
        {
            while (true)
            {
                try
                {
                    Uri        requestUri = new Uri(sUrl);
                    WebRequest webRequest = WebRequest.Create(requestUri);

                    // using-blocks release the connection even when reading fails half-way.
                    using (WebResponse response = webRequest.GetResponse())
                    using (System.IO.Stream responseStream = response.GetResponseStream())
                    using (System.IO.StreamReader streamReader = new System.IO.StreamReader(responseStream, sEncode))
                    {
                        return streamReader.ReadToEnd();
                    }
                }
                catch
                {
                    // Best-effort download: any failure counts as one consumed retry.
                    iCurrentRetry++;
                    if (iCurrentRetry > iMaxRetry)
                    {
                        return string.Empty;
                    }
                }
            }
        }
示例#2
0
        /// <summary>
        /// Determines the text encoding of the page at <paramref name="url"/> by looking for a
        /// <c>charset=...</c> declaration in its content.
        /// </summary>
        /// <param name="url">Address of the page to inspect.</param>
        /// <returns>
        /// The encoding named by the first charset declaration that resolves to a known encoding;
        /// UTF-8 when the page cannot be downloaded, declares no charset, or declares only
        /// unknown charset names.
        /// </returns>
        private static System.Text.Encoding getEncoding(string url)
        {
            const string charsetPattern = "charset\\b\\s*=\\s*(?<charset>[a-zA-Z\\d|-]*)";

            // The page is decoded as UTF-8 here; the pattern is also passed as the end marker of the download.
            string hTMLContent = HttpCollects.GetHTMLContent(url, System.Text.Encoding.UTF8, charsetPattern);

            // GetHTMLContent returns null when the download fails; Regex would throw on null input.
            if (string.IsNullOrEmpty(hTMLContent))
            {
                return System.Text.Encoding.UTF8;
            }

            // NOTE(review): this match is case-sensitive while the download's end marker is matched
            // case-insensitively, so "CHARSET=" falls through to UTF-8 - confirm that is intended.
            Regex regex = new Regex(charsetPattern);
            foreach (Match match in regex.Matches(hTMLContent))
            {
                string charsetName = match.Groups["charset"].Value;
                if (string.IsNullOrEmpty(charsetName))
                {
                    continue;
                }

                try
                {
                    return System.Text.Encoding.GetEncoding(charsetName);
                }
                catch (System.ArgumentException)
                {
                    // Unknown charset name: try the next declaration.
                }
                catch (System.NotSupportedException)
                {
                    // Encoding not available on this platform: try the next declaration.
                }
            }

            return System.Text.Encoding.UTF8;
        }
示例#3
0
        /// <summary>
        /// Extracts the content of the <c>description</c> meta tag from <paramref name="html"/>.
        /// The name-before-content attribute order is tried first, then content-before-name.
        /// </summary>
        /// <param name="html">Markup to search.</param>
        /// <param name="ignoreCas">Forwarded unchanged to <c>GetMetaString</c>; presumably toggles case-insensitive matching - confirm against that helper.</param>
        /// <returns>
        /// The result of the first lookup when it is non-empty; otherwise whatever the second
        /// lookup returns.
        /// </returns>
        public static string GetDescription(string html, bool ignoreCas)
        {
            const string closeTag           = "\"([\\s]*)/?>";
            const string nameFirstOpen      = "<meta([\\s]*)name=\"description\"([\\s]*)content=\"";
            const string contentFirstOpen   = "<meta([\\s]*)content=\"";
            const string contentFirstClose  = "\"([\\s]*)name=\"description\"([\\s]*)/?>";

            string description = HttpCollects.GetMetaString(html, nameFirstOpen, closeTag, ignoreCas);
            if (!string.IsNullOrEmpty(description))
            {
                return description;
            }

            return HttpCollects.GetMetaString(html, contentFirstOpen, contentFirstClose, ignoreCas);
        }
示例#4
0
        /// <summary>
        /// Extracts the page title from <paramref name="html"/>: the <c>title</c> meta tag is
        /// tried first, and the <c>&lt;title&gt;</c> element is used as the fallback.
        /// </summary>
        /// <param name="html">Markup to search.</param>
        /// <param name="ignoreCas">
        /// Forwarded unchanged to <c>GetMetaString</c> for the meta-tag lookup. The
        /// <c>&lt;title&gt;</c> fallback is always matched case-insensitively.
        /// </param>
        /// <returns>
        /// The meta-tag lookup result when it is non-empty; otherwise the trimmed text of the
        /// first <c>&lt;title&gt;</c> element, or an empty string when there is none.
        /// </returns>
        public static string GetTitle(string html, bool ignoreCas)
        {
            string metaString = HttpCollects.GetMetaString(html, "<meta name=\"title\"([\\s]*)content=\"", "\"([\\s]*)/?>", ignoreCas);

            if (!string.IsNullOrEmpty(metaString))
            {
                return(metaString);
            }

            // Lazy quantifier: stop at the FIRST closing tag. A greedy match would run on to the
            // last "</title>" in the document (e.g. one inside inline SVG) and return markup.
            string pattern = "(?<=<title.*>)([\\s\\S]*?)(?=</title>)";
            Regex  regex   = new Regex(pattern, RegexOptions.IgnoreCase);
            return(regex.Match(html).Value.Trim());
        }
示例#5
0
        /// <summary>
        /// Downloads the document at <paramref name="url"/> and returns its text, optionally
        /// stopping early once the text read so far matches <paramref name="endRegexString"/>.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="encoding">
        /// Encoding used to decode the body. When null, the character set reported by the
        /// response is used; if that is empty or "ISO-8859-1" the charset is looked up with
        /// <c>getEncoding</c>. UTF-8 is the fallback whenever detection fails.
        /// </param>
        /// <param name="endRegexString">
        /// Optional regular expression (matched case-insensitively). When supplied, the body is
        /// read line by line and reading stops after the first line that makes the accumulated
        /// text match. Lines are concatenated WITHOUT their line terminators. When null or
        /// empty, the whole body is returned as-is.
        /// </param>
        /// <returns>
        /// The decoded text, or null when the status code is not 200 OK or when a
        /// <see cref="WebException"/> or <see cref="System.IO.IOException"/> occurs.
        /// </returns>
        public static string GetHTMLContent(string url, System.Text.Encoding encoding, string endRegexString)
        {
            try
            {
                HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
                httpWebRequest.Timeout = 30000;

                // A single request serves both charset detection and reading; the using-blocks
                // guarantee the connection is released exactly once on every path.
                using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                {
                    if (httpWebResponse.StatusCode != HttpStatusCode.OK)
                    {
                        return null;
                    }

                    if (encoding == null)
                    {
                        try
                        {
                            string declaredCharset = httpWebResponse.CharacterSet;

                            // NOTE(review): "ISO-8859-1" is treated like "not declared" - presumably
                            // because the framework reports it when the server sends no charset; confirm.
                            if (string.IsNullOrEmpty(declaredCharset) || declaredCharset == "ISO-8859-1")
                            {
                                encoding = HttpCollects.getEncoding(url);
                            }
                            else
                            {
                                encoding = System.Text.Encoding.GetEncoding(declaredCharset);
                            }
                        }
                        catch
                        {
                            // Detection is best-effort; any failure falls back to UTF-8.
                            encoding = System.Text.Encoding.UTF8;
                        }

                        if (encoding == null)
                        {
                            encoding = System.Text.Encoding.UTF8;
                        }
                    }

                    using (System.IO.Stream stream = httpWebResponse.GetResponseStream())
                    using (System.IO.StreamReader streamReader = new System.IO.StreamReader(stream, encoding))
                    {
                        if (string.IsNullOrEmpty(endRegexString))
                        {
                            return streamReader.ReadToEnd();
                        }

                        Regex regex = new Regex(endRegexString, RegexOptions.IgnoreCase);
                        System.Text.StringBuilder stringBuilder = new System.Text.StringBuilder();
                        string line;
                        while ((line = streamReader.ReadLine()) != null)
                        {
                            stringBuilder.Append(line);

                            // The whole accumulated text is tested so a match may span joined lines.
                            if (regex.IsMatch(stringBuilder.ToString()))
                            {
                                break;
                            }
                        }

                        return stringBuilder.ToString();
                    }
                }
            }
            catch (WebException)
            {
                return null;
            }
            catch (System.IO.IOException)
            {
                return null;
            }
        }
示例#6
0
 /// <summary>
 /// Convenience overload that calls the three-argument <c>GetHTMLContent</c> with a null
 /// encoding.
 /// </summary>
 /// <param name="url">Address to request.</param>
 /// <param name="endRegexString">Forwarded unchanged to the three-argument overload.</param>
 /// <returns>Whatever the three-argument overload returns.</returns>
 public static string GetHTMLContent(string url, string endRegexString)
 {
     System.Text.Encoding unspecifiedEncoding = null;
     string content = HttpCollects.GetHTMLContent(url, unspecifiedEncoding, endRegexString);
     return content;
 }