예제 #1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and returns the text of the
        /// first <c>title</c> element encountered while parsing it.
        /// </summary>
        /// <remarks>
        /// Errors raised while fetching or parsing the response are logged at debug
        /// level and swallowed; <paramref name="defaultIfNoMatch"/> is returned in that
        /// case. The request itself is created and configured before the guarded
        /// section, so failures at that stage propagate to the caller.
        /// </remarks>
        /// <param name="url">The URL of the page</param>
        /// <param name="defaultIfNoMatch">string to return, if no match was found</param>
        /// <param name="proxy">Proxy server to direct the request through</param>
        /// <param name="credentials">Credentials for authenticating the request</param>
        /// <returns>
        /// The content of the first <c>title</c> element, or
        /// <paramref name="defaultIfNoMatch"/> if no such element was found or the
        /// page could not be retrieved or parsed.
        /// </returns>
        // NOTE: duplicates FindTitle2() - which one should we use?
        public static string FindTitle(string url, string defaultIfNoMatch, IWebProxy proxy, ICredentials credentials)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            request.AllowAutoRedirect = true;
            request.Proxy             = proxy;
            request.Credentials       = credentials;
            request.Timeout           = 5 * 1000 /* 5 second timeout */;

            if (FeedSource.SetCookies)
            {
                HttpCookieManager.SetCookies(request);
            }

            /* use bogus user agent since some sites will bounce you to unsupported browser page otherwise */
            request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;)";

            string title = defaultIfNoMatch;

            try
            {
                // Dispose the response, its stream and the text reader deterministically,
                // on both the success and the failure path.
                using (WebResponse response = request.GetResponse())
                using (Stream stream = response.GetResponseStream())
                using (StreamReader textReader = new StreamReader(stream))
                {
                    SgmlReader reader = new SgmlReader();
                    reader.InputStream = textReader;

                    while (reader.Read())
                    {
                        // Ordinal, case-insensitive match: element names are not linguistic
                        // text, and a culture-sensitive ToLower() breaks on e.g. Turkish
                        // locales, where "TITLE" lower-cases to "tıtle".
                        if (reader.NodeType == XmlNodeType.Element &&
                            string.Equals(reader.Name, "title", StringComparison.OrdinalIgnoreCase))
                        {
                            title = reader.ReadElementContentAsString();
                            break;
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // Best effort: any failure falls back to whatever 'title' currently holds.
                _log.Debug("Error retrieving title from HTML page at " + url, e);
            }

            return title;
        }