/// <summary>
/// Downloads the page at the given URL and returns the text content of the
/// first <c>title</c> element encountered while parsing it.
/// </summary>
/// <param name="url">The URL of the page</param>
/// <param name="defaultIfNoMatch">string to return, if no match was found</param>
/// <param name="proxy">Proxy server to direct the request through</param>
/// <param name="credentials">Credentials for authenticating the request</param>
/// <returns>
/// The content of the first <c>title</c> element, or <paramref name="defaultIfNoMatch"/>
/// when no such element is found or when fetching/parsing the response fails
/// (the failure is logged at debug level and not rethrown).
/// </returns>
/// <remarks>
/// The request itself is created and configured outside the try block, so
/// exceptions raised there (for example by <c>WebRequest.Create</c> or the cast to
/// <c>HttpWebRequest</c>) propagate to the caller.
/// </remarks>
//dup to FindTitle2() - which one we should use?
public static string FindTitle(string url, string defaultIfNoMatch, IWebProxy proxy, ICredentials credentials)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.AllowAutoRedirect = true;
    request.Proxy = proxy;
    request.Credentials = credentials;
    request.Timeout = 5 * 1000 /* 5 second timeout */;

    if (FeedSource.SetCookies)
    {
        HttpCookieManager.SetCookies(request);
    }

    /* use bogus user agent since some sites will bounce you to unsupported browser page otherwise */
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;)";

    string title = defaultIfNoMatch;

    try
    {
        // Dispose the response (not just its stream) so the underlying
        // connection is released even when parsing stops early or throws.
        using (WebResponse response = request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader input = new StreamReader(stream))
        {
            SgmlReader reader = new SgmlReader();
            reader.InputStream = input;

            while (reader.Read())
            {
                // Ordinal, case-insensitive match: element names are not linguistic
                // text, and a culture-sensitive ToLower() can miss "TITLE" under
                // cultures with special casing rules (e.g. Turkish).
                if (reader.NodeType == XmlNodeType.Element
                    && string.Equals(reader.Name, "title", StringComparison.OrdinalIgnoreCase))
                {
                    title = reader.ReadElementContentAsString();
                    break;
                }
            } //while
        }
    }
    catch (Exception e)
    {
        // Best effort: any failure leaves the default title in place.
        _log.Debug("Error retrieving title from HTML page at " + url, e);
    }

    return title;
}