/// <summary>
/// Downloads the document at <paramref name="url"/>, extracts its HTML link elements and appends
/// a <c>TransitAccountFeed</c> to <paramref name="feeds"/> for every link whose type is
/// <c>application/rss+xml</c> or <c>application/atom+xml</c>.
/// </summary>
/// <param name="url">Address of the page to download; it is also passed as a <see cref="Uri"/> to the link extractor.</param>
/// <param name="feeds">Collection that receives the discovered feeds; entries are only appended.</param>
/// <remarks>
/// Request, download and URI parsing errors are not handled here and propagate to the caller.
/// </remarks>
protected void discoverRel(string url, ArrayList feeds)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.UserAgent = SessionManager.GetCachedConfiguration("SnCore.Web.UserAgent", "SnCore/1.0");
    request.Accept = "*/*";

    string content;
    // Dispose the response as well as the reader so the connection is released even when
    // obtaining or wrapping the response stream fails.
    using (WebResponse response = request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
    {
        content = reader.ReadToEnd();
    }

    List<HtmlLinkControl> links = HtmlLinkExtractor.Extract(content, new Uri(url));
    foreach (HtmlLinkControl link in links)
    {
        // Ordinal, case-insensitive comparison: MIME types are not linguistic text, and a
        // culture-sensitive ToLower() mis-handles 'I' under e.g. the Turkish culture.
        // string.Equals also tolerates a null Type, so links without a type are skipped.
        string type = link.Type;
        bool isFeed =
            string.Equals(type, "application/rss+xml", StringComparison.OrdinalIgnoreCase) ||
            string.Equals(type, "application/atom+xml", StringComparison.OrdinalIgnoreCase);

        if (!isFeed)
        {
            continue;
        }

        TransitAccountFeed feed = new TransitAccountFeed();
        feed.FeedUrl = link.Href;
        feed.LinkUrl = inputLinkUrl.Text;
        feed.Description = string.Empty;
        feed.Name = link.Title;
        feeds.Add(feed);
    }
}
/// <summary>
/// Downloads the document at <paramref name="url"/> and collects account events from the
/// calendars it references: HTML link elements of type <c>text/calendar</c>, plus any extracted
/// URI that uses the <c>webcal</c> scheme or whose path ends with <c>.ics</c>.
/// </summary>
/// <param name="url">Address of the page to download; it is also passed as a <see cref="Uri"/> to both extractors.</param>
/// <returns>The events gathered through <c>AddUnique</c>; empty when nothing could be parsed.</returns>
/// <remarks>
/// Discovery is best-effort: a failure while handling one link is traced and that link is skipped.
/// Errors while downloading the page itself or running the extractors propagate to the caller.
/// </remarks>
protected List<TransitAccountEvent> discoverRel(string url)
{
    List<TransitAccountEvent> result = new List<TransitAccountEvent>();

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.UserAgent = SessionManager.GetCachedConfiguration("SnCore.Web.UserAgent", "SnCore/1.0");
    request.Accept = "*/*";

    string content;
    // Dispose the response as well as the reader so the connection is released even when
    // obtaining or wrapping the response stream fails.
    using (WebResponse response = request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
    {
        content = reader.ReadToEnd();
    }

    Uri pageUri = new Uri(url);

    List<HtmlLinkControl> links = HtmlLinkExtractor.Extract(content, pageUri);
    foreach (HtmlLinkControl link in links)
    {
        try
        {
            if (string.Equals(link.Type, "text/calendar", StringComparison.OrdinalIgnoreCase))
            {
                AddDiscoveredCalendarEvent(result, link.Href);
            }
        }
        catch (Exception ex)
        {
            // Deliberately non-fatal: one broken calendar must not abort the discovery.
            System.Diagnostics.Trace.TraceWarning(
                "Skipping calendar link found on '{0}': {1}", url, ex.Message);
        }
    }

    List<Uri> webcallinks = HtmlUriExtractor.Extract(content, pageUri);
    foreach (Uri link in webcallinks)
    {
        try
        {
            bool isCalendar =
                string.Equals(link.Scheme, "webcal", StringComparison.OrdinalIgnoreCase) ||
                link.GetLeftPart(UriPartial.Path).EndsWith(".ics", StringComparison.Ordinal);

            if (isCalendar)
            {
                AddDiscoveredCalendarEvent(result, link.ToString());
            }
        }
        catch (Exception ex)
        {
            // Deliberately non-fatal: one broken calendar must not abort the discovery.
            System.Diagnostics.Trace.TraceWarning(
                "Skipping calendar URI found on '{0}': {1}", url, ex.Message);
        }
    }

    return result;
}

/// <summary>
/// Parses the calendar at <paramref name="href"/>, records that address as the event's website
/// and hands the event to <c>AddUnique</c>. Exceptions are left to the caller.
/// </summary>
private void AddDiscoveredCalendarEvent(List<TransitAccountEvent> result, string href)
{
    TransitAccountEventICALEmitter emitter = TransitAccountEventICALEmitter.Parse(
        href, SessionManager.GetCachedConfiguration("SnCore.Web.UserAgent", "SnCore/1.0"));
    emitter.AccountEvent.Website = href;
    AddUnique(result, emitter.AccountEvent);
}
/// <summary>
/// Runs <c>HtmlLinkExtractor.Extract</c> over two sample HTML fragments (one containing
/// icon/RSS/Atom link elements, one containing only a malformed mailto anchor) and writes the
/// number of links and the details of each link to the console.
/// </summary>
/// <remarks>
/// The method only prints; it makes no assertions about the extracted links.
/// </remarks>
public void ExtractSome()
{
    string[] tests =
    {
        "<html> bla bla bla" +
        " <LINK type=\"html/icon\" REL='SHORTCUT ICON' HREF='/groups/img/3/favicon.ico'>" +
        " <link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS\" href=\"http://groups.google.com/group/dotnetopenid/feed/rss_v2_0_msgs.xml\">" +
        " <link rel=\"alternate\" type=\"application/atom+xml\" title=\"Atom\" href=\"http://groups.google.com/group/dotnetopenid/feed/atom_v1_0_msgs.xml\">" +
        "</html>",
        "<a href='mailto:a bunch of crap %20 %20'></a>"
    };

    Uri baseUri = new Uri("http://www.google.com");

    foreach (string test in tests)
    {
        List<HtmlLinkControl> links = HtmlLinkExtractor.Extract(test, baseUri);
        Console.WriteLine("Links: {0}", links.Count);

        foreach (HtmlLinkControl link in links)
        {
            // The closing quote after {2} was missing, leaving the rel value unbalanced in the output.
            Console.WriteLine("Link: title='{0}' type='{1}' rel='{2}'", link.Title, link.Type, link.Rel);
            Console.WriteLine("\t{0}", link.Href);
            Console.WriteLine(HtmlGenericCollector.GetHtml(link));
        }
    }
}