/// <summary>
/// Walks every descendant node of <paramref name="doc"/> and records the resolved
/// <c>href</c> of each node accepted by <c>IsRss</c>, then collects the links that
/// <c>s_extractor</c> reports for the document.
/// </summary>
/// <param name="doc">Parsed HTML document to inspect.</param>
/// <param name="docUri">URI of the document; used as the base when resolving <c>href</c> values.</param>
/// <param name="foundItems">
/// Receives the string form of each resolved URI taken from a node that has
/// <c>href</c>, <c>type</c> and <c>rel</c> attributes and for which <c>IsRss(type, rel)</c> is true.
/// </param>
/// <param name="newUris">Receives every link returned by <c>s_extractor.ExtractLinks(docUri, doc)</c>.</param>
private static void MyFunction(HtmlDocument doc, Uri docUri, out List<string> foundItems, out List<Uri> newUris)
{
    foundItems = new List<string>();
    newUris = new List<Uri>();

    // All descendants are scanned. An XPath-based variant was left in the code for reference:
    // SelectNodes("//link[(@type='application/rss+xml' or @type='application/atom+xml') and @rel='alternate']")
    IEnumerable<HtmlNode> nodes = doc.DocumentNode.Descendants();
    if (nodes != null)
    {
        foreach (HtmlNode node in nodes)
        {
            if (!node.HasAttributes)
            {
                continue;
            }

            var href = node.Attributes["href"];
            var type = node.Attributes["type"];
            var rel = node.Attributes["rel"];
            if (href == null || type == null || rel == null || !IsRss(type, rel))
            {
                continue;
            }

            // TryCreate reports an unresolvable href by returning false, so malformed
            // links are skipped without resorting to a catch-all exception handler.
            Uri rssUri;
            if (Uri.TryCreate(docUri, href.Value, out rssUri))
            {
                foundItems.Add(rssUri.ToString());
            }
        }
    }

    foreach (Uri link in s_extractor.ExtractLinks(docUri, doc))
    {
        newUris.Add(link);
    }
}
/// <summary>
/// Extracts the links of a page into <c>obPageData.m_Outlinks</c> and, while
/// <paramref name="iLevel"/> is below <paramref name="iStopLevel"/>, processes each
/// outlink of the page one level deeper.
/// </summary>
/// <param name="obPageData">
/// Page whose <c>Url</c> is handed to the link extractor and whose <c>m_Outlinks</c>
/// collection receives the extracted links.
/// </param>
/// <param name="iLevel">Current level; passed to the extractor and incremented before outlinks are followed.</param>
/// <param name="iStopLevel">Once <paramref name="iLevel"/> is greater than or equal to this value, the method returns.</param>
/// <param name="bFlatHierarchy">
/// When true, the <c>OutLinks</c> of every followed page are also appended to
/// <c>obPageData.m_Outlinks</c>.
/// </param>
private void ProcessPageForImageLinks(PageData obPageData, Int32 iLevel, Int32 iStopLevel, bool bFlatHierarchy)
{
    // The extractor is constructed once, with the level the method was entered at.
    LinkExtractor extractor = new LinkExtractor(obPageData.Url, iLevel);

    while (true)
    {
        // The LinkStatus returned by ExtractLinks is not inspected here.
        // NOTE(review): the same extractor is re-run on every pass, so its Links are
        // appended again each time (unless AddAll de-duplicates) — confirm this is intended.
        extractor.ExtractLinks();
        ICollectionSupport.AddAll(obPageData.m_Outlinks, extractor.Links);

        if (iLevel >= iStopLevel)
        {
            return;
        }

        iLevel++;

        // Links gathered from followed pages; merged only after the enumeration below
        // has finished, so m_Outlinks is not modified while it is being iterated.
        LinkDataCollection childLinks = new LinkDataCollection();
        foreach (LinkData link in obPageData.m_Outlinks)
        {
            if (link.LinkType != LinkType.Outlink)
            {
                continue;
            }

            PageData childPage = new PageData();
            childPage.m_strUrl = link.Url;

            // NOTE(review): this calls ProcessPageForLinks, not ProcessPageForImageLinks —
            // verify that delegating to the general link processor is deliberate.
            ProcessPageForLinks(childPage, iLevel, iStopLevel, bFlatHierarchy);

            if (bFlatHierarchy)
            {
                ICollectionSupport.AddAll(childLinks, childPage.OutLinks);
            }
        }

        if (bFlatHierarchy)
        {
            ICollectionSupport.AddAll(obPageData.m_Outlinks, childLinks);
        }
    }
}