/// <summary>
/// Parses the page at the configured URL and stores one <see cref="LinkData"/>
/// per anchor tag found in the link collection of this extractor.
/// </summary>
/// <returns>
/// <see cref="LinkStatus.Broken"/> when the parser's page has no source;
/// otherwise <see cref="LinkStatus.Ok"/> (also when the parser yields no node list).
/// </returns>
/// <exception cref="ArgumentException">The URL is null or empty.</exception>
public LinkStatus ExtractLinks()
{
    // A null URL is as unusable as an empty one; reject both up front
    // rather than letting a null reach the parser setup.
    if (String.IsNullOrEmpty(m_strUrl))
    {
        throw new ArgumentException("No URL specified");
    }

    // Reset the result collection before parsing so a failed run exposes
    // an empty collection rather than links from a previous call.
    m_Links = new LinkDataCollection();
    CreateParser();

    if (m_obParser.Lexer.Page.mSource == null)
    {
        return LinkStatus.Broken;
    }

    // Restrict the parse to anchor tags only.
    NodeFilter obFilter = new NodeClassFilter(typeof(ATag));
    NodeList collNodes = m_obParser.Parse(obFilter);
    if (null == collNodes)
    {
        return LinkStatus.Ok;
    }

    // All extracted links share one PageData describing the page they came from.
    PageData obPageData = new PageData();
    obPageData.m_strUrl = m_obParser.URL;
    obPageData.m_iDepth = m_iLevel;

    for (Int32 i = 0; i < collNodes.Count; i++)
    {
        INode obNode = collNodes[i];
        m_Links.Add(new LinkData(obPageData, obNode as ATag));
    }

    return LinkStatus.Ok;
}
/// <summary>
/// Initializes a new <see cref="PageData"/> instance with empty header data,
/// outlink, image link, cookie and table collections.
/// </summary>
public PageData()
{
    this.m_HeaderData = new HeadData();
    this.m_Outlinks = new LinkDataCollection();
    this.m_ImageLinks = new ImageDataCollection();
    this.m_Cookies = new CookieCollection();
    this.m_Tables = new TableDataCollection();
}
/// <summary>
/// Extracts the links of <paramref name="obPageData"/> into its outlink collection
/// and, while <paramref name="iLevel"/> is below <paramref name="iStopLevel"/>,
/// processes every outlink page through <c>ProcessPageForLinks</c>.
/// </summary>
/// <remarks>
/// NOTE(review): despite the name, this method only works with hyperlinks
/// (<c>LinkExtractor</c> / <c>m_Outlinks</c>); no image collection is touched here.
/// </remarks>
/// <param name="obPageData">Page whose URL is crawled and whose outlinks are filled.</param>
/// <param name="iLevel">Current level; incremented once per loop pass.</param>
/// <param name="iStopLevel">Level at which processing stops.</param>
/// <param name="bFlatHierarchy">
/// When true, the outlinks found on child pages are appended to the outlinks of
/// <paramref name="obPageData"/>.
/// </param>
private void ProcessPageForImageLinks(PageData obPageData, Int32 iLevel, Int32 iStopLevel, bool bFlatHierarchy)
{
    LinkExtractor obLinkExtractor = new LinkExtractor(obPageData.Url, iLevel);

    while (true)
    {
        // The extraction status is not inspected; whatever the extractor
        // exposes through Links afterwards is appended as-is.
        // NOTE(review): every pass re-runs the same extractor on the same URL and
        // appends its links again, so m_Outlinks can accumulate duplicates - confirm intent.
        obLinkExtractor.ExtractLinks();
        ICollectionSupport.AddAll(obPageData.m_Outlinks, obLinkExtractor.Links);

        if (iLevel >= iStopLevel)
        {
            return;
        }

        iLevel++;

        // Child links are gathered separately so m_Outlinks is not modified
        // while it is being enumerated.
        LinkDataCollection obChildLinks = new LinkDataCollection();
        foreach (LinkData obLinkData in obPageData.m_Outlinks)
        {
            if (obLinkData.LinkType != LinkType.Outlink)
            {
                continue;
            }

            PageData obPage = new PageData();
            obPage.m_strUrl = obLinkData.Url;
            ProcessPageForLinks(obPage, iLevel, iStopLevel, bFlatHierarchy);

            // NOTE(review): when bFlatHierarchy is false, obPage is not stored
            // anywhere visible in this method - verify the result is not lost.
            if (bFlatHierarchy)
            {
                ICollectionSupport.AddAll(obChildLinks, obPage.OutLinks);
            }
        }

        if (bFlatHierarchy)
        {
            ICollectionSupport.AddAll(obPageData.m_Outlinks, obChildLinks);
        }
    }
}