/// <summary>
/// Builds a parser for the configured URL, parses the page for anchor tags
/// and stores one <c>LinkData</c> per matching node in <c>m_Links</c>.
/// </summary>
/// <returns>
/// <c>LinkStatus.Broken</c> when the parser's page has no source after
/// <c>CreateParser()</c>; otherwise <c>LinkStatus.Ok</c>.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when no URL has been set (the URL field is null or empty).
/// </exception>
public LinkStatus ExtractLinks()
{
    // A null URL is just as unusable as an empty one; reject both up front
    // instead of letting a null slip through to CreateParser().
    if (String.IsNullOrEmpty(m_strUrl))
    {
        throw new ArgumentException("No URL specified");
    }

    // Reset the result collection before parsing so that a Broken result
    // leaves an empty collection rather than links from a previous run.
    m_Links = new LinkDataCollection();
    CreateParser();

    if (m_obParser.Lexer.Page.mSource == null)
    {
        return LinkStatus.Broken;
    }

    NodeFilter obFilter = new NodeClassFilter(typeof(ATag));
    NodeList collNodes = m_obParser.Parse(obFilter);
    if (null == collNodes)
    {
        // Nothing was returned by the parser; the page itself is still reported as Ok.
        return LinkStatus.Ok;
    }

    // Every extracted link shares the same originating-page descriptor.
    PageData obPageData = new PageData();
    obPageData.m_strUrl = m_obParser.URL;
    obPageData.m_iDepth = m_iLevel;

    for (Int32 i = 0; i < collNodes.Count; i++)
    {
        // NOTE(review): nodes are presumably always ATag because of the class
        // filter above; a non-ATag node would be passed to LinkData as null.
        INode obNode = collNodes[i];
        m_Links.Add(new LinkData(obPageData, obNode as ATag));
    }

    return LinkStatus.Ok;
}
/// <summary>
/// Extracts the links of <paramref name="obPageData"/> into its
/// <c>m_Outlinks</c> collection and, while <paramref name="iLevel"/> is below
/// <paramref name="iStopLevel"/>, processes every link of type
/// <c>LinkType.Outlink</c> one level deeper via <c>ProcessPageForLinks</c>.
/// </summary>
/// <param name="obPageData">Page whose URL is scanned and whose outlink collection is filled.</param>
/// <param name="iLevel">Current depth; incremented once per loop pass.</param>
/// <param name="iStopLevel">Depth at which processing stops (the loop exits once <paramref name="iLevel"/> reaches it).</param>
/// <param name="bFlatHierarchy">
/// When true, the outlinks found on child pages are appended to
/// <paramref name="obPageData"/>'s own outlink collection.
/// </param>
private void ProcessPageForImageLinks(PageData obPageData, Int32 iLevel, Int32 iStopLevel, bool bFlatHierarchy)
{
    // The extractor is bound to the page URL and the *initial* level only.
    LinkExtractor obLinkExtractor = new LinkExtractor(obPageData.Url, iLevel);

    // NOTE(review): every pass re-runs the extractor on the same page and appends
    // its links again, so the page appears to be re-scanned once per remaining
    // level. Behaviour is kept as-is here; confirm whether this is intended.
    while (true)
    {
        // The returned LinkStatus is not inspected; whatever the extractor
        // exposes through Links afterwards is appended unconditionally.
        obLinkExtractor.ExtractLinks();
        ICollectionSupport.AddAll(obPageData.m_Outlinks, obLinkExtractor.Links);

        if (iLevel >= iStopLevel)
        {
            break;
        }

        iLevel++;

        // Child links are gathered separately so that m_Outlinks is not
        // modified while it is being enumerated below.
        LinkDataCollection obChildLinks = new LinkDataCollection();
        foreach (LinkData obLinkData in obPageData.m_Outlinks)
        {
            if (obLinkData.LinkType != LinkType.Outlink)
            {
                continue;
            }

            PageData obPage = new PageData();
            obPage.m_strUrl = obLinkData.Url;

            // NOTE(review): delegates to ProcessPageForLinks rather than to this
            // method; verify that this is the intended callee.
            ProcessPageForLinks(obPage, iLevel, iStopLevel, bFlatHierarchy);

            if (bFlatHierarchy)
            {
                ICollectionSupport.AddAll(obChildLinks, obPage.OutLinks);
            }
        }

        if (bFlatHierarchy)
        {
            ICollectionSupport.AddAll(obPageData.m_Outlinks, obChildLinks);
        }
    }
}