/// <summary>
/// Creates a link target collection rooted at the given page.
/// </summary>
/// <param name="__rootpage">Root page; forwarded unchanged to the base constructor.</param>
/// <param name="__low">Value stored in <c>low</c> (default 7). NOTE(review): its exact meaning is not visible in this section - confirm against the members that read it.</param>
/// <param name="__max">Value stored in <c>max</c> (default 10). NOTE(review): its exact meaning is not visible in this section - confirm against the members that read it.</param>
/// <param name="__iLimit">
/// Initial value of <c>iLimit</c> (default 7). <c>process</c> decrements it on every completed call
/// and stops collecting once it has dropped below zero.
/// </param>
public crawledLinkTargetCollection(
    crawledPage __rootpage,
    Int32 __low = 7,
    Int32 __max = 10,
    Int32 __iLimit = 7)
    : base(__rootpage)
{
    low = __low;
    max = __max;
    iLimit = __iLimit;

    // Only the primary list is created here; nothing else is initialized by this constructor.
    primary = new linkList();
}
/// <summary>
/// Collects the links found on a crawled page into this collection and returns the links
/// selected from it.
/// </summary>
/// <remarks>
/// Each link node of the page's tokenized HTML content is matched by URL against
/// <c>page.links.byUrl</c>, offered to <c>Add</c>, and - if accepted - filed under
/// <c>primary</c> or <c>secondary</c> according to <c>isPrimary</c> of the node.
/// Every call that gets past the stop conditions decrements <c>iLimit</c>.
/// </remarks>
/// <param name="page">The page whose links are processed. <c>null</c> ends link collecting.</param>
/// <param name="isLinkStackEmpty">
/// When <c>true</c> and this page produced no primary links, all links accumulated so far in
/// <c>secondary</c> are returned instead.
/// </param>
/// <returns>
/// The primary links accepted from this page (or the accumulated secondary links, see
/// <paramref name="isLinkStackEmpty"/>); the list may be empty. Returns <c>null</c> - after setting
/// <c>isLinkCollectingDone</c> - when <paramref name="page"/> is <c>null</c>,
/// <c>CountToTarget</c> is below 1, or <c>iLimit</c> is below 0.
/// </returns>
public linkList process(crawledPage page, Boolean isLinkStackEmpty)
{
    // Stop conditions, evaluated in the same order as before: no page, target reached, budget spent.
    if (page == null || CountToTarget < 1 || iLimit < 0)
    {
        isLinkCollectingDone = true;
        return null;
    }

    linkList output = new linkList();

    // Only HTML content carries link nodes; any other content type contributes nothing.
    htmlContentPage hContent = page.tokenizedContent as htmlContentPage;
    if (hContent != null)
    {
        htmlLinkNodeCollection linkNodes = new htmlLinkNodeCollection(hContent.tokens);

        foreach (htmlLinkNode ln in linkNodes.getSorted())
        {
            // A node without a matching link entry on the page has nothing to register;
            // skip it instead of handing null to Add and to the primary/secondary lists.
            if (!page.links.byUrl.ContainsKey(ln.url))
            {
                continue;
            }

            link crawledLink = page.links.byUrl[ln.url];
            if (crawledLink == null)
            {
                continue;
            }

            // Add returns null for links that are not to be processed further
            // (NOTE(review): presumably duplicates/rejected links - Add is defined outside this section).
            var cwl = Add(crawledLink);
            if (cwl == null)
            {
                continue;
            }

            if (ln.isPrimary)
            {
                primary.Add(crawledLink);
                output.Add(cwl);
            }
            else
            {
                // Secondary links are only remembered; they are returned later as a fallback.
                secondary.Add(crawledLink);
            }
        }
    }

    // Fallback: nothing primary on this page and the caller has nothing queued,
    // so hand back every secondary link gathered so far.
    if (!output.Any() && isLinkStackEmpty)
    {
        collectionExtensions.AddMulti(output, secondary);
    }

    iLimit--;

    return output;
}