예제 #1
0
 /// <summary>
 /// Initializes the link target collection for the given root page and stores the supplied limits.
 /// </summary>
 /// <param name="__rootpage">The root page; forwarded unchanged to the base constructor.</param>
 /// <param name="__low">Value stored in <c>low</c> (default 7).</param>
 /// <param name="__max">Value stored in <c>max</c> (default 10).</param>
 /// <param name="__iLimit">Value stored in <c>iLimit</c> (default 7).</param>
 public crawledLinkTargetCollection(
     crawledPage __rootpage,
     Int32 __low = 7,
     Int32 __max = 10,
     Int32 __iLimit = 7)
     : base(__rootpage)
 {
     // Limits are copied as given; no validation is performed here.
     low = __low;
     max = __max;
     iLimit = __iLimit;

     // Start with an empty list of primary links.
     primary = new linkList();
 }
예제 #2
0
        /// <summary>
        /// Scans the link nodes of the specified page, registers the matching links through <c>Add</c>
        /// and returns the links selected from this page.
        /// </summary>
        /// <param name="page">The crawled page to process. When <c>null</c>, link collecting is marked as done.</param>
        /// <param name="isLinkStackEmpty">
        /// When <c>true</c> and this page yielded no primary links, the content of <c>secondary</c>
        /// is appended to the result as a fallback.
        /// </param>
        /// <returns>
        /// The links accepted by <c>Add</c> that came from primary link nodes (or the <c>secondary</c> fallback);
        /// <c>null</c> when collecting stops because <paramref name="page"/> is <c>null</c>,
        /// <c>CountToTarget</c> is below 1 or <c>iLimit</c> is negative.
        /// </returns>
        public linkList process(crawledPage page, Boolean isLinkStackEmpty)
        {
            // Stop conditions, evaluated in the same order as before (short-circuit keeps that order).
            if (page == null || CountToTarget < 1 || iLimit < 0)
            {
                isLinkCollectingDone = true;
                return null;
            }

            linkList output = new linkList();

            // Only pages whose tokenized content is an htmlContentPage carry link nodes to inspect.
            htmlContentPage hContent = page.tokenizedContent as htmlContentPage;
            if (hContent != null)
            {
                htmlLinkNodeCollection linkNodes = new htmlLinkNodeCollection(hContent.tokens);

                foreach (htmlLinkNode ln in linkNodes.getSorted())
                {
                    link crawledLink = null;
                    if (page.links.byUrl.ContainsKey(ln.url))
                    {
                        crawledLink = page.links.byUrl[ln.url];
                    }

                    // A link node without a matching entry in page.links has nothing to register;
                    // previously a null reference was handed to Add() in this case.
                    if (crawledLink == null)
                    {
                        continue;
                    }

                    var cwl = Add(crawledLink);
                    if (cwl == null)
                    {
                        // Add() returned nothing for this link, so it is not collected.
                        continue;
                    }

                    if (ln.isPrimary)
                    {
                        primary.Add(crawledLink);
                        output.Add(cwl);
                    }
                    else
                    {
                        secondary.Add(crawledLink);
                    }
                }
            }

            // Fallback: no primary links on this page and the caller reports an empty link stack.
            // NOTE(review): AddMulti presumably appends all of `secondary` (accumulated across calls) to output - confirm.
            if (!output.Any() && isLinkStackEmpty)
            {
                collectionExtensions.AddMulti(output, secondary);
            }

            // One processing iteration consumed.
            iLimit--;

            return output;
        }