Exemplo n.º 1
0
        /// <summary>
        /// Crawls the detail pages linked from a paginated list, one list page at a time.
        /// </summary>
        /// <remarks>
        /// The loop ends when a list page does not report <c>Succeed</c>, when the number of
        /// processed pages equals the value returned by <c>SnifferContext.GetSnifferPageCount</c>,
        /// or when a page's body equals the body of the page processed just before it.
        /// </remarks>
        /// <param name="listPage">List page to start from; each following page is built from the current one.</param>
        public void ParseDetailPage(ListPage listPage)
        {
            ListPage previousPage = null;
            int      pageIndex    = this.SnifferContext.GetStartPageIndex(listPage);
            int      pageLimit    = SnifferContext.GetSnifferPageCount(listPage);
            int      pagesDone    = 0;

            while (true)
            {
                listPage.Sniffer();

                // Sniffing failed: treat as "no more data" for this category.
                if (!listPage.Succeed)
                {
                    break;
                }

                // The configured number of pages has been processed.
                // NOTE(review): this is checked after Sniffer(), so the page beyond the limit is
                // still sniffed once before being discarded - confirm whether that is intended.
                if (pagesDone == pageLimit)
                {
                    break;
                }

                // Same body as the previous page: presumably the site ran out of pages.
                if (previousPage != null && previousPage.PageBody == listPage.PageBody)
                {
                    break;
                }

                OnPageIndexChange(listPage.PageUrl);

                foreach (UrlItem link in listPage.SubPageUrlResults)
                {
                    DetailPageConfiguration detailConf = (DetailPageConfiguration)listPage.ListPageConfiguration.SubPageConfiguration;
                    DetailPage detail = new DetailPage(listPage, detailConf);
                    detail.PageIndex = detailConf.PageStartIndex;
                    detail.PageName  = link.Title;
                    detail.PageUrl   = link.Url;
                    detail.Sniffer();
                    OnDetailPageParseDone(detail);
                }

                pagesDone++;

                // NOTE(review): with PageIndexStep == 1 this leaves pageIndex unchanged, so the
                // actual advance presumably happens inside ReplacePageIndex - TODO confirm.
                pageIndex += listPage.ListPageConfiguration.PageIndexStep - 1;

                // Build the next list page from the current one.
                ListPage nextPage = new ListPage(listPage.Parent, listPage.ListPageConfiguration);
                nextPage.PageName = listPage.PageName;
                nextPage.PageUrl  = listPage.PageUrl;

                if (listPage.ListPageConfiguration.PageMethod != PageMethod.Get)
                {
                    // Non-GET paging: the page index is substituted into the query template.
                    nextPage.PageQuery = string.Format(nextPage.PageQuery, pageIndex);
                }
                else
                {
                    ReplacePageIndex(nextPage, pageIndex);
                }

                previousPage = listPage;
                listPage     = nextPage;
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Worker routine: removes first-level pages from <c>SnifferContext.FirstPages</c> one at a
        /// time (under a lock on that collection) and processes each of them until the collection
        /// is empty.
        /// </summary>
        /// <remarks>
        /// Any exception is logged together with <c>ThreadIndex</c> and ends the routine; pages
        /// still left in the collection are not processed by this call.
        /// </remarks>
        private void _start()
        {
            try
            {
                // Loop rather than calling _start() recursively: the recursive form added one stack
                // frame per queued page, so a long queue could overflow the stack (which cannot be
                // caught). Keeping the loop inside this single try block preserves the original
                // behaviour that the first exception stops the worker.
                while (true)
                {
                    ListPage firstPage;
                    lock (SnifferContext.FirstPages)
                    {
                        if (SnifferContext.FirstPages.Count == 0)
                        {
                            return;
                        }

                        firstPage = SnifferContext.FirstPages[0];
                        SnifferContext.FirstPages.Remove(firstPage);
                    }

                    // When the sub pages are detail pages and the start index is above 1, move the
                    // first page to that index before sniffing it.
                    if (firstPage.ListPageConfiguration.SubPageConfiguration.PageType == PageType.DetailPage)
                    {
                        int startPageIndex = this.SnifferContext.GetStartPageIndex(firstPage);

                        if (startPageIndex > 1)
                        {
                            if (firstPage.ListPageConfiguration.PageMethod == PageMethod.Get)
                            {
                                ReplacePageIndex(firstPage, startPageIndex);
                            }
                            else
                            {
                                firstPage.PageQuery = string.Format(firstPage.PageQuery, startPageIndex);
                            }
                        }
                    }

                    // NOTE(review): ParseDetailPage and ParseListPage both call Sniffer() on the
                    // page again, so it is sniffed twice - confirm that Sniffer() is safe to repeat.
                    firstPage.Sniffer();

                    if (firstPage.SubPageUrlResults.Count > 0 && firstPage.ListPageConfiguration.SubPageConfiguration.PageType == PageType.DetailPage)
                    {
                        ParseDetailPage(firstPage);
                        OnListPageParseDone(firstPage);
                    }
                    else
                    {
                        ParseListPage(firstPage);
                    }

                    OnFirstPageParseDone(firstPage);
                }
            }
            catch (System.Exception e)
            {
                // Only the message is logged; the stack trace is not recorded.
                InfoSniffer.LogManager.WriteLog(string.Format("<error><thread>{0}</thread><message>{1}</message></error>", this.ThreadIndex, e.Message));
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Opens the configuration file for <paramref name="fileName"/> and builds the list of
        /// first-level pages below the named root page.
        /// </summary>
        /// <param name="fileName">Configuration file name without the ".xml" extension; it is appended to <c>AppDataPath</c>.</param>
        /// <param name="rootPageName">Name of the root page configuration to look up.</param>
        /// <returns>
        /// The first-level pages (possibly an empty list), or <c>null</c> when no root page
        /// configuration with that name exists.
        /// </returns>
        public static List<ListPage> GetAllFirstPages(string fileName, string rootPageName)
        {
            // Plain concatenation on purpose: AppDataPath must not become part of a composite
            // format string, otherwise a '{' or '}' in the path would break string.Format.
            SnifferConfig.OpenSnfFile(AppDataPath + fileName + ".xml");
            RootPageConfiguration rootPageConf = SnifferConfig.GetRootPageConfiguration(rootPageName);

            if (rootPageConf == null)
            {
                return null;
            }

            ListPage       rootPage      = new ListPage((ListPageConfiguration)rootPageConf);
            List<ListPage> allFirstPages = new List<ListPage>();

            if (rootPageConf.IsSniffer)
            {
                // First-level pages are discovered by sniffing the root page.
                rootPage.Sniffer();

                // There is no special handling for a sniff that did not complete or found
                // nothing: the loop below just converts whatever links were collected.
                foreach (UrlItem urlItem in rootPage.SubPageUrlResults)
                {
                    ListPage page = new ListPage(rootPage, (ListPageConfiguration)rootPage.ListPageConfiguration.SubPageConfiguration);
                    page.PageName = urlItem.Title;
                    page.PageUrl  = urlItem.Url;
                    allFirstPages.Add(page);
                }
            }
            else
            {
                // First-level pages are listed explicitly in the configuration.
                foreach (ListPageConfiguration firstPageConf in rootPageConf.SubPageConfigurations)
                {
                    allFirstPages.Add(new ListPage(rootPage, firstPageConf));
                }
            }

            return allFirstPages;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Loads the first-level pages for a root page, selects one by position, optionally moves
        /// it to the requested page index, and sniffs it.
        /// </summary>
        /// <param name="fileName">Configuration file name, passed through to <c>GetAllFirstPages</c>.</param>
        /// <param name="rootPageName">Name of the root page configuration.</param>
        /// <param name="firstIndex">Zero-based position of the wanted page in the first-level page list.</param>
        /// <param name="pageIndex">Page index to move to; only applied when greater than 1 and the sub pages are detail pages.</param>
        /// <returns>The selected list page after <c>Sniffer()</c> has been called on it.</returns>
        /// <exception cref="System.ArgumentException">No root page configuration named <paramref name="rootPageName"/> exists.</exception>
        /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="firstIndex"/> is outside the first-level page list.</exception>
        public static ListPage GetListPage(string fileName, string rootPageName, int firstIndex, int pageIndex)
        {
            List<ListPage> allFirstPages = GetAllFirstPages(fileName, rootPageName);

            // GetAllFirstPages returns null for an unknown root page; fail with a clear message
            // instead of a NullReferenceException on the indexer below.
            if (allFirstPages == null)
            {
                throw new System.ArgumentException(
                    string.Format("Root page configuration '{0}' was not found in '{1}'.", rootPageName, fileName),
                    "rootPageName");
            }

            if (firstIndex < 0 || firstIndex >= allFirstPages.Count)
            {
                throw new System.ArgumentOutOfRangeException(
                    "firstIndex",
                    firstIndex,
                    string.Format("Expected an index between 0 and {0}.", allFirstPages.Count - 1));
            }

            ListPage firstPage = allFirstPages[firstIndex];

            // A page index above 1 has to be substituted into the page before sniffing.
            if (firstPage.ListPageConfiguration.SubPageConfiguration.PageType == PageType.DetailPage && pageIndex > 1)
            {
                if (firstPage.ListPageConfiguration.PageMethod == PageMethod.Get)
                {
                    ReplacePageIndex(firstPage, pageIndex);
                }
                else
                {
                    firstPage.PageQuery = string.Format(firstPage.PageQuery, pageIndex);
                }
            }

            firstPage.Sniffer();

            return firstPage;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Crawls a list page and recurses through the list pages it links to, handing pages
        /// whose sub pages are detail pages over to <c>ParseDetailPage</c>.
        /// </summary>
        /// <param name="listPage">List page to sniff and descend from.</param>
        public void ParseListPage(ListPage listPage)
        {
            listPage.Sniffer();

            foreach (UrlItem link in listPage.SubPageUrlResults)
            {
                ListPage child = new ListPage(listPage, (ListPageConfiguration)listPage.ListPageConfiguration.SubPageConfiguration);
                child.PageName = link.Title;
                child.PageUrl  = link.Url;
                child.Sniffer();

                if (child.SubPageUrlResults.Count <= 0)
                {
                    // The link produced no sub-links. If the current configuration has For set and
                    // the current page has a body, wrap the same link again using the current
                    // page's own configuration and descend into it.
                    if (listPage.ListPageConfiguration.For && !string.IsNullOrEmpty(listPage.PageBody))
                    {
                        ListPage sameLevelPage = new ListPage(listPage, (ListPageConfiguration)listPage.ListPageConfiguration);
                        sameLevelPage.PageName = link.Title;
                        sameLevelPage.PageUrl  = link.Url;

                        ParseListPage(sameLevelPage);
                    }

                    continue;
                }

                if (child.ListPageConfiguration.SubPageConfiguration.PageType != PageType.DetailPage)
                {
                    // Still a list level: keep descending.
                    ParseListPage(child);
                    continue;
                }

                // The sub pages are detail pages: crawl them, then report the list page as done.
                ParseDetailPage(child);
                OnListPageParseDone(child);
            }
        }