Exemplo n.º 1
0
        /// <summary>
        /// Handles completion of one list page: writes the collected data to an
        /// XML file, hands it to the root configuration's plugin when one is
        /// set, empties the collected tables and writes a "donepage" log entry.
        /// </summary>
        /// <param name="listPage">The list page whose collection has finished.</param>
        void SnifferThread_ListPageParseDone(ListPage listPage)
        {
            // Make sure the target directory exists before exporting.
            string saveDirectory = listPage.SavePath;
            if (Directory.Exists(saveDirectory) == false)
            {
                Directory.CreateDirectory(saveDirectory);
            }

            // Export the collected data to the XML file.
            string outputFile = listPage.SavePathAndFileName;
            Data.WriteXml(outputFile);

            // Walk up the configuration ancestors until the first root
            // configuration is found; the plugin (if any) is attached there.
            RootPageConfiguration rootConfiguration = null;
            for (PageConfiguration ancestor = listPage.Configuration.Parent; ancestor != null; ancestor = ancestor.Parent)
            {
                rootConfiguration = ancestor as RootPageConfiguration;
                if (rootConfiguration != null)
                {
                    break;
                }
            }

            // Invoke the plugin when one is configured.
            if (rootConfiguration != null && rootConfiguration.Plugin != null)
            {
                rootConfiguration.Plugin.Receive(Data, outputFile);
            }

            // Clear the collected data so the next page starts empty.
            if (Data.Tables.Count > 0)
            {
                Data.Tables.Clear();
            }

            // Only '&' is escaped in the URL before it is embedded in the log markup.
            const string logFormat = "<donepage><thread>{0}</thread><page>{1}</page><donetime>{2}</donetime></donepage>";
            string logEntry = string.Format(logFormat, this.ThreadIndex, listPage.PageUrl.Replace("&", "&amp;"), DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff"));
            InfoSniffer.LogManager.WriteLog(logEntry);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a <see cref="DetailPageConfiguration"/> from a page configuration node.
        /// Page-level attributes are read from <paramref name="pageConfNode"/> first and,
        /// when absent there, from the "SnifferPage" child of the CommonConfig node named
        /// by the "Config" attribute of <paramref name="parentConfNode"/>.
        /// Nested "SnifferPage" nodes are converted recursively.
        /// </summary>
        /// <param name="parent">Configuration passed to the new configuration's constructor as its parent.</param>
        /// <param name="parentConfNode">
        /// Node whose optional "Config" attribute names a CommonConfig node; that
        /// CommonConfig is looked up among the children of this node's parent node.
        /// </param>
        /// <param name="pageConfNode">The node describing the detail page; its "PageName" attribute is read unconditionally.</param>
        /// <returns>The populated detail page configuration.</returns>
        private static DetailPageConfiguration CreateDetailPageConfiguration(PageConfiguration parent, XmlNode parentConfNode, XmlNode pageConfNode)
        {
            XmlNode commonConfigNode = null;

            if (parentConfNode.Attributes["Config"] != null)
            {
                XmlNode commonListConfigNode = parentConfNode.ParentNode.SelectSingleNode(string.Format("CommonConfig[@Name='{0}']", parentConfNode.Attributes["Config"].Value));

                // A "Config" attribute that names a missing CommonConfig is treated as
                // "no common configuration" instead of failing with a null dereference.
                if (commonListConfigNode != null)
                {
                    commonConfigNode = commonListConfigNode.SelectSingleNode("SnifferPage");
                }
            }

            DetailPageConfiguration detailPageConf = new DetailPageConfiguration(parent);

            detailPageConf.PageName = pageConfNode.Attributes["PageName"].Value;

            // Basic page attributes: the page node wins, the common config is the fallback,
            // and a property is left untouched when neither node defines the attribute.
            string pageUrl = GetDetailPageAttributeValue(pageConfNode, commonConfigNode, "PageUrl");
            if (pageUrl != null)
            {
                detailPageConf.PageUrl = pageUrl;
            }

            string pageType = GetDetailPageAttributeValue(pageConfNode, commonConfigNode, "PageType");
            if (pageType != null)
            {
                detailPageConf.PageType = (PageType)Enum.Parse(typeof(PageType), pageType, false);
            }

            string encodingName = GetDetailPageAttributeValue(pageConfNode, commonConfigNode, "Encoding");
            if (encodingName != null)
            {
                detailPageConf.Encoding = Encoding.GetEncoding(encodingName);
            }

            // Paging attributes.
            string pageQuery = GetDetailPageAttributeValue(pageConfNode, commonConfigNode, "PageQuery");
            if (pageQuery != null)
            {
                detailPageConf.PageQuery = pageQuery;
            }

            string replacePageQuery = GetDetailPageAttributeValue(pageConfNode, commonConfigNode, "ReplacePageQuery");
            if (replacePageQuery != null)
            {
                detailPageConf.ReplacePageQuery = replacePageQuery;
            }

            string pageIndexFormat = GetDetailPageAttributeValue(pageConfNode, commonConfigNode, "PageIndexFormat");
            if (pageIndexFormat != null)
            {
                detailPageConf.PageIndexFormat = pageIndexFormat;
            }

            string pageStartIndex = GetDetailPageAttributeValue(pageConfNode, commonConfigNode, "PageStartIndex");
            if (pageStartIndex != null)
            {
                detailPageConf.PageStartIndex = int.Parse(pageStartIndex);
            }

            string pageIndexSeed = GetDetailPageAttributeValue(pageConfNode, commonConfigNode, "PageIndexSeed");
            if (pageIndexSeed != null)
            {
                detailPageConf.PageIndexSeed = int.Parse(pageIndexSeed);
            }

            string pageIndexStep = GetDetailPageAttributeValue(pageConfNode, commonConfigNode, "PageIndexStep");
            if (pageIndexStep != null)
            {
                detailPageConf.PageIndexStep = int.Parse(pageIndexStep);
            }

            string endPageDetermineRegex = GetDetailPageAttributeValue(pageConfNode, commonConfigNode, "EndPageDetermineRegex");
            if (endPageDetermineRegex != null)
            {
                detailPageConf.EndPageDetermineRegex = endPageDetermineRegex;
            }

            string pageMethod = GetDetailPageAttributeValue(pageConfNode, commonConfigNode, "PageMethod");
            if (pageMethod != null)
            {
                detailPageConf.PageMethod = (PageMethod)Enum.Parse(typeof(PageMethod), pageMethod);
            }

            // Sub page URL item: "SnifferSubPageUrlItem" is a child element, so the
            // fallback to the common config only depends on the page node lacking it
            // (the same rule CreateListPageConfiguration applies).
            XmlNode snifferUrlItemNode = pageConfNode.SelectSingleNode("SnifferSubPageUrlItem");

            if (snifferUrlItemNode == null && commonConfigNode != null)
            {
                snifferUrlItemNode = commonConfigNode.SelectSingleNode("SnifferSubPageUrlItem");
            }
            if (snifferUrlItemNode != null)
            {
                detailPageConf.SnifferSubPageUrlItem = CreateSnifferUrlItem(snifferUrlItemNode);
            }

            // Collect "SnifferItem" nodes: the page's own items first, then the common
            // ones, so a page-level item takes precedence over a common item of the same name.
            List<XmlNodeList> itemNodeLists = new List<XmlNodeList>();

            XmlNodeList pageItemNodes = pageConfNode.SelectNodes("SnifferItem");
            if (pageItemNodes != null)
            {
                itemNodeLists.Add(pageItemNodes);
            }

            if (commonConfigNode != null)
            {
                XmlNodeList commonItemNodes = commonConfigNode.SelectNodes("SnifferItem");
                if (commonItemNodes != null)
                {
                    itemNodeLists.Add(commonItemNodes);
                }
            }

            foreach (XmlNodeList itemNodeList in itemNodeLists)
            {
                foreach (XmlNode itemNode in itemNodeList)
                {
                    XmlAttributeCollection itemAttributes = itemNode.Attributes;
                    string itemName = itemAttributes["ItemName"].Value;

                    // Skip items whose name has already been registered.
                    bool alreadyDefined = false;
                    foreach (SnifferItem existingItem in detailPageConf.SnifferItems)
                    {
                        if (existingItem.ItemName == itemName)
                        {
                            alreadyDefined = true;
                            break;
                        }
                    }
                    if (alreadyDefined)
                    {
                        continue;
                    }

                    SnifferItem item = new SnifferItem();

                    item.ItemName = itemName;

                    if (itemAttributes["SaveImage"] != null)
                    {
                        item.SaveImage = bool.Parse(itemAttributes["SaveImage"].Value);
                    }
                    if (itemAttributes["SaveImagesPath"] != null)
                    {
                        item.SaveImagesPath = itemAttributes["SaveImagesPath"].Value;
                    }
                    if (itemAttributes["ImageUrlPath"] != null)
                    {
                        item.ImageUrlPath = itemAttributes["ImageUrlPath"].Value;
                    }
                    if (itemAttributes["IsClearHTML"] != null)
                    {
                        item.IsClearHTML = bool.Parse(itemAttributes["IsClearHTML"].Value);
                    }
                    if (itemAttributes["IsUrl"] != null)
                    {
                        item.IsUrl = bool.Parse(itemAttributes["IsUrl"].Value);
                    }
                    if (itemAttributes["UrlToAbs"] != null)
                    {
                        item.UrlToAbs = bool.Parse(itemAttributes["UrlToAbs"].Value);
                    }
                    if (itemAttributes["Separator"] != null)
                    {
                        item.Separator = itemAttributes["Separator"].Value;
                    }
                    if (itemAttributes["ClearAElement"] != null)
                    {
                        item.ClearAElement = bool.Parse(itemAttributes["ClearAElement"].Value);
                    }
                    if (itemAttributes["MutiPage"] != null)
                    {
                        item.MutiPage = bool.Parse(itemAttributes["MutiPage"].Value);
                    }
                    if (itemAttributes["MutiPageSeparator"] != null)
                    {
                        item.MutiPageSeparator = itemAttributes["MutiPageSeparator"].Value;
                    }

                    XmlNode ndClearRegexString = itemNode.SelectSingleNode("ClearRegexString");
                    if (ndClearRegexString != null)
                    {
                        item.ClearRegexString = ndClearRegexString.InnerText;
                    }

                    XmlNode ndDefaultValue = itemNode.SelectSingleNode("DefaultValue");
                    if (ndDefaultValue != null)
                    {
                        item.DefaultValue = ndDefaultValue.InnerText;
                    }

                    XmlNode ndRegexString = itemNode.SelectSingleNode("RegexString");
                    if (ndRegexString != null)
                    {
                        item.RegexString = CreateRegexString(ndRegexString);
                    }

                    detailPageConf.SnifferItems.Add(item);
                }
            }

            // Nested sub pages declared directly on the page node.
            XmlNodeList subPageNodes = pageConfNode.SelectNodes("SnifferPage");

            foreach (XmlNode subPageNode in subPageNodes)
            {
                detailPageConf.SubPageConfigurations.Add(CreateDetailPageConfiguration(detailPageConf, pageConfNode, subPageNode));
            }

            // Nested sub pages declared on the common config are appended as well.
            if (commonConfigNode != null)
            {
                subPageNodes = commonConfigNode.SelectNodes("SnifferPage");
                foreach (XmlNode subPageNode in subPageNodes)
                {
                    detailPageConf.SubPageConfigurations.Add(CreateDetailPageConfiguration(detailPageConf, pageConfNode, subPageNode));
                }
            }

            return(detailPageConf);
        }

        /// <summary>
        /// Reads an attribute value from <paramref name="primaryNode"/>, falling back to
        /// <paramref name="fallbackNode"/> (when it is not null) if the primary node does
        /// not define the attribute.
        /// </summary>
        /// <param name="primaryNode">Node that is consulted first.</param>
        /// <param name="fallbackNode">Node consulted when the primary node lacks the attribute; may be null.</param>
        /// <param name="attributeName">Name of the attribute to read.</param>
        /// <returns>The attribute value, or null when neither node defines the attribute.</returns>
        private static string GetDetailPageAttributeValue(XmlNode primaryNode, XmlNode fallbackNode, string attributeName)
        {
            XmlAttribute attribute = primaryNode.Attributes[attributeName];

            if (attribute == null && fallbackNode != null)
            {
                attribute = fallbackNode.Attributes[attributeName];
            }

            return attribute == null ? null : attribute.Value;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Builds a <see cref="ListPageConfiguration"/> from a page configuration node.
        /// Optional attributes are read from <paramref name="pageConfNode"/> first and,
        /// when absent there, from the CommonConfig node named by its "Config" attribute
        /// (looked up among the children of the page node's parent node).
        /// A nested "SnifferPage" node is converted recursively into the sub page configuration.
        /// </summary>
        /// <param name="parent">Configuration passed to the new configuration's constructor as its parent.</param>
        /// <param name="pageConfNode">
        /// The node describing the list page; its "PageName" and "PageUrl" attributes are read unconditionally.
        /// </param>
        /// <returns>The populated list page configuration.</returns>
        /// <exception cref="System.Exception">
        /// Thrown when neither the page node nor the common config contains a "SnifferSubPageUrlItem" node.
        /// </exception>
        private static ListPageConfiguration CreateListPageConfiguration(PageConfiguration parent, XmlNode pageConfNode)
        {
            XmlNode commonConfigNode = null;

            if (pageConfNode.Attributes["Config"] != null)
            {
                commonConfigNode = pageConfNode.ParentNode.SelectSingleNode(string.Format("CommonConfig[@Name='{0}']", pageConfNode.Attributes["Config"].Value));
            }

            ListPageConfiguration pageConf = new ListPageConfiguration(parent);

            pageConf.PageName = pageConfNode.Attributes["PageName"].Value;
            pageConf.PageUrl  = pageConfNode.Attributes["PageUrl"].Value;

            if (pageConfNode.Attributes["PageType"] != null)
            {
                pageConf.PageType = (PageType)Enum.Parse(typeof(PageType), pageConfNode.Attributes["PageType"].Value, false);
            }
            // The common config is optional, so it must be null-checked here like in
            // every other fallback branch below.
            else if (commonConfigNode != null && commonConfigNode.Attributes["PageType"] != null)
            {
                pageConf.PageType = (PageType)Enum.Parse(typeof(PageType), commonConfigNode.Attributes["PageType"].Value, false);
            }

            // Paging attributes.
            if (pageConfNode.Attributes["PageQuery"] != null)
            {
                pageConf.PageQuery = pageConfNode.Attributes["PageQuery"].Value;
            }
            else if (commonConfigNode != null && commonConfigNode.Attributes["PageQuery"] != null)
            {
                pageConf.PageQuery = commonConfigNode.Attributes["PageQuery"].Value;
            }

            if (pageConfNode.Attributes["ReplacePageQuery"] != null)
            {
                pageConf.ReplacePageQuery = pageConfNode.Attributes["ReplacePageQuery"].Value;
            }
            else if (commonConfigNode != null && commonConfigNode.Attributes["ReplacePageQuery"] != null)
            {
                pageConf.ReplacePageQuery = commonConfigNode.Attributes["ReplacePageQuery"].Value;
            }

            if (pageConfNode.Attributes["PageIndexFormat"] != null)
            {
                pageConf.PageIndexFormat = pageConfNode.Attributes["PageIndexFormat"].Value;
            }
            else if (commonConfigNode != null && commonConfigNode.Attributes["PageIndexFormat"] != null)
            {
                pageConf.PageIndexFormat = commonConfigNode.Attributes["PageIndexFormat"].Value;
            }

            if (pageConfNode.Attributes["PageIndexSeed"] != null)
            {
                pageConf.PageIndexSeed = int.Parse(pageConfNode.Attributes["PageIndexSeed"].Value);
            }
            else if (commonConfigNode != null && commonConfigNode.Attributes["PageIndexSeed"] != null)
            {
                pageConf.PageIndexSeed = int.Parse(commonConfigNode.Attributes["PageIndexSeed"].Value);
            }

            if (pageConfNode.Attributes["PageIndexStep"] != null)
            {
                pageConf.PageIndexStep = int.Parse(pageConfNode.Attributes["PageIndexStep"].Value);
            }
            else if (commonConfigNode != null && commonConfigNode.Attributes["PageIndexStep"] != null)
            {
                pageConf.PageIndexStep = int.Parse(commonConfigNode.Attributes["PageIndexStep"].Value);
            }

            if (pageConfNode.Attributes["PageMethod"] != null)
            {
                pageConf.PageMethod = (PageMethod)Enum.Parse(typeof(PageMethod), pageConfNode.Attributes["PageMethod"].Value);
            }
            else if (commonConfigNode != null && commonConfigNode.Attributes["PageMethod"] != null)
            {
                pageConf.PageMethod = (PageMethod)Enum.Parse(typeof(PageMethod), commonConfigNode.Attributes["PageMethod"].Value);
            }

            if (pageConfNode.Attributes["EndPageDetermineRegex"] != null)
            {
                pageConf.EndPageDetermineRegex = pageConfNode.Attributes["EndPageDetermineRegex"].Value;
            }
            else if (commonConfigNode != null && commonConfigNode.Attributes["EndPageDetermineRegex"] != null)
            {
                pageConf.EndPageDetermineRegex = commonConfigNode.Attributes["EndPageDetermineRegex"].Value;
            }

            if (pageConfNode.Attributes["For"] != null)
            {
                pageConf.For = bool.Parse(pageConfNode.Attributes["For"].Value);
            }
            else if (commonConfigNode != null && commonConfigNode.Attributes["For"] != null)
            {
                pageConf.For = bool.Parse(commonConfigNode.Attributes["For"].Value);
            }

            if (pageConfNode.Attributes["Encoding"] != null)
            {
                pageConf.Encoding = Encoding.GetEncoding(pageConfNode.Attributes["Encoding"].Value);
            }
            else if (commonConfigNode != null && commonConfigNode.Attributes["Encoding"] != null)
            {
                pageConf.Encoding = Encoding.GetEncoding(commonConfigNode.Attributes["Encoding"].Value);
            }

            // The sub page URL item is mandatory for a list page: take it from the page
            // node, otherwise from the common config, otherwise fail.
            XmlNode snifferUrlItemNode = pageConfNode.SelectSingleNode("SnifferSubPageUrlItem");

            if (commonConfigNode != null && snifferUrlItemNode == null)
            {
                snifferUrlItemNode = commonConfigNode.SelectSingleNode("SnifferSubPageUrlItem");
            }

            if (snifferUrlItemNode != null)
            {
                pageConf.SnifferSubPageUrlItem = CreateSnifferUrlItem(snifferUrlItemNode);
            }
            else
            {
                throw new System.Exception(string.Format("{0} 页没有配置 SnifferUrlItem 节点", pageConf.PageName));
            }

            // Optional sub page, with the same page-node-then-common-config lookup.
            XmlNode subPageNode = pageConfNode.SelectSingleNode("SnifferPage");

            if (commonConfigNode != null && subPageNode == null)
            {
                subPageNode = commonConfigNode.SelectSingleNode("SnifferPage");
            }

            if (subPageNode != null)
            {
                // NOTE(review): the sub page's "PageType" attribute is read unconditionally
                // here; a sub page node without it fails on this line.
                if (subPageNode.Attributes["PageType"].Value == "DetailPage")
                {
                    pageConf.SubPageConfiguration = CreateDetailPageConfiguration(pageConf, pageConfNode, subPageNode);
                }
                else
                {
                    // Any other page type is treated as a nested list page.
                    pageConf.SubPageConfiguration = CreateListPageConfiguration(pageConf, subPageNode);
                }
            }

            return(pageConf);
        }
Exemplo n.º 4
0
 /// <summary>
 /// Creates a list page configuration, forwarding <paramref name="parent"/>
 /// to the base class constructor.
 /// </summary>
 /// <param name="parent">The configuration handed to the base constructor.</param>
 public ListPageConfiguration(PageConfiguration parent) : base(parent)
 {
     // No additional initialisation; everything is done by the base constructor.
 }
Exemplo n.º 5
0
 /// <summary>
 /// Creates a page that stores both its parent page and its configuration.
 /// </summary>
 /// <param name="parent">The page stored in the parent field.</param>
 /// <param name="configuration">The configuration stored in the configuration field.</param>
 public PageBase(PageBase parent, PageConfiguration configuration)
 {
     _configuration = configuration;
     _parent = parent;
 }
Exemplo n.º 6
0
 /// <summary>
 /// Creates a page from its configuration only; the parent field is not
 /// assigned by this constructor.
 /// </summary>
 /// <param name="configuration">The configuration stored in the configuration field.</param>
 public PageBase(PageConfiguration configuration)
 {
     _configuration = configuration;
 }
Exemplo n.º 7
0
 /// <summary>
 /// Creates a page configuration and records its parent configuration.
 /// </summary>
 /// <param name="parent">The configuration stored in the parent field.</param>
 public PageConfiguration(PageConfiguration parent)
 {
     this._parent = parent;
 }
Exemplo n.º 8
0
 /// <summary>
 /// Creates a detail page configuration, forwarding <paramref name="parent"/>
 /// to the base class constructor.
 /// </summary>
 /// <param name="parent">The configuration handed to the base constructor.</param>
 public DetailPageConfiguration(PageConfiguration parent) : base(parent)
 {
     // No additional initialisation; everything is done by the base constructor.
 }