/// <summary>
/// Handles completion of one list page: writes the collected data to the
/// page's XML file, hands it to the root configuration's plugin (if any),
/// clears the collected tables and logs a "donepage" entry.
/// </summary>
/// <param name="listPage">The list page whose parsing has finished.</param>
void SnifferThread_ListPageParseDone(ListPage listPage)
{
    string dir = listPage.SavePath;
    if (!Directory.Exists(dir))
    {
        Directory.CreateDirectory(dir);
    }

    string dirAndFileName = listPage.SavePathAndFileName;

    // Export the collected data to the XML file.
    Data.WriteXml(dirAndFileName);

    // Walk up the configuration chain to find the root configuration,
    // which is where an optional plugin is attached.
    RootPageConfiguration rootPageConf = null;
    PageConfiguration parentPageConf = listPage.Configuration.Parent;
    while (parentPageConf != null)
    {
        rootPageConf = parentPageConf as RootPageConfiguration;
        if (rootPageConf != null)
        {
            break;
        }
        parentPageConf = parentPageConf.Parent;
    }

    // Call the plugin when one is configured.
    if (rootPageConf != null && rootPageConf.Plugin != null)
    {
        rootPageConf.Plugin.Receive(Data, dirAndFileName);
    }

    // Clear the data so the next page starts empty.
    if (Data.Tables.Count > 0)
    {
        Data.Tables.Clear();
    }

    // The log entry is XML markup, so '&' in the URL (common in query strings)
    // must be written as the "&amp;" entity to keep the entry well-formed.
    string escapedUrl = listPage.PageUrl.Replace("&", "&amp;");
    InfoSniffer.LogManager.WriteLog(string.Format(
        "<donepage><thread>{0}</thread><page>{1}</page><donetime>{2}</donetime></donepage>",
        this.ThreadIndex,
        escapedUrl,
        DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff")));
}
/// <summary>
/// Builds a <see cref="DetailPageConfiguration"/> from a SnifferPage node.
/// Every setting is read from <paramref name="pageConfNode"/> first; when it is
/// absent there, the SnifferPage child of the CommonConfig named by the parent's
/// "Config" attribute (if any) supplies the value.
/// </summary>
/// <param name="parent">Configuration that owns the new detail page configuration.</param>
/// <param name="parentConfNode">
/// Configuration node of the parent page. Its optional "Config" attribute names a
/// CommonConfig element located under the same XML parent.
/// </param>
/// <param name="pageConfNode">The SnifferPage node describing this detail page.</param>
/// <returns>The populated configuration, including nested sub page configurations.</returns>
private static DetailPageConfiguration CreateDetailPageConfiguration(PageConfiguration parent, XmlNode parentConfNode, XmlNode pageConfNode)
{
    // Resolve the shared defaults. A "Config" attribute that names a missing
    // CommonConfig is treated as "no defaults" instead of dereferencing null.
    XmlNode commonConfigNode = null;
    if (parentConfNode.Attributes["Config"] != null)
    {
        XmlNode commonListConfigNode = parentConfNode.ParentNode.SelectSingleNode(
            string.Format("CommonConfig[@Name='{0}']", parentConfNode.Attributes["Config"].Value));
        if (commonListConfigNode != null)
        {
            commonConfigNode = commonListConfigNode.SelectSingleNode("SnifferPage");
        }
    }

    DetailPageConfiguration detailPageConf = new DetailPageConfiguration(parent);
    detailPageConf.PageName = pageConfNode.Attributes["PageName"].Value;

    string value = ReadDetailPageAttribute(pageConfNode, commonConfigNode, "PageUrl");
    if (value != null)
    {
        detailPageConf.PageUrl = value;
    }

    value = ReadDetailPageAttribute(pageConfNode, commonConfigNode, "PageType");
    if (value != null)
    {
        detailPageConf.PageType = (PageType)Enum.Parse(typeof(PageType), value, false);
    }

    value = ReadDetailPageAttribute(pageConfNode, commonConfigNode, "Encoding");
    if (value != null)
    {
        detailPageConf.Encoding = Encoding.GetEncoding(value);
    }

    // Paging settings.
    value = ReadDetailPageAttribute(pageConfNode, commonConfigNode, "PageQuery");
    if (value != null)
    {
        detailPageConf.PageQuery = value;
    }

    value = ReadDetailPageAttribute(pageConfNode, commonConfigNode, "ReplacePageQuery");
    if (value != null)
    {
        detailPageConf.ReplacePageQuery = value;
    }

    value = ReadDetailPageAttribute(pageConfNode, commonConfigNode, "PageIndexFormat");
    if (value != null)
    {
        detailPageConf.PageIndexFormat = value;
    }

    value = ReadDetailPageAttribute(pageConfNode, commonConfigNode, "PageStartIndex");
    if (value != null)
    {
        detailPageConf.PageStartIndex = int.Parse(value);
    }

    value = ReadDetailPageAttribute(pageConfNode, commonConfigNode, "PageIndexSeed");
    if (value != null)
    {
        detailPageConf.PageIndexSeed = int.Parse(value);
    }

    value = ReadDetailPageAttribute(pageConfNode, commonConfigNode, "PageIndexStep");
    if (value != null)
    {
        detailPageConf.PageIndexStep = int.Parse(value);
    }

    value = ReadDetailPageAttribute(pageConfNode, commonConfigNode, "EndPageDetermineRegex");
    if (value != null)
    {
        detailPageConf.EndPageDetermineRegex = value;
    }

    value = ReadDetailPageAttribute(pageConfNode, commonConfigNode, "PageMethod");
    if (value != null)
    {
        detailPageConf.PageMethod = (PageMethod)Enum.Parse(typeof(PageMethod), value);
    }

    // SnifferSubPageUrlItem is a child element, so the fallback looks for the
    // element in the common configuration (not for an attribute of that name).
    XmlNode snifferUrlItemNode = pageConfNode.SelectSingleNode("SnifferSubPageUrlItem");
    if (snifferUrlItemNode == null && commonConfigNode != null)
    {
        snifferUrlItemNode = commonConfigNode.SelectSingleNode("SnifferSubPageUrlItem");
    }
    if (snifferUrlItemNode != null)
    {
        detailPageConf.SnifferSubPageUrlItem = CreateSnifferUrlItem(snifferUrlItemNode);
    }

    // Page-level items are added first so they win over common items that
    // share the same ItemName.
    AppendDetailSnifferItems(detailPageConf, pageConfNode);
    if (commonConfigNode != null)
    {
        AppendDetailSnifferItems(detailPageConf, commonConfigNode);
    }

    // Nested sub pages: those declared on the page itself, then those declared
    // in the common configuration.
    foreach (XmlNode subPageNode in pageConfNode.SelectNodes("SnifferPage"))
    {
        detailPageConf.SubPageConfigurations.Add(CreateDetailPageConfiguration(detailPageConf, pageConfNode, subPageNode));
    }
    if (commonConfigNode != null)
    {
        foreach (XmlNode subPageNode in commonConfigNode.SelectNodes("SnifferPage"))
        {
            detailPageConf.SubPageConfigurations.Add(CreateDetailPageConfiguration(detailPageConf, pageConfNode, subPageNode));
        }
    }

    return detailPageConf;
}

/// <summary>
/// Returns the value of the named attribute from the page node, falling back to
/// the common configuration node; null when neither defines it.
/// </summary>
private static string ReadDetailPageAttribute(XmlNode pageConfNode, XmlNode commonConfigNode, string attributeName)
{
    XmlAttribute attribute = pageConfNode.Attributes[attributeName];
    if (attribute == null && commonConfigNode != null)
    {
        attribute = commonConfigNode.Attributes[attributeName];
    }
    return attribute == null ? null : attribute.Value;
}

/// <summary>
/// Adds one SnifferItem per "SnifferItem" child of <paramref name="sourceNode"/>,
/// skipping any whose ItemName is already present in the configuration.
/// </summary>
private static void AppendDetailSnifferItems(DetailPageConfiguration detailPageConf, XmlNode sourceNode)
{
    XmlNodeList itemNodes = sourceNode.SelectNodes("SnifferItem");
    if (itemNodes == null)
    {
        return;
    }

    foreach (XmlNode itemNode in itemNodes)
    {
        string itemName = itemNode.Attributes["ItemName"].Value;

        bool alreadyDefined = false;
        foreach (SnifferItem existing in detailPageConf.SnifferItems)
        {
            if (existing.ItemName == itemName)
            {
                alreadyDefined = true;
                break;
            }
        }
        if (alreadyDefined)
        {
            continue;
        }

        SnifferItem item = new SnifferItem();
        item.ItemName = itemName;

        if (itemNode.Attributes["SaveImage"] != null)
        {
            item.SaveImage = bool.Parse(itemNode.Attributes["SaveImage"].Value);
        }
        if (itemNode.Attributes["SaveImagesPath"] != null)
        {
            item.SaveImagesPath = itemNode.Attributes["SaveImagesPath"].Value;
        }
        if (itemNode.Attributes["ImageUrlPath"] != null)
        {
            item.ImageUrlPath = itemNode.Attributes["ImageUrlPath"].Value;
        }
        if (itemNode.Attributes["IsClearHTML"] != null)
        {
            item.IsClearHTML = bool.Parse(itemNode.Attributes["IsClearHTML"].Value);
        }
        if (itemNode.Attributes["IsUrl"] != null)
        {
            item.IsUrl = bool.Parse(itemNode.Attributes["IsUrl"].Value);
        }
        if (itemNode.Attributes["UrlToAbs"] != null)
        {
            item.UrlToAbs = bool.Parse(itemNode.Attributes["UrlToAbs"].Value);
        }
        if (itemNode.Attributes["Separator"] != null)
        {
            item.Separator = itemNode.Attributes["Separator"].Value;
        }
        if (itemNode.Attributes["ClearAElement"] != null)
        {
            item.ClearAElement = bool.Parse(itemNode.Attributes["ClearAElement"].Value);
        }
        if (itemNode.Attributes["MutiPage"] != null)
        {
            item.MutiPage = bool.Parse(itemNode.Attributes["MutiPage"].Value);
        }
        if (itemNode.Attributes["MutiPageSeparator"] != null)
        {
            item.MutiPageSeparator = itemNode.Attributes["MutiPageSeparator"].Value;
        }

        XmlNode ndClearRegexString = itemNode.SelectSingleNode("ClearRegexString");
        if (ndClearRegexString != null)
        {
            item.ClearRegexString = ndClearRegexString.InnerText;
        }

        XmlNode ndDefaultValue = itemNode.SelectSingleNode("DefaultValue");
        if (ndDefaultValue != null)
        {
            item.DefaultValue = ndDefaultValue.InnerText;
        }

        XmlNode ndRegexString = itemNode.SelectSingleNode("RegexString");
        if (ndRegexString != null)
        {
            item.RegexString = CreateRegexString(ndRegexString);
        }

        detailPageConf.SnifferItems.Add(item);
    }
}
/// <summary>
/// Builds a <see cref="ListPageConfiguration"/> from a SnifferPage node.
/// Optional settings are read from <paramref name="pageConfNode"/> first and,
/// when absent, from the CommonConfig element named by its "Config" attribute.
/// </summary>
/// <param name="parent">Configuration that owns the new list page configuration.</param>
/// <param name="pageConfNode">
/// The SnifferPage node describing this list page. "PageName" and "PageUrl"
/// attributes are read unconditionally.
/// </param>
/// <returns>The populated configuration, including its sub page configuration if one is declared.</returns>
/// <exception cref="System.Exception">
/// Thrown when neither the page node nor the common configuration contains a
/// SnifferSubPageUrlItem element.
/// </exception>
private static ListPageConfiguration CreateListPageConfiguration(PageConfiguration parent, XmlNode pageConfNode)
{
    XmlNode commonConfigNode = null;
    if (pageConfNode.Attributes["Config"] != null)
    {
        commonConfigNode = pageConfNode.ParentNode.SelectSingleNode(
            string.Format("CommonConfig[@Name='{0}']", pageConfNode.Attributes["Config"].Value));
    }

    ListPageConfiguration pageConf = new ListPageConfiguration(parent);
    pageConf.PageName = pageConfNode.Attributes["PageName"].Value;
    pageConf.PageUrl = pageConfNode.Attributes["PageUrl"].Value;

    // The helper null-checks the common node, so a page with neither a "Config"
    // nor a "PageType" attribute no longer dereferences null here.
    string value = ReadListPageAttribute(pageConfNode, commonConfigNode, "PageType");
    if (value != null)
    {
        pageConf.PageType = (PageType)Enum.Parse(typeof(PageType), value, false);
    }

    value = ReadListPageAttribute(pageConfNode, commonConfigNode, "PageQuery");
    if (value != null)
    {
        pageConf.PageQuery = value;
    }

    value = ReadListPageAttribute(pageConfNode, commonConfigNode, "ReplacePageQuery");
    if (value != null)
    {
        pageConf.ReplacePageQuery = value;
    }

    value = ReadListPageAttribute(pageConfNode, commonConfigNode, "PageIndexFormat");
    if (value != null)
    {
        pageConf.PageIndexFormat = value;
    }

    value = ReadListPageAttribute(pageConfNode, commonConfigNode, "PageIndexSeed");
    if (value != null)
    {
        pageConf.PageIndexSeed = int.Parse(value);
    }

    value = ReadListPageAttribute(pageConfNode, commonConfigNode, "PageIndexStep");
    if (value != null)
    {
        pageConf.PageIndexStep = int.Parse(value);
    }

    value = ReadListPageAttribute(pageConfNode, commonConfigNode, "PageMethod");
    if (value != null)
    {
        pageConf.PageMethod = (PageMethod)Enum.Parse(typeof(PageMethod), value);
    }

    value = ReadListPageAttribute(pageConfNode, commonConfigNode, "EndPageDetermineRegex");
    if (value != null)
    {
        pageConf.EndPageDetermineRegex = value;
    }

    value = ReadListPageAttribute(pageConfNode, commonConfigNode, "For");
    if (value != null)
    {
        pageConf.For = bool.Parse(value);
    }

    value = ReadListPageAttribute(pageConfNode, commonConfigNode, "Encoding");
    if (value != null)
    {
        pageConf.Encoding = Encoding.GetEncoding(value);
    }

    // A list page must say how to find its sub page URLs.
    XmlNode snifferUrlItemNode = pageConfNode.SelectSingleNode("SnifferSubPageUrlItem");
    if (snifferUrlItemNode == null && commonConfigNode != null)
    {
        snifferUrlItemNode = commonConfigNode.SelectSingleNode("SnifferSubPageUrlItem");
    }
    if (snifferUrlItemNode == null)
    {
        throw new System.Exception(string.Format("{0} 页没有配置 SnifferUrlItem 节点", pageConf.PageName));
    }
    pageConf.SnifferSubPageUrlItem = CreateSnifferUrlItem(snifferUrlItemNode);

    // Optional sub page: a detail page or another (nested) list page.
    XmlNode subPageNode = pageConfNode.SelectSingleNode("SnifferPage");
    if (subPageNode == null && commonConfigNode != null)
    {
        subPageNode = commonConfigNode.SelectSingleNode("SnifferPage");
    }
    if (subPageNode != null)
    {
        if (subPageNode.Attributes["PageType"].Value == "DetailPage")
        {
            pageConf.SubPageConfiguration = CreateDetailPageConfiguration(pageConf, pageConfNode, subPageNode);
        }
        else
        {
            pageConf.SubPageConfiguration = CreateListPageConfiguration(pageConf, subPageNode);
        }
    }

    return pageConf;
}

/// <summary>
/// Returns the value of the named attribute from the page node, falling back to
/// the common configuration node; null when neither defines it.
/// </summary>
private static string ReadListPageAttribute(XmlNode pageConfNode, XmlNode commonConfigNode, string attributeName)
{
    XmlAttribute attribute = pageConfNode.Attributes[attributeName];
    if (attribute == null && commonConfigNode != null)
    {
        attribute = commonConfigNode.Attributes[attributeName];
    }
    return attribute == null ? null : attribute.Value;
}
/// <summary>
/// Creates a list page configuration under the given parent configuration.
/// </summary>
/// <param name="parent">Parent configuration, forwarded to the base constructor.</param>
public ListPageConfiguration(PageConfiguration parent)
    : base(parent)
{
}
/// <summary>
/// Creates a page with both a parent page and a configuration.
/// </summary>
/// <param name="parent">The parent page stored on this instance.</param>
/// <param name="configuration">The configuration stored on this instance.</param>
public PageBase(PageBase parent, PageConfiguration configuration)
{
    _configuration = configuration;
    _parent = parent;
}
/// <summary>
/// Creates a page from a configuration only; no parent page is assigned.
/// </summary>
/// <param name="configuration">The configuration stored on this instance.</param>
public PageBase(PageConfiguration configuration)
{
    _configuration = configuration;
}
/// <summary>
/// Creates a page configuration and records its parent configuration.
/// </summary>
/// <param name="parent">The parent configuration stored on this instance.</param>
public PageConfiguration(PageConfiguration parent)
{
    this._parent = parent;
}
/// <summary>
/// Creates a detail page configuration under the given parent configuration.
/// </summary>
/// <param name="parent">Parent configuration, forwarded to the base constructor.</param>
public DetailPageConfiguration(PageConfiguration parent)
    : base(parent)
{
}