/// <summary>
/// Builds a <see cref="RootPageConfiguration"/> from a SnifferRootPage element of the
/// currently loaded configuration document (<c>xmlDoc</c>).
/// </summary>
/// <param name="rootPageName">
/// Value of the PageName attribute identifying the root page to load.
/// When null, the first SnifferRootPage element in the document is used.
/// </param>
/// <returns>
/// The populated configuration, or null when no matching SnifferRootPage element exists.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The element lacks a PageName or PageUrl attribute, the PluginType attribute cannot be
/// resolved to a type with a public parameterless constructor, or the page is flagged
/// IsSniffer but has no SnifferSubPageUrlItem child element.
/// </exception>
public static RootPageConfiguration GetRootPageConfiguration(string rootPageName)
{
    XmlNodeList rootNodes = xmlDoc.SelectNodes("//SnifferRootPage");
    XmlNode rootConfNode = null;

    if (rootPageName == null)
    {
        if (rootNodes.Count > 0)
        {
            rootConfNode = rootNodes[0];
        }
    }
    else
    {
        // Compare the attribute value in code rather than splicing the name into an
        // XPath string literal: a name containing a double quote would otherwise
        // produce a malformed XPath expression.
        foreach (XmlNode candidate in rootNodes)
        {
            XmlAttribute candidateName = candidate.Attributes["PageName"];
            if (candidateName != null
                && string.Equals(candidateName.Value, rootPageName, StringComparison.Ordinal))
            {
                rootConfNode = candidate;
                break;
            }
        }
    }

    if (rootConfNode == null)
    {
        return null;
    }

    XmlAttribute pageNameAttr = rootConfNode.Attributes["PageName"];
    XmlAttribute pageUrlAttr = rootConfNode.Attributes["PageUrl"];
    if (pageNameAttr == null || pageUrlAttr == null)
    {
        throw new InvalidOperationException(
            "A SnifferRootPage element must define both the PageName and PageUrl attributes.");
    }

    RootPageConfiguration rootConf = new RootPageConfiguration();
    rootConf.PageName = pageNameAttr.Value;
    rootConf.PageUrl = pageUrlAttr.Value;
    rootConf.PageType = PageType.ListPage;

    // Optional attributes: the corresponding property is only assigned when present.
    XmlAttribute isSnifferAttr = rootConfNode.Attributes["IsSniffer"];
    if (isSnifferAttr != null)
    {
        rootConf.IsSniffer = bool.Parse(isSnifferAttr.Value);
    }

    XmlAttribute savePathAttr = rootConfNode.Attributes["SavePath"];
    if (savePathAttr != null)
    {
        rootConf.SavePath = savePathAttr.Value;
    }

    XmlAttribute pluginTypeAttr = rootConfNode.Attributes["PluginType"];
    if (pluginTypeAttr != null)
    {
        // Type.GetType returns null for an unknown type name, and GetConstructor returns
        // null when there is no public parameterless constructor; report both explicitly
        // instead of failing with a NullReferenceException.
        Type pluginType = Type.GetType(pluginTypeAttr.Value);
        if (pluginType == null)
        {
            throw new InvalidOperationException(
                string.Format("Plugin type \"{0}\" configured for page {1} could not be resolved.",
                              pluginTypeAttr.Value, rootConf.PageName));
        }

        System.Reflection.ConstructorInfo pluginCtor = pluginType.GetConstructor(Type.EmptyTypes);
        if (pluginCtor == null)
        {
            throw new InvalidOperationException(
                string.Format("Plugin type \"{0}\" configured for page {1} has no public parameterless constructor.",
                              pluginTypeAttr.Value, rootConf.PageName));
        }

        rootConf.Plugin = (IPlugin)pluginCtor.Invoke(new object[0]);
    }

    XmlNode snifferUrlItemNode = rootConfNode.SelectSingleNode("SnifferSubPageUrlItem");
    if (snifferUrlItemNode != null)
    {
        rootConf.SnifferSubPageUrlItem = CreateSnifferUrlItem(snifferUrlItemNode);
    }
    else if (rootConf.IsSniffer)
    {
        // A page flagged for sniffing must say how its sub-page URLs are extracted.
        throw new InvalidOperationException(string.Format("{0} 页设置为需要采集,但是没有配置 SnifferUrlItem 节点", rootConf.PageName));
    }

    // Each SnifferPage child element becomes one sub-page configuration of this root.
    foreach (XmlNode listPageNode in rootConfNode.SelectNodes("SnifferPage"))
    {
        rootConf.SubPageConfigurations.Add(CreateListPageConfiguration(rootConf, listPageNode));
    }

    return rootConf;
}
/// <summary>
/// Called when one list page has finished being sniffed: writes the collected data to
/// the page's XML file, passes it to the root page's plugin (if any), clears the
/// collected tables, and logs a "donepage" record.
/// </summary>
/// <param name="listPage">The list page whose parsing has completed.</param>
void SnifferThread_ListPageParseDone(ListPage listPage)
{
    string dir = listPage.SavePath;
    if (!Directory.Exists(dir))
    {
        Directory.CreateDirectory(dir);
    }

    // Export the collected data to the XML file.
    string dirAndFileName = listPage.SavePathAndFileName;
    Data.WriteXml(dirAndFileName);

    // Walk up the configuration chain to find the owning root page configuration;
    // only the root configuration carries the plugin.
    PageConfiguration ancestor = listPage.Configuration.Parent;
    while (ancestor != null && !(ancestor is RootPageConfiguration))
    {
        ancestor = ancestor.Parent;
    }
    RootPageConfiguration rootPageConf = ancestor as RootPageConfiguration;

    // Invoke the plugin, if one is configured.
    if (rootPageConf != null && rootPageConf.Plugin != null)
    {
        rootPageConf.Plugin.Receive(Data, dirAndFileName);
    }

    // Clear the data so the next page starts with no tables.
    if (Data.Tables.Count > 0)
    {
        Data.Tables.Clear();
    }

    // The log record is XML-shaped markup, so '&' in the URL (common in query strings)
    // is escaped as "&amp;" to keep the record well-formed.
    string escapedUrl = listPage.PageUrl.Replace("&", "&amp;");
    InfoSniffer.LogManager.WriteLog(string.Format("<donepage><thread>{0}</thread><page>{1}</page><donetime>{2}</donetime></donepage>", this.ThreadIndex, escapedUrl, DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff")));
}
/// <summary>
/// Overload that opens the named configuration file and returns the first-level list
/// pages under the requested root page.
/// </summary>
/// <param name="fileName">
/// Configuration file name without extension; the file opened is
/// <c>AppDataPath + fileName + ".xml"</c>.
/// </param>
/// <param name="rootPageName">
/// Root page name passed to <c>SnifferConfig.GetRootPageConfiguration</c>.
/// </param>
/// <returns>
/// The first-level pages, or null when no root page configuration is found. The list
/// is empty when the root page is sniffed but yields no sub-page URLs.
/// </returns>
public static List<ListPage> GetAllFirstPages(string fileName, string rootPageName)
{
    // Build the path by concatenation. AppDataPath must not be part of a format string:
    // any '{' or '}' in the directory would be parsed as a placeholder.
    SnifferConfig.OpenSnfFile(AppDataPath + fileName + ".xml");

    RootPageConfiguration rootPageConf = SnifferConfig.GetRootPageConfiguration(rootPageName);
    if (rootPageConf == null)
    {
        return null;
    }

    ListPage rootPage = new ListPage((ListPageConfiguration)rootPageConf);
    List<ListPage> allFirstPages = new List<ListPage>();

    if (rootPageConf.IsSniffer)
    {
        // First-level pages are discovered by sniffing the root page itself. If nothing
        // could be collected, SubPageUrlResults has no items and the result stays empty.
        rootPage.Sniffer();

        foreach (UrlItem urlItem in rootPage.SubPageUrlResults)
        {
            ListPage page = new ListPage(rootPage, (ListPageConfiguration)rootPage.ListPageConfiguration.SubPageConfiguration);
            page.PageName = urlItem.Title;
            page.PageUrl = urlItem.Url;
            allFirstPages.Add(page);
        }
    }
    else
    {
        // First-level pages are listed statically in the configuration.
        foreach (ListPageConfiguration firstPageConf in rootPageConf.SubPageConfigurations)
        {
            allFirstPages.Add(new ListPage(rootPage, firstPageConf));
        }
    }

    return allFirstPages;
}