Ejemplo n.º 1
0
        /// <summary>
        /// Builds the root page configuration described by a <c>SnifferRootPage</c> element of the
        /// currently loaded configuration document.
        /// </summary>
        /// <param name="rootPageName">
        /// Value of the <c>PageName</c> attribute to match, or <c>null</c> to use the first
        /// <c>SnifferRootPage</c> element in document order.
        /// </param>
        /// <returns>
        /// The populated configuration, or <c>null</c> when no matching element exists.
        /// </returns>
        /// <exception cref="System.InvalidOperationException">
        /// A required attribute (<c>PageName</c>, <c>PageUrl</c>) is missing, or the
        /// <c>PluginType</c> attribute names a type that cannot be resolved or that has no public
        /// parameterless constructor.
        /// </exception>
        /// <exception cref="System.Exception">
        /// The page is flagged <c>IsSniffer</c> but has no <c>SnifferSubPageUrlItem</c> child element.
        /// </exception>
        public static RootPageConfiguration GetRootPageConfiguration(string rootPageName)
        {
            XmlNode rootConfNode = null;

            // Match the page name by comparing attribute values directly instead of splicing the
            // name into an XPath string literal: a name containing a double quote would otherwise
            // produce a malformed (or unintended) query.
            foreach (XmlNode candidate in xmlDoc.SelectNodes("//SnifferRootPage"))
            {
                if (rootPageName == null)
                {
                    // No name requested: the first element in document order wins.
                    rootConfNode = candidate;
                    break;
                }

                XmlAttribute nameAttribute = candidate.Attributes != null ? candidate.Attributes["PageName"] : null;

                if (nameAttribute != null && nameAttribute.Value == rootPageName)
                {
                    rootConfNode = candidate;
                    break;
                }
            }

            if (rootConfNode == null)
            {
                return null;
            }

            XmlAttribute pageNameAttribute = rootConfNode.Attributes["PageName"];
            XmlAttribute pageUrlAttribute  = rootConfNode.Attributes["PageUrl"];

            // Report a configuration error explicitly rather than failing with a bare
            // NullReferenceException when a mandatory attribute is absent.
            if (pageNameAttribute == null)
            {
                throw new System.InvalidOperationException("SnifferRootPage element is missing the required PageName attribute.");
            }
            if (pageUrlAttribute == null)
            {
                throw new System.InvalidOperationException(string.Format("SnifferRootPage \"{0}\" is missing the required PageUrl attribute.", pageNameAttribute.Value));
            }

            RootPageConfiguration rootConf = new RootPageConfiguration();

            rootConf.PageName = pageNameAttribute.Value;
            rootConf.PageUrl  = pageUrlAttribute.Value;
            rootConf.PageType = PageType.ListPage;

            XmlAttribute isSnifferAttribute = rootConfNode.Attributes["IsSniffer"];
            if (isSnifferAttribute != null)
            {
                rootConf.IsSniffer = bool.Parse(isSnifferAttribute.Value);
            }

            XmlAttribute savePathAttribute = rootConfNode.Attributes["SavePath"];
            if (savePathAttribute != null)
            {
                rootConf.SavePath = savePathAttribute.Value;
            }

            XmlAttribute pluginTypeAttribute = rootConfNode.Attributes["PluginType"];
            if (pluginTypeAttribute != null)
            {
                // Type.GetType returns null (it does not throw) when the type cannot be found.
                Type pluginType = Type.GetType(pluginTypeAttribute.Value);
                if (pluginType == null)
                {
                    throw new System.InvalidOperationException(string.Format("Plugin type \"{0}\" configured for page \"{1}\" could not be resolved.", pluginTypeAttribute.Value, rootConf.PageName));
                }

                // GetConstructor returns null when there is no public parameterless constructor.
                System.Reflection.ConstructorInfo pluginConstructor = pluginType.GetConstructor(new Type[0]);
                if (pluginConstructor == null)
                {
                    throw new System.InvalidOperationException(string.Format("Plugin type \"{0}\" configured for page \"{1}\" has no public parameterless constructor.", pluginTypeAttribute.Value, rootConf.PageName));
                }

                rootConf.Plugin = (IPlugin)pluginConstructor.Invoke(new object[0]);
            }

            XmlNode snifferUrlItemNode = rootConfNode.SelectSingleNode("SnifferSubPageUrlItem");

            if (snifferUrlItemNode != null)
            {
                rootConf.SnifferSubPageUrlItem = CreateSnifferUrlItem(snifferUrlItemNode);
            }
            else if (rootConf.IsSniffer)
            {
                // A page that must be sniffed needs a URL item describing how to find its sub pages.
                throw new System.Exception(string.Format("{0} 页设置为需要采集,但是没有配置 SnifferUrlItem 节点", rootConf.PageName));
            }

            // Each direct SnifferPage child becomes one sub-page configuration of this root.
            XmlNodeList subPageNodes = rootConfNode.SelectNodes("SnifferPage");

            foreach (XmlNode listPageNode in subPageNodes)
            {
                rootConf.SubPageConfigurations.Add(CreateListPageConfiguration(rootConf, listPageNode));
            }

            return rootConf;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Handles completion of one list page: writes the collected data to the page's XML file,
        /// passes it to the root page's plugin (if one is configured), clears the collected tables
        /// and logs a "done page" entry.
        /// </summary>
        /// <param name="listPage">The list page whose parsing has finished.</param>
        void SnifferThread_ListPageParseDone(ListPage listPage)
        {
            // Make sure the output directory is there before writing.
            string targetDir = listPage.SavePath;
            if (Directory.Exists(targetDir) == false)
            {
                Directory.CreateDirectory(targetDir);
            }

            // Export the collected data to the XML file.
            string outputFile = listPage.SavePathAndFileName;
            Data.WriteXml(outputFile);

            // Walk up the configuration chain until the root page configuration is reached.
            RootPageConfiguration owningRoot = null;
            for (PageConfiguration ancestor = listPage.Configuration.Parent; ancestor != null; ancestor = ancestor.Parent)
            {
                owningRoot = ancestor as RootPageConfiguration;
                if (owningRoot != null)
                {
                    break;
                }
            }

            // Call the plugin when the root page has one.
            if (owningRoot != null)
            {
                if (owningRoot.Plugin != null)
                {
                    owningRoot.Plugin.Receive(Data, outputFile);
                }
            }

            // Drop the data collected for this page.
            if (Data.Tables.Count != 0)
            {
                Data.Tables.Clear();
            }

            // Only '&' is escaped in the URL before it is embedded in the log entry.
            const string doneEntryFormat = "<donepage><thread>{0}</thread><page>{1}</page><donetime>{2}</donetime></donepage>";
            string logEntry = string.Format(
                doneEntryFormat,
                this.ThreadIndex,
                listPage.PageUrl.Replace("&", "&amp;"),
                DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff"));

            InfoSniffer.LogManager.WriteLog(logEntry);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Opens the configuration file <c>AppDataPath + fileName + ".xml"</c>, reads the requested
        /// root page from it and returns that root page's first-level list pages.
        /// </summary>
        /// <param name="fileName">Configuration file name without the <c>.xml</c> extension.</param>
        /// <param name="rootPageName">
        /// Name of the root page, passed through to <c>SnifferConfig.GetRootPageConfiguration</c>.
        /// </param>
        /// <returns>
        /// The first-level pages: one per sniffed sub-page URL when the root page is flagged
        /// <c>IsSniffer</c>, otherwise one per configured sub page. Returns <c>null</c> when no
        /// root page configuration is found.
        /// </returns>
        public static List <ListPage> GetAllFirstPages(string fileName, string rootPageName)
        {
            // Plain concatenation on purpose: AppDataPath must not become part of a composite
            // format string, because a path containing '{' or '}' would make string.Format throw
            // (or silently alter the path).
            SnifferConfig.OpenSnfFile(AppDataPath + fileName + ".xml");
            RootPageConfiguration rootPageConf = SnifferConfig.GetRootPageConfiguration(rootPageName);

            if (rootPageConf == null)
            {
                return null;
            }

            ListPage        rootPage      = new ListPage((ListPageConfiguration)rootPageConf);
            List <ListPage> allFirstPages = new List <ListPage>();

            if (rootPageConf.IsSniffer)
            {
                // The root page itself is sniffed; every sub-page URL it yields becomes a first page.
                rootPage.Sniffer();

                // When nothing could be collected the result is simply an empty list.
                // NOTE(review): the null guard assumes SubPageUrlResults may be unset after a
                // failed sniff - confirm against ListPage.
                if (rootPage.SubPageUrlResults != null)
                {
                    foreach (UrlItem urlItem in rootPage.SubPageUrlResults)
                    {
                        ListPage page = new ListPage(rootPage, (ListPageConfiguration)rootPage.ListPageConfiguration.SubPageConfiguration);
                        page.PageName = urlItem.Title;
                        page.PageUrl  = urlItem.Url;
                        allFirstPages.Add(page);
                    }
                }
            }
            else
            {
                // First pages are listed statically in the configuration.
                foreach (ListPageConfiguration firstPageConfi in rootPageConf.SubPageConfigurations)
                {
                    allFirstPages.Add(new ListPage(rootPage, firstPageConfi));
                }
            }

            return allFirstPages;
        }