Exemple #1
0
        /// <summary>
        /// Manual end-to-end run of the crawl pipeline: persists a site model to disk, reloads it,
        /// saves every page listed in its download URLs to a local .html file, and finally runs
        /// the article parser over the first saved page.
        /// </summary>
        /// <remarks>
        /// The method makes no assertions; it only exercises the helpers. All files are written
        /// under the hard-coded <c>c:\</c> root.
        /// </remarks>
        public void Test1()
        {
            // History: the WebSiteModel used to be assembled inline here from seven
            // yongche.16888.com category index pages (UrlManager.CreateModel) plus a set of XPath
            // rules (RuleManager.CreateModel) passed to WebSiteManager.CreateModel. It is now
            // obtained from CreateTestModel() — presumably the same setup; verify there.

            // Step 1: describe the web site (URLs + rules) and persist the description.
            //         (Alternative noted by the original author: send it to an MQ queue.)
            WebSiteModel siteModel = CreateTestModel();
            string serializedModel = JsonHelper.Serializer(siteModel);
            FileHelper.WriteTo(serializedModel, "c:\\bb.data");

            // Steps 2 and 3: reload the description, fetch each URL and save the result locally.
            WebSiteModel loadedModel = WebSiteManager.GetSiteInfo("c:\\bb.data");
            HtmlHelper downloader = new HtmlHelper();
            List<string> savedFiles = new List<string>();
            string driveRoot = "c:\\";

            foreach (UrlModel entry in loadedModel.DownloadUrls)
            {
                // The local file name is derived from the URL before the download starts.
                string savedFile = string.Format("{0}{1}.html", driveRoot, FileHelper.GenerateFileName(entry.Url));

                downloader.Download(entry.Url);
                // M_Html is read right after Download — presumably it holds the fetched markup.
                downloader.SaveTo(downloader.M_Html, savedFile);

                savedFiles.Add(savedFile);
            }

            // Step 4: parse a page from its local copy. The model is reloaded from disk rather
            // than reusing the instance from step 2.
            WebSiteModel modelForParsing = WebSiteManager.GetSiteInfo("c:\\bb.data");
            YongcheHtmlHelper parser = new YongcheHtmlHelper();

            // Only the first saved page is parsed; this indexer throws if nothing was saved.
            string firstPageHtml = System.IO.File.ReadAllText(savedFiles[0], Encoding.Default);
            List<Article> articles = parser.ParseArticle(firstPageHtml, modelForParsing);
        }