Exemple #1
0
        /// <summary>
        /// Downloads a novel chapter by chapter, starting at a fixed chapter page and following
        /// each page's "next" link, and appends every chapter's title and text to D:\c.txt.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            const string outputPath = "D:\\c.txt";
            const string baseUrl    = "http://www.42xs.com/read/0/404/";

            // FileMode.Append opens an existing file positioned at its end and creates the file
            // when it is missing, so no separate File.Exists check is needed. (The previous check
            // tested "D\\c.txt" - without the drive colon - so it looked at a relative path and
            // the output file was normally re-created, discarding earlier content.)
            using (var fs = new FileStream(outputPath, FileMode.Append))
            using (var sw = new StreamWriter(fs, Encoding.UTF8))
            {
                var nextUrl = "171271.html";

                while (!string.IsNullOrEmpty(nextUrl))
                {
                    var    url   = baseUrl + nextUrl;
                    string title = null;
                    string txt   = null;

                    try
                    {
                        var doc = new JumonyParser().LoadDocument(url);

                        title = doc.FindFirst("#center > div.title > h1").InnerText();
                        txt   = doc.FindFirst("#content").InnerText();

                        // Looked up last on purpose: when the page has no usable "next" link the
                        // chapter text fetched above is still written below.
                        nextUrl = doc.FindFirst("#container > div:nth-child(3) > div > div.jump > a:nth-child(6)").Attribute("href").Value();
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine("{0}没有成功", url);
                        Console.WriteLine(ex.Message);
                        nextUrl = "";
                    }

                    // Only write what was fetched in THIS iteration; a failed download must not
                    // write the previous chapter a second time.
                    if (title == null || txt == null)
                    {
                        continue;
                    }

                    Console.WriteLine(title);
                    sw.WriteLine("");
                    sw.WriteLine(title);
                    sw.WriteLine("");
                    sw.WriteLine(txt);
                }
            }

            // The writer and the stream are flushed and closed by the using blocks above, even
            // when an unexpected exception escapes the loop.
            Console.Write("The End. Press any key to exit...");
            Console.ReadKey();
        }
Exemple #2
0
        /// <summary>
        /// Loads CssStyleSettingTest1.html from the current directory and checks the values that
        /// <c>Style().GetValue(...)</c> reports for several elements of that document.
        /// </summary>
        public void StyleParseTest()
        {
            var document = new JumonyParser().LoadDocument(Path.Combine(Environment.CurrentDirectory, "CssStyleSettingTest1.html"));

            // Assert.AreEqual takes the expected value first; with the arguments the other way
            // round a failing test reports the expectation and the actual result swapped.

            // Property value with a missing semicolon.
            Assert.AreEqual("none", document.FindFirst("div").Style().GetValue("display"), "无法正确解析丢失分号的属性值");
            // Element without a style attribute.
            Assert.AreEqual(null, document.FindFirst("a").Style().GetValue("display"), "无法正确解析没有style属性的元素");

            // Both checks below look at the same <p> element, so it is located once.
            var paragraphStyle = document.FindFirst("p").Style();

            // A valid declaration that follows a blank style value.
            Assert.AreEqual("12px", paragraphStyle.GetValue("width"), "无法正确解析空白样式设置值后随的正确表达式");
            // The blank style value itself.
            Assert.AreEqual(null, paragraphStyle.GetValue("display"), "无法正确解析空白样式设置值");

            var element = document.FindFirst("div#test1");

            // CSS property names must be matched case-insensitively.
            Assert.AreEqual("none", element.Style().GetValue("display"), "CSS 属性设置不应区分大小写");
        }
Exemple #3
0
    /// <summary>
    /// Finds the view "~/ActionUrlTest/Test1.html" through the registered view engines, renders
    /// it into a string, parses the output and checks the href of the first anchor element.
    /// </summary>
    public void Test1()
    {
        var context = new ControllerContext(HttpContext.Request.RequestContext, new TestController());

        var result = ViewEngines.Engines.FindView(context, "~/ActionUrlTest/Test1.html", null);

        Assert.NotNull(result.View, "找不到视图");

        IHtmlDocument document;

        using (var writer = new StringWriter())
        {
            var viewContext = new ViewContext(context, result.View, new ViewDataDictionary(), new TempDataDictionary(), writer);

            result.View.Render(viewContext, writer);

            // Parse while the writer is still alive; the parsed document outlives the writer.
            document = new JumonyParser().Parse(writer.ToString());
        }

        var link = document.FindFirst("a");

        Assert.NotNull(link);

        // Expected value first, actual value second, so that a failure message reads correctly.
        Assert.AreEqual("/TestController/TestAction?arg=args", link.Attribute("href").Value());
    }
Exemple #4
0
        /// <summary>
        /// Binds a small value dictionary to Test1.html and checks the resulting "class" and
        /// "test" attributes of the body element.
        /// </summary>
        public void AttributeTest1()
        {
            var document = new JumonyParser().LoadDocument(Path.Combine(Environment.CurrentDirectory, "Test1.html"));

            var dataValues = new Dictionary<string, object>()
            {
                { "StyleClass", "Test" },
                { "ThisTime", new DateTime(2000, 1, 2) },
                { "ScriptValue1", null }
            };

            HtmlBinding.Create(document, dataValues).DataBind();

            var body = document.FindFirst("body");

            // Expected value first, actual value second, so that a failure message reads correctly.
            Assert.AreEqual("Test", body.Attribute("class").Value(), "针对属性的表达式绑定不成功");
            Assert.AreEqual("this time is 2000-01-02 #", body.Attribute("test").Value(), "格式表达式测试失败");
        }
Exemple #5
0
        /// <summary>
        /// Runs data binding on Test1.html without a data context and checks the inner HTML of
        /// the title element afterwards.
        /// </summary>
        public void Test1()
        {
            var document = new JumonyParser().LoadDocument(Path.Combine(Environment.CurrentDirectory, "Test1.html"));

            HtmlBinding.Create(document, null).DataBind();

            var titleHtml = document.FindFirst("title").InnerHtml();

            // Expected value first, actual value second, so that a failure message reads correctly.
            Assert.AreEqual("Test Title abc text", titleHtml, "对 title 元素内容的文本替换测试失败");
        }
Exemple #6
0
        /// <summary>
        /// Downloads the index page, reads the total page count from its first "font" element
        /// and calls <c>getByPage</c> once for every index from 0 up to (but excluding) that count.
        /// </summary>
        public static void start()
        {
            string        indexHtml     = Util.getHtmlStr("http://www.huachuan.gov.cn/zwgk/xxgksyzl/fgfgg/index.html", Encoding.Default);
            IHtmlDocument indexDocument = new JumonyParser().Parse(indexHtml);

            // The count is taken as: second non-empty piece when splitting the text on "共",
            // then the first non-empty piece of that when splitting on "页".
            string   pagerText     = indexDocument.FindFirst("font").InnerText();
            string[] piecesByTotal = pagerText.Split(new string[] { "共" }, StringSplitOptions.RemoveEmptyEntries);
            string[] piecesByPage  = piecesByTotal[1].Split(new string[] { "页" }, StringSplitOptions.RemoveEmptyEntries);
            int      totalPage     = int.Parse(piecesByPage[0]);

            Debug.WriteLine("----->数据总页数<-----:" + totalPage);

            int pageIndex = 0;
            while (pageIndex < totalPage)
            {
                getByPage(pageIndex);
                pageIndex++;
            }
        }
Exemple #7
0
        /// <summary>
        /// Binds a dictionary whose only non-null entry is "ScriptValue1" to Test1.html and
        /// checks that the script element's inner HTML contains the bound assignment.
        /// </summary>
        public void Test1()
        {
            var htmlPath = Path.Combine(Environment.CurrentDirectory, "Test1.html");
            var document = new JumonyParser().LoadDocument(htmlPath);

            var data = new Dictionary<string, object>
            {
                { "StyleClass", null },
                { "ThisTime", null },
                { "ScriptValue1", "TestValue" }
            };

            HtmlBinding.Create(document, data).DataBind();

            var scriptHtml = document.FindFirst("script").InnerHtml();

            StringAssert.Contains(scriptHtml, "var value1 =\"TestValue\";");
        }
        // Scrapes a listing page and the detail page of every listing found on it, then stores
        // the collected entries through SaveDataHTML / ctx.SaveChanges().
        //
        // Steps, in order:
        //   1. Read the listing URL (UPURLganji.Text) and the city id (ganjicombc.SelectedValue)
        //      through Dispatcher.Invoke.
        //   2. If matching rows exist in T_FGJHtmlData, set MaxTime to the largest FbTime among
        //      them; listings not newer than MaxTime are skipped below.
        //   3. Download the listing page, parse every ".list-items" element into L_Class_Ganji.
        //   4. For every collected entry, wait 3 seconds, download its detail page and fill in
        //      price, images, phone, contact and the ".house-type>span" fields.
        //   5. Save, then report completion in GJLISTbox; any exception escaping the steps above
        //      is written to Ertext. WhileBOOL2 is set to false in both cases.
        private void NewHTMLhreperGJ()
        {
            string url = "";

            // Controls are read through Dispatcher.Invoke.
            // NOTE(review): presumably this method runs on a worker thread - confirm against caller.
            this.Dispatcher.Invoke(new Action(() =>
            {
                url = UPURLganji.Text;
            }));
            int cityItems = 0;

            this.Dispatcher.Invoke(new Action(() =>
            {
                cityItems = Convert.ToInt32(ganjicombc.SelectedValue);
            }));
            try
            {
                using (var ctx = new oaEntities())
                {
                    #region 加载首页
                    // DBtime is built from year-month-day only (no time part); ds is the current date and time.
                    DateTime DBtime = Convert.ToDateTime(DateTime.Now.Year.ToString() + "-" + DateTime.Now.Month.ToString() + "-" + DateTime.Now.Day.ToString());
                    DateTime ds     = DateTime.Now;
                    // Probe for at least one stored row for this source/city whose FbTime is not DBtime and not after ds.
                    var      ftdef  = ctx.T_FGJHtmlData.DefaultIfEmpty().Where(x => x.FbTime != DBtime && x.Laiyuan == "赶集" && x.CityID == cityItems && x.FbTime <= ds).FirstOrDefault();

                    if (ftdef != null)
                    {
                        // Same filter as above; MaxTime is a member declared outside this method.
                        MaxTime = ctx.T_FGJHtmlData.DefaultIfEmpty().Where(x => x.FbTime != DBtime && x.Laiyuan == "赶集" && x.CityID == cityItems && x.FbTime <= ds).Max(x => x.FbTime);
                    }

                    string ALLhtml = GetHTMLstr_gj(url, "");
                    // Strip the HTML comment delimiters so that markup wrapped in comments is parsed as elements.
                    ALLhtml = ALLhtml.Replace("<!-- ", string.Empty);
                    ALLhtml = ALLhtml.Replace(" -->", string.Empty);
                    IHtmlDocument document             = new JumonyParser().Parse(ALLhtml);
                    IEnumerable <IHtmlElement> result1 = document.Find(".list-items");
                    L_Class_Ganji.Clear();
                    foreach (var item in result1)
                    {
                        #region MyRegion
                        try
                        {
                            newWORD.Class1 GJclass = new newWORD.Class1();
                            IHtmlElement   item_a  = item.FindFirst("a");
                            // Listing title
                            GJclass.TextName = item.FindFirst(".house-name").InnerText().Trim();

                            #region 判断时间的方法
                            // Turn the relative publish-time text into an absolute FbTime.
                            string   Timestr = item.FindFirst(".house-pulishtime").InnerText().Trim();
                            DateTime Dte     = DateTime.Now;
                            // Default: today's date without a time part; kept when Timestr is "今天".
                            GJclass.FbTime = Convert.ToDateTime(Dte.Year.ToString() + "-" + Dte.Month.ToString() + "-" + Dte.Day.ToString());
                            Timestr        = Timestr.Replace("更新于", string.Empty);
                            Timestr        = Timestr.Replace("前", string.Empty);

                            if (Timestr != "今天")
                            {
                                if (Timestr.IndexOf("分钟") > -1)
                                {
                                    // Nothing left after removing the unit: skip this listing.
                                    if (Timestr.Replace("分钟", string.Empty).Trim().Length <= 0)
                                    {
                                        continue;
                                    }
                                    GJclass.FbTime = Dte.AddMinutes(-(Convert.ToInt32(Timestr.Replace("分钟", string.Empty))));
                                }
                                else if (Timestr.IndexOf("小时") > -1)
                                {
                                    if (Timestr.Replace("小时", string.Empty).Trim().Length <= 0)
                                    {
                                        continue;
                                    }
                                    GJclass.FbTime = Dte.AddHours(-(Convert.ToInt32(Timestr.Replace("小时", string.Empty))));
                                }
                                else if (Timestr.IndexOf("天") > -1)
                                {
                                    if (Timestr.Trim() == "昨天")
                                    {
                                        GJclass.FbTime = Dte.AddDays(-1);
                                    }
                                    // NOTE(review): every "前" was removed from Timestr above, so this
                                    // comparison with "前天" can never be true; such a listing reaches the
                                    // else branch as "天" and is skipped by the continue there.
                                    else if (Timestr.Trim() == "前天")
                                    {
                                        GJclass.FbTime = Dte.AddDays(-2);
                                    }
                                    else
                                    {
                                        if (Timestr.Replace("天", string.Empty).Trim().Length <= 0)
                                        {
                                            continue;
                                        }
                                        // NOTE(review): a fixed -3 days is used whatever number precedes "天".
                                        GJclass.FbTime = Dte.AddDays(-3);
                                    }
                                }
                                else if (Timestr.Trim() == "刚刚")
                                {
                                    GJclass.FbTime = DateTime.Now;
                                }
                                else
                                {
                                    // Anything else is split on '-' and its first two parts are used as
                                    // month and day of the PREVIOUS year (FbTime.Year - 1).
                                    string[] strTime_ = Timestr.Split('-');
                                    GJclass.FbTime = Convert.ToDateTime((GJclass.FbTime.Year - 1) + "-" + strTime_[0] + "-" + strTime_[1]);
                                }
                                // Skip listings that are not newer than the newest stored entry.
                                if (GJclass.FbTime <= MaxTime)
                                {
                                    continue;
                                }
                            }
                            #endregion

                            // Link: used as-is when it contains "https", otherwise prefixed with the site root.
                            GJclass.href = item_a.Attribute("href").Value().IndexOf("https") > -1 ? item_a.Attribute("href").Value() : "https://3g.ganji.com" + item_a.Attribute("href").Value();
                            // Address
                            GJclass.Address = item.FindFirst(".house-addr").FindFirst(".house-area").InnerText();
                            L_Class_Ganji.Add(GJclass);
                        }
                        catch (Exception ex)
                        {
                            // NOTE(review): any failure while reading one listing is swallowed here;
                            // the listing is not added and nothing is logged.
                        }
                        #endregion
                    }
                    #endregion
                    // Show how many listings were collected.
                    this.Dispatcher.Invoke(new Action(() =>
                    {
                        GJSUMcount.Text = L_Class_Ganji.Count().ToString();
                    }));
                    #region 加载分页
                    for (int i = 0; i < L_Class_Ganji.Count; i++)
                    {
                        // Fixed 3-second pause before every detail-page request.
                        Thread.Sleep(3000);
                        try {
                            // Show the index of the entry currently being read.
                            this.Dispatcher.Invoke(new Action(() =>
                            {
                                GJReadCount.Text = i.ToString();
                            }));
                            string this_html = GetHTMLstr_gj(L_Class_Ganji[i].href, "");
                            // Empty response: leave this entry with the listing-page data only.
                            if (this_html.Trim().Length <= 0)
                            {
                                continue;
                            }
                            IHtmlDocument document_this = new JumonyParser().Parse(this_html);
                            // Price
                            L_Class_Ganji[i].FwSumMoney = document_this.Exists(".house-price") ? document_this.FindFirst(".house-price").InnerText().Replace("算房贷", string.Empty).Trim() : string.Empty;
                            var this_Image = document_this.Find(".slide-area>li>img");

                            // Image_str becomes "有---url1---url2..." when images exist, otherwise it stays empty.
                            L_Class_Ganji[i].Image_str = string.Empty;
                            foreach (var img in this_Image)
                            {
                                L_Class_Ganji[i].Image_str = L_Class_Ganji[i].Image_str.Length > 0 ? L_Class_Ganji[i].Image_str + "---" + img.Attribute("data-big-image").Value() : "有---" + img.Attribute("data-big-image").Value();
                            }
                            var list = document_this.Find(".house-type>span");
                            // Phone: the last ".tel" element with a non-empty href wins.
                            var php = document_this.Find(".tel");
                            foreach (var item in php)
                            {
                                if (item.Attribute("href").Value().Trim().Length > 0)
                                {
                                    L_Class_Ganji[i].photo = item.Attribute("href").Value().Trim().Replace("tel:", string.Empty);
                                }
                            }
                            // Contact person


                            L_Class_Ganji[i].Laiyuan = "赶集";

                            L_Class_Ganji[i].PersonName = document_this.Exists(".broker") ? document_this.FindFirst(".broker").FindFirst("span").InnerText().Replace("(个人)", string.Empty) : string.Empty;



                            // Each ".house-type>span" is assigned to a field according to the first keyword
                            // its text contains; the order of the checks decides when several keywords match.
                            foreach (var item in list)
                            {
                                if (item.InnerText().IndexOf("朝") > -1)
                                {
                                    L_Class_Ganji[i].FwChaoxiang = PFUANDtext(item, "朝");
                                }
                                else if (item.InnerText().IndexOf("室") > -1)
                                {
                                    L_Class_Ganji[i].FwHuXing = PFUANDtext(item, "室");
                                }
                                else if (item.InnerText().IndexOf("层") > -1)
                                {
                                    L_Class_Ganji[i].FwLoucheng = PFUANDtext(item, "层");
                                }
                                else if (item.InnerText().IndexOf("㎡") > -1)
                                {
                                    L_Class_Ganji[i].FwMianji = PFUANDtext(item, "㎡");
                                }
                                else if (item.InnerText().IndexOf("产权") > -1)
                                {
                                    L_Class_Ganji[i].FwNianxian = PFUANDtext(item, "产权");
                                }
                                else if (item.InnerText().IndexOf("毛") > -1)
                                {
                                    L_Class_Ganji[i].FwZhuangxiu = PFUANDtext(item, "毛");
                                }
                                else if (item.InnerText().IndexOf("装修") > -1)
                                {
                                    L_Class_Ganji[i].FwZhuangxiu = PFUANDtext(item, "装修");
                                }
                            }
                        }
                        catch (Exception e) {
                            // NOTE(review): failures on a single detail page are swallowed; the entry keeps
                            // whatever fields were filled before the exception and is still saved below.
                        }
                    }
                    #endregion
                    #region 进行数据存储

                    SaveDataHTML(ctx, L_Class_Ganji, cityItems);

                    ctx.SaveChanges();
                    #endregion
                }
                // Report completion together with the current time.
                this.Dispatcher.Invoke(new Action(() =>
                {
                    GJLISTbox.Text = "完成更新————" + DateTime.Now.ToString();
                }));
            }
            catch (Exception e)
            {
                // Show the time and the full exception text in the Ertext control.
                this.Dispatcher.Invoke(new Action(() =>
                {
                    Ertext.Text = DateTime.Now.ToString() + e.ToString();
                }));
                WhileBOOL2 = false;
            }
            // Reset on the success path as well (the catch block above already did so on failure).
            WhileBOOL2 = false;
        }
Exemple #9
0
        // Crawls product listings for every row of the mogujieurl table and inserts one row per
        // product into the DATE table.
        //
        // For each table row: the category id is taken from the stored URL, up to 100 result
        // pages are requested from the list.mogujie.com search endpoint, and for every product on
        // a page the method skips it when an equal row for the current month already exists,
        // downloads the detail page, requests shop data, extracts price / sales / review numbers
        // and inserts the record. Progress is shown in richTextBox1 and a message box is shown
        // at the end.
        //
        // NOTE(review): both connection strings below embed the server address, user name and
        // password in source code.
        // NOTE(review): all SQL statements in this method are built by string concatenation from
        // scraped values; only single quotes in TITLE and SHOPNAME are replaced before the INSERT.
        // NOTE(review): conn_E is opened here but only conn is closed at the end of the method.
        public void Start_go()
        {
            falg = true;
            DataTable     dt     = new DataTable();//mogujie  E_COMMERCE
            SqlConnection conn   = new SqlConnection("Data Source=10.1.56.31;Initial Catalog=mogujie;Persist Security Info=True;User ID=sa;Password=123456");
            SqlConnection conn_E = new SqlConnection("Data Source=10.1.56.31;Initial Catalog=E_COMMERCE;Persist Security Info=True;User ID=sa;Password=123456");

            conn.Open();
            conn_E.Open();
            string         sql  = "select * from mogujieurl";//before each crawl, update the page count stored for each link in the database
            SqlDataAdapter adap = new SqlDataAdapter(sql, conn);

            adap.Fill(dt);

            //first-level category ID    ------------------------------------------------------
            string INDUSTRY_ID = "内衣配饰";
            //second-level category ID  ------------------------------------------------------
            string PRODUCTTYPE_ID = null;
            //source shopping platform
            string PLATFORM_ID = "蘑菇街";
            //string month = textBox1.Text;
            //if(month=="")
            //{
            //    MessageBox.Show("请输入月份");
            //    return ;
            //}
            DateTime sy = new DateTime();           //sy is a DateTime

            sy = System.DateTime.Today;             //assign today's date to sy
            string year  = sy.Year.ToString();      //year part
            string month = sy.Month.ToString();     //month part

            for (int i = 0; i < dt.Rows.Count; i++) //i = 14 count = 22
            {
                // These values are declared once per table row, so inside the page/product loops
                // they keep the value of the previous product whenever a read below fails.
                int      BRAND_ID    = 177156;
                string   tradeItemId = null;  //--------trade item ID
                decimal  DISCOUNT    = 0.00M; //---------promotional price
                string   url         = null;  //----------------detail page URL
                string   url_        = null;  //---------------category link
                int      pagenum     = 0;     //------------------number of pages
                string   TITLE       = "无数据"; //----------title
                int      perPage     = 0;     //------------------items per page
                object[] a           = dt.Rows[i].ItemArray;

                // Columns are read by position: 3 = category link, 6 = page count, 7 = second-level category.
                url_    = (string)a[3];
                pagenum = (Int32)a[6];
                //try 100 pages; categories with fewer pages leave the loop through the break below
                // NOTE(review): this overwrites the page count just read from column 6.
                pagenum        = 100;
                PRODUCTTYPE_ID = (string)a[7];

                // Drop the query string, then parse the sixth '/'-separated segment as the category id.
                url_ = url_.Split('?')[0].ToString();
                int urlnum = int.Parse(url_.Split('/')[5].ToString());

                HttpUtility http = new HttpUtility();

                for (int ii = 0; ii < pagenum; ii++) //
                {
                    int    page      = ii + 1;
                    string url_and   = "http://list.mogujie.com/search?cKey=pc-wall-v1&page=" + page + "&fcid=" + urlnum + "&ad=2";
                    // The request and the JSON parsing are outside any try block; an exception here leaves Start_go.
                    string Area_Html = http.GetHtmlText(url_and, "utf-8", "text/html;charset=utf-8", "");
                    var    dom       = (JObject)JsonConvert.DeserializeObject(Area_Html);
                    try
                    {
                        perPage = dom["result"]["wall"]["docs"].Count();
                    }
                    catch {
                        // No readable "docs" list on this page: stop paging this category.
                        break;
                    }

                    for (int iii = 0; iii < perPage; iii++)//
                    {
                        try
                        {
                            //url_and = "http://list.mogujie.com/search?cKey=pc-wall-v1&page=" + page + "&fcid=" + urlnum + "&ad=2";
                            //Area_Html = http.GetHtmlText(url_and, "UTF-8", "text/html;charset=utf-8", "");
                            //var dom = (JObject)JsonConvert.DeserializeObject(Area_Html);
                            url         = dom["result"]["wall"]["docs"][iii]["link"].ToString();//product link
                            DISCOUNT    = Convert.ToDecimal(dom["result"]["wall"]["docs"][iii]["price"].ToString());
                            tradeItemId = dom["result"]["wall"]["docs"][iii]["tradeItemId"].ToString();
                            TITLE       = dom["result"]["wall"]["docs"][iii]["title"].ToString();
                        }
                        // NOTE(review): on failure the values assigned before the exception (or those of
                        // the previous product) are used for the rest of this iteration.
                        catch { }

                        try
                        {//-----------------------------------------------------------change the month---------------
                            // Duplicate check: same title, product id, month and year already stored.
                            // NOTE(review): url_Ishave is not used afterwards.
                            string    url_Ishave = "'" + url + "'";
                            DataTable dt3        = new DataTable();
                            sql  = @"select * from DATE where TITLE = '" + TITLE + "' and GOODSID =  '" + tradeItemId + "' and MONTH =  '" + month + "'  and  YEAR = '" + year + "'";
                            adap = new SqlDataAdapter(sql, conn);
                            adap.Fill(dt3);
                            if (dt3.Rows.Count >= 1)
                            {
                                richTextBox1.Text = "";
                                richTextBox1.Text = "重复数据";
                                continue;
                            }
                        }
                        catch
                        {
                            // The duplicate check itself failed: skip this product.
                            continue;
                        }

                        string html = null;
                        try
                        {
                            html = http.GetHtmlText(url, "utf-8", "text/html; charset=utf-8");
                        }
                        catch
                        {
                            // Detail page could not be downloaded: skip this product.
                            continue;
                        }

                        var documenthtml = new JumonyParser().Parse(html);

                        //product number-----------------------------------------------------------------------------------------------
                        string GOODSID = "无数据";
                        //shop name--------------------------------------------------------------------------------------------------
                        string SHOPNAME = "无数据";
                        //number of favorites------------------------------------------------------------------------------------------------------
                        string COLLECTION = "无数据";

                        string COLLECTION_NUM = "0";
                        //province and city---------------------------------------------------------------------------------------------------
                        string area        = "无数据";
                        string PROVINCE    = "其他";
                        string CITY        = "其他";
                        int    PROVINCE_ID = 0;
                        int    CITY_ID     = 0;
                        GOODSID = tradeItemId;

                        try
                        {
                            var    shopId1 = documenthtml.FindFirst("#shopId");
                            string shopId  = shopId1.Attribute("value").Value();

                            string url_shop = "http://www.mogujie.com/trade/shopweb_index/asyShopHead?&shopId=" + shopId;
                            Area_Html = http.GetHtmlText(url_shop, "GBK", "text/html;charset=GBK", "");
                            try
                            {
                                var dom_SS = (JObject)JsonConvert.DeserializeObject(Area_Html);

                                SHOPNAME       = dom_SS["data"]["shopInfo"]["name"].ToString(); //shop name
                                COLLECTION     = dom_SS["data"]["shop_befaved_num"].ToString();
                                COLLECTION_NUM = COLLECTION;                                    //number of favorites
                                area           = dom_SS["data"]["shopInfo"]["area"].ToString(); //province and city
                                // Only a 6-character area text containing "省" is split: characters 0-1
                                // become the province and characters 3-5 the city; otherwise both stay "其他".
                                if (area.IndexOf("省") > -1 && area.Length == 6)
                                {
                                    PROVINCE = area.Substring(0, 2);
                                    CITY     = area.Substring(3, 3);
                                }
                                //look up the city code
                                CITY = '\'' + CITY + '\'';
                                try
                                {
                                    DataTable dt1 = new DataTable();
                                    sql  = "select CITY_CODE from CITY where CITY_NAME = " + CITY;
                                    adap = new SqlDataAdapter(sql, conn_E);
                                    adap.Fill(dt1);
                                    object[] a1 = dt1.Rows[0].ItemArray;
                                    CITY_ID = (int)a1[0];
                                }
                                // No matching row (or any other failure): CITY_ID stays 0.
                                catch { }
                                //look up the province code
                                PROVINCE = '\'' + PROVINCE + '\'';
                                try
                                {
                                    DataTable dt2 = new DataTable();
                                    sql  = "select PROVINCE_CODE from PROVINCE where PROVINCE_NAME = " + PROVINCE;
                                    adap = new SqlDataAdapter(sql, conn_E);
                                    adap.Fill(dt2);
                                    object[] a2 = dt2.Rows[0].ItemArray;
                                    PROVINCE_ID = (int)a2[0];
                                }
                                // No matching row (or any other failure): PROVINCE_ID stays 0.
                                catch { }
                            }

                            catch { }
                        }
                        // Shop data is optional: on failure the defaults assigned above are kept.
                        catch { }

                        //price----------------------------------------------------------------------------------------------------
                        decimal PRICE = 0.00M;
                        try
                        {
                            var    text   = documenthtml.FindFirst("#J_OriginPrice");
                            string PRICE1 = text.InnerHtml();
                            // count = number of "¥" characters in the original-price text.
                            int    count  = PRICE1.Length - PRICE1.Replace("¥", "").Length;
                            if (count == 1)
                            {
                                PRICE = Convert.ToDecimal(PRICE1.Split('¥')[1]);
                            }
                            else
                            {
                                PRICE = DISCOUNT;
                            }
                        }
                        catch {
                            // No readable original price: fall back to the promotional price.
                            PRICE = DISCOUNT;
                        }

                        //total sales volume------------------------------------------------------------------------------------------
                        int SALE_VOLUME = 0;
                        try
                        {
                            var    text  = documenthtml.FindFirst(".property-extra");
                            var    text1 = text.FindFirst(".J_SaleNum");
                            string text2 = text1.InnerHtml();
                            SALE_VOLUME = int.Parse(text2);
                        }
                        catch { }

                        //sales amount---------------------------------------------------------------------------------------------
                        decimal SALE_AMOUNT = 0.00M;
                        try
                        {
                            SALE_AMOUNT = DISCOUNT * SALE_VOLUME;
                        }
                        catch { }

                        //total number of reviews----------------------------------------------------------------------------------------------
                        int COMMEN_NUM = 0;
                        try
                        {
                            var    text  = documenthtml.FindFirst(".property-extra");
                            var    text1 = text.FindFirst(".num");
                            string text2 = text1.InnerHtml();
                            COMMEN_NUM = int.Parse(text2);
                        }
                        catch { }

                        //positive / neutral / negative reviews--------------------------------------------------------------------------------------------
                        int POSITIVE_COMMEN = 0;  //positive
                        int MODERATE_COMMEN = 0;  //neutral
                        int NEGATIVE_COMMEN = 0;  //negative

                        try                       //positive: number of ".best" elements in the comment list
                        {
                            var text  = documenthtml.FindFirst(".comment-content");
                            var text1 = text.FindFirst(".list");
                            var text2 = text1.Find(".best");
                            POSITIVE_COMMEN = text2.Count();
                        }
                        catch { }
                        try                       //negative: number of "a" elements in the list minus the positive count
                        {
                            var text  = documenthtml.FindFirst(".comment-content");
                            var text1 = text.FindFirst(".list");
                            var text2 = text1.Find("a");
                            NEGATIVE_COMMEN = text2.Count() - POSITIVE_COMMEN;
                        }
                        catch { }
                        try                       //neutral: whatever remains of the total
                        {
                            MODERATE_COMMEN = COMMEN_NUM - POSITIVE_COMMEN - NEGATIVE_COMMEN;
                        }
                        catch { }

                        string strstr       = null;
                        string REPUTATION   = "无数据";
                        string SHOPTYPE     = "无数据";
                        string SHELVES_TIME = "无数据";
                        int    YEAR         = int.Parse(year);
                        int    MONTH        = int.Parse(month);
                        string DETAIL_URL   = url;

                        // Human-readable summary of the record; shown in richTextBox1 after the insert.
                        strstr            = "商品编号 = " + GOODSID + "\n" + "一级分类的编号 = " + INDUSTRY_ID + "\n" + "二级分类的编号  = " + PRODUCTTYPE_ID + "\n" + "购物平台的来源 = " + PLATFORM_ID + "\n" + "品牌ID = " + BRAND_ID + "\n" + "省份 = " + PROVINCE_ID + "\n" + "店铺名称 = " + SHOPNAME + "\n" + "城市 = " + CITY_ID + "\n" + "title = " + TITLE + "\n" + "价格 = " + PRICE + "\n" + "促销价 = " + DISCOUNT + "\n" + "销售总量 = " + SALE_VOLUME + "\n" + "销售额 = " + SALE_AMOUNT + "\n" + "总评 = " + COMMEN_NUM + "\n" + "好评 = " + POSITIVE_COMMEN + "\n" + "中评 = " + MODERATE_COMMEN + "\n" + "差评  = " + NEGATIVE_COMMEN + "\n" + "收藏人数 = " + COLLECTION_NUM + "\n" + "名誉 = " + REPUTATION + "\n" + "shoptype = " + SHOPTYPE + "\n" + "上市时间 = " + SHELVES_TIME + "\n" + "年 = " + YEAR + "\n" + "月 = " + MONTH + "\n" + "详细地址 = " + url;
                        richTextBox1.Text = "   ";
                        // Single quotes are replaced so that they do not terminate the quoted SQL values below.
                        TITLE             = TITLE.Replace("'", "-");
                        SHOPNAME          = SHOPNAME.Replace("'", "-");
                        sql = string.Format("INSERT INTO DATE VALUES ('{0}','{1}','{2}','{3}','{4}','{5}','{6}','{7}','{8}','{9}','{10}','{11}','{12}','{13}','{14}','{15}','{16}','{17}','{18}','{19}','{20}','{21}','{22}','{23}')", GOODSID, INDUSTRY_ID, PRODUCTTYPE_ID, PLATFORM_ID, BRAND_ID, PROVINCE_ID, SHOPNAME, CITY_ID, TITLE, PRICE, DISCOUNT, SALE_VOLUME, SALE_AMOUNT, COMMEN_NUM, POSITIVE_COMMEN, MODERATE_COMMEN, NEGATIVE_COMMEN, COLLECTION_NUM, REPUTATION, SHOPTYPE, SHELVES_TIME, YEAR, MONTH, url);
                        // The insert is not wrapped in a try block; a failure here leaves Start_go.
                        SqlCommand com = new SqlCommand(sql, conn);
                        com.ExecuteNonQuery();
                        richTextBox1.Text = "--------插入成功---------\n" + strstr;
                        //Thread.Sleep(750);
                    } //for loop: products on each page
                }     //for loop: pages of each category
            }         //for loop: rows of the URL table
            conn.Close();
            MessageBox.Show("蘑菇街抓取完成");
        }
Exemple #10
0
        /// <summary>
        /// Downloads the page at <c>URL</c>, and either shows it in the embedded browser when the
        /// site answers with its verification-code page, or parses every "li" below
        /// ".house-list-wrap" into a <c>Class1</c> entry and appends it to <paramref name="L_Class"/>.
        /// </summary>
        /// <param name="L_Class">List that receives one entry per parsed listing.</param>
        /// <returns>
        /// <c>true</c> when the verification-code page was returned (nothing was parsed);
        /// <c>false</c> when the listing page was parsed.
        /// </returns>
        private bool GO_58com(List<Class1> L_Class)
        {
            string html;

            // WebClient is disposable; release it as soon as the download has finished.
            using (WebClient client = new WebClient())
            {
                client.Encoding = Encoding.UTF8;
                html = client.DownloadString(URL);
            }

            IHtmlDocument document = new JumonyParser().Parse(html);

            if (document.FindFirst("title").InnerText().Trim() == "请输入验证码")
            {
                // Let the user solve the verification code in the embedded browser.
                this.Dispatcher.Invoke(new Action(() =>
                {
                    DataGrid.Visibility    = Visibility.Collapsed;
                    Webbrowser1.Visibility = Visibility.Visible;
                    Webbrowser1.Navigate(URL);
                }));
                return true;
            }

            IEnumerable<IHtmlElement> listItems = document.Find(".house-list-wrap").Find("li");

            foreach (var item in listItems)
            {
                Class1 _class = new Class1();

                _class.TextName   = GetN_value(item, ".title>a");
                _class.href       = item.Exists(".title > a") ? item.FindFirst(".title>a").Attribute("href").Value() : string.Empty;
                _class.Quyu       = "同城";
                _class.PersonName = GetN_value(item, ".jjrname-outer");
                _class.Laiyuan    = "58";

                // The address is the concatenated text of every link inside ".baseinfo".
                StringBuilder address = new StringBuilder();
                foreach (var addressLink in item.Find(".baseinfo").Find("a"))
                {
                    address.Append(addressLink.InnerText());
                }
                _class.Address = address.ToString();

                _class.SumMoney  = GetN_value(item, ".sum");
                _class.PingMoney = GetN_value(item, ".unit");
                _class.Allpm     = GetN_value(item, ".baseinfo");

                _class.datetime    = GetN_value(item, ".time");
                _class.Image_Count = GetInt_value(item, ".picNum");
                _class.Image_str   = _class.Image_Count > 0 ? "有" : string.Empty;

                L_Class.Add(_class);
                // r is a counter declared outside this method; it is incremented once per parsed listing.
                r++;
            }

            return false;
        }
Exemple #11
0
        /// <summary>
        /// Loads SpecificationTest9.html from the current directory and checks that the body's
        /// inner text keeps the angle-bracketed fragments as literal text.
        /// </summary>
        public void SpecificationTest9()
        {
            var document = new JumonyParser().LoadDocument(Path.Combine(Environment.CurrentDirectory, "SpecificationTest9.html"));

            var bodyText = document.FindFirst("body").InnerText();

            // Expected value first, actual value second, so that a failure message reads correctly.
            Assert.AreEqual(" <-anc> <_test> <中文> <1a> <:abc> ", bodyText, "特殊字符解析错误");
        }