Пример #1
0
        /// <summary>
        /// Parses one listing page and, for every <c>a</c> element with class <c>single csgo</c>,
        /// prints a summary line to the console and passes it to <c>doIO.WriteTxt</c> together
        /// with the <c>dec.txt</c> path.
        /// </summary>
        /// <param name="pageSource">Raw HTML of the listing page.</param>
        /// <param name="doIO">I/O helper whose <c>WriteTxt</c> receives each summary line.</param>
        /// <remarks>
        /// A page without a <c>dataList</c> container or without item anchors produces no output.
        /// An item that lacks any of the required child nodes is reported on the console and skipped,
        /// so one malformed entry does not abort the rest of the page.
        /// </remarks>
        public void GetDecInfo(string pageSource, DoIO doIO)
        {
            // HTML is parsed with HtmlAgilityPack (Install-Package HtmlAgilityPack) and queried with XPath.
            HtmlDocument htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(pageSource);
            string xpatnDiv = "//div[@class='dataList']";

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so both lookups are checked before indexing or iterating.
            HtmlNodeCollection dataLists = htmlDoc.DocumentNode.SelectNodes(xpatnDiv);
            if (dataLists == null || dataLists.Count == 0)
            {
                return;
            }

            // NOTE(review): the leading "//" makes this query document-wide rather than limited
            // to the dataList container; kept as-is to preserve the current result set.
            HtmlNodeCollection htmlNode_dec = dataLists[0].SelectNodes("//a[@class='single csgo']");
            if (htmlNode_dec == null)
            {
                return;
            }

            foreach (var item in htmlNode_dec)
            {
                // Important: every query is prefixed with the item's own XPath so that it
                // cannot match nodes belonging to other items.
                var xpath = item.XPath;

                // Wear level (optional: a missing label leaves the value null).
                var labelNode = item.SelectSingleNode(xpath + "//div[@class='label']");
                string label_dec = labelNode != null ? labelNode.InnerText : null;

                // Required nodes, each looked up once.
                var nameNode   = item.SelectSingleNode(xpath + "//div[@class='name']");
                var infNode    = item.SelectSingleNode(xpath + "//div[@class='inf clearfix']");
                var priceNode  = item.SelectSingleNode(xpath + "//div[@class='price fl']");
                var spanNode   = item.SelectSingleNode(xpath + "//span");
                var subNode    = item.SelectSingleNode(xpath + "//sub");
                var sumNode    = item.SelectSingleNode(xpath + "//div[@class='sum fr']");
                var imgDivNode = item.SelectSingleNode(xpath + "//div[@class='img']");
                var imgNode    = item.SelectSingleNode(xpath + "//img");
                var srcAttr    = imgNode != null ? imgNode.Attributes["src"] : null;

                if (nameNode == null || infNode == null || priceNode == null || spanNode == null ||
                    subNode == null || sumNode == null || imgDivNode == null || srcAttr == null)
                {
                    Console.WriteLine("Skipped item with incomplete markup: " + xpath);
                    continue;
                }

                // Item name
                string name_dec = nameNode.InnerText;
                // igxe price: text of the <span> followed by text of the <sub>
                string price_dec = spanNode.InnerText + subNode.InnerText;
                // Number on sale, with the "在售:" label stripped
                string count_dec = sumNode.InnerText.Replace("在售:", "");
                // Image URL
                string img_dec = srcAttr.Value;

                string txt = "饰品:" + name_dec + ",磨损度:" + label_dec + ",价格:" + price_dec + ",在售数量:" + count_dec + ",图片地址:" + img_dec;
                Console.WriteLine(txt);
                doIO.WriteTxt(txt, "..\\..\\..\\bin\\data\\dec.txt");
            }
        }
Пример #2
0
        /// <summary>
        /// Processes the IgxeCsgo data file: reads <c>dec2.txt</c> line by line and splits
        /// every line on commas into a list of fields.
        /// </summary>
        /// <remarks>
        /// The parsed rows are only held in memory; the duplicate-detection and re-save step
        /// is disabled (see the comment block in the body). Any exception is caught and
        /// printed to the console.
        /// </remarks>
        public void DataAccess_IgxeCsgo()
        {
            try
            {
                var list_dec = new List<List<string>>();

                // The using blocks release the file handle even if reading throws.
                using (var fs = new FileStream(@"..\..\..\bin\data\dec2.txt", FileMode.Open))
                using (var sr = new StreamReader(fs))
                {
                    string nextLine;
                    // Read line by line
                    while ((nextLine = sr.ReadLine()) != null)
                    {
                        list_dec.Add(new List<string>(nextLine.Split(',')));
                    }
                }

                // NOTE(review): only referenced by the disabled block below; kept because the
                // DoIO constructor is not visible from here.
                DoIO doIO = new DoIO();

                /*
                 * for (int i = 0; i < list_dec.Count; i++)
                 * {
                 *  for (int j = i+1; j < list_dec.Count; j++)
                 *  {
                 *      if (list_dec[i].First() == list_dec[j].First() && list_dec[i][1] == list_dec[j][1])
                 *      {
                 *          //list_dec.Remove(list_dec[j]);// remove the duplicate row
                 *          Console.WriteLine((i + 1) + "," + (j + 1));
                 *      }
                 *  }
                 * }
                 * // Save again
                 * foreach (var item in list_dec)
                 * {
                 *  string txt = null;
                 *  foreach (var item1 in item)
                 *  {
                 *      txt += item1 + ",";
                 *  }
                 *
                 *  doIO.WriteTxt(txt, "..\\..\\..\\bin\\data\\dec2.txt");
                 * }
                 */
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }
        }
Пример #3
0
        /// <summary>
        /// Entry point: reads <c>dec3.txt</c>, converts each line into a <see cref="Hashtable"/>
        /// row and passes all rows to <c>DBHelper.InsertData</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <remarks>
        /// For every line the zero-based line index is printed on success, or
        /// "&lt;index&gt; -> error!" when the line cannot be parsed; failed lines are not inserted.
        /// </remarks>
        static void Main(string[] args)
        {
            // Insert into database
            var doIO = new DoIO();
            List<string> list_dec = doIO.ReadTxt2(@"..\..\..\bin\data\dec3.txt");
            var dbHelper = new DBHelper();
            var hstList  = new List<Hashtable>();

            // An indexed loop reports the real position of each line; looking the position up
            // with IndexOf would return the first occurrence for duplicated lines and rescan
            // the list on every iteration.
            for (int index = 0; index < list_dec.Count; index++)
            {
                try
                {
                    var arr_info = list_dec[index].Split(',');

                    // Substring(3) drops the first three characters of the name field
                    // (presumably the "饰品:" label — confirm against the file format).
                    var decName     = arr_info[0].Substring(3);
                    var arr_decName = decName.Split(',', '|');

                    var hs = new Hashtable();
                    hs.Add("DecName_CN", decName);
                    // Substring(4): presumably strips the "磨损度:" label — confirm.
                    hs.Add("Wear", arr_info[1].Substring(4));
                    // Type is the part of the name before the first '|'.
                    hs.Add("Type", arr_decName[0]);
                    // Substring(38): presumably strips the "图片地址:" label plus a fixed URL prefix — confirm.
                    hs.Add("Image", arr_info[4].Substring(38));
                    hs.Add("DecPrice_CSGOID", 1);
                    hstList.Add(hs);
                    Console.WriteLine(index);
                }
                catch (Exception)
                {
                    // Best effort: a malformed line is reported and skipped.
                    Console.WriteLine(index + " -> error!");
                }
            }

            dbHelper.InsertData(hstList);

            // Online image download

            /*
             * DoIO doIO = new DoIO();
             * doIO.SaveHttpImg(@"..\..\..\bin\data\dec2.txt");
             */

            // Data-processing program

            /*
             * DataAccess dataAccess = new DataAccess();
             * dataAccess.DataAccess_IgxeCsgo();
             */
            // Crawler program

            /*
             * var doIO = new DoIO();
             * var crawler = new Crawler();// invoke the crawler core
             * var crawlingIgxeDec = new CrawlingIgxeDec();
             *
             * crawler.OnStart += (s, e) =>
             * {
             *  Console.WriteLine("=====================================");
             *  Console.WriteLine("爬虫开始抓取地址:" + e.Uri.ToString());
             * };
             * crawler.OnError += (s, e) =>
             * {
             *  Console.WriteLine("爬虫抓取出错:" + e.Message);
             * };
             * crawler.OnComplete += (s, e) =>
             * {
             *  crawlingIgxeDec.GetDecInfo(e.PageSource, doIO);
             * };
             * // Crawl igxe item information
             * for (int i = 1; i <= 515; i++)
             * {
             *  var uri = "https://www.igxe.cn/csgo/730?is_buying=0&is_stattrak%5B%5D=0&is_stattrak%5B%5D=0&sort=3&ctg_id=0&type_id=0&page_no=" + i + "&page_size=20&rarity_id=0&exterior_id=0&quality_id=0&capsule_id=0&_t=1560502035978";
             *  // Without a proxy
             *  crawler.Start(new Uri(uri), null).Wait();
             *  // With a proxy
             *  //var agentIp = "222.189.144.72";
             *  //var agentIpPort = 9999;
             *  //crawler.Start(new Uri(uri), new WebProxy(agentIp, agentIpPort)).Wait();
             * }*/
        }