/// <summary>
/// Parses the HTML of one listing page and records every item found on it.
/// </summary>
/// <remarks>
/// Parsing uses HtmlAgilityPack (Install-Package HtmlAgilityPack) with XPath.
/// For each <c>a.single csgo</c> anchor inside a <c>div.dataList</c>, the name,
/// wear label, price, on-sale count and image URL are extracted, joined into one
/// comma-separated line, printed to the console and passed to
/// <c>doIO.WriteTxt</c> together with the <c>dec.txt</c> path.
/// Fields whose node is missing are left empty; items without a name node are skipped.
/// </remarks>
/// <param name="pageSource">HTML text of the page; null or empty input is ignored.</param>
/// <param name="doIO">Helper whose <c>WriteTxt</c> receives each formatted line.</param>
public void GetDecInfo(string pageSource, DoIO doIO)
{
    if (string.IsNullOrEmpty(pageSource))
    {
        return;
    }

    HtmlDocument htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(pageSource);

    // One absolute query scoped to the list container(s). An XPath that starts with
    // "//" always searches the whole document, even when it is evaluated on a child node.
    string xpathItems = "//div[@class='dataList']//a[@class='single csgo']";
    HtmlNodeCollection itemNodes = htmlDoc.DocumentNode.SelectNodes(xpathItems);

    // SelectNodes yields null (not an empty collection) when nothing matches.
    if (itemNodes == null)
    {
        return;
    }

    foreach (HtmlNode item in itemNodes)
    {
        // Every lookup below uses ".//" so it stays inside the current item
        // instead of matching the first such node of the whole page.

        // Item name; an entry without one is treated as malformed and skipped.
        string name_dec = InnerTextOrNull(item, ".//div[@class='name']");
        if (name_dec == null)
        {
            continue;
        }

        // Wear label (optional).
        string label_dec = InnerTextOrNull(item, ".//div[@class='label']");

        HtmlNode infoNode = item.SelectSingleNode(".//div[@class='inf clearfix']");
        HtmlNode priceNode = infoNode == null
            ? null
            : infoNode.SelectSingleNode(".//div[@class='price fl']");

        // IGXE price: text of the <span> followed by the text of the <sub>.
        string price_dec = InnerTextOrNull(priceNode, ".//span") + InnerTextOrNull(priceNode, ".//sub");

        // On-sale count, with the "在售:" caption stripped.
        string count_dec_html = InnerTextOrNull(infoNode, ".//div[@class='sum fr']");
        string count_dec = count_dec_html == null ? null : count_dec_html.Replace("在售:", "");

        // Image URL; empty when the <img> or its src attribute is missing.
        HtmlNode imgNode = item.SelectSingleNode(".//div[@class='img']//img");
        string img_dec = imgNode == null ? string.Empty : imgNode.GetAttributeValue("src", string.Empty);

        string txt = "饰品:" + name_dec + ",磨损度:" + label_dec + ",价格:" + price_dec + ",在售数量:" + count_dec + ",图片地址:" + img_dec;
        Console.WriteLine(txt);
        doIO.WriteTxt(txt, "..\\..\\..\\bin\\data\\dec.txt");
    }
}

// Returns the InnerText of the first node under `parent` matching the relative XPath,
// or null when `parent` is null or no node matches.
private static string InnerTextOrNull(HtmlNode parent, string relativeXPath)
{
    if (parent == null)
    {
        return null;
    }

    HtmlNode match = parent.SelectSingleNode(relativeXPath);
    return match == null ? null : match.InnerText;
}
/// <summary>
/// Processes the IgxeCsgo data: reads <c>dec2.txt</c> line by line and splits
/// each line on commas into a list of fields.
/// </summary>
/// <remarks>
/// The parsed rows are only held in memory; the duplicate-detection and re-save
/// pass is kept below as disabled code. Any exception is caught and printed to
/// the console rather than propagated.
/// </remarks>
public void DataAccess_IgxeCsgo()
{
    try
    {
        var list_dec = new List<List<string>>();

        // `using` guarantees the file handle is released even when reading throws;
        // the previous explicit Close() only ran on the success path.
        using (var fs = new FileStream(@"..\..\..\bin\data\dec2.txt", FileMode.Open))
        using (var sr = new StreamReader(fs))
        {
            // Read line by line.
            string nextLine;
            while ((nextLine = sr.ReadLine()) != null)
            {
                list_dec.Add(new List<string>(nextLine.Split(',')));
            }
        }

        // Retained for the disabled pass below.
        // NOTE(review): the DoIO constructor is not visible here; confirm it has
        // no side effects before removing this instance.
        DoIO doIO = new DoIO();

        /*
         * for (int i = 0; i < list_dec.Count; i++)
         * {
         *     for (int j = i+1; j < list_dec.Count; j++)
         *     {
         *         if (list_dec[i].First() == list_dec[j].First() && list_dec[i][1] == list_dec[j][1])
         *         {
         *             //list_dec.Remove(list_dec[j]);// remove the duplicate row
         *             Console.WriteLine((i + 1) + "," + (j + 1));
         *         }
         *     }
         * }
         * // save again
         * foreach (var item in list_dec)
         * {
         *     string txt = null;
         *     foreach (var item1 in item)
         *     {
         *         txt += item1 + ",";
         *     }
         *
         *     doIO.WriteTxt(txt, "..\\..\\..\\bin\\data\\dec2.txt");
         * }
         */
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
    }
}
/// <summary>
/// Program entry point. The active step loads the lines of <c>dec3.txt</c>,
/// converts each one into a <see cref="Hashtable"/> of column values and hands
/// the whole list to <c>DBHelper.InsertData</c>.
/// </summary>
/// <remarks>
/// The image-download, data-processing and crawler steps are kept below as
/// disabled code.
/// </remarks>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Insert into database.
    var doIO = new DoIO();
    List<string> list_dec = doIO.ReadTxt2(@"..\..\..\bin\data\dec3.txt");
    var dbHelper = new DBHelper();
    var hstList = new List<Hashtable>();

    // Indexed loop: the previous list_dec.IndexOf(item) rescanned the list on every
    // iteration and reported the first occurrence's index for duplicate lines.
    for (int i = 0; i < list_dec.Count; i++)
    {
        try
        {
            var arr_info = list_dec[i].Split(',');

            // Field 0 minus its first 3 characters.
            // Presumably this strips the "饰品:" caption — confirm against dec3.txt.
            string decName = arr_info[0].Substring(3);

            // The text before the first '|' is stored as the type. decName cannot
            // contain ',' because the line was already split on it.
            var arr_decName = decName.Split('|');

            var hs = new Hashtable();
            hs.Add("DecName_CN", decName);
            // Field 1 minus its first 4 characters (presumably the "磨损度:" caption).
            hs.Add("Wear", arr_info[1].Substring(4));
            hs.Add("Type", arr_decName[0]);
            // Field 4 minus its first 38 characters.
            // NOTE(review): looks like the caption plus a fixed URL prefix — confirm.
            hs.Add("Image", arr_info[4].Substring(38));
            hs.Add("DecPrice_CSGOID", 1);
            hstList.Add(hs);

            Console.WriteLine(i);
        }
        catch (Exception e)
        {
            // A malformed line is skipped; the reason is reported next to its index.
            Console.WriteLine(i + " -> error! " + e.Message);
        }
    }

    dbHelper.InsertData(hstList);

    // Online image download.
    /*
     * DoIO doIO = new DoIO();
     * doIO.SaveHttpImg(@"..\..\..\bin\data\dec2.txt");
     */

    // Data-processing runner.
    /*
     * DataAccess dataAccess = new DataAccess();
     * dataAccess.DataAccess_IgxeCsgo();
     */

    // Crawler runner.
    /*
     * var doIO = new DoIO();
     * var crawler = new Crawler();// core crawler program
     * var crawlingIgxeDec = new CrawlingIgxeDec();
     *
     * crawler.OnStart += (s, e) =>
     * {
     *     Console.WriteLine("=====================================");
     *     Console.WriteLine("爬虫开始抓取地址:" + e.Uri.ToString());
     * };
     * crawler.OnError += (s, e) =>
     * {
     *     Console.WriteLine("爬虫抓取出错:" + e.Message);
     * };
     * crawler.OnComplete += (s, e) =>
     * {
     *     crawlingIgxeDec.GetDecInfo(e.PageSource, doIO);
     * };
     * // Crawl the IGXE item listings.
     * for (int i = 1; i <= 515; i++)
     * {
     *     var uri = "https://www.igxe.cn/csgo/730?is_buying=0&is_stattrak%5B%5D=0&is_stattrak%5B%5D=0&sort=3&ctg_id=0&type_id=0&page_no=" + i + "&page_size=20&rarity_id=0&exterior_id=0&quality_id=0&capsule_id=0&_t=1560502035978";
     *     // Without a proxy.
     *     crawler.Start(new Uri(uri), null).Wait();
     *     // With a proxy.
     *     //var agentIp = "222.189.144.72";
     *     //var agentIpPort = 9999;
     *     //crawler.Start(new Uri(uri), new WebProxy(agentIp, agentIpPort)).Wait();
     * }
     */
}