/// <summary>
/// Downloads one search-result page through <c>HttpGET</c>, extracts every repository entry on it
/// and stores each entry that is not cached yet in Redis, keyed by the repository URL.
/// Entries that are already cached are read back and printed instead of being re-parsed.
/// </summary>
/// <param name="index">Forwarded unchanged to <c>HttpGET</c> (presumably the page number; confirm against HttpGET).</param>
/// <param name="name">Forwarded unchanged to <c>HttpGET</c> (presumably the search keyword; confirm against HttpGET).</param>
/// <param name="isOne">When true, the total page count is additionally read from the pagination bar.</param>
/// <returns>
/// The page-count text when <paramref name="isOne"/> is true and the pagination bar was found;
/// otherwise an empty string. An empty string is also returned when no HTML could be obtained
/// or the page contains no repository entries.
/// </returns>
public static string Save(string index, string name, bool isOne = false)
{
    // Fetch the page HTML; a missing or empty payload means there is nothing to parse.
    Dictionary<string, string> result = HttpGET(index, name);
    string html;
    if (result == null || !result.TryGetValue("result", out html) || string.IsNullOrEmpty(html))
    {
        LogHelper.Debug("html=null");
        return "";
    }

    LogHelper.Warn("html:" + html);
    Console.WriteLine(html);

    // Load the HTML into an HtmlAgilityPack document.
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    // Read the total page count (only requested for the first page).
    string strPageSize = "";
    if (isOne)
    {
        HtmlNodeCollection pageLinks = doc.DocumentNode.SelectNodes("//div[@class='paginate-container']/div[@class='pagination']/a");

        // The second-to-last pagination link is used (presumably the last one is the "next" link; confirm).
        // SelectNodes returns null when nothing matches, so guard before indexing.
        if (pageLinks != null && pageLinks.Count >= 2)
        {
            strPageSize = pageLinks[pageLinks.Count - 2].InnerText;
        }

        LogHelper.Debug("总页数是:" + strPageSize);
        Console.WriteLine();
        Console.WriteLine();
    }

    // Select each repository container itself so that the relative (".//") lookups below are
    // scoped to exactly one repository. An XPath starting with "//" would search the whole document.
    HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class='repo-list-item d-flex flex-justify-start py-4 public source']");
    if (collection == null)
    {
        LogHelper.Debug("collection=null");
        return "";
    }

    foreach (HtmlNode item in collection)
    {
        HtmlNode titleLink = item.SelectSingleNode(".//h3/a[@class='v-align-middle']");
        string href = titleLink == null ? "" : titleLink.GetAttributeValue("href", "");
        if (href.Length == 0)
        {
            // Without a link there is no cache key; skip the entry instead of crashing.
            LogHelper.Debug("strURL=null");
            continue;
        }

        string strURL = "https://github.com" + href;
        LogHelper.Debug("strURL" + strURL);

        if (RedisCacheHelper.Exists(strURL))
        {
            // The same URL already being cached means this project was crawled before; just print it.
            Dictionary<string, string> cached = RedisCacheHelper.Get<Dictionary<string, string>>(strURL);
            Console.WriteLine("重新取出的数据...");
            PrintRepoRecord(cached);
            continue;
        }

        string stargazers = CleanRepoNodeText(item.SelectSingleNode(".//div[@class='col-2 text-right pt-1 pr-3 pt-2']/a[@class='muted-link']"));

        Dictionary<string, string> mDic = new Dictionary<string, string>();
        mDic.Add("name", CleanRepoNodeText(titleLink));
        mDic.Add("url", strURL);
        mDic.Add("detailed", CleanRepoNodeText(item.SelectSingleNode(".//p[@class='col-9 d-inline-block text-gray mb-2 pr-4']")));
        mDic.Add("updateTime", CleanRepoNodeText(item.SelectSingleNode(".//p[@class='f6 text-gray mr-3 mb-0 mt-2']/relative-time")));
        mDic.Add("stargazers", stargazers);
        // No dedicated forks lookup exists (the selector was disabled in the original code),
        // so "forks" deliberately mirrors the stargazers value as before.
        mDic.Add("forks", stargazers);

        PrintRepoRecord(mDic);

        LogHelper.Debug("strURL" + strURL + "mDic" + mDic);
        // The dictionary is not cleared afterwards: if the cache helper keeps a reference,
        // clearing it would wipe the stored entry.
        RedisCacheHelper.Add<Dictionary<string, string>>(strURL, mDic);
    }

    return strPageSize;
}

/// <summary>Returns the node's inner text with line breaks removed and surrounding whitespace trimmed, or "" when the node is missing.</summary>
private static string CleanRepoNodeText(HtmlNode node)
{
    if (node == null || node.InnerText == null)
    {
        return "";
    }

    return node.InnerText.Replace("\n", "").Trim();
}

/// <summary>Prints the six fields of a repository record followed by a blank line; missing fields are printed as empty.</summary>
private static void PrintRepoRecord(Dictionary<string, string> record)
{
    string[,] fields =
    {
        { "名字是:", "name" },
        { "地址是:", "url" },
        { "说明是:", "detailed" },
        { "更新时间是:", "updateTime" },
        { "Stargazers是:", "stargazers" },
        { "Forks是:", "forks" }
    };

    for (int i = 0; i < fields.GetLength(0); i++)
    {
        string value;
        if (record == null || !record.TryGetValue(fields[i, 1], out value) || value == null)
        {
            value = "";
        }

        Console.WriteLine(fields[i, 0] + value);
    }

    Console.WriteLine();
}
/// <summary>
/// Downloads one book-listing page through <c>HttpGET</c>, extracts every book entry on it
/// and stores each entry that is not cached yet in Redis, keyed by the book URL.
/// Entries that are already cached are read back and printed instead of being re-parsed.
/// </summary>
/// <param name="index">Forwarded unchanged to <c>HttpGET</c> (presumably the page number; confirm against HttpGET).</param>
/// <param name="name">Forwarded unchanged to <c>HttpGET</c> (presumably the search keyword; confirm against HttpGET).</param>
/// <param name="isOne">When true, the total book count is additionally read from the active list-group item.</param>
/// <returns>
/// The book-count text when <paramref name="isOne"/> is true and the counter element was found;
/// otherwise an empty string. An empty string is also returned when no HTML could be obtained
/// or the page contains no book entries.
/// </returns>
public static string SaveGitBook(string index, string name, bool isOne = false)
{
    // Fetch the page HTML; a missing or empty payload means there is nothing to parse.
    Dictionary<string, string> result = HttpGET(index, name);
    string html;
    if (result == null || !result.TryGetValue("result", out html) || string.IsNullOrEmpty(html))
    {
        LogHelper.Debug("html=null");
        return "";
    }

    LogHelper.Warn("html:" + html);
    Console.WriteLine(html);

    // Load the HTML into an HtmlAgilityPack document.
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    // Read the total count (only requested for the first page).
    string strPageSize = "";
    if (isOne)
    {
        HtmlNodeCollection counters = doc.DocumentNode.SelectNodes("//div[@class='panel panel-default']/ul[@class='list-group']/a[@class='list-group-item active']/span");

        // SelectNodes returns null when nothing matches, so guard before taking the last span.
        if (counters != null && counters.Count >= 1)
        {
            strPageSize = counters[counters.Count - 1].InnerText;
        }

        LogHelper.Debug("书总数:" + strPageSize);
        Console.WriteLine();
        Console.WriteLine();
    }

    // One node per book; the lookups below use relative (".//") paths so they stay inside that book.
    // An XPath starting with "//" would search the whole document.
    HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class='Books']/div[@class='Book']");
    if (collection == null)
    {
        LogHelper.Debug("collection=null");
        return "";
    }

    foreach (HtmlNode item in collection)
    {
        HtmlNode titleLink = item.SelectSingleNode(".//div[@class='book-infos']/h2[@class='title']/a");
        string strURL = titleLink == null ? "" : titleLink.GetAttributeValue("href", "");
        if (strURL.Length == 0)
        {
            // Without a link there is no cache key; skip the entry instead of crashing.
            LogHelper.Debug("strURL=null");
            continue;
        }

        LogHelper.Debug("strURL" + strURL);

        if (RedisCacheHelper.Exists(strURL))
        {
            // The same URL already being cached means this book was crawled before; just print it.
            Dictionary<string, string> cached = RedisCacheHelper.Get<Dictionary<string, string>>(strURL);
            Console.WriteLine("重新取出的数据...");
            PrintBookRecord(cached);
            continue;
        }

        string stargazers = CleanBookNodeText(item.SelectSingleNode(".//div[@class='btn-group']/a[@class='btn btn-count btn-md']"));

        Dictionary<string, string> mDic = new Dictionary<string, string>();
        mDic.Add("name", CleanBookNodeText(titleLink));
        mDic.Add("url", strURL);
        // Always store "detailed", even when the book has no description, so that reading
        // the cached record back never hits a missing key.
        mDic.Add("detailed", CleanBookNodeText(item.SelectSingleNode(".//p[@class='description']")));
        mDic.Add("updateTime", CleanBookNodeText(item.SelectSingleNode(".//p[@class='updated']/span")));
        mDic.Add("stargazers", stargazers);
        // No dedicated forks lookup exists (the selector was disabled in the original code),
        // so "forks" deliberately mirrors the stargazers value as before.
        mDic.Add("forks", stargazers);

        PrintBookRecord(mDic);

        LogHelper.Debug("strURL" + strURL + "mDic" + mDic);
        // The dictionary is not cleared afterwards: if the cache helper keeps a reference,
        // clearing it would wipe the stored entry.
        RedisCacheHelper.Add<Dictionary<string, string>>(strURL, mDic);
    }

    return strPageSize;
}

/// <summary>Returns the node's inner text with line breaks removed and surrounding whitespace trimmed, or "" when the node is missing.</summary>
private static string CleanBookNodeText(HtmlNode node)
{
    if (node == null || node.InnerText == null)
    {
        return "";
    }

    return node.InnerText.Replace("\n", "").Trim();
}

/// <summary>Prints the six fields of a book record followed by a blank line; missing fields are printed as empty.</summary>
private static void PrintBookRecord(Dictionary<string, string> record)
{
    string[,] fields =
    {
        { "名字是:", "name" },
        { "地址是:", "url" },
        { "说明是:", "detailed" },
        { "更新时间是:", "updateTime" },
        { "Stargazers是:", "stargazers" },
        { "Forks是:", "forks" }
    };

    for (int i = 0; i < fields.GetLength(0); i++)
    {
        string value;
        if (record == null || !record.TryGetValue(fields[i, 1], out value) || value == null)
        {
            value = "";
        }

        Console.WriteLine(fields[i, 0] + value);
    }

    Console.WriteLine();
}