Пример #1
0
        /// <summary>
        /// Fetches one page through <c>HttpGET</c>, parses the repository entries out of the
        /// returned HTML and stores every entry that is not cached yet through
        /// <c>RedisCacheHelper</c>, keyed by the repository URL. Entries that are already
        /// cached are printed from the cache and skipped.
        /// </summary>
        /// <param name="index">Forwarded unchanged to <c>HttpGET</c> (presumably the page number; confirm against HttpGET).</param>
        /// <param name="name">Forwarded unchanged to <c>HttpGET</c> (presumably the search keyword; confirm against HttpGET).</param>
        /// <param name="isOne">When true, the pagination links are parsed as well to obtain the total page count.</param>
        /// <returns>
        /// The inner text of the second-to-last pagination link when <paramref name="isOne"/> is true
        /// and such a link exists; otherwise an empty string. An empty string is also returned when
        /// no HTML was obtained or when no repository entries were found on the page.
        /// </returns>
        public static string Save(string index, string name, bool isOne = false)
        {
            // Fetch the page; a missing response or a missing "result" entry is treated as "no HTML".
            Dictionary<string, string> result = HttpGET(index, name);
            string html = null;
            if (result != null)
            {
                result.TryGetValue("result", out html);
            }

            LogHelper.Warn("html:" + html);
            Console.WriteLine(html);
            if (string.IsNullOrEmpty(html))
            {
                LogHelper.Debug("html=null");
                return "";
            }

            // Load the HTML into an HtmlAgilityPack document.
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);

            // Total page count, only requested for the first call.
            string strPageSize = "";
            if (isOne)
            {
                HtmlNodeCollection pageLinks = doc.DocumentNode.SelectNodes("//div[@class='paginate-container']/div[@class='pagination']/a");
                // SelectNodes yields null when nothing matches; the second-to-last link is used
                // (presumably the last one is the "next" link; confirm against the page markup).
                if (pageLinks != null && pageLinks.Count >= 2)
                {
                    strPageSize = pageLinks[pageLinks.Count - 2].InnerText;
                }
                LogHelper.Debug("总页数是:" + strPageSize);
                Console.WriteLine();
                Console.WriteLine();
            }

            // All child blocks of the repository list items.
            HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class='repo-list-item d-flex flex-justify-start py-4 public source']/div");
            if (collection == null)
            {
                LogHelper.Debug("collection=null");
                return "";
            }

            // NOTE(review): the XPath expressions below start with "//", which is evaluated from the
            // document root rather than relative to 'item'. The RemoveAll calls are what keep an
            // already processed node from being matched again. Confirm the page structure before
            // switching these to ".//".
            foreach (HtmlNode item in collection)
            {
                HtmlNode linkNode = item.SelectSingleNode("//h3/a[@class='v-align-middle']");
                if (linkNode == null || linkNode.Attributes["href"] == null)
                {
                    // No (further) repository link available; nothing to store for this block.
                    LogHelper.Debug("strURL=null");
                    continue;
                }

                string strURL = "https://github.com" + linkNode.Attributes["href"].Value;
                LogHelper.Debug("strURL" + strURL);

                if (RedisCacheHelper.Exists(strURL))
                {
                    // The same URL in the cache means this project was crawled before: print and skip.
                    Dictionary<string, string> mResult = RedisCacheHelper.Get<Dictionary<string, string>>(strURL);
                    Console.WriteLine("重新取出的数据...");
                    Console.WriteLine("名字是:" + mResult["name"]);
                    Console.WriteLine("地址是:" + mResult["url"]);
                    Console.WriteLine("说明是:" + mResult["detailed"]);
                    Console.WriteLine("更新时间是:" + mResult["updateTime"]);
                    Console.WriteLine("Stargazers是:" + mResult["stargazers"]);
                    Console.WriteLine("Forks是:" + mResult["forks"]);
                    Console.WriteLine();
                    item.RemoveAll();
                    continue;
                }

                Dictionary<string, string> mDic = new Dictionary<string, string>();

                // Read the text before RemoveAll strips the node.
                string repoName = RepoNodeText(linkNode);
                mDic.Add("name", repoName);
                mDic.Add("url", strURL);
                Console.WriteLine("名字是:" + repoName);
                Console.WriteLine("地址是:" + strURL);
                linkNode.RemoveAll();

                HtmlNode descriptionNode = item.SelectSingleNode("//p[@class='col-9 d-inline-block text-gray mb-2 pr-4']");
                string detailed = RepoNodeText(descriptionNode);
                mDic.Add("detailed", detailed);
                Console.WriteLine("说明是:" + detailed);
                if (descriptionNode != null)
                {
                    descriptionNode.RemoveAll();
                }

                HtmlNode updatedNode = item.SelectSingleNode("//p[@class='f6 text-gray mr-3 mb-0 mt-2']/relative-time");
                string updateTime = RepoNodeText(updatedNode);
                mDic.Add("updateTime", updateTime);
                Console.WriteLine("更新时间是:" + updateTime);

                HtmlNode starsNode = item.SelectSingleNode("//div[@class='col-2 text-right pt-1 pr-3 pt-2']/a[@class='muted-link']");
                string stargazers = RepoNodeText(starsNode);
                mDic.Add("stargazers", stargazers);
                Console.WriteLine("Stargazers是:" + stargazers);

                // NOTE(review): no separate selector is applied for forks, so "forks" carries the
                // same value as "stargazers".
                mDic.Add("forks", stargazers);
                Console.WriteLine("Forks是:" + stargazers);
                Console.WriteLine();

                LogHelper.Debug("strURL" + strURL + "mDic" + mDic);
                RedisCacheHelper.Add<Dictionary<string, string>>(strURL, mDic);
                item.RemoveAll();
            }

            return strPageSize;
        }

        /// <summary>
        /// Returns the inner text of <paramref name="node"/> with line breaks removed and
        /// surrounding whitespace trimmed, or an empty string when the node or its text is null.
        /// </summary>
        private static string RepoNodeText(HtmlNode node)
        {
            if (node == null || node.InnerText == null)
            {
                return "";
            }

            return node.InnerText.Replace("\r", "").Replace("\n", "").Trim();
        }
Пример #2
0
        /// <summary>
        /// Fetches one page through <c>HttpGET</c>, parses the book entries out of the returned
        /// HTML and stores every entry that is not cached yet through <c>RedisCacheHelper</c>,
        /// keyed by the book URL. Entries that are already cached are printed from the cache
        /// and skipped.
        /// </summary>
        /// <param name="index">Forwarded unchanged to <c>HttpGET</c> (presumably the page number; confirm against HttpGET).</param>
        /// <param name="name">Forwarded unchanged to <c>HttpGET</c> (presumably the search keyword; confirm against HttpGET).</param>
        /// <param name="isOne">When true, the counter of the active list entry is parsed as well to obtain the total number of books.</param>
        /// <returns>
        /// The inner text of the last matching counter element when <paramref name="isOne"/> is true
        /// and such an element exists; otherwise an empty string. An empty string is also returned
        /// when no HTML was obtained or when no book entries were found on the page.
        /// </returns>
        public static string SaveGitBook(string index, string name, bool isOne = false)
        {
            // Fetch the page; a missing response or a missing "result" entry is treated as "no HTML".
            Dictionary<string, string> result = HttpGET(index, name);
            string html = null;
            if (result != null)
            {
                result.TryGetValue("result", out html);
            }

            LogHelper.Warn("html:" + html);
            Console.WriteLine(html);
            if (string.IsNullOrEmpty(html))
            {
                LogHelper.Debug("html=null");
                return "";
            }

            // Load the HTML into an HtmlAgilityPack document.
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);

            // Total number of books, only requested for the first call.
            string strPageSize = "";
            if (isOne)
            {
                HtmlNodeCollection counters = doc.DocumentNode.SelectNodes("//div[@class='panel panel-default']/ul[@class='list-group']/a[@class='list-group-item active']/span");
                // SelectNodes yields null when nothing matches.
                if (counters != null && counters.Count > 0)
                {
                    strPageSize = counters[counters.Count - 1].InnerText;
                }
                LogHelper.Debug("书总数:" + strPageSize);
                Console.WriteLine();
                Console.WriteLine();
            }

            // All book blocks on the page.
            HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class='Books']/div[@class='Book']");
            if (collection == null)
            {
                LogHelper.Debug("collection=null");
                return "";
            }

            // NOTE(review): the XPath expressions below start with "//", which is evaluated from the
            // document root rather than relative to 'item'. The RemoveAll calls are what keep an
            // already processed node from being matched again. Confirm the page structure before
            // switching these to ".//".
            foreach (HtmlNode item in collection)
            {
                HtmlNode linkNode = item.SelectSingleNode("//div[@class='book-infos']/h2[@class='title']/a");
                if (linkNode == null || linkNode.Attributes["href"] == null)
                {
                    // No (further) book link available; nothing to store for this block.
                    LogHelper.Debug("strURL=null");
                    continue;
                }

                string strURL = linkNode.Attributes["href"].Value;
                LogHelper.Debug("strURL" + strURL);

                if (RedisCacheHelper.Exists(strURL))
                {
                    // The same URL in the cache means this book was crawled before: print and skip.
                    Dictionary<string, string> mResult = RedisCacheHelper.Get<Dictionary<string, string>>(strURL);

                    // Entries may lack "detailed" if they were stored without a description.
                    string cachedDetailed;
                    if (!mResult.TryGetValue("detailed", out cachedDetailed))
                    {
                        cachedDetailed = "";
                    }

                    Console.WriteLine("重新取出的数据...");
                    Console.WriteLine("名字是:" + mResult["name"]);
                    Console.WriteLine("地址是:" + mResult["url"]);
                    Console.WriteLine("说明是:" + cachedDetailed);
                    Console.WriteLine("更新时间是:" + mResult["updateTime"]);
                    Console.WriteLine("Stargazers是:" + mResult["stargazers"]);
                    Console.WriteLine("Forks是:" + mResult["forks"]);
                    Console.WriteLine();
                    item.RemoveAll();
                    continue;
                }

                Dictionary<string, string> mDic = new Dictionary<string, string>();

                // Read the text before RemoveAll strips the node.
                string bookName = BookNodeText(linkNode);
                mDic.Add("name", bookName);
                mDic.Add("url", strURL);
                Console.WriteLine("名字是:" + bookName);
                Console.WriteLine("地址是:" + strURL);
                linkNode.RemoveAll();

                // "detailed" is always written so that the cached entry has a complete key set.
                HtmlNode descriptionNode = item.SelectSingleNode("//p[@class='description']");
                string detailed = BookNodeText(descriptionNode);
                mDic.Add("detailed", detailed);
                if (descriptionNode != null)
                {
                    Console.WriteLine("说明是:" + detailed);
                    descriptionNode.RemoveAll();
                }

                HtmlNode updatedNode = item.SelectSingleNode("//p[@class='updated']/span");
                string updateTime = BookNodeText(updatedNode);
                mDic.Add("updateTime", updateTime);
                Console.WriteLine("更新时间是:" + updateTime);

                HtmlNode starsNode = item.SelectSingleNode("//div[@class='btn-group']/a[@class='btn btn-count btn-md']");
                string stargazers = BookNodeText(starsNode);
                mDic.Add("stargazers", stargazers);
                Console.WriteLine("Stargazers是:" + stargazers);

                // NOTE(review): no separate selector is applied for forks, so "forks" carries the
                // same value as "stargazers".
                mDic.Add("forks", stargazers);
                Console.WriteLine("Forks是:" + stargazers);
                Console.WriteLine();

                LogHelper.Debug("strURL" + strURL + "mDic" + mDic);
                RedisCacheHelper.Add<Dictionary<string, string>>(strURL, mDic);
                item.RemoveAll();
            }

            return strPageSize;
        }

        /// <summary>
        /// Returns the inner text of <paramref name="node"/> with line breaks removed and
        /// surrounding whitespace trimmed, or an empty string when the node or its text is null.
        /// </summary>
        private static string BookNodeText(HtmlNode node)
        {
            if (node == null || node.InnerText == null)
            {
                return "";
            }

            return node.InnerText.Replace("\r", "").Replace("\n", "").Trim();
        }