Ejemplo n.º 1
0
        /// <summary>
        /// Scrapes the Baidu trending-search page and stores the parsed hot keywords in Redis
        /// under the key "Baidu".
        /// </summary>
        /// <param name="hc">HTTP client used to download the ranking page.</param>
        /// <remarks>
        /// Blocks the calling thread until the download has completed. When the page contains no
        /// ranking rows, the method returns without touching Redis.
        /// </remarks>
        public static void BaiduHotKey(HttpClient hc)
        {
            var download = hc.GetByteArrayAsync("http://top.baidu.com/buzz?b=341&c=513&fr=topbuzz&qq-pf-to=pcqq.c2c");

            download.Wait();

            // The raw bytes are decoded explicitly as GB2312.
            string       html = Encoding.GetEncoding("gb2312").GetString(download.Result);
            HtmlDocument docu = new HtmlDocument();

            docu.LoadHtml(html);

            // Every table row except the first one (position()>1).
            var rows = docu.DocumentNode.SelectNodes("//table[@class='list-table']/tr[position()>1] ");
            if (rows == null)
            {
                // HtmlAgilityPack's SelectNodes yields null rather than an empty collection when
                // nothing matches (e.g. the page layout changed); there is nothing to store.
                return;
            }

            List<WeiboHotKey> weiboHotKey = new List<WeiboHotKey>();

            foreach (var row in rows)
            {
                var titleNode = row.SelectSingleNode("./td[@class='keyword']/a[@class='list-title']");
                if (titleNode == null)
                {
                    // Rows without a title link carry no keyword; skip them instead of storing a null title.
                    continue;
                }

                // Only rows with an "icon-rise" span expose a count; fall back to 0 otherwise,
                // mirroring the empty-value handling used by SoHotKey.
                string countText = row.SelectSingleNode("./td[@class='last']/span[@class='icon-rise']")?.InnerText;
                string badge     = row.SelectSingleNode("./td[@class='keyword']/span")?.GetAttributeValue("class", "");

                weiboHotKey.Add(new WeiboHotKey()
                {
                    title    = titleNode.InnerText,
                    HotValue = !string.IsNullOrEmpty(countText) ? GetInt(countText) : 0,
                    // Any span with a non-empty class next to the keyword marks the entry as new.
                    type     = !string.IsNullOrEmpty(badge) ? "新" : ""
                });
            }

            using (var client = new RedisAccess(RedisConnect.WeiboHotKeyRedisPool))
            {
                client.Set("Baidu", weiboHotKey);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Downloads the 360 Search (so.com) real-time trends JSON and stores the parsed hot
        /// keywords in Redis under the key "360".
        /// </summary>
        /// <param name="hc">HTTP client used to download the trends feed.</param>
        /// <remarks>
        /// Blocks the calling thread until the download has completed. When the response has no
        /// <c>data.result</c> node, the method returns without touching Redis.
        /// </remarks>
        public static void SoHotKey(HttpClient hc)
        {
            var download = hc.GetStringAsync("https://trends.so.com/top/realtime");

            download.Wait();

            JToken payload = JsonConvert.DeserializeObject<JToken>(download.Result);

            // SelectToken returns null instead of throwing when any path segment is missing.
            JToken results = payload?.SelectToken("data.result");
            if (results == null)
            {
                return;
            }

            List<WeiboHotKey> weiboHotKey = new List<WeiboHotKey>();

            foreach (var entry in results)
            {
                string query = entry["query"]?.ToString();
                if (string.IsNullOrEmpty(query))
                {
                    // An entry without a query string carries no keyword to store.
                    continue;
                }

                string heat = entry["heat"]?.ToString();

                weiboHotKey.Add(new WeiboHotKey()
                {
                    title    = query,
                    HotValue = !string.IsNullOrEmpty(heat) ? GetInt(heat) : 0,
                    // update == 1 flags the entry as new; a missing field counts as "not new".
                    type     = entry["update"]?.ToObject<int>() == 1 ? "新" : ""
                });
            }

            using (var client = new RedisAccess(RedisConnect.WeiboHotKeyRedisPool))
            {
                client.Set("360", weiboHotKey);
            }
        }
        /// <summary>
        /// Registers the identity services, the identity database context and JWT bearer
        /// authentication, and starts the Redis access layer when a Redis connection string is
        /// configured.
        /// </summary>
        /// <param name="services">Service collection to register into.</param>
        /// <param name="Configuration">
        /// Application configuration. Reads "ConnectionString:IdentityDB", the "JWTSettings"
        /// section and the "HashingOptions" section.
        /// </param>
        /// <exception cref="System.InvalidOperationException">
        /// The "JWTSettings" section is missing or does not define a non-empty <c>Secret</c>.
        /// </exception>
        public static void AddJwt(this IServiceCollection services, IConfiguration Configuration)
        {
            services.AddScoped<IIdentityService, IdentityService>();
            services.AddScoped<IPasswordHasher, PasswordHasher>();
            services.AddScoped<IDataRepository<Identity>, IdentityManager>();
            services.AddDbContext<IdentityContext>(opts => opts.UseSqlServer(Configuration["ConnectionString:IdentityDB"]));
            services.AddMvc().AddApplicationPart(Assembly.Load(new AssemblyName("JWTCommonLibForDotNetCore")));

            var jwtSection = Configuration.GetSection("JWTSettings");

            services.Configure<JWTSettings>(jwtSection);
            services.Configure<HashingOptions>(Configuration.GetSection("HashingOptions"));

            var appSettings = jwtSection.Get<JWTSettings>();
            if (appSettings == null || string.IsNullOrEmpty(appSettings.Secret))
            {
                // Fail with an actionable message instead of a NullReferenceException / ArgumentNullException
                // raised from deep inside the key construction below.
                throw new System.InvalidOperationException(
                    "The \"JWTSettings\" configuration section must be present and define a non-empty Secret.");
            }

            // The signing key is derived from the ASCII bytes of the secret; the encoding is kept
            // unchanged here because the key must be byte-identical wherever tokens are validated.
            var signingKey = Encoding.ASCII.GetBytes(appSettings.Secret);

            services.AddAuthentication(auth =>
            {
                auth.DefaultAuthenticateScheme = JwtBearerDefaults.AuthenticationScheme;
                auth.DefaultChallengeScheme    = JwtBearerDefaults.AuthenticationScheme;
            })
            .AddJwtBearer(bearer =>
            {
                bearer.Events = new JwtBearerEvents
                {
                    OnTokenValidated = OnTokenValidated
                };
                // NOTE(review): HTTPS metadata is not required and issuer/audience are not validated;
                // confirm this is intended outside of development environments.
                bearer.RequireHttpsMetadata      = false;
                bearer.SaveToken                 = true;
                bearer.TokenValidationParameters = new TokenValidationParameters
                {
                    ValidateIssuerSigningKey = true,
                    IssuerSigningKey         = new SymmetricSecurityKey(signingKey),
                    ValidateIssuer           = false,
                    ValidateAudience         = false
                };
            });

            // Redis is optional: it is started only when a connection string is configured.
            if (!string.IsNullOrEmpty(appSettings.RedisConnectionString))
            {
                RedisAccess.Startup(appSettings.RedisConnectionString, appSettings.TokenExpiredTimeInSecounds);
                UseRedis = true;
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Scrapes the Sina Weibo real-time hot-search page and stores the parsed hot keywords
        /// in Redis under the key "Weibo".
        /// </summary>
        /// <param name="hc">HTTP client used to download the summary page.</param>
        /// <remarks>
        /// Blocks the calling thread until the download has completed. The ranking is embedded
        /// in a script element as <c>STK.pageletM.view({...})</c>; the JSON argument's "html"
        /// field holds the table markup that is parsed here. Script elements that do not contain
        /// the expected pagelet, call wrapper or "html" field are skipped.
        /// </remarks>
        public static void WeiboHotKey(HttpClient hc)
        {
            const string pageletMarker = "\"pid\":\"pl_top_realtimehot\"";
            const string callPrefix    = "STK && STK.pageletM && STK.pageletM.view(";

            var download = hc.GetStringAsync("http://s.weibo.com/top/summary?cate=realtimehot");
            download.Wait();

            CQ page = new CQ(download.Result);
            foreach (var script in page["script"])
            {
                // Render the script body once; it is needed for both the match and the extraction.
                string scriptBody = CQ.Create(script).Html();
                if (!scriptBody.Contains(pageletMarker))
                {
                    continue;
                }

                // Take everything between the call prefix and the LAST closing parenthesis, so a
                // trailing ';' or whitespace after the call does not corrupt the JSON.
                int prefixAt  = scriptBody.IndexOf(callPrefix, System.StringComparison.Ordinal);
                int jsonStart = prefixAt + callPrefix.Length;
                int jsonEnd   = scriptBody.LastIndexOf(')');
                if (prefixAt < 0 || jsonEnd <= jsonStart)
                {
                    continue;
                }

                var    payload  = JsonConvert.DeserializeObject<JToken>(scriptBody.Substring(jsonStart, jsonEnd - jsonStart));
                string fragment = payload?["html"]?.ToString();
                if (fragment == null)
                {
                    continue;
                }

                List<WeiboHotKey> weiboHotKey = new List<WeiboHotKey>();
                CQ                hotDom      = new CQ(fragment);

                foreach (var row in hotDom["tbody tr"])
                {
                    var cells = CQ.Create(row);
                    weiboHotKey.Add(new WeiboHotKey()
                    {
                        title    = cells["div.rank_content p a"].Text(),
                        type     = cells["div.rank_content p i"].Text(),
                        HotValue = GetInt(cells["p.star_num span"].Text())
                    });
                }

                using (var client = new RedisAccess(RedisConnect.WeiboHotKeyRedisPool))
                {
                    client.Set("Weibo", weiboHotKey);
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Pops one serialized item from the Redis sorted set <c>itemsKey</c>, runs it through
        /// the analysis stages (split, first, second, issue) and indexes the result through
        /// <c>ESAccess.IndexOnly</c>.
        /// </summary>
        /// <remarks>
        /// When the set is empty the method sleeps for 100 ms and returns. Indexing is attempted
        /// at most three times, with a one-minute pause between attempts. Failures are written to
        /// standard error and never propagate to the caller.
        /// </remarks>
        private static void ItemCrawl()
        {
            const int maxIndexAttempts = 3;

            using (var client = new RedisAccess(redisPool))
            {
                try
                {
                    string serializedItem = client.PopItemWithHighestScoreFromSortedSet(itemsKey);
                    if (string.IsNullOrEmpty(serializedItem))
                    {
                        // Nothing queued: sleep briefly to release the CPU.
                        Thread.Sleep(100);
                        return;
                    }

                    Palas.Protocol.PFItemToAnalyze itemToAnalyze = JsonConvert.DeserializeObject<Palas.Protocol.PFItemToAnalyze>(serializedItem);

                    Palas.Protocol.PFItemToAnalyze analyzeResult = AnalyzeItem.Analyzer(itemToAnalyze, Palas.Protocol.PFAnalyzeFlag.Splite);
                    if (analyzeResult == null)
                    {
                        Console.Error.WriteLine("ItemCrawl: analyzer returned no result; item skipped.");
                        return;
                    }

                    // 6. First analysis pass.
                    MultriAnalyzeFlag analyzeFlags = AnalyzeItem.BuildAnalyzeFlag((Enums.AnalyzeFlag)analyzeResult.AnalyzeFlag);
                    Palas.Protocol.PFItemToAnalyze analyzeFirstResult = AnalyzeItem.AnalyzerFirst(analyzeResult, analyzeFlags);

                    // 7. Second analysis pass.
                    Palas.Protocol.PFItemToAnalyze analyzeSecondResult = AnalyzeItem.AnalyzeSecond(analyzeFirstResult, analyzeFlags);

                    // 8. Per-issue analysis.
                    Palas.Protocol.PFItemToAnalyze analyzeIssueResult = AnalyzeItem.IssueAnalyzer(analyzeSecondResult, analyzeFlags);
                    if (analyzeIssueResult == null)
                    {
                        return;
                    }

                    // 9. Convert the protocol item to the index model and index it.
                    Item _item = TypeExchangeUtility.ExchangeItem(analyzeIssueResult.Item);
                    _item = FilterIssue.FilterExcludeExpression(_item);

                    Enums.ProcessStatus result = Enums.ProcessStatus.Failed;
                    for (int attempt = 1; attempt <= maxIndexAttempts && result == Enums.ProcessStatus.Failed; attempt++)
                    {
                        try
                        {
                            ESAccess.IndexOnly(_item);
                            result = Enums.ProcessStatus.Succeeded;
                        }
                        catch (Exception ex)
                        {
                            Console.Error.WriteLine("ItemCrawl: index attempt " + attempt + " of " + maxIndexAttempts + " failed: " + ex);

                            // Back off before the next attempt, but do not wait after the final one.
                            if (attempt < maxIndexAttempts)
                            {
                                Thread.Sleep(new TimeSpan(0, 1, 00));
                            }
                        }
                    }

                    // 10. Report the outcome.
                    Console.WriteLine(string.Format("Index Dup: {2} paper: {0}, Status: {1}", _item.ItemID, result.ToString(), _item.DuplicationID));
                }
                catch (Exception ex)
                {
                    // Best effort: the caller is expected to keep polling, so report and carry on.
                    Console.Error.WriteLine("ItemCrawl failed: " + ex);
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Pops one serialized WeChat article descriptor from the Redis sorted set
        /// <c>wechatSubsKey</c>, downloads and cleans the article HTML, runs it through the
        /// analysis stages (split, first, second, issue) and indexes the result through
        /// <c>ESAccess.IndexOnly</c>.
        /// </summary>
        /// <remarks>
        /// When the set is empty the method sleeps for 50 ms and returns. Descriptors without an
        /// id or URL, and articles whose content cannot be fetched, are dropped. Indexing is
        /// attempted at most three times, with a one-minute pause between attempts. Failures are
        /// written to standard error and never propagate to the caller.
        /// </remarks>
        private static void WechatCrawler()
        {
            // Analyze-flag bitmask applied to every WeChat item and its crawl record.
            const int analyzeFlag      = 312287;
            const int maxIndexAttempts = 3;

            using (var client = new RedisAccess(redisPool))
            {
                try
                {
                    string wechatItem = client.PopItemWithHighestScoreFromSortedSet(wechatSubsKey);
                    if (string.IsNullOrEmpty(wechatItem))
                    {
                        // Nothing queued: sleep briefly to release the CPU.
                        Thread.Sleep(50);
                        return;
                    }

                    WeChatItemModel wechatItemModel = JsonConvert.DeserializeObject<WeChatItemModel>(wechatItem);
                    if (wechatItemModel == null || string.IsNullOrEmpty(wechatItemModel.Id))
                    {
                        return;
                    }

                    if (string.IsNullOrEmpty(wechatItemModel.Url))
                    {
                        Console.Error.WriteLine("WechatCrawler: item " + wechatItemModel.Id + " has no URL; skipped.");
                        return;
                    }

                    Palas.Protocol.PFItem pfItem = new Palas.Protocol.PFItem();
                    pfItem.AuthorID           = wechatItemModel.WechatSubId;
                    pfItem.AuthorName         = wechatItemModel.WechatSubName;
                    pfItem.ContentDetailLevel = Palas.Protocol.PFContentDetailLevel.FullContent;
                    pfItem.Crawler            = "WechatSubs";
                    pfItem.CrawlID            = wechatItemModel.WechatSubId;
                    pfItem.FetchTime          = DateTimeUtility.GetUnixTimeStamp(DateTime.Now);
                    pfItem.ItemID             = wechatItemModel.Id;
                    pfItem.MediaID            = "WeChat";
                    pfItem.MediaName          = "微信";
                    pfItem.PubDate            = DateTimeUtility.GetUnixTimeStamp(wechatItemModel.PubDate);
                    // The stored URL is HTML-escaped; restore the raw ampersands before fetching.
                    pfItem.Url                = wechatItemModel.Url.Replace("&amp;", "&");
                    pfItem.CleanTitle         = wechatItemModel.Title;

                    if (wechatItemModel.Tags != null && wechatItemModel.Tags.Count > 0)
                    {
                        pfItem.Tag = string.Join(",", wechatItemModel.Tags);
                    }

                    string content = GetHtmlContent(pfItem.Url);
                    if (string.IsNullOrEmpty(content))
                    {
                        return;
                    }

                    pfItem.HTMLText  = HtmlFormattor.FormatHtml(content, pfItem.Url);
                    pfItem.CleanText = HTMLCleaner.CleanHTML(pfItem.HTMLText, true);

                    Palas.Protocol.PFItemToAnalyze pfItemToAnalyze = new Palas.Protocol.PFItemToAnalyze();
                    pfItemToAnalyze.AnalyzeFlag = analyzeFlag;
                    pfItemToAnalyze.Item        = pfItem;

                    pfItemToAnalyze.CrawlRecode              = new Palas.Protocol.PFCrawlRecode(pfItem.Crawler, pfItem.CrawlID, "Common", "", "N", analyzeFlag, 1, "WeChatSync", 0, 0, 0, "", 0, 0, true, pfItem.MediaID, pfItem.MediaName, pfItem.MediaChannel);
                    pfItemToAnalyze.CrawlRecode.Tags         = pfItem.Tag;
                    pfItemToAnalyze.CrawlRecode.JoinIssueIDs = string.Join(",", wechatItemModel.Customers);
                    pfItemToAnalyze.CrawlRecode.MediaType    = 11;
                    pfItemToAnalyze.CrawlRecode.MediaWeight  = 8;

                    Palas.Protocol.PFItemToAnalyze analyzeResult = AnalyzeItem.Analyzer(pfItemToAnalyze, Palas.Protocol.PFAnalyzeFlag.Splite);
                    if (analyzeResult == null)
                    {
                        Console.Error.WriteLine("WechatCrawler: analyzer returned no result for item " + pfItem.ItemID + "; skipped.");
                        return;
                    }

                    // 6. First analysis pass.
                    MultriAnalyzeFlag analyzeFlags = AnalyzeItem.BuildAnalyzeFlag((Enums.AnalyzeFlag)analyzeResult.AnalyzeFlag);
                    Palas.Protocol.PFItemToAnalyze analyzeFirstResult = AnalyzeItem.AnalyzerFirst(analyzeResult, analyzeFlags);

                    // 7. Second analysis pass.
                    Palas.Protocol.PFItemToAnalyze analyzeSecondResult = AnalyzeItem.AnalyzeSecond(analyzeFirstResult, analyzeFlags);

                    // 8. Per-issue analysis.
                    Palas.Protocol.PFItemToAnalyze analyzeIssueResult = AnalyzeItem.IssueAnalyzer(analyzeSecondResult, analyzeFlags);
                    if (analyzeIssueResult == null)
                    {
                        return;
                    }

                    // 9. Convert the protocol item to the index model and index it.
                    Item _item = TypeExchangeUtility.ExchangeItem(analyzeIssueResult.Item);
                    _item = FilterIssue.FilterExcludeExpression(_item);

                    Enums.ProcessStatus result = Enums.ProcessStatus.Failed;
                    for (int attempt = 1; attempt <= maxIndexAttempts && result == Enums.ProcessStatus.Failed; attempt++)
                    {
                        try
                        {
                            ESAccess.IndexOnly(_item);
                            result = Enums.ProcessStatus.Succeeded;
                        }
                        catch (Exception ex)
                        {
                            Console.Error.WriteLine("WechatCrawler: index attempt " + attempt + " of " + maxIndexAttempts + " failed: " + ex);

                            // Back off before the next attempt, but do not wait after the final one.
                            if (attempt < maxIndexAttempts)
                            {
                                Thread.Sleep(new TimeSpan(0, 1, 00));
                            }
                        }
                    }

                    // 10. Report the outcome.
                    Console.WriteLine(string.Format("Index paper: {0}, Status: {1}", _item.ItemID, result.ToString()));
                }
                catch (Exception ex)
                {
                    // Best effort: the caller is expected to keep polling, so report and carry on.
                    Console.Error.WriteLine("WechatCrawler failed: " + ex);
                }
            }
        }