예제 #1
0
        /// <summary>
        /// Runs one crawl pass: requests a batch of base-content items with <c>Deque(10)</c>
        /// and passes each of them to <c>Crawler</c>, one after another.
        /// </summary>
        /// <remarks>
        /// Nothing is done when <c>IsInProccess</c> reports that a pass is already running for
        /// <c>CrawlInProccessIsInProccessCacheKey</c>. Once a pass has started, the schedule state
        /// is always set back to idle, whether the pass succeeds or throws.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when the pass ran to completion; <c>false</c> when it was skipped because
        /// another pass is in progress, or when an exception was thrown during the pass.
        /// </returns>
        public async Task <bool> CrawlManager()
        {
            if (IsInProccess(CrawlInProccessIsInProccessCacheKey))
            {
                return(false);
            }

            try
            {
                SetScheduleInProccess(SchedulerStat.inProccess, CrawlInProccessIsInProccessCacheKey);

                var baseContentRepository = new Repository.BaseContentRepository();
                var queuedItems           = await baseContentRepository.Deque(10);

                //TODO: loop over the base content and just pass each item to the crawl function;
                //      changing the status and adding to the content repository is expected to
                //      happen there (possibly not true - verify).
                foreach (var item in queuedItems)
                {
                    await Crawler(item);
                }

                return(true);
            }
            catch (Exception ex)
            {
                // Best effort: callers only see "false", so leave a trace of what went wrong.
                System.Diagnostics.Trace.TraceError("CrawlManager failed: {0}", ex);
                return(false);
            }
            finally
            {
                // Single place that releases the in-process flag for every exit path above.
                SetScheduleInProccess(SchedulerStat.idle, CrawlInProccessIsInProccessCacheKey);
            }
        }
예제 #2
0
        /// <summary>
        /// Runs one link-crawl pass over the sites returned by <c>DequeSite(10)</c>.
        /// </summary>
        /// <remarks>
        /// For every site the method looks up the site's RSS entries, picks the first one whose
        /// <c>url</c> is null or empty, and calls <c>CrawlerLink</c>. When the crawl returns a
        /// non-null result the site is recorded as successful and every returned entry is turned
        /// into a <c>BaseContent</c> with <c>bycrawled = true</c>. After the loop the collected
        /// content is passed to <c>BaseContentRepository.Add</c> and the successful sites to
        /// <c>ChangeLastCarawlDateSite</c>.
        /// Nothing is done when <c>IsInProccess</c> reports a running pass for
        /// <c>CrawlLinkIsInProccessCacheKey</c>; once a pass has started, the schedule state is
        /// always set back to idle.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when the pass ran to completion; <c>false</c> when it was skipped or an
        /// exception was thrown during the pass.
        /// </returns>
        public async Task <bool> CrawlLinkManager()
        {
            if (IsInProccess(CrawlLinkIsInProccessCacheKey))
            {
                return(false);
            }

            try
            {
                SetScheduleInProccess(SchedulerStat.inProccess, CrawlLinkIsInProccessCacheKey);

                var contentManagerRepository = new Repository.ContentManagerRepository();
                var baseContentRepository    = new Repository.BaseContentRepository();
                var crawledSites             = new List <Models.ContentManager.Site>();
                var newContents              = new List <Models.BaseContent.BaseContent>();
                var sites                    = await contentManagerRepository.DequeSite(10);

                foreach (var site in sites)
                {
                    // The RSS entry without a url is the one crawled content gets attached to.
                    var siteRssList = await contentManagerRepository.GetSiteAllRss(site._id);
                    var rssModel    = siteRssList.FirstOrDefault(q => string.IsNullOrEmpty(q.url));
                    var links       = await CrawlerLink(site);

                    if (links == null)
                    {
                        // A null result means the crawl failed; the site is not marked as crawled.
                        continue;
                    }

                    crawledSites.Add(site);

                    // Same for every link of this site, so resolve it once.
                    var rssId = rssModel != null ? rssModel._id : "";

                    foreach (var link in links)
                    {
                        newContents.Add(new Models.BaseContent.BaseContent()
                        {
                            dateticks       = link.dateticks,
                            description     = link.description,
                            // NOTE(review): local-time ticks; kept as-is so new rows match existing ones.
                            insertdateticks = DateTime.Now.Ticks,
                            rssid           = rssId,
                            title           = link.title,
                            url             = link.url,
                            userid          = site.userid,
                            bycrawled       = true
                        });
                    }
                }

                //TODO: resolve the concurrency problem - repeated rows are inserted if this waits for the previous task.
                await baseContentRepository.Add(newContents);

                await contentManagerRepository.ChangeLastCarawlDateSite(crawledSites);

                // Cache the url for the "is repeated url" check if it is not repeated ...
                //var AddToRedisRes = await BaseContentRepository.AddRssURlInRedis(BaseContentList);

                return(true);
            }
            catch (Exception ex)
            {
                // Best effort: callers only see "false", so leave a trace of what went wrong.
                System.Diagnostics.Trace.TraceError("CrawlLinkManager failed: {0}", ex);
                return(false);
            }
            finally
            {
                // Single place that releases the in-process flag for every exit path above.
                SetScheduleInProccess(SchedulerStat.idle, CrawlLinkIsInProccessCacheKey);
            }
        }
예제 #3
0
        /// <summary>
        /// Returns one page (20 items, newest <c>dateticks</c> first) of the current user's base
        /// content, optionally narrowed by category, tag, site and RSS feed.
        /// </summary>
        /// <param name="simpleSearch">
        /// Search criteria read from the request body. <c>page</c> values below 1 are treated as 1.
        /// A filter is ignored when it is empty or contains / equals <c>"-1"</c>.
        /// </param>
        /// <returns>
        /// A result with <c>ServiceResultStatus.OK</c>, the page data and the previous / next page
        /// numbers; or, when any exception is thrown, a result with <c>ServiceResultStatus.Error</c>
        /// and the base exception message in <c>ServiceResultMassage</c>.
        /// </returns>
        public async Task <ViewModel.BaseContent.BaseContentManageServiceResult> PostManage([FromBody] ViewModel.BaseContent.BaseContentSearchVM simpleSearch)
        {
            try
            {
                var page  = simpleSearch.page < 1 ? 1 : simpleSearch.page;
                var limit = 20;
                var skip  = limit * (page - 1);

                var rssids = new List <string>();
                var contentManagerRepository = new Repository.ContentManagerRepository();
                var _userRssList             = await contentManagerRepository.GetUserAllRss(Util.Common.My.id);

                if (_userRssList.Any())
                {
                    // Filters are composed lazily and materialized once, instead of building a new list per filter.
                    var filtered = _userRssList.AsEnumerable();

                    if (simpleSearch.categories != null && simpleSearch.categories.Any() && !simpleSearch.categories.Any(s => s == "-1"))
                    {
                        // A feed without categories simply does not match (instead of throwing).
                        filtered = filtered.Where(s => s.categories != null && s.categories.Any(x => simpleSearch.categories.Contains(x)));
                    }

                    if (simpleSearch.tags != null && simpleSearch.tags.Any() && !simpleSearch.tags.Any(s => s == "-1"))
                    {
                        // A feed without tags simply does not match (instead of throwing).
                        filtered = filtered.Where(s => s.tags != null && s.tags.Any(x => simpleSearch.tags.Contains(x)));
                    }

                    if (!string.IsNullOrWhiteSpace(simpleSearch.siteid) && simpleSearch.siteid != "-1")
                    {
                        filtered = filtered.Where(s => s.siteid == simpleSearch.siteid);
                    }

                    if (!string.IsNullOrWhiteSpace(simpleSearch.rssid) && simpleSearch.rssid != "-1")
                    {
                        filtered = filtered.Where(s => s._id == simpleSearch.rssid);
                    }

                    rssids = filtered.Select(s => s._id).ToList();
                }

                // Numbers are formatted with the invariant culture so the command text never depends on the thread culture.
                var    invariant = System.Globalization.CultureInfo.InvariantCulture;
                string q         = "{aggregate:'basecontent',pipeline:[{$match:{ 'rssid':{$in:" + rssids.toJSON() + "}}},{$sort : { 'dateticks' : -1  }},{$skip:" + skip.ToString(invariant) + "},{$limit:" + limit.ToString(invariant) + "}]}";
                var    _model    = await NoSql.Instance.RunCommandAsync <BsonDocument>(q);

                var contents = new List <Models.BaseContent.BaseContent>();
                foreach (var document in _model.GetValue("result").AsBsonArray)
                {
                    contents.Add(MongoDB.Bson.Serialization.BsonSerializer.Deserialize <Models.BaseContent.BaseContent>(document.AsBsonDocument));
                }

                var model = new List <ViewModel.BaseContent.SimpleBaseContentManageVM>();
                foreach (var item in contents)
                {
                    // A missing feed or site record leaves the corresponding title null
                    // instead of failing the whole page.
                    // NOTE(review): RssInfo / SiteInfo are invoked on `ContentManagerRepository`, not on the
                    // local instance created above - presumably static or a class member; confirm.
                    string rssTitle  = null;
                    string siteTitle = null;

                    var rssModel = await ContentManagerRepository.RssInfo(item.rssid);
                    if (rssModel != null)
                    {
                        rssTitle = rssModel.title;

                        var siteModel = await ContentManagerRepository.SiteInfo(rssModel.siteid);
                        if (siteModel != null)
                        {
                            siteTitle = siteModel.title;
                        }
                    }

                    model.Add(new ViewModel.BaseContent.SimpleBaseContentManageVM()
                    {
                        dateticks       = item.dateticks,
                        description     = item.description,
                        insertdateticks = item.insertdateticks,
                        pdate           = Common.ConvertG2JDateText(new DateTime(item.dateticks), true),
                        pinsertdate     = Common.ConvertG2JDateText(new DateTime(item.insertdateticks), true),
                        rssid           = item.rssid,
                        rsstitle        = rssTitle,
                        sitetitle       = siteTitle,
                        title           = item.title,
                        url             = item.url,
                        userid          = item.userid,
                        _id             = item._id,
                    });
                }

                var result = new ViewModel.BaseContent.BaseContentManageServiceResult();
                result.Data                = model;
                result.CurrentPage         = page;
                // On the first page PrevPage stays 1.
                result.PrevPage            = (page == 1 ? 2 : page) - 1;
                result.NextPage            = page + 1;
                result.ServiceResultStatus = (int)Util.Common.ServiceResultStatus.OK;
                return(result);
            }
            catch (Exception ex)
            {
                return(new ViewModel.BaseContent.BaseContentManageServiceResult()
                {
                    ServiceResultStatus = (int)Util.Common.ServiceResultStatus.Error,
                    ServiceResultMassage = ex.GetBaseException().Message
                });
            }
        }
예제 #4
0
        /// <summary>
        /// Runs one RSS fetch pass over the feeds returned by <c>DequeRss(50)</c>.
        /// </summary>
        /// <remarks>
        /// Feeds whose <c>url</c> is null or whitespace are skipped. For every other feed
        /// <c>RssFetcher</c> is called; when it returns a non-null result the feed is recorded as
        /// successful and every returned entry is turned into a <c>BaseContent</c> with
        /// <c>bycrawled = false</c>. After the loop the collected content is passed to
        /// <c>BaseContentRepository.Add</c> and the successful feeds to
        /// <c>ChangeLastCarawlDateRss</c>.
        /// Nothing is done when <c>IsInProccess</c> reports a running pass; once a pass has
        /// started, the schedule state is always set back to idle.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when the pass ran to completion; <c>false</c> when it was skipped or an
        /// exception was thrown during the pass.
        /// </returns>
        public async Task <bool> RssFetcherManager()
        {
            //TODO: repeated-URL problem - all urls are collected and only then inserted into redis, which is
            //      probably incorrect; maybe add each url to redis first and then add it to the list that is
            //      inserted into mongo (LN 36).
            if (IsInProccess())
            {
                return(false);
            }

            try
            {
                SetScheduleInProccess(SchedulerStat.inProccess);

                var contentManagerRepository = new Repository.ContentManagerRepository();
                var baseContentRepository    = new Repository.BaseContentRepository();
                var fetchedFeeds             = new List <Models.ContentManager.Rss>();
                var newContents              = new List <Models.BaseContent.BaseContent>();
                var feeds                    = await contentManagerRepository.DequeRss(50);

                foreach (var feed in feeds)
                {
                    if (string.IsNullOrWhiteSpace(feed.url))
                    {
                        // Nothing to fetch for a feed without a url.
                        continue;
                    }

                    var entries = await RssFetcher(feed.url);
                    if (entries == null)
                    {
                        // A null result means the fetch failed; the feed is not marked as fetched.
                        continue;
                    }

                    fetchedFeeds.Add(feed);

                    foreach (var entry in entries)
                    {
                        newContents.Add(new Models.BaseContent.BaseContent()
                        {
                            dateticks       = entry.dateticks,
                            description     = entry.description,
                            // NOTE(review): local-time ticks; kept as-is so new rows match existing ones.
                            insertdateticks = DateTime.Now.Ticks,
                            rssid           = feed._id,
                            title           = entry.title,
                            url             = entry.url,
                            userid          = feed.userid,
                            bycrawled       = false
                        });
                    }
                }

                //TODO: resolve the concurrency problem - repeated rows are inserted if this waits for the previous task.
                await baseContentRepository.Add(newContents);

                await contentManagerRepository.ChangeLastCarawlDateRss(fetchedFeeds);

                // Cache the url for the "is repeated url" check if it is not repeated ...
                //var AddToRedisRes = await BaseContentRepository.AddRssURlInRedis(BaseContentList);

                return(true);
            }
            catch (Exception ex)
            {
                // Best effort: callers only see "false", so leave a trace of what went wrong.
                System.Diagnostics.Trace.TraceError("RssFetcherManager failed: {0}", ex);
                return(false);
            }
            finally
            {
                // Single place that releases the in-process flag for every exit path above.
                SetScheduleInProccess(SchedulerStat.idle);
            }
        }