/// <summary>
/// Crawls pins for the active keyword identified by <paramref name="Id"/> and persists them
/// through <c>CMSPinFactory.CreateOrUpdate</c>.
/// </summary>
/// <remarks>
/// A keyword is crawled only when it has never been crawled (its UpdatedDate is null or equals
/// its CreatedDate) or when more than 5 minutes have passed since UpdatedDate. When the keyword
/// is not found, is not active, or is still inside the 5 minute window, nothing is crawled and
/// the method returns <c>true</c>.
/// </remarks>
/// <param name="Id">ID of the keyword row to crawl.</param>
/// <param name="createdBy">User recorded as UpdatedBy on the keyword and forwarded to CreateOrUpdate.</param>
/// <param name="msg">Forwarded by reference to CreateOrUpdate; set to a generic failure text when an exception is caught.</param>
/// <returns><c>false</c> when CreateOrUpdate returns false or an exception is caught; otherwise <c>true</c>.</returns>
public bool CrawlData(string Id, string createdBy, ref string msg)
        {
            /* SECURITY: this fallback session cookie is committed to source control. It is kept
             * byte-for-byte so behavior does not change, but it should be moved to protected
             * configuration and rotated. Account credentials must never be stored in comments. */
            const string DefaultCookies = "_b = \"AS+B1gn0GdpGgLQl83JubKX1bG19kiuUUvX8lnvITKDHNq2tJcgqXNIQ0cLN+kjq4KM=\"; _pinterest_pfob = enabled; _ga = GA1.2.229901352.1528170174; pnodepath = \"/pin4\"; fba = True; G_ENABLED_IDPS = google; bei = false; logged_out = True; sessionFunnelEventLogged = 1; cm_sub = denied; _auth = 1; csrftoken = fkrSitmDb4vW2kT1G3GfOkcC8mPvl0kV; _pinterest_sess = \"TWc9PSZWaE0xeDZOVm4yL3Yva0VSazRkRjlHR013bk9mdVJBcU9zVEtEOUhXVjhKSFZmZUEreWJiNDYrV3FubVRoVzdqdDF0dmtDcXErcFF6MmlXQUQ3RDVzWERCWTZYZUt4eXMzemkvOGlXRFZQT1J6MjkwampOZlVJUFEvTnNkTUZYMkJ3dGxPTTRKaVIwdGNJY2h1MUhaSHlFT3djd0huNHE0YmtiTTBZR3dVTVB3d0RyYVE4UC8rMjZCYWo2eTJLNGJVSHR6KzRENjlWVE0rNFMxNWdGMUtVL0VtL2RDZktiUFg3M1Y2Z2dEbllPeUxFR3FOdEd6SUJSRTlBMWs1YkJnbTBlWHhwcC9pMmlqRmoydlh0V2VQSGYvYk1zeXlSM2dIU1dmUXIyRWVxWVBPdTYzbHFjcVhYRWRBT0FTQ3VBNmdWMm5QUlREZDdSY2ZQeE1NWklqSUZxNDllVHF1WUVzRFRrRjBXQnZCMVBGTlYxT0UzM1daeHFOUnBBTzliMzFJdmovQ1hQR2Vvc1pkTHNxL1FjT3FrWllTR1d6VHFrd2g5cFBFMmswM3dIa0dOOHVCbGd6aVlKUkJlZlZNeWVyRTBYREcrQVFlUTdRc1NqMlFlQ3RvaWlZMjJXZ1RURmIxNDA2d2JTODRGNk9BYWpoRzVJTUhLMkJ4UDJGb0NmN0NOQXpmZ0FoR08xcElmWmh5S29OeGRadFpDVWR1RGw3ZzZGRS81SlU4UlhSUVlIWm4wRzRJMGFVaTQzdGI3T2ovSCtHR2ZSWlk0M1RCN2JXSmZJRFdQUUpZWVpRMW5ta0pMbXgwT2NZckZJcHg0RTJrTjJlZWJIdXFSdkdJTWNXc2d3NHpXdzFTRGhKVkN4YmY4SCtJaTdSQSt0K2dhc1VDc0tkNnJIeVFhb3BHeDd6OUwvamZsanRKV0ZYNGFmZWFQNGlqNFVqekVFcGUreHU4UGVqZXRuMFVDNE1QbkFuWnJ6YzNjMTF3dVNZUHJ2MjBwMi8xeXNwbnczMlpSa3cvbzVPQUhQSyswNlU4Y2JQaThxNWN1NWtHVm83SWc0YjJVVW1tUWZYcHpWR2RCYS8wRE0yb2RtNUs0NzRteFp4JjVhOXZDbjB5RGtxL1lROE5WOVNDMjB4c1dMND0=\"";

            NSLog.Logger.Info("CrawlData: " + Id);
            var result = true;

            try
            {
                using (var _db = new CMS_Context())
                {
                    /* get key by ID (active keywords only) */
                    var keyWord = _db.CMS_KeyWord.FirstOrDefault(o => o.ID == Id && o.Status == (byte)Commons.EStatus.Active);
                    if (keyWord != null)
                    {
                        /* check time span crawl: UpdatedDate is nullable, so a missing value is
                         * treated as "never crawled" instead of throwing on .Value */
                        var timeSpanCrawl = DateTime.Now - keyWord.UpdatedDate;
                        var neverCrawled  = !timeSpanCrawl.HasValue || keyWord.UpdatedDate == keyWord.CreatedDate;
                        if (neverCrawled || timeSpanCrawl.Value.TotalMinutes > 5) /* 5min to crawl data again */
                        {
                            /* stamp the crawl start first; the stamp is intentionally not rolled
                             * back when the crawl fails */
                            keyWord.UpdatedDate = DateTime.Now;
                            keyWord.UpdatedBy   = createdBy;
                            keyWord.KeyWord     = keyWord.KeyWord.Trim();
                            _db.SaveChanges();

                            /* pick the cookie of the linked crawl account when there is one */
                            string accountCookie = null;
                            if (!string.IsNullOrEmpty(keyWord.CrawlAccountID) && keyWord.CMS_Account != null)
                            {
                                accountCookie = keyWord.CMS_Account.Cookies;
                            }

                            if (!string.IsNullOrEmpty(accountCookie))
                            {
                                /* NOTE(review): the account cookie is written to CrawlerBoardHelper,
                                 * while the crawl below goes through CrawlerHelper - confirm this is
                                 * intended and not a typo. */
                                CrawlerBoardHelper._Cookies = accountCookie;
                            }
                            else
                            {
                                CrawlerHelper._Cookies = DefaultCookies;
                            }

                            var searchStr = HttpUtility.UrlEncode(keyWord.KeyWord);

                            /* get first class result */
                            var           model = new CMS_CrawlerModels();
                            CMSPinFactory _fac  = new CMSPinFactory();
                            CrawlerHelper.Get_Tagged_Pins(ref model, searchStr, Commons.PinDefault);
                            if (model != null && model.Pins != null && model.Pins.Any())
                            {
                                /* get second class result; the ID list is a snapshot taken before
                                 * the related-pin crawl starts */
                                var listPinID = model.Pins.Select(o => o.ID).ToList();

                                /* NOTE(review): every iteration receives the same model by ref -
                                 * presumably Get_Tagged_OrtherPins appends to it; verify that the
                                 * helper is safe to call concurrently. */
                                Parallel.ForEach(listPinID, pinID =>
                                {
                                    CrawlerHelper.Get_Tagged_OrtherPins(ref model, searchStr, Commons.PinOrtherDefault, "", 1, pinID);
                                });
                            }

                            /* create or update pin */
                            var res = _fac.CreateOrUpdate(model.Pins, keyWord.ID, createdBy, ref msg);
                            if (res)
                            {
                                /* refresh the stamp so the 5 minute window starts at crawl end */
                                keyWord.UpdatedDate = DateTime.Now;
                                _db.SaveChanges();
                            }
                            else
                            {
                                result = false;
                            }
                        }
                    }
                }

                NSLog.Logger.Info("ResponseCrawlData: " + Id, result);
            }
            catch (Exception ex)
            {
                msg    = "Crawl data is unsuccessfully.";
                result = false;
                LogHelper.WriteLogs("ErrorCrawlData: " + Id, JsonConvert.SerializeObject(ex));
                NSLog.Logger.Error("ErrorCrawlData: " + Id, ex);
            }

            return(result);
        }
Пример #2
0
        /// <summary>
        /// Crawls posts for the keyword identified by <paramref name="Id"/> through
        /// <c>CrawlerFbHelpers_v2.CrawlerAllFb</c> and persists them through
        /// <c>CMSPinFactory.CreateOrUpdate</c>.
        /// </summary>
        /// <remarks>
        /// A keyword is crawled only when it has never been crawled (its UpdatedDate is null or
        /// equals its CreatedDate) or when more than 5 minutes have passed since UpdatedDate.
        /// When the keyword is not found or is still inside the 5 minute window, nothing is
        /// crawled and the method returns <c>true</c>.
        /// </remarks>
        /// <param name="Id">ID of the keyword row to crawl.</param>
        /// <param name="createdBy">User recorded as UpdatedBy on the keyword and forwarded to CreateOrUpdate.</param>
        /// <param name="msg">Forwarded by reference to CreateOrUpdate; set to a generic failure text when an exception is caught.</param>
        /// <returns>
        /// <c>false</c> when the crawl produced no pins, CreateOrUpdate returns false, or an
        /// exception is caught; otherwise <c>true</c>.
        /// </returns>
        public bool CrawlData(string Id, string createdBy, ref string msg)
        {
            NSLog.Logger.Info("CrawlData: " + Id);
            var model    = new CMS_CrawlerModels();
            var sequence = 0;
            var key      = "";
            var result   = true;

            try
            {
                using (var _db = new CMS_Context())
                {
                    /* get key by ID */
                    var keyWord = _db.CMS_KeyWord.FirstOrDefault(o => o.ID == Id);
                    if (keyWord != null)
                    {
                        /* kept for the summary log line written after the context is disposed */
                        sequence = keyWord.Sequence;
                        key      = keyWord.KeyWord;

                        /* check time span crawl: UpdatedDate is nullable, so a missing value is
                         * treated as "never crawled" instead of throwing on .Value */
                        var timeSpanCrawl = DateTime.Now - keyWord.UpdatedDate;
                        var neverCrawled  = !timeSpanCrawl.HasValue || keyWord.UpdatedDate == keyWord.CreatedDate;
                        if (neverCrawled || timeSpanCrawl.Value.TotalMinutes > 5) /* 5min to crawl data again */
                        {
                            /* stamp the crawl start first; the stamp is intentionally not rolled
                             * back when the crawl fails */
                            keyWord.UpdatedDate = DateTime.Now;
                            keyWord.UpdatedBy   = createdBy;
                            _db.SaveChanges();

                            /* call crawler api to crawl data */
                            CMSPinFactory _fac = new CMSPinFactory();

                            /* only the Cookies column of the active accounts is needed */
                            var listCookie = _db.CMS_Account
                                                .Where(o => o.Status == (byte)Commons.EStatus.Active && o.IsActive)
                                                .Select(o => o.Cookies)
                                                .ToList();

                            /* NOTE(review): RamdomCookie is called even when no active account
                             * exists - confirm how it handles an empty list. */
                            var _cookie = CommonHelper.RamdomCookie(listCookie);
                            CrawlerFbHelpers_v2.CrawlerAllFb(keyWord.KeyWord, _cookie, ref model);

                            /* an empty crawl is reported as a failure (res stays false) */
                            var res = false;
                            if (model.Pins.Count > 0)
                            {
                                res = _fac.CreateOrUpdate(model.Pins, keyWord.ID, createdBy, ref msg);
                            }

                            if (res)
                            {
                                /* refresh the stamp so the 5 minute window starts at crawl end */
                                keyWord.UpdatedDate = DateTime.Now;
                                _db.SaveChanges();
                            }
                            else
                            {
                                result = false;
                            }
                        }
                    }
                }

                LogHelper.WriteLogs(sequence.ToString() + " " + key, "Num post: " + model.Pins.Count.ToString());
                NSLog.Logger.Info("ResponseCrawlData", result.ToString());
            }
            catch (Exception ex)
            {
                msg    = "Crawl data is unsuccessfully.";
                result = false;

                LogHelper.WriteLogs("ErrorCrawlData: " + Id, JsonConvert.SerializeObject(ex));
                NSLog.Logger.Error("ErrorCrawlData: " + Id, ex);
            }

            return(result);
        }
Пример #3
0
        /// <summary>
        /// Crawls the photo search tab for the keyword identified by <paramref name="Id"/> through
        /// <c>CrawlerFBToolHelpers</c>, keeps the pins created within the last 7 days and persists
        /// them through <c>CMSPinFactory.CreateOrUpdate</c>.
        /// </summary>
        /// <remarks>
        /// A keyword is crawled only when it has never been crawled (its UpdatedDate is null or
        /// equals its CreatedDate) or when more than 5 minutes have passed since UpdatedDate.
        /// When the keyword is not found or is still inside the 5 minute window, nothing is
        /// crawled and the method returns <c>true</c>. The post and people tab crawls are
        /// currently disabled, so their models stay as constructed.
        /// </remarks>
        /// <param name="Id">ID of the keyword row to crawl.</param>
        /// <param name="createdBy">User recorded as UpdatedBy on the keyword and forwarded to CreateOrUpdate.</param>
        /// <param name="msg">Forwarded by reference to CreateOrUpdate; set to a generic failure text when an exception is caught.</param>
        /// <returns>
        /// <c>false</c> when the crawl produced no pins, CreateOrUpdate returns false, or an
        /// exception is caught; otherwise <c>true</c>.
        /// </returns>
        public bool CrawlData(string Id, string createdBy, ref string msg)
        {
            NSLog.Logger.Info("CrawlData: " + Id);
            var      model    = new CMS_CrawlerModels();
            var      sequence = 0;
            var      key      = "";
            var      _cookie  = "";
            DateTime lastdate = DateTime.Now.AddDays(-7);
            DateTime datenow  = DateTime.Now;

            var result = true;

            try
            {
                using (var _db = new CMS_Context())
                {
                    /* get key by ID */
                    var keyWord = _db.CMS_KeyWord.FirstOrDefault(o => o.ID == Id);
                    if (keyWord != null)
                    {
                        /* kept for the summary log line written after the context is disposed */
                        sequence = keyWord.Sequence;
                        key      = keyWord.KeyWord;

                        /* check time span crawl: UpdatedDate is nullable, so a missing value is
                         * treated as "never crawled" instead of throwing on .Value */
                        var timeSpanCrawl = DateTime.Now - keyWord.UpdatedDate;
                        var neverCrawled  = !timeSpanCrawl.HasValue || keyWord.UpdatedDate == keyWord.CreatedDate;
                        if (neverCrawled || timeSpanCrawl.Value.TotalMinutes > 5) /* 5min to crawl data again */
                        {
                            /* stamp the crawl start first; the stamp is intentionally not rolled
                             * back when the crawl fails */
                            keyWord.UpdatedDate = DateTime.Now;
                            keyWord.UpdatedBy   = createdBy;
                            _db.SaveChanges();

                            /* call crawler api to crawl data */
                            CMSPinFactory _fac = new CMSPinFactory();

                            /* only the Cookies column of active accounts that have one is needed */
                            var listCookie = _db.CMS_Account
                                                .Where(o => o.Status == (byte)Commons.EStatus.Active && o.IsActive && !string.IsNullOrEmpty(o.Cookies))
                                                .Select(o => o.Cookies)
                                                .ToList();

                            /* NOTE(review): RamdomCookie is called even when no usable account
                             * exists - confirm how it handles an empty list. */
                            _cookie = CommonHelper.RamdomCookie(listCookie);

                            /* crawler tab post - the CrawlerNow call for this tab is disabled
                             * (it used q = "keywords_search(<keyword, spaces as '+'>)" and
                             * ref_path = "/search/str/<keyword>/stories-keyword/stories-public") */
                            var modelPost = new CMS_CrawlerModels();
                            NSLog.Logger.Info("done crawler tab post : ", CountPins(modelPost));
                            if (CountPins(modelPost) > 0)
                            {
                                model.Pins.AddRange(modelPost.Pins);
                            }

                            /* crawler tab people - the CrawlerNow call for this tab is disabled
                             * (it used q = "stories-opinion(stories-keyword(<keyword>))" and
                             * ref_path = "/search/str/<keyword>/stories-keyword/stories-opinion") */
                            var modelPeople = new CMS_CrawlerModels();
                            NSLog.Logger.Info("done crawler tab people : ", CountPins(modelPeople));
                            if (CountPins(modelPeople) > 0)
                            {
                                model.Pins.AddRange(modelPeople.Pins);
                            }

                            /* crawler tab photo */
                            var    modelPhoto   = new CMS_CrawlerModels();
                            string keywordQuery = keyWord.KeyWord.Replace(" ", "+");
                            string q            = "photos-keyword(" + keywordQuery + ")";
                            string ref_path     = "/search/str/" + keywordQuery + "/photos-keyword";
                            CrawlerFBToolHelpers.CrawlerNow(q, ref_path, "grid", (byte)Commons.EType.Photo, _cookie, 70, ref modelPhoto);

                            var options = new ParallelOptions {
                                MaxDegreeOfParallelism = 10
                            };

                            /* crawler detail tab photo - guarded first, because Parallel.ForEach
                             * rejects a null source */
                            if (CountPins(modelPhoto) > 0)
                            {
                                /* NOTE(review): pin is passed by ref from the lambda parameter; if
                                 * CrawlerDetail assigns a new instance, the list element is not
                                 * replaced - confirm it only mutates the pin in place. */
                                Parallel.ForEach(modelPhoto.Pins, options, pin =>
                                {
                                    CrawlerFBToolHelpers.CrawlerDetail(pin.PhotoID, _cookie, (byte)Commons.EType.Photo, ref pin);
                                });
                            }
                            NSLog.Logger.Info("done crawler tab photo : ", CountPins(modelPhoto));
                            if (CountPins(modelPhoto) > 0)
                            {
                                model.Pins.AddRange(modelPhoto.Pins);
                            }

                            /* an empty crawl is reported as a failure (res stays false) */
                            var res = false;
                            if (model.Pins.Count > 0)
                            {
                                NSLog.Logger.Info("done crawler before 7 days ago : ", model.Pins.Count);
                                /* check 7 days ago: keep pins created inside [lastdate, datenow] */
                                model.Pins = model.Pins.Where(o => o.Created_At >= lastdate && o.Created_At <= datenow).ToList();
                                NSLog.Logger.Info("done crawler after 7 days ago : ", model.Pins.Count);

                                /* photo pins already got their detail crawl above */
                                Parallel.ForEach(model.Pins, options, pin =>
                                {
                                    if (pin.Type != (byte)Commons.EType.Photo)
                                    {
                                        CrawlerFBToolHelpers.CrawlerDetail(pin.PhotoID, _cookie, (byte)Commons.EType.Post, ref pin);
                                    }
                                });

                                res = _fac.CreateOrUpdate(model.Pins, keyWord.ID, createdBy, keyWord.KeyWord, ref msg);
                            }

                            if (res)
                            {
                                /* refresh the stamp so the 5 minute window starts at crawl end */
                                keyWord.UpdatedDate = DateTime.Now;
                                _db.SaveChanges();
                            }
                            else
                            {
                                result = false;
                            }
                        }
                    }
                }

                LogHelper.WriteLogs(sequence.ToString() + " " + key, "Num post: " + model.Pins.Count.ToString());
                NSLog.Logger.Info("ResponseCrawlData", result.ToString());
            }
            catch (Exception ex)
            {
                msg    = "Crawl data is unsuccessfully.";
                result = false;

                LogHelper.WriteLogs("ErrorCrawlData: " + Id, JsonConvert.SerializeObject(ex));
                NSLog.Logger.Error("ErrorCrawlData: " + Id, ex);
            }

            return(result);
        }

        /// <summary>Returns the number of pins in a crawl result, or 0 when the result or its pin list is null.</summary>
        private static int CountPins(CMS_CrawlerModels crawled)
        {
            return (crawled != null && crawled.Pins != null) ? crawled.Pins.Count : 0;
        }