/// <summary>
/// Crawls pins for the active keyword identified by <paramref name="Id"/> and stores them
/// through <c>CMSPinFactory.CreateOrUpdate</c>.
/// </summary>
/// <remarks>
/// The keyword is crawled only when its <c>UpdatedDate</c> has no value, equals its
/// <c>CreatedDate</c>, or is more than 5 minutes old. When no active keyword matches, or the
/// keyword is not yet due, nothing is crawled and the method returns <c>true</c>.
/// The keyword's <c>UpdatedDate</c> is stamped before crawling and is not restored when the
/// crawl fails.
/// </remarks>
/// <param name="Id">ID of the keyword row to crawl.</param>
/// <param name="createdBy">Stored as <c>UpdatedBy</c> on the keyword and forwarded to <c>CreateOrUpdate</c>.</param>
/// <param name="msg">Forwarded by reference to <c>CreateOrUpdate</c>; set to a generic error text when an exception is caught.</param>
/// <returns><c>false</c> when <c>CreateOrUpdate</c> reports failure or an exception is caught; otherwise <c>true</c>.</returns>
public bool CrawlData(string Id, string createdBy, ref string msg)
{
    // SECURITY(review): this fallback session cookie is committed to source control. It is kept
    // (once, instead of twice) so behavior does not change, but it should be moved to protected
    // configuration and rotated. Plaintext account credentials that used to sit in a comment
    // here have been removed.
    const string fallbackCookies =
        "_b = \"AS+B1gn0GdpGgLQl83JubKX1bG19kiuUUvX8lnvITKDHNq2tJcgqXNIQ0cLN+kjq4KM=\"; _pinterest_pfob = enabled; _ga = GA1.2.229901352.1528170174; pnodepath = \"/pin4\"; fba = True; G_ENABLED_IDPS = google; bei = false; logged_out = True; sessionFunnelEventLogged = 1; cm_sub = denied; _auth = 1; csrftoken = fkrSitmDb4vW2kT1G3GfOkcC8mPvl0kV; _pinterest_sess = "
        + "\"TWc9PSZWaE0xeDZOVm4yL3Yva0VSazRkRjlHR013bk9mdVJBcU9zVEtEOUhXVjhKSFZmZUEreWJiNDYrV3FubVRoVzdqdDF0dmtDcXErcFF6MmlXQUQ3RDVzWERCWTZYZUt4eXMzemkvOGlXRFZQT1J6MjkwampOZlVJUFEvTnNkTUZYMkJ3dGxPTTRKaVIwdGNJY2h1MUhaSHlFT3djd0huNHE0YmtiTTBZR3dVTVB3d0RyYVE4UC8rMjZCYWo2eTJLNGJVSHR6KzRENjlWVE0rNFMxNWdGMUtVL0VtL2RDZktiUFg3M1Y2Z2dEbllPeUxFR3FOdEd6SUJSRTlBMWs1YkJnbTBlWHhwcC9pMmlqRmoydlh0V2VQSGYvYk1zeXlSM2dIU1dmUXIyRWVxWVBPdTYzbHFjcVhYRWRBT0FTQ3VBNmdWMm5QUlREZDdSY2ZQeE1NWklqSUZxNDllVHF1WUVzRFRrRjBXQnZCMVBGTlYxT0UzM1daeHFOUnBBTzliMzFJdmovQ1hQR2Vvc1pkTHNxL1FjT3FrWllTR1d6VHFrd2g5cFBFMmswM3dIa0dOOHVCbGd6aVlKUkJlZlZNeWVyRTBYREcrQVFlUTdRc1NqMlFlQ3RvaWlZMjJXZ1RURmIxNDA2d2JTODRGNk9BYWpoRzVJTUhLMkJ4UDJGb0NmN0NOQXpmZ0FoR08xcElmWmh5S29OeGRadFpDVWR1RGw3ZzZGRS81SlU4UlhSUVlIWm4wRzRJMGFVaTQzdGI3T2ovSCtHR2ZSWlk0M1RCN2JXSmZJRFdQUUpZWVpRMW5ta0pMbXgwT2NZckZJcHg0RTJrTjJlZWJIdXFSdkdJTWNXc2d3NHpXdzFTRGhKVkN4YmY4SCtJaTdSQSt0K2dhc1VDc0tkNnJIeVFhb3BHeDd6OUwvamZsanRKV0ZYNGFmZWFQNGlqNFVqekVFcGUreHU4UGVqZXRuMFVDNE1QbkFuWnJ6YzNjMTF3dVNZUHJ2MjBwMi8xeXNwbnczMlpSa3cvbzVPQUhQSyswNlU4Y2JQaThxNWN1NWtHVm83SWc0YjJVVW1tUWZYcHpWR2RCYS8wRE0yb2RtNUs0NzRteFp4JjVhOXZDbjB5RGtxL1lROE5WOVNDMjB4c1dMND0=\"";

    NSLog.Logger.Info("CrawlData: " + Id);
    var result = true;
    try
    {
        using (var _db = new CMS_Context())
        {
            /* get key by ID (active keywords only) */
            var keyWord = _db.CMS_KeyWord.FirstOrDefault(o => o.ID == Id && o.Status == (byte)Commons.EStatus.Active);
            if (keyWord != null)
            {
                /* check time span crawl: at most one crawl per 5 minutes.
                 * A keyword without UpdatedDate is treated as never crawled; previously
                 * reading .Value on the empty nullable threw and the crawl always failed. */
                var lastUpdated = keyWord.UpdatedDate;
                var isDue = !lastUpdated.HasValue
                    || (DateTime.Now - lastUpdated.Value).TotalMinutes > 5
                    || lastUpdated == keyWord.CreatedDate;

                if (isDue)
                {
                    /* stamp the crawl time up front; it is not rolled back if the crawl fails */
                    keyWord.UpdatedDate = DateTime.Now;
                    keyWord.UpdatedBy = createdBy;
                    keyWord.KeyWord = keyWord.KeyWord.Trim();
                    _db.SaveChanges();

                    /* pick the cookie: the linked account's cookie when present, otherwise the fallback */
                    string accountCookies = null;
                    if (!string.IsNullOrEmpty(keyWord.CrawlAccountID))
                    {
                        accountCookies = keyWord.CMS_Account.Cookies;
                    }

                    if (!string.IsNullOrEmpty(accountCookies))
                    {
                        // NOTE(review): the account cookie is assigned to CrawlerBoardHelper, but the
                        // crawl below goes through CrawlerHelper, whose _Cookies is not touched on
                        // this path. Looks like it may be unintended - confirm before changing.
                        CrawlerBoardHelper._Cookies = accountCookies;
                    }
                    else
                    {
                        CrawlerHelper._Cookies = fallbackCookies;
                    }

                    var searchStr = HttpUtility.UrlEncode(keyWord.KeyWord);

                    /* get first class result */
                    var model = new CMS_CrawlerModels();
                    CMSPinFactory _fac = new CMSPinFactory();
                    CrawlerHelper.Get_Tagged_Pins(ref model, searchStr, Commons.PinDefault);

                    if (model != null && model.Pins != null && model.Pins.Any())
                    {
                        /* get second class result: one extra crawl per first-class pin.
                         * The IDs are snapshotted first so the loop does not enumerate model.Pins. */
                        var listPinID = model.Pins.Select(o => o.ID).ToList();

                        // NOTE(review): every worker passes the same model by ref; whether
                        // Get_Tagged_OrtherPins is safe to call concurrently is not visible here.
                        Parallel.ForEach(listPinID, pinID =>
                        {
                            CrawlerHelper.Get_Tagged_OrtherPins(ref model, searchStr, Commons.PinOrtherDefault, "", 1, pinID);
                        });
                    }

                    /* create or update pin */
                    var res = _fac.CreateOrUpdate(model.Pins, keyWord.ID, createdBy, ref msg);
                    if (res == false)
                    {
                        result = false;
                    }
                    else
                    {
                        /* refresh the timestamp so the 5-minute window starts when the crawl finished */
                        keyWord.UpdatedDate = DateTime.Now;
                        _db.SaveChanges();
                    }
                }
            }
        }

        NSLog.Logger.Info("ResponseCrawlData: " + Id, result);
    }
    catch (Exception ex)
    {
        msg = "Crawl data is unsuccessfully.";
        result = false;
        LogHelper.WriteLogs("ErrorCrawlData: " + Id, JsonConvert.SerializeObject(ex));
        NSLog.Logger.Error("ErrorCrawlData: " + Id, ex);
    }

    return (result);
}
/// <summary>
/// Crawls posts for the keyword identified by <paramref name="Id"/> via
/// <c>CrawlerFbHelpers_v2.CrawlerAllFb</c> and stores them through <c>CMSPinFactory.CreateOrUpdate</c>.
/// </summary>
/// <remarks>
/// The keyword is crawled only when its <c>UpdatedDate</c> has no value, equals its
/// <c>CreatedDate</c>, or is more than 5 minutes old. When the keyword is not found, or is not
/// yet due, nothing is crawled and the method returns <c>true</c>. When a crawl runs but yields
/// no pins, <c>CreateOrUpdate</c> is not called and the method returns <c>false</c>.
/// The keyword's <c>UpdatedDate</c> is stamped before crawling and is not restored on failure.
/// </remarks>
/// <param name="Id">ID of the keyword row to crawl.</param>
/// <param name="createdBy">Stored as <c>UpdatedBy</c> on the keyword and forwarded to <c>CreateOrUpdate</c>.</param>
/// <param name="msg">Forwarded by reference to <c>CreateOrUpdate</c>; set to a generic error text when an exception is caught.</param>
/// <returns><c>true</c> when nothing had to be crawled or the pins were stored; otherwise <c>false</c>.</returns>
public bool CrawlData(string Id, string createdBy, ref string msg)
{
    NSLog.Logger.Info("CrawlData: " + Id);
    var model = new CMS_CrawlerModels();
    var sequence = 0;
    var key = "";
    var result = true;
    try
    {
        using (var _db = new CMS_Context())
        {
            /* get key by ID */
            var keyWord = _db.CMS_KeyWord.FirstOrDefault(o => o.ID == Id);
            if (keyWord != null)
            {
                /* remembered for the summary log written after the context is disposed */
                sequence = keyWord.Sequence;
                key = keyWord.KeyWord;

                /* check time span crawl: at most one crawl per 5 minutes.
                 * A keyword without UpdatedDate is treated as never crawled; previously
                 * reading .Value on the empty nullable threw and the crawl always failed. */
                var lastUpdated = keyWord.UpdatedDate;
                var isDue = !lastUpdated.HasValue
                    || (DateTime.Now - lastUpdated.Value).TotalMinutes > 5
                    || lastUpdated == keyWord.CreatedDate;

                if (isDue)
                {
                    /* stamp the crawl time up front; it is not rolled back if the crawl fails */
                    keyWord.UpdatedDate = DateTime.Now;
                    keyWord.UpdatedBy = createdBy;
                    _db.SaveChanges();

                    /* call crawler api to crawl data */
                    CMSPinFactory _fac = new CMSPinFactory();

                    /* Only the cookie column is needed, so project it in the query instead of
                     * loading whole account rows. Accounts without a cookie are skipped so the
                     * random pick cannot return an empty cookie. */
                    var listCookie = _db.CMS_Account
                        .Where(o => o.Status == (byte)Commons.EStatus.Active && o.IsActive && !string.IsNullOrEmpty(o.Cookies))
                        .Select(o => o.Cookies)
                        .ToList();
                    var _cookie = CommonHelper.RamdomCookie(listCookie);

                    CrawlerFbHelpers_v2.CrawlerAllFb(keyWord.KeyWord, _cookie, ref model);

                    /* an empty crawl counts as a failure: res stays false */
                    var res = false;
                    if (model.Pins.Count > 0)
                    {
                        res = _fac.CreateOrUpdate(model.Pins, keyWord.ID, createdBy, ref msg);
                    }

                    if (res == false)
                    {
                        result = false;
                    }
                    else
                    {
                        /* refresh the timestamp so the 5-minute window starts when the crawl finished */
                        keyWord.UpdatedDate = DateTime.Now;
                        _db.SaveChanges();
                    }
                }
            }
        }

        LogHelper.WriteLogs(sequence.ToString() + " " + key, "Num post: " + model.Pins.Count.ToString());
        NSLog.Logger.Info("ResponseCrawlData", result.ToString());
    }
    catch (Exception ex)
    {
        msg = "Crawl data is unsuccessfully.";
        result = false;
        LogHelper.WriteLogs("ErrorCrawlData: " + Id, JsonConvert.SerializeObject(ex));
        NSLog.Logger.Error("ErrorCrawlData: " + Id, ex);
    }

    return (result);
}
/// <summary>
/// Crawls the photo search tab for the keyword identified by <paramref name="Id"/> via
/// <c>CrawlerFBToolHelpers</c>, keeps the pins created within the last 7 days, and stores them
/// through <c>CMSPinFactory.CreateOrUpdate</c>.
/// </summary>
/// <remarks>
/// The keyword is crawled only when its <c>UpdatedDate</c> has no value, equals its
/// <c>CreatedDate</c>, or is more than 5 minutes old. When the keyword is not found, or is not
/// yet due, nothing is crawled and the method returns <c>true</c>. When a crawl runs but yields
/// no pins, <c>CreateOrUpdate</c> is not called and the method returns <c>false</c>.
/// The post and people tabs are currently not crawled; their models stay as constructed and
/// are only logged and merged.
/// </remarks>
/// <param name="Id">ID of the keyword row to crawl.</param>
/// <param name="createdBy">Stored as <c>UpdatedBy</c> on the keyword and forwarded to <c>CreateOrUpdate</c>.</param>
/// <param name="msg">Forwarded by reference to <c>CreateOrUpdate</c>; set to a generic error text when an exception is caught.</param>
/// <returns><c>true</c> when nothing had to be crawled or the pins were stored; otherwise <c>false</c>.</returns>
public bool CrawlData(string Id, string createdBy, ref string msg)
{
    NSLog.Logger.Info("CrawlData: " + Id);
    var model = new CMS_CrawlerModels();
    var sequence = 0;
    var key = "";
    var _cookie = "";
    /* bounds of the 7-day window, fixed at method entry */
    DateTime lastdate = DateTime.Now.AddDays(-7);
    DateTime datenow = DateTime.Now;
    var result = true;
    try
    {
        using (var _db = new CMS_Context())
        {
            /* get key by ID */
            var keyWord = _db.CMS_KeyWord.FirstOrDefault(o => o.ID == Id);
            if (keyWord != null)
            {
                /* remembered for the summary log written after the context is disposed */
                sequence = keyWord.Sequence;
                key = keyWord.KeyWord;

                /* check time span crawl: at most one crawl per 5 minutes.
                 * A keyword without UpdatedDate is treated as never crawled; previously
                 * reading .Value on the empty nullable threw and the crawl always failed. */
                var lastUpdated = keyWord.UpdatedDate;
                var isDue = !lastUpdated.HasValue
                    || (DateTime.Now - lastUpdated.Value).TotalMinutes > 5
                    || lastUpdated == keyWord.CreatedDate;

                if (isDue)
                {
                    /* stamp the crawl time up front; it is not rolled back if the crawl fails */
                    keyWord.UpdatedDate = DateTime.Now;
                    keyWord.UpdatedBy = createdBy;
                    _db.SaveChanges();

                    /* call crawler api to crawl data */
                    CMSPinFactory _fac = new CMSPinFactory();

                    /* only the cookie column is needed, so project it in the query */
                    var listCookie = _db.CMS_Account
                        .Where(o => o.Status == (byte)Commons.EStatus.Active && o.IsActive && !string.IsNullOrEmpty(o.Cookies))
                        .Select(o => o.Cookies)
                        .ToList();
                    _cookie = CommonHelper.RamdomCookie(listCookie);

                    /* crawler tab post - crawl call is disabled, the model stays as constructed.
                     * Pins is null-checked before it is read (the count used to be logged first). */
                    var modelPost = new CMS_CrawlerModels();
                    var postCount = modelPost.Pins == null ? 0 : modelPost.Pins.Count;
                    NSLog.Logger.Info("done crawler tab post : ", postCount);
                    if (postCount > 0)
                    {
                        model.Pins.AddRange(modelPost.Pins);
                    }

                    /* crawler tab people - crawl call is disabled, same handling as the post tab */
                    var modelPeople = new CMS_CrawlerModels();
                    var peopleCount = modelPeople.Pins == null ? 0 : modelPeople.Pins.Count;
                    NSLog.Logger.Info("done crawler tab people : ", peopleCount);
                    if (peopleCount > 0)
                    {
                        model.Pins.AddRange(modelPeople.Pins);
                    }

                    /* crawler tab photo */
                    var modelPhoto = new CMS_CrawlerModels();
                    string q = "photos-keyword(" + keyWord.KeyWord.Replace(" ", "+") + ")";
                    string ref_path = "/search/str/" + keyWord.KeyWord.Replace(" ", "+") + "/photos-keyword";
                    CrawlerFBToolHelpers.CrawlerNow(q, ref_path, "grid", (byte)Commons.EType.Photo, _cookie, 70, ref modelPhoto);

                    /* crawler detail tab photo, at most 10 detail requests in flight.
                     * Guarded so a null model / null Pins coming back from CrawlerNow is treated
                     * as "no photos" instead of failing inside Parallel.ForEach. */
                    var options = new ParallelOptions { MaxDegreeOfParallelism = 10 };
                    var photoCount = (modelPhoto == null || modelPhoto.Pins == null) ? 0 : modelPhoto.Pins.Count;
                    if (photoCount > 0)
                    {
                        // NOTE(review): pin is passed by ref, but it is the lambda's own parameter;
                        // if CrawlerDetail replaces the instance rather than mutating it, the
                        // replacement never reaches modelPhoto.Pins - confirm.
                        Parallel.ForEach(modelPhoto.Pins, options, pin =>
                        {
                            CrawlerFBToolHelpers.CrawlerDetail(pin.PhotoID, _cookie, (byte)Commons.EType.Photo, ref pin);
                        });
                    }

                    NSLog.Logger.Info("done crawler tab photo : ", photoCount);
                    if (photoCount > 0)
                    {
                        model.Pins.AddRange(modelPhoto.Pins);
                    }

                    /* an empty crawl counts as a failure: res stays false */
                    var res = false;
                    if (model.Pins.Count > 0)
                    {
                        NSLog.Logger.Info("done crawler before 7 days ago : ", model.Pins.Count);

                        /* check 7 days ago: keep pins created inside [lastdate, datenow] */
                        model.Pins = model.Pins.Where(o => o.Created_At >= lastdate && o.Created_At <= datenow).ToList();
                        NSLog.Logger.Info("done crawler after 7 days ago : ", model.Pins.Count);

                        /* photo pins already had their detail crawled above; only the rest need it */
                        Parallel.ForEach(model.Pins, options, pin =>
                        {
                            if (pin.Type != (byte)Commons.EType.Photo)
                            {
                                CrawlerFBToolHelpers.CrawlerDetail(pin.PhotoID, _cookie, (byte)Commons.EType.Post, ref pin);
                            }
                        });

                        /* called even when the 7-day filter left no pins, as before */
                        res = _fac.CreateOrUpdate(model.Pins, keyWord.ID, createdBy, keyWord.KeyWord, ref msg);
                    }

                    if (res == false)
                    {
                        result = false;
                    }
                    else
                    {
                        /* refresh the timestamp so the 5-minute window starts when the crawl finished */
                        keyWord.UpdatedDate = DateTime.Now;
                        _db.SaveChanges();
                    }
                }
            }
        }

        LogHelper.WriteLogs(sequence.ToString() + " " + key, "Num post: " + model.Pins.Count.ToString());
        NSLog.Logger.Info("ResponseCrawlData", result.ToString());
    }
    catch (Exception ex)
    {
        msg = "Crawl data is unsuccessfully.";
        result = false;
        LogHelper.WriteLogs("ErrorCrawlData: " + Id, JsonConvert.SerializeObject(ex));
        NSLog.Logger.Error("ErrorCrawlData: " + Id, ex);
    }

    return (result);
}