/// <summary>
/// Collects photo short-codes for a hashtag into the shared <c>ClGlobul.lstPhotoId</c> list.
/// </summary>
/// <remarks>
/// Downloads <c>https://www.instagram.com/explore/tags/{hashTag}/</c>, splits the page on the text
/// "code" and, for every chunk that contains "date", takes the text between <c>":"</c> and the next
/// quote as a photo code. While the current response contains <c>has_next_page":true</c> it posts a
/// follow-up query to <c>https://www.instagram.com/query/</c> and parses it the same way.
/// The first page is capped by <c>ClGlobul.SnapVideosCounterComment</c>, paginated pages by
/// <c>ClGlobul.NumberofSnapsVideosToComment</c>. Exceptions are logged, never rethrown.
/// </remarks>
/// <param name="hashTag">Hashtag text; it is concatenated into the tag URL without escaping.</param>
/// <returns>The shared <c>ClGlobul.lstPhotoId</c> instance (not a copy).</returns>
public List<string> GetPhotoId1(string hashTag)
{
    const string queryTail = "%2C+12)+%7B%0A++count%2C%0A++nodes+%7B%0A++++caption%2C%0A++++code%2C%0A++++comments+%7B%0A++++++count%0A++++%7D%2C%0A++++date%2C%0A++++dimensions+%7B%0A++++++height%2C%0A++++++width%0A++++%7D%2C%0A++++display_src%2C%0A++++id%2C%0A++++is_video%2C%0A++++likes+%7B%0A++++++count%0A++++%7D%2C%0A++++owner+%7B%0A++++++id%0A++++%7D%2C%0A++++thumbnail_src%0A++%7D%2C%0A++page_info%0A%7D%0A+%7D&ref=users%3A%3Ashow";

    try
    {
        // Register the worker thread only once; Distinct() on its own never modified the list.
        if (!lstThreadsHash_comment.Contains(Thread.CurrentThread))
        {
            lstThreadsHash_comment.Add(Thread.CurrentThread);
        }
        Thread.CurrentThread.IsBackground = true;
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error : " + ex.StackTrace);
    }

    try
    {
        string url = "https://www.instagram.com/explore/tags/" + hashTag + "/";
        GlobusHttpHelper objInstagramUser = new GlobusHttpHelper();

        string pageSource = objInstagramUser.getHtmlfromUrl(new Uri(url), "", "");
        if (string.IsNullOrEmpty(pageSource))
        {
            // Single retry when the first download came back empty.
            pageSource = objInstagramUser.getHtmlfromUrl(new Uri(url), "", "");
        }

        if (string.IsNullOrEmpty(pageSource) || !pageSource.Contains("date"))
        {
            return ClGlobul.lstPhotoId;
        }

        string token = Utils.getBetween(pageSource, "csrf_token\":\"", "\"}");
        string[] chunks = Regex.Split(pageSource, "code");
        if (chunks.Length <= 1)
        {
            return ClGlobul.lstPhotoId;
        }

        // First page: everything before the first "code" marker is page preamble, so skip it.
        foreach (string chunk in chunks.Skip(1))
        {
            try
            {
                if (!chunk.Contains("date"))
                {
                    continue;
                }

                string imageId = Utils.getBetween(chunk, "\":\"", "\"");
                if (string.IsNullOrEmpty(imageId))
                {
                    continue;
                }

                if (ClGlobul.lstPhotoId.Count >= ClGlobul.SnapVideosCounterComment)
                {
                    return ClGlobul.lstPhotoId;
                }

                if (!ClGlobul.lstPhotoId.Contains(imageId))
                {
                    ClGlobul.lstPhotoId.Add(imageId);
                }
            }
            catch (Exception ex)
            {
                GlobusLogHelper.log.Error("Error : " + ex.StackTrace);
            }
        }

        // Pagination: follow end_cursor until the limit is reached or no further page is advertised.
        string previousCursor = null;
        while (pageSource.Contains("has_next_page\":true")
               && ClGlobul.lstPhotoId.Count < ClGlobul.NumberofSnapsVideosToComment)
        {
            string IDD = Utils.getBetween(pageSource, "\"id\":\"", "\"");
            string code_ID = Utils.getBetween(pageSource, "end_cursor\":\"", "\"");
            if (code_ID == previousCursor)
            {
                // The cursor did not advance; requesting it again would loop forever.
                break;
            }
            previousCursor = code_ID;

            string postdata = "q=ig_user(" + IDD + ")+%7B+media.after(" + code_ID + queryTail;
            pageSource = objInstagramUser.postFormDatainta(new Uri("https://www.instagram.com/query/"), postdata, "https://www.instagram.com/" + hashTag, token);
            if (string.IsNullOrEmpty(pageSource))
            {
                break;
            }

            foreach (string val in Regex.Split(pageSource, "code"))
            {
                if (!val.Contains("date"))
                {
                    continue;
                }

                if (ClGlobul.lstPhotoId.Count >= ClGlobul.NumberofSnapsVideosToComment)
                {
                    return ClGlobul.lstPhotoId;
                }

                string photo_codes = Utils.getBetween(val, "\":\"", "\"");
                if (!string.IsNullOrEmpty(photo_codes) && !ClGlobul.lstPhotoId.Contains(photo_codes))
                {
                    ClGlobul.lstPhotoId.Add(photo_codes);
                }
            }
        }
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Info("Error : " + ex.StackTrace);
    }

    return ClGlobul.lstPhotoId;
}
/// <summary>
/// Returns the distinct photo short-codes found for a hashtag, up to
/// <c>ClGlobul.NumberofSnapsVideosToLike</c> entries.
/// </summary>
/// <remarks>
/// Downloads <c>https://www.instagram.com/explore/tags/{hashTag}/</c>, splits the page on the text
/// "code" and, for every chunk that contains "date", takes the text between <c>":"</c> and the next
/// quote as a photo code. While the current response contains <c>has_next_page":true</c> it posts a
/// follow-up query to <c>https://www.instagram.com/query/</c> and parses it the same way.
/// Exceptions are logged, never rethrown; whatever was collected so far is returned.
/// </remarks>
/// <param name="hashTag">Hashtag text; it is concatenated into the tag URL without escaping.</param>
/// <returns>A new list owned by the caller; empty when nothing could be scraped.</returns>
public List<string> GetPhotoId(string hashTag)
{
    const string queryTail = "%2C+12)+%7B%0A++count%2C%0A++nodes+%7B%0A++++caption%2C%0A++++code%2C%0A++++comments+%7B%0A++++++count%0A++++%7D%2C%0A++++date%2C%0A++++dimensions+%7B%0A++++++height%2C%0A++++++width%0A++++%7D%2C%0A++++display_src%2C%0A++++id%2C%0A++++is_video%2C%0A++++likes+%7B%0A++++++count%0A++++%7D%2C%0A++++owner+%7B%0A++++++id%0A++++%7D%2C%0A++++thumbnail_src%0A++%7D%2C%0A++page_info%0A%7D%0A+%7D&ref=users%3A%3Ashow";

    try
    {
        // Register the worker thread only once; Distinct() on its own never modified the list.
        if (!lstThreadsHash_comment.Contains(Thread.CurrentThread))
        {
            lstThreadsHash_comment.Add(Thread.CurrentThread);
        }
        Thread.CurrentThread.IsBackground = true;
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error : " + ex.StackTrace);
    }

    List<string> lstPhotoId = new List<string>();
    try
    {
        string url = "https://www.instagram.com/explore/tags/" + hashTag + "/";
        GlobusHttpHelper objInstagramUser = new GlobusHttpHelper();

        string pageSource = objInstagramUser.getHtmlfromUrl(new Uri(url), "", "");
        if (string.IsNullOrEmpty(pageSource))
        {
            // Single retry when the first download came back empty.
            pageSource = objInstagramUser.getHtmlfromUrl(new Uri(url), "", "");
        }

        if (string.IsNullOrEmpty(pageSource) || !pageSource.Contains("date"))
        {
            return lstPhotoId;
        }

        string token = Utils.getBetween(pageSource, "csrf_token\":\"", "\"}");
        string[] chunks = Regex.Split(pageSource, "code");
        if (chunks.Length <= 1)
        {
            return lstPhotoId;
        }

        // First page: everything before the first "code" marker is page preamble, so skip it.
        foreach (string chunk in chunks.Skip(1))
        {
            try
            {
                if (!chunk.Contains("date"))
                {
                    continue;
                }

                string imageId = Utils.getBetween(chunk, "\":\"", "\"");
                if (string.IsNullOrEmpty(imageId))
                {
                    continue;
                }

                if (lstPhotoId.Count >= ClGlobul.NumberofSnapsVideosToLike)
                {
                    return lstPhotoId;
                }

                // Dedupe on insert; a discarded Distinct() call does not remove anything.
                if (!lstPhotoId.Contains(imageId))
                {
                    lstPhotoId.Add(imageId);
                }
            }
            catch (Exception ex)
            {
                GlobusLogHelper.log.Error("Error : " + ex.StackTrace);
            }
        }

        // Pagination: follow end_cursor until the limit is reached or no further page is advertised.
        string previousCursor = null;
        while (pageSource.Contains("has_next_page\":true")
               && lstPhotoId.Count < ClGlobul.NumberofSnapsVideosToLike)
        {
            string IDD = Utils.getBetween(pageSource, "\"id\":\"", "\"");
            string code_ID = Utils.getBetween(pageSource, "end_cursor\":\"", "\"");
            if (code_ID == previousCursor)
            {
                // The cursor did not advance; requesting it again would loop forever.
                break;
            }
            previousCursor = code_ID;

            string postdata = "q=ig_user(" + IDD + ")+%7B+media.after(" + code_ID + queryTail;
            pageSource = objInstagramUser.postFormDatainta(new Uri("https://www.instagram.com/query/"), postdata, "https://www.instagram.com/" + hashTag, token);
            if (string.IsNullOrEmpty(pageSource))
            {
                break;
            }

            foreach (string val in Regex.Split(pageSource, "code"))
            {
                if (!val.Contains("date"))
                {
                    continue;
                }

                if (lstPhotoId.Count >= ClGlobul.NumberofSnapsVideosToLike)
                {
                    return lstPhotoId;
                }

                string photo_codes = Utils.getBetween(val, "\":\"", "\"");
                if (!string.IsNullOrEmpty(photo_codes) && !lstPhotoId.Contains(photo_codes))
                {
                    lstPhotoId.Add(photo_codes);
                }
            }
        }
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Info("Error : " + ex.StackTrace);
    }

    return lstPhotoId;
}
/// <summary>
/// Walks the hashtag explore page (and its paginated query responses) and builds up to 50
/// distinct <c>https://www.instagram.com/p/{code}/</c> URLs.
/// </summary>
/// <remarks>
/// Returns immediately when <c>isStopScrapeUser</c> is set. The collected URLs live in a local list
/// only; this method neither returns nor persists them. Exceptions are logged, never rethrown.
/// </remarks>
/// <param name="obj_GDUSER">Not read or written by this method; kept for signature compatibility.</param>
public void startScrapePhotoUrlByHashtag(ref InstagramUser obj_GDUSER)
{
    const string queryTail = "%2C+12)+%7B%0A++count%2C%0A++nodes+%7B%0A++++caption%2C%0A++++code%2C%0A++++comments+%7B%0A++++++count%0A++++%7D%2C%0A++++date%2C%0A++++dimensions+%7B%0A++++++height%2C%0A++++++width%0A++++%7D%2C%0A++++display_src%2C%0A++++id%2C%0A++++is_video%2C%0A++++likes+%7B%0A++++++count%0A++++%7D%2C%0A++++owner+%7B%0A++++++id%0A++++%7D%2C%0A++++thumbnail_src%0A++%7D%2C%0A++page_info%0A%7D%0A+%7D&ref=users%3A%3Ashow";

    if (isStopScrapeUser)
    {
        return;
    }

    try
    {
        // Register the worker thread only once; Distinct() on its own never modified the list.
        if (!lstofThreadScrapeUser.Contains(Thread.CurrentThread))
        {
            lstofThreadScrapeUser.Add(Thread.CurrentThread);
        }
        Thread.CurrentThread.IsBackground = true;
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error : " + ex.StackTrace);
    }

    string username = "******";
    int temp = 50;
    List<string> List_PhotoUrl = new List<string>();

    try
    {
        string url = "https://www.instagram.com/explore/tags/" + username + "/";
        GlobusHttpHelper objInstagramUser = new GlobusHttpHelper();

        string pageSource = objInstagramUser.getHtmlfromUrl(new Uri(url), "", "");
        if (string.IsNullOrEmpty(pageSource))
        {
            // Single retry when the first download came back empty.
            pageSource = objInstagramUser.getHtmlfromUrl(new Uri(url), "", "");
        }

        if (string.IsNullOrEmpty(pageSource) || !pageSource.Contains("date"))
        {
            return;
        }

        string token = Utils.getBetween(pageSource, "csrf_token\":\"", "\"}");
        string[] chunks = Regex.Split(pageSource, "code");
        if (chunks.Length <= 1)
        {
            return;
        }

        foreach (string chunk in chunks.Skip(1))
        {
            try
            {
                if (!chunk.Contains("date"))
                {
                    continue;
                }

                // Validate the raw code BEFORE building the URL; once the prefix is
                // prepended the string can never be empty.
                string imageId = Utils.getBetween(chunk, "\":\"", "\"");
                if (string.IsNullOrEmpty(imageId))
                {
                    continue;
                }

                string imageUrl = "https://www.instagram.com/p/" + imageId + "/";
                if (temp > List_PhotoUrl.Count && !List_PhotoUrl.Contains(imageUrl))
                {
                    List_PhotoUrl.Add(imageUrl);
                }
            }
            catch (Exception ex)
            {
                GlobusLogHelper.log.Error("Error : " + ex.StackTrace);
            }
        }

        // Pagination: follow end_cursor until 50 URLs are held or no further page is advertised.
        string previousCursor = null;
        while (pageSource.Contains("has_next_page\":true") && List_PhotoUrl.Count < temp)
        {
            string IDD = Utils.getBetween(pageSource, "\"id\":\"", "\"");
            string code_ID = Utils.getBetween(pageSource, "end_cursor\":\"", "\"");
            if (code_ID == previousCursor)
            {
                // The cursor did not advance; requesting it again would loop forever.
                break;
            }
            previousCursor = code_ID;

            string postdata = "q=ig_user(" + IDD + ")+%7B+media.after(" + code_ID + queryTail;
            pageSource = objInstagramUser.postFormDatainta(new Uri("https://www.instagram.com/query/"), postdata, "https://www.instagram.com/" + username, token);
            if (string.IsNullOrEmpty(pageSource))
            {
                break;
            }

            foreach (string val in Regex.Split(pageSource, "code"))
            {
                if (!val.Contains("date"))
                {
                    continue;
                }

                string photo_codes = Utils.getBetween(val, "\":\"", "\"");
                if (string.IsNullOrEmpty(photo_codes))
                {
                    continue;
                }

                string photoUrl = "https://www.instagram.com/p/" + photo_codes + "/";
                if (temp > List_PhotoUrl.Count && !List_PhotoUrl.Contains(photoUrl))
                {
                    List_PhotoUrl.Add(photoUrl);
                }
            }
        }
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error : " + ex.StackTrace);
    }
}
/// <summary>
/// Re-sends a form POST to <c>https://www.instagram.com/query/</c> through proxies loaded from the
/// <c>tlb_WorkingProxy</c> table until one of them yields a non-empty response.
/// </summary>
/// <remarks>
/// Each stored proxy is tried at most once, in random order, so the method always terminates.
/// Every proxy that is picked is also removed from <c>ClGlobul.Workingproxylist</c>.
/// Entries whose port is not a valid integer are skipped.
/// </remarks>
/// <param name="url">Currently not used: the request always goes to <c>https://www.instagram.com/query/</c>.</param>
/// <param name="post">Form body forwarded to <c>postFormDatainta</c>.</param>
/// <param name="refer">Referer value forwarded to <c>postFormDatainta</c>.</param>
/// <param name="token">Token value forwarded to <c>postFormDatainta</c>.</param>
/// <returns>
/// The first non-empty response; <c>null</c> when no proxy rows could be loaded; an empty string
/// when every proxy failed or an exception occurred.
/// </returns>
public string changeproxy_post(string url, string post, string refer, string token)
{
    string responce = string.Empty;
    List<string> lst_workingfinalproxy = new List<string>();

    try
    {
        DataSet ds = DataBaseHandler.SelectQuery("select * from tlb_WorkingProxy", "tlb_WorkingProxy");
        foreach (DataRow item in ds.Tables[0].Rows)
        {
            // Columns 1..4 are joined as address:port:username:password.
            lst_workingfinalproxy.Add(item[1] + ":" + item[2] + ":" + item[3] + ":" + item[4]);
        }
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error:" + ex.StackTrace);
    }

    if (lst_workingfinalproxy.Count == 0)
    {
        GlobusLogHelper.log.Info("Please Upload Working Proxy to Scrape Data");
        return null;
    }

    try
    {
        GlobusHttpHelper obj_global = new GlobusHttpHelper();

        while (lst_workingfinalproxy.Count > 0)
        {
            // NOTE(review): assumes GenerateRandom's upper bound is exclusive — confirm.
            int index = RandomNumberGenerator.GenerateRandom(0, lst_workingfinalproxy.Count);
            string proxy = lst_workingfinalproxy[index];

            // Drop the candidate locally so a failing proxy is never picked again;
            // without this the retry never ends when all proxies are dead.
            lst_workingfinalproxy.RemoveAt(index);
            ClGlobul.Workingproxylist.Remove(proxy);

            string proxyaddress = string.Empty;
            string proxyport = string.Empty;
            string proxyusername = string.Empty;
            string proxypassword = string.Empty;

            string[] proxydata = Regex.Split(proxy, ":");
            if (proxydata.Length == 2 || proxydata.Length == 4)
            {
                proxyaddress = proxydata[0];
                proxyport = proxydata[1];
            }
            if (proxydata.Length == 4)
            {
                proxyusername = proxydata[2];
                proxypassword = proxydata[3];
            }

            int port;
            if (!int.TryParse(proxyport, out port))
            {
                // Malformed entry: move on to the next proxy instead of aborting.
                continue;
            }

            obj_global.proxyAddress = proxyaddress;
            obj_global.port = port;
            obj_global.proxyUsername = proxyusername;
            obj_global.proxyPassword = proxypassword;

            responce = obj_global.postFormDatainta(new Uri("https://www.instagram.com/query/"), post, refer, token);
            if (!string.IsNullOrEmpty(responce))
            {
                return responce;
            }
        }
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error:" + ex.StackTrace);
    }

    return responce;
}
/// <summary>
/// For every entry in <c>listOfHashTag</c>, scrapes up to <c>noOfUserToScrape</c> photo URLs from the
/// hashtag explore page without logging in, and records each new URL to CSV and to the
/// <c>ScrapedImage</c> table.
/// </summary>
/// <remarks>
/// A random proxy from <c>tlb_WorkingProxy</c> is applied to the HTTP helper when one is available.
/// URLs already present in <c>ClGlobul.HashPhotoUrl_scraper</c> are skipped. A randomised delay is
/// applied after the first page and after every paginated page. Exceptions are logged and do not
/// stop the remaining hashtags.
/// </remarks>
public void startScrapeImageFromHashtagWithoutAccount()
{
    const string queryTail = "%2C+12)+%7B%0A++count%2C%0A++nodes+%7B%0A++++caption%2C%0A++++code%2C%0A++++comments+%7B%0A++++++count%0A++++%7D%2C%0A++++date%2C%0A++++dimensions+%7B%0A++++++height%2C%0A++++++width%0A++++%7D%2C%0A++++display_src%2C%0A++++id%2C%0A++++is_video%2C%0A++++likes+%7B%0A++++++count%0A++++%7D%2C%0A++++owner+%7B%0A++++++id%0A++++%7D%2C%0A++++thumbnail_src%0A++%7D%2C%0A++page_info%0A%7D%0A+%7D&ref=users%3A%3Ashow";

    int temp = noOfUserToScrape;
    GlobusHttpHelper objInstagramUser = new GlobusHttpHelper();
    List<string> lst_workingfinalproxy = new List<string>();

    try
    {
        DataSet ds = DataBaseHandler.SelectQuery("select * from tlb_WorkingProxy", "tlb_WorkingProxy");
        foreach (DataRow item in ds.Tables[0].Rows)
        {
            // Columns 1..4 are joined as address:port:username:password.
            lst_workingfinalproxy.Add(item[1] + ":" + item[2] + ":" + item[3] + ":" + item[4]);
        }
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error:" + ex.StackTrace);
    }

    try
    {
        if (lst_workingfinalproxy.Count > 0)
        {
            // Request issued before the proxy is applied; its response is not used.
            objInstagramUser.getHtmlfromUrl(new Uri("http://www.Instagram.com"));

            string proxy = lst_workingfinalproxy[RandomNumberGenerator.GenerateRandom(0, lst_workingfinalproxy.Count)];
            string[] Data_Proxy = Regex.Split(proxy, ":");
            if (Data_Proxy.Length == 2 || Data_Proxy.Length == 4)
            {
                objInstagramUser.proxyAddress = Data_Proxy[0];
                objInstagramUser.port = int.Parse(Data_Proxy[1]);
            }
            if (Data_Proxy.Length == 4)
            {
                objInstagramUser.proxyUsername = Data_Proxy[2];
                objInstagramUser.proxyPassword = Data_Proxy[3];
            }
        }
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error:" + ex.StackTrace);
    }

    foreach (string itemHash in listOfHashTag)
    {
        List<string> List_PhotoUrl = new List<string>();
        try
        {
            string url = "https://www.instagram.com/explore/tags/" + itemHash + "/";
            string pageSource = objInstagramUser.getHtmlfromUrl(new Uri(url), "", "");
            if (string.IsNullOrEmpty(pageSource))
            {
                pageSource = changeproxy(url);
            }

            if (string.IsNullOrEmpty(pageSource) || !pageSource.Contains("date"))
            {
                continue;
            }

            string token = Utils.getBetween(pageSource, "csrf_token\":\"", "\"}");
            string[] chunks = Regex.Split(pageSource, "code");
            if (chunks.Length <= 1)
            {
                continue;
            }

            foreach (string chunk in chunks.Skip(1))
            {
                try
                {
                    if (!chunk.Contains("date"))
                    {
                        continue;
                    }

                    string imageId = Utils.getBetween(chunk, "\":\"", "\"");
                    if (string.IsNullOrEmpty(imageId))
                    {
                        continue;
                    }

                    string imageUrl = "https://www.instagram.com/p/" + imageId + "/";
                    if (temp > List_PhotoUrl.Count
                        && !List_PhotoUrl.Contains(imageUrl)
                        && !ClGlobul.HashPhotoUrl_scraper.Contains(imageUrl))
                    {
                        List_PhotoUrl.Add(imageUrl);
                        ClGlobul.HashPhotoUrl_scraper.Add(imageUrl);
                        RecordHashtagImage(itemHash, imageUrl, imageId);
                    }
                }
                catch (Exception ex)
                {
                    GlobusLogHelper.log.Error("Error : " + ex.StackTrace);
                }
            }

            PauseBetweenHashtagRequests();

            // Pagination: follow end_cursor until the limit is reached or no further page is advertised.
            while (pageSource.Contains("has_next_page\":true") && temp > List_PhotoUrl.Count)
            {
                string IDD = Utils.getBetween(pageSource, "\"id\":\"", "\"");
                string code_ID = Utils.getBetween(pageSource, "end_cursor\":\"", "\"");
                string postdata = "q=ig_user(" + IDD + ")+%7B+media.after(" + code_ID + queryTail;

                pageSource = objInstagramUser.postFormDatainta(new Uri("https://www.instagram.com/query/"), postdata, "https://www.instagram.com/" + itemHash, token);
                if (string.IsNullOrEmpty(pageSource))
                {
                    pageSource = changeproxy_post("https://www.instagram.com/query/", postdata, "https://www.instagram.com/" + itemHash, token);
                }
                if (string.IsNullOrEmpty(pageSource))
                {
                    // Neither the direct request nor the proxy fallback produced a page.
                    break;
                }

                foreach (string val in Regex.Split(pageSource, "code"))
                {
                    if (!val.Contains("date"))
                    {
                        continue;
                    }

                    string photo_codes = Utils.getBetween(val, "\":\"", "\"");
                    if (string.IsNullOrEmpty(photo_codes))
                    {
                        continue;
                    }

                    string imageUrl = "https://www.instagram.com/p/" + photo_codes + "/";
                    if (temp > List_PhotoUrl.Count
                        && !List_PhotoUrl.Contains(imageUrl)
                        && !ClGlobul.HashPhotoUrl_scraper.Contains(imageUrl))
                    {
                        // Store the URL (not the bare code) so the Contains check above stays meaningful.
                        List_PhotoUrl.Add(imageUrl);
                        ClGlobul.HashPhotoUrl_scraper.Add(imageUrl);
                        RecordHashtagImage(itemHash, imageUrl, photo_codes);
                    }
                }

                PauseBetweenHashtagRequests();
            }
        }
        catch (Exception ex)
        {
            GlobusLogHelper.log.Error("Error : " + ex.StackTrace);
        }
        finally
        {
            GlobusLogHelper.log.Info(" Process Completed for HashTag " + itemHash);
        }
    }
}

// Logs one scraped hashtag image, appends it to the hashtag CSV and inserts it into ScrapedImage.
private void RecordHashtagImage(string hashTag, string imageUrl, string imageId)
{
    try
    {
        GlobusLogHelper.log.Info("Scrape ImageUrl Form Hahstag ==>" + imageUrl + "Form Username ==>" + hashTag);
        string CSVData = "WithoutLogin" + "," + hashTag.Replace(",", string.Empty) + "," + imageUrl.Replace(",", string.Empty);
        GlobusFileHelper.ExportDataCSVFile(CVSHeader_ImageUrlHahstag, CSVData, CSVPath_ImageUrlHashTag);
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error : " + ex.StackTrace);
    }

    try
    {
        // InsertQuery only accepts raw SQL text, so single quotes are doubled to keep
        // user-supplied and scraped values inside their string literals.
        DataBaseHandler.InsertQuery("insert into ScrapedImage(Username,ImageURL,ImageID) values('" + hashTag.Replace("'", "''") + "','" + imageUrl.Replace("'", "''") + "','" + imageId.Replace("'", "''") + "') ", "ScrapedImage");
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error : " + ex.StackTrace);
    }
}

// Applies the configured min/max delay overrides, then sleeps for a random number of seconds.
private void PauseBetweenHashtagRequests()
{
    if (minDelayScrapeUser != 0)
    {
        mindelay = minDelayScrapeUser;
    }
    if (maxDelayScrapeUser != 0)
    {
        maxdelay = maxDelayScrapeUser;
    }

    int delay = RandomNumberGenerator.GenerateRandom(mindelay, maxdelay);
    GlobusLogHelper.log.Info("[ " + DateTime.Now + " ] => [ Delay For " + delay + " Seconds ");
    Thread.Sleep(delay * 1000);
}
/// <summary>
/// For every entry in <c>listOfPhotoUrl</c> (treated as a username), scrapes photo URLs from the
/// user's page without logging in and records each new URL to CSV and to the
/// <c>ScrapedImage</c> table.
/// </summary>
/// <remarks>
/// A random proxy from <c>tlb_WorkingProxy</c> is applied to the HTTP helper when one is available.
/// The <c>noOfPhotoToScrape</c> limit is checked against one list shared by all usernames, so it
/// caps the total for the whole run. NOTE(review): confirm whether a per-user cap was intended.
/// Exceptions are logged, never rethrown.
/// </remarks>
public void ScrapePhotoURLWithoutAccounttt()
{
    const string queryTail = "%2C+12)+%7B%0A++count%2C%0A++nodes+%7B%0A++++caption%2C%0A++++code%2C%0A++++comments+%7B%0A++++++count%0A++++%7D%2C%0A++++date%2C%0A++++dimensions+%7B%0A++++++height%2C%0A++++++width%0A++++%7D%2C%0A++++display_src%2C%0A++++id%2C%0A++++is_video%2C%0A++++likes+%7B%0A++++++count%0A++++%7D%2C%0A++++owner+%7B%0A++++++id%0A++++%7D%2C%0A++++thumbnail_src%0A++%7D%2C%0A++page_info%0A%7D%0A+%7D&ref=users%3A%3Ashow";

    GlobusHttpHelper Obj_global = new GlobusHttpHelper();
    List<string> lst_WorkingFinalProxy = new List<string>();

    try
    {
        DataSet ds = DataBaseHandler.SelectQuery("select * from tlb_WorkingProxy", "tlb_WorkingProxy");
        foreach (DataRow item in ds.Tables[0].Rows)
        {
            // Columns 1..4 are joined as address:port:username:password.
            lst_WorkingFinalProxy.Add(item[1] + ":" + item[2] + ":" + item[3] + ":" + item[4]);
        }
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error:" + ex.StackTrace);
    }

    try
    {
        if (lst_WorkingFinalProxy.Count > 0)
        {
            string proxy = lst_WorkingFinalProxy[RandomNumberGenerator.GenerateRandom(0, lst_WorkingFinalProxy.Count)];
            string[] Data_Proxy = Regex.Split(proxy, ":");
            if (Data_Proxy.Length == 2 || Data_Proxy.Length == 4)
            {
                Obj_global.proxyAddress = Data_Proxy[0];
                Obj_global.port = int.Parse(Data_Proxy[1]);
            }
            if (Data_Proxy.Length == 4)
            {
                Obj_global.proxyUsername = Data_Proxy[2];
                Obj_global.proxyPassword = Data_Proxy[3];
            }
        }
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error:" + ex.StackTrace);
    }

    List<string> PhotoUrlList = new List<string>();
    try
    {
        foreach (string username in listOfPhotoUrl)
        {
            int temp = noOfPhotoToScrape;
            string Url_user = "******" + username + "/";

            string responce_user = Obj_global.getHtmlfromUrl(new Uri(Url_user), "");
            if (string.IsNullOrEmpty(responce_user))
            {
                responce_user = changeproxy(Url_user);
            }
            if (string.IsNullOrEmpty(responce_user))
            {
                // Nothing to parse for this user; keep going with the remaining ones.
                continue;
            }

            string token = Utils.getBetween(responce_user, "csrf_token\":\"", "\"}");

            foreach (string list in Regex.Split(responce_user, "code\":"))
            {
                if (!list.Contains("date"))
                {
                    continue;
                }
                if (PhotoUrlList.Count >= temp)
                {
                    break;
                }

                // The chunk starts with the quoted code; the "@" sentinel anchors the match to position 0.
                string photo_codes = Utils.getBetween("@" + list, "@\"", "\"");
                if (string.IsNullOrEmpty(photo_codes))
                {
                    continue;
                }

                string Imageurl = "https://www.instagram.com/p/" + photo_codes + "/";
                if (PhotoUrlList.Contains(Imageurl))
                {
                    // A duplicate is skipped; it must not discard the rest of the page.
                    continue;
                }

                PhotoUrlList.Add(Imageurl);
                RecordScrapedPhotoUrl(username, Imageurl, photo_codes);
            }

            // Pagination: follow end_cursor until the limit is reached or no further page is advertised.
            while (responce_user.Contains("has_next_page\":true") && PhotoUrlList.Count < temp)
            {
                string IDD = Utils.getBetween(responce_user, "\"id\":\"", "\"");
                string code_ID = Utils.getBetween(responce_user, "end_cursor\":\"", "\"");
                string postdata = "q=ig_user(" + IDD + ")+%7B+media.after(" + code_ID + queryTail;

                responce_user = Obj_global.postFormDatainta(new Uri("https://www.instagram.com/query/"), postdata, "https://www.instagram.com/" + username, token);
                if (string.IsNullOrEmpty(responce_user))
                {
                    break;
                }

                foreach (string val in Regex.Split(responce_user, "code"))
                {
                    if (!val.Contains("date"))
                    {
                        continue;
                    }
                    if (PhotoUrlList.Count >= temp)
                    {
                        break;
                    }

                    string photo_codes = Utils.getBetween(val, "\":\"", "\"");
                    if (string.IsNullOrEmpty(photo_codes))
                    {
                        continue;
                    }

                    string Imageurl = "https://www.instagram.com/p/" + photo_codes + "/";
                    if (PhotoUrlList.Contains(Imageurl))
                    {
                        continue;
                    }

                    PhotoUrlList.Add(Imageurl);
                    RecordScrapedPhotoUrl(username, Imageurl, photo_codes);
                }
            }
        }
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error ==> " + ex.Message);
    }

    GlobusLogHelper.log.Info("!! Scrape Photo URL Process Completed !!");
}

// Logs one scraped photo URL, appends it to the photo-URL CSV and inserts it into ScrapedImage.
private void RecordScrapedPhotoUrl(string username, string imageUrl, string photoCode)
{
    try
    {
        GlobusLogHelper.log.Info("Scrape Url===>" + imageUrl + "Username ==>" + username);
        string CSVData = "WithoutLogin" + "," + username.Replace(",", string.Empty) + "," + imageUrl.Replace(",", string.Empty);
        GlobusFileHelper.ExportDataCSVFile(CVSHeader_PhotoUrl, CSVData, CSVPath_PhotoUrl + ".csv");
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error ==> " + ex.StackTrace);
    }

    try
    {
        // ImageID is the photo's own code (not the split array). Single quotes are doubled
        // because InsertQuery only accepts raw SQL text.
        DataBaseHandler.InsertQuery("insert into ScrapedImage(Username,ImageURL,ImageID) Values('" + username.Replace("'", "''") + "','" + imageUrl.Replace("'", "''") + "','" + photoCode.Replace("'", "''") + "')", "ScrapedImage");
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error ==> " + ex.StackTrace);
    }
}
/// <summary>
/// Scrapes, without logging in, the usernames of people who commented on photos of the users in
/// <c>listOfUsernameForCommentuserScraper</c>, and records each new commenter to CSV and to the
/// <c>ScrapedUsername</c> table.
/// </summary>
/// <remarks>
/// Phase 1 gathers up to <c>noOfPhotoToScrape</c> photo codes (one cap shared by all usernames) from
/// each user's page and its paginated query responses. Phase 2 downloads every photo page and
/// extracts commenter names until <c>noOfUserToScrape</c> distinct names have been collected.
/// Each photo remembers the username it was found under, so CSV and log rows name the right source.
/// Only photos found through pagination are written to <c>ScrapedImage</c>; first-page photos are
/// not. NOTE(review): confirm whether that asymmetry is intended.
/// Exceptions are logged, never rethrown.
/// </remarks>
public void Start_ScrapeCommentuser_WithoutAccout()
{
    const string queryTail = "%2C+12)+%7B%0A++count%2C%0A++nodes+%7B%0A++++caption%2C%0A++++code%2C%0A++++comments+%7B%0A++++++count%0A++++%7D%2C%0A++++date%2C%0A++++dimensions+%7B%0A++++++height%2C%0A++++++width%0A++++%7D%2C%0A++++display_src%2C%0A++++id%2C%0A++++is_video%2C%0A++++likes+%7B%0A++++++count%0A++++%7D%2C%0A++++owner+%7B%0A++++++id%0A++++%7D%2C%0A++++thumbnail_src%0A++%7D%2C%0A++page_info%0A%7D%0A+%7D&ref=users%3A%3Ashow";

    List<string> ListofUser_commentOnphoto = new List<string>();
    // Key = photo code, Value = username whose page the code was found on.
    List<KeyValuePair<string, string>> PhotoId_list = new List<KeyValuePair<string, string>>();
    GlobusHttpHelper Obj_Globosoft = new GlobusHttpHelper();
    List<string> lst_WorkingFinalProxy = new List<string>();
    string ProxyAd = string.Empty;
    string ProxyPort = "80";
    string ProxyUsername = string.Empty;
    string ProxyPass = string.Empty;

    try
    {
        DataSet ds = DataBaseHandler.SelectQuery("select * from tlb_WorkingProxy", "tlb_WorkingProxy");
        foreach (DataRow item in ds.Tables[0].Rows)
        {
            // Columns 1..4 are joined as address:port:username:password.
            lst_WorkingFinalProxy.Add(item[1] + ":" + item[2] + ":" + item[3] + ":" + item[4]);
        }
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error:" + ex.StackTrace);
    }

    try
    {
        if (lst_WorkingFinalProxy.Count > 0)
        {
            string proxy = lst_WorkingFinalProxy[RandomNumberGenerator.GenerateRandom(0, lst_WorkingFinalProxy.Count)];
            string[] Data_Proxy = Regex.Split(proxy, ":");
            if (Data_Proxy.Length == 2 || Data_Proxy.Length == 4)
            {
                ProxyAd = Data_Proxy[0];
                ProxyPort = Data_Proxy[1];
            }
            if (Data_Proxy.Length == 4)
            {
                ProxyUsername = Data_Proxy[2];
                ProxyPass = Data_Proxy[3];
            }
        }
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error:" + ex.StackTrace);
    }

    try
    {
        // Parsed once; an invalid port aborts the run via the surrounding catch.
        int proxyPortNumber = int.Parse(ProxyPort);

        // ---- Phase 1: collect photo codes per username ----
        foreach (string username in listOfUsernameForCommentuserScraper)
        {
            int temp = noOfPhotoToScrape;
            string Url_user = "******" + username;

            string responce_user = Obj_Globosoft.getHtmlfromUrl_withProxt(new Uri(Url_user), ProxyAd, proxyPortNumber, ProxyUsername, ProxyPass);
            if (string.IsNullOrEmpty(responce_user))
            {
                responce_user = changeproxy(Url_user);
            }
            if (string.IsNullOrEmpty(responce_user))
            {
                // Nothing to parse for this user; keep going with the remaining ones.
                continue;
            }

            string token = Utils.getBetween(responce_user, "csrf_token\":\"", "\"}");

            foreach (string list in Regex.Split(responce_user, "code\":"))
            {
                if (!list.Contains("date"))
                {
                    continue;
                }
                if (PhotoId_list.Count >= temp)
                {
                    break;
                }

                // The chunk starts with the quoted code; the "@" sentinel anchors the match to position 0.
                string photo_codes = Utils.getBetween("@" + list, "@\"", "\"");
                if (!string.IsNullOrEmpty(photo_codes))
                {
                    PhotoId_list.Add(new KeyValuePair<string, string>(photo_codes, username));
                }
            }

            while (responce_user.Contains("has_next_page\":true") && PhotoId_list.Count < temp)
            {
                string IDD = Utils.getBetween(responce_user, "\"id\":\"", "\"");
                string code_ID = Utils.getBetween(responce_user, "end_cursor\":\"", "\"");
                string postdata = "q=ig_user(" + IDD + ")+%7B+media.after(" + code_ID + queryTail;

                Obj_Globosoft.proxyAddress = ProxyAd;
                Obj_Globosoft.port = proxyPortNumber;
                Obj_Globosoft.proxyUsername = ProxyUsername;
                Obj_Globosoft.proxyPassword = ProxyPass;

                responce_user = Obj_Globosoft.postFormDatainta(new Uri("https://www.instagram.com/query/"), postdata, "https://www.instagram.com/" + username, token);
                if (string.IsNullOrEmpty(responce_user))
                {
                    responce_user = changeproxy_post("https://www.instagram.com/query/", postdata, "https://www.instagram.com/" + username, token);
                }
                if (string.IsNullOrEmpty(responce_user))
                {
                    // Neither the direct request nor the proxy fallback produced a page.
                    break;
                }

                foreach (string val in Regex.Split(responce_user, "code"))
                {
                    if (!val.Contains("date"))
                    {
                        continue;
                    }
                    if (PhotoId_list.Count >= temp)
                    {
                        break;
                    }

                    string photo_codes = Utils.getBetween(val, "\":\"", "\"");
                    if (string.IsNullOrEmpty(photo_codes))
                    {
                        continue;
                    }

                    PhotoId_list.Add(new KeyValuePair<string, string>(photo_codes, username));
                    string imageUrl = "https://www.instagram.com/p/" + photo_codes + "/";
                    try
                    {
                        // Single quotes are doubled because InsertQuery only accepts raw SQL text.
                        DataBaseHandler.InsertQuery("insert into ScrapedImage(Username,ImageURL,ImageID) values('" + username.Replace("'", "''") + "','" + imageUrl.Replace("'", "''") + "','" + photo_codes.Replace("'", "''") + "')", "ScrapedImage");
                    }
                    catch (Exception ex)
                    {
                        GlobusLogHelper.log.Error("Error ==> " + ex.StackTrace);
                    }
                }
            }
        }

        // ---- Phase 2: collect commenter usernames from every photo page ----
        foreach (KeyValuePair<string, string> photo in PhotoId_list)
        {
            if (ListofUser_commentOnphoto.Count >= noOfUserToScrape)
            {
                break;
            }

            try
            {
                string sourceUsername = photo.Value;
                string url = "https://www.instagram.com/p/" + photo.Key + "/";
                string responce = Obj_Globosoft.getHtmlfromUrl_withProxt(new Uri(url), ProxyAd, proxyPortNumber, ProxyUsername, ProxyPass);
                if (string.IsNullOrEmpty(responce) || !responce.Contains("comments\":{\"count"))
                {
                    continue;
                }

                foreach (string itemm in Regex.Split(responce, "text"))
                {
                    if (!itemm.Contains("username"))
                    {
                        continue;
                    }
                    if (ListofUser_commentOnphoto.Count >= noOfUserToScrape)
                    {
                        break;
                    }

                    string Username_List = Utils.getBetween(itemm, "username\":\"", "\"");
                    if (string.IsNullOrEmpty(Username_List) || ListofUser_commentOnphoto.Contains(Username_List))
                    {
                        continue;
                    }

                    ListofUser_commentOnphoto.Add(Username_List);

                    try
                    {
                        GlobusLogHelper.log.Info("ScrapeUser===>" + Username_List + "Form==>" + sourceUsername);
                        string CSVData = "WithoutLogin" + "," + sourceUsername.Replace(",", string.Empty) + "," + url.Replace(",", string.Empty) + "," + Username_List.Replace(",", string.Empty);
                        GlobusFileHelper.ExportDataCSVFile(CVSHeader_PhotoCommentUser, CSVData, CSVPath_PhotoCommentUrl);
                    }
                    catch (Exception ex)
                    {
                        GlobusLogHelper.log.Error("Error ==> " + ex.Message);
                    }

                    try
                    {
                        // The commenter name comes from scraped page content; double single
                        // quotes so it cannot break out of the SQL string literal.
                        DataBaseHandler.InsertQuery("insert into ScrapedUsername(Username) values('" + Username_List.Replace("'", "''") + "')", "ScrapedUsername");
                    }
                    catch (Exception ex)
                    {
                        GlobusLogHelper.log.Error("Error ==> " + ex.Message);
                    }
                }
            }
            catch (Exception ex)
            {
                GlobusLogHelper.log.Error("Error:" + ex.StackTrace);
            }
        }
    }
    catch (Exception ex)
    {
        GlobusLogHelper.log.Error("Error:" + ex.StackTrace);
    }
    finally
    {
        GlobusLogHelper.log.Info("!! proccess Completed Successfully Scrape Comment User !!");
    }
}