/// <summary>
/// Fetches one page of reviews for an app from the store's reviews endpoint.
/// </summary>
/// <param name="appID">Store identifier of the app whose reviews are requested.</param>
/// <param name="reviewsPage">Zero-based page of reviews to fetch.</param>
/// <param name="isUsingProxies">When true, routes the request through a rotating proxy.</param>
/// <returns>Raw HTTP response body returned by the reviews endpoint.</returns>
public static string GetAppReviews(string appID, int reviewsPage, bool isUsingProxies = false)
{
    // The HTTP handler is disposable - scope it to this single request.
    using (WebRequests request = new WebRequests())
    {
        // Header / encoding configuration mimicking a regular browser form post.
        request.Host = Consts.HOST;
        request.Origin = Consts.ORIGIN;
        request.Encoding = "utf-8";
        request.AllowAutoRedirect = true;
        request.Accept = "*/*";
        request.UserAgent = Consts.USER_AGENT;
        request.ContentType = "application/x-www-form-urlencoded;charset=UTF-8";
        request.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;
        request.Headers.Add(Consts.ACCEPT_LANGUAGE);

        // Optional proxy rotation.
        if (isUsingProxies)
        {
            request.Proxy = ProxiesLoader.GetWebProxy();
        }

        // Page index and app id are injected into the POST body template.
        string body = String.Format(Consts.REVIEWS_POST_DATA, reviewsPage, appID);

        return request.Post(Consts.REVIEWS_URL, body);
    }
}
/// <summary>
/// Logs the user out by issuing a "logout" request to the service.
/// BUGFIX: this was "async void", which cannot be awaited and makes any thrown
/// exception unobservable; it now returns a Task so callers can await/observe it.
/// </summary>
/// <param name="username">Account name being logged out.</param>
/// <param name="authToken">Session token used to sign the request.</param>
public static async Task Logout(string username, string authToken)
{
    long timestamp = Timestamps.GenerateRetardedTimestamp();
    string timestampText = timestamp.ToString(CultureInfo.InvariantCulture);

    var postData = new Dictionary<string, string>
    {
        { "json", "{}" },
        { "username", username },
        { "timestamp", timestampText }
    };

    // The response carries no useful payload: the original code returned regardless
    // of status code, so the result is deliberately not inspected here either.
    await WebRequests.Post("logout", postData, authToken, timestampText);
}
/// <summary>
/// Issues a POST request with retries; gives up after the retry budget is exhausted.
/// </summary>
/// <param name="client">Configured HTTP client (passed by ref to match existing callers).</param>
/// <param name="logger">Sink for error / debug diagnostics.</param>
/// <param name="url">Target URL.</param>
/// <param name="data">POST body.</param>
/// <returns>The last response body received (may be empty if every attempt failed).</returns>
public static string Post(ref WebRequests client, Logger logger, string url, string data)
{
    int attemptsLeft = 10;
    string html = String.Empty;

    while (true)
    {
        // Attempt the request; failures are logged and fall through to the retry path.
        try
        {
            html = client.Post(url, data, true);
        }
        catch (Exception ex)
        {
            logger.Error(ex);
        }

        // A non-blank body together with HTTP 200 counts as success.
        if (!String.IsNullOrWhiteSpace(html) && client.StatusCode == HttpStatusCode.OK)
        {
            break;
        }

        attemptsLeft -= 1;
        logger.Debug(String.Format("Status Code not OK. Retries left: {0}", attemptsLeft));
        logger.Debug("StatusCode = " + client.StatusCode + " Message = " + client.Error);
        logger.Debug("Html Response = " + html);

        // Polite, jittered back-off between attempts.
        Thread.Sleep(TimeSpan.FromSeconds(_rnd.Next(2, 5)));

        if (attemptsLeft < 0)
        {
            break;
        }
    }

    return html;
}
/// <summary>
/// Uploads client events and snap metadata to the "update_snaps" endpoint.
/// </summary>
/// <param name="events">Event records, serialized as JSON into the form body.</param>
/// <param name="snapInfo">Per-snap metadata, serialized as JSON into the form body.</param>
/// <param name="username">Account name issuing the update.</param>
/// <param name="authToken">Session token used to sign the request.</param>
/// <returns>True when the server answers HTTP 200, false otherwise.</returns>
public static async Task<bool> SendEvents(Dictionary<string, object>[] events, Dictionary<string, Dictionary<string, double>> snapInfo, string username, string authToken)
{
    long timestamp = Timestamps.GenerateRetardedTimestamp();
    string timestampText = timestamp.ToString(CultureInfo.InvariantCulture);

    var payload = new Dictionary<string, string>
    {
        { "events", JsonConvert.SerializeObject(events) },
        { "json", JsonConvert.SerializeObject(snapInfo) },
        { "username", username },
        { "timestamp", timestampText }
    };

    HttpResponseMessage response = await WebRequests.Post("update_snaps", payload, authToken, timestampText);

    // Success is determined purely by the status code.
    return response.StatusCode == HttpStatusCode.OK;
}
/// <summary>
/// Toggles the account's privacy setting via the "settings" endpoint.
/// </summary>
/// <param name="username">Account name to update.</param>
/// <param name="authToken">Session token used to sign the request.</param>
/// <param name="isPrivate">True to make the account private, false for public.</param>
/// <returns>True when the server answers HTTP 200, false otherwise.</returns>
public static async Task<bool> UpdateAccountPrivacy(string username, string authToken, bool isPrivate)
{
    // The endpoint expects the privacy flag as 0 (public) or 1 (private).
    int privacy = isPrivate ? 1 : 0;

    long timestamp = Timestamps.GenerateRetardedTimestamp();
    string timestampText = timestamp.ToString(CultureInfo.InvariantCulture);

    var payload = new Dictionary<string, string>
    {
        { "username", username },
        { "action", "updatePrivacy" },
        { "privacySetting", privacy.ToString() },
        { "timestamp", timestampText }
    };

    HttpResponseMessage response = await WebRequests.Post("settings", payload, authToken, timestampText);

    return response.StatusCode == HttpStatusCode.OK;
}
/// <summary>
/// Submits a new leaderboard score; the score object is serialized with
/// snake_case property names before being posted.
/// </summary>
/// <param name="newScore">Score payload to submit.</param>
/// <param name="onSuccess">Invoked with the parsed server response on success.</param>
/// <param name="onError">Invoked with the raw error text on failure.</param>
public void AddScore(
    NewLeaderBoardScore newScore,
    Action<LeaderBoardScoreModel> onSuccess,
    Action<string> onError
)
{
    // The backend expects snake_case JSON keys.
    var serializerSettings = new JsonSerializerSettings()
    {
        ContractResolver = new DefaultContractResolver
        {
            NamingStrategy = new SnakeCaseNamingStrategy()
        }
    };
    string body = JsonConvert.SerializeObject(newScore, serializerSettings);

    WebRequests.Post(
        URL,
        body,
        (response) =>
        {
            var parsedScore = JsonConvert.DeserializeObject<LeaderBoardScoreModel>(response);
            onSuccess(parsedScore);
        },
        (error) =>
        {
            Debug.Log(error);
            onError(error);
        }
    );
}
/// <summary>
/// Retrieves one page of app reviews from the store's reviews endpoint.
/// </summary>
/// <param name="appID">Store identifier of the target app.</param>
/// <param name="reviewsPage">Zero-based reviews page index.</param>
/// <param name="isUsingProxies">When true, the request goes through a rotating proxy.</param>
/// <returns>Raw response body from the reviews endpoint.</returns>
public static string GetAppReviews (string appID, int reviewsPage, bool isUsingProxies = false)
{
    // Disposable HTTP handler scoped to this call.
    using (WebRequests client = new WebRequests ())
    {
        // Browser-like request configuration.
        client.Host = Consts.HOST;
        client.Origin = Consts.ORIGIN;
        client.Encoding = "utf-8";
        client.AllowAutoRedirect = true;
        client.Accept = "*/*";
        client.UserAgent = Consts.USER_AGENT;
        client.ContentType = "application/x-www-form-urlencoded;charset=UTF-8";
        client.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;
        client.Headers.Add (Consts.ACCEPT_LANGUAGE);

        // Proxy support is opt-in per call.
        if (isUsingProxies)
        {
            client.Proxy = ProxiesLoader.GetWebProxy ();
        }

        // Build the form body from the template, then fire the request.
        string formBody = String.Format (Consts.REVIEWS_POST_DATA, reviewsPage, appID);
        string reviewsHtml = client.Post (Consts.REVIEWS_URL, formBody);
        return reviewsHtml;
    }
}
/// <summary>
/// Polls the "updates" endpoint and returns the refreshed account state.
/// </summary>
/// <param name="username">Account name to refresh.</param>
/// <param name="authToken">Session token used to sign the request.</param>
/// <returns>The parsed account when the session is still valid; null on HTTP
/// failure or when the server reports the session as logged out.</returns>
public static async Task<Account> Update(string username, string authToken)
{
    long timestamp = Timestamps.GenerateRetardedTimestamp();
    string timestampText = timestamp.ToString(CultureInfo.InvariantCulture);

    var payload = new Dictionary<string, string>
    {
        { "username", username },
        { "timestamp", timestampText }
    };

    HttpResponseMessage response = await WebRequests.Post("updates", payload, authToken, timestampText);

    if (response.StatusCode != HttpStatusCode.OK)
    {
        return null;
    }

    string body = await response.Content.ReadAsStringAsync();
    Account account = await JsonConvert.DeserializeObjectAsync<Account>(body);

    // A 200 with Logged == false still means the session is dead.
    return account.Logged ? account : null;
}
// Extracts the app id from a Play Store url and fetches its "getdoc" document.
// NOTE(review): this looks like an unfinished stub - the server response is
// fetched but never parsed, and the method unconditionally returns null.
// Presumably the permissions were meant to be parsed out of `response`; confirm
// against callers before relying on the return value.
static string[] getPermissions(WebRequests server, string appUrl)
{
    // Pull the "id=..." query-string value out of the app url.
    var match = Regex.Match(appUrl, "id=([^&]*)");
    if (match.Success)
    {
        string idApp = match.Groups[1].Value;
        // Response is currently discarded (see note above).
        string response = server.Post("https://play.google.com/store/xhr/getdoc?authuser=0", "xhr=1&ids=" + idApp);
    }
    return null;
}
/// <summary>
/// Updates the account's story privacy setting: EVERYONE, FRIENDS, or CUSTOM
/// (friends-only with an explicit block list).
/// BUGFIX: the block-list serializer used friendsToBlock.IndexOf(s) to decide
/// whether to append a trailing comma; IndexOf returns the FIRST occurrence, so
/// duplicate names produced a malformed list (and the scan was O(n^2)). The list
/// is now built with string.Join, which is correct for any input.
/// </summary>
/// <param name="username">Account name to update.</param>
/// <param name="authToken">Session token used to sign the request.</param>
/// <param name="friendsOnly">True to restrict stories to friends.</param>
/// <param name="friendsToBlock">Optional friends excluded even in friends-only mode.</param>
/// <returns>True when the server answers HTTP 200, false otherwise.</returns>
public static async Task<bool> UpdateStoryPrivacy(string username, string authToken, bool friendsOnly, List<string> friendsToBlock = null)
{
    // Map the two flags onto the server's three-valued setting.
    string privacySetting;
    if (!friendsOnly)
    {
        privacySetting = "EVERYONE";
    }
    else if (friendsToBlock == null)
    {
        privacySetting = "FRIENDS";
    }
    else
    {
        privacySetting = "CUSTOM";
    }

    long timestamp = Timestamps.GenerateRetardedTimestamp();
    string timestampText = timestamp.ToString(CultureInfo.InvariantCulture);

    var postData = new Dictionary<string, string>
    {
        { "username", username },
        { "action", "updateStoryPrivacy" },
        { "privacySetting", privacySetting },
        { "timestamp", timestampText }
    };

    // CUSTOM mode also ships the blocked-friends list as ['a','b',...].
    if (friendsOnly && friendsToBlock != null)
    {
        var quotedNames = new List<string>(friendsToBlock.Count);
        foreach (string friendName in friendsToBlock)
        {
            quotedNames.Add(string.Format("'{0}'", friendName));
        }
        postData.Add("storyFriendsToBlock", string.Format("[{0}]", string.Join(",", quotedNames)));
    }

    HttpResponseMessage response = await WebRequests.Post("settings", postData, authToken, timestampText);

    return response.StatusCode == HttpStatusCode.OK;
}
/// <summary>
/// Downloads a snap's media blob and returns it decrypted (when needed).
/// </summary>
/// <param name="snapId">Identifier of the snap whose media is requested.</param>
/// <param name="username">Account name issuing the request.</param>
/// <param name="authToken">Session token used to sign the request.</param>
/// <returns>The valid media bytes, or null on HTTP failure or when neither the
/// raw nor the decrypted payload validates as media.</returns>
public static async Task<byte[]> GetBlob(string snapId, string username, string authToken)
{
    long timestamp = Timestamps.GenerateRetardedTimestamp();
    string timestampText = timestamp.ToString(CultureInfo.InvariantCulture);

    var payload = new Dictionary<string, string>
    {
        { "id", snapId },
        { "username", username },
        { "timestamp", timestampText }
    };

    HttpResponseMessage response = await WebRequests.Post("blob", payload, authToken, timestampText);

    if (response.StatusCode != HttpStatusCode.OK)
    {
        return null;
    }

    byte[] blob = await response.Content.ReadAsByteArrayAsync();

    // Some blobs arrive in the clear - accept them as-is.
    if (Blob.ValidateMediaBlob(blob))
    {
        return blob;
    }

    // Otherwise attempt decryption and re-validate the result.
    byte[] decrypted = Blob.DecryptBlob(blob);
    return Blob.ValidateMediaBlob(decrypted) ? decrypted : null;
}
/// <summary>
/// Authenticates against the "login" endpoint.
/// </summary>
/// <param name="username">Account name.</param>
/// <param name="password">Account password.</param>
/// <returns>A (status, account) pair: Success with the parsed account,
/// InvalidCredentials with the (not-logged) account, or ServerError with null
/// when the HTTP request itself failed.</returns>
public static async Task<Tuple<TempEnumHolder.LoginStatus, Account>> Login(string username, string password)
{
    long timestamp = Timestamps.GenerateRetardedTimestamp();
    string timestampText = timestamp.ToString(CultureInfo.InvariantCulture);

    var payload = new Dictionary<string, string>
    {
        { "password", password },
        { "username", username },
        { "timestamp", timestampText }
    };

    // Login is signed with the static (pre-auth) token.
    HttpResponseMessage response = await WebRequests.Post("login", payload, KeyVault.StaticToken, timestampText);

    if (response.StatusCode != HttpStatusCode.OK)
    {
        return new Tuple<TempEnumHolder.LoginStatus, Account>(TempEnumHolder.LoginStatus.ServerError, null);
    }

    string body = await response.Content.ReadAsStringAsync();
    Account account = await JsonConvert.DeserializeObjectAsync<Account>(body);

    // HTTP 200 with Logged == false means the credentials were rejected.
    TempEnumHolder.LoginStatus status = account.Logged
        ? TempEnumHolder.LoginStatus.Success
        : TempEnumHolder.LoginStatus.InvalidCredentials;

    return new Tuple<TempEnumHolder.LoginStatus, Account>(status, account);
}
/// <summary>
/// Issues a friend-related action (add, delete, block, ...) via the "friend" endpoint.
/// </summary>
/// <param name="friend">Username the action applies to.</param>
/// <param name="action">Action verb understood by the endpoint.</param>
/// <param name="username">Account name issuing the action.</param>
/// <param name="authToken">Session token used to sign the request.</param>
/// <param name="postDataEntries">Optional extra form fields merged into the request.</param>
/// <returns>The parsed action result, or null on HTTP failure.</returns>
public static async Task<FriendAction> Friend(string friend, string action, string username, string authToken, Dictionary<string, string> postDataEntries = null)
{
    long timestamp = Timestamps.GenerateRetardedTimestamp();
    string timestampText = timestamp.ToString(CultureInfo.InvariantCulture);

    var payload = new Dictionary<string, string>
    {
        { "friend", friend },
        { "action", action },
        { "username", username },
        { "timestamp", timestampText }
    };

    // Merge any caller-supplied extra fields.
    if (postDataEntries != null)
    {
        foreach (KeyValuePair<string, string> entry in postDataEntries)
        {
            payload.Add(entry.Key, entry.Value);
        }
    }

    HttpResponseMessage response = await WebRequests.Post("friend", payload, authToken, timestampText);

    if (response.StatusCode != HttpStatusCode.OK)
    {
        return null;
    }

    string body = await response.Content.ReadAsStringAsync();
    return await JsonConvert.DeserializeObjectAsync<FriendAction>(body);
}
/// <summary>
/// Updates the account's birthday (month and day) via the "settings" endpoint.
/// </summary>
/// <param name="username">Account name to update.</param>
/// <param name="authToken">Session token used to sign the request.</param>
/// <param name="birthMonth">Birth month number.</param>
/// <param name="birthDay">Birth day number.</param>
/// <returns>True when the server answers HTTP 200, false otherwise.</returns>
public static async Task<bool> UpdateBirthday(string username, string authToken, int birthMonth, int birthDay)
{
    long timestamp = Timestamps.GenerateRetardedTimestamp();
    string timestampText = timestamp.ToString(CultureInfo.InvariantCulture);

    var payload = new Dictionary<string, string>
    {
        { "username", username },
        { "action", "updateBirthday" },
        // The endpoint expects "month-day".
        { "birthday", string.Format("{0}-{1}", birthMonth, birthDay) },
        { "timestamp", timestampText }
    };

    HttpResponseMessage response = await WebRequests.Post("settings", payload, authToken, timestampText);

    return response.StatusCode == HttpStatusCode.OK;
}
/// <summary>
/// Toggles whether the account may view mature content, via the "settings" endpoint.
/// </summary>
/// <param name="username">Account name to update.</param>
/// <param name="authToken">Session token used to sign the request.</param>
/// <param name="canViewMatureContent">New value for the mature-content flag.</param>
/// <returns>True when the server answers HTTP 200, false otherwise.</returns>
public static async Task<bool> UpdateMaturitySettings(string username, string authToken, bool canViewMatureContent)
{
    long timestamp = Timestamps.GenerateRetardedTimestamp();
    string timestampText = timestamp.ToString(CultureInfo.InvariantCulture);

    var payload = new Dictionary<string, string>
    {
        { "username", username },
        { "action", "updateCanViewMatureContent" },
        // NOTE: bool.ToString() yields "True"/"False" (capitalized) - kept as-is
        // since that is what the original request sent.
        { "canViewMatureContent", canViewMatureContent.ToString() },
        { "timestamp", timestampText }
    };

    HttpResponseMessage response = await WebRequests.Post("settings", payload, authToken, timestampText);

    return response.StatusCode == HttpStatusCode.OK;
}
/// <summary>
/// Performs the Pocket (getpocket.com) login flow: fetch the login page, scrape
/// the hidden "form_check" anti-CSRF token out of its HTML, then POST the
/// credentials together with that token to the login endpoint.
/// </summary>
/// <param name="client">HTTP client used for both requests (cookies are reset first).</param>
/// <param name="username">Pocket account name / e-mail.</param>
/// <param name="password">Pocket account password.</param>
private static void PerformLoginOnPocket(WebRequests client, string username, string password)
{
    // Start from a clean cookie jar and configure browser-like headers.
    client.ClearCookies();
    client.Host = "getpocket.com";
    client.UserAgent = "Web Crawling 101 Book - Used for educational purposes only";
    client.Referer = "https://getpocket.com/login?e=4";
    client.Origin = "https://getpocket.com";
    client.Timeout = 18000; // 18 Seconds

    // Step 1: GET the login page - the form_check token is hidden in its HTML.
    Console.WriteLine(" => Executing GET Request for Home Page");
    string loginPageHtml = client.Get("https://getpocket.com/login?e=4");

    string formCheck = ExtractFormCheckParameter(loginPageHtml);
    Console.WriteLine(" => Extracted FormCheck parameter hidden within the HTML '{0}'", formCheck);

    // Step 2: POST the credentials plus the scraped token to the login endpoint.
    string loginBody = String.Format("feed_id={0}&password={1}&route=&form_check={2}&src=&source=email&source_page=%2Flogin&is_ajax=1", username, password, formCheck);

    Console.WriteLine(" => Performing Login");
    string loginUrl = "https://getpocket.com/login_process.php";
    string loginResponse = client.Post(loginUrl, loginBody);

    Console.WriteLine(" => Login Status Code : {0}", client.StatusCode);
    Console.WriteLine(" => Login Response Text : {0}", loginResponse);
}
/// <summary>
/// Fetches "best friends" statistics for the given friends via the "bests" endpoint.
/// </summary>
/// <param name="friends">Friends whose usernames are sent to the server.</param>
/// <param name="username">Account name issuing the request.</param>
/// <param name="authToken">Session token used to sign the request.</param>
/// <returns>Map of friend username to their Best record, or null on HTTP failure.</returns>
public static async Task<Dictionary<string, Best>> GetBests(List<Friend> friends, string username, string authToken)
{
    // Flatten the friend objects into a plain array of usernames.
    var friendNames = new string[friends.Count];
    for (int idx = 0; idx < friendNames.Length; idx++)
    {
        friendNames[idx] = friends[idx].Name;
    }

    long timestamp = Timestamps.GenerateRetardedTimestamp();
    string timestampText = timestamp.ToString(CultureInfo.InvariantCulture);

    var payload = new Dictionary<string, string>
    {
        { "friend_usernames", JsonConvert.SerializeObject(friendNames) },
        { "username", username },
        { "timestamp", timestampText }
    };

    HttpResponseMessage response = await WebRequests.Post("bests", payload, authToken, timestampText);

    if (response.StatusCode != HttpStatusCode.OK)
    {
        return null;
    }

    string body = await response.Content.ReadAsStringAsync();
    return await JsonConvert.DeserializeObjectAsync<Dictionary<string, Best>>(body);
}
/// <summary>
/// Executes a Play Store search for the given term and pages through the results
/// using the "pagToken" pagination token scraped from each response, queueing every
/// app url that is neither processed nor already queued.
/// FIX: removed the unused "initialSkip"/"currentMultiplier" locals - paging here
/// is token based, so the skip counters were dead code.
/// </summary>
/// <param name="searchField">Search term used to crawl the store.</param>
private static void CrawlStore (string searchField)
{
    // Console Feedback
    Console.WriteLine ("Crawling Search Term : [ " + searchField + " ]");

    // Regex that extracts the "pagToken" (pagination token) out of the Play Store HTML.
    Regex pagTokenRegex = new Regex (@"GAEi+.+\:S\:.{11}\\42", RegexOptions.Compiled);

    // HTML Response
    string response;

    // Configuring MongoDB Wrapper (queue + processed-apps storage).
    MongoDBWrapper mongoDB = new MongoDBWrapper ();
    string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    // Ensuring the database has the proper index
    mongoDB.EnsureIndex ("Url");

    // Response Parser
    PlayStoreParser parser = new PlayStoreParser ();

    using (WebRequests server = new WebRequests ())
    {
        server.Host = Consts.HOST;

        // Executing Initial Request (first page of results).
        response = server.Post (String.Format (Consts.CRAWL_URL, searchField), Consts.INITIAL_POST_DATA);

        // Queueing app urls found on the first page.
        foreach (string url in parser.ParseAppUrls (response))
        {
            if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
            {
                Console.WriteLine (" . Queued App");
                mongoDB.AddToQueue (url);
                Thread.Sleep (250); // Hiccup
            }
            else
            {
                Console.WriteLine (" . Duplicated App. Skipped");
            }
        }

        // Paging through the remaining results, driven by the scraped token.
        int errorsCount = 0;
        do
        {
            // No token in the page means there are no further pages.
            var rgxMatch = pagTokenRegex.Match (response);
            if (!rgxMatch.Success)
            {
                break;
            }

            // Escape / trim the token before embedding it into the POST body.
            string pagToken = rgxMatch.Value.Replace (":S:", "%3AS%3A").Replace("\\42", String.Empty).Replace(@"\\u003d", String.Empty);
            string postData = String.Format (Consts.POST_DATA, pagToken);

            // Executing request for the next page of results.
            response = server.Post (String.Format (Consts.CRAWL_URL, searchField), postData);

            // On HTTP failure, count the error and retry the same token.
            if (server.StatusCode != System.Net.HttpStatusCode.OK)
            {
                LogWriter.Error ("Http Error", "Status Code [ " + server.StatusCode + " ]");
                errorsCount++;
                continue;
            }

            // Queueing app urls found on this page.
            foreach (string url in parser.ParseAppUrls (response))
            {
                if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                {
                    Console.WriteLine (" . Queued App");
                    mongoDB.AddToQueue (url);
                    Thread.Sleep (250); // Hiccup
                }
                else
                {
                    Console.WriteLine (" . Duplicated App. Skipped");
                }
            }
        } while (parser.AnyResultFound (response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
    }
}
/// <summary>
/// Executes a Search using the searchField as the search parameter,
/// paginates / scrolls the search results to the end adding all the url of apps
/// it finds to a AWS SQS queue.
/// </summary>
/// <param name="searchField">Search term used to crawl the store.</param>
private static void CrawlStore(string searchField)
{
    Console.WriteLine("Crawling Search Term : [ " + searchField + " ]");

    // Storage / queue wrapper setup.
    MongoDBWrapper mongoDB = new MongoDBWrapper();
    string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    // Parser for the returned store HTML.
    PlayStoreParser parser = new PlayStoreParser();

    using (WebRequests server = new WebRequests())
    {
        server.Host = Consts.HOST;

        // First page of results.
        string response = server.Post(Consts.CRAWL_URL, Consts.INITIAL_POST_DATA);

        // Queue every url from the first page that is not already known.
        foreach (string url in parser.ParseAppUrls(response))
        {
            bool alreadyKnown = mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url) || mongoDB.AppQueued(url);
            if (alreadyKnown)
            {
                Console.WriteLine(" . Duplicated App. Skipped");
            }
            else
            {
                Console.WriteLine(" . Queued App");
                mongoDB.AddToQueue(url);
            }
        }

        // Remaining pages: offset-based paging in steps of 48.
        const int pageSize = 48;
        int pageIndex = 1;
        int httpErrors = 0;
        do
        {
            string postData = String.Format(Consts.POST_DATA, (pageSize * pageIndex));

            response = server.Post(Consts.CRAWL_URL, postData);

            // On HTTP failure, count the error and retry the same offset.
            if (server.StatusCode != System.Net.HttpStatusCode.OK)
            {
                LogWriter.Error("Http Error", "Status Code [ " + server.StatusCode + " ]");
                httpErrors++;
                continue;
            }

            // Queue every url from this page that is not already known.
            foreach (string url in parser.ParseAppUrls(response))
            {
                bool alreadyKnown = mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url) || mongoDB.AppQueued(url);
                if (alreadyKnown)
                {
                    Console.WriteLine(" . Duplicated App. Skipped");
                }
                else
                {
                    Console.WriteLine(" . Queued App");
                    mongoDB.AddToQueue(url);
                }
            }

            pageIndex++;
        } while (parser.AnyResultFound(response) && httpErrors <= Consts.MAX_REQUEST_ERRORS);
    }
}
/// <summary>
/// Executes a Play Store search for the given term and pages through the results
/// using the "pagToken" pagination token, queueing every new app url into the Mongo
/// "QUEUE" collection. A local hashset of seen urls detects when paging loops back
/// onto itself, which terminates the crawl.
/// FIX: removed the unused "initialSkip"/"currentMultiplier" locals - paging here
/// is token based, so the skip counters were dead code.
/// </summary>
/// <param name="searchField">Search term used to crawl the store.</param>
/// <param name="shouldUseProxies">When true, requests go through a rotating proxy.</param>
private static void CrawlStore (string searchField, bool shouldUseProxies)
{
    // Console Feedback
    _logger.Warn ("Crawling Search Term : [ " + searchField + " ]");

    // Hashset of urls used to keep track of what's been parsed already
    HashSet<String> foundUrls = new HashSet<String> ();

    // Control variable to avoid "Loop" on pagging
    bool isDonePagging = false;

    // Regex that extracts the "pagToken" (pagination token) out of the Play Store HTML.
    Regex pagTokenRegex = new Regex (@"GAEi+.+\:S\:.{11}\\42", RegexOptions.Compiled);

    // HTML Response
    string response;

    // Configuring MongoDB Wrapper (queue + processed-apps storage).
    MongoDBWrapper mongoDB = new MongoDBWrapper ();
    string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    // Ensuring the database has the proper index
    mongoDB.EnsureIndex ("Url");

    // Response Parser
    PlayStoreParser parser = new PlayStoreParser ();

    using (WebRequests server = new WebRequests ())
    {
        // Request configuration.
        server.Headers.Add (Consts.ACCEPT_LANGUAGE);
        server.Host = Consts.HOST;
        server.UserAgent = Consts.GITHUBURL;
        server.Encoding = "utf-8";

        // Checking for the need to use "HTTP Proxies"
        if (shouldUseProxies)
        {
            server.Proxy = ProxiesLoader.GetWebProxy ();
        }

        // Executing Initial Request (first page of results).
        response = server.Post (String.Format (Consts.CRAWL_URL, searchField), Consts.INITIAL_POST_DATA);

        // Queueing app urls found on the first page.
        foreach (string url in parser.ParseAppUrls (response))
        {
            foundUrls.Add (url);
            if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
            {
                mongoDB.AddToQueue (url);
                Thread.Sleep (250); // Hiccup
            }
        }

        // Paging through the remaining results, driven by the scraped token.
        int errorsCount = 0;
        do
        {
            // No token in the page means there are no further pages.
            var rgxMatch = pagTokenRegex.Match (response);
            if (!rgxMatch.Success)
            {
                break;
            }

            // Escape / trim the token before embedding it into the POST body.
            string pagToken = rgxMatch.Value.Replace (":S:", "%3AS%3A").Replace("\\42", String.Empty).Replace(@"\\u003d", String.Empty);
            string postData = String.Format (Consts.POST_DATA, pagToken);

            // Executing request for the next page of results.
            response = server.Post (String.Format (Consts.CRAWL_URL, searchField), postData);

            // On HTTP failure, count the error and retry the same token.
            if (server.StatusCode != System.Net.HttpStatusCode.OK)
            {
                _logger.Error ("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                errorsCount++;
                continue;
            }

            // Queueing app urls found on this page.
            foreach (string url in parser.ParseAppUrls (response))
            {
                // Seeing an url twice means paging looped back - stop crawling.
                if (foundUrls.Contains (url))
                {
                    isDonePagging = true;
                    break;
                }

                foundUrls.Add (url);
                if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                {
                    mongoDB.AddToQueue (url);
                    Thread.Sleep (250); // Hiccup
                }
            }
        } while (!isDonePagging && parser.AnyResultFound (response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
    }
}
/// <summary>
/// Crawls a Play Store category page and its offset-paginated continuations,
/// queueing every app url not already processed or queued. A hashset of seen urls
/// detects when paging loops back onto itself, which terminates the crawl.
/// BUGFIX: the paging request wrapped the url in String.Format with no format
/// arguments - pointless, and it would throw FormatException if the category url
/// ever contained '{' or '}'. Plain concatenation is used instead.
/// </summary>
/// <param name="categoryUrl">Url of the category landing page.</param>
/// <param name="categoryName">Human-readable category name (for logging only).</param>
/// <param name="shouldUseProxies">Currently unused by this method's request setup.</param>
private static void CrawlCategory (string categoryUrl, string categoryName, bool shouldUseProxies)
{
    // Console Feedback
    _logger.Warn ("Crawling Category : [ " + categoryName + " ]");

    // Hashset of urls used to keep track of what's been parsed already
    HashSet<String> foundUrls = new HashSet<String> ();

    // Control variable to avoid "Loop" on pagging
    bool isDonePagging = false;

    // HTML Response
    string response;

    // Configuring MongoDB Wrapper (queue + processed-apps storage).
    MongoDBWrapper mongoDB = new MongoDBWrapper ();
    string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    // Ensuring the database has the proper index
    mongoDB.EnsureIndex ("Url");

    // Response Parser
    PlayStoreParser parser = new PlayStoreParser ();

    using (WebRequests server = new WebRequests ())
    {
        // Request configuration.
        server.Headers.Add (Consts.ACCEPT_LANGUAGE);
        server.Host = Consts.HOST;
        server.UserAgent = Consts.GITHUBURL;
        server.Encoding = "utf-8";

        // Executing Initial Request (the category landing page is a plain GET).
        response = server.Get (categoryUrl);

        // Queueing app urls found on the landing page.
        foreach (string url in parser.ParseAppUrls (response))
        {
            // Saving found url on local hashset
            foundUrls.Add (url);

            if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
            {
                mongoDB.AddToQueue (url);
            }
        }

        // Paging through the category via skip offsets of 60.
        int baseSkip = 60;
        int currentMultiplier = 1;
        int errorsCount = 0;
        do
        {
            // Assembling new PostData with paging values
            string postData = String.Format (Consts.CATEGORIES_POST_DATA, (currentMultiplier * baseSkip), baseSkip);

            // Executing request for the next page of results.
            // (url built by concatenation - see BUGFIX note in the summary)
            response = server.Post (categoryUrl + "?authuser=0", postData);

            // On HTTP failure, count the error and retry the same offset.
            if (server.StatusCode != System.Net.HttpStatusCode.OK)
            {
                _logger.Error ("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                errorsCount++;
                continue;
            }

            // Queueing app urls found on this page.
            foreach (string url in parser.ParseAppUrls (response))
            {
                // If a certain app is found twice, it means that the "pagging" logic got stuck into a
                // Loop, so all the apps for this category were parsed already
                if (foundUrls.Contains (url))
                {
                    isDonePagging = true;
                    break;
                }

                // Saving found url on local hashset
                foundUrls.Add (url);

                if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                {
                    mongoDB.AddToQueue (url);
                }
            }

            // Incrementing Paging Multiplier
            currentMultiplier++;
        } while (!isDonePagging && errorsCount <= Consts.MAX_REQUEST_ERRORS);
    }
}
/// <summary>
/// Collects the short descriptions of apps listed under a Play Store category by
/// posting the category crawl url repeatedly with an increasing skip offset.
/// BUGFIX: the error counter used to be declared inside the cycle loop, so it was
/// reset to zero every cycle and "errorsCount &gt; MAX_REQUEST_ERRORS" could only
/// trigger when MAX_REQUEST_ERRORS was 0; it now accumulates across cycles.
/// Also removed the unused "exit:" label (compiler warning CS0164).
/// </summary>
/// <param name="category">Category identifier injected into the crawl url.</param>
/// <returns>List of the unique apps found across all crawl cycles.</returns>
public static List<AppShortDescription> CollectAppsShortInformationFromCategories(string category)
{
    List<AppShortDescription> parsedApps_list = new List<AppShortDescription>();

    log.Info("Crawling Category : [ " + category + " ]");

    int numberOfCyclesCompleted = 0;

    // Cumulative HTTP error count across all cycles (see BUGFIX note above).
    int errorsCount = 0;

    while (numberOfCyclesCompleted < Consts.CATEGORY_NUMBER_OF_CYCLES)
    {
        // Each cycle fetches the next CATEGORY_NUMBER_OF_APPS_PER_CYCLE-sized page.
        string crawlUrl = String.Format(Consts.CRAWL_URL_CATEGORY, category, "Russia");
        string postData = String.Format(Consts.POST_DATA_CATEGORY, Consts.CATEGORY_NUMBER_OF_APPS_PER_CYCLE * numberOfCyclesCompleted);
        numberOfCyclesCompleted++;

        // HTML Response
        string response = string.Empty;

        using (WebRequests server = new WebRequests())
        {
            server.Host = Consts.HOST;

            // Per-cycle statistics.
            int insertedAppCount = 0;
            int skippedAppCount = 0;

            response = server.Post(crawlUrl, postData);

            // Checking Server Status - a failed page is skipped, not retried.
            if (server.StatusCode != System.Net.HttpStatusCode.OK)
            {
                log.Error("Http Error - Status Code: " + server.StatusCode);
                errorsCount++;
                if (errorsCount > Consts.MAX_REQUEST_ERRORS)
                {
                    log.Info("Crawl Stopped: MAX_REQUEST_ERRORS reached");
                    break;
                }
                continue;
            }

            // Collecting apps, de-duplicated against what was already parsed.
            foreach (AppShortDescription asd in parser.ParseAppUrls(response))
            {
                if (!parsedApps_list.Contains(asd))
                {
                    parsedApps_list.Add(asd);
                    log.Info("Inserted App: " + asd);
                    ++insertedAppCount;
                }
                else
                {
                    ++skippedAppCount;
                    log.Info("Duplicated App. Skipped: " + asd);
                }
            }

            log.Info("Inserted App Count: " + insertedAppCount);
            log.Info("Skipped App Count: " + skippedAppCount);
            log.Info("Error Count: " + errorsCount + "\n");
        }
    }

    return parsedApps_list;
}
/// <summary>
/// Executes a Search using the searchField as the search parameter,
/// paginates / scrolls the search results to the end adding all the url of apps
/// it finds to a Mongo queue collection.
/// </summary>
/// <param name="searchField">Term submitted to the Play Store search.</param>
private static void CrawlStore (string searchField)
{
    // Console Feedback
    Console.WriteLine ("Crawling Search Term : [ " + searchField + " ]");

    // HTML Response
    string response;

    // Configuring MongoDB Wrapper
    MongoDBWrapper mongoDB = new MongoDBWrapper ();
    string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    // Response Parser
    PlayStoreParser parser = new PlayStoreParser ();

    // Executing Web Requests
    using (WebRequests server = new WebRequests ())
    {
        // Creating Request Object
        server.Host = Consts.HOST;

        // Executing Initial Request
        response = server.Post (Consts.CRAWL_URL, Consts.INITIAL_POST_DATA);

        // Parsing Links out of Html Page (Initial Request)
        foreach (string url in parser.ParseAppUrls (response))
        {
            // Queue the app unless it was already processed or queued before
            if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
            {
                Console.WriteLine (" . Queued App");
                mongoDB.AddToQueue (url);
            }
            else
            {
                // BUGFIX: this literal was split by a raw newline in the source
                // (invalid C#); restored from its duplicate in the paging loop below
                Console.WriteLine (" . Duplicated App. Skipped");
            }
        }

        // Executing Requests for more Play Store Links, paging by a skip offset
        int initialSkip = 48;
        int currentMultiplier = 1;
        int errorsCount = 0;
        do
        {
            // Assembling new PostData with paging values
            string postData = String.Format (Consts.POST_DATA, (initialSkip * currentMultiplier));

            // Executing request for values
            response = server.Post (Consts.CRAWL_URL, postData);

            // Checking Server Status - NOTE: "continue" retries the same page,
            // since the multiplier increment below is skipped
            if (server.StatusCode != System.Net.HttpStatusCode.OK)
            {
                LogWriter.Error ("Http Error", "Status Code [ " + server.StatusCode + " ]");
                errorsCount++;
                continue;
            }

            // Parsing Links
            foreach (string url in parser.ParseAppUrls (response))
            {
                if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                {
                    Console.WriteLine (" . Queued App");
                    mongoDB.AddToQueue (url);
                }
                else
                {
                    Console.WriteLine (" . Duplicated App. Skipped");
                }
            }

            // Incrementing Paging Multiplier
            currentMultiplier++;
        } while (parser.AnyResultFound (response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
    }
}
/// <summary>
/// Searches the Play Store for <paramref name="searchField"/> and pages through the
/// result stream (driven by the "pagTok" continuation token), collecting unique app urls.
/// </summary>
/// <param name="searchField">Search term used to build the crawl url.</param>
/// <param name="maxAppUrls">Upper bound of urls to collect; a value &lt;= 0 means unlimited.</param>
/// <returns>Set of distinct app urls found.</returns>
private static ISet <string> CollectAppUrls(string searchField, int maxAppUrls)
{
    ISet <string> resultUrls = new HashSet <string>();

    log.Info("Crawling Search Term : [ " + searchField + " ]");

    string crawlUrl = String.Format(Consts.CRAWL_URL, searchField);

    // HTML Response
    string response;

    // Executing Web Requests
    using (WebRequests server = new WebRequests())
    {
        // Creating Request Object
        server.Host = Consts.HOST;

        int insertedAppCount = 0;
        int skippedAppCount = 0;
        int errorsCount = 0;

        string postData = Consts.INITIAL_POST_DATA;
        do
        {
            // Executing Request
            response = server.Post(crawlUrl, postData);

            // Checking Server Status
            if (server.StatusCode != System.Net.HttpStatusCode.OK)
            {
                log.Error("Http Error - Status Code: " + server.StatusCode);
                errorsCount++;
                if (errorsCount > Consts.MAX_REQUEST_ERRORS)
                {
                    log.Info("Crawl Stopped: MAX_REQUEST_ERRORS reached");
                    break;
                }
                continue;
            }

            // Parsing Links out of Html Page
            foreach (string url in parser.ParseAppUrls(response))
            {
                // HashSet.Add reports whether the url was new, replacing the
                // previous Contains + Add double lookup
                if (resultUrls.Add(url))
                {
                    log.Info("Inserted App: " + url);
                    ++insertedAppCount;

                    if (maxAppUrls > 0 && insertedAppCount >= maxAppUrls)
                    {
                        goto exit; // cap reached - bail out of both loops
                    }
                }
                else
                {
                    ++skippedAppCount;
                    log.Info("Duplicated App. Skipped: " + url);
                }
            }

            // Get pagTok value that will be used to fetch next stream data.
            // If not found, that means we have reached the end of stream.
            string pagTok = getPageToken(response);
            if (pagTok.Length == 0)
            {
                break;
            }

            // Build the next post data
            postData = String.Format(Consts.POST_DATA, pagTok);
        } while (true);

exit:
        log.Info("Inserted App Count: " + insertedAppCount);
        log.Info("Skipped App Count: " + skippedAppCount);
        log.Info("Error Count: " + errorsCount + "\n");
    }

    return resultUrls;
}
/// <summary>
/// Executes a Search using the searchField as the search parameter,
/// paginates / scrolls the search results to the end adding all the url of apps
/// it finds to a Mongo "QUEUE" collection.
/// </summary>
/// <param name="searchField">Term submitted to the Play Store search.</param>
/// <param name="shouldUseProxies">True to route requests through a proxy from ProxiesLoader.</param>
private static void CrawlStore(string searchField, bool shouldUseProxies)
{
    // Console Feedback
    _logger.Warn("Crawling Search Term : [ " + searchField + " ]");

    // Hashset of urls used to keep track of what's been parsed already
    HashSet <String> foundUrls = new HashSet <String> ();

    // Control variable to avoid "Loop" on pagging
    bool isDonePagging = false;

    // Compiling Regular Expression used to parse the "pagToken" out of the Play Store
    Regex pagTokenRegex = new Regex(@"GAEi+.+\:S\:.{11}\\x22", RegexOptions.Compiled);

    // HTML Response
    string response;

    // Response Parser
    PlayStoreParser parser = new PlayStoreParser();

    // Executing Web Requests
    using (WebRequests server = new WebRequests())
    {
        // Creating Request Object
        server.Headers.Add(Consts.ACCEPT_LANGUAGE);
        server.Host = Consts.HOST;
        server.UserAgent = Consts.GITHUBURL;
        server.Encoding = "utf-8";

        // Checking for the need to use "HTTP Proxies"
        if (shouldUseProxies)
        {
            server.Proxy = ProxiesLoader.GetWebProxy();
        }

        // Executing Initial Request
        response = server.Post(String.Format(Consts.CRAWL_URL, searchField), Consts.INITIAL_POST_DATA);

        // Parsing Links out of Html Page (Initial Request)
        foreach (string url in parser.ParseAppUrls(response))
        {
            foundUrls.Add(url);

            // Queue the url unless it was already processed or queued before
            if ((!_mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!_mongoDB.AppQueued(url)))
            {
                _mongoDB.AddToQueue(url);
                Thread.Sleep(250); // Hiccup - throttles queue insertions
            }
        }

        // Executing Requests for more Play Store Links.
        // NOTE: removed the dead "initialSkip" / "currentMultiplier" locals - paging
        // here is driven exclusively by the pagToken parsed from the last response.
        int errorsCount = 0;
        do
        {
            // Finding pagToken from HTML; no match means there are no further pages
            var rgxMatch = pagTokenRegex.Match(response);
            if (!rgxMatch.Success)
            {
                break;
            }

            // Reading Match from Regex, and applying needed replacements
            string pagToken = rgxMatch.Value.Replace(":S:", "%3AS%3A").Replace("\\x22", String.Empty).Replace(@"\\u003d", String.Empty);

            // Assembling new PostData with paging values
            string postData = String.Format(Consts.POST_DATA, pagToken);

            // Executing request for values
            response = server.Post(String.Format(Consts.CRAWL_URL, searchField), postData);

            // Checking Server Status
            if (server.StatusCode != System.Net.HttpStatusCode.OK)
            {
                _logger.Error("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                errorsCount++;
                continue;
            }

            // Parsing Links
            foreach (string url in parser.ParseAppUrls(response))
            {
                // HashSet.Add returns false for an url seen before: the "pagging"
                // logic wrapped around, so all apps of this search were visited already
                if (!foundUrls.Add(url))
                {
                    isDonePagging = true;
                    break;
                }

                // Checks whether the app have been already processed or is queued
                if ((!_mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!_mongoDB.AppQueued(url)))
                {
                    // Than, queue it :)
                    _mongoDB.AddToQueue(url);
                    Thread.Sleep(250); // Hiccup
                }
            }
        } while (!isDonePagging && parser.AnyResultFound(response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
    }
}
/// <summary>
/// Crawls one Play Store category: fetches the category page, then posts paged
/// requests (baseSkip-sized steps) until the paging wraps around or the error
/// budget is exhausted, queueing unseen app urls into MongoDB.
/// </summary>
/// <param name="categoryUrl">Url of the category page to crawl.</param>
/// <param name="categoryName">Human-readable category name, used for logging only.</param>
/// <param name="shouldUseProxies">Declared for signature parity; not used by this method.</param>
private static void CrawlCategory(string categoryUrl, string categoryName, bool shouldUseProxies)
{
    // Console Feedback
    _logger.Warn("Crawling Category : [ " + categoryName + " ]");

    // Hashset of urls used to keep track of what's been parsed already
    HashSet <String> foundUrls = new HashSet <String> ();

    // Control variable to avoid "Loop" on pagging
    bool isDonePagging = false;

    // HTML Response
    string response;

    // Configuring MongoDB Wrapper
    MongoDBWrapper mongoDB = new MongoDBWrapper();
    string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    // Ensuring the database has the proper index
    mongoDB.EnsureIndex("Url");

    // Response Parser
    PlayStoreParser parser = new PlayStoreParser();

    // Executing Web Requests
    using (WebRequests server = new WebRequests())
    {
        // Creating Request Object
        server.Headers.Add(Consts.ACCEPT_LANGUAGE);
        server.Host = Consts.HOST;
        server.UserAgent = Consts.GITHUBURL;
        server.Encoding = "utf-8";

        // Executing Initial Request
        response = server.Get(categoryUrl);

        // Parsing Links out of Html Page (Initial Request)
        foreach (string url in parser.ParseAppUrls(response))
        {
            // Saving found url on local hashset
            foundUrls.Add(url);

            // Queue the url unless it was already processed or queued before
            if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
            {
                mongoDB.AddToQueue(url);
            }
        }

        // Executing Requests for more Play Store Links
        int baseSkip = 60;
        int currentMultiplier = 1;
        int errorsCount = 0;
        do
        {
            // Assembling new PostData with paging values
            string postData = String.Format(Consts.CATEGORIES_POST_DATA, (currentMultiplier * baseSkip), baseSkip);

            // Executing request for values.
            // BUGFIX: was String.Format(categoryUrl + "?authuser=0") with no format
            // arguments - a no-op that throws FormatException if the url contains braces.
            response = server.Post(categoryUrl + "?authuser=0", postData);

            // Checking Server Status
            if (server.StatusCode != System.Net.HttpStatusCode.OK)
            {
                _logger.Error("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                errorsCount++;
                continue;
            }

            // Parsing Links
            foreach (string url in parser.ParseAppUrls(response))
            {
                // If a certain app is found twice, the "pagging" logic got stuck into a
                // loop, so all the apps for this category were parsed already.
                // HashSet.Add returns false exactly in that case.
                if (!foundUrls.Add(url))
                {
                    isDonePagging = true;
                    break;
                }

                // Checks whether the app have been already processed or is queued
                if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
                {
                    // Than, queue it :)
                    mongoDB.AddToQueue(url);
                }
            }

            // Incrementing Paging Multiplier
            currentMultiplier++;
        } while (!isDonePagging && errorsCount <= Consts.MAX_REQUEST_ERRORS);
    }
}
/// <summary>
/// Executes a Search using the searchField as the search parameter,
/// paginates / scrolls the search results to the end adding all the url of apps
/// it finds to a Mongo queue collection.
/// </summary>
/// <param name="searchField">Term submitted to the Play Store search.</param>
private static void CrawlStore(string searchField)
{
    // Console Feedback
    Console.WriteLine("Crawling Search Term : [ " + searchField + " ]");

    // Compiling Regular Expression used to parse the "pagToken" out of the Play Store
    Regex pagTokenRegex = new Regex(@"GAEi+.+\:S\:.{11}\\42", RegexOptions.Compiled);

    // HTML Response
    string response;

    // Configuring MongoDB Wrapper
    MongoDBWrapper mongoDB = new MongoDBWrapper();
    string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    // Ensuring the database has the proper index
    mongoDB.EnsureIndex("Url");

    // Response Parser
    PlayStoreParser parser = new PlayStoreParser();

    // Executing Web Requests
    using (WebRequests server = new WebRequests())
    {
        // Creating Request Object
        server.Host = Consts.HOST;

        // Executing Initial Request
        response = server.Post(String.Format(Consts.CRAWL_URL, searchField), Consts.INITIAL_POST_DATA);

        // Parsing Links out of Html Page (Initial Request)
        foreach (string url in parser.ParseAppUrls(response))
        {
            // Queue the app unless it was already processed or queued before
            if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
            {
                Console.WriteLine(" . Queued App");
                mongoDB.AddToQueue(url);
                Thread.Sleep(250); // Hiccup - throttles queue insertions
            }
            else
            {
                // BUGFIX: this literal was split by a raw newline in the source
                // (invalid C#); restored from its duplicate in the paging loop below
                Console.WriteLine(" . Duplicated App. Skipped");
            }
        }

        // Executing Requests for more Play Store Links.
        // NOTE: removed the dead "initialSkip" / "currentMultiplier" locals - paging
        // here is driven exclusively by the pagToken parsed from the last response.
        int errorsCount = 0;
        do
        {
            // Finding pagToken from HTML; no match means there are no further pages
            var rgxMatch = pagTokenRegex.Match(response);
            if (!rgxMatch.Success)
            {
                break;
            }

            // Reading Match from Regex, and applying needed replacements
            string pagToken = rgxMatch.Value.Replace(":S:", "%3AS%3A").Replace("\\42", String.Empty).Replace(@"\\u003d", String.Empty);

            // Assembling new PostData with paging values
            string postData = String.Format(Consts.POST_DATA, pagToken);

            // Executing request for values
            response = server.Post(String.Format(Consts.CRAWL_URL, searchField), postData);

            // Checking Server Status
            if (server.StatusCode != System.Net.HttpStatusCode.OK)
            {
                LogWriter.Error("Http Error", "Status Code [ " + server.StatusCode + " ]");
                errorsCount++;
                continue;
            }

            // Parsing Links
            foreach (string url in parser.ParseAppUrls(response))
            {
                // Checks whether the app have been already processed or is queued
                if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
                {
                    Console.WriteLine(" . Queued App");
                    mongoDB.AddToQueue(url);
                    Thread.Sleep(250); // Hiccup
                }
                else
                {
                    Console.WriteLine(" . Duplicated App. Skipped");
                }
            }
        } while (parser.AnyResultFound(response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
    }
}
/// <summary>
/// Searches the Play Store for <paramref name="keyword"/> and follows the cluster /
/// page-token continuation stream to the end, collecting short app descriptions
/// and de-duplicating them locally.
/// </summary>
/// <param name="keyword">Search keyword used to build the initial crawl url.</param>
/// <returns>List of distinct apps (short descriptions) parsed from the result stream.</returns>
public static List <AppShortDescription> CollectAppsShortInformationFromKeywords(string keyword)
{
    List <AppShortDescription> parsedApps_list = new List <AppShortDescription> ();

    log.Info("Crawling Search Term : [ " + keyword + " ]");

    string crawlUrl = String.Format(Consts.CRAWL_URL_KEYWORD_INITIAL, keyword, "Russia", "ru");
    string postData = Consts.POST_DATA_KEYWORD_INITAL;

    // HTML Response
    string response = string.Empty;

    // Executing Web Requests
    using (WebRequests server = new WebRequests())
    {
        // Creating Request Object
        server.Host = Consts.HOST;

        int insertedAppCount = 0;
        int skippedAppCount = 0;
        int errorsCount = 0;

        do
        {
            // Executing Request
            response = server.Post(crawlUrl, postData);

            // Checking Server Status
            if (server.StatusCode != System.Net.HttpStatusCode.OK)
            {
                log.Error("Http Error - Status Code: " + server.StatusCode);
                errorsCount++;
                if (errorsCount > Consts.MAX_REQUEST_ERRORS)
                {
                    log.Info("Crawl Stopped: MAX_REQUEST_ERRORS reached");
                    break;
                }
                continue;
            }

            // Parsing apps out of the Html Page, skipping ones already collected
            foreach (AppShortDescription asd in parser.ParseAppUrls(response))
            {
                if (!parsedApps_list.Contains(asd))
                {
                    parsedApps_list.Add(asd);
                    log.Info("Inserted App: " + asd);
                    ++insertedAppCount;
                }
                else
                {
                    ++skippedAppCount;
                    log.Info("Duplicated App. Skipped: " + asd);
                }
            }

            // Get cluster / pagTok values that will be used to fetch next stream data.
            // If not found, that means we have reached the end of stream.
            ClusterAndToken cat_cl = getPageAndClusterTokens(response);
            if (cat_cl == null)
            {
                break;
            }

            // Subsequent requests go through the cluster url with both tokens
            crawlUrl = Consts.CRAWL_URL_KEYWORD_CLUSTER;
            postData = String.Format(Consts.POST_DATA_KEYWORD_CLUSTER, cat_cl.clp, cat_cl.pagTok);

            Console.WriteLine("Inserted apps: " + insertedAppCount + ".");
        } while (true);

        // NOTE: removed the unused "exit:" label (its goto was commented out - CS0164)
        log.Info("Inserted App Count: " + insertedAppCount);
        log.Info("Skipped App Count: " + skippedAppCount);
        log.Info("Error Count: " + errorsCount + "\n");
    }

    return parsedApps_list;
}