/// <summary>
/// Entry point of the file-driven crawler. Runs three phases in sequence:
/// (1) downloads and parses the page of every app url listed in the input file and inserts/updates it on MongoDB,
/// (2) walks the review pages of each of those apps and stores the reviews that are not on the database yet,
/// (3) drives Firefox through Selenium to log in and store the page data of the reviewers found on the database.
/// A summary built from <c>_appStatus</c> is printed at the end and the process waits for a key press.
/// </summary>
/// <param name="args">Exactly one element: path of a text file holding one app url per line.</param>
/// <remarks>
/// Terminates the process with exit code -100 when the argument count is wrong and -101 when the file does not exist.
/// </remarks>
static void Main (string[] args)
{
    // Checking for Input Parameters
    if (args == null || args.Length != 1)
    {
        Console.WriteLine ("Incorrect number of arguments received. Expected One");
        System.Environment.Exit (-100);
    }

    // Human Readable Variable
    string inputFile = args[0];

    // Checking if the Input file received exists
    if (!File.Exists (inputFile))
    {
        Console.WriteLine ("Received input file does not exist : {0}", inputFile);
        System.Environment.Exit (-101);
    }

    // Reading the urls only once. Blank lines are dropped so they never reach the http client
    string[] appUrls = File.ReadAllLines (inputFile)
                           .Select (line => line.Trim ())
                           .Where (line => line.Length > 0)
                           .ToArray ();

    // App Status
    _appStatus = new Dictionary<String, AppStatusModel> ();

    // Creating Instance of Database Manager
    MongoDBWrapper mongoDB = new MongoDBWrapper ();
    string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    // Creating Instance of Parser
    PlayStoreParser dataParser = new PlayStoreParser ();

    // All three phases run, in order. No phase is skipped with a jump,
    // otherwise the input file and the summary below would be pointless.
    ScrapeAppPages (mongoDB, dataParser, appUrls);

    // Next Phase: Parse Reviews of those Apps
    Console.WriteLine ("\n => Parsing Complete. Obtaining Reviews");
    ScrapeAppReviews (mongoDB, dataParser, appUrls);

    ScrapeReviewersData (mongoDB, dataParser);

    // End of Processing + Console Feedback
    Console.WriteLine ("\n\n == Processing Summary ==");

    foreach (var status in _appStatus.Values)
    {
        Console.WriteLine ("=> App : {0} - Status {1} - Reviews : {2}", status.appName, status.status, status.reviews);
    }

    Console.ReadLine ();
}

// Phase 1: fetches each app page, parses it, stores it on MongoDB and records the outcome on _appStatus.
private static void ScrapeAppPages (MongoDBWrapper mongoDB, PlayStoreParser dataParser, string[] appUrls)
{
    using (WebRequests httpClient = new WebRequests ())
    {
        // Minor Configuration of the Http Client - Ensures that the requests response will be in english
        // By doing so, we have no problems parsing the dates to their proper formats
        httpClient.Headers.Add (Consts.ACCEPT_LANGUAGE);
        httpClient.Host              = Consts.HOST;
        httpClient.Encoding          = "utf-8";
        httpClient.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

        // Iterating over App Urls - To Extract Data, Not The Reviews Yet.
        foreach (string appUrl in appUrls)
        {
            // Logging Progress
            Console.WriteLine ("\n => Processing App : " + appUrl);

            // Executing Http Get Request for the App's Data.
            // The loop must stop as soon as a response arrives OR the retries are exhausted, hence '&&':
            // with '||' it would spin forever on an empty response and re-request after a success.
            String appDataResponse = String.Empty;
            int currentRetry       = 0;

            do
            {
                // Http Get
                appDataResponse = httpClient.Get (appUrl);

            } while (String.IsNullOrWhiteSpace (appDataResponse) && ++currentRetry <= _maxRetries);

            // Sanity Check
            if (String.IsNullOrWhiteSpace (appDataResponse))
            {
                Console.WriteLine ("\t\t.Error - Failed to find page of app : " + appUrl + ". Skipping it");
                continue;
            }

            Console.WriteLine ("\t\t.Page Found. Firing Parser");

            // Parsing App Data
            AppModel appData = dataParser.ParseAppPage (appDataResponse, appUrl);

            // Checking If this app is on the database already
            bool alreadyStored = mongoDB.AppProcessed (appUrl);

            if (alreadyStored)
            {
                Console.WriteLine ("\t\t.Previous Version of App Found. Updating It");
                mongoDB.UpdateRecord (appData, "Url", appData.Url);
            }
            else
            {
                Console.WriteLine ("\t\t.No Previous Version of the App Found. Adding to Database");
                mongoDB.Insert<AppModel> (appData);
            }

            // Updating App Status. The indexer is used instead of Add so that an url
            // listed twice on the input file overwrites its entry instead of throwing.
            _appStatus[appData.Url] = new AppStatusModel ()
            {
                appId   = appData.Url.Replace (Consts.PLAY_STORE_PREFIX, String.Empty),
                appUrl  = appData.Url,
                appName = appData.Name,
                status  = alreadyStored ? "Updated" : "Inserted"
            };
        }
    }
}

// Phase 2: pages through the reviews of every app recorded on _appStatus and stores the ones not on MongoDB yet.
private static void ScrapeAppReviews (MongoDBWrapper mongoDB, PlayStoreParser dataParser, string[] appUrls)
{
    // Upper bound of "sleep and retry" rounds for a single page, so a permanent block cannot hang the run
    const int maxBlockedRetries = 3;

    // Single generator for the whole phase (used only to randomize the pause between pages)
    Random pauseGenerator = new Random ();

    // Iterating again over app urls to parse the reviews from this app
    foreach (string appUrl in appUrls)
    {
        // Apps whose page could not be fetched on the previous phase have no status entry
        AppStatusModel appStatus;
        if (!_appStatus.TryGetValue (appUrl, out appStatus))
        {
            Console.WriteLine ("No parsed data found for app : " + appUrl + ". Skipping its reviews");
            continue;
        }

        // Parsing Review Pages from the apps
        for (int currentPage = 1; /* no stop condition */; currentPage++)
        {
            // Getting Reviews Data Bundle
            string reviewsData = ReviewsWrapper.GetAppReviews (appStatus.appId, currentPage);

            // Checking for Blocking Situation - a null/empty response is treated as "blocked".
            // The same page is requested again after each pause.
            for (int blockedRetry = 0; String.IsNullOrEmpty (reviewsData) && blockedRetry < maxBlockedRetries; blockedRetry++)
            {
                Console.WriteLine ("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                // Thread Wait for 10 Minutes
                Thread.Sleep (10 * 60 * 1000);

                reviewsData = ReviewsWrapper.GetAppReviews (appStatus.appId, currentPage);
            }

            // Checking for "No Reviews" app (null-safe: the response may still be missing after the retries)
            if (String.IsNullOrEmpty (reviewsData) || reviewsData.Length < 50)
            {
                Console.WriteLine ("No Reviews left for this app. Skipping");
                break;
            }

            // Normalizing Response to Proper HTML
            reviewsData = ReviewsWrapper.NormalizeResponse (reviewsData);

            // Set once a review of this page is already stored: the following pages are assumed to be known too
            bool duplicateFound = false;

            // Iterating over Parsed Reviews
            foreach (var review in dataParser.ParseReviews (reviewsData))
            {
                // Adding App Data to the review
                review.appID   = appStatus.appId;
                review.appName = appStatus.appName;
                review.appURL  = appStatus.appUrl;

                // Incrementing Reviews Count for this app
                appStatus.reviews++;

                // Capture timestamp
                review.timestamp = DateTime.Now;

                // Building Query to check for duplicated review
                var duplicatedReviewQuery = Query.EQ ("permalink", review.permalink);

                // Checking for duplicated review before inserting it
                if (mongoDB.FindMatch<AppReview> (duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Count () == 0)
                {
                    // Inserting Review into MongoDB
                    mongoDB.Insert<AppReview> (review, Consts.REVIEWS_COLLECTION);
                }
                else
                {
                    Console.WriteLine ("Duplicated Review. Skipping App");

                    // The rest of this page is still processed; paging stops afterwards
                    duplicateFound = true;
                }
            }

            // Hiccup to avoid Blocking problems
            Console.WriteLine ("Parsed Reviews: " + appStatus.reviews);
            Thread.Sleep (pauseGenerator.Next (14000, 21000));

            if (duplicateFound)
            {
                break;
            }
        }
    }
}

// Phase 3: logs into Google through a Selenium-driven Firefox and stores the data of every reviewer not on MongoDB yet.
private static void ScrapeReviewersData (MongoDBWrapper mongoDB, PlayStoreParser dataParser)
{
    Console.WriteLine ("\n\n => Processing People Data");
    Console.WriteLine ("\nSimulating Google Login Using Selenium.");

    using (var firefoxDriver = new FirefoxDriver ())
    {
        // Navigating to Dummy Url - One that is known to ask for a login
        firefoxDriver.Navigate ().GoToUrl ("https://play.google.com/store/people/details?id=101242565951396343093");

        // Reaching Login Fields
        var loginField    = firefoxDriver.FindElementById ("Email");
        var passwordField = firefoxDriver.FindElementById ("Passwd");
        var btnSignIn     = firefoxDriver.FindElementById ("signIn");

        // Sending Credentials to the browser (placeholders - must be replaced before running)
        loginField.SendKeys ("YOUREMAIL");
        passwordField.SendKeys ("YOURPASSWORD");
        btnSignIn.Click ();

        // NOTE(review): hard-coded resume point. Every url returned before this one is skipped,
        // and nothing is processed at all if this url is not among the results - confirm this is still wanted.
        string lastPeople   = "https://play.google.com/store/people/details?id=115037241907660526856";
        bool shouldcontinue = false;

        // Processing Reviewers Data
        foreach (string peopleUrl in mongoDB.FindPeopleUrls ())
        {
            // Skipping until last link
            if (peopleUrl == lastPeople)
            {
                shouldcontinue = true;
            }

            if (!shouldcontinue)
            {
                continue;
            }

            // Reviewers already stored are skipped before paying for the page load and the parsing
            if (mongoDB.IsReviewerOnDatabase (peopleUrl))
            {
                continue;
            }

            // Navigating To the Reviewer Page
            firefoxDriver.Navigate ().GoToUrl (peopleUrl);

            // Reading the rendered Reviewer page
            string reviewerPage = firefoxDriver.PageSource;

            // Extracting Reviewer Data from the Page
            ReviewerPageData reviewerData = dataParser.ParsePeopleData (reviewerPage);

            // Adding Url to the model
            reviewerData.reviewerUrl = peopleUrl;

            // Inserting it to the database
            mongoDB.Insert<ReviewerPageData> (reviewerData, "ReviewersData");
        }
    }
}
/// <summary>
/// Entry point of the query-driven review crawler. Selects apps from MongoDB with a fixed query
/// (Category equals "/store/apps/category/EDUCATION"), requests up to a maximum number of review pages
/// per app and inserts every review whose permalink is not yet on the reviews collection.
/// </summary>
/// <param name="args">
/// Exactly three integers, in this order: number of apps to process, review pages to read per app,
/// number of apps to skip on the query results.
/// </param>
/// <remarks>
/// Invalid arguments are reported through <c>LogWriter.Fatal</c> and the method returns without doing any work.
/// Exceptions raised while processing a page are logged and the next page is attempted.
/// </remarks>
static void Main (string[] args)
{
    // Upper bound of "sleep and retry" rounds for a single page, so a permanent block cannot hang the run
    const int maxBlockedRetries = 3;

    // Configuring Log Object Threshold
    LogWriter.Threshold = TLogEventLevel.Information;
    LogWriter.LogEvent += LogWriter_LogEvent;

    // Parsing Arguments
    LogWriter.Info ("Checking for Arguments");

    if (args == null || args.Length != 3)
    {
        // The message states the count that the check above actually requires
        LogWriter.Fatal ("Arguments Fatal", "Incorrect number of arguments received. Expected three: AppsToProcess, ReviewsPagePerApp and AppsToSkip.");
        return; // Halts.
    }

    LogWriter.Info ("Reading Arguments");

    // Reading actual arguments received - names follow the positional order of 'args'
    string[] argumentNames = { "AppsToProcess", "ReviewsPagePerApp", "AppsToSkip" };

    for (int index = 0; index < argumentNames.Length; index++)
    {
        // TryParse keeps a malformed number from crashing the process with an unhandled FormatException
        int parsedValue;
        if (!Int32.TryParse (args[index], out parsedValue))
        {
            LogWriter.Fatal ("Arguments Fatal", "Argument '" + argumentNames[index] + "' is not a valid integer : " + args[index]);
            return; // Halts.
        }

        _arguments.Add (argumentNames[index], parsedValue);
    }

    // Building MongoDB Query - This query specifies which applications you want to parse out the reviews
    // For more regarding MongoDB Queries, check the documentation on the project wiki page
    var mongoQuery = Query.EQ ("Category", "/store/apps/category/EDUCATION");

    LogWriter.Info ("Configuring MonboDB Client");

    // Creating instance of Mongo Handler for the main collection
    MongoDBWrapper mongoClient = new MongoDBWrapper ();
    string fullServerAddress   = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoClient.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    LogWriter.Info ("Iterating over Apps");

    // App URL Prefix (must be removed in order to obtain the app ID)
    string playStorePrefix = "https://play.google.com/store/apps/details?id=";

    // Creating Play Store Parser
    PlayStoreParser parser = new PlayStoreParser ();

    // Iterating over Query Results for the App Ids
    foreach (var appRecord in mongoClient.FindMatch<AppModel> (mongoQuery, _arguments["AppsToProcess"], _arguments["AppsToSkip"]))
    {
        // Extracting app ID from URL
        string appId = appRecord.Url.Replace (playStorePrefix, String.Empty);

        // Console Feedback
        LogWriter.Info ("Processing App [ " + appRecord.Name + " ] ");

        // Iterating over Review Pages up to the max received as argument
        for (int currentPage = 1; currentPage <= _arguments["ReviewsPagePerApp"]; currentPage++)
        {
            try
            {
                // Page Feedback
                LogWriter.Info ("\tCurrent Page: " + currentPage);

                // Issuing Request for Reviews
                string response = GetAppReviews (appId, currentPage);

                // Checking for Blocking Situation - a null/empty response is treated as "blocked".
                // The same page is requested again after each pause.
                for (int blockedRetry = 0; String.IsNullOrEmpty (response) && blockedRetry < maxBlockedRetries; blockedRetry++)
                {
                    LogWriter.Info ("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                    // Thread Wait for 10 Minutes
                    Thread.Sleep (10 * 60 * 1000);

                    response = GetAppReviews (appId, currentPage);
                }

                // Checking for "No Reviews" app (null-safe: the response may still be missing after the retries)
                if (String.IsNullOrEmpty (response) || response.Length < 50)
                {
                    LogWriter.Info ("No Reviews for this app. Skipping");
                    break;
                }

                // Normalizing Response to Proper HTML
                response = NormalizeResponse (response);

                // Iterating over Parsed Reviews
                foreach (var review in parser.ParseReviews (response))
                {
                    // Adding App Data to the review
                    review.appID   = appId;
                    review.appName = appRecord.Name;
                    review.appURL  = appRecord.Url;

                    // Adding processing timestamp to the model
                    review.timestamp = DateTime.Now;

                    // Building Query to check for duplicated review
                    var duplicatedReviewQuery = Query.EQ ("permalink", review.permalink);

                    // Checking for duplicated review before inserting it
                    if (mongoClient.FindMatch<AppReview> (duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Count () == 0)
                    {
                        // Inserting Review into MongoDB
                        mongoClient.Insert<AppReview> (review, Consts.REVIEWS_COLLECTION);
                    }
                    else
                    {
                        // Only this review is skipped; the remaining reviews and pages of the app are still processed
                        LogWriter.Info ("Duplicated Review", "Review already parsed. Skipping it");
                    }
                }
            }
            catch (Exception ex)
            {
                // A failing page must not abort the run: log it and move on to the next page
                LogWriter.Error (ex);
            }
        }
    }
}
/// <summary>
/// Entry point of the status-driven review crawler. Repeatedly asks MongoDB (through
/// <c>FindAndModifyReviews</c>) for an app record until none is returned, requests the first review
/// page of that app, appends the reviews whose permalink is not yet on the record and saves the record
/// with <c>ReviewsStatus</c> set to "Visited" or "Error".
/// </summary>
/// <param name="args">
/// Optional. When exactly one argument is received it is taken as the path of a text file with the
/// proxies to load, and requests are issued with the "use proxies" flag set.
/// </param>
/// <remarks>
/// Terminates the process with exit code -100 when the proxies file is missing and -101 when loading it fails.
/// </remarks>
static void Main (string[] args)
{
    // Upper bound of "sleep and retry" rounds for a single app, so a permanent block cannot hang the run
    const int maxBlockedRetries = 3;

    // Configuring Log Object
    Logger logger = LogManager.GetCurrentClassLogger ();

    // Control Variable (Bool - Should the process use proxies? )
    bool isUsingProxies = false;

    logger.Info ("Checking proxies configuration");

    // Checking for the need to use proxies
    if (args != null && args.Length == 1)
    {
        // Setting flag to true
        isUsingProxies = true;

        // Loading proxies from .txt received as argument
        String fPath = args[0];

        // Sanity Check
        if (!File.Exists (fPath))
        {
            logger.Fatal ("Couldnt find proxies on path : " + fPath);
            System.Environment.Exit (-100);
        }

        // Reading Proxies from File
        logger.Info ("Loading Proxies");
        string[] fLines = File.ReadAllLines (fPath, Encoding.UTF8);

        try
        {
            // Actual Load of Proxies
            ProxiesLoader.Load (fLines.ToList ());
        }
        catch (Exception ex)
        {
            logger.Fatal (ex);
            System.Environment.Exit (-101);
        }
    }

    // MongoDB instance Creation
    logger.Info ("Configuring MonboDB Client");

    // Creating instance of Mongo Handler for the main collection
    MongoDBWrapper mongoClient = new MongoDBWrapper ();
    string fullServerAddress   = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoClient.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    logger.Info ("Iterating over Apps");

    // Creating Play Store Parser
    PlayStoreParser parser = new PlayStoreParser ();

    // App Model
    AppModel appRecord;

    // Finding all the "Apps" that didn't have the reviews visited yet
    while ((appRecord = mongoClient.FindAndModifyReviews ()) != null)
    {
        // Control Variable - scoped to the current app, so that a failure on one app
        // does not flag every app processed after it as "Error"
        bool noError = true;

        // Extracting app ID from URL
        string appId = appRecord.Url.Replace (Consts.PLAY_STORE_PREFIX, String.Empty);

        // Console Feedback
        logger.Info ("Processing App [ " + appRecord.Name + " ] ");

        try
        {
            // Console Feedback
            Console.Write ("Reviews from : " + appRecord.Name);

            // Issuing Request for Reviews
            string response = ReviewsWrapper.GetAppReviews (appId, 1, isUsingProxies);

            // Checking for Blocking Situation - a null/empty response is treated as "blocked".
            // The request is issued again after each pause.
            for (int blockedRetry = 0; String.IsNullOrEmpty (response) && blockedRetry < maxBlockedRetries; blockedRetry++)
            {
                // The message reports the pause that is actually applied below
                logger.Info ("Blocked by Play Store. Sleeping process for 10 seconds before retrying.");

                // Thread Wait for 10 seconds
                Thread.Sleep (TimeSpan.FromSeconds (10));

                response = ReviewsWrapper.GetAppReviews (appId, 1, isUsingProxies);
            }

            // Still nothing after the retries: flag the app as "Error" (saved on the finally block)
            // instead of dereferencing a missing response or marking the app as visited
            if (String.IsNullOrEmpty (response))
            {
                logger.Error ("No response from Play Store after retrying. Flagging app with error status");
                Console.Write (" - No Response\n");
                noError = false;
                continue;
            }

            // Checking for "No Reviews" app
            if (response.Length < 50)
            {
                logger.Info ("No Reviews for this app. Skipping");
                Console.Write (" - No Reviews Found\n");
                continue;
            }

            // Normalizing Response to Proper HTML
            response = ReviewsWrapper.NormalizeResponse (response);

            // List of Reviews
            List<AppReview> reviews = new List<AppReview> ();

            // Iterating over Parsed Reviews
            foreach (var review in parser.ParseReviews (response))
            {
                // Adding App Data to the review
                review.appID   = appId;
                review.appName = appRecord.Name;
                review.appURL  = appRecord.Url;

                // Capture Timestamp to the model
                review.timestamp = DateTime.Now;

                // Adding reviews to the current list
                reviews.Add (review);
            }

            // Any Review Found ?
            if (reviews.Count > 0)
            {
                Console.Write (" - " + reviews.Count + " Reviews Found\n");

                // Checking if there was any previous list of reviews
                if (appRecord.Reviews == null)
                {
                    appRecord.Reviews = reviews;
                }
                else // Previous List found - Appending only the new ones
                {
                    // Set of the permalinks already on the record: one pass instead of a scan per review,
                    // and safe when a stored review has no permalink
                    var knownPermalinks = new HashSet<string> (appRecord.Reviews.Select (t => t.permalink));

                    foreach (var review in reviews)
                    {
                        // Add returns false when the permalink is already known
                        if (knownPermalinks.Add (review.permalink))
                        {
                            appRecord.Reviews.Add (review);
                        }
                    }
                }
            }
        }
        catch (Exception ex)
        {
            logger.Error (ex);

            // Error feedback in red, restoring whatever color the console had before
            ConsoleColor previousColor = Console.ForegroundColor;
            Console.ForegroundColor    = ConsoleColor.Red;
            Console.WriteLine ("Error : " + ex.Message);
            Console.ForegroundColor    = previousColor;

            noError = false;
        }
        finally
        {
            // Toggling back the "ReviewsStatus" attribute from the model
            // (also reached through the 'continue' statements above)
            appRecord.ReviewsStatus = noError ? "Visited" : "Error";
            mongoClient.SaveRecord<AppModel> (appRecord);
        }
    }
}