/// <summary>
/// Entry point. Expects exactly one argument: the path of a text file that is read
/// line by line, each line being treated as the URL of a Play Store app page.
/// The method is laid out in three phases followed by a console summary:
/// 1. download and parse every app page, inserting or updating it on MongoDB;
/// 2. download and store the review pages of every app recorded in phase 1;
/// 3. visit the reviewer pages through a Selenium-driven Firefox session.
/// Exits with code -100 on a wrong argument count and -101 when the file is missing.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the input file path.</param>
static void Main(string[] args)
{
    // Checking for Input Parameters
    if (args == null || args.Length != 1)
    {
        Console.WriteLine("Incorrect number of arguments received. Expected One");
        System.Environment.Exit(-100);
    }

    // Human Readable Variable
    string inputFile = args[0];

    // Checking if the Input file received exists
    if (!File.Exists(inputFile))
    {
        Console.WriteLine("Received input file does not exist : {0}", inputFile);
        System.Environment.Exit(-101);
    }

    // App Status
    _appStatus = new Dictionary<String, AppStatusModel>();

    // Creating Instance of Database Manager
    MongoDBWrapper mongoDB = new MongoDBWrapper();
    string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    // Creating Instance of Parser
    PlayStoreParser dataParser = new PlayStoreParser();

    // NOTE(review): this unconditional jump skips phases 1 and 2 entirely, so the input file
    // is never read and the final summary is always empty. It looks like a leftover from
    // resuming an interrupted run - confirm with the owner before removing it.
    goto PeopleData;

    using (WebRequests httpClient = new WebRequests())
    {
        // Minor Configuration of the Http Client - Ensures that the requests response will be in english
        // By doing so, we have no problems parsing the dates to their proper formats
        httpClient.Headers.Add(Consts.ACCEPT_LANGUAGE);
        httpClient.Host = Consts.HOST;
        httpClient.Encoding = "utf-8";
        httpClient.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

        // Iterating over File Lines (App Urls) - To Extract Data, Not The Reviews Yet.
        foreach (string appUrl in File.ReadAllLines(inputFile))
        {
            // Blank lines carry no url to request
            if (String.IsNullOrWhiteSpace(appUrl))
            {
                continue;
            }

            // Logging Progress
            Console.WriteLine("\n => Processing App : " + appUrl);

            // Executing Http Get Request for the Apps's Data - one attempt plus up to _maxRetries retries.
            // The loop must only repeat while the response is still empty AND retries are left:
            // with "||" an empty response never incremented the counter (short-circuit) and looped
            // forever, while a valid response was needlessly fetched again.
            String appDataResponse = String.Empty;
            int currentRetry = 0;

            do
            {
                // Http Get
                appDataResponse = httpClient.Get(appUrl);
            }
            while (String.IsNullOrWhiteSpace(appDataResponse) && ++currentRetry <= _maxRetries);

            // Sanity Check
            if (String.IsNullOrWhiteSpace(appDataResponse))
            {
                Console.WriteLine("\t\t.Error - Failed to find page of app : " + appUrl + ". Skipping it");
                continue;
            }

            Console.WriteLine("\t\t.Page Found. Firing Parser");

            // Parsing App Data
            AppModel appData = dataParser.ParseAppPage(appDataResponse, appUrl);

            // Checking If this app is on the database already
            string recordStatus;

            if (mongoDB.AppProcessed(appUrl))
            {
                Console.WriteLine("\t\t.Previous Version of App Found. Updating It");
                mongoDB.UpdateRecord(appData, "Url", appData.Url);
                recordStatus = "Updated";
            }
            else
            {
                Console.WriteLine("\t\t.No Previous Version of the App Found. Adding to Database");
                mongoDB.Insert<AppModel>(appData);
                recordStatus = "Inserted";
            }

            // Updating App Status - indexer assignment so that a url listed twice
            // on the input file overwrites its entry instead of throwing
            _appStatus[appData.Url] = new AppStatusModel()
            {
                appId   = appData.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty),
                appUrl  = appData.Url,
                appName = appData.Name,
                status  = recordStatus
            };
        }
    }

Reviews:
    // Next Phase: Parse Reviews of those Apps
    Console.WriteLine("\n => Parsing Complete. Obtaining Reviews");

    // Single generator for the random pauses between review pages
    Random delayGenerator = new Random();

    // Iterating again over app urls to parse the reviews from this app
    foreach (string appUrl in File.ReadAllLines(inputFile))
    {
        // Apps whose page could not be fetched on the previous phase have no status entry
        AppStatusModel appStatus;

        if (!_appStatus.TryGetValue(appUrl, out appStatus))
        {
            Console.WriteLine("No app data recorded for : " + appUrl + ". Skipping its reviews");
            continue;
        }

        // Reaching App Id
        string appID = appStatus.appId;

        // Reviews-Break-Parsing Flag
        bool shouldContinueParsing = true;

        // Parsing Review Pages from the apps - stops on an empty page, on a duplicated review
        // or when the store keeps blocking the requests
        for (int currentPage = 1; shouldContinueParsing; currentPage++)
        {
            // Getting Reviews Data Bundle
            string reviewsData = ReviewsWrapper.GetAppReviews(appID, currentPage);

            // Checking for Blocking Situation - sleeping and requesting the same page again
            int blockedRetries = 0;

            while (String.IsNullOrEmpty(reviewsData) && blockedRetries < _maxRetries)
            {
                blockedRetries++;
                Console.WriteLine("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                // Thread Wait for 10 Minutes
                Thread.Sleep(10 * 60 * 1000);

                reviewsData = ReviewsWrapper.GetAppReviews(appID, currentPage);
            }

            // Giving up on this app instead of dereferencing a null / empty response
            if (String.IsNullOrEmpty(reviewsData))
            {
                Console.WriteLine("Still blocked by Play Store after retrying. Skipping remaining reviews of this app");
                break;
            }

            // Checking for "No Reviews" app
            if (reviewsData.Length < 50)
            {
                Console.WriteLine("No Reviews left for this app. Skipping");
                break;
            }

            // Normalizing Response to Proper HTML
            reviewsData = ReviewsWrapper.NormalizeResponse(reviewsData);

            // Iterating over Parsed Reviews
            foreach (var review in dataParser.ParseReviews(reviewsData))
            {
                // Adding App Data to the review
                review.appID   = appStatus.appId;
                review.appName = appStatus.appName;
                review.appURL  = appStatus.appUrl;

                // Incrementing Reviews Count for this app
                appStatus.reviews++;

                // Adding Review Object to Database
                review.timestamp = DateTime.Now;

                // Building Query to check for duplicated review
                var duplicatedReviewQuery = Query.EQ("permalink", review.permalink);

                // Checking for duplicated review before inserting it
                if (mongoDB.FindMatch<AppReview>(duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Count() == 0)
                {
                    // Inserting Review into MongoDB
                    mongoDB.Insert<AppReview>(review, Consts.REVIEWS_COLLECTION);
                }
                else
                {
                    Console.WriteLine("Duplicated Review. Skipping App");

                    // When this happens, there are no more reviews to be parsed.
                    // The remaining reviews of the current page are still checked.
                    shouldContinueParsing = false;
                }
            }

            // Hiccup to avoid Blocking problems
            Console.WriteLine("Parsed Reviews: " + appStatus.reviews);
            Thread.Sleep(delayGenerator.Next(14000, 21000));
        }
    }

PeopleData:
    Console.WriteLine("\n\n => Processing People Data");
    Console.WriteLine("\nSimulating Google Login Using Selenium.");

    using (var firefoxDriver = new FirefoxDriver())
    {
        // Navigating to Dummy Url - One that is known to ask for a login
        firefoxDriver.Navigate().GoToUrl("https://play.google.com/store/people/details?id=101242565951396343093");

        // Reaching Login Fields
        var loginField    = firefoxDriver.FindElementById("Email");
        var passwordField = firefoxDriver.FindElementById("Passwd");
        var btnSignIn     = firefoxDriver.FindElementById("signIn");

        // Sending Credentials to the browser (placeholders - must be replaced before running)
        loginField.SendKeys("YOUREMAIL");
        passwordField.SendKeys("YOURPASSWORD");
        btnSignIn.Click();

        // NOTE(review): hard-coded checkpoint - every reviewer url listed before this one is
        // skipped. Presumably another leftover from resuming an interrupted run; confirm.
        string lastPeople = "https://play.google.com/store/people/details?id=115037241907660526856";
        bool shouldcontinue = false;

        // Processing Reviewers Data
        foreach (string peopleUrl in mongoDB.FindPeopleUrls())
        {
            // Skipping until last link
            if (peopleUrl == lastPeople)
            {
                shouldcontinue = true;
            }

            if (!shouldcontinue)
            {
                continue;
            }

            // Reviewers already on the database are never inserted again, so there is
            // no point on loading and parsing their pages
            if (mongoDB.IsReviewerOnDatabase(peopleUrl))
            {
                continue;
            }

            // Navigating To the Reviewer Page
            firefoxDriver.Navigate().GoToUrl(peopleUrl);

            // Reading the Reviewer page source out of the browser
            string reviewerPage = firefoxDriver.PageSource;

            // Extracting Reviewer Data from the Page
            ReviewerPageData reviewerData = dataParser.ParsePeopleData(reviewerPage);

            // Adding Url to the model
            reviewerData.reviewerUrl = peopleUrl;

            // Inserting it to the database
            mongoDB.Insert<ReviewerPageData>(reviewerData, "ReviewersData");
        }
    }

    // End of Processing + Console Feedback
    Console.WriteLine("\n\n == Processing Summary ==");

    foreach (var status in _appStatus.Values)
    {
        // Message
        string cMessage = "=> App : {0} - Status {1} - Reviews : {2}";
        Console.WriteLine(cMessage, status.appName, status.status, status.reviews);
    }

    // Keeps the console window open until the user presses enter
    Console.ReadLine();
}
/// <summary>
/// Entry point. Repeatedly pulls an app record through
/// <c>mongoClient.FindAndModifyReviews()</c> until it returns null, requests the first
/// reviews page of each app, appends the reviews that are not yet on the record and
/// saves the record back with <c>ReviewsStatus</c> set to "Visited" or "Error".
/// When exactly one argument is received it is treated as the path of a text file
/// with proxies, which are loaded and used for the review requests.
/// Exits with code -100 when the proxies file is missing and -101 when it cannot be loaded.
/// </summary>
/// <param name="args">Command-line arguments; optional <c>args[0]</c> is the proxies file path.</param>
static void Main(string[] args)
{
    // How many times a blocked request is repeated before the app is marked as "Error"
    const int maxBlockedRetries = 5;

    // Configuring Log Object
    Logger logger = LogManager.GetCurrentClassLogger();

    // Control Variable (Bool - Should the process use proxies? )
    bool isUsingProxies = false;

    // Checking for the need to use proxies
    if (args != null && args.Length == 1)
    {
        // Setting flag to true
        isUsingProxies = true;

        // Loading proxies from .txt received as argument
        String fPath = args[0];

        // Sanity Check
        if (!File.Exists(fPath))
        {
            logger.Fatal("Couldnt find proxies on path : " + fPath);
            System.Environment.Exit(-100);
        }

        try
        {
            // Reading Proxies from File - inside the try so that an unreadable file
            // is logged as fatal instead of crashing with an unhandled exception
            string[] fLines = File.ReadAllLines(fPath, Encoding.UTF8);

            // Actual Load of Proxies
            ProxiesLoader.Load(fLines.ToList());
        }
        catch (Exception ex)
        {
            logger.Fatal(ex);
            System.Environment.Exit(-101);
        }
    }

    // MongoDB instance Creation
    logger.Info("Configuring MonboDB Client");

    // Creating instance of Mongo Handler for the main collection
    MongoDBWrapper mongoClient = new MongoDBWrapper();
    string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoClient.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    logger.Info("Iterating over Apps");

    // Creating Play Store Parser
    PlayStoreParser parser = new PlayStoreParser();

    // App Model
    AppModel appRecord;

    // Finding all the "Apps" that didn't have the reviews visited yet
    while ((appRecord = mongoClient.FindAndModifyReviews()) != null)
    {
        // Control Variable - scoped to the current app. Keeping it outside of the loop made
        // a single failure mark every following app as "Error".
        bool noError = true;

        // Extracting app ID from URL
        string appId = appRecord.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty);

        // Console Feedback
        logger.Info("Processing App [ " + appRecord.Name + " ] ");

        try
        {
            // Console Feedback
            Console.Write("Reviews from : " + appRecord.Name);

            // Issuing Request for Reviews
            string response = ReviewsWrapper.GetAppReviews(appId, 1, isUsingProxies);

            // Checking for Blocking Situation - sleeping and issuing the request again
            for (int retry = 0; String.IsNullOrEmpty(response) && retry < maxBlockedRetries; retry++)
            {
                logger.Info("Blocked by Play Store. Sleeping process for 10 seconds before retrying.");

                // Thread Wait for 10 seconds
                Thread.Sleep(TimeSpan.FromSeconds(10));

                response = ReviewsWrapper.GetAppReviews(appId, 1, isUsingProxies);
            }

            // Still blocked: flagging the app as "Error" rather than dereferencing a null
            // response or reporting the app as having no reviews
            if (String.IsNullOrEmpty(response))
            {
                logger.Error("Still blocked by Play Store after retrying. App : " + appRecord.Name);
                Console.Write(" - Blocked\n");
                noError = false;
                continue; // the finally block below still saves the record
            }

            // Checking for "No Reviews" app
            if (response.Length < 50)
            {
                logger.Info("No Reviews for this app. Skipping");
                Console.Write(" - No Reviews Found\n");
                continue; // the finally block below still saves the record
            }

            // Normalizing Response to Proper HTML
            response = ReviewsWrapper.NormalizeResponse(response);

            // List of Reviews
            List<AppReview> reviews = new List<AppReview>();

            // Iterating over Parsed Reviews
            foreach (var review in parser.ParseReviews(response))
            {
                // Adding App Data to the review
                review.appID   = appId;
                review.appName = appRecord.Name;
                review.appURL  = appRecord.Url;

                // Capture Timestamp to the model
                review.timestamp = DateTime.Now;

                // Adding reviews to the current list
                reviews.Add(review);
            }

            // Any Review Found ?
            if (reviews.Count > 0)
            {
                Console.Write(" - " + reviews.Count + " Reviews Found\n");

                // Checking if there was any previous list of reviews
                if (appRecord.Reviews == null)
                {
                    appRecord.Reviews = reviews;
                }
                else // Previous List found - Appending only the new ones
                {
                    foreach (var review in reviews)
                    {
                        if (!appRecord.Reviews.Any(t => t.permalink.Equals(review.permalink)))
                        {
                            appRecord.Reviews.Add(review);
                        }
                    }
                }
            }
        }
        catch (Exception ex)
        {
            logger.Error(ex);

            // Printing the error in red and restoring whatever color was in use before
            ConsoleColor previousColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine("Error : " + ex.Message);
            Console.ForegroundColor = previousColor;

            noError = false;
        }
        finally
        {
            // Toggling back the "ReviewsStatus" attribute from the model
            appRecord.ReviewsStatus = noError ? "Visited" : "Error";
            mongoClient.SaveRecord<AppModel>(appRecord);
        }
    }
}
/// <summary>
/// Entry point. Expects three integer arguments, in this order: the number of apps to
/// process, the number of review pages to request per app and the number of apps to skip.
/// Apps matching the hard-coded MongoDB query are read from the main collection and the
/// reviews parsed out of each page are inserted into the reviews collection unless a
/// review with the same permalink is already stored.
/// Returns without processing when the arguments are missing or not numeric.
/// </summary>
/// <param name="args">Command-line arguments: AppsToProcess, ReviewsPagePerApp, AppsToSkip.</param>
static void Main(string[] args)
{
    // How many times a blocked request is repeated before giving up on the app
    const int maxBlockedRetries = 5;

    // Configuring Log Object
    Logger logger = LogManager.GetCurrentClassLogger();

    // Parsing Arguments
    logger.Info("Checking for Arguments");

    if (args == null || args.Length != 3)
    {
        // Single message: a second string argument is handed to the logger as a format
        // parameter, and without a placeholder it never reaches the output
        logger.Fatal("Arguments Fatal : Incorrect number of arguments received. Try passing three.");
        return; // Halts.
    }

    logger.Info("Reading Arguments");

    // Reading actual arguments received - rejecting non numeric values with a logged
    // message rather than an unhandled FormatException
    int appsToProcess;
    int reviewsPagePerApp;
    int appsToSkip;

    if (!Int32.TryParse(args[0], out appsToProcess)
     || !Int32.TryParse(args[1], out reviewsPagePerApp)
     || !Int32.TryParse(args[2], out appsToSkip))
    {
        logger.Fatal("Arguments Fatal : All three arguments must be integers (AppsToProcess, ReviewsPagePerApp, AppsToSkip).");
        return; // Halts.
    }

    _arguments.Add("AppsToProcess", appsToProcess);
    _arguments.Add("ReviewsPagePerApp", reviewsPagePerApp);
    _arguments.Add("AppsToSkip", appsToSkip);

    // Building MongoDB Query - This query specifies which applications you want to parse out the reviews
    // For more regarding MongoDB Queries, check the documentation on the project wiki page
    // Alternative example: Query.EQ ("Instalations", "1,000,000 - 5,000,000")
    var mongoQuery = Query.EQ("Category", "/store/apps/category/EDUCATION");

    logger.Info("Configuring MonboDB Client");

    // Creating instance of Mongo Handler for the main collection
    MongoDBWrapper mongoClient = new MongoDBWrapper();
    string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoClient.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    logger.Info("Iterating over Apps");

    // Creating Play Store Parser
    PlayStoreParser parser = new PlayStoreParser();

    // Iterating over Query Results for the App Ids
    foreach (var appRecord in mongoClient.FindMatch<AppModel>(mongoQuery, appsToProcess, appsToSkip))
    {
        // Extracting app ID from URL
        string appId = appRecord.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty);

        // Console Feedback
        logger.Info("Processing App [ " + appRecord.Name + " ] ");

        // NOTE(review): nothing sets this flag at the moment - the statements that used to
        // raise it on a duplicated review were commented out, so every page is always visited.
        bool shouldSkipApp = false;

        // Iterating over Review Pages up to the max received as argument
        for (int currentPage = 1; currentPage <= reviewsPagePerApp; currentPage++)
        {
            // Checking for the need to skip this app in case of duplicated review
            if (shouldSkipApp)
            {
                break;
            }

            try
            {
                // Page Feedback
                logger.Info("\tCurrent Page: " + currentPage);

                // Issuing Request for Reviews
                string response = ReviewsWrapper.GetAppReviews(appId, currentPage);

                // Checking for Blocking Situation - sleeping and requesting the same page again
                for (int retry = 0; String.IsNullOrEmpty(response) && retry < maxBlockedRetries; retry++)
                {
                    logger.Info("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                    // Thread Wait for 10 Minutes
                    Thread.Sleep(10 * 60 * 1000);

                    response = ReviewsWrapper.GetAppReviews(appId, currentPage);
                }

                // Giving up on this app instead of dereferencing a null / empty response
                if (String.IsNullOrEmpty(response))
                {
                    logger.Error("Still blocked by Play Store after retrying. Skipping remaining pages of this app");
                    break;
                }

                // Checking for "No Reviews" app
                if (response.Length < 50)
                {
                    logger.Info("No Reviews for this app. Skipping");
                    break;
                }

                // Normalizing Response to Proper HTML
                response = ReviewsWrapper.NormalizeResponse(response);

                // Iterating over Parsed Reviews
                foreach (var review in parser.ParseReviews(response))
                {
                    // Adding App Data to the review
                    review.appID   = appId;
                    review.appName = appRecord.Name;
                    review.appURL  = appRecord.Url;

                    // Adding processing timestamp to the model
                    review.timestamp = DateTime.Now;

                    // Building Query to check for duplicated review
                    var duplicatedReviewQuery = Query.EQ("permalink", review.permalink);

                    // Checking for duplicated review before inserting it
                    if (mongoClient.FindMatch<AppReview>(duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Count() == 0)
                    {
                        // Inserting Review into MongoDB
                        mongoClient.Insert<AppReview>(review, Consts.REVIEWS_COLLECTION);
                    }
                    else
                    {
                        // Only the review is skipped; the app keeps being processed (see the
                        // note on shouldSkipApp above)
                        logger.Info("Duplicated Review : Review already parsed. Skipping Review");
                    }
                }
            }
            catch (Exception ex)
            {
                // A failing page is logged and the next page is attempted
                logger.Error(ex);
            }
        }
    }
}