예제 #1
0
        static void Main(string[] args)
        {
            // Checking for Input Parameters
            if (args == null || args.Length != 1)
            {
                Console.WriteLine("Incorrect number of arguments received. Expected One");
                System.Environment.Exit(-100);
            }

            // Human Readable Variable
            string inputFile = args[0];

            // Checking if the Input file received exists
            if (!File.Exists(inputFile))
            {
                Console.WriteLine(String.Format("Received input file does not exist : {0}", inputFile));
                System.Environment.Exit(-101);
            }

            // App Status
            _appStatus = new Dictionary <String, AppStatusModel> ();

            // Creating Instance of Database Manager
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Parser
            PlayStoreParser dataParser = new PlayStoreParser();

            goto PeopleData;

            using (WebRequests httpClient = new WebRequests())
            {
                // Minor Configuration of the Http Client - Ensures that the requests response will be in english
                // By doing so, we have no problems parsing the dates to their proper formats
                httpClient.Headers.Add(Consts.ACCEPT_LANGUAGE);
                httpClient.Host              = Consts.HOST;
                httpClient.Encoding          = "utf-8";
                httpClient.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

                // Iterating over File Lines (App Urls) - To Extract Data, Not The Reviews Yet.
                foreach (string appUrl in File.ReadAllLines(inputFile))
                {
                    // Logging Progress
                    Console.WriteLine("\n => Processing App : " + appUrl);

                    // Executing Http Get Request for the Apps's Data - With max of 5 Retries
                    String appDataResponse = String.Empty;
                    int    currentRetry    = 0;

                    do
                    {
                        // Http Get
                        appDataResponse = httpClient.Get(appUrl);
                    } while (String.IsNullOrWhiteSpace(appDataResponse) || ++currentRetry <= _maxRetries);

                    // Sanity Check
                    if (String.IsNullOrWhiteSpace(appDataResponse))
                    {
                        Console.WriteLine("\t\t.Error - Failed to find page of app : " + appUrl + ". Skipping it");
                        continue;
                    }

                    Console.WriteLine("\t\t.Page Found. Firing Parser");

                    // Parsing App Data
                    AppModel appData = dataParser.ParseAppPage(appDataResponse, appUrl);

                    // Checking If this app is on the database already
                    if (mongoDB.AppProcessed(appUrl))
                    {
                        Console.WriteLine("\t\t.Previous Version of App Found. Updating It");
                        mongoDB.UpdateRecord(appData, "Url", appData.Url);

                        // Updating App Status
                        _appStatus.Add
                        (
                            appData.Url,
                            new AppStatusModel()
                        {
                            appId   = appData.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty),
                            appUrl  = appData.Url,
                            appName = appData.Name,
                            status  = "Updated"
                        }
                        );
                    }
                    else
                    {
                        Console.WriteLine("\t\t.No Previous Version of the App Found. Adding to Database");
                        mongoDB.Insert <AppModel> (appData);

                        // Updating App Status
                        _appStatus.Add
                        (
                            appData.Url,
                            new AppStatusModel()
                        {
                            appId   = appData.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty),
                            appUrl  = appData.Url,
                            appName = appData.Name,
                            status  = "Inserted"
                        }
                        );
                    }
                }
            }

Reviews:
            // Next Phase: Parse Reviews of those Apps
            Console.WriteLine("\n => Parsing Complete. Obtaining Reviews");

            // Iterating again over app urls to parse the reviews from this app
            foreach (string appUrl in File.ReadAllLines(inputFile))
            {
                // Reaching App Id
                string appID = _appStatus[appUrl].appId;

                // Reviews-Break-Parsing Flag
                bool shouldContinueParsing = true;

                // Parsing Review Pages from the apps
                for (int currentPage = 1; /* no stop condition */; currentPage++)
                {
                    // Getting Reviews Data Bundle
                    string reviewsData = ReviewsWrapper.GetAppReviews(appID, currentPage);

                    // Checking for Blocking Situation
                    if (String.IsNullOrEmpty(reviewsData))
                    {
                        Console.WriteLine("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                        // Thread Wait for 10 Minutes
                        Thread.Sleep(10 * 60 * 1000);
                    }

                    // Checking for "No Reviews" app
                    if (reviewsData.Length < 50)
                    {
                        Console.WriteLine("No Reviews left for this app. Skipping");
                        break;
                    }

                    // Normalizing Response to Proper HTML
                    reviewsData = ReviewsWrapper.NormalizeResponse(reviewsData);

                    // Iterating over Parsed Reviews
                    foreach (var review in dataParser.ParseReviews(reviewsData))
                    {
                        // Adding App Data to the review
                        review.appID   = _appStatus[appUrl].appId;
                        review.appName = _appStatus[appUrl].appName;
                        review.appURL  = _appStatus[appUrl].appUrl;

                        // Incrementing Reviews Count for this app
                        _appStatus[appUrl].reviews++;

                        // Adding Review Object to Database
                        review.timestamp = DateTime.Now;

                        // Building Query to check for duplicated review
                        var duplicatedReviewQuery = Query.EQ("permalink", review.permalink);

                        // Checking for duplicated review before inserting it
                        if (mongoDB.FindMatch <AppReview> (duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Count() == 0)
                        {
                            // Inserting Review into MongoDB
                            mongoDB.Insert <AppReview> (review, Consts.REVIEWS_COLLECTION);
                        }
                        else
                        {
                            Console.WriteLine("Duplicated Review. Skipping App");

                            // When this happens, there are no more reviews to be parsed
                            shouldContinueParsing = false; // Skipping this apps processing
                        }
                    }

                    // Hiccup to avoid Blocking problems
                    Console.WriteLine("Parsed Reviews: " + _appStatus[appUrl].reviews);
                    Thread.Sleep(new Random().Next(14000, 21000));

                    if (!shouldContinueParsing)
                    {
                        break;
                    }
                }
            }

PeopleData:

            Console.WriteLine("\n\n => Processing People Data");

            Console.WriteLine("\nSimulating Google Login Using Selenium.");
            using (var firefoxDriver = new FirefoxDriver())
            {
                // Navigating to Dummy Url - One that I Know that well be asked for a login
                firefoxDriver.Navigate().GoToUrl("https://play.google.com/store/people/details?id=101242565951396343093");

                // Reaching Login Fields
                var loginField    = firefoxDriver.FindElementById("Email");
                var passwordField = firefoxDriver.FindElementById("Passwd");
                var btnSignIn     = firefoxDriver.FindElementById("signIn");

                // Sending Credentials to the browser
                loginField.SendKeys("YOUREMAIL");
                passwordField.SendKeys("YOURPASSWORD");
                btnSignIn.Click();

                string lastPeople     = "https://play.google.com/store/people/details?id=115037241907660526856";
                bool   shouldcontinue = false;

                // Processing Reviewers Data
                foreach (string peopleUrl in mongoDB.FindPeopleUrls())
                {
                    // Skipping until last link
                    if (peopleUrl == lastPeople)
                    {
                        shouldcontinue = true;
                    }

                    if (!shouldcontinue)
                    {
                        continue;
                    }

                    // Navigating To the Reviewer Page
                    firefoxDriver.Navigate().GoToUrl(peopleUrl);

                    // Executing Get Request for the Reviewer page on Google Play
                    string reviewerPage = firefoxDriver.PageSource;

                    // Extracting Reviewer Data from the Page
                    ReviewerPageData reviewerData = dataParser.ParsePeopleData(reviewerPage);

                    // Adding Url to the model
                    reviewerData.reviewerUrl = peopleUrl;

                    // Inserting it to the database - If no previous record of this Reviewer is found
                    if (!mongoDB.IsReviewerOnDatabase(peopleUrl))
                    {
                        mongoDB.Insert <ReviewerPageData> (reviewerData, "ReviewersData");
                    }
                }
            }

            // End of Processing + Console Feedback
            Console.WriteLine("\n\n == Processing Summary ==");

            foreach (var status in _appStatus.Select(t => t.Value))
            {
                // Message
                string cMessage = "=> App : {0} - Status {1} - Reviews : {2}";

                Console.WriteLine(String.Format(cMessage, status.appName, status.status, status.reviews));
            }

            Console.ReadLine();
        }
예제 #2
0
        static void Main(string[] args)
        {
            // Configuring Log Object
            Logger logger = LogManager.GetCurrentClassLogger();

            // Control Variable (Bool - Should the process use proxies? )
            bool isUsingProxies = false;

            // Checking for the need to use proxies
            if (args != null && args.Length == 1)
            {
                // Setting flag to true
                isUsingProxies = true;

                // Loading proxies from .txt received as argument
                String fPath = args[0];

                // Sanity Check
                if (!File.Exists(fPath))
                {
                    logger.Fatal("Couldnt find proxies on path : " + fPath);
                    System.Environment.Exit(-100);
                }

                // Reading Proxies from File
                string[] fLines = File.ReadAllLines(fPath, Encoding.GetEncoding("UTF-8"));

                try
                {
                    // Actual Load of Proxies
                    ProxiesLoader.Load(fLines.ToList());
                }
                catch (Exception ex)
                {
                    logger.Fatal(ex);
                    System.Environment.Exit(-101);
                }
            }

            // MongoDB instance Creation
            logger.Info("Configuring MonboDB Client");

            // Creating instance of Mongo Handler for the main collection
            MongoDBWrapper mongoClient       = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoClient.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            logger.Info("Iterating over Apps");

            // Creating Play Store Parser
            PlayStoreParser parser = new PlayStoreParser();

            // App Model
            AppModel appRecord;

            // Control Variable
            bool noError = true;

            // Finding all the "Apps" that didn't have the reviews visited yet
            while ((appRecord = mongoClient.FindAndModifyReviews()) != null)
            {
                // Extracting app ID from URL
                string appId = appRecord.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty);

                // Console Feedback
                logger.Info("Processing App [ " + appRecord.Name + " ] ");

                try
                {
                    // Console Feedback
                    Console.Write("Reviews from : " + appRecord.Name);

                    // Issuing Request for Reviews
                    string response = ReviewsWrapper.GetAppReviews(appId, 1, isUsingProxies);

                    // Checking for Blocking Situation
                    if (String.IsNullOrEmpty(response))
                    {
                        logger.Info("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                        // Thread Wait for 10 seconds
                        Thread.Sleep(TimeSpan.FromSeconds(10));
                    }

                    // Checking for "No Reviews" app
                    if (response.Length < 50)
                    {
                        logger.Info("No Reviews for this app. Skipping");
                        Console.Write(" - No Reviews Found\n");
                        continue;
                    }

                    // Normalizing Response to Proper HTML
                    response = ReviewsWrapper.NormalizeResponse(response);

                    // List of Reviews
                    List <AppReview> reviews = new List <AppReview> ();

                    // Iterating over Parsed Reviews
                    foreach (var review in parser.ParseReviews(response))
                    {
                        // Adding App Data to the review
                        review.appID   = appId;
                        review.appName = appRecord.Name;
                        review.appURL  = appRecord.Url;

                        // Capture Timestamp to the model
                        review.timestamp = DateTime.Now;

                        // Adding reviews to the current list
                        reviews.Add(review);
                    }

                    // Any Review Found ?
                    if (reviews.Count > 0)
                    {
                        Console.Write(" - " + reviews.Count + " Reviews Found\n");

                        // Checking if there was any previous list of reviews
                        if (appRecord.Reviews == null)
                        {
                            appRecord.Reviews = reviews;
                        }
                        else // Previous List found - Appending only the new ones
                        {
                            foreach (var review in reviews)
                            {
                                if (!appRecord.Reviews.Any(t => t.permalink.Equals(review.permalink)))
                                {
                                    appRecord.Reviews.Add(review);
                                }
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    logger.Error(ex);
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine("Error : " + ex.Message);
                    Console.ForegroundColor = ConsoleColor.White;
                    noError = false;
                }
                finally
                {
                    // Toggling back the "ReviewsStatus" attribute from the model
                    if (noError)
                    {
                        appRecord.ReviewsStatus = "Visited";
                        mongoClient.SaveRecord <AppModel> (appRecord);
                    }
                    else // "Error" status
                    {
                        appRecord.ReviewsStatus = "Error";
                        mongoClient.SaveRecord <AppModel> (appRecord);
                    }
                }
            }
        }
예제 #3
0
        static void Main(string[] args)
        {
            // Configuring Log Object
            Logger logger = LogManager.GetCurrentClassLogger();

            // Parsing Arguments
            logger.Info("Checking for Arguments");

            if (args == null || args.Length != 3)
            {
                logger.Fatal("Arguments Fatal", "Incorrect number of arguments received. Try passing two.");
                return; // Halts.
            }

            logger.Info("Reading Arguments");

            // Reading actual arguments received
            _arguments.Add("AppsToProcess", Int32.Parse(args[0]));
            _arguments.Add("ReviewsPagePerApp", Int32.Parse(args[1]));
            _arguments.Add("AppsToSkip", Int32.Parse(args[2]));

            // Building MongoDB Query - This query specifies which applications you want to parse out the reviews
            // For more regarding MongoDB Queries, check the documentation on the project wiki page
            //var mongoQuery = Query.EQ ("Instalations", "1,000,000 - 5,000,000");
            var mongoQuery = Query.EQ("Category", "/store/apps/category/EDUCATION");

            logger.Info("Configuring MonboDB Client");

            // Creating instance of Mongo Handler for the main collection
            MongoDBWrapper mongoClient       = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoClient.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            logger.Info("Iterating over Apps");

            // Creating Play Store Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Iterating over Query Results for the App Ids
            foreach (var appRecord in mongoClient.FindMatch <AppModel>(mongoQuery, _arguments["AppsToProcess"], _arguments["AppsToSkip"]))
            {
                // Extracting app ID from URL
                string appId = appRecord.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty);

                // Console Feedback
                logger.Info("Processing App [ " + appRecord.Name + " ] ");

                bool shouldSkipApp = false;

                // Iterating over Review Pages up to the max received as argument
                for (int currentPage = 1; currentPage <= _arguments["ReviewsPagePerApp"]; currentPage++)
                {
                    // Checking for the need to skip this app in case of duplicated review
                    if (shouldSkipApp)
                    {
                        break;
                    }

                    try
                    {
                        // Page Feedback
                        logger.Info("\tCurrent Page: " + currentPage);

                        // Issuing Request for Reviews
                        string response = ReviewsWrapper.GetAppReviews(appId, currentPage);

                        // Checking for Blocking Situation
                        if (String.IsNullOrEmpty(response))
                        {
                            logger.Info("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                            // Thread Wait for 10 Minutes
                            Thread.Sleep(10 * 60 * 1000);
                        }

                        // Checking for "No Reviews" app
                        if (response.Length < 50)
                        {
                            logger.Info("No Reviews for this app. Skipping");
                            break;
                        }

                        // Normalizing Response to Proper HTML
                        response = ReviewsWrapper.NormalizeResponse(response);

                        // Iterating over Parsed Reviews
                        foreach (var review in parser.ParseReviews(response))
                        {
                            // Adding App Data to the review
                            review.appID   = appId;
                            review.appName = appRecord.Name;
                            review.appURL  = appRecord.Url;

                            // Adding processing timestamp to the model
                            review.timestamp = DateTime.Now;

                            // Building Query to check for duplicated review
                            var duplicatedReviewQuery = Query.EQ("permalink", review.permalink);

                            // Checking for duplicated review before inserting it
                            if (mongoClient.FindMatch <AppReview>(duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Count() == 0)
                            {
                                // Inserting Review into MongoDB
                                mongoClient.Insert <AppReview>(review, Consts.REVIEWS_COLLECTION);
                            }
                            else
                            {
                                logger.Info("Duplicated Review", "Review already parsed. Skipping App");
                                //shouldSkipApp = true;
                                //break;
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        logger.Error(ex);
                    }
                }
            }
        }