예제 #1
0
        /// <summary>
        /// Entry point: sends every MongoDB app record that is not yet flagged as "Uploaded"
        /// to Keen.IO as a "PlayStore2014" event, then calls SetUpdated with the record's Url.
        /// </summary>
        /// <param name="args">Command line arguments (not read by this method).</param>
        static void Main(string[] args)
        {
            // Keen.IO keys and bucket name are read from the application config file
            var appSettings = ConfigurationManager.AppSettings;

            _keenIOProjectID = appSettings["keenIOProjectID"];
            _keenIOMasterKey = appSettings["keenIOMasterKey"];
            _keenIOWriteKey  = appSettings["keenIOWriteKey"];
            _keenIOReadKey   = appSettings["keenIOReadKey"];
            _bucketName      = appSettings["keenIOBucketName"];

            // MongoDB wrapper setup - the server address is built as "server:port"
            MongoDBWrapper database      = new MongoDBWrapper();
            string         serverAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            database.ConfigureDatabase(
                Consts.MONGO_USER,
                Consts.MONGO_PASS,
                Consts.MONGO_AUTH_DB,
                serverAddress,
                Consts.MONGO_TIMEOUT,
                Consts.MONGO_DATABASE,
                Consts.MONGO_COLLECTION);

            // Keen.IO client built from the keys loaded above
            var keenSettings = new ProjectSettingsProvider(_keenIOProjectID, _keenIOMasterKey, _keenIOWriteKey, _keenIOReadKey);
            var keen         = new KeenClient(keenSettings);

            // "Reading" logic: replace the query below to change which records get uploaded.
            // Here, every record whose "Uploaded" attribute is not true is selected.
            var notUploadedQuery = Query.NE("Uploaded", true);

            foreach (var app in database.FindMatch<AppModel>(notUploadedQuery))
            {
                try
                {
                    // Pushing the record to Keen.IO
                    keen.AddEvent("PlayStore2014", app);

                    // Counting the upload and giving console feedback on every 100th app
                    if (++_appsCounter % 100 == 0)
                    {
                        Console.WriteLine(String.Concat("Uploaded : ", _appsCounter));
                    }

                    database.SetUpdated(app.Url);
                }
                catch (Exception error)
                {
                    // A failure on one record is reported and the loop moves on to the next one
                    Console.WriteLine("\n\t{0}", error.Message);
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Entry point: reads the Keen.IO keys from the config file, connects to MongoDB and
        /// uploads each record whose "Uploaded" attribute is not true as a "PlayStore2014" event.
        /// </summary>
        /// <param name="args">Command line arguments (not read by this method).</param>
        static void Main (string[] args)
        {
            // Config file values: Keen.IO project id, keys and bucket name
            _keenIOProjectID = ConfigurationManager.AppSettings["keenIOProjectID"];
            _keenIOMasterKey = ConfigurationManager.AppSettings["keenIOMasterKey"];
            _keenIOWriteKey  = ConfigurationManager.AppSettings["keenIOWriteKey"];
            _keenIOReadKey   = ConfigurationManager.AppSettings["keenIOReadKey"];
            _bucketName      = ConfigurationManager.AppSettings["keenIOBucketName"];

            // MongoDB wrapper used for both the query and the per-record update
            MongoDBWrapper mongoWrapper = new MongoDBWrapper ();
            string hostAndPort          = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoWrapper.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, hostAndPort, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Keen.IO client
            var uploader = new KeenClient (new ProjectSettingsProvider (_keenIOProjectID, _keenIOMasterKey, _keenIOWriteKey, _keenIOReadKey));

            // From this point on, the code can be changed to reflect a different "Reading" logic.
            // This version simply walks over the records found on MongoDB and uploads them to Keen.IO.
            var pendingApps = mongoWrapper.FindMatch<AppModel> (Query.NE ("Uploaded", true));

            foreach (var pendingApp in pendingApps)
            {
                try
                {
                    // Sending the record as an event
                    uploader.AddEvent ("PlayStore2014", pendingApp);

                    // One more app processed
                    _appsCounter++;

                    // Progress message once per 100 processed apps
                    bool isHundredthApp = (_appsCounter % 100) == 0;

                    if (isHundredthApp)
                    {
                        Console.WriteLine ("Uploaded : {0}", _appsCounter);
                    }

                    mongoWrapper.SetUpdated (pendingApp.Url);
                }
                catch (Exception failure)
                {
                    // Errors are printed and the next record is tried
                    Console.WriteLine (String.Concat ("\n\t", failure.Message));
                }
            }
        }
예제 #3
0
        ///  *** READ THIS BEFORE YOU START. ***
        ///  *** I MEAN IT, PLEASE, READ IT  ***
        /// 
        ///  This exporting helper will download ALL THE APPS found on the database, and
        ///  dump it to a CSV file (with headers).
        ///  
        ///  Note that, since the database is hosted on AWS, I will PAY (for the outbound internet traffic) if you execute a full database export.
        ///  So, if you are going to execute a full export, please get in touch with me before running this project, or send me a donation
        ///  via paypal on [email protected]
        ///  
        ///  Also, be nice with the database.
        ///  
        ///  ** END OF WARNING ***

        // Entry point of the CSV exporter.
        //
        // args[0] is the output file path. The method validates the arguments and the write
        // permissions (through ValidateArgs / ValidateFilePermissions), connects to MongoDB,
        // writes a CSV header line and then one line per app returned by the query, using
        // AppModel.ToString () to build each line. Output is appended to the file (the
        // StreamWriter is opened with append = true) using the ISO-8859-1 encoding.
        static void Main (string[] args)
        {
            // Logs Counter - incremented by the export loop and read by the periodic timer callback
            int processedApps = 0;

            // Configuring Log Object
            Logger logger = LogManager.GetCurrentClassLogger ();
            logger.Info ("Worker Started");

            logger.Info ("Checking Arguments");

            // Periodic Log Timer (first tick after 10 seconds, then every 10 seconds).
            // The 'using' keeps the timer referenced for the whole export - an unreferenced
            // System.Threading.Timer may be garbage collected and silently stop firing - and
            // disposes it on every exit path.
            using (Timer loggingTimer = new Timer ((state) =>
            {
                logger.Info ("Processed Apps: " + processedApps);

            }, null, 10000, 10000))
            {
                // Validating Arguments
                if (!ValidateArgs (args))
                {
                    // Single message: a second string argument would be treated as a format
                    // argument by the logger and dropped, since the message has no placeholder
                    logger.Fatal ("Invalid Args : Args must have 1 element");
                    return;
                }

                logger.Info ("Checking Write Permissions on output Path");

                // Validating Write Permissions on output path
                if (!ValidateFilePermissions (args[0]))
                {
                    logger.Fatal ("Insuficient Permissions : Cannot write on path : " + args[0]);
                    return;
                }

                // Configuring MongoDB Wrapper
                MongoDBWrapper mongoDB = new MongoDBWrapper ();
                string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
                mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

                // Opening Output Stream
                using (StreamWriter sWriter = new StreamWriter (args[0], true, Encoding.GetEncoding ("ISO-8859-1")))
                {
                    // Auto Flush Content
                    sWriter.AutoFlush = true;

                    // Writing Headers.
                    // Every fragment except the last one must end with a comma, otherwise two
                    // column names get fused into one (e.g. "LastUpdateDateAppSize").
                    String headersLine = "Url,ReferenceDate,Name,Developer,IsTopDeveloper,DeveloperURL,PublicationDate,"
                                       + "Category,IsFree,Price,Reviewers,Score.Total,Score.Count,Score.FiveStars,"
                                       + "Score.FourStars,Score.ThreeStars,Score.TwoStars,Score.OneStars,LastUpdateDate,"
                                       + "AppSize,Instalations,CurrentVersion,MinimumOSVersion,ContentRating,HaveInAppPurchases,DeveloperEmail,DeveloperWebsite,DeveloperPrivacyPolicy";

                    sWriter.WriteLine (headersLine);

                    // Example of MongoDB Query Construction
                    // Queries for records which have the attribute "Category" equal to the SPORTS category
                    var mongoQuery = Query.EQ ("Category", "/store/apps/category/SPORTS");

                    // More Examples of Queries
                    // var mongoQuery = Query.EQ ("IsTopDeveloper", false);
                    // var mongoQuery = Query.EQ ("Category", "/store/apps/category/GAME_CASINO");
                    // var mongoQuery = Query.GT ("Price", 10);

                    // Reading the matching apps from the database
                    // USAGE: CHANGE FindMatch to FindAll if you want to export all the records from the database
                    foreach (AppModel app in mongoDB.FindMatch<AppModel> (mongoQuery))
                    {
                        try
                        {
                            // Writing line to File
                            sWriter.WriteLine (app.ToString ());
                            processedApps++;
                        }
                        catch (Exception ex)
                        {
                            // A failure on one record is logged and the export goes on
                            logger.Error (ex);
                        }
                    }
                }

                // Logging end of the Process
                logger.Info ("Finished Exporting Database");
            }
        }
예제 #4
0
        /// <summary>
        /// Entry point of the reviews crawler: for each app returned by the MongoDB query, requests
        /// its review pages, parses them and inserts every review whose permalink is not yet present
        /// in the reviews collection.
        /// </summary>
        /// <param name="args">
        /// Exactly three integers, stored in <c>_arguments</c> as "AppsToProcess", "ReviewsPagePerApp"
        /// (highest review page requested per app) and "AppsToSkip". The first and third values are
        /// handed to <c>FindMatch</c> together with the query.
        /// </param>
        static void Main (string[] args)
        {
            // Configuring Log Object Threshold
            LogWriter.Threshold = TLogEventLevel.Information;
            LogWriter.LogEvent  += LogWriter_LogEvent;

            // Parsing Arguments
            LogWriter.Info ("Checking for Arguments");

            if (args == null || args.Length != 3)
            {
                LogWriter.Fatal ("Arguments Fatal", "Incorrect number of arguments received. Try passing three.");
                return; // Halts.
            }

            LogWriter.Info ("Reading Arguments");

            // Reading actual arguments received - a non numeric value halts with a log entry
            // instead of an unhandled FormatException
            int appsToProcess, reviewsPagePerApp, appsToSkip;

            if (!Int32.TryParse (args[0], out appsToProcess)
                || !Int32.TryParse (args[1], out reviewsPagePerApp)
                || !Int32.TryParse (args[2], out appsToSkip))
            {
                LogWriter.Fatal ("Arguments Fatal", "All three arguments must be integer numbers.");
                return; // Halts.
            }

            _arguments.Add ("AppsToProcess", appsToProcess);
            _arguments.Add ("ReviewsPagePerApp", reviewsPagePerApp);
            _arguments.Add ("AppsToSkip", appsToSkip);

            // Building MongoDB Query - This query specifies which applications you want to parse out the reviews
            // For more regarding MongoDB Queries, check the documentation on the project wiki page
            //var mongoQuery = Query.EQ ("Instalations", "1,000,000 - 5,000,000");
            var mongoQuery = Query.EQ ("Category", "/store/apps/category/EDUCATION");

            LogWriter.Info ("Configuring MonboDB Client");

            // Creating instance of Mongo Handler for the main collection
            MongoDBWrapper mongoClient = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoClient.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            LogWriter.Info ("Iterating over Apps");

            // App URL Prefix (must be removed in order to obtain the app ID)
            string playStorePrefix = "https://play.google.com/store/apps/details?id=";

            // Creating Play Store Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // Iterating over Query Results for the App Ids
            foreach (var appRecord in mongoClient.FindMatch<AppModel> (mongoQuery, appsToProcess, appsToSkip))
            {
                // Extracting app ID from URL
                string appId = appRecord.Url.Replace (playStorePrefix, String.Empty);

                // Console Feedback
                LogWriter.Info ("Processing App [ " + appRecord.Name + " ] ");

                // Never set at the moment: the code that raises it on a duplicated review is disabled below
                bool shouldSkipApp = false;

                // Iterating over Review Pages up to the max received as argument
                for (int currentPage = 1; currentPage <= reviewsPagePerApp; currentPage++)
                {
                    // Checking for the need to skip this app in case of duplicated review
                    if (shouldSkipApp)
                    {
                        break;
                    }

                    try
                    {
                        // Page Feedback
                        LogWriter.Info ("\tCurrent Page: " + currentPage);

                        // Issuing Request for Reviews
                        string response = GetAppReviews (appId, currentPage);

                        // Checking for Blocking Situation
                        if (String.IsNullOrEmpty (response))
                        {
                            LogWriter.Info ("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                            // Thread Wait for 10 Minutes
                            Thread.Sleep (10 * 60 * 1000);

                            // Retrying the same page once. Without this, the empty (possibly null)
                            // response would be used below and fail on 'response.Length'
                            response = GetAppReviews (appId, currentPage);

                            if (String.IsNullOrEmpty (response))
                            {
                                LogWriter.Info ("Still no response after retrying. Skipping page " + currentPage);
                                continue;
                            }
                        }

                        // Checking for "No Reviews" app
                        if (response.Length < 50)
                        {
                            LogWriter.Info ("No Reviews for this app. Skipping");
                            break;
                        }

                        // Normalizing Response to Proper HTML
                        response = NormalizeResponse (response);

                        // Iterating over Parsed Reviews
                        foreach (var review in parser.ParseReviews (response))
                        {
                            // Adding App Data to the review
                            review.appID   = appId;
                            review.appName = appRecord.Name;
                            review.appURL  = appRecord.Url;

                            // Adding processing timestamp to the model
                            review.timestamp = DateTime.Now;

                            // Building Query to check for duplicated review
                            var duplicatedReviewQuery = Query.EQ ("permalink", review.permalink);

                            // Checking for duplicated review before inserting it
                            if (!mongoClient.FindMatch<AppReview> (duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Any ())
                            {
                                // Inserting Review into MongoDB
                                mongoClient.Insert<AppReview> (review, Consts.REVIEWS_COLLECTION);
                            }
                            else
                            {
                                LogWriter.Info ("Duplicated Review", "Review already parsed. Skipping App");
                                //shouldSkipApp = true;
                                //break;
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // A failing page is logged and the next page is tried
                        LogWriter.Error (ex);
                    }
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Entry point of the three phase crawler:
        /// (1) downloads and parses the page of every app url listed in the input file and
        ///     inserts/updates it on MongoDB,
        /// (2) downloads the reviews of those apps and stores the ones not yet on the database,
        /// (3) logs into Google through a Selenium driven Firefox and stores the reviewers pages.
        /// A processing summary is printed at the end and the process waits for a key press.
        /// </summary>
        /// <param name="args">A single element: path of a text file with one app url per line.</param>
        static void Main(string[] args)
        {
            // Checking for Input Parameters
            if (args == null || args.Length != 1)
            {
                Console.WriteLine("Incorrect number of arguments received. Expected One");
                System.Environment.Exit(-100);
            }

            // Human Readable Variable
            string inputFile = args[0];

            // Checking if the Input file received exists
            if (!File.Exists(inputFile))
            {
                Console.WriteLine(String.Format("Received input file does not exist : {0}", inputFile));
                System.Environment.Exit(-101);
            }

            // App Status - keyed by app url, filled by phase 1 and read by phase 2 and by the summary
            _appStatus = new Dictionary <String, AppStatusModel> ();

            // Creating Instance of Database Manager
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Parser
            PlayStoreParser dataParser = new PlayStoreParser();

            // NOTE(review): this unconditional jump skips phases 1 and 2 entirely (everything up to
            // the "PeopleData" label is unreachable). It looks like a debugging shortcut - confirm
            // with the author before removing it, since removing it re-enables both crawling phases.
            goto PeopleData;

            using (WebRequests httpClient = new WebRequests())
            {
                // Minor Configuration of the Http Client - Ensures that the requests response will be in english
                // By doing so, we have no problems parsing the dates to their proper formats
                httpClient.Headers.Add(Consts.ACCEPT_LANGUAGE);
                httpClient.Host              = Consts.HOST;
                httpClient.Encoding          = "utf-8";
                httpClient.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

                // Iterating over File Lines (App Urls) - To Extract Data, Not The Reviews Yet.
                foreach (string appUrl in File.ReadAllLines(inputFile))
                {
                    // Logging Progress
                    Console.WriteLine("\n => Processing App : " + appUrl);

                    // Executing Http Get Request for the Apps's Data - one attempt plus up to _maxRetries retries
                    String appDataResponse = String.Empty;
                    int    currentRetry    = 0;

                    do
                    {
                        // Http Get
                        appDataResponse = httpClient.Get(appUrl);

                        // Retry only while the response is empty AND there are retries left.
                        // (With '||' an empty response looped forever and a good response was re-requested)
                    } while (String.IsNullOrWhiteSpace(appDataResponse) && ++currentRetry <= _maxRetries);

                    // Sanity Check
                    if (String.IsNullOrWhiteSpace(appDataResponse))
                    {
                        Console.WriteLine("\t\t.Error - Failed to find page of app : " + appUrl + ". Skipping it");
                        continue;
                    }

                    Console.WriteLine("\t\t.Page Found. Firing Parser");

                    // Parsing App Data
                    AppModel appData = dataParser.ParseAppPage(appDataResponse, appUrl);

                    // Checking If this app is on the database already
                    string appOutcome;

                    if (mongoDB.AppProcessed(appUrl))
                    {
                        Console.WriteLine("\t\t.Previous Version of App Found. Updating It");
                        mongoDB.UpdateRecord(appData, "Url", appData.Url);
                        appOutcome = "Updated";
                    }
                    else
                    {
                        Console.WriteLine("\t\t.No Previous Version of the App Found. Adding to Database");
                        mongoDB.Insert <AppModel> (appData);
                        appOutcome = "Inserted";
                    }

                    // Updating App Status - the indexer is used instead of Add so that an url listed
                    // twice on the input file replaces its previous status instead of throwing
                    _appStatus[appData.Url] = new AppStatusModel()
                    {
                        appId   = appData.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty),
                        appUrl  = appData.Url,
                        appName = appData.Name,
                        status  = appOutcome
                    };
                }
            }

Reviews:
            // Next Phase: Parse Reviews of those Apps
            Console.WriteLine("\n => Parsing Complete. Obtaining Reviews");

            // Single random source for the pauses between review pages
            Random pauseRandom = new Random();

            // Iterating again over app urls to parse the reviews from this app
            foreach (string appUrl in File.ReadAllLines(inputFile))
            {
                // Reaching App Status - apps that failed on the previous phase have no entry
                AppStatusModel appStatus;

                if (!_appStatus.TryGetValue(appUrl, out appStatus))
                {
                    Console.WriteLine("No parsed data found for app : " + appUrl + ". Skipping its reviews");
                    continue;
                }

                string appID = appStatus.appId;

                // Reviews-Break-Parsing Flag
                bool shouldContinueParsing = true;

                // Parsing Review Pages from the apps
                for (int currentPage = 1; /* no stop condition */; currentPage++)
                {
                    // Getting Reviews Data Bundle
                    string reviewsData = ReviewsWrapper.GetAppReviews(appID, currentPage);

                    // Checking for Blocking Situation
                    if (String.IsNullOrEmpty(reviewsData))
                    {
                        Console.WriteLine("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                        // Thread Wait for 10 Minutes
                        Thread.Sleep(10 * 60 * 1000);

                        // Retrying the same page once. Without this, a null response would crash
                        // the process on 'reviewsData.Length' below
                        reviewsData = ReviewsWrapper.GetAppReviews(appID, currentPage);

                        if (String.IsNullOrEmpty(reviewsData))
                        {
                            Console.WriteLine("Still no response after retrying. Skipping remaining reviews of this app");
                            break;
                        }
                    }

                    // Checking for "No Reviews" app
                    if (reviewsData.Length < 50)
                    {
                        Console.WriteLine("No Reviews left for this app. Skipping");
                        break;
                    }

                    // Normalizing Response to Proper HTML
                    reviewsData = ReviewsWrapper.NormalizeResponse(reviewsData);

                    // Iterating over Parsed Reviews
                    foreach (var review in dataParser.ParseReviews(reviewsData))
                    {
                        // Adding App Data to the review
                        review.appID   = appStatus.appId;
                        review.appName = appStatus.appName;
                        review.appURL  = appStatus.appUrl;

                        // Incrementing Reviews Count for this app
                        appStatus.reviews++;

                        // Adding processing timestamp to the review
                        review.timestamp = DateTime.Now;

                        // Building Query to check for duplicated review
                        var duplicatedReviewQuery = Query.EQ("permalink", review.permalink);

                        // Checking for duplicated review before inserting it
                        if (!mongoDB.FindMatch <AppReview> (duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Any())
                        {
                            // Inserting Review into MongoDB
                            mongoDB.Insert <AppReview> (review, Consts.REVIEWS_COLLECTION);
                        }
                        else
                        {
                            Console.WriteLine("Duplicated Review. Skipping App");

                            // When this happens, there are no more reviews to be parsed.
                            // The rest of the current page is still processed; the flag stops the next page
                            shouldContinueParsing = false; // Skipping this apps processing
                        }
                    }

                    // Hiccup to avoid Blocking problems (random pause between 14 and 21 seconds)
                    Console.WriteLine("Parsed Reviews: " + appStatus.reviews);
                    Thread.Sleep(pauseRandom.Next(14000, 21000));

                    if (!shouldContinueParsing)
                    {
                        break;
                    }
                }
            }

PeopleData:

            Console.WriteLine("\n\n => Processing People Data");

            Console.WriteLine("\nSimulating Google Login Using Selenium.");
            using (var firefoxDriver = new FirefoxDriver())
            {
                // Navigating to Dummy Url - One that is known to ask for a login
                firefoxDriver.Navigate().GoToUrl("https://play.google.com/store/people/details?id=101242565951396343093");

                // Reaching Login Fields
                var loginField    = firefoxDriver.FindElementById("Email");
                var passwordField = firefoxDriver.FindElementById("Passwd");
                var btnSignIn     = firefoxDriver.FindElementById("signIn");

                // Sending Credentials to the browser (placeholders - replace with real credentials)
                loginField.SendKeys("YOUREMAIL");
                passwordField.SendKeys("YOURPASSWORD");
                btnSignIn.Click();

                // Resume point: every url before this one is skipped, this one and the following are processed
                string lastPeople     = "https://play.google.com/store/people/details?id=115037241907660526856";
                bool   shouldcontinue = false;

                // Processing Reviewers Data
                foreach (string peopleUrl in mongoDB.FindPeopleUrls())
                {
                    // Skipping until last link
                    if (peopleUrl == lastPeople)
                    {
                        shouldcontinue = true;
                    }

                    if (!shouldcontinue)
                    {
                        continue;
                    }

                    // Navigating To the Reviewer Page
                    firefoxDriver.Navigate().GoToUrl(peopleUrl);

                    // Reading the source of the Reviewer page loaded by the browser
                    string reviewerPage = firefoxDriver.PageSource;

                    // Extracting Reviewer Data from the Page
                    ReviewerPageData reviewerData = dataParser.ParsePeopleData(reviewerPage);

                    // Adding Url to the model
                    reviewerData.reviewerUrl = peopleUrl;

                    // Inserting it to the database - If no previous record of this Reviewer is found
                    if (!mongoDB.IsReviewerOnDatabase(peopleUrl))
                    {
                        mongoDB.Insert <ReviewerPageData> (reviewerData, "ReviewersData");
                    }
                }
            }

            // End of Processing + Console Feedback
            Console.WriteLine("\n\n == Processing Summary ==");

            foreach (var status in _appStatus.Values)
            {
                // Message
                string cMessage = "=> App : {0} - Status {1} - Reviews : {2}";

                Console.WriteLine(String.Format(cMessage, status.appName, status.status, status.reviews));
            }

            // Keeps the console window open until the user presses enter
            Console.ReadLine();
        }
예제 #6
0
        /// <summary>
        /// Entry point of the reviews crawler: for each app returned by the MongoDB query, requests
        /// its review pages through <c>ReviewsWrapper</c>, parses them and inserts every review whose
        /// permalink is not yet present in the reviews collection.
        /// </summary>
        /// <param name="args">
        /// Exactly three integers, stored in <c>_arguments</c> as "AppsToProcess", "ReviewsPagePerApp"
        /// (highest review page requested per app) and "AppsToSkip". The first and third values are
        /// handed to <c>FindMatch</c> together with the query.
        /// </param>
        static void Main(string[] args)
        {
            // Configuring Log Object
            Logger logger = LogManager.GetCurrentClassLogger();

            // Parsing Arguments
            logger.Info("Checking for Arguments");

            if (args == null || args.Length != 3)
            {
                // Single message: a second string argument would be treated as a format argument
                // by the logger and dropped, since the message has no placeholder
                logger.Fatal("Arguments Fatal : Incorrect number of arguments received. Try passing three.");
                return; // Halts.
            }

            logger.Info("Reading Arguments");

            // Reading actual arguments received - a non numeric value halts with a log entry
            // instead of an unhandled FormatException
            int appsToProcess, reviewsPagePerApp, appsToSkip;

            if (!Int32.TryParse(args[0], out appsToProcess)
                || !Int32.TryParse(args[1], out reviewsPagePerApp)
                || !Int32.TryParse(args[2], out appsToSkip))
            {
                logger.Fatal("Arguments Fatal : All three arguments must be integer numbers.");
                return; // Halts.
            }

            _arguments.Add("AppsToProcess", appsToProcess);
            _arguments.Add("ReviewsPagePerApp", reviewsPagePerApp);
            _arguments.Add("AppsToSkip", appsToSkip);

            // Building MongoDB Query - This query specifies which applications you want to parse out the reviews
            // For more regarding MongoDB Queries, check the documentation on the project wiki page
            //var mongoQuery = Query.EQ ("Instalations", "1,000,000 - 5,000,000");
            var mongoQuery = Query.EQ("Category", "/store/apps/category/EDUCATION");

            logger.Info("Configuring MonboDB Client");

            // Creating instance of Mongo Handler for the main collection
            MongoDBWrapper mongoClient       = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoClient.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            logger.Info("Iterating over Apps");

            // Creating Play Store Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Iterating over Query Results for the App Ids
            foreach (var appRecord in mongoClient.FindMatch <AppModel>(mongoQuery, appsToProcess, appsToSkip))
            {
                // Extracting app ID from URL
                string appId = appRecord.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty);

                // Console Feedback
                logger.Info("Processing App [ " + appRecord.Name + " ] ");

                // Never set at the moment: the code that raises it on a duplicated review is disabled below
                bool shouldSkipApp = false;

                // Iterating over Review Pages up to the max received as argument
                for (int currentPage = 1; currentPage <= reviewsPagePerApp; currentPage++)
                {
                    // Checking for the need to skip this app in case of duplicated review
                    if (shouldSkipApp)
                    {
                        break;
                    }

                    try
                    {
                        // Page Feedback
                        logger.Info("\tCurrent Page: " + currentPage);

                        // Issuing Request for Reviews
                        string response = ReviewsWrapper.GetAppReviews(appId, currentPage);

                        // Checking for Blocking Situation
                        if (String.IsNullOrEmpty(response))
                        {
                            logger.Info("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                            // Thread Wait for 10 Minutes
                            Thread.Sleep(10 * 60 * 1000);

                            // Retrying the same page once. Without this, the empty (possibly null)
                            // response would be used below and fail on 'response.Length'
                            response = ReviewsWrapper.GetAppReviews(appId, currentPage);

                            if (String.IsNullOrEmpty(response))
                            {
                                logger.Info("Still no response after retrying. Skipping page " + currentPage);
                                continue;
                            }
                        }

                        // Checking for "No Reviews" app
                        if (response.Length < 50)
                        {
                            logger.Info("No Reviews for this app. Skipping");
                            break;
                        }

                        // Normalizing Response to Proper HTML
                        response = ReviewsWrapper.NormalizeResponse(response);

                        // Iterating over Parsed Reviews
                        foreach (var review in parser.ParseReviews(response))
                        {
                            // Adding App Data to the review
                            review.appID   = appId;
                            review.appName = appRecord.Name;
                            review.appURL  = appRecord.Url;

                            // Adding processing timestamp to the model
                            review.timestamp = DateTime.Now;

                            // Building Query to check for duplicated review
                            var duplicatedReviewQuery = Query.EQ("permalink", review.permalink);

                            // Checking for duplicated review before inserting it
                            if (!mongoClient.FindMatch <AppReview>(duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Any())
                            {
                                // Inserting Review into MongoDB
                                mongoClient.Insert <AppReview>(review, Consts.REVIEWS_COLLECTION);
                            }
                            else
                            {
                                logger.Info("Duplicated Review : Review already parsed. Skipping App");
                                //shouldSkipApp = true;
                                //break;
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // A failing page is logged and the next page is tried
                        logger.Error(ex);
                    }
                }
            }
        }
예제 #7
0
        ///  *** READ THIS BEFORE YOU START. ***
        ///  *** I MEAN IT, PLEASE, READ IT  ***
        ///
        ///  This exporting helper will download ALL THE APPS found on the database, and
        ///  dump it to a CSV file (with headers).
        ///
        ///  Note that, since the database is hosted on AWS, I will PAY (for the outbound internet traffic) if you execute a full database export.
        ///  So, if you are going to execute a full export, please get in touch with me before running this project, or send me a donation
        ///  via paypal on [email protected]
        ///
        ///  Also, be nice with the database.
        ///
        ///  ** END OF WARNING ***

        /// <summary>
        /// Exports apps read from MongoDB to a CSV file (with a header line).
        /// </summary>
        /// <param name="args">Command line arguments; <c>args[0]</c> is used as the output file path.</param>
        static void Main(string[] args)
        {
            // Logs Counter - Read by the periodic logging timer below
            int processedApps = 0;

            // Configuring Log Object Threshold
            LogWriter.Threshold = TLogEventLevel.Information;

            // Overriding LogWriter Event
            LogWriter.LogEvent += LogWriter_LogEvent;

            try
            {
                LogWriter.Info("Checking Arguments");

                // Periodic Log Timer (first tick after 10s, then every 10s).
                // Held in a "using" so it stays referenced while the export runs and is
                // disposed on every exit path, including the early returns below.
                using (Timer loggingThread = new Timer(state =>
                {
                    LogWriter.Info("Processed Apps: " + processedApps);
                }, null, 10000, 10000))
                {
                    // Validating Arguments
                    if (!ValidateArgs(args))
                    {
                        LogWriter.Fatal("Invalid Args", "Args must have 1 element");
                        return;
                    }

                    LogWriter.Info("Checking Write Permissions on output Path");

                    // Validating Write Permissions on output path
                    if (!ValidateFilePermissions(args[0]))
                    {
                        LogWriter.Fatal("Insuficient Permissions", "Cannot write on path : " + args[0]);
                        return;
                    }

                    // Configuring MongoDB Wrapper
                    MongoDBWrapper mongoDB           = new MongoDBWrapper();
                    string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

                    mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

                    // Opening Output Stream (append mode, ISO-8859-1 encoded)
                    using (StreamWriter sWriter = new StreamWriter(args[0], true, Encoding.GetEncoding("ISO-8859-1")))
                    {
                        // Auto Flush Content
                        sWriter.AutoFlush = true;

                        // Writing Headers
                        // Every continued segment must start right after a trailing comma,
                        // otherwise two column names get fused into a single header cell.
                        String headersLine = "_id,Url,ReferenceDate,Name,Developer,IsTopDeveloper,DeveloperURL,PublicationDate,"
                                             + "Category,IsFree,Price,Reviewers,CoverImgUrl,Description,Score.Total,Score.Count,Score.FiveStars,"
                                             + "Score.FourStars,Score.ThreeStars,Score.TwoStars,Score.OneStars,LastUpdateDate,"
                                             + "AppSize,Instalations,CurrentVersion,MinimumOSVersion,ContentRating,HaveInAppPurchases,DeveloperEmail,DeveloperWebsite,DeveloperPrivacyPolicy";

                        sWriter.WriteLine(headersLine);

                        // Example of MongoDB Query Construction
                        // Queries for records which have the attribute "IsTopDeveloper" equal to "false"
                        var mongoQuery = Query.EQ("IsTopDeveloper", false);

                        // Reading apps from the database
                        // USAGE: CHANGE FindMatch to FindAll if you want to export all the records from the database
                        foreach (AppModel app in mongoDB.FindMatch<AppModel>(mongoQuery, 10, 0))
                        {
                            try
                            {
                                // Writing line to File
                                sWriter.WriteLine(app.ToString());
                                processedApps++;
                            }
                            catch (Exception ex)
                            {
                                // A failing record is logged and skipped so the export can go on
                                LogWriter.Error(ex);
                            }
                        }
                    }

                    // Logging end of the Process
                    LogWriter.Info("Finished Exporting Database");
                }
            }
            finally
            {
                // Removing Event - Also executed on the early-return paths
                LogWriter.LogEvent -= LogWriter_LogEvent;
            }
        }
예제 #8
0
        /// <summary>
        /// Play Store crawler entry point. The method is organized in three phases:
        /// (1) download and parse the page of every app url listed in the input file,
        /// (2) download and store the reviews of those apps and
        /// (3) visit the reviewers ("people") pages through a Selenium-driven Firefox.
        /// A processing summary is printed at the end.
        /// </summary>
        /// <param name="args">Command line arguments; exactly one is expected: the path of a file with one app url per line.</param>
        static void Main (string[] args)
        {
            // Checking for Input Parameters
            if (args == null || args.Length != 1)
            {
                Console.WriteLine ("Incorrect number of arguments received. Expected One");
                System.Environment.Exit (-100);
            }

            // Human Readable Variable
            string inputFile = args[0];

            // Checking if the Input file received exists
            if (!File.Exists (inputFile))
            {
                Console.WriteLine (String.Format("Received input file does not exist : {0}", inputFile));
                System.Environment.Exit (-101);
            }

            // App Status
            _appStatus = new Dictionary<String, AppStatusModel> ();

            // Creating Instance of Database Manager
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Parser
            PlayStoreParser dataParser = new PlayStoreParser ();

            // Single generator for the randomized delays between review pages
            Random delayGenerator = new Random ();

            // NOTE(review): this jump skips phases 1 and 2 entirely (they are unreachable while it is here).
            // It looks like a "resume from the people phase" shortcut; remove it to run the full pipeline.
            goto PeopleData;

            using (WebRequests httpClient = new WebRequests ())
            {
                // Minor Configuration of the Http Client - Ensures that the requests response will be in english
                // By doing so, we have no problems parsing the dates to their proper formats
                httpClient.Headers.Add (Consts.ACCEPT_LANGUAGE);
                httpClient.Host     = Consts.HOST;
                httpClient.Encoding = "utf-8";
                httpClient.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

                // Iterating over File Lines (App Urls) - To Extract Data, Not The Reviews Yet.
                foreach (string appUrl in File.ReadAllLines (inputFile))
                {
                    // Logging Progress
                    Console.WriteLine ("\n => Processing App : " + appUrl);

                    // Executing Http Get Request for the Apps's Data - One attempt plus at most "_maxRetries" retries
                    String appDataResponse = String.Empty;
                    int currentRetry       = 0;

                    do
                    {
                        // Http Get
                        appDataResponse = httpClient.Get (appUrl);

                        // Retrying only while the response is empty AND there are retries left
                    } while (String.IsNullOrWhiteSpace (appDataResponse) && ++currentRetry <= _maxRetries);

                    // Sanity Check
                    if (String.IsNullOrWhiteSpace (appDataResponse))
                    {
                        Console.WriteLine ("\t\t.Error - Failed to find page of app : " + appUrl + ". Skipping it");
                        continue;
                    }

                    Console.WriteLine("\t\t.Page Found. Firing Parser");

                    // Parsing App Data
                    AppModel appData = dataParser.ParseAppPage (appDataResponse, appUrl);

                    // Checking If this app is on the database already
                    string processingStatus;

                    if (mongoDB.AppProcessed (appUrl))
                    {
                        Console.WriteLine ("\t\t.Previous Version of App Found. Updating It");
                        mongoDB.UpdateRecord (appData, "Url", appData.Url);
                        processingStatus = "Updated";
                    }
                    else
                    {
                        Console.WriteLine ("\t\t.No Previous Version of the App Found. Adding to Database");
                        mongoDB.Insert<AppModel> (appData);
                        processingStatus = "Inserted";
                    }

                    // Updating App Status - The indexer is used (instead of Add) so a url that
                    // shows up twice in the input file does not abort the run with an exception
                    _appStatus[appData.Url] = new AppStatusModel ()
                    {
                        appId   = appData.Url.Replace (Consts.PLAY_STORE_PREFIX, String.Empty),
                        appUrl  = appData.Url,
                        appName = appData.Name,
                        status  = processingStatus
                    };
                }
            }

            Reviews:
            // Next Phase: Parse Reviews of those Apps
            Console.WriteLine ("\n => Parsing Complete. Obtaining Reviews");

            // Iterating again over app urls to parse the reviews from this app
            foreach (string appUrl in File.ReadAllLines (inputFile))
            {
                // Apps whose page could not be processed on the previous phase have no status entry
                AppStatusModel appStatus;

                if (!_appStatus.TryGetValue (appUrl, out appStatus))
                {
                    Console.WriteLine ("No status found for app : " + appUrl + ". Skipping its reviews");
                    continue;
                }

                // Reaching App Id
                string appID = appStatus.appId;

                // Reviews-Break-Parsing Flag
                bool shouldContinueParsing = true;

                // Parsing Review Pages from the apps
                for (int currentPage = 1; /* no stop condition */; currentPage++)
                {
                    // Getting Reviews Data Bundle
                    string reviewsData = ReviewsWrapper.GetAppReviews (appID, currentPage);

                    // Checking for Blocking Situation
                    if (String.IsNullOrEmpty (reviewsData))
                    {
                        Console.WriteLine("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                        // Thread Wait for 10 Minutes
                        Thread.Sleep (10 * 60 * 1000);

                        // Retrying the very same page: the loop iterator will increment it back
                        currentPage--;
                        continue;
                    }

                    // Checking for "No Reviews" app
                    if (reviewsData.Length < 50)
                    {
                        Console.WriteLine ("No Reviews left for this app. Skipping");
                        break;
                    }

                    // Normalizing Response to Proper HTML
                    reviewsData = ReviewsWrapper.NormalizeResponse (reviewsData);

                    // Iterating over Parsed Reviews
                    foreach (var review in dataParser.ParseReviews (reviewsData))
                    {
                        // Adding App Data to the review
                        review.appID   = appStatus.appId;
                        review.appName = appStatus.appName;
                        review.appURL  = appStatus.appUrl;

                        // Incrementing Reviews Count for this app
                        appStatus.reviews++;

                        // Adding processing timestamp to the review
                        review.timestamp = DateTime.Now;

                        // Building Query to check for duplicated review
                        var duplicatedReviewQuery = Query.EQ ("permalink", review.permalink);

                        // Checking for duplicated review before inserting it
                        if (mongoDB.FindMatch<AppReview> (duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Count () == 0)
                        {
                            // Inserting Review into MongoDB
                            mongoDB.Insert<AppReview> (review, Consts.REVIEWS_COLLECTION);
                        }
                        else
                        {
                            Console.WriteLine ("Duplicated Review. Skipping App");

                            // When this happens, there are no more reviews to be parsed.
                            // The remaining reviews of this page are still checked; no further page is requested
                            shouldContinueParsing = false;
                        }
                    }

                    // Hiccup to avoid Blocking problems
                    Console.WriteLine ("Parsed Reviews: " + appStatus.reviews);
                    Thread.Sleep (delayGenerator.Next (14000, 21000));

                    if (!shouldContinueParsing)
                    {
                        break;
                    }
                }
            }

            PeopleData:

            Console.WriteLine ("\n\n => Processing People Data");

            Console.WriteLine ("\nSimulating Google Login Using Selenium.");
            using (var firefoxDriver = new FirefoxDriver ())
            {
                // Navigating to Dummy Url - One that is known to ask for a login
                firefoxDriver.Navigate ().GoToUrl ("https://play.google.com/store/people/details?id=101242565951396343093");

                // Reaching Login Fields
                var loginField    = firefoxDriver.FindElementById ("Email");
                var passwordField = firefoxDriver.FindElementById ("Passwd");
                var btnSignIn     = firefoxDriver.FindElementById ("signIn");

                // Sending Credentials to the browser - Placeholders, replace them with real credentials
                loginField.SendKeys ("YOUREMAIL");
                passwordField.SendKeys ("YOURPASSWORD");
                btnSignIn.Click ();

                // Resume marker: every url found before this one is skipped
                string lastPeople = "https://play.google.com/store/people/details?id=115037241907660526856";
                bool shouldcontinue = false;

                // Processing Reviewers Data
                foreach (string peopleUrl in mongoDB.FindPeopleUrls ())
                {
                    // Skipping until last link
                    if (peopleUrl == lastPeople)
                    {
                        shouldcontinue = true;
                    }

                    if (!shouldcontinue) continue;

                    // Navigating To the Reviewer Page
                    firefoxDriver.Navigate ().GoToUrl (peopleUrl);

                    // Reading the source of the Reviewer page loaded by the browser
                    string reviewerPage = firefoxDriver.PageSource;

                    // Extracting Reviewer Data from the Page
                    ReviewerPageData reviewerData = dataParser.ParsePeopleData (reviewerPage);

                    // Adding Url to the model
                    reviewerData.reviewerUrl = peopleUrl;

                    // Inserting it to the database - If no previous record of this Reviewer is found
                    if (!mongoDB.IsReviewerOnDatabase (peopleUrl))
                    {
                        mongoDB.Insert<ReviewerPageData> (reviewerData, "ReviewersData");
                    }
                }
            }

            // End of Processing + Console Feedback
            Console.WriteLine ("\n\n == Processing Summary ==");

            foreach (var status in _appStatus.Values)
            {
                // Message
                string cMessage = "=> App : {0} - Status {1} - Reviews : {2}";

                Console.WriteLine (String.Format (cMessage, status.appName, status.status, status.reviews));
            }

            // Keeps the console open until the user presses Enter
            Console.ReadLine ();
        }
예제 #9
0
        /// <summary>
        /// Reads the apps not yet flagged as "Uploaded" from MongoDB and sends them to Keen.IO in batches.
        /// When the first pass does not manage to send everything, a second pass retries the pending
        /// apps one at a time.
        /// </summary>
        /// <param name="args">Command line arguments (currently unused: the "reset" check is commented out).</param>
        static void Main(string[] args)
        {
            // Number of events accumulated before a batch is sent on the first pass
            const int batchSize = 1000;

            // Loading Keen.IO Keys and Misc. from Config File
            _keenIOProjectID = ConfigurationManager.AppSettings["keenIOProjectID"];
            _keenIOMasterKey = ConfigurationManager.AppSettings["keenIOMasterKey"];
            _keenIOWriteKey  = ConfigurationManager.AppSettings["keenIOWriteKey"];
            _keenIOReadKey   = ConfigurationManager.AppSettings["keenIOReadKey"];
            _bucketName      = ConfigurationManager.AppSettings["keenIOBucketName"];

            // Configuring MongoDB Wrapper for connection and queries
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Keen.IO Variables
            var projectSettings = new ProjectSettingsProvider (_keenIOProjectID, _keenIOMasterKey, _keenIOWriteKey, _keenIOReadKey);
            var keenClient      = new KeenClient (projectSettings);

            var eventsToSend = new List<AppModel>();
            long totalProcessed = 0;
            long totalSent = 0;

            DateTime start = DateTime.Now;

            // Sends the pending events (if any), updates both counters, logs the progress and
            // empties the batch, so no event is ever sent twice or left behind in the list.
            Action flushPendingEvents = () =>
            {
                if (eventsToSend.Count == 0)
                {
                    return;
                }

                var sent = SendEventsToKeep(keenClient, eventsToSend, mongoDB);

                totalProcessed += eventsToSend.Count;
                totalSent += sent;

                TimeSpan elapsed = DateTime.Now - start;
                Console.WriteLine("processed {0} events took {1}: ({2} events per sec)", totalProcessed, elapsed, ((double)totalProcessed) / elapsed.TotalSeconds);

                eventsToSend.Clear();
            };

            // From This point on, you can change your code to reflect your own "Reading" logic
            // What I've done is simply read the records from the MongoDB database and Upload them to Keen.IO

            // NOTE(review): the guard below is commented out, so this reset block always runs
            // if(args.Length != 0 && args[0] == "reset")
            {
                int count = 0;

                foreach (var currentApp in mongoDB.FindMatch<AppModel>(Query.NE("Uploaded", true)))
                {
                    mongoDB.SetUpdated(currentApp.Url, false);
                    ++count;

                    if((count % 100) == 0)
                    {
                        Console.WriteLine("Reset update for {0}", count);
                    }
                }
            }

            // First pass: the current app is always added BEFORE checking the batch size,
            // otherwise the app that triggers the flush would be dropped
            foreach (var currentApp in mongoDB.FindMatch<AppModel> (Query.NE ("Uploaded", true)))
            {
                eventsToSend.Add(currentApp);

                if (eventsToSend.Count >= batchSize)
                {
                    flushPendingEvents();
                }
            }

            // Sending the last (incomplete) batch
            flushPendingEvents();

            // Second pass: something was not sent, retrying the pending apps one by one
            if(totalProcessed != totalSent)
            {
                totalProcessed = 0;
                totalSent = 0;

                foreach (var currentApp in mongoDB.FindMatch<AppModel>(Query.NE("Uploaded", true)))
                {
                    eventsToSend.Add(currentApp);
                    flushPendingEvents();
                }
            }
        }
        /// <summary>
        /// Uploads to Keen.IO, in batches, the apps stored on MongoDB that are not flagged as "Uploaded" yet.
        /// If the batched pass leaves events unsent, the pending apps are retried individually.
        /// </summary>
        /// <param name="args">Command line arguments (currently unused: the "reset" check is commented out).</param>
        static void Main(string[] args)
        {
            // Size of the batches sent on the first pass
            const int batchSize = 1000;

            // Loading Keen.IO Keys and Misc. from Config File
            _keenIOProjectID = ConfigurationManager.AppSettings["keenIOProjectID"];
            _keenIOMasterKey = ConfigurationManager.AppSettings["keenIOMasterKey"];
            _keenIOWriteKey  = ConfigurationManager.AppSettings["keenIOWriteKey"];
            _keenIOReadKey   = ConfigurationManager.AppSettings["keenIOReadKey"];
            _bucketName      = ConfigurationManager.AppSettings["keenIOBucketName"];

            // Configuring MongoDB Wrapper for connection and queries
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Keen.IO Variables
            var projectSettings = new ProjectSettingsProvider(_keenIOProjectID, _keenIOMasterKey, _keenIOWriteKey, _keenIOReadKey);
            var keenClient      = new KeenClient(projectSettings);

            var  eventsToSend   = new List<AppModel>();
            long totalProcessed = 0;
            long totalSent      = 0;

            DateTime start = DateTime.Now;

            // Single place where a batch is sent: it skips empty batches, accounts for what was
            // processed and what was actually sent, logs the throughput and clears the batch.
            Action sendBatch = () =>
            {
                if (eventsToSend.Count == 0)
                {
                    return;
                }

                var sent = SendEventsToKeep(keenClient, eventsToSend, mongoDB);

                totalProcessed += eventsToSend.Count;
                totalSent      += sent;

                TimeSpan elapsed = DateTime.Now - start;
                Console.WriteLine("processed {0} events took {1}: ({2} events per sec)", totalProcessed, elapsed, ((double)totalProcessed) / elapsed.TotalSeconds);

                eventsToSend.Clear();
            };

            // From This point on, you can change your code to reflect your own "Reading" logic
            // What I've done is simply read the records from the MongoDB database and Upload them to Keen.IO

            // NOTE(review): with the condition below commented out, the reset always takes place
            // if(args.Length != 0 && args[0] == "reset")
            {
                int count = 0;

                foreach (var currentApp in mongoDB.FindMatch<AppModel>(Query.NE("Uploaded", true)))
                {
                    mongoDB.SetUpdated(currentApp.Url, false);
                    ++count;

                    if ((count % 100) == 0)
                    {
                        Console.WriteLine("Reset update for {0}", count);
                    }
                }
            }

            // Batched pass - Each app is queued first and the batch is sent once it is full,
            // so the app that completes a batch is part of it instead of being discarded
            foreach (var currentApp in mongoDB.FindMatch<AppModel>(Query.NE("Uploaded", true)))
            {
                eventsToSend.Add(currentApp);

                if (eventsToSend.Count >= batchSize)
                {
                    sendBatch();
                }
            }

            // Remaining apps that did not fill a whole batch
            sendBatch();

            // Retry pass - Only needed when the amount sent differs from the amount processed
            if (totalProcessed != totalSent)
            {
                totalProcessed = 0;
                totalSent      = 0;

                // Sending the pending apps individually
                foreach (var currentApp in mongoDB.FindMatch<AppModel>(Query.NE("Uploaded", true)))
                {
                    eventsToSend.Add(currentApp);
                    sendBatch();
                }
            }
        }