예제 #1
0
        /// <summary>
        /// Issues an HTTP POST to <c>Consts.REVIEWS_URL</c> asking for one page of reviews of an app.
        /// </summary>
        /// <param name="appID">Value placed into the second placeholder of <c>Consts.REVIEWS_POST_DATA</c></param>
        /// <param name="reviewsPage">Value placed into the first placeholder of <c>Consts.REVIEWS_POST_DATA</c></param>
        /// <param name="isUsingProxies">When true, the request is routed through the proxy returned by <c>ProxiesLoader.GetWebProxy</c></param>
        /// <returns>The value returned by <c>WebRequests.Post</c> for this request</returns>
        public static string GetAppReviews (string appID, int reviewsPage, bool isUsingProxies = false)
        {
            using (WebRequests reviewsClient = new WebRequests ())
            {
                // Request identity and negotiation settings
                reviewsClient.Host              = Consts.HOST;
                reviewsClient.Origin            = Consts.ORIGIN;
                reviewsClient.Encoding          = "utf-8";
                reviewsClient.AllowAutoRedirect = true;
                reviewsClient.Accept            = "*/*";
                reviewsClient.UserAgent         = Consts.USER_AGENT;
                reviewsClient.ContentType       = "application/x-www-form-urlencoded;charset=UTF-8";
                reviewsClient.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;
                reviewsClient.Headers.Add (Consts.ACCEPT_LANGUAGE);

                // A proxy is only attached when the caller asked for one
                if (isUsingProxies)
                {
                    reviewsClient.Proxy = ProxiesLoader.GetWebProxy ();
                }

                // The form body carries the page number first and the app id second
                return reviewsClient.Post (
                    Consts.REVIEWS_URL,
                    String.Format (Consts.REVIEWS_POST_DATA, reviewsPage, appID));
            }
        }
예제 #2
0
        /// <summary>
        /// Downloads the page at <c>Consts.ROOT_STORE_URL</c>, repeating the request while the
        /// response is null or empty.
        /// </summary>
        /// <param name="useProxies">When true, a proxy from <c>ProxiesLoader.GetWebProxy</c> is assigned before every attempt</param>
        /// <returns>The last response received; it may still be null or empty when every attempt failed</returns>
        public string GetRootPage (bool useProxies)
        {
            const int maxRetries = 100;
            string pageHtml      = String.Empty;

            using (WebRequests client = new WebRequests())
            {
                // "attempt" counts the requests issued so far, this one included
                for (int attempt = 1; ; attempt++)
                {
                    if (useProxies)
                    {
                        client.Proxy = ProxiesLoader.GetWebProxy ();
                    }

                    pageHtml = client.Get (Consts.ROOT_STORE_URL);

                    // Stop on the first non-empty answer, or once the first attempt
                    // plus maxRetries retries have all been used up
                    if (!String.IsNullOrEmpty (pageHtml) || attempt > maxRetries)
                    {
                        break;
                    }
                }
            }

            return pageHtml;
        }
예제 #3
0
 /// <summary>
 /// Extracts the value of the "id" query parameter from <paramref name="appUrl"/> and posts it
 /// to the Play Store "getdoc" endpoint.
 /// </summary>
 /// <param name="server">Client used to issue the POST request</param>
 /// <param name="appUrl">Url expected to contain an "id=..." parameter</param>
 /// <returns>
 /// Always null: the response of the request is not inspected here.
 /// NOTE(review): this looks like an unfinished stub - confirm whether the response was meant to be parsed.
 /// </returns>
 static string[] getPermissions(WebRequests server, string appUrl)
 {
     Match idMatch = Regex.Match(appUrl, "id=([^&]*)");

     // Without an id there is nothing to ask the server for
     if (!idMatch.Success)
     {
         return null;
     }

     string postData = "xhr=1&ids=" + idMatch.Groups[1].Value;

     // The request is still issued, but its response is discarded
     server.Post("https://play.google.com/store/xhr/getdoc?authuser=0", postData);

     return null;
 }
예제 #4
0
        /// <summary>
        /// Executes an HTTP GET on <paramref name="url"/> using a short-lived <c>WebRequests</c> client.
        /// </summary>
        /// <param name="url">Address to request</param>
        /// <param name="useProxies">When true, the request uses the proxy returned by <c>ProxiesLoader.GetWebProxy</c></param>
        /// <returns>The value returned by <c>WebRequests.Get</c></returns>
        public string Get (string url, bool useProxies)
        {
            using (WebRequests client = new WebRequests ())
            {
                // Proxy is optional and decided by the caller
                if (useProxies)
                {
                    client.Proxy = ProxiesLoader.GetWebProxy ();
                }

                client.UserAgent = Consts.USER_AGENT;

                return client.Get (url);
            }
        }
예제 #5
0
        /// <summary>
        /// Loads the Oxide core: reads the root configuration, resolves and creates the working
        /// directories, sets up logging, the managers and the core libraries, then loads extensions,
        /// plugin watchers and plugins and finally subscribes to the watcher events.
        /// </summary>
        /// <exception cref="FileNotFoundException">Thrown when "oxide.root.json" does not exist</exception>
        /// <exception cref="Exception">
        /// Thrown when no instance directory could be resolved or the extension directory does not exist
        /// </exception>
        public void Load()
        {
            const string rootConfigFile = "oxide.root.json";

            // Command line wrapper
            commandline = new CommandLine(Environment.CommandLine);

            // Root configuration is mandatory
            if (!File.Exists(rootConfigFile))
            {
                throw new FileNotFoundException("Could not load Oxide root configuration", rootConfigFile);
            }
            rootconfig = ConfigFile.Load <OxideConfig>(rootConfigFile);

            // The first configured entry that either names no variable or names one present
            // on the command line decides the instance directory
            for (int index = 0; index < rootconfig.InstanceCommandLines.Length; index++)
            {
                string variable, pattern;
                rootconfig.GetInstanceCommandLineArg(index, out variable, out pattern);

                bool usable = string.IsNullOrEmpty(variable) || commandline.HasVariable(variable);
                if (!usable)
                {
                    continue;
                }

                InstanceDirectory = Path.Combine(Environment.CurrentDirectory, string.Format(pattern, commandline.GetVariable(variable)));
                break;
            }
            if (InstanceDirectory == null)
            {
                throw new Exception("Could not identify instance directory");
            }

            // Derive every other directory from the configuration
            ExtensionDirectory = Path.Combine(Environment.CurrentDirectory, rootconfig.ExtensionDirectory);
            PluginDirectory    = Path.Combine(InstanceDirectory, rootconfig.PluginDirectory);
            DataDirectory      = Path.Combine(InstanceDirectory, rootconfig.DataDirectory);
            LogDirectory       = Path.Combine(InstanceDirectory, rootconfig.LogDirectory);
            ConfigDirectory    = Path.Combine(InstanceDirectory, rootconfig.ConfigDirectory);
            TempDirectory      = Path.Combine(InstanceDirectory, rootconfig.TempDirectory);

            // The extension directory must already be there; the others are created on demand
            if (!Directory.Exists(ExtensionDirectory))
            {
                throw new Exception("Could not identify extension directory");
            }
            string[] requiredDirectories = new string[]
            {
                InstanceDirectory, PluginDirectory, DataDirectory, LogDirectory, ConfigDirectory, TempDirectory
            };
            foreach (string directory in requiredDirectories)
            {
                if (!Directory.Exists(directory))
                {
                    Directory.CreateDirectory(directory);
                }
            }

            // Logging: a rotating file logger wrapped by the compound root logger
            filelogger           = new RotatingFileLogger();
            filelogger.Directory = LogDirectory;
            rootlogger           = new CompoundLogger();
            rootlogger.AddLogger(filelogger);

            rootlogger.Write(LogType.Info, "Loading Oxide core v{0}...", Version);

            // Managers
            pluginmanager = new PluginManager(rootlogger)
            {
                ConfigPath = ConfigDirectory
            };
            extensionmanager = new ExtensionManager(rootlogger);

            // Core libraries, each registered under its public name
            libglobal = new Global();
            extensionmanager.RegisterLibrary("Global", libglobal);

            libtimer = new Timer();
            extensionmanager.RegisterLibrary("Timer", libtimer);

            libtime = new Time();
            extensionmanager.RegisterLibrary("Time", libtime);

            libplugins = new Libraries.Plugins(pluginmanager);
            extensionmanager.RegisterLibrary("Plugins", libplugins);

            libwebrequests = new WebRequests();
            extensionmanager.RegisterLibrary("WebRequests", libwebrequests);

            // Data storage
            DataFileSystem = new DataFileSystem(DataDirectory);

            // Extensions first, then their plugin watchers
            rootlogger.Write(LogType.Info, "Loading extensions...");
            extensionmanager.LoadAllExtensions(ExtensionDirectory);
            foreach (Extension extension in extensionmanager.GetAllExtensions())
            {
                extension.LoadPluginWatchers(PluginDirectory);
            }

            // Plugins
            rootlogger.Write(LogType.Info, "Loading plugins...");
            LoadAllPlugins();

            // Subscribe to change notifications only after the initial plugin load
            foreach (PluginChangeWatcher changeWatcher in extensionmanager.GetPluginChangeWatchers())
            {
                changeWatcher.OnPluginSourceChanged += watcher_OnPluginSourceChanged;
                changeWatcher.OnPluginAdded         += watcher_OnPluginAdded;
                changeWatcher.OnPluginRemoved       += watcher_OnPluginRemoved;
            }
        }
예제 #6
0
        /// <summary>
        /// Entry point of the worker piece of the process.
        /// Notice that you can run as many workers as you want to in order to make the crawling faster.
        /// </summary>
        /// <remarks>
        /// Repeatedly takes an app from the MongoDB queue (<c>FindAndModify</c>), downloads its page,
        /// parses it and inserts it into the database when it passes the installations / rating / name
        /// filter. Failed downloads are retried with an exponentially growing wait; after 7 consecutive
        /// failures the app is removed from the queue and the worker process restarts itself.
        /// </remarks>
        /// <param name="args">Command line arguments (not read by this method)</param>
        static void Main(string[] args)
        {
            // Configuring Log Object Threshold
            LogWriter.Threshold = TLogEventLevel.Information;
            LogWriter.Info ("Worker Started");

            // Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper();
            string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Web Requests Server
            WebRequests server = new WebRequests ();

            QueuedApp app;

            // Retry Counter (Used for exponential wait increasing logic)
            int retryCounter = 0;

            try
            {
                // Iterating over MongoDB records until no document is left to be processed
                while ((app = mongoDB.FindAndModify ()) != null)
                {
                    try
                    {
                        // Building APP URL
                        string appUrl = Consts.APP_URL_PREFIX + app.Url;

                        // Checking if this app is on the database already
                        if (mongoDB.AppProcessed(appUrl))
                        {
                            // Console Feedback, Comment this line to disable if you want to
                            Console.WriteLine("Duplicated App, skipped.");

                            // Delete it from the queue and continues the loop
                            mongoDB.RemoveFromQueue (app.Url);
                            continue;
                        }

                        // Check if the app was flagged as not meeting the criteria.
                        // NOTE(review): only a message is printed here, the app is still processed
                        // below - confirm whether it was meant to be skipped.
                        if (app.NotMeetCrit)
                        {
                            Console.WriteLine("App Not meet Criteria, Skipped.");
                        }

                        // Configuring server and Issuing Request
                        // NOTE(review): the header is added again on every iteration for a reused
                        // server instance - confirm that WebRequests tolerates repeated additions.
                        server.Headers.Add (Consts.ACCEPT_LANGUAGE);
                        server.Host              = Consts.HOST;
                        server.Encoding          = "utf-8";
                        server.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;
                        string response          = server.Get (appUrl);

                        // Sanity Check
                        if (String.IsNullOrEmpty (response) || server.StatusCode != System.Net.HttpStatusCode.OK)
                        {
                            LogWriter.Info ("Error opening app page : " + appUrl);

                            // Renewing WebRequest Object to get rid of Cookies.
                            // The previous instance is disposed first so it does not leak.
                            server.Dispose ();
                            server = new WebRequests ();

                            // Inc. retry counter
                            retryCounter++;

                            Console.WriteLine ("Retrying:" + retryCounter);

                            // Checking for maximum retry count
                            double waitTime;
                            if (retryCounter >= 7)
                            {
                                waitTime = TimeSpan.FromMinutes (35).TotalMilliseconds;

                                // Removing App from the database (the app page may have expired)
                                mongoDB.RemoveFromQueue (app.Url);

                                // Restart the worker as a fresh process and terminate this one
                                Process.Start ("PlayStoreWorker.exe");
                                Process.GetCurrentProcess ().Kill ();
                            }
                            else
                            {
                                // Calculating next wait time ( 2 ^ retryCounter seconds)
                                waitTime = TimeSpan.FromSeconds (Math.Pow (2, retryCounter)).TotalMilliseconds;
                            }

                            // Hiccup to avoid google blocking connections in case of heavy traffic from the same IP
                            Thread.Sleep (Convert.ToInt32 (waitTime));
                        }
                        else
                        {
                            // Flag Indicating Success while processing and parsing this app
                            bool ProcessingWorked = true;

                            // Reseting retry counter
                            retryCounter = 0;

                            // Parsing Useful App Data
                            AppModel parsedApp = parser.ParseAppPage (response, appUrl);

                            // Lower bound of the installations range reported for the app
                            // (0 when it is missing or cannot be parsed)
                            long install_num = ParseMinimumInstallations (parsedApp.Instalations);

                            // Rating and name of the current app
                            double rating  = parsedApp.Score.Total;
                            string appName = parsedApp.Name;

                            // If the installation number is less than 1,000,000
                            // OR the rating is less than 3.5
                            // OR the app name is empty
                            // -> skip the app
                            bool removed = false;
                            if (install_num < 1000000 || rating < 3.5 || String.IsNullOrEmpty (appName))
                            {
                                Console.WriteLine("Cannot add app <" + appName + "> -- NOT MEET CRITERIA");
                                // TODO: Update the NotMeetCriteria
                                // Removing App from the database
                                mongoDB.RemoveFromQueue(app.Url);
                                removed = true;
                            }

                            // Inserting App into MONGO_COLLECTION collection, but only when it was not
                            // rejected above. "removed" is tested first so that Insert is never
                            // executed for a rejected app.
                            if (!removed && !mongoDB.Insert<AppModel>(parsedApp))
                            {
                                Console.WriteLine("Cannot add app <" + appName + "> -- FAIL TO ADD TO Database");
                                ProcessingWorked = false;
                            }

                            // If processing failed, do not remove the app from the database, instead, keep it and flag it as not busy
                            // so that other workers can try to process it later
                            if (!ProcessingWorked)
                            {
                                mongoDB.ToggleBusyApp(app, false);
                            }
                            else // On the other hand, if processing worked, removes it from the database
                            {
                                // Console Feedback, Comment this line to disable if you want to
                                if (!removed)
                                {
                                    Console.WriteLine("Inserted App : " + parsedApp.Name);
                                    mongoDB.RemoveFromQueue(app.Url);
                                }
                                else
                                {
                                    Console.WriteLine("Removed App : " + parsedApp.Name);
                                }
                            }

                            // Queueing of related apps is intentionally disabled
                            /*
                            // Counters for console feedback only
                            int extraAppsCounter = 0, newExtraApps = 0;

                            // Parsing "Related Apps" and "More From Developer" Apps (URLS Only)
                            foreach (string extraAppUrl in parser.ParseExtraApps (response))
                            {
                                // Incrementing counter of extra apps
                                extraAppsCounter++;

                                // Assembling Full app Url to check with database
                                string fullExtraAppUrl = Consts.APP_URL_PREFIX + extraAppUrl;

                                // Checking if the app was either processed or queued to be processed already
                                if ((!mongoDB.AppProcessed (fullExtraAppUrl)) && (!mongoDB.IsAppOnQueue(extraAppUrl)))
                                {
                                    // Incrementing counter of inserted apps
                                    newExtraApps++;

                                    // Adds it to the queue of apps to be processed
                                    mongoDB.AddToQueue (extraAppUrl);
                                }
                            }

                            // Console Feedback
                            Console.WriteLine ("Queued " + newExtraApps + " / " + extraAppsCounter + " related apps");

                            */

                            // Hiccup (used to minimize blocking issues)
                            Thread.Sleep (300);
                        }
                    }
                    catch (Exception ex)
                    {
                        LogWriter.Error (ex);
                    }
                    finally
                    {
                        try
                        {
                            // Toggles Busy status back to false
                            mongoDB.ToggleBusyApp(app, false);
                        }
                        catch (Exception ex)
                        {
                            // Toggle Busy App may raise an exception in case of lack of internet connection,
                            // so this "inner catch" keeps it from escaping the loop
                            LogWriter.Error (ex);
                        }
                    }
                }
            }
            finally
            {
                // Release the last web client once the queue has been drained
                server.Dispose ();
            }
        }

        /// <summary>
        /// Reads the first number out of an installations text whose parts are separated by " - "
        /// (for instance "1,000,000 - 5,000,000"), ignoring thousands separators.
        /// </summary>
        /// <param name="installationsText">Raw installations text; may be null or empty</param>
        /// <returns>The parsed number, or 0 when the text is missing or is not a valid Int64</returns>
        private static long ParseMinimumInstallations (string installationsText)
        {
            if (String.IsNullOrEmpty (installationsText))
            {
                return 0;
            }

            string[] bounds = installationsText.Split (new string[] { " - " }, StringSplitOptions.RemoveEmptyEntries);
            if (bounds.Length == 0)
            {
                return 0;
            }

            // Remove the "," in the number of installations
            string lowerBound = bounds[0].Replace (",", "");

            try
            {
                return Convert.ToInt64 (lowerBound);
            }
            catch (OverflowException)
            {
                Console.WriteLine ("{0} is outside the range of the Int64 type.", lowerBound);
            }
            catch (FormatException)
            {
                Console.WriteLine ("The {0} value '{1}' is not recognizable", lowerBound.GetType ().Name, lowerBound);
            }

            return 0;
        }
예제 #7
0
        /// <summary>
        /// Crawls one category: requests the category page, then keeps POSTing for further pages,
        /// and queues every app url that is neither processed nor queued yet.
        /// </summary>
        /// <remarks>
        /// Paging stops when a url is seen for the second time, when a page yields no urls at all,
        /// or when more than <c>Consts.MAX_REQUEST_ERRORS</c> requests failed.
        /// </remarks>
        /// <param name="categoryUrl">Url of the category; also used (with "?authuser=0" appended) for the paging requests</param>
        /// <param name="categoryName">Name of the category, used for log output only</param>
        /// <param name="shouldUseProxies">Not read by this method. NOTE(review): confirm whether proxy support was intended here</param>
        private static void CrawlCategory (string categoryUrl, string categoryName, bool shouldUseProxies)
        {
            // Console Feedback
            _logger.Warn ("Crawling Category : [ " + categoryName + " ]");

            // Hashset of urls used to keep track of what's been parsed already
            HashSet<String> foundUrls = new HashSet<String> ();

            // Control variable to avoid "Loop" on paging
            bool isDonePagging = false;

            // HTML Response
            string response;

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex ("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // Executing Web Requests
            using (WebRequests server = new WebRequests ())
            {
                // Creating Request Object
                server.Headers.Add (Consts.ACCEPT_LANGUAGE);
                server.Host      = Consts.HOST;
                server.UserAgent = Consts.GITHUBURL;
                server.Encoding  = "utf-8";

                // Executing Initial Request
                response = server.Get (categoryUrl);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls (response))
                {
                    // Saving found url on local hashset
                    foundUrls.Add (url);

                    QueueAppIfNew (mongoDB, url);
                }

                // Executing Requests for more Play Store Links
                int baseSkip          = 60;
                int currentMultiplier = 1;
                int errorsCount       = 0;

                // The url is concatenated directly: running it through String.Format would
                // throw on any '{' or '}' it contains
                string pagingUrl = categoryUrl + "?authuser=0";

                do
                {
                    // Assembling new PostData with paging values
                    string postData = String.Format (Consts.CATEGORIES_POST_DATA, (currentMultiplier * baseSkip), baseSkip);

                    // Executing request for values
                    response = server.Post (pagingUrl, postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        _logger.Error ("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // The multiplier is not advanced, so the same page is requested again
                        continue;
                    }

                    // Parsing Links
                    int urlsOnPage = 0;
                    foreach (string url in parser.ParseAppUrls (response))
                    {
                        urlsOnPage++;

                        // If a certain app is found twice, it means that the paging logic got stuck into a
                        // loop, so all the apps for this category were parsed already.
                        // HashSet.Add returns false when the url was recorded before.
                        if (!foundUrls.Add (url))
                        {
                            isDonePagging = true;
                            break;
                        }

                        QueueAppIfNew (mongoDB, url);
                    }

                    // A successful page without a single url can never trigger the duplicate check
                    // above, so it is treated as the end of the category instead of paging forever
                    if (urlsOnPage == 0)
                    {
                        isDonePagging = true;
                    }

                    // Incrementing Paging Multiplier
                    currentMultiplier++;

                }  while (!isDonePagging && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }

        /// <summary>
        /// Adds an app url to the processing queue unless the app was already processed or queued.
        /// </summary>
        /// <param name="mongoDB">Configured database wrapper</param>
        /// <param name="url">App url as returned by the parser (without <c>Consts.APP_URL_PREFIX</c>)</param>
        private static void QueueAppIfNew (MongoDBWrapper mongoDB, string url)
        {
            if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
            {
                mongoDB.AddToQueue (url);
            }
        }
예제 #8
0
        /// <summary>
        /// Changes a single value of the locally stored login credentials, pushes the change to the
        /// server and, if the server accepts it, saves the credentials back to the client data store.
        /// For example, updating the first name of the user.
        /// </summary>
        /// <param name="displayName">Name of the property being updated, used in log and error messages</param>
        /// <param name="propertyToUpdate">Selects the property of <see cref="LoginCredentialsDataModel"/> to update</param>
        /// <param name="newValue">The value to assign to the property</param>
        /// <param name="setApiModel">Copies the new value into the matching property of the <see cref="UpdateUserProfileApiModel"/></param>
        /// <returns>
        /// True when the value was already up to date, or when the server accepted the update and the
        /// credentials were saved; false when the server call reported an error
        /// </returns>
        private async Task <bool> UpdateUserCredentialsValueAsync(string displayName, Expression <Func <LoginCredentialsDataModel, string> > propertyToUpdate, string newValue, Action <UpdateUserProfileApiModel, string> setApiModel)
        {
            Logger.LogDebugSource($"Saving {displayName}...");

            // Read the stored credentials and the value they currently hold
            var storedCredentials = await ClientDataStore.GetLoginCredentialsAsync();
            var currentValue      = propertyToUpdate.GetPropertyValue(storedCredentials);

            Logger.LogDebugSource($"{displayName} currently {currentValue}, updating to {newValue}");

            // Nothing to do when the value did not change
            if (currentValue == newValue)
            {
                Logger.LogDebugSource($"{displayName} is the same, ignoring");
                return true;
            }

            // Apply the change to the in-memory credentials only; they are saved after the server agrees
            propertyToUpdate.SetPropertyValue(newValue, storedCredentials);

            // Build the request payload, letting the caller fill in the field that changed
            var profileUpdate = new UpdateUserProfileApiModel();
            setApiModel(profileUpdate, newValue);

            // Send the update, authenticated with the user's token
            var serverResponse = await WebRequests.PostAsync <ApiResponse>(
                RouteHelpers.GetAbsoluteRoute(ApiRoutes.UpdateUserProfile),
                profileUpdate,
                bearerToken : storedCredentials.Token);

            var updateFailed = await serverResponse.DisplayErrorIfFailedAsync($"Update {displayName}");

            if (!updateFailed)
            {
                Logger.LogDebugSource($"Successfully updated {displayName}. Saving to local database cache...");

                // Persist the changed credentials locally
                await ClientDataStore.SaveLoginCredentialsAsync(storedCredentials);

                return true;
            }

            Logger.LogDebugSource($"Failed to update {displayName}. {serverResponse.ErrorMessage}");

            return false;
        }
예제 #9
0
        static void Main (string[] args)
        {
            // Checking for Input Parameters
            if (args == null || args.Length != 1)
            {
                Console.WriteLine ("Incorrect number of arguments received. Expected One");
                System.Environment.Exit (-100);
            }

            // Human Readable Variable
            string inputFile = args[0];

            // Checking if the Input file received exists
            if (!File.Exists (inputFile))
            {
                Console.WriteLine (String.Format("Received input file does not exist : {0}", inputFile));
                System.Environment.Exit (-101);
            }

            // App Status 
            _appStatus = new Dictionary<String, AppStatusModel> ();

            // Creating Instance of Database Manager
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Parser
            PlayStoreParser dataParser = new PlayStoreParser ();

            goto PeopleData;

            using (WebRequests httpClient = new WebRequests ())
            {
                // Minor Configuration of the Http Client - Ensures that the requests response will be in english
                // By doing so, we have no problems parsing the dates to their proper formats
                httpClient.Headers.Add (Consts.ACCEPT_LANGUAGE);
                httpClient.Host     = Consts.HOST;
                httpClient.Encoding = "utf-8";
                httpClient.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

                // Iterating over File Lines (App Urls) - To Extract Data, Not The Reviews Yet.
                foreach (string appUrl in File.ReadAllLines (inputFile))
                {
                    // Logging Progress
                    Console.WriteLine ("\n => Processing App : " + appUrl);

                    // Executing Http Get Request for the Apps's Data - With max of 5 Retries
                    String appDataResponse = String.Empty;
                    int currentRetry       = 0;

                    do
                    {
                        // Http Get
                        appDataResponse = httpClient.Get (appUrl);

                    } while (String.IsNullOrWhiteSpace(appDataResponse) || ++currentRetry <= _maxRetries);

                    // Sanity Check
                    if (String.IsNullOrWhiteSpace (appDataResponse))
                    {
                        Console.WriteLine ("\t\t.Error - Failed to find page of app : " + appUrl + ". Skipping it");
                        continue;
                    }

                    Console.WriteLine("\t\t.Page Found. Firing Parser");

                    // Parsing App Data
                    AppModel appData = dataParser.ParseAppPage (appDataResponse, appUrl);

                    // Checking If this app is on the database already
                    if (mongoDB.AppProcessed (appUrl))
                    {
                        Console.WriteLine ("\t\t.Previous Version of App Found. Updating It");
                        mongoDB.UpdateRecord (appData, "Url", appData.Url);

                        // Updating App Status
                        _appStatus.Add
                            (
                                appData.Url,
                                new AppStatusModel ()
                                {
                                    appId   = appData.Url.Replace (Consts.PLAY_STORE_PREFIX, String.Empty),
                                    appUrl  = appData.Url,
                                    appName = appData.Name,
                                    status  = "Updated"
                                }
                            );
                    }
                    else
                    {
                        Console.WriteLine ("\t\t.No Previous Version of the App Found. Adding to Database");
                        mongoDB.Insert<AppModel> (appData);

                        // Updating App Status
                        _appStatus.Add
                            (
                                appData.Url,
                                new AppStatusModel ()
                                {
                                    appId   = appData.Url.Replace (Consts.PLAY_STORE_PREFIX, String.Empty),
                                    appUrl  = appData.Url,
                                    appName = appData.Name,
                                    status  = "Inserted"
                                }
                            );
                    }
                }
            }

            Reviews:
            // Next Phase: Parse Reviews of those Apps
            Console.WriteLine ("\n => Parsing Complete. Obtaining Reviews");

            // Iterating again over app urls to parse the reviews from this app
            foreach (string appUrl in File.ReadAllLines (inputFile))
            {
                // Reaching App Id
                string appID = _appStatus[appUrl].appId;

                // Reviews-Break-Parsing Flag
                bool shouldContinueParsing = true;

                // Parsing Review Pages from the apps
                for (int currentPage = 1; /* no stop condition */; currentPage++)
                {
                    // Getting Reviews Data Bundle
                    string reviewsData = ReviewsWrapper.GetAppReviews (appID, currentPage);

                    // Checking for Blocking Situation
                    if (String.IsNullOrEmpty (reviewsData))
                    {
                        Console.WriteLine("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                        // Thread Wait for 10 Minutes
                        Thread.Sleep (10 * 60 * 1000);
                    }

                    // Checking for "No Reviews" app
                    if (reviewsData.Length < 50)
                    {
                        Console.WriteLine ("No Reviews left for this app. Skipping");
                        break;
                    }

                    // Normalizing Response to Proper HTML
                    reviewsData = ReviewsWrapper.NormalizeResponse (reviewsData);

                    // Iterating over Parsed Reviews
                    foreach (var review in dataParser.ParseReviews (reviewsData))
                    {
                        // Adding App Data to the review
                        review.appID   = _appStatus[appUrl].appId;
                        review.appName = _appStatus[appUrl].appName;
                        review.appURL  = _appStatus[appUrl].appUrl;

                        // Incrementing Reviews Count for this app
                        _appStatus[appUrl].reviews++;

                        // Adding Review Object to Database
                        review.timestamp = DateTime.Now;

                        // Building Query to check for duplicated review
                        var duplicatedReviewQuery = Query.EQ ("permalink", review.permalink);

                        // Checking for duplicated review before inserting it
                        if (mongoDB.FindMatch<AppReview> (duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Count () == 0)
                        {
                            // Inserting Review into MongoDB
                            mongoDB.Insert<AppReview> (review, Consts.REVIEWS_COLLECTION);
                        }
                        else
                        {
                            Console.WriteLine ("Duplicated Review. Skipping App");

                            // When this happens, there are no more reviews to be parsed
                            shouldContinueParsing = false; // Skipping this apps processing
                        }
                    }

                    // Hiccup to avoid Blocking problems
                    Console.WriteLine ("Parsed Reviews: " + _appStatus[appUrl].reviews);
                    Thread.Sleep (new Random ().Next (14000, 21000));

                    if (!shouldContinueParsing)
                    {
                        break;
                    }
                }
            }

            PeopleData:

            Console.WriteLine ("\n\n => Processing People Data");

            Console.WriteLine ("\nSimulating Google Login Using Selenium.");
            using (var firefoxDriver = new FirefoxDriver ())
            {
                // Navigating to Dummy Url - One that I Know that well be asked for a login
                firefoxDriver.Navigate ().GoToUrl ("https://play.google.com/store/people/details?id=101242565951396343093");

                // Reaching Login Fields
                var loginField    = firefoxDriver.FindElementById ("Email");
                var passwordField = firefoxDriver.FindElementById ("Passwd");
                var btnSignIn     = firefoxDriver.FindElementById ("signIn");

                // Sending Credentials to the browser
                loginField.SendKeys ("YOUREMAIL");
                passwordField.SendKeys ("YOURPASSWORD");
                btnSignIn.Click ();

                string lastPeople = "https://play.google.com/store/people/details?id=115037241907660526856";
                bool shouldcontinue = false;

                // Processing Reviewers Data
                foreach (string peopleUrl in mongoDB.FindPeopleUrls ())
                {
                    // Skipping until last link
                    if (peopleUrl == lastPeople)
                    {
                        shouldcontinue = true;
                    }

                    if (!shouldcontinue) continue;

                    // Navigating To the Reviewer Page
                    firefoxDriver.Navigate ().GoToUrl (peopleUrl);

                    // Executing Get Request for the Reviewer page on Google Play
                    string reviewerPage = firefoxDriver.PageSource;

                    // Extracting Reviewer Data from the Page
                    ReviewerPageData reviewerData = dataParser.ParsePeopleData (reviewerPage);

                    // Adding Url to the model
                    reviewerData.reviewerUrl = peopleUrl;

                    // Inserting it to the database - If no previous record of this Reviewer is found
                    if (!mongoDB.IsReviewerOnDatabase (peopleUrl))
                    {
                        mongoDB.Insert<ReviewerPageData> (reviewerData, "ReviewersData");
                    }
                }
            }

            // End of Processing + Console Feedback
            Console.WriteLine ("\n\n == Processing Summary ==");

            foreach (var status in _appStatus.Select (t => t.Value))
            {
                // Message
                string cMessage = "=> App : {0} - Status {1} - Reviews : {2}";

                Console.WriteLine (String.Format (cMessage, status.appName, status.status, status.reviews));
            }

            Console.ReadLine ();
        }
        /// <summary>
        /// Retrieves the daily and hourly forecasts for a coordinate pair.
        /// </summary>
        /// <remarks>
        /// Three requests are issued in sequence: the grid lookup built from <c>URL_FORMAT_GRIDS</c>,
        /// then the forecast and hourly-forecast URLs reported by the grid response. If any step
        /// throws, the whole sequence is retried recursively until <c>_attemptCounter</c> reaches
        /// <c>ATTEMPT_LIMIT</c>. The counter is reset to 1 on every exit path so each new call
        /// starts with the same retry budget.
        /// </remarks>
        /// <param name="coordinates">Latitude/longitude to look up; <c>null</c> yields a <c>null</c> result.</param>
        /// <returns>
        /// The forecast, or <c>null</c> when the coordinates are null, the grid response has no
        /// properties, a forecast response deserializes to null, or the retry limit is reached.
        /// </returns>
        public async Task <WeatherForecast> GetWeatherDataAsync(Coordinates coordinates)
        {
            try
            {
                _logger.Information("Beginning to retrieve weather forecast.");

                if (coordinates != null)
                {
                    Dictionary <string, string> requestHeaders = new Dictionary <string, string>
                    {
                        { "User-Agent", UserAgent }
                    };

                    // Step 1: resolve the coordinates to a grid record, which carries the forecast URLs.
                    string urlGrids  = string.Format(URL_FORMAT_GRIDS, coordinates.Latitude, coordinates.Longitude);
                    string jsonGrids = await WebRequests.GetCurlResponseAsync(urlGrids, _logger, requestHeaders);

                    Domains.WeatherGov.Grid.WeatherGovRoot rootGrid = await Json.ToObjectAsync <Domains.WeatherGov.Grid.WeatherGovRoot>(jsonGrids);

                    //TODO: Save coord/grid data so this request can be skipped in the future.

                    if (rootGrid?.Properties != null)
                    {
                        // Step 2: daily forecast.
                        string urlForecast  = rootGrid.Properties.Forecast;
                        string jsonForecast = await WebRequests.GetCurlResponseAsync(urlForecast, _logger, requestHeaders);

                        WeatherGovRoot rootForecast = await Json.ToObjectAsync <WeatherGovRoot>(jsonForecast);

                        // Step 3: hourly forecast.
                        string urlForecastHourlyURL = rootGrid.Properties.ForecastHourly;
                        string jsonForecastHourly   = await WebRequests.GetCurlResponseAsync(urlForecastHourlyURL, _logger, requestHeaders);

                        WeatherGovRoot rootForecastHourly = await Json.ToObjectAsync <WeatherGovRoot>(jsonForecastHourly);

                        if (rootForecast != null && rootForecastHourly != null)
                        {
                            _attemptCounter = 1;
                            return(new WeatherForecast
                            {
                                Days = ConvertPeriods(rootForecast.Properties?.Periods),
                                Hours = ConvertPeriods(rootForecastHourly.Properties?.Periods, true)
                            });
                        }
                    }
                }
            }
            catch (Exception e)
            {
                _logger.Error($"Failed to retrieve weather forecast: {e.Message}");
                if (_attemptCounter >= ATTEMPT_LIMIT)
                {
                    // Retry budget exhausted: reset for the next caller and give up.
                    _attemptCounter = 1;
                    return(null);
                }
                // Sometimes the first request fails even when everything is fine.
                _attemptCounter++;
                return(await GetWeatherDataAsync(coordinates));
            }

            // No exception, but no usable data either. Reset to 1, like every other exit path;
            // resetting to 0 here used to grant the next call one extra retry.
            _logger.Error("Failed to retrieve weather forecast.");
            _attemptCounter = 1;
            return(null);
        }
예제 #11
0
        /// <summary>
        /// Walks the paginated topic listing of a forum section, parsing every page and
        /// forwarding the parsed topics through <c>SendMessage</c>.
        /// </summary>
        /// <remarks>
        /// Crawling stops when the pager text shows two equal numbers around " a " (see the
        /// stats regex below), or when several pages in a row cannot be fetched or parsed.
        /// If the section URL does not match the expected pattern, nothing is requested.
        /// </remarks>
        /// <param name="section">Section whose <c>FullUrl</c> identifies the listing to crawl.</param>
        /// <param name="topics">Working buffer for parsed topics; cleared after every send.</param>
        /// <param name="client">HTTP client used for every page request.</param>
        private static void DoTheWork(Section section, ref List <Topic> topics, WebRequests client)
        {
            // Upper bound on unusable pages in a row. Without it a section that keeps
            // returning empty or unparsable pages would be crawled forever.
            const int maxConsecutiveFailures = 5;

            int numberOfPage        = 1;
            int consecutiveFailures = 0;

            string url             = String.Empty;
            string sectionPieceUrl = String.Empty;

            // Find the right URL: the path piece that starts with 1-4 digits and ends at the '?'
            Regex importantPieceUrlRegex = new Regex(@"\/(\d{1,4}.*)\?", RegexOptions.Compiled | RegexOptions.IgnoreCase);
            Match match = importantPieceUrlRegex.Match(section.FullUrl);

            if (match.Success)
            {
                sectionPieceUrl = match.Groups[1].Value.Trim();
                url             = String.Format(_mapURLs["SectionTopics"], sectionPieceUrl, numberOfPage);
            }

            // Built once; constructing a compiled regex on every page is needlessly expensive
            Regex statsRegex = new Regex(@"\s(\d{1,})\sa\s(\d{1,})", RegexOptions.Compiled);

            while (!String.IsNullOrWhiteSpace(url))
            {
                logger.Trace("Section {0} ... Page: {1}", section.Title, numberOfPage);

                // Get Request
                string htmlResponse = SharedLibrary.Utils.WebRequestsUtils.Get(ref client, logger, url);

                HtmlDocument htmlDoc    = null;
                bool         pageParsed = false;

                // Checking if html response is valid
                if (String.IsNullOrWhiteSpace(htmlResponse))
                {
                    logger.Warn("HtmlResponse is null or empty. URL: " + url);
                }
                else
                {
                    // Loading HtmlDocument
                    htmlDoc = new HtmlDocument();
                    htmlDoc.LoadHtml(htmlResponse);

                    // Extract Topics
                    HtmlNodeCollection topicsNode = htmlDoc.DocumentNode.SelectNodes(".//li[contains(@class,'threadbit')]");
                    if (topicsNode != null && topicsNode.Count > 0)
                    {
                        ParseTopic(topicsNode, ref section, numberOfPage, ref topics);
                        pageParsed = true;
                    }
                    else
                    {
                        logger.Warn("Problem to extract topicsNode");
                    }
                }

                if (!pageParsed)
                {
                    consecutiveFailures += 1;
                    if (consecutiveFailures >= maxConsecutiveFailures)
                    {
                        logger.Warn("Giving up on section after " + consecutiveFailures + " consecutive unusable pages. Last URL: " + url);
                        break;
                    }

                    // Skip to the next page. The URL must be rebuilt here: previously only the
                    // page counter advanced, so the same URL was requested in a tight endless loop.
                    numberOfPage += 1;
                    url           = String.Format(_mapURLs["SectionTopics"], sectionPieceUrl, numberOfPage);

                    Thread.Sleep(2 * 1000);
                    continue;
                }

                consecutiveFailures = 0;

                // Flush whatever this page produced
                if (topics.Count > 0)
                {
                    // Send messages to Queue
                    logger.Trace("Sending to Topics Collection...");
                    SendMessage(topics);

                    logger.Trace("Sending message to Topics Queue...");
                    SendMessage(TopicsQueueName, topics);

                    topics.Clear();
                }

                // Is it the last Page? The stats text is expected to contain "<n> a <m>";
                // equal numbers are treated as the end of the listing.
                HtmlNode statsPageNode = htmlDoc.DocumentNode.SelectSingleNode(".//div[@class='threadpagestats']");
                if (statsPageNode != null)
                {
                    string stats      = statsPageNode.InnerText.Trim();
                    Match  statsMatch = statsRegex.Match(stats);

                    if (statsMatch.Success &&
                        statsMatch.Groups.Count == 3 &&
                        statsMatch.Groups[1].Value.Trim().Equals(statsMatch.Groups[2].Value.Trim()))
                    {
                        break;
                    }
                }

                // Next Page
                numberOfPage += 1;
                url           = String.Format(_mapURLs["SectionTopics"], sectionPieceUrl, numberOfPage);

                // Keep Calm and don't shutdown the forum!
                Thread.Sleep(2 * 1000);
            }
        }
예제 #12
0
        /// <summary>
        /// Boots the Oxide core: resolves the directory layout from the root configuration and
        /// the command line, sets up logging, registers the core libraries, and then loads
        /// extensions, plugin watchers and plugins
        /// </summary>
        /// <exception cref="FileNotFoundException">oxide.root.json is missing from the root directory</exception>
        /// <exception cref="Exception">The instance directory could not be identified, or the extension directory does not exist</exception>
        public void Load()
        {
            // Root is the working directory, unless that sits under the user's
            // application-data folder; then the app domain's base directory is used
            RootDirectory = Environment.CurrentDirectory;
            string appDataFolder = Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData);
            if (RootDirectory.StartsWith(appDataFolder))
            {
                RootDirectory = AppDomain.CurrentDomain.BaseDirectory;
            }

            // Wrap the process arguments
            commandline = new CommandLine(Environment.GetCommandLineArgs());

            // The root configuration file is mandatory
            string rootConfigPath = Path.Combine(RootDirectory, "oxide.root.json");
            if (!File.Exists(rootConfigPath))
            {
                throw new FileNotFoundException("Could not load Oxide root configuration", rootConfigPath);
            }
            rootconfig = ConfigFile.Load <OxideConfig>(rootConfigPath);

            // Pick the instance directory from the first configured entry that either
            // names no variable or names one present on the command line
            for (int index = 0; index < rootconfig.InstanceCommandLines.Length; index++)
            {
                string variableName, pathFormat;
                rootconfig.GetInstanceCommandLineArg(index, out variableName, out pathFormat);

                if (!string.IsNullOrEmpty(variableName) && !commandline.HasVariable(variableName))
                {
                    continue;
                }

                string relativeInstancePath = CleanPath(string.Format(pathFormat, commandline.GetVariable(variableName)));
                InstanceDirectory = Path.Combine(RootDirectory, relativeInstancePath);
                break;
            }
            if (InstanceDirectory == null)
            {
                throw new Exception("Could not identify instance directory");
            }

            // Extensions live under the root; everything else lives under the instance
            ExtensionDirectory = Path.Combine(RootDirectory, CleanPath(rootconfig.ExtensionDirectory));
            PluginDirectory    = Path.Combine(InstanceDirectory, CleanPath(rootconfig.PluginDirectory));
            DataDirectory      = Path.Combine(InstanceDirectory, CleanPath(rootconfig.DataDirectory));
            LogDirectory       = Path.Combine(InstanceDirectory, CleanPath(rootconfig.LogDirectory));
            ConfigDirectory    = Path.Combine(InstanceDirectory, CleanPath(rootconfig.ConfigDirectory));

            // The extension directory must already exist; the others are created on demand
            if (!Directory.Exists(ExtensionDirectory))
            {
                throw new Exception("Could not identify extension directory");
            }
            string[] directoriesToEnsure = new string[]
            {
                InstanceDirectory, PluginDirectory, DataDirectory, LogDirectory, ConfigDirectory
            };
            foreach (string directoryPath in directoriesToEnsure)
            {
                if (!Directory.Exists(directoryPath))
                {
                    Directory.CreateDirectory(directoryPath);
                }
            }

            // Register the architecture-specific subfolder of the extension directory
            string architectureFolder = IntPtr.Size == 8 ? "x64" : "x86";
            RegisterLibrarySearchPath(Path.Combine(ExtensionDirectory, architectureFolder));

            // Logging: always a rotating file logger, plus the debug callback when one is set
            RootLogger = new CompoundLogger();
            var fileLogger = new RotatingFileLogger();
            fileLogger.Directory = LogDirectory;
            RootLogger.AddLogger(fileLogger);
            if (debugCallback != null)
            {
                RootLogger.AddLogger(new CallbackLogger(debugCallback));
            }

            // Log Oxide core loading
            LogInfo("Loading Oxide core v{0}...", Version);

            // Managers
            var pluginManager = new PluginManager(RootLogger);
            pluginManager.ConfigPath = ConfigDirectory;
            RootPluginManager = pluginManager;
            extensionmanager  = new ExtensionManager(RootLogger);

            // Data storage
            DataFileSystem = new DataFileSystem(DataDirectory);

            // Core libraries; the ones needed later are kept in fields
            extensionmanager.RegisterLibrary("Global", new Global());
            extensionmanager.RegisterLibrary("Time", new Time());
            libtimer = new Timer();
            extensionmanager.RegisterLibrary("Timer", libtimer);
            extensionmanager.RegisterLibrary("Permission", new Permission());
            extensionmanager.RegisterLibrary("Plugins", new Libraries.Plugins(RootPluginManager));
            libwebrequests = new WebRequests();
            extensionmanager.RegisterLibrary("WebRequests", libwebrequests);
            covalence = new Covalence();
            extensionmanager.RegisterLibrary("Covalence", covalence);

            // Extensions
            LogInfo("Loading extensions...");
            extensionmanager.LoadAllExtensions(ExtensionDirectory);

            // Covalence is initialized after the extensions (it depends on things from within an ext)
            covalence.Initialize();

            // Fall back to a local stopwatch when no external clock was supplied
            if (getTimeSinceStartup == null)
            {
                timer = new Stopwatch();
                timer.Start();
                getTimeSinceStartup = () => (float)timer.Elapsed.TotalSeconds;
            }

            // Let every extension set up its plugin watchers
            foreach (var extension in extensionmanager.GetAllExtensions())
            {
                extension.LoadPluginWatchers(PluginDirectory);
            }

            // Plugins
            LogInfo("Loading plugins...");
            LoadAllPlugins();

            // Subscribe to watcher events only after the initial plugin load
            foreach (var changeWatcher in extensionmanager.GetPluginChangeWatchers())
            {
                changeWatcher.OnPluginSourceChanged += watcher_OnPluginSourceChanged;
                changeWatcher.OnPluginAdded         += watcher_OnPluginAdded;
                changeWatcher.OnPluginRemoved       += watcher_OnPluginRemoved;
            }
        }
예제 #13
0
        /// <summary>
        /// Requests the user identified by <paramref name="usuarioId"/> from the
        /// <c>Authentication/getUser</c> endpoint via a POST request.
        /// </summary>
        /// <param name="usuarioId">User identifier; sent as the <c>ID_Usuario</c> field of the request body.</param>
        /// <returns>The value produced by <c>WebRequests.RequestJsonSerialize</c> for this request.</returns>
        /// <exception cref="AccessDeniedDomainException">The request failed with 401 Unauthorized or 403 Forbidden.</exception>
        /// <exception cref="DomainException">The request failed with any other status code.</exception>
        public async Task <Base <UserLogin> > GetById(int usuarioId)
        {
            try
            {
                return(await WebRequests.RequestJsonSerialize <Base <UserLogin> >(
                           $"{UrlApi}/Authentication/getUser", new UserLogin { ID_Usuario = Convert.ToString(usuarioId) }, WebRequests.Metodo.POST));
            }
            catch (RequestException e)
            {
                // Only the two authorization failures get a dedicated exception type;
                // every other status code maps to the generic domain error.
                var stsCode = (HttpStatusCode)e.StatusCode;
                if (stsCode == HttpStatusCode.Forbidden || stsCode == HttpStatusCode.Unauthorized)
                {
                    throw new AccessDeniedDomainException();
                }

                throw new DomainException("Erro na requsição");
            }
        }
예제 #14
0
 /// <summary>
 /// Starts two coroutines: a GET against a sample JSON endpoint, and a texture
 /// download that is handed <c>_spriteRenderer</c>.
 /// </summary>
 private void Start()
 {
     const string todoUrl    = "https://jsonplaceholder.typicode.com/todos/1";
     const string textureUrl = "https://via.placeholder.com/600/92c952";

     // Plain GET request
     var todoRoutine = WebRequests.GetCoroutine(todoUrl);
     StartCoroutine(todoRoutine);

     // Texture request, passed the sprite renderer
     var textureRoutine = WebRequests.GetTextureCoroutine(textureUrl, _spriteRenderer);
     StartCoroutine(textureRoutine);
 }
예제 #15
0
        /// <summary>
        /// Downloads and parses the app page for every URL suffix in <paramref name="urls"/>,
        /// handing each parsed app to the exporter, the callback, or the console.
        /// </summary>
        /// <remarks>
        /// A failed request (empty body or non-200 status) is not repeated for the same URL:
        /// the method waits with an exponentially growing delay (35 minutes from the 11th
        /// consecutive failure on) and then continues with the next URL. Exceptions raised
        /// while handling one URL are logged and do not stop the loop.
        /// </remarks>
        /// <param name="urls">App URL suffixes; each is appended to <c>Consts.APP_URL_PREFIX</c>.</param>
        /// <param name="downloadDelay">Pause in milliseconds after each successfully parsed app; 0 disables it.</param>
        /// <param name="exporter">Optional sink that receives each parsed app.</param>
        /// <param name="writeCallback">Optional callback invoked with each parsed app.</param>
        private static void ParseAppUrls(ISet <string> urls, int downloadDelay = 0, IExporter exporter = null,
                                         Action <AppModel> writeCallback       = null)
        {
            log.Info("Parsing App URLs...");

            int parsedAppCount = 0;

            // Consecutive-failure counter (drives the exponential wait below)
            int retryCounter = 0;

            // WebRequests is disposable, so every instance created here must be released:
            // the current one in the finally block, replaced ones at the point of replacement.
            WebRequests server = new WebRequests();

            try
            {
                foreach (string url in urls)
                {
                    try
                    {
                        // Building APP URL
                        string appUrl = Consts.APP_URL_PREFIX + url;

                        // Configuring server and Issuing Request
                        server.Headers.Add(Consts.ACCEPT_LANGUAGE);
                        server.Host              = Consts.HOST;
                        server.Encoding          = "utf-8";
                        server.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

                        // NOTE: proxy support is not wired in; a randomized, health-checked
                        // proxy would be assigned to server.Proxy here.

                        string response = server.Get(appUrl);

                        // Sanity Check
                        if (String.IsNullOrEmpty(response) || server.StatusCode != System.Net.HttpStatusCode.OK)
                        {
                            log.Info("Error opening app page : " + appUrl);

                            // Renewing WebRequest Object to get rid of Cookies. The new instance is
                            // created first so `server` never points at a disposed object.
                            WebRequests staleServer = server;
                            server = new WebRequests();
                            staleServer.Dispose();

                            // Inc. retry counter
                            retryCounter++;

                            log.Info("Retrying:" + retryCounter);

                            // Checking for maximum retry count
                            double waitTime;
                            if (retryCounter >= 11)
                            {
                                waitTime = TimeSpan.FromMinutes(35).TotalMilliseconds;
                            }
                            else
                            {
                                // Calculating next wait time ( 2 ^ retryCounter seconds)
                                waitTime = TimeSpan.FromSeconds(Math.Pow(2, retryCounter)).TotalMilliseconds;
                            }

                            // Hiccup to avoid google blocking connections in case of heavy traffic from the same IP
                            Thread.Sleep(Convert.ToInt32(waitTime));
                        }
                        else
                        {
                            // Reseting retry counter
                            retryCounter = 0;

                            // Parsing App Data
                            AppModel parsedApp = parser.ParseAppPage(response, appUrl);

                            // Export the App Data
                            if (exporter != null)
                            {
                                log.Info("Parsed App: " + parsedApp.Name);

                                exporter.Write(parsedApp);
                            }

                            // Pass the App Data to callback method
                            if (writeCallback != null)
                            {
                                writeCallback(parsedApp);
                            }

                            // Default action is print to screen
                            if (exporter == null && writeCallback == null)
                            {
                                Console.WriteLine(parsedApp);
                            }

                            ++parsedAppCount;

                            // Apply download delay
                            if (downloadDelay > 0)
                            {
                                Thread.Sleep(downloadDelay);
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: record the failure and the offending URL, then move on
                        log.Error(ex);
                        Console.WriteLine(url);
                    }
                }
            }
            finally
            {
                server.Dispose();
            }

            log.Info("Finished. Parsed App count: " + parsedAppCount + "\n");
        }
예제 #16
0
        /// <summary>
        /// Crawls the search results for <paramref name="searchField"/> and collects the
        /// distinct app URLs found, following page tokens until none is returned.
        /// </summary>
        /// <param name="searchField">Search term inserted into <c>Consts.CRAWL_URL</c>.</param>
        /// <param name="maxAppUrls">Stop once this many URLs were collected; values of 0 or less mean no limit.</param>
        /// <returns>The set of distinct app URLs collected.</returns>
        private static ISet <string> CollectAppUrls(string searchField, int maxAppUrls)
        {
            ISet <string> resultUrls = new HashSet <string>();

            log.Info("Crawling Search Term : [ " + searchField + " ]");

            string crawlUrl = String.Format(Consts.CRAWL_URL, searchField);

            using (WebRequests server = new WebRequests())
            {
                server.Host = Consts.HOST;

                int insertedAppCount = 0;
                int skippedAppCount  = 0;
                int errorsCount      = 0;

                string postData     = Consts.INITIAL_POST_DATA;
                bool   limitReached = false;

                while (!limitReached)
                {
                    // One page of results per iteration
                    string response = server.Post(crawlUrl, postData);

                    // A failed request is repeated with the same post data until the error budget runs out
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        log.Error("Http Error - Status Code: " + server.StatusCode);

                        errorsCount++;

                        if (errorsCount > Consts.MAX_REQUEST_ERRORS)
                        {
                            log.Info("Crawl Stopped: MAX_REQUEST_ERRORS reached");
                            break;
                        }

                        continue;
                    }

                    // Collect the links of this page; Add reports whether the URL was new
                    foreach (string url in parser.ParseAppUrls(response))
                    {
                        if (!resultUrls.Add(url))
                        {
                            ++skippedAppCount;
                            log.Info("Duplicated App. Skipped: " + url);
                            continue;
                        }

                        log.Info("Inserted App: " + url);

                        ++insertedAppCount;

                        if (maxAppUrls > 0 && insertedAppCount >= maxAppUrls)
                        {
                            limitReached = true;
                            break;
                        }
                    }

                    if (limitReached)
                    {
                        break;
                    }

                    // The page token drives the next request; an empty token ends the crawl
                    string pagTok = getPageToken(response);
                    if (pagTok.Length == 0)
                    {
                        break;
                    }

                    postData = String.Format(Consts.POST_DATA, pagTok);
                }

                log.Info("Inserted App Count: " + insertedAppCount);
                log.Info("Skipped App Count: " + skippedAppCount);
                log.Info("Error Count: " + errorsCount + "\n");
            }

            return(resultUrls);
        }
예제 #17
0
        /// <summary>
        /// Posts the entered credentials to the login endpoint and, on success, stores the
        /// returned user details, reloads the settings and navigates to the chat page.
        /// On failure a "Login Failed" message box is shown with the best available reason.
        /// </summary>
        /// <param name="parameter">
        /// Object cast to <c>IHavePassword</c> in order to read the secure password.
        /// NOTE(review): a parameter that does not implement it makes the cast yield null and
        /// the resulting exception is reported by the catch block below as a connection failure.
        /// </param>
        /// <returns>A task that completes once the login attempt has been handled</returns>
        public async Task Login(object parameter)
        {
            try
            {
                await RunCommand(() => this.IsLoginRunning, async() =>
                {
                    // Show the loading animation while the request is in flight
                    Container.Get <ApplicationViewModel>().IsGifHidden = false;

                    var result = await WebRequests.PostAsync <ApiResponse <LoginResultApiModel> >("http://localhost:5000/api/login", new LoginCredentialApiModel
                    {
                        UsernameOrEmail = Email,
                        Password        = (parameter as IHavePassword).SecurePassword.Unsecure()
                    });

                    // Anything other than a successful, parsed server response is a failed login
                    if (result == null || result.ServerResponse == null || !result.ServerResponse.Successful)
                    {
                        //TODO Localize
                        var message = "Unknown error";

                        // The server answered with a parsed response: use its own error message
                        if (result?.ServerResponse != null)
                        {
                            message = result.ServerResponse.ErrorMessage;
                        }
                        // There is raw content but no parsed response: show the raw content.
                        // The check must be negated, otherwise this branch runs exactly when there
                        // is nothing to show and dereferences a null result.
                        else if (!string.IsNullOrWhiteSpace(result?.RawServerResponse))
                        {
                            message = $"Unexpected response {result.RawServerResponse}";
                        }
                        // There is a result but no content at all: report the HTTP status
                        else if (result != null)
                        {
                            message = $"Failed to communicate {result.StatusCode} {result.StatusDescription}";
                        }

                        await Container.UI.ShowMessage(new MessageBoxViewModel
                        {
                            //TODO Localize
                            Title   = "Login Failed",
                            Message = message
                        });

                        Container.Get <ApplicationViewModel>().IsGifHidden = true;
                        return;
                    }

                    var userData = result.ServerResponse.Response;

                    // Persist the details returned by the server
                    await Container.ClientDataStore.SaveLoginCredentialsAsync(new LoginCredentialsDataModel
                    {
                        Email     = userData.Email,
                        FirstName = userData.FirstName,
                        LastName  = userData.LastName,
                        Username  = userData.Username,
                        Token     = userData.Token
                    });

                    Container.Settings.Load();

                    Container.Get <ApplicationViewModel>().IsGifHidden = true;
                    Container.Get <ApplicationViewModel>().GoToPage(ApplicationPage.Chat);
                });
            }
            catch
            {
                // Any exception raised while logging in is reported as a connection problem
                await Container.UI.ShowMessage(new MessageBoxViewModel
                {
                    //TODO Localize
                    Title   = "Login Failed",
                    Message = "Cannot connect to the server"
                });

                Container.Get <ApplicationViewModel>().IsGifHidden = true;
                return;
            }
        }
예제 #18
0
        /// <summary>
        /// Worker entry point: keeps pulling queued apps out of MongoDB, downloads and parses
        /// each app page, stores the parsed app and queues the related apps found on the page.
        /// Several workers may run side by side to speed the crawl up.
        /// </summary>
        /// <param name="args">Command line arguments (not read by this method)</param>
        static void Main(string[] args)
        {
            // Log threshold and startup feedback
            LogWriter.Threshold = TLogEventLevel.Information;
            LogWriter.Info("Worker Started");

            // Page parser
            PlayStoreParser parser = new PlayStoreParser();

            // MongoDB wrapper, pointed at "server:port"
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // HTTP client; replaced by a fresh instance after every failed request
            WebRequests server = new WebRequests();

            // Number of consecutive failed requests, drives the exponential back-off
            int retryCounter = 0;

            // Runs until the queue hands back no more apps
            for (QueuedApp app = mongoDB.FindAndModify(); app != null; app = mongoDB.FindAndModify())
            {
                try
                {
                    string appUrl = Consts.APP_URL_PREFIX + app.Url;

                    // Apps already stored are only dropped from the queue
                    if (mongoDB.AppProcessed(appUrl))
                    {
                        Console.WriteLine("Duplicated App, skipped.");

                        mongoDB.RemoveFromQueue(app.Url);
                        continue;
                    }

                    // Request setup and page download
                    server.Headers.Add(Consts.ACCEPT_LANGUAGE);
                    server.Host              = Consts.HOST;
                    server.Encoding          = "utf-8";
                    server.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;
                    string html = server.Get(appUrl);

                    bool pageLoaded = !String.IsNullOrEmpty(html) && server.StatusCode == System.Net.HttpStatusCode.OK;

                    if (pageLoaded)
                    {
                        // A good response ends the current failure streak
                        retryCounter = 0;

                        AppModel parsedApp = parser.ParseAppPage(html, appUrl);

                        if (mongoDB.Insert <AppModel>(parsedApp))
                        {
                            // Stored: give console feedback and drop the app from the queue
                            Console.WriteLine("Inserted App : " + parsedApp.Name);

                            mongoDB.RemoveFromQueue(app.Url);
                        }
                        else
                        {
                            // Not stored: keep it queued and flag it as not busy so it can be retried
                            mongoDB.ToggleBusyApp(app, false);
                        }

                        // Counters used for console feedback only
                        int seenExtraApps = 0, queuedExtraApps = 0;

                        // "Related Apps" and "More From Developer" urls found on the page
                        foreach (string extraAppUrl in parser.ParseExtraApps(html))
                        {
                            seenExtraApps++;

                            // Nothing to do for apps that are stored or waiting on the queue already
                            bool alreadyKnown = mongoDB.AppProcessed(Consts.APP_URL_PREFIX + extraAppUrl) || mongoDB.IsAppOnQueue(extraAppUrl);
                            if (alreadyKnown)
                            {
                                continue;
                            }

                            queuedExtraApps++;
                            mongoDB.AddToQueue(extraAppUrl);
                        }

                        Console.WriteLine("Queued " + queuedExtraApps + " / " + seenExtraApps + " related apps");
                    }
                    else
                    {
                        LogWriter.Info("Error opening app page : " + appUrl);

                        // A new client instance gets rid of the cookies of the old one
                        server = new WebRequests();

                        retryCounter++;

                        Console.WriteLine("Retrying:" + retryCounter);

                        TimeSpan backoff;
                        if (retryCounter >= 11)
                        {
                            // Too many failures in a row: long pause, and the app leaves the queue
                            // (its page may have expired)
                            backoff = TimeSpan.FromMinutes(35);

                            mongoDB.RemoveFromQueue(app.Url);
                        }
                        else
                        {
                            // 2 ^ retryCounter seconds
                            backoff = TimeSpan.FromSeconds(Math.Pow(2, retryCounter));
                        }

                        // Pause so that heavy traffic from the same IP does not get blocked
                        Thread.Sleep(Convert.ToInt32(backoff.TotalMilliseconds));
                    }
                }
                catch (Exception ex)
                {
                    LogWriter.Error(ex);
                }
                finally
                {
                    try
                    {
                        // Whatever happened above, the app is flagged as not busy again
                        mongoDB.ToggleBusyApp(app, false);
                    }
                    catch (Exception ex)
                    {
                        // The toggle itself may fail (e.g. no connection); logged so the loop keeps running
                        LogWriter.Error(ex);
                    }
                }
            }
        }
 /// <summary>
 /// Link click handler: starts the download of the Hacmebank / WebGoat demo file and
 /// hands the outcome to downloadDemoFileCallback.
 /// </summary>
 /// <param name="sender">Control that raised the event (not used)</param>
 /// <param name="e">Link click event data (not used)</param>
 private void llDownloadDemoFile_WebGoat_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
 {
     var demoFileToDownload = O2CoreResources.DemoOzasmtFile_Hacmebank_WebGoat;

     WebRequests.downloadFileUsingAscxDownload(demoFileToDownload, downloadDemoFileCallback);
 }
예제 #20
0
        /// <summary>
        /// Entry point of the worker piece of the process
        /// Notice that you can run as many workers as you want to in order to make the crawling faster
        /// </summary>
        /// <param name="args">
        /// When exactly one argument is received, it is read as the path of a text file holding
        /// the HTTP proxies to use (one per line); otherwise no proxies are used
        /// </param>
        static void Main(string[] args)
        {
            // Configuring Log Object
            LogSetup.InitializeLog ("PlayStoreWorker.log", "info");
            Logger logger = LogManager.GetCurrentClassLogger ();
            logger.Info ("Worker Started");

            // Control Variable (Bool - Should the process use proxies? )
            bool isUsingProxies = false;

            // Checking for the need to use proxies
            if (args != null && args.Length == 1)
            {
                // Setting flag to true
                isUsingProxies = true;

                // Loading proxies from .txt received as argument
                String fPath = args[0];

                // Sanity Check
                if (!File.Exists (fPath))
                {
                    logger.Fatal ("Couldnt find proxies on path : " + fPath);
                    System.Environment.Exit (-100);
                }

                // Reading Proxies from File
                string[] fLines = File.ReadAllLines (fPath, Encoding.GetEncoding ("UTF-8"));

                try
                {
                    // Actual Load of Proxies
                    ProxiesLoader.Load (fLines.ToList ());
                }
                catch (Exception ex)
                {
                    logger.Fatal (ex);
                    System.Environment.Exit (-101);
                }
            }

            // Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper();
            string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Web Requests Server
            WebRequests server = new WebRequests ();

            // Queued App Model
            QueuedApp app;

            // Retry Counter (Used for exponential wait increasing logic)
            int retryCounter = 0;

            // Iterating over MongoDB records until no queued app is left to be processed
            while ((app = mongoDB.FindAndModify ()) != null)
            {
                try
                {
                    // Building APP URL
                    string appUrl = app.Url;

                    // Sanity check of app page url (queued urls may or may not carry the prefix already)
                    if (app.Url.IndexOf ("http", StringComparison.OrdinalIgnoreCase) < 0)
                    {
                        appUrl = Consts.APP_URL_PREFIX + app.Url;
                    }

                    // Checking if this app is on the database already
                    if (mongoDB.AppProcessed (appUrl))
                    {
                        // Console Feedback, Comment this line to disable if you want to
                        logger.Info ("Duplicated App, skipped.");

                        // Delete it from the queue and continues the loop
                        mongoDB.RemoveFromQueue (app.Url);
                        continue;
                    }

                    // Configuring server and Issuing Request
                    server.Headers.Add (Consts.ACCEPT_LANGUAGE);
                    server.Host              = Consts.HOST;
                    server.UserAgent         = Consts.GITHUBURL;
                    server.Encoding          = "utf-8";
                    server.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

                    // Checking for the need to use "HTTP Proxies"
                    if (isUsingProxies)
                    {
                        server.Proxy = ProxiesLoader.GetWebProxy ();
                    }

                    // Issuing HTTP Request
                    string response          = server.Get (appUrl);

                    // Flag Indicating Success while processing and parsing this app
                    bool ProcessingWorked = true;

                    // Sanity Check
                    if (String.IsNullOrEmpty (response) || server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        logger.Info ("Error opening app page : " + appUrl);
                        ProcessingWorked = false;

                        // The status code of the failed request must be read BEFORE the request object
                        // is renewed below: the fresh instance knows nothing about this request
                        var failedStatusCode = server.StatusCode;

                        // Renewing WebRequest Object to get rid of Cookies
                        server = new WebRequests ();

                        // Fallback time variable (milliseconds)
                        double waitTime;

                        // Checking which "Waiting Logic" to use - If there are proxies being used, there's no need to wait too much
                        // If there are no proxies in use, on the other hand, the process must wait more time
                        if (isUsingProxies)
                        {
                            // Waits two seconds everytime
                            waitTime = TimeSpan.FromSeconds (2).TotalMilliseconds;
                        }
                        else
                        {
                            // Increments retry counter
                            retryCounter++;

                            // Checking for maximum retry count
                            if (retryCounter >= 8)
                            {
                                waitTime = TimeSpan.FromMinutes (20).TotalMilliseconds;
                            }
                            else
                            {
                                // Calculating next wait time ( 2 ^ retryCounter seconds)
                                waitTime = TimeSpan.FromSeconds (Math.Pow (2, retryCounter)).TotalMilliseconds;
                            }
                        }

                        // Hiccup to avoid google blocking connections in case of heavy traffic from the same IP
                        logger.Info ("======================================================");

                        // waitTime is kept in milliseconds, so it is converted back for the "Seconds" feedback
                        logger.Info ("\n\tFallback : " + TimeSpan.FromMilliseconds (waitTime).TotalSeconds + " Seconds");
                        Thread.Sleep (Convert.ToInt32 (waitTime));

                        // If The Status code is "ZERO" (it means 404) - App must be removed from "Queue"
                        if (failedStatusCode == 0)
                        {
                            // Console Feedback
                            logger.Info ("\tApp Not Found (404) - " + app.Url);

                            mongoDB.RemoveFromQueue (app.Url);
                        }
                        logger.Info ("======================================================");
                    }
                    else
                    {
                        // Reseting retry counter
                        retryCounter = 0;

                        // Parsing Useful App Data
                        AppModel parsedApp = parser.ParseAppPage (response, appUrl);

                        // Full (prefixed) urls of the related apps found on the page
                        List<String> relatedApps = new List<String> ();

                        // Avoiding Exceptions caused by "No Related Apps" situations - Must be treated differently
                        try
                        {

                            // Parsing "Related Apps" and "More From Developer" Apps (URLS Only)
                            foreach (string extraAppUrl in parser.ParseExtraApps (response))
                            {
                                relatedApps.Add (Consts.APP_URL_PREFIX + extraAppUrl);
                            }

                            // Adding "Related Apps" to Apps Model
                            parsedApp.RelatedUrls = relatedApps.Distinct ().ToArray ();
                        }
                        catch
                        {
                            logger.Info ("\tNo Related Apps Found. Skipping");
                        }

                        // Inserting App into Mongo DB Database
                        if (!mongoDB.Insert<AppModel>(parsedApp))
                        {
                            ProcessingWorked = false;
                        }

                        // If the processing failed, do not remove the app from the database, instead, keep it and flag it as not busy
                        // so that other workers can try to process it later
                        if (!ProcessingWorked)
                        {
                            mongoDB.ToggleBusyApp(app, false);
                        }
                        else // On the other hand, if processing worked, removes it from the database
                        {
                            // Console Feedback, Comment this line to disable if you want to
                            Console.ForegroundColor = ConsoleColor.Red;
                            logger.Info ("Inserted App : " + parsedApp.Name);
                            Console.ForegroundColor = ConsoleColor.White;

                            mongoDB.RemoveFromQueue(app.Url);
                        }

                        // Counters for console feedback only
                        int extraAppsCounter = 0, newExtraApps = 0;

                        // Queueing the "Related Apps" and "More From Developer" Apps collected above
                        foreach (string extraAppUrl in relatedApps)
                        {
                            // Incrementing counter of extra apps
                            extraAppsCounter++;

                            // Assembling Full app Url to check with database. The urls collected above carry
                            // the prefix already, so it is only added when missing (same sanity check used for
                            // the queued app url) - prefixing twice would make the "processed" check never match
                            string fullExtraAppUrl = extraAppUrl;
                            if (extraAppUrl.IndexOf ("http", StringComparison.OrdinalIgnoreCase) < 0)
                            {
                                fullExtraAppUrl = Consts.APP_URL_PREFIX + extraAppUrl;
                            }

                            // Checking if the app was either processed or queued to be processed already
                            if ((!mongoDB.AppProcessed (fullExtraAppUrl)) && (!mongoDB.IsAppOnQueue(extraAppUrl)))
                            {
                                // Incrementing counter of inserted apps
                                newExtraApps++;

                                // Adds it to the queue of apps to be processed
                                mongoDB.AddToQueue (extraAppUrl);
                            }
                        }

                        // Console Feedback
                        logger.Info ("Queued " + newExtraApps + " / " + extraAppsCounter + " related apps");
                    }
                }
                catch (Exception ex)
                {
                    logger.Error (ex);
                }
                finally
                {
                    try
                    {
                        // Toggles Busy status back to false
                        mongoDB.ToggleBusyApp (app, false);
                    }
                    catch (Exception ex)
                    {
                        // Toggle Busy App may raise an exception in case of lack of internet connection, so this
                        // "inner catch" keeps it from escaping the loop
                        logger.Error (ex);
                    }
                }
            }
        }
예제 #21
0
        /// <summary>
        /// Executes a Search using the searchField as the search parameter,
        /// paginates / scrolls the search results to the end and adds the url of every app
        /// it finds that is neither processed nor queued yet to the queue kept by the MongoDB wrapper
        /// </summary>
        /// <param name="searchField">Search term, formatted into Consts.CRAWL_URL</param>
        private static void CrawlStore (string searchField)
        {
            // Console Feedback
            Console.WriteLine ("Crawling Search Term : [ " + searchField + " ]");

            // Compiling Regular Expression used to parse the "pagToken" out of the Play Store
            Regex pagTokenRegex = new Regex (@"GAEi+.+\:S\:.{11}\\42", RegexOptions.Compiled);

            // HTML Response
            string response;

            // MongoDB Helper
            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex ("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // Executing Web Requests
            using (WebRequests server = new WebRequests ())
            {
                // Creating Request Object
                server.Host = Consts.HOST;

                // The target url is the same for every page of results
                string crawlUrl = String.Format (Consts.CRAWL_URL, searchField);

                // Executing Initial Request
                response = server.Post (crawlUrl, Consts.INITIAL_POST_DATA);

                // Parsing Links out of Html Page (Initial Request)
                QueueNewApps (parser, mongoDB, response);

                // Executing Requests for more Play Store Links
                int errorsCount = 0;
                do
                {
                    // Finding pagToken from HTML
                    var rgxMatch = pagTokenRegex.Match (response);

                    // If there's no match, there is no next page to ask for
                    if (!rgxMatch.Success)
                    {
                        break;
                    }

                    // Reading Match from Regex, and applying needed replacements
                    string pagToken = rgxMatch.Value.Replace (":S:", "%3AS%3A").Replace("\\42", String.Empty).Replace(@"\\u003d", String.Empty);

                    // Assembling new PostData with paging values
                    string postData = String.Format (Consts.POST_DATA, pagToken);

                    // Executing request for values
                    response = server.Post (crawlUrl, postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        LogWriter.Error ("Http Error", "Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // Jumps to the loop condition, which decides whether another attempt is made
                        continue;
                    }

                    // Parsing Links
                    QueueNewApps (parser, mongoDB, response);

                }  while (parser.AnyResultFound (response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }

        /// <summary>
        /// Parses the app urls out of a Play Store response and queues the ones that were
        /// neither processed nor queued before, giving console feedback for each url
        /// </summary>
        /// <param name="parser">Parser used to read the app urls out of the response</param>
        /// <param name="mongoDB">Configured MongoDB wrapper holding processed apps and the queue</param>
        /// <param name="response">Response body to read the app urls from</param>
        private static void QueueNewApps (PlayStoreParser parser, MongoDBWrapper mongoDB, string response)
        {
            foreach (string url in parser.ParseAppUrls (response))
            {
                // Checks whether the app have been already processed
                // or is queued to be processed
                if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                {
                    // Console Feedback
                    Console.WriteLine (" . Queued App");

                    // Then, queue it :)
                    mongoDB.AddToQueue (url);
                    Thread.Sleep (250); // Hiccup
                }
                else
                {
                    // Console Feedback
                    Console.WriteLine (" . Duplicated App. Skipped");
                }
            }
        }
예제 #22
0
 /// <summary>
 /// Checks that issuing a GET request against the configured url raises no exception.
 /// </summary>
 public void GetBtcRate()
 {
     Assert.DoesNotThrow(() =>
     {
         WebRequests.GET(url);
     });
 }
예제 #23
0
        /// <summary>
        /// Crawls one category page: requests the page, then keeps posting for further pages
        /// of apps, queueing every app url that is neither processed nor queued yet. Paging stops
        /// when an app url shows up for the second time or when too many request errors happened
        /// </summary>
        /// <param name="categoryUrl">Url of the category page to crawl</param>
        /// <param name="categoryName">Name of the category, used for log feedback only</param>
        /// <param name="shouldUseProxies">NOTE(review): not read by this method - confirm whether proxies were meant to be configured here</param>
        private static void CrawlCategory(string categoryUrl, string categoryName, bool shouldUseProxies)
        {
            // Console Feedback
            _logger.Warn("Crawling Category : [ " + categoryName + " ]");

            // Hashset of urls used to keep track of what's been parsed already
            HashSet <String> foundUrls = new HashSet <String> ();

            // Control variable to avoid "Loop" on paging
            bool isDonePagging = false;

            // HTML Response
            string response;

            // MongoDB Helper
            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Executing Web Requests
            using (WebRequests server = new WebRequests())
            {
                // Creating Request Object
                server.Headers.Add(Consts.ACCEPT_LANGUAGE);
                server.Host      = Consts.HOST;
                server.UserAgent = Consts.GITHUBURL;
                server.Encoding  = "utf-8";

                // Executing Initial Request
                response = server.Get(categoryUrl);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls(response))
                {
                    // Saving found url on local hashset
                    foundUrls.Add(url);

                    // Checks whether the app have been already processed
                    // or is queued to be processed
                    if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
                    {
                        // Then, queue it :)
                        mongoDB.AddToQueue(url);
                    }
                }

                // Url used for every paging request. Plain concatenation on purpose: the url is data,
                // not a format string, so any '{' or '}' in it must not be interpreted
                string pagingUrl = categoryUrl + "?authuser=0";

                // Executing Requests for more Play Store Links
                int baseSkip          = 60;
                int currentMultiplier = 1;
                int errorsCount       = 0;
                do
                {
                    // Assembling new PostData with paging values
                    string postData = String.Format(Consts.CATEGORIES_POST_DATA, (currentMultiplier * baseSkip), baseSkip);

                    // Executing request for values
                    response = server.Post(pagingUrl, postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        _logger.Error("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // The multiplier is not incremented, so the same page is requested again
                        continue;
                    }

                    // Parsing Links
                    foreach (string url in parser.ParseAppUrls(response))
                    {
                        // Add returns false when the url is in the hashset already. If a certain app is found
                        // twice, it means that the paging logic got stuck into a loop, so all the apps
                        // for this category were parsed already
                        if (!foundUrls.Add(url))
                        {
                            isDonePagging = true;
                            break;
                        }

                        // Checks whether the app have been already processed
                        // or is queued to be processed
                        if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
                        {
                            // Then, queue it :)
                            mongoDB.AddToQueue(url);
                        }
                    }

                    // Incrementing Paging Multiplier
                    currentMultiplier++;
                }  while (!isDonePagging && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
예제 #24
0
        /// <summary>
        /// Entry point of the worker piece of the process.
        /// Loops while <c>mongoDB.FindAndModify</c> keeps returning a queued app: downloads the app page,
        /// parses and stores it, and queues every related app found on that page.
        /// Notice that you can run as many workers as you want to in order to make the crawling faster
        /// </summary>
        /// <param name="args">Command line arguments (not read by this method)</param>
        static void Main(string[] args)
        {
            // Retry policy used when an app page cannot be opened
            const int maxRetries         = 11;
            const int blockedWaitMinutes = 35;

            // Configuring Log Object Threshold
            LogWriter.Threshold = TLogEventLevel.Information;
            LogWriter.Info ("Worker Started");

            // Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB = new MongoDBWrapper();
            string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Web Requests Server.
            // It is replaced on request errors, so it cannot live in a "using" statement;
            // the outer try/finally below guarantees the current instance is disposed.
            WebRequests server = new WebRequests ();

            try
            {
                QueuedApp app;

                // Retry Counter (Used for exponential wait increasing logic)
                int retryCounter = 0;

                // Iterating over the queue until no more documents are returned
                while ((app = mongoDB.FindAndModify ()) != null)
                {
                    try
                    {
                        // Building APP URL
                        string appUrl = Consts.APP_URL_PREFIX + app.Url;

                        // Checking if this app is on the database already
                        if (mongoDB.AppProcessed(appUrl))
                        {
                            // Console Feedback, Comment this line to disable if you want to
                            Console.WriteLine("Duplicated App, skipped.");

                            // Delete it from the queue and continues the loop
                            mongoDB.RemoveFromQueue (app.Url);
                            continue;
                        }

                        // Configuring server and Issuing Request
                        server.Headers.Add(Consts.ACCEPT_LANGUAGE);
                        server.Host = Consts.HOST;
                        server.Encoding = "utf-8";
                        server.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;
                        string response = server.Get (appUrl);

                        // Sanity Check
                        if (String.IsNullOrEmpty (response) || server.StatusCode != System.Net.HttpStatusCode.OK)
                        {
                            LogWriter.Info ("Error opening app page : " + appUrl);

                            // Renewing WebRequest Object to get rid of Cookies
                            // (the previous instance is disposed instead of being silently dropped)
                            server.Dispose ();
                            server = new WebRequests ();

                            // Inc. retry counter
                            retryCounter++;

                            Console.WriteLine ("Retrying:" + retryCounter);

                            // Checking for maximum retry count
                            double waitTime;
                            if (retryCounter >= maxRetries)
                            {
                                waitTime = TimeSpan.FromMinutes (blockedWaitMinutes).TotalMilliseconds;

                                // Removing App from the queue (the app page may have expired).
                                // Queue entries are removed by their un-prefixed url, same as the other removals in this method.
                                mongoDB.RemoveFromQueue (app.Url);
                            }
                            else
                            {
                                // Calculating next wait time ( 2 ^ retryCounter seconds)
                                waitTime = TimeSpan.FromSeconds (Math.Pow (2, retryCounter)).TotalMilliseconds;
                            }

                            // Hiccup to avoid google blocking connections in case of heavy traffic from the same IP
                            Thread.Sleep (Convert.ToInt32 (waitTime));
                        }
                        else
                        {
                            // Reseting retry counter
                            retryCounter = 0;

                            // Parsing Useful App Data
                            AppModel parsedApp = parser.ParseAppPage (response, appUrl);

                            // Inserting App into Mongo DB Database
                            if (mongoDB.Insert<AppModel>(parsedApp))
                            {
                                // Console Feedback, Comment this line to disable if you want to
                                Console.WriteLine("Inserted App : " + parsedApp.Name);

                                // Processing worked, so the app leaves the queue
                                mongoDB.RemoveFromQueue(app.Url);
                            }
                            else
                            {
                                // If the insert failed, do not remove the app from the queue, instead, keep it and flag it as not busy
                                // so that other workers can try to process it later
                                mongoDB.ToggleBusyApp(app, false);
                            }

                            // Counters for console feedback only
                            int extraAppsCounter = 0, newExtraApps = 0;

                            // Parsing "Related Apps" and "More From Developer" Apps (URLS Only)
                            foreach (string extraAppUrl in parser.ParseExtraApps (response))
                            {
                                // Incrementing counter of extra apps
                                extraAppsCounter++;

                                // Assembling Full app Url to check with database
                                string fullExtraAppUrl = Consts.APP_URL_PREFIX + extraAppUrl;

                                // Checking if the app was either processed or queued to be processed already
                                if ((!mongoDB.AppProcessed (fullExtraAppUrl)) && (!mongoDB.IsAppOnQueue(extraAppUrl)))
                                {
                                    // Incrementing counter of inserted apps
                                    newExtraApps++;

                                    // Adds it to the queue of apps to be processed
                                    mongoDB.AddToQueue (extraAppUrl);
                                }
                            }

                            // Console Feedback
                            Console.WriteLine ("Queued " + newExtraApps + " / " + extraAppsCounter + " related apps");
                        }
                    }
                    catch (Exception ex)
                    {
                        LogWriter.Error (ex);
                    }
                    finally
                    {
                        try
                        {
                            // Toggles Busy status back to false
                            mongoDB.ToggleBusyApp(app, false);
                        }
                        catch (Exception ex)
                        {
                            // Toggle Busy App may raise an exception in case of lack of internet connection,
                            // so this "inner catch" keeps the worker loop alive
                            LogWriter.Error (ex);
                        }
                    }
                }
            }
            finally
            {
                // Releasing whichever WebRequests instance is current when the worker stops
                server.Dispose ();
            }
        }
예제 #25
0
 /// <summary>
 /// Returns the request produced by <c>WebRequests.GetPostRequest</c> for <c>url</c> and <c>Query</c>.
 /// </summary>
 private static HttpWebRequest GetRequest()
 {
     return WebRequests.GetPostRequest(url, Query);
 }
예제 #26
0
 /// <summary>
 /// Asserts that calling <c>WebRequests.GET</c> on <c>url</c> does not throw.
 /// </summary>
 public void FetchRatesURL()
 {
     Assert.DoesNotThrow(() =>
     {
         WebRequests.GET(url);
     });
 }
예제 #27
0
        /// <summary>
        /// Executes a Search using the searchField as the search parameter,
        /// paginates / scrolls the search results to the end adding all the url of apps
        /// it finds to a Mongo "QUEUE" collection
        /// </summary>
        /// <param name="searchField">Search term formatted into <c>Consts.CRAWL_URL</c></param>
        /// <param name="shouldUseProxies">When true, the requests use the proxy returned by <c>ProxiesLoader.GetWebProxy</c></param>
        private static void CrawlStore (string searchField, bool shouldUseProxies)
        {
            // Console Feedback
            _logger.Warn ("Crawling Search Term : [ " + searchField + " ]");

            // Hashset of urls used to keep track of what's been parsed already
            HashSet<String> foundUrls = new HashSet<String> ();

            // Control variable to avoid "Loop" on pagging
            bool isDonePagging = false;

            // Compiling Regular Expression used to parse the "pagToken" out of the Play Store
            Regex pagTokenRegex = new Regex (@"GAEi+.+\:S\:.{11}\\42", RegexOptions.Compiled);

            // Last successfully fetched HTML Response (source of the next "pagToken")
            string response;

            // MongoDB Helper
            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB   = new MongoDBWrapper ();
            string fullServerAddress = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex ("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser ();

            // Executing Web Requests
            using (WebRequests server = new WebRequests ())
            {
                // Creating Request Object
                server.Headers.Add (Consts.ACCEPT_LANGUAGE);
                server.Host      = Consts.HOST;
                server.UserAgent = Consts.GITHUBURL;
                server.Encoding  = "utf-8";

                // Checking for the need to use "HTTP Proxies"
                if (shouldUseProxies)
                {
                    server.Proxy = ProxiesLoader.GetWebProxy ();
                }

                // Executing Initial Request
                response = server.Post (String.Format (Consts.CRAWL_URL, searchField), Consts.INITIAL_POST_DATA);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls (response))
                {
                    // Remembering the url so the paging loop below can detect repetition
                    foundUrls.Add (url);

                    // Checks whether the app have been already processed
                    // or is queued to be processed
                    if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                    {
                        // Than, queue it :)
                        mongoDB.AddToQueue (url);
                        Thread.Sleep (250); // Hiccup
                    }
                }

                // Executing Requests for more Play Store Links
                int errorsCount = 0;
                do
                {
                    // Finding pagToken from the last successful HTML response
                    var rgxMatch = pagTokenRegex.Match (response);

                    // If there's no match, there is no next page to ask for
                    if (!rgxMatch.Success)
                    {
                        break;
                    }

                    // Reading Match from Regex, and applying needed replacements
                    string pagToken = rgxMatch.Value.Replace (":S:", "%3AS%3A").Replace("\\42", String.Empty).Replace(@"\\u003d", String.Empty);

                    // Assembling new PostData with paging values
                    string postData = String.Format (Consts.POST_DATA, pagToken);

                    // Executing request for values. The result is kept apart from "response" so that a failed
                    // request does not wipe the page the token came from (otherwise the retry could never run).
                    string pageResponse = server.Post (String.Format (Consts.CRAWL_URL, searchField), postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        _logger.Error ("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // Jumps to the loop condition; the same page is requested again while errors are within the limit
                        continue;
                    }

                    // Request worked: this page becomes the source of the next token
                    response = pageResponse;

                    // Parsing Links
                    foreach (string url in parser.ParseAppUrls (response))
                    {
                        // HashSet.Add returns false for an url seen before, which means the
                        // "pagging" started repeating itself and everything was parsed already
                        if (!foundUrls.Add (url))
                        {
                            isDonePagging = true;
                            break;
                        }

                        // Checks whether the app have been already processed
                        // or is queued to be processed
                        if ((!mongoDB.AppProcessed (Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued (url)))
                        {
                            // Than, queue it :)
                            mongoDB.AddToQueue (url);
                            Thread.Sleep (250); // Hiccup
                        }
                    }

                }  while (!isDonePagging && parser.AnyResultFound (response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
예제 #28
0
        /// <summary>
        /// Executes a Search using the searchField as the search parameter,
        /// paginates / scrolls the search results to the end adding all the url of apps
        /// it finds to a Mongo "QUEUE" collection
        /// </summary>
        /// <param name="searchField">Search term formatted into <c>Consts.CRAWL_URL</c></param>
        /// <param name="shouldUseProxies">When true, the requests use the proxy returned by <c>ProxiesLoader.GetWebProxy</c></param>
        private static void CrawlStore(string searchField, bool shouldUseProxies)
        {
            // Console Feedback
            _logger.Warn("Crawling Search Term : [ " + searchField + " ]");

            // Hashset of urls used to keep track of what's been parsed already
            HashSet <String> foundUrls = new HashSet <String> ();

            // Control variable to avoid "Loop" on pagging
            bool isDonePagging = false;

            // Compiling Regular Expression used to parse the "pagToken" out of the Play Store
            Regex pagTokenRegex = new Regex(@"GAEi+.+\:S\:.{11}\\x22", RegexOptions.Compiled);

            // Last successfully fetched HTML Response (source of the next "pagToken")
            string response;

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Executing Web Requests
            using (WebRequests server = new WebRequests())
            {
                // Creating Request Object
                server.Headers.Add(Consts.ACCEPT_LANGUAGE);
                server.Host      = Consts.HOST;
                server.UserAgent = Consts.GITHUBURL;
                server.Encoding  = "utf-8";

                // Checking for the need to use "HTTP Proxies"
                if (shouldUseProxies)
                {
                    server.Proxy = ProxiesLoader.GetWebProxy();
                }

                // Executing Initial Request
                response = server.Post(String.Format(Consts.CRAWL_URL, searchField), Consts.INITIAL_POST_DATA);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls(response))
                {
                    // Remembering the url so the paging loop below can detect repetition
                    foundUrls.Add(url);

                    // Checks whether the app have been already processed
                    // or is queued to be processed
                    if ((!_mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!_mongoDB.AppQueued(url)))
                    {
                        // Than, queue it :)
                        _mongoDB.AddToQueue(url);
                        Thread.Sleep(250);  // Hiccup
                    }
                }

                // Executing Requests for more Play Store Links
                int errorsCount = 0;
                do
                {
                    // Finding pagToken from the last successful HTML response
                    var rgxMatch = pagTokenRegex.Match(response);

                    // If there's no match, there is no next page to ask for
                    if (!rgxMatch.Success)
                    {
                        break;
                    }

                    // Reading Match from Regex, and applying needed replacements
                    string pagToken = rgxMatch.Value.Replace(":S:", "%3AS%3A").Replace("\\x22", String.Empty).Replace(@"\\u003d", String.Empty);

                    // Assembling new PostData with paging values
                    string postData = String.Format(Consts.POST_DATA, pagToken);

                    // Executing request for values. The result is kept apart from "response" so that a failed
                    // request does not wipe the page the token came from (otherwise the retry could never run).
                    string pageResponse = server.Post(String.Format(Consts.CRAWL_URL, searchField), postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        _logger.Error("Http Error" + " - Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // Jumps to the loop condition; the same page is requested again while errors are within the limit
                        continue;
                    }

                    // Request worked: this page becomes the source of the next token
                    response = pageResponse;

                    // Parsing Links
                    foreach (string url in parser.ParseAppUrls(response))
                    {
                        // HashSet.Add returns false for an url seen before, which means the
                        // "pagging" started repeating itself and everything was parsed already
                        if (!foundUrls.Add(url))
                        {
                            isDonePagging = true;
                            break;
                        }

                        // Checks whether the app have been already processed
                        // or is queued to be processed
                        if ((!_mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!_mongoDB.AppQueued(url)))
                        {
                            // Than, queue it :)
                            _mongoDB.AddToQueue(url);
                            Thread.Sleep(250);  // Hiccup
                        }
                    }
                }  while (!isDonePagging && parser.AnyResultFound(response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
예제 #29
0
        /// <summary>
        /// Fetches <c>url</c> with <c>WebRequests.GET</c> and asserts that the returned text
        /// deserializes into <c>ExchangeRates</c> without throwing.
        /// </summary>
        public void SetRates()
        {
            string payload = WebRequests.GET(url);

            Assert.DoesNotThrow(() =>
            {
                JsonConvert.DeserializeObject <ExchangeRates>(payload);
            });
        }
예제 #30
0
 /// <summary>
 /// Awaits <c>WebRequests.PostRequestStringSync</c> for <c>url</c> and <c>Query</c> and returns the resulting string.
 /// </summary>
 private static async Task <string> GetPostMessageAsync()
 {
     string postMessage = await WebRequests.PostRequestStringSync(url, Query);

     return postMessage;
 }
예제 #31
0
 /// <summary>
 /// Link-click handler: calls <c>WebRequests.downloadFileUsingAscxDownload</c> for the
 /// HacmeBank web site demo scan target, passing <c>processFile</c> as the second argument.
 /// </summary>
 private void llDownloadDemoFile_HacmeBank_Website_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
 {
     var demoScanTarget = O2CoreResources.DemoScanTarget_Dll_Hacmebank_WebSite;

     WebRequests.downloadFileUsingAscxDownload(demoScanTarget, processFile);
 }
 /// <summary>
 /// Link-click handler: calls <c>WebRequests.downloadFileUsingAscxDownload</c> for the
 /// HacmeBank web services demo ozasmt file, passing <c>loadOzasmtFile</c> as the second argument.
 /// </summary>
 private void llLoadHacmeBank_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
 {
     var demoOzasmtFile = O2CoreResources.DemoOzasmtFile_Hacmebank_WebServices;

     WebRequests.downloadFileUsingAscxDownload(demoOzasmtFile, loadOzasmtFile);
 }
예제 #33
0
        /// <summary>
        /// Walks the comment pages of a topic: downloads each page, parses its comments through
        /// <c>ParseComment</c> and forwards them with <c>SendMessage</c>.
        /// </summary>
        /// <param name="topic">Topic whose Url is used to build the paged comments URL</param>
        /// <param name="comments">Buffer that receives the parsed comments; it is sent and cleared after each parsed page</param>
        /// <param name="client">Http client handed to <c>WebRequestsUtils.Get</c> for every page request</param>
        /// <returns>
        /// False when paging stops inside the loop (a page repeated, the last page was reached, the comments
        /// could not be extracted, or too many empty responses came back in a row);
        /// true only when no page URL could be built from the topic Url.
        /// </returns>
        private static bool DoTheWork(Topic topic, ref List <Comment> comments, WebRequests client)
        {
            // Upper bound of empty responses in a row before giving up on the topic
            const int maxConsecutiveEmptyResponses = 5;

            int numberOfPage              = 1;
            int processedLastPosition     = 0;
            int consecutiveEmptyResponses = 0;

            string url           = String.Empty;
            string topicPieceUrl = String.Empty;

            // Find the right URL (the captured piece stops before the query string, when there is one)
            string piecePattern = topic.Url.Contains("?") ? @"/(\d{1,4}.*)\?" : @"/(\d{1,4}.*)";
            Regex importantPieceUrlRegex = new Regex(piecePattern, RegexOptions.Compiled | RegexOptions.IgnoreCase);

            // Built once: compiling this expression again for every page is wasted work
            Regex statsRegex = new Regex(@"\s\d{1,}\sa\s(\d{1,})\sde\s(\d{1,})", RegexOptions.Compiled);

            Match match = importantPieceUrlRegex.Match(topic.Url);

            if (match.Success)
            {
                topicPieceUrl = match.Groups[1].Value.Trim();
                url           = String.Format(_mapURLs["TopicComments"], topicPieceUrl, numberOfPage);
            }

            while (!String.IsNullOrWhiteSpace(url))
            {
                logger.Trace("Topic <{0}> ... Page: {1}", topic.Title, numberOfPage);

                // Get Request
                string htmlResponse = SharedLibrary.Utils.WebRequestsUtils.Get(ref client, logger, url);

                // Checking if html response is valid
                if (String.IsNullOrWhiteSpace(htmlResponse))
                {
                    logger.Warn("HtmlResponse is null or empty. URL: " + url);

                    // Without a bound, a source that keeps answering with nothing would keep this loop alive forever
                    consecutiveEmptyResponses += 1;
                    if (consecutiveEmptyResponses >= maxConsecutiveEmptyResponses)
                    {
                        logger.Error("Too many empty responses in a row. Last URL .: {0}", url);
                        return(false);
                    }

                    // Skipping to the next page: the url has to follow the page number,
                    // otherwise the very same address would be requested again and again
                    numberOfPage += 1;
                    url           = String.Format(_mapURLs["TopicComments"], topicPieceUrl, numberOfPage);

                    // Keep Calm and do not shutdown the forum!
                    Thread.Sleep(2 * 1000);
                    continue;
                }

                consecutiveEmptyResponses = 0;

                // Loading HtmlDocument
                HtmlDocument htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(htmlResponse);

                // Extract Comments
                HtmlNodeCollection commentsNode = htmlDoc.DocumentNode.SelectNodes(".//ol[@id='posts']//li[contains(@class,'postbitlegacy')]");
                if (commentsNode == null || commentsNode.Count == 0)
                {
                    // Need permission to parser?
                    HtmlNode permissionNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@class='blockbody formcontrols']");
                    if (permissionNode != null)
                    {
                        logger.Warn("Permission Error to access the topic");
                    }
                    else
                    {
                        logger.Error("Problem to extract commentsNode. URL .: {0}", url);
                    }

                    return(false);
                }

                int firstPositionFromTopic = -1;

                ParseComment(commentsNode, ref topic, numberOfPage, ref comments, ref firstPositionFromTopic);

                // Condition to know if I am on the last available page of the topic:
                // the first position of this page equals the one recorded for the previous page
                if (processedLastPosition == firstPositionFromTopic)
                {
                    return(false);
                }

                processedLastPosition = firstPositionFromTopic;

                // Sending whatever was parsed from this page
                if (comments.Count > 0)
                {
                    SendMessage(comments);
                    comments.Clear();
                }

                // Is it the last Page?
                HtmlNode statsPageNode = htmlDoc.DocumentNode.SelectSingleNode(".//div[@class='threadpagestats']");
                if (statsPageNode == null)
                {
                    statsPageNode = htmlDoc.DocumentNode.SelectSingleNode(".//div[@class='postpagestats']");
                }
                if (statsPageNode != null)
                {
                    string stats = statsPageNode.InnerText.Trim();

                    // Both captured numbers being equal is treated as "this was the last page"
                    match = statsRegex.Match(stats);
                    if (match.Success && match.Groups.Count == 3 && match.Groups[1].Value.Trim().Equals(match.Groups[2].Value.Trim()))
                    {
                        return(false);
                    }
                }

                // Next Page
                numberOfPage += 1;
                url           = String.Format(_mapURLs["TopicComments"], topicPieceUrl, numberOfPage);

                // Keep Calm and do not shutdown the forum!
                Thread.Sleep(2 * 1000);
            }

            return(true);
        }
예제 #34
0
        static void Main(string[] args)
        {
            // Checking for Input Parameters
            if (args == null || args.Length != 1)
            {
                Console.WriteLine("Incorrect number of arguments received. Expected One");
                System.Environment.Exit(-100);
            }

            // Human Readable Variable
            string inputFile = args[0];

            // Checking if the Input file received exists
            if (!File.Exists(inputFile))
            {
                Console.WriteLine(String.Format("Received input file does not exist : {0}", inputFile));
                System.Environment.Exit(-101);
            }

            // App Status
            _appStatus = new Dictionary <String, AppStatusModel> ();

            // Creating Instance of Database Manager
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Parser
            PlayStoreParser dataParser = new PlayStoreParser();

            goto PeopleData;

            using (WebRequests httpClient = new WebRequests())
            {
                // Minor Configuration of the Http Client - Ensures that the requests response will be in english
                // By doing so, we have no problems parsing the dates to their proper formats
                httpClient.Headers.Add(Consts.ACCEPT_LANGUAGE);
                httpClient.Host              = Consts.HOST;
                httpClient.Encoding          = "utf-8";
                httpClient.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

                // Iterating over File Lines (App Urls) - To Extract Data, Not The Reviews Yet.
                foreach (string appUrl in File.ReadAllLines(inputFile))
                {
                    // Logging Progress
                    Console.WriteLine("\n => Processing App : " + appUrl);

                    // Executing Http Get Request for the Apps's Data - With max of 5 Retries
                    String appDataResponse = String.Empty;
                    int    currentRetry    = 0;

                    do
                    {
                        // Http Get
                        appDataResponse = httpClient.Get(appUrl);
                    } while (String.IsNullOrWhiteSpace(appDataResponse) || ++currentRetry <= _maxRetries);

                    // Sanity Check
                    if (String.IsNullOrWhiteSpace(appDataResponse))
                    {
                        Console.WriteLine("\t\t.Error - Failed to find page of app : " + appUrl + ". Skipping it");
                        continue;
                    }

                    Console.WriteLine("\t\t.Page Found. Firing Parser");

                    // Parsing App Data
                    AppModel appData = dataParser.ParseAppPage(appDataResponse, appUrl);

                    // Checking If this app is on the database already
                    if (mongoDB.AppProcessed(appUrl))
                    {
                        Console.WriteLine("\t\t.Previous Version of App Found. Updating It");
                        mongoDB.UpdateRecord(appData, "Url", appData.Url);

                        // Updating App Status
                        _appStatus.Add
                        (
                            appData.Url,
                            new AppStatusModel()
                        {
                            appId   = appData.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty),
                            appUrl  = appData.Url,
                            appName = appData.Name,
                            status  = "Updated"
                        }
                        );
                    }
                    else
                    {
                        Console.WriteLine("\t\t.No Previous Version of the App Found. Adding to Database");
                        mongoDB.Insert <AppModel> (appData);

                        // Updating App Status
                        _appStatus.Add
                        (
                            appData.Url,
                            new AppStatusModel()
                        {
                            appId   = appData.Url.Replace(Consts.PLAY_STORE_PREFIX, String.Empty),
                            appUrl  = appData.Url,
                            appName = appData.Name,
                            status  = "Inserted"
                        }
                        );
                    }
                }
            }

Reviews:
            // Next Phase: Parse Reviews of those Apps
            Console.WriteLine("\n => Parsing Complete. Obtaining Reviews");

            // Iterating again over app urls to parse the reviews from this app
            foreach (string appUrl in File.ReadAllLines(inputFile))
            {
                // Reaching App Id
                string appID = _appStatus[appUrl].appId;

                // Reviews-Break-Parsing Flag
                bool shouldContinueParsing = true;

                // Parsing Review Pages from the apps
                for (int currentPage = 1; /* no stop condition */; currentPage++)
                {
                    // Getting Reviews Data Bundle
                    string reviewsData = ReviewsWrapper.GetAppReviews(appID, currentPage);

                    // Checking for Blocking Situation
                    if (String.IsNullOrEmpty(reviewsData))
                    {
                        Console.WriteLine("Blocked by Play Store. Sleeping process for 10 minutes before retrying.");

                        // Thread Wait for 10 Minutes
                        Thread.Sleep(10 * 60 * 1000);
                    }

                    // Checking for "No Reviews" app
                    if (reviewsData.Length < 50)
                    {
                        Console.WriteLine("No Reviews left for this app. Skipping");
                        break;
                    }

                    // Normalizing Response to Proper HTML
                    reviewsData = ReviewsWrapper.NormalizeResponse(reviewsData);

                    // Iterating over Parsed Reviews
                    foreach (var review in dataParser.ParseReviews(reviewsData))
                    {
                        // Adding App Data to the review
                        review.appID   = _appStatus[appUrl].appId;
                        review.appName = _appStatus[appUrl].appName;
                        review.appURL  = _appStatus[appUrl].appUrl;

                        // Incrementing Reviews Count for this app
                        _appStatus[appUrl].reviews++;

                        // Adding Review Object to Database
                        review.timestamp = DateTime.Now;

                        // Building Query to check for duplicated review
                        var duplicatedReviewQuery = Query.EQ("permalink", review.permalink);

                        // Checking for duplicated review before inserting it
                        if (mongoDB.FindMatch <AppReview> (duplicatedReviewQuery, 1, 0, Consts.REVIEWS_COLLECTION).Count() == 0)
                        {
                            // Inserting Review into MongoDB
                            mongoDB.Insert <AppReview> (review, Consts.REVIEWS_COLLECTION);
                        }
                        else
                        {
                            Console.WriteLine("Duplicated Review. Skipping App");

                            // When this happens, there are no more reviews to be parsed
                            shouldContinueParsing = false; // Skipping this apps processing
                        }
                    }

                    // Hiccup to avoid Blocking problems
                    Console.WriteLine("Parsed Reviews: " + _appStatus[appUrl].reviews);
                    Thread.Sleep(new Random().Next(14000, 21000));

                    if (!shouldContinueParsing)
                    {
                        break;
                    }
                }
            }

PeopleData:

            Console.WriteLine("\n\n => Processing People Data");

            Console.WriteLine("\nSimulating Google Login Using Selenium.");
            using (var firefoxDriver = new FirefoxDriver())
            {
                // Navigating to Dummy Url - One that I Know that well be asked for a login
                firefoxDriver.Navigate().GoToUrl("https://play.google.com/store/people/details?id=101242565951396343093");

                // Reaching Login Fields
                var loginField    = firefoxDriver.FindElementById("Email");
                var passwordField = firefoxDriver.FindElementById("Passwd");
                var btnSignIn     = firefoxDriver.FindElementById("signIn");

                // Sending Credentials to the browser
                loginField.SendKeys("YOUREMAIL");
                passwordField.SendKeys("YOURPASSWORD");
                btnSignIn.Click();

                string lastPeople     = "https://play.google.com/store/people/details?id=115037241907660526856";
                bool   shouldcontinue = false;

                // Processing Reviewers Data
                foreach (string peopleUrl in mongoDB.FindPeopleUrls())
                {
                    // Skipping until last link
                    if (peopleUrl == lastPeople)
                    {
                        shouldcontinue = true;
                    }

                    if (!shouldcontinue)
                    {
                        continue;
                    }

                    // Navigating To the Reviewer Page
                    firefoxDriver.Navigate().GoToUrl(peopleUrl);

                    // Executing Get Request for the Reviewer page on Google Play
                    string reviewerPage = firefoxDriver.PageSource;

                    // Extracting Reviewer Data from the Page
                    ReviewerPageData reviewerData = dataParser.ParsePeopleData(reviewerPage);

                    // Adding Url to the model
                    reviewerData.reviewerUrl = peopleUrl;

                    // Inserting it to the database - If no previous record of this Reviewer is found
                    if (!mongoDB.IsReviewerOnDatabase(peopleUrl))
                    {
                        mongoDB.Insert <ReviewerPageData> (reviewerData, "ReviewersData");
                    }
                }
            }

            // End of Processing + Console Feedback
            Console.WriteLine("\n\n == Processing Summary ==");

            foreach (var status in _appStatus.Select(t => t.Value))
            {
                // Message
                string cMessage = "=> App : {0} - Status {1} - Reviews : {2}";

                Console.WriteLine(String.Format(cMessage, status.appName, status.status, status.reviews));
            }

            Console.ReadLine();
        }
예제 #35
0
        /// <summary>
        /// Requests the latest USD-based rates from the fixer.io endpoint
        /// </summary>
        /// <returns>Whatever <c>WebRequests.GET</c> returns for that url</returns>
        private string FetchRates()
        {
            const string latestUsdRatesUrl = "http://api.fixer.io/latest?base=USD";

            string payload = WebRequests.GET(latestUsdRatesUrl);

            return payload;
        }
예제 #36
0
        /// <summary>
        /// Crawls the store result pages: issues the initial request, then keeps asking
        /// for further pages, and hands every app url that is neither processed nor
        /// queued yet to the MongoDB wrapper queue
        /// </summary>
        /// <param name="searchField">Search term; within this method it is only echoed to the console</param>
        private static void CrawlStore (string searchField)
        {
            Console.WriteLine ("Crawling Search Term : [ " + searchField + " ]");

            // Database wrapper used both to look up known apps and to queue new ones
            MongoDBWrapper mongoDB = new MongoDBWrapper ();
            string mongoAddress    = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
            mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, mongoAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Extracts app urls out of the html responses
            PlayStoreParser parser = new PlayStoreParser ();

            using (WebRequests server = new WebRequests ())
            {
                server.Host = Consts.HOST;

                // First page of results
                string html = server.Post (Consts.CRAWL_URL, Consts.INITIAL_POST_DATA);

                foreach (string appUrl in parser.ParseAppUrls (html))
                {
                    // An app is "known" when it was processed already or is waiting on the queue
                    bool alreadyKnown = mongoDB.AppProcessed (Consts.APP_URL_PREFIX + appUrl) || mongoDB.AppQueued (appUrl);

                    if (alreadyKnown)
                    {
                        Console.WriteLine (" . Duplicated App. Skipped");
                        continue;
                    }

                    Console.WriteLine (" . Queued App");
                    mongoDB.AddToQueue (appUrl);
                }

                // Following pages : the skip value sent is pageSize * pageIndex
                int pageSize       = 48;
                int pageIndex      = 1;
                int failedRequests = 0;

                do
                {
                    string pagedPostData = String.Format (Consts.POST_DATA, (pageSize * pageIndex));

                    html = server.Post (Consts.CRAWL_URL, pagedPostData);

                    if (server.StatusCode == System.Net.HttpStatusCode.OK)
                    {
                        foreach (string appUrl in parser.ParseAppUrls (html))
                        {
                            bool alreadyKnown = mongoDB.AppProcessed (Consts.APP_URL_PREFIX + appUrl) || mongoDB.AppQueued (appUrl);

                            if (alreadyKnown)
                            {
                                Console.WriteLine (" . Duplicated App. Skipped");
                                continue;
                            }

                            Console.WriteLine (" . Queued App");
                            mongoDB.AddToQueue (appUrl);
                        }

                        // Only a successful page moves the paging forward
                        pageIndex++;
                    }
                    else
                    {
                        // Failed page : logged and counted, the same page is requested again
                        // if the loop condition below still holds
                        LogWriter.Error ("Http Error", "Status Code [ " + server.StatusCode + " ]");
                        failedRequests++;
                    }

                }  while (parser.AnyResultFound (html) && failedRequests <= Consts.MAX_REQUEST_ERRORS);
            }
        }
예제 #37
0
        /// <summary>
        /// Default Constructor.
        /// Stores the endpoint and callback, then starts a background task that keeps
        /// fetching the endpoint until disposal is flagged, updating <c>Responsive</c>
        /// and invoking the callback whenever the responsive state changes
        /// </summary>
        /// <param name="endpoint">The endpoint to fetch on every poll</param>
        /// <param name="interval">Delay between polls; handed to <c>Task.Delay</c>, so it is in milliseconds</param>
        /// <param name="stateChangedCallback">Invoked with the new state each time the responsive state changes; may be null</param>
        /// <param name="validResponseParser">
        /// Optional custom check that receives the response (null if the request threw) and the
        /// exception (null if it did not) and returns whether the endpoint counts as responsive.
        /// When omitted, any response at all counts as responsive
        /// </param>
        /// <param name="logger">Optional logger; when null nothing is logged</param>
        public HttpEndPointChecker(string endpoint,
                                   int interval,
                                   Action <bool> stateChangedCallback,
                                   Func <HttpWebResponse, Exception, bool> validResponseParser = null,
                                   ILogger logger = null)
        {
            //Set the endpoint
            Endpoint = endpoint;

            //Store callback
            mStateChangedCallback = stateChangedCallback;

            logger?.LogTraceSource($"HttpEndpointChacker started for {endpoint}");

            // Start task
            Task.Run(async() =>
            {
                while (!mDisposing)
                {
                    //Create defaults
                    var webResponse = default(HttpWebResponse);
                    var exception   = default(Exception);

                    // Start by calling the endpoint

                    try
                    {
                        //Log it
                        logger?.LogTraceSource($"HttpEndpointChecker fetching {endpoint}");

                        //
                        // By default, presume any response that doesn't throw
                        // (so the server replied, even if its 401 for example
                        // meaning the server we hit up actually responded
                        // with something even if it was a page not found or server
                        // error.
                        //
                        // The user is free to override this default behavior
                        //
                        webResponse = await WebRequests.GetAsync(endpoint);
                    }
                    catch (Exception ex)
                    {
                        // Kept so a custom parser can inspect why the request failed
                        exception = ex;
                    }

                    // Figure out the new state
                    // - If we have a custom parser, ask it for the state based on the response
                    // - Otherwise, so long as we have a response of any kind, it's valid
                    var responsive = validResponseParser?.Invoke(webResponse, exception) ?? webResponse != null;

                    //Close web response
                    webResponse?.Close();

                    //Log it
                    // The logger is optional (defaults to null), so it is guarded here like
                    // every other logging call in this constructor; an exception thrown at
                    // this point would end the polling task without anyone observing it
                    logger?.LogTraceSource($"HttpEndpointChecker {endpoint} { (responsive ? "is" : "is not")} responsive");

                    // If the state has changed...
                    if (responsive != Responsive)
                    {
                        // Set new value
                        Responsive = responsive;

                        // Inform listener
                        mStateChangedCallback?.Invoke(responsive);
                    }

                    // if not disposing, wait interval.. then poll again
                    if (!mDisposing)
                    {
                        await Task.Delay(interval);
                    }
                }
            });
        }
예제 #38
0
        /// <summary>
        /// Entry point of the worker piece of the process
        /// Notice that you can run as many workers as you want to in order to make the crawling faster
        /// </summary>
        /// <param name="args">
        /// When exactly one argument is received, it is used as the path of the text file
        /// holding the proxies to load, and proxies are used for the requests
        /// </param>
        static void Main(string[] args)
        {
            // Configuring Log Object
            LogSetup.InitializeLog("PlayStoreWorker.log", "info");
            Logger logger = LogManager.GetCurrentClassLogger();

            logger.Info("Worker Started");

            // Control Variable (Bool - Should the process use proxies? )
            bool isUsingProxies = false;

            // Checking for the need to use proxies
            if (args != null && args.Length == 1)
            {
                // Setting flag to true
                isUsingProxies = true;

                // Loading proxies from .txt received as argument
                String fPath = args[0];

                // Sanity Check
                if (!File.Exists(fPath))
                {
                    logger.Fatal("Couldnt find proxies on path : " + fPath);
                    System.Environment.Exit(-100);
                }

                // Reading Proxies from File
                string[] fLines = File.ReadAllLines(fPath, Encoding.GetEncoding("UTF-8"));

                try
                {
                    // Actual Load of Proxies
                    ProxiesLoader.Load(fLines.ToList());
                }
                catch (Exception ex)
                {
                    logger.Fatal(ex);
                    System.Environment.Exit(-101);
                }
            }

            // Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Creating Instance of Web Requests Server
            WebRequests server = new WebRequests();

            // Queued App Model
            QueuedApp app;

            // Retry Counter (Used for exponential wait increasing logic)
            int retryCounter = 0;

            // Iterating Over MongoDB Records until no document is found to be processed
            while ((app = mongoDB.FindAndModify()) != null)
            {
                try
                {
                    // Building APP URL
                    string appUrl = app.Url;

                    // Sanity check of app page url
                    if (app.Url.IndexOf("http", StringComparison.OrdinalIgnoreCase) < 0)
                    {
                        appUrl = Consts.APP_URL_PREFIX + app.Url;
                    }

                    // Checking if this app is on the database already
                    if (mongoDB.AppProcessed(appUrl))
                    {
                        // Console Feedback, Comment this line to disable if you want to
                        logger.Info("Duplicated App, skipped.");

                        // Delete it from the queue and continues the loop
                        mongoDB.RemoveFromQueue(app.Url);
                        continue;
                    }

                    // Configuring server and Issuing Request
                    server.Headers.Add(Consts.ACCEPT_LANGUAGE);
                    server.Host              = Consts.HOST;
                    server.UserAgent         = Consts.GITHUBURL;
                    server.Encoding          = "utf-8";
                    server.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

                    // Checking for the need to use "HTTP Proxies"
                    if (isUsingProxies)
                    {
                        server.Proxy = ProxiesLoader.GetWebProxy();
                    }

                    // Issuing HTTP Request
                    string response = server.Get(appUrl);

                    // Flag Indicating Success while processing and parsing this app
                    bool ProcessingWorked = true;

                    // Sanity Check
                    if (String.IsNullOrEmpty(response) || server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        logger.Info("Error opening app page : " + appUrl);
                        ProcessingWorked = false;

                        if (isUsingProxies)
                        {
                            ProxiesLoader.IncrementCurrentProxy();
                        }

                        // Keeping the status of the request that just failed : the request object is
                        // replaced right below, and the new instance has not issued any request yet,
                        // so its status says nothing about this app
                        var failedStatusCode = server.StatusCode;

                        // Renewing WebRequest Object to get rid of Cookies
                        server = new WebRequests();

                        // Fallback time variable (milliseconds)
                        double waitTime;

                        // Checking which "Waiting Logic" to use - If there are proxies being used, there's no need to wait too much
                        // If there are no proxies in use, on the other hand, the process must wait more time
                        if (isUsingProxies)
                        {
                            // Waits two seconds everytime
                            waitTime = TimeSpan.FromSeconds(2).TotalMilliseconds;
                        }
                        else
                        {
                            // Increments retry counter
                            retryCounter++;

                            // Checking for maximum retry count
                            if (retryCounter >= 8)
                            {
                                waitTime = TimeSpan.FromMinutes(20).TotalMilliseconds;
                            }
                            else
                            {
                                // Calculating next wait time ( 2 ^ retryCounter seconds)
                                waitTime = TimeSpan.FromSeconds(Math.Pow(2, retryCounter)).TotalMilliseconds;
                            }
                        }

                        // Hiccup to avoid google blocking connections in case of heavy traffic from the same IP
                        // waitTime is held in milliseconds, so it is converted back before being reported as seconds
                        logger.Info("======================================================");
                        logger.Info("\n\tFallback : " + TimeSpan.FromMilliseconds(waitTime).TotalSeconds + " Seconds");
                        Thread.Sleep(Convert.ToInt32(waitTime));

                        // If The Status code is "ZERO" (it means 404) - App must be removed from "Queue"
                        if (failedStatusCode == 0)
                        {
                            // Console Feedback
                            logger.Info("\tApp Not Found (404) - " + app.Url);

                            mongoDB.RemoveFromQueue(app.Url);
                        }
                        logger.Info("======================================================");
                    }
                    else
                    {
                        // Reseting retry counter
                        retryCounter = 0;

                        // Parsing Useful App Data
                        AppModel parsedApp = parser.ParseAppPage(response, appUrl);

                        // Normalizing URLs
                        if (!String.IsNullOrWhiteSpace(parsedApp.DeveloperPrivacyPolicy))
                        {
                            parsedApp.DeveloperPrivacyPolicy = parsedApp.DeveloperPrivacyPolicy.Replace("https://www.google.com/url?q=", String.Empty);
                        }

                        if (!String.IsNullOrWhiteSpace(parsedApp.DeveloperWebsite))
                        {
                            parsedApp.DeveloperNormalizedDomain = parser.NormalizeDomainName(parsedApp.DeveloperWebsite);
                        }

                        List <String> relatedApps = new List <String> ();

                        // Avoiding Exceptions caused by "No Related Apps" situations - Must be treated differently
                        try
                        {
                            // Parsing "Related Apps" and "More From Developer" Apps (URLS Only)
                            foreach (string extraAppUrl in parser.ParseExtraApps(response))
                            {
                                relatedApps.Add(Consts.APP_URL_PREFIX + extraAppUrl);
                            }

                            // Adding "Related Apps" to Apps Model
                            parsedApp.RelatedUrls = relatedApps.Distinct().ToArray();
                        }
                        catch
                        {
                            logger.Info("\tNo Related Apps Found. Skipping");
                        }

                        // Inserting App into Mongo DB Database
                        if (!mongoDB.UpsertKeyEq <AppModel>(parsedApp, "Url", appUrl))
                        {
                            ProcessingWorked = false;
                        }

                        // If the processing failed, do not remove the app from the database, instead, keep it and flag it as not busy
                        // so that other workers can try to process it later
                        if (!ProcessingWorked)
                        {
                            mongoDB.ToggleBusyApp(app, false);
                        }
                        else // On the other hand, if processing worked, removes it from the database
                        {
                            // Console Feedback, Comment this line to disable if you want to
                            Console.ForegroundColor = ConsoleColor.Red;
                            logger.Info("Inserted App : " + parsedApp.Name);
                            Console.ForegroundColor = ConsoleColor.White;

                            mongoDB.RemoveFromQueue(app.Url);
                        }

                        // Counters for console feedback only
                        int extraAppsCounter = 0, newExtraApps = 0;

                        // Parsing "Related Apps" and "More From Developer" Apps (URLS Only)
                        foreach (string extraAppUrl in relatedApps)
                        {
                            // Incrementing counter of extra apps
                            extraAppsCounter++;

                            // Assembling Full app Url to check with database
                            string fullExtraAppUrl;
                            if (extraAppUrl.IndexOf("https://play.google.com/") >= 0)
                            {
                                fullExtraAppUrl = extraAppUrl;
                            }
                            else
                            {
                                fullExtraAppUrl = Consts.APP_URL_PREFIX + extraAppUrl;
                            }

                            // Checking if the app was either processed or queued to be processed already
                            if ((!mongoDB.AppProcessed(fullExtraAppUrl)) && (!mongoDB.IsAppOnQueue(extraAppUrl)))
                            {
                                // Incrementing counter of inserted apps
                                newExtraApps++;

                                // Adds it to the queue of apps to be processed
                                mongoDB.AddToQueue(extraAppUrl);
                            }
                        }

                        // Console Feedback
                        logger.Info("Queued " + newExtraApps + " / " + extraAppsCounter + " related apps");
                    }
                }
                catch (Exception ex)
                {
                    logger.Error(ex);
                }
                finally
                {
                    try
                    {
                        // Toggles Busy status back to false
                        mongoDB.ToggleBusyApp(app, false);
                    }
                    catch (Exception ex)
                    {
                        // Toggle Busy App may raise an exception in case of lack of internet connection, so, i must use this
                        // "inner catch" to avoid it from happenning
                        logger.Error(ex);
                    }
                }
            }
        }
예제 #39
0
        /// <summary>
        /// Posts the employee credentials to the login endpoint and, when the call
        /// succeeds, copies the returned employee data into the application settings,
        /// loads employees and duties, and navigates to the work page
        /// </summary>
        /// <param name="parameter">Object that is cast to <c>IHavePassword</c>; its secure password is what gets sent</param>
        /// <returns>A task that completes once the login command has finished running</returns>
        public async Task LoginAsync(object parameter)
        {
            await RunCommandAsync(() => LoginIsRunning, async() =>
            {
                // Null when the parameter does not expose a password
                var passwordHolder = parameter as IHavePassword;

                // TODO: Move all URLs and API routes to static class in core
                var result = await WebRequests.PostAsync <ApiResponse <LoginResultApiModel> >(
                    "http://localhost:5000/api/auth/login",
                    new LoginEmployeeDto
                {
                    Identify = MyIdentify,
                    Password = passwordHolder?.SecurePassword.UnSecure()
                });

                // Failed call : flag it, keep the error message and stop here
                if (result.DisplayErrorIfFailedAsync())
                {
                    Success      = false;
                    ErrorMessage = result.ErrorMessage;
                    return;
                }

                Success = true;

                // Employee data carried by the server response
                var employee = result.ServerResponse.Response;

                IoC.Settings.Pesel = employee.Pesel;
                IoC.Settings.Token = employee.Token;

                IoC.Settings.FirstName = new TextEntryViewModel
                {
                    Label        = "Imię",
                    OriginalText = employee?.FirstName
                };

                IoC.Settings.LastName = new TextEntryViewModel
                {
                    Label        = "Nazwisko",
                    OriginalText = employee?.LastName
                };

                IoC.Settings.Identify = new TextEntryViewModel
                {
                    Label        = "Identyfikator",
                    OriginalText = employee?.Username
                };

                IoC.Settings.Type = new TextEntryViewModel
                {
                    Label        = "Posada",
                    OriginalText = employee?.Type
                };

                IoC.Settings.Specialize = new TextEntryViewModel
                {
                    Label        = "Specjalizacja",
                    OriginalText = employee?.Specialize
                };

                IoC.Settings.PwdNumber = new TextEntryViewModel
                {
                    Label        = "Numer PWD",
                    OriginalText = employee?.NumberPwz
                };

                IoC.Settings.Password = new PasswordEntryViewModel
                {
                    Label        = "Hasło",
                    FakePassword = "******",
                    UserPassword = passwordHolder?.SecurePassword
                };

                // Administrators get the admin flag switched on
                if (employee?.Type == "Administrator")
                {
                    IoC.Settings.IsEmployeeAdm = true;
                }

                // Loading the remaining data, one call after the other
                await IoC.Employees.LoadEmployees();
                await IoC.Duties.LoadDutiesAsync();
                await IoC.Duties.LoadEmployeeDutiesAsync(employee.Username);

                await Task.Delay(2000);

                // Go to work page
                IoC.Get <ApplicationViewModel>().GoToPage(ApplicationPage.Work);
            });
        }
 /// <summary>
 /// Assigns a newly constructed <c>WebRequests</c> instance to <c>WebRequest</c>
 /// </summary>
 private void WebRequestsInitialize() => WebRequest = new WebRequests();
예제 #41
0
        /// <summary>
        /// Requests the BTC/USD ticker from the bitstamp endpoint
        /// </summary>
        /// <returns>Whatever <c>WebRequests.GET</c> returns for that url</returns>
        private string FetchRates()
        {
            const string btcUsdTickerUrl = "https://www.bitstamp.net/api/v2/ticker/btcusd";

            string payload = WebRequests.GET(btcUsdTickerUrl);

            return payload;
        }
예제 #42
0
        /// <summary>
        /// Executes a Search using the searchField as the search parameter,
        /// paginates the search results by following the "pagToken" found in each response,
        /// and adds every app url that is neither processed nor queued yet
        /// to the queue kept through the MongoDB wrapper
        /// </summary>
        /// <param name="searchField">Search term; it is formatted into <c>Consts.CRAWL_URL</c> for every request</param>
        private static void CrawlStore(string searchField)
        {
            // Console Feedback
            Console.WriteLine("Crawling Search Term : [ " + searchField + " ]");

            // Compiling Regular Expression used to parse the "pagToken" out of the Play Store
            Regex pagTokenRegex = new Regex(@"GAEi+.+\:S\:.{11}\\42", RegexOptions.Compiled);

            // HTML Response
            string response;

            // MongoDB Helper
            // Configuring MongoDB Wrapper
            MongoDBWrapper mongoDB           = new MongoDBWrapper();
            string         fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);

            mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

            // Ensuring the database has the proper index
            mongoDB.EnsureIndex("Url");

            // Response Parser
            PlayStoreParser parser = new PlayStoreParser();

            // Executing Web Requests
            using (WebRequests server = new WebRequests())
            {
                // Creating Request Object
                server.Host = Consts.HOST;

                // Executing Initial Request
                response = server.Post(String.Format(Consts.CRAWL_URL, searchField), Consts.INITIAL_POST_DATA);

                // Parsing Links out of Html Page (Initial Request)
                foreach (string url in parser.ParseAppUrls(response))
                {
                    // Checks whether the app have been already processed
                    // or is queued to be processed
                    if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
                    {
                        // Console Feedback
                        Console.WriteLine(" . Queued App");

                        // Than, queue it :)
                        mongoDB.AddToQueue(url);
                        Thread.Sleep(250);  // Hiccup
                    }
                    else
                    {
                        // Console Feedback
                        Console.WriteLine(" . Duplicated App. Skipped");
                    }
                }

                // Executing Requests for more Play Store Links
                // Paging is driven only by the token found in the previous response,
                // so no skip / multiplier counters are kept
                int errorsCount = 0;
                do
                {
                    // Finding pagToken from HTML
                    var rgxMatch = pagTokenRegex.Match(response);

                    // If there's no match, there is no further page to ask for
                    if (!rgxMatch.Success)
                    {
                        break;
                    }

                    // Reading Match from Regex, and applying needed replacements
                    // NOTE(review): the verbatim literal in the last Replace is two backslashes followed by
                    // "u003d" - confirm that this is the text that really shows up in the token
                    string pagToken = rgxMatch.Value.Replace(":S:", "%3AS%3A").Replace("\\42", String.Empty).Replace(@"\\u003d", String.Empty);

                    // Assembling new PostData with paging values
                    string postData = String.Format(Consts.POST_DATA, pagToken);

                    // Executing request for values
                    response = server.Post(String.Format(Consts.CRAWL_URL, searchField), postData);

                    // Checking Server Status
                    if (server.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        LogWriter.Error("Http Error", "Status Code [ " + server.StatusCode + " ]");
                        errorsCount++;

                        // Inside a do-while this jumps straight to the loop condition below
                        continue;
                    }

                    // Parsing Links
                    foreach (string url in parser.ParseAppUrls(response))
                    {
                        // Checks whether the app have been already processed
                        // or is queued to be processed
                        if ((!mongoDB.AppProcessed(Consts.APP_URL_PREFIX + url)) && (!mongoDB.AppQueued(url)))
                        {
                            // Console Feedback
                            Console.WriteLine(" . Queued App");

                            // Than, queue it :)
                            mongoDB.AddToQueue(url);
                            Thread.Sleep(250);  // Hiccup
                        }
                        else
                        {
                            // Console Feedback
                            Console.WriteLine(" . Duplicated App. Skipped");
                        }
                    }
                }  while (parser.AnyResultFound(response) && errorsCount <= Consts.MAX_REQUEST_ERRORS);
            }
        }
예제 #43
0
        /// <summary>
        /// Reads listing urls from the input file, downloads each listing page and
        /// appends one semicolon-separated record per listing to the output file, in the
        /// order: url;headQuarter;website;email;budget.
        /// </summary>
        /// <param name="options">
        /// Options container; the "inputFile" and "outputFile" entries are read from it.
        /// </param>
        /// <remarks>
        /// The process is terminated through <c>System.Environment.Exit</c> with code
        /// -101 when the input path is missing, -102 when the output path is missing and
        /// -103 when the input file does not exist. After processing, the method waits on
        /// <c>Console.ReadLine</c> before returning.
        /// </remarks>
        private static void Execute(FlexibleOptions options)
        {
            logger.Info ("Start");

            // Sanity Checks
            string inputPath  = options.Get("inputFile");
            string outputPath = options.Get("outputFile");

            if (String.IsNullOrEmpty (inputPath))
            {
                logger.Fatal ("Parameter {--inputfile} is mandatory");
                System.Environment.Exit (-101);
            }

            // Validates the OUTPUT path here (the input path was already checked above);
            // without this, a missing output path would only fail later at the StreamWriter
            if (String.IsNullOrEmpty (outputPath))
            {
                logger.Fatal ("Parameter {--outputFile} is mandatory");
                System.Environment.Exit (-102);
            }

            if (!File.Exists (inputPath))
            {
                logger.Fatal ("Parameter {--inputfile} should point to an existing file");
                System.Environment.Exit (-103);
            }

            // Control Variables
            int processed = 0, errors = 0;

            // Pattern used to pull the e-mail out of the decoded "mail_to" script.
            // Built once, since it does not change between listings
            Regex emailRegex = new Regex (@"Node\('(.+?)'\)\);");

            // Processing Urls
            using (StreamReader fReader = new StreamReader (inputPath))
            using (StreamWriter fWriter = new StreamWriter (outputPath))
            using (WebRequests client = new WebRequests ())
            {
                // Flushing after every write, so records already written are not held in the buffer
                fWriter.AutoFlush = true;

                // Configuring HTTP Client
                client.Referer  = "http://sortfolio.com/";
                client.Host     = "sortfolio.com";
                client.Encoding = "gzip, deflate, sdch";
                client.Headers.Add ("Upgrade-Insecure-Requests", "1");

                string fLine;
                while ((fLine = fReader.ReadLine ()) != null)
                {
                    // Processing Only "Listings"
                    if (fLine.IndexOf ("/listings/", StringComparison.Ordinal) < 0)
                    {
                        continue;
                    }

                    try
                    {
                        // Reaching Listing Page (the url is the first ';'-separated field of the line)
                        string url         = fLine.Split (';')[0];
                        string htmlReponse = client.Get (url);

                        // Control Variable Inc. (counts every attempted listing, failed ones included)
                        processed++;

                        // Sanity Check
                        if (String.IsNullOrEmpty (htmlReponse))
                        {
                            logger.Error ("Failed to read response for {0}", url);
                            errors++;
                            continue;
                        }

                        // Parsing Data
                        HtmlDocument map = new HtmlDocument ();
                        map.LoadHtml (htmlReponse);

                        var tmpNode = map.DocumentNode.SelectSingleNode ("//script[contains(@id,'mail_to')]");

                        // Listings without the "mail_to" script are skipped without being written or counted as errors
                        if (tmpNode == null)
                        {
                            continue;
                        }

                        string email = tmpNode.InnerText;

                        // Stripping the "eval(decodeURIComponent('" wrapper plus any quote / closing
                        // parenthesis characters, then url-decoding what is left
                        email = email.Replace ("eval(decodeURIComponent('", String.Empty).Replace ("'", String.Empty).Replace (")", String.Empty);
                        email = System.Web.HttpUtility.UrlDecode (email);

                        // First capture group holds the e-mail (empty string when there is no match)
                        email = emailRegex.Match (email).Groups[1].Value;

                        // Optional fields: an empty string is written when the node is not found
                        tmpNode = map.DocumentNode.SelectSingleNode ("//*[@id='content']/div/div[2]/p/strong");
                        string budget = tmpNode == null ? String.Empty : tmpNode.InnerText;

                        tmpNode = map.DocumentNode.SelectSingleNode ("//span[@itemprop='addressLocality']");
                        string headQuarter = tmpNode == null ? String.Empty : tmpNode.InnerText;

                        tmpNode = map.DocumentNode.SelectSingleNode ("//a[@class='website']");
                        string website = tmpNode == null ? String.Empty : tmpNode.InnerText;

                        // Writing to output file
                        fWriter.WriteLine ("{0};{1};{2};{3};{4}", url, headQuarter, website, email, budget);

                        logger.Info ("Processed: {0}", url);

                        if (processed % 100 == 0)
                        {
                            logger.Info ("\tProgress:{0} - {1}", processed, errors);
                        }
                    }
                    catch (Exception ex)
                    {
                        // A failure on one listing must not stop the whole run
                        logger.Error (ex);
                        errors++;
                    }
                }
            }

            logger.Info ("End");

            // Waits for a line on standard input before returning
            Console.ReadLine ();
        }