/// <summary>
/// Entry point of the worker piece of the process.
/// Notice that you can run as many workers as you want to in order to make the crawling faster.
/// </summary>
/// <param name="args">Optional single argument: path of a .txt file containing the HTTP proxies to be used.</param>
static void Main(string[] args)
{
    // Configuring Log Object
    LogSetup.InitializeLog("PlayStoreWorker.log", "info");
    Logger logger = LogManager.GetCurrentClassLogger();
    logger.Info("Worker Started");

    // Control Variable (Bool - Should the process use proxies?)
    bool isUsingProxies = false;

    // Proxies are used when the path of a proxies file is received as the single argument
    if (args != null && args.Length == 1)
    {
        isUsingProxies = true;

        // Loading proxies from .txt received as argument
        String fPath = args[0];

        // Sanity Check
        if (!File.Exists(fPath))
        {
            logger.Fatal("Couldnt find proxies on path : " + fPath);
            System.Environment.Exit(-100);
        }

        // Reading Proxies from File
        string[] fLines = File.ReadAllLines(fPath, Encoding.GetEncoding("UTF-8"));

        try
        {
            // Actual Load of Proxies
            ProxiesLoader.Load(fLines.ToList());
        }
        catch (Exception ex)
        {
            logger.Fatal(ex);
            System.Environment.Exit(-101);
        }
    }

    // Parser
    PlayStoreParser parser = new PlayStoreParser();

    // Configuring MongoDB Wrapper
    MongoDBWrapper mongoDB = new MongoDBWrapper();
    string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    // Creating Instance of Web Requests Server
    WebRequests httpClient = new WebRequests();

    // Queued App Model
    QueuedApp app;

    // Retry Counter (Used for exponential wait increasing logic)
    int retryCounter = 0;

    // Iterating over MongoDB records while there are documents to be processed
    while ((app = mongoDB.FindAndModify()) != null)
    {
        try
        {
            // Building APP URL - queued urls may or may not carry the store prefix already
            string appUrl = app.Url;
            if (app.Url.IndexOf("http", StringComparison.OrdinalIgnoreCase) < 0)
            {
                appUrl = Consts.APP_URL_PREFIX + app.Url;
            }

            // Skipping apps that are on the database already
            if (mongoDB.AppProcessedByUrl(appUrl))
            {
                logger.Info("Duplicated App, skipped.");
                mongoDB.RemoveFromQueue(app.Url);
                continue;
            }

            // Configuring server and issuing request
            // NOTE(review): Headers.Add runs on every iteration over the same client
            // instance - confirm WebRequests deduplicates headers, otherwise they accumulate.
            httpClient.Headers.Add(Consts.ACCEPT_LANGUAGE);
            httpClient.Host = Consts.HOST;
            httpClient.UserAgent = Consts.GITHUBURL;
            httpClient.Encoding = "utf-8";
            httpClient.EncodingDetection = WebRequests.CharsetDetection.DefaultCharset;

            // Checking for the need to use "HTTP Proxies"
            if (isUsingProxies)
            {
                httpClient.Proxy = ProxiesLoader.GetWebProxy();
            }

            // Issuing HTTP Request
            string response = httpClient.Get(appUrl);

            // Flags indicating success while processing and parsing this app
            bool ProcessingWorked = true;
            bool PermissionsWorked = true; // NOTE(review): assigned below but never read afterwards

            // Sanity Check
            if (String.IsNullOrEmpty(response) || httpClient.StatusCode != System.Net.HttpStatusCode.OK)
            {
                logger.Info("Error opening app page : " + appUrl);
                ProcessingWorked = false;

                if (isUsingProxies)
                {
                    ProxiesLoader.IncrementCurrentProxy();
                }

                // Renewing WebRequest Object to get rid of Cookies
                httpClient = new WebRequests();

                // With proxies a short fixed wait is enough; without them the wait grows exponentially
                double waitTime = ComputeFallbackTime(isUsingProxies, ref retryCounter);

                // Hiccup to avoid google blocking connections in case of heavy traffic from the same IP
                logger.Info("======================================================");
                // FIX: waitTime is in milliseconds - converted before logging it as seconds
                logger.Info("\n\tFallback : " + (waitTime / 1000) + " Seconds");
                Thread.Sleep(Convert.ToInt32(waitTime));

                // If the status code is "ZERO" (it means 404) - app must be removed from the queue
                if (httpClient.StatusCode == 0)
                {
                    logger.Info("\tApp Not Found (404) - " + app.Url);
                    mongoDB.RemoveFromQueue(app.Url);
                }
                logger.Info("======================================================");
            }
            else
            {
                // Reseting retry counter
                retryCounter = 0;

                // Parsing Useful App Data
                AppModel parsedApp = parser.ParseAppPage(response, appUrl);

                // Normalizing URLs
                if (!String.IsNullOrWhiteSpace(parsedApp.DeveloperPrivacyPolicy))
                {
                    parsedApp.DeveloperPrivacyPolicy = parsedApp.DeveloperPrivacyPolicy.Replace("https://www.google.com/url?q=", String.Empty);
                }
                if (!String.IsNullOrWhiteSpace(parsedApp.DeveloperWebsite))
                {
                    parsedApp.DeveloperNormalizedDomain = parser.NormalizeDomainName(parsedApp.DeveloperWebsite);
                }

                List<String> relatedApps = new List<String>();

                // "No Related Apps" situations raise an exception and must be treated differently
                try
                {
                    // Parsing "Related Apps" and "More From Developer" Apps (URLS Only)
                    foreach (string extraAppUrl in parser.ParseExtraApps(response))
                    {
                        relatedApps.Add(Consts.APP_URL_PREFIX + extraAppUrl);
                    }

                    // Adding "Related Apps" to Apps Model
                    parsedApp.RelatedUrls = relatedApps.Distinct().ToArray();
                }
                catch
                {
                    logger.Info("\tNo Related Apps Found. Skipping");
                }

                // Fetching Permissions
                response = httpClient.Post(Consts.PERMISSIONS_URL, String.Format(Consts.PERMISSIONS_POST_DATA, parsedApp.AppId));

                // After requesting its permissions, the AppId can be normalized to its "to lower" form
                parsedApp.AppId = parsedApp.AppId.ToLower();

                // FIX: the original reset retryCounter to zero right here, which made the
                // exponential backoff below always wait ~2 seconds; the counter is now
                // only reset on success.

                // Sanity Check
                if (String.IsNullOrEmpty(response) || httpClient.StatusCode != System.Net.HttpStatusCode.OK)
                {
                    logger.Info("Error parsing apps permissions: " + appUrl);
                    PermissionsWorked = false;

                    if (isUsingProxies)
                    {
                        ProxiesLoader.IncrementCurrentProxy();
                    }

                    // Renewing WebRequest Object to get rid of Cookies
                    httpClient = new WebRequests();

                    double waitTime = ComputeFallbackTime(isUsingProxies, ref retryCounter);

                    // FIX: the original computed waitTime but never slept here
                    Thread.Sleep(Convert.ToInt32(waitTime));
                }
                else
                {
                    // Reseting retry counter
                    retryCounter = 0;

                    List<Tuple<String, String>> permissionsTuple = parser.ParsePermissions(response);

                    // Sanity check
                    if (permissionsTuple != null && permissionsTuple.Count > 0)
                    {
                        // Adding permissions and their descriptions to the app's lists
                        foreach (var tuple in permissionsTuple)
                        {
                            // Lazily initializing the lists on first permission found
                            if (parsedApp.Permissions == null)
                            {
                                parsedApp.Permissions = new List<String>();
                                parsedApp.PermissionDescriptions = new List<String>();
                            }

                            parsedApp.Permissions.Add(tuple.Item1);
                            parsedApp.PermissionDescriptions.Add(tuple.Item2);
                        }
                    }
                }

                // Inserting App into Mongo DB Database
                if (!mongoDB.UpsertKeyEq<AppModel>(parsedApp, "Url", appUrl))
                {
                    ProcessingWorked = false;
                }

                // If the processing failed the app is kept on the queue (flagged as not busy) so
                // other workers can retry it later; otherwise it is removed from the queue
                if (!ProcessingWorked)
                {
                    mongoDB.ToggleBusyApp(app, false);
                }
                else
                {
                    // Console Feedback, comment these lines to disable if you want to
                    Console.ForegroundColor = ConsoleColor.Red;
                    logger.Info("Inserted App : " + parsedApp.Name);
                    Console.ForegroundColor = ConsoleColor.White;

                    mongoDB.RemoveFromQueue(app.Url);
                }

                // Counters for console feedback only
                int extraAppsCounter = 0, newExtraApps = 0;

                // Queueing related apps that were neither processed nor queued yet
                foreach (string extraAppUrl in relatedApps)
                {
                    extraAppsCounter++;

                    // Assembling full app url to check against the database
                    string fullExtraAppUrl;
                    if (extraAppUrl.IndexOf("https://play.google.com/") >= 0)
                    {
                        fullExtraAppUrl = extraAppUrl;
                    }
                    else
                    {
                        fullExtraAppUrl = Consts.APP_URL_PREFIX + extraAppUrl;
                    }

                    // Checking if the app was either processed or queued to be processed already
                    if ((!mongoDB.AppProcessedByUrl(fullExtraAppUrl)) && (!mongoDB.IsAppOnQueue(extraAppUrl)))
                    {
                        newExtraApps++;
                        mongoDB.AddToQueue(extraAppUrl);
                    }
                }

                logger.Info("Queued " + newExtraApps + " / " + extraAppsCounter + " related apps");
            }
        }
        catch (Exception ex)
        {
            logger.Error(ex);
        }
        finally
        {
            try
            {
                // Toggles busy status back to false
                mongoDB.ToggleBusyApp(app, false);
            }
            catch (Exception ex)
            {
                // ToggleBusyApp may throw on connectivity issues - logged and swallowed
                // on purpose so the worker loop survives
                logger.Error(ex);
            }
        }
    }
}

/// <summary>
/// Calculates the wait (in milliseconds) before the next attempt. With proxies a flat
/// two-second wait is used; without them the wait grows as 2^retryCounter seconds,
/// truncated at twenty minutes once eight retries are reached.
/// </summary>
/// <param name="isUsingProxies">True when HTTP proxies are in use.</param>
/// <param name="retryCounter">Consecutive-failure counter; incremented when no proxies are used.</param>
/// <returns>Milliseconds to sleep before the next request.</returns>
private static double ComputeFallbackTime(bool isUsingProxies, ref int retryCounter)
{
    if (isUsingProxies)
    {
        // Waits two seconds everytime
        return TimeSpan.FromSeconds(2).TotalMilliseconds;
    }

    retryCounter++;

    if (retryCounter >= 8)
    {
        return TimeSpan.FromMinutes(20).TotalMilliseconds;
    }

    return TimeSpan.FromSeconds(Math.Pow(2, retryCounter)).TotalMilliseconds;
}
/// <summary>
/// Entry point of the crawler. Bootstraps the queue of apps to be processed by crawling
/// every category page and issuing one search per bootstrap term, then re-queues every
/// distinct app url found on past collections.
/// </summary>
/// <param name="args">Optional single argument: path of a .txt file with HTTP proxies.</param>
static void Main(string[] args)
{
    // Logging setup
    LogSetup.InitializeLog("PlayStoreCrawler.log", "info");
    _logger = LogManager.GetCurrentClassLogger();

    // HTTP proxies are used only when the proxies file path is received as the single argument
    bool isUsingProxies = (args != null && args.Length == 1);

    if (isUsingProxies)
    {
        _logger.Info("Loading Proxies from File");

        String proxiesFilePath = args[0];

        // Abort if the proxies file cannot be found
        if (!File.Exists(proxiesFilePath))
        {
            _logger.Fatal("Couldnt find proxies on path : " + proxiesFilePath);
            System.Environment.Exit(-100);
        }

        string[] proxyLines = File.ReadAllLines(proxiesFilePath, Encoding.GetEncoding("UTF-8"));

        try
        {
            ProxiesLoader.Load(proxyLines.ToList());
        }
        catch (Exception ex)
        {
            _logger.Fatal(ex);
            System.Environment.Exit(-101);
        }
    }

    // MongoDB wiring
    _logger.Info("Setting up MongoDB Collections and Indexes");
    _mongoDB = new MongoDBWrapper();
    string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    _mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, fullServerAddress, Consts.MONGO_TIMEOUT, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    // Indexes used by the workers' queue queries
    _mongoDB.EnsureIndex("Url");
    _mongoDB.EnsureIndex("IsBusy", Consts.QUEUED_APPS_COLLECTION);
    _mongoDB.EnsureIndex("Url", Consts.QUEUED_APPS_COLLECTION);

    // Main Flow
    _logger.Info("Started Bootstrapping Steps");

    // Play Store categories
    foreach (var categoryPair in BootstrapTerms.categoriesAndNames)
    {
        CrawlCategory(categoryPair.Key, categoryPair.Value, isUsingProxies);
    }

    // Search terms: single characters ("A".."Z"), category names, misc terms and
    // country names - keep adding terms to increase the crawler's reach
    foreach (var searchTerm in BootstrapTerms.charactersSearchTerms)
    {
        CrawlStore(searchTerm, isUsingProxies);
    }
    foreach (var searchTerm in BootstrapTerms.categoriesSearchTerms)
    {
        CrawlStore(searchTerm, isUsingProxies);
    }
    foreach (var searchTerm in BootstrapTerms.miscSearchTerms)
    {
        CrawlStore(searchTerm, isUsingProxies);
    }
    foreach (var searchTerm in BootstrapTerms.countryNames)
    {
        CrawlStore(searchTerm, isUsingProxies);
    }

    _logger.Info("\n\nBootstrapping Apps of Past Collections");

    // Collecting the distinct urls of apps recorded on previous collections
    HashSet<String> knownUrls = new HashSet<String>();
    foreach (string collection in _mongoDB.GetHistoryOfCollections())
    {
        _logger.Info("Reading Collection [{0}]", collection);

        foreach (string url in _mongoDB.FindAllFromCollectionAs<AppModel>(collection).Select(t => t.Url))
        {
            // HashSet.Add is a no-op for urls already seen
            knownUrls.Add(url);
        }

        _logger.Info("\t=> Distinct Apps Found {0}", knownUrls.Count);
    }

    // Queueing each distinct app url, logging progress every 10000 apps
    int queuedSoFar = 0;
    foreach (string url in knownUrls)
    {
        _mongoDB.AddToQueue(url);

        if (queuedSoFar++ % 10000 == 0)
        {
            _logger.Info("[Progress] Apps Queued From Past Collections [{0}]", queuedSoFar);
        }
    }
}
/// <summary>
/// Extension method that resolves a logger named after the given type.
/// </summary>
/// <param name="type">Type whose short name becomes the logger name.</param>
/// <returns>The logger registered under <c>type.Name</c>.</returns>
public static Logger GetLogger(this Type type)
{
    // Guarantees the logging setup has run before a logger is handed out
    LogSetup.Run();

    return LogManager.GetLogger(type.Name);
}
/// <summary>
/// Builds a server instance wired to a freshly-created logger.
/// </summary>
/// <returns>The server produced by the server factory.</returns>
static internal Server CreateServer()
{
    // The factory receives its own dedicated logger instance
    Logger logger = LogSetup.CreateLogger();

    return ServerFactory.CreateServer(logger);
}
/// <summary>
/// Entry point of the crawler: bootstraps the Play Store crawl by visiting every
/// category page and issuing one search per configured bootstrap term.
/// </summary>
/// <param name="args">Optional single argument: path of a .txt file with HTTP proxies.</param>
static void Main(string[] args)
{
    // Setting Up Log
    LogSetup.InitializeLog("PlayStoreCrawler.log", "info");
    _logger = LogManager.GetCurrentClassLogger();

    // HTTP proxies are used only when the proxies file path is received as the single argument
    bool isUsingProxies = (args != null && args.Length == 1);

    if (isUsingProxies)
    {
        _logger.Info("Loading Proxies from File");

        String proxiesFilePath = args[0];

        // Abort if the proxies file cannot be found
        if (!File.Exists(proxiesFilePath))
        {
            _logger.Fatal("Couldnt find proxies on path : " + proxiesFilePath);
            System.Environment.Exit(-100);
        }

        string[] proxyLines = File.ReadAllLines(proxiesFilePath, Encoding.GetEncoding("UTF-8"));

        try
        {
            ProxiesLoader.Load(proxyLines.ToList());
        }
        catch (Exception ex)
        {
            _logger.Fatal(ex);
            System.Environment.Exit(-101);
        }
    }

    // Main Flow
    _logger.Info("Started Bootstrapping Steps");

    // Play Store categories
    foreach (var categoryPair in BootstrapTerms.categoriesAndNames)
    {
        CrawlCategory(categoryPair.Key, categoryPair.Value, isUsingProxies);
    }

    // Search terms: single characters ("A".."Z"), category names, misc terms and
    // country names - keep adding terms to increase the crawler's reach
    foreach (var searchTerm in BootstrapTerms.charactersSearchTerms)
    {
        CrawlStore(searchTerm, isUsingProxies);
    }
    foreach (var searchTerm in BootstrapTerms.categoriesSearchTerms)
    {
        CrawlStore(searchTerm, isUsingProxies);
    }
    foreach (var searchTerm in BootstrapTerms.miscSearchTerms)
    {
        CrawlStore(searchTerm, isUsingProxies);
    }
    foreach (var searchTerm in BootstrapTerms.countryNames)
    {
        CrawlStore(searchTerm, isUsingProxies);
    }
}
/// <summary>
/// Entry point of the crawler. Issues one Play Store search ("CrawlStore") for each
/// bootstrap term: the letters A-Z, store category names, misc terms and country names.
/// </summary>
/// <param name="args">Not used.</param>
static void Main(string[] args)
{
    // Setting Up Log
    LogSetup.InitializeLog("PlayStoreCrawler.log", "info");
    _logger = LogManager.GetCurrentClassLogger();

    // Search terms, issued in the original hard-coded order. Duplicate entries
    // (e.g. "Congo", "Guinea", "Saint", "United", "P**N") are kept on purpose so
    // the crawl sequence is byte-for-byte the same as before this refactor.
    string[] searchTerms =
    {
        // Crawling the App Store using all characters as the search input
        // (the original sequence places "W" after "Z" - preserved)
        "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
        "N", "O", "P", "Q", "R", "S", "T", "U", "V", "X", "Y", "Z", "W",

        // App categories - keep adding search terms to increase the crawler's reach
        "BOOKS", "BUSINESS", "COMICS", "COMMUNICATION", "EDUCATION", "ENTERTAINMENT",
        "FINANCE", "HEALTH", "LIFESTYLE", "LIVE WALLPAPER", "MEDIA", "MEDICAL",
        "MUSIC", "NEWS", "PERSONALIZATION", "PHOTOGRAPHY", "PRODUCTIVITY", "SHOPPING",
        "SOCIAL", "SPORTS", "TOOLS", "TRANSPORTATION", "TRAVEL", "WEATHER", "WIDGETS",
        "ARCADE", "BRAIN", "CASUAL", "CARDS", "RACING",

        // Extra "random" search terms to increase even more the crawler's reach
        "INDIE", "ZOMBIE", "CATS", "ROOT", "GPS", "BLUETOOTH", "COMPASS", "WALLPAPER",
        "TORRENT", "P**N", "PLAYER", "WINE", "ANTIVIRUS", "P**N",

        // Country names as search terms to increase even more the crawler's reach
        "Afghanistan", "Albania", "Algeria", "American", "Andorra", "Angola", "Anguilla",
        "Antigua", "Argentina", "Armenia", "Aruba", "Australia", "Austria", "Azerbaijan",
        "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize",
        "Benin", "Bermuda", "Bhutan", "Bolivia", "Bosnia", "Botswana", "Bouvet", "Brazil",
        "Brunei", "Bulgaria", "Burkina", "Burundi", "Cambodia", "Cameroon", "Canada",
        "Cape", "Cayman", "Central", "Chad", "Chile", "China", "Christmas", "Cocos",
        "Colombia", "Comoros", "Congo", "Congo", "Cook", "Costa", "Croatia", "Cuba",
        "Cyprus", "Czech", "Denmark", "Djibouti", "Dominica", "Dominican", "Ecuador",
        "Egypt", "El", "Equatorial", "Eritrea", "Estonia", "Ethiopia", "Falkland",
        "Faroe", "Fiji", "Finland", "France", "French", "Gabon", "Gambia", "Georgia",
        "Germany", "Ghana", "Gibraltar", "Greece", "Greenland", "Grenada", "Guadeloupe",
        "Guam", "Guatemala", "Guinea", "Guinea", "Guyana", "Haiti",
        "Holy", "Honduras", "Hong", "Hungary", "Iceland", "India", "Indonesia", "Iran",
        "Iraq", "Ireland", "Israel", "Italy", "Ivory", "Jamaica", "Japan", "Jordan",
        "Kazakhstan", "Kenya", "Kiribati", "Kuwait", "Kyrgyzstan", "Laos", "Latvia",
        "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein", "Lithuania",
        "Luxembourg", "Macau", "Macedonia", "Madagascar", "Malawi", "Malaysia",
        "Maldives", "Mali", "Malta", "Marshall", "Martinique", "Mauritania", "Mauritius",
        "Mayotte", "Mexico", "Micronesia", "Moldova", "Monaco", "Mongolia", "Montenegro",
        "Montserrat", "Morocco", "Mozambique", "Myanmar", "Namibia", "Nauru", "Nepal",
        "Netherlands", "Netherlands", "New", "New", "Nicaragua", "Niger", "Nigeria",
        "Niue", "Norfolk", "North", "Northern", "Norway", "Oman", "Pakistan", "Palau",
        "Panama", "Papua", "Paraguay", "Peru", "Philippines", "Pitcairn", "Poland",
        "Polynesia", "Portugal", "Puerto", "Qatar", "Reunion", "Romania", "Russia",
        "Rwanda", "Saint",
        "Saint", "Saint", "Saint", "Saint", "Samoa", "San", "Sao", "Saudi", "Senegal",
        "Serbia", "Seychelles", "Sierra", "Singapore", "Slovakia", "Slovenia", "Solomon",
        "Somalia", "South", "South", "South", "South", "Spain", "Sri", "Sudan",
        "Suriname", "Svalbard", "Swaziland", "Sweden", "Switzerland", "Syria", "Taiwan",
        "Tajikistan", "Tanzania", "Thailand", "Timor", "Togo", "Tokelau", "Tonga",
        "Trinidad", "Tunisia", "Turkey", "Turkmenistan", "Turks", "Tuvalu", "Uganda",
        "Ukraine", "United", "United", "United", "Uruguay", "Uzbekistan", "Vanuatu",
        "Venezuela", "Vietnam", "Virgin", "Wallis", "Yemen", "Zambia", "Zimbabwe"
    };

    // One crawl per term, in declaration order
    foreach (string term in searchTerms)
    {
        CrawlStore(term);
    }
}
/// <summary>
/// Entry point of the recorder process: drains app-data messages from an SQS queue and
/// batch-inserts them into MongoDB.
/// FIX: messages are now deleted from the queue only AFTER their records are persisted
/// (or when they are poison messages). The original deleted every message in a "finally"
/// block right after dequeueing, so a crash before a 1000-record batch flush permanently
/// lost that data - and already-flushed batches deleted each message twice.
/// </summary>
/// <param name="args">Not used.</param>
static void Main(string[] args)
{
    // Logging / configuration
    LogSetup.InitializeLog("Apple_Store_Recorder.log", "info");
    _logger = LogManager.GetCurrentClassLogger();

    _logger.Info("Loading Configurations from App.config");
    LoadConfiguration();

    // Queue of serialized app records produced by the workers
    _logger.Info("Initializing Queue");
    AWSSQSHelper appsDataQueue = new AWSSQSHelper(_appsDataQueueName, _maxMessagesPerDequeue, _awsKey, _awsKeySecret);

    // MongoDB wiring
    _logger.Info("Loading MongoDB / Creating Instances");
    MongoDBWrapper mongoDB = new MongoDBWrapper();
    string serverAddr = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoDB.ConfigureDatabase(Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, serverAddr, 10000, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    // Setting Error Flag to No Error ( 0 )
    System.Environment.ExitCode = 0;

    // Exponential-backoff step used while the queue is empty
    int fallbackWaitTime = 1;

    // Buffers of records to insert and of their messages, deleted together on flush
    List<AppleStoreAppModel> recordsBuffer = new List<AppleStoreAppModel>();
    List<Message> messagesBuffer = new List<Message>();

    // Insert Batch Size
    int batchSize = 1000;

    _logger.Info("Started Recording App Data");

    do
    {
        try
        {
            // Dequeueing messages from the Queue
            if (!appsDataQueue.DeQueueMessages())
            {
                Thread.Sleep(_hiccupTime); // Hiccup
                continue;
            }

            // Checking for no message received, and false positives situations
            if (!appsDataQueue.AnyMessageReceived())
            {
                // If no message was found, increases the wait time
                int waitTime;
                if (fallbackWaitTime <= 12)
                {
                    // Exponential increase on the wait time, truncated after 12 retries
                    waitTime = Convert.ToInt32(Math.Pow(2, fallbackWaitTime) * 1000);
                }
                else // Reseting Wait after 12 fallbacks
                {
                    waitTime = 2000;
                    fallbackWaitTime = 0;
                }

                fallbackWaitTime++;

                // Sleeping before next try
                Console.WriteLine("Fallback (seconds) => " + waitTime);
                Thread.Sleep(waitTime);
                continue;
            }

            // Reseting fallback time
            fallbackWaitTime = 1;

            // Iterating over dequeued messages
            foreach (var appDataMessage in appsDataQueue.GetDequeuedMessages())
            {
                try
                {
                    // Deserializing message
                    var appData = AppleStoreAppModel.FromJson(appDataMessage.Body);

                    // The app url doubles as the document id
                    appData._id = appData.url;

                    // Buffering the record and its message; the message is only deleted
                    // once the batch containing this record is inserted
                    recordsBuffer.Add(appData);
                    messagesBuffer.Add(appDataMessage);

                    // Is it time to batch insert?
                    if ((recordsBuffer.Count % batchSize) == 0)
                    {
                        // Batch Insertion
                        mongoDB.BatchInsert<AppleStoreAppModel>(recordsBuffer);
                        _logger.Info("\tApps Recorded : " + recordsBuffer.Count);

                        // Data is persisted - now it is safe to delete the messages
                        messagesBuffer.ForEach((msg) => appsDataQueue.DeleteMessage(msg));
                        _logger.Info("\tMessages Deleted: " + messagesBuffer.Count);

                        // Clearing Buffers
                        recordsBuffer.Clear();
                        messagesBuffer.Clear();
                    }
                }
                catch (Exception ex)
                {
                    // Poison message (e.g. undeserializable body): logged and deleted so
                    // it does not get redelivered forever
                    _logger.Error(ex);
                    appsDataQueue.DeleteMessage(appDataMessage);
                }
            }
        }
        catch (Exception ex)
        {
            _logger.Error(ex);
        }
    } while (true);
}
/// <summary>
/// Sets up the shard coordinator: logging, credentials, per-shard state records in the
/// Redis cache, and the Redis pub/sub subscriptions used to control the shards.
/// </summary>
public ShardsCoordinator()
{
    // Load main stuff: logging (shard id -1 marks the coordinator), credentials
    LogSetup.LoggerSetup(-1);
    _log = LogManager.GetCurrentClassLogger();
    _creds = new CoreCredentials();
    _log.Info("Starting CoreBot v" + StatsService.BotVersion);
    //_key = _creds.RedisKey();
    //_redis = ConnectionMultiplexer.Connect("127.0.0.1");
    //new RedisImagesCache(_redis, _creds).Reload(); //reload images into redis

    // Template for each shard's initial status entry
    _defaultShardState = new ShardComMessage()
    {
        Guilds = 0,
        Time = DateTime.UtcNow
    };

    var db = _redis.GetDatabase();

    // Clear any statuses left over from a previous run
    db.KeyDelete(_key + "_shardstats");

    _shardProcesses = new Process[_creds.TotalShards];
    for (int i = 0; i < _creds.TotalShards; i++)
    {
        // Add the shard to the list of shards which should be started.
        // In DEBUG builds shard 0 runs inside the current process instead of being spawned.
#if DEBUG
        if (i > 0)
        {
            _shardStartQueue.Enqueue(i);
        }
        else
        {
            _shardProcesses[i] = Process.GetCurrentProcess();
        }
#else
        _shardStartQueue.Enqueue(i);
#endif
        // Set the shard's initial state in the Redis cache
        var msg = _defaultShardState.Clone();
        msg.ShardId = i;

        // Push each shard's "last seen" timestamp into the future so the coordinator
        // does not consider a shard unresponsive while it is still starting up
        // (staggered per shard index; larger margin for the GLOBAL_NADEKO build).
        var delay = 45;
#if GLOBAL_NADEKO
        delay = 180;
#endif
        msg.Time = DateTime.UtcNow + TimeSpan.FromSeconds(delay * (i + 1));
        db.ListRightPush(_key + "_shardstats", JsonConvert.SerializeObject(msg), flags: CommandFlags.FireAndForget);
    }

    _curProcessId = Process.GetCurrentProcess().Id;

    // Subscribe to shard-coordination events over Redis pub/sub:
    var sub = _redis.GetSubscriber();
    // "send" is published when a shard's status is updated (every 7.5 seconds atm)
    sub.Subscribe(_key + "_shardcoord_send", OnDataReceived, CommandFlags.FireAndForget);
    // "restart" is published when a shard should be stopped and then started again
    sub.Subscribe(_key + "_shardcoord_restart", OnRestart, CommandFlags.FireAndForget);
    // "stop" is published to kill a single shard
    sub.Subscribe(_key + "_shardcoord_stop", OnStop, CommandFlags.FireAndForget);
    // "die" is published to kill the whole bot (coordinator included)
    sub.Subscribe(_key + "_die", (ch, x) => Environment.Exit(0), CommandFlags.FireAndForget);
}
/// <summary>
/// Entry point of the worker: consumes individual app-page urls from an SQS queue,
/// scrapes and parses each page and publishes the parsed app data on a second queue.
/// FIX: when all retries for a page are exhausted, the message is no longer deleted
/// from the queue - previously it was silently dropped because "processingWorked"
/// stayed true on the exhausted-retries path.
/// </summary>
/// <param name="args">Optional single argument: path of a .txt file with HTTP proxies.</param>
static void Main(string[] args)
{
    // Creating Needed Instances
    RequestsHandler httpClient = new RequestsHandler();
    AppStoreParser parser = new AppStoreParser();

    // Loading Configuration
    LogSetup.InitializeLog("Apple_Store_Urls_Worker.log", "info");
    _logger = LogManager.GetCurrentClassLogger();

    _logger.Info("Loading Configurations from App.config");
    LoadConfiguration();

    // Control Variable (Bool - Should the process use proxies?)
    bool shouldUseProxies = false;

    // Proxies are used when the path of a proxies file is received as the single argument
    if (args != null && args.Length == 1)
    {
        shouldUseProxies = true;

        String fPath = args[0];

        // Sanity Check
        if (!File.Exists(fPath))
        {
            _logger.Fatal("Couldnt find proxies on path : " + fPath);
            System.Environment.Exit(-100);
        }

        string[] fLines = File.ReadAllLines(fPath, Encoding.GetEncoding("UTF-8"));

        try
        {
            ProxiesLoader.Load(fLines.ToList());
        }
        catch (Exception ex)
        {
            _logger.Fatal(ex);
            System.Environment.Exit(-101);
        }
    }

    // AWS Queue Handler
    _logger.Info("Initializing Queues");
    AWSSQSHelper appsUrlQueue = new AWSSQSHelper(_appUrlsQueueName, _maxMessagesPerDequeue, _awsKey, _awsKeySecret);
    AWSSQSHelper appsDataQueue = new AWSSQSHelper(_appsDataQueueName, _maxMessagesPerDequeue, _awsKey, _awsKeySecret);

    // Setting Error Flag to No Error ( 0 )
    System.Environment.ExitCode = 0;

    // Exponential-backoff step used while the queue is empty
    int fallbackWaitTime = 1;

    _logger.Info("Started Processing Individual Apps Urls");

    do
    {
        try
        {
            // Dequeueing messages from the Queue
            if (!appsUrlQueue.DeQueueMessages())
            {
                Thread.Sleep(_hiccupTime); // Hiccup
                continue;
            }

            // Checking for no message received, and false positives situations
            if (!appsUrlQueue.AnyMessageReceived())
            {
                // If no message was found, increases the wait time
                int waitTime;
                if (fallbackWaitTime <= 12)
                {
                    // Exponential increase on the wait time, truncated after 12 retries
                    waitTime = Convert.ToInt32(Math.Pow(2, fallbackWaitTime) * 1000);
                }
                else // Reseting Wait after 12 fallbacks
                {
                    waitTime = 2000;
                    fallbackWaitTime = 0;
                }

                fallbackWaitTime++;

                // Sleeping before next try
                Console.WriteLine("Fallback (seconds) => " + waitTime);
                Thread.Sleep(waitTime);
                continue;
            }

            // Reseting fallback time
            fallbackWaitTime = 1;

            // Iterating over dequeued Messages
            foreach (var appUrl in appsUrlQueue.GetDequeuedMessages())
            {
                bool processingWorked = true;
                try
                {
                    // Retrying the page request with growing waits (capped at 30 s)
                    int retries = 0;
                    string htmlResponse;
                    do
                    {
                        // Executing Http Request for the App Url
                        htmlResponse = httpClient.Get(appUrl.Body, shouldUseProxies);

                        if (String.IsNullOrEmpty(htmlResponse))
                        {
                            // Extending Fallback time
                            retries++;
                            int sleepTime = retries * _hiccupTime <= 30000 ? retries * _hiccupTime : 30000;

                            _logger.Info("Retrying Request for App Page [ " + sleepTime / 1000 + " ]");
                            Thread.Sleep(sleepTime);
                        }
                    } while (String.IsNullOrWhiteSpace(htmlResponse) && retries <= _maxRetries);

                    // Checking if retries failed
                    if (String.IsNullOrWhiteSpace(htmlResponse))
                    {
                        // FIX: flag the failure so the message is NOT deleted in the
                        // "finally" below; it becomes visible again after the SQS
                        // visibility timeout and can be retried later
                        processingWorked = false;
                        continue;
                    }

                    // Feedback
                    _logger.Info("Current page " + appUrl.Body, "Parsing App Data");

                    // Parsing Data out of the Html Page
                    AppleStoreAppModel parsedApp = parser.ParseAppPage(htmlResponse);
                    parsedApp.url = appUrl.Body;

                    // Enqueueing App Data
                    appsDataQueue.EnqueueMessage(parsedApp.ToJson());

                    // Little Hiccup
                    Thread.Sleep(_hiccupTime);
                }
                catch (Exception ex)
                {
                    _logger.Error(ex);

                    // Setting Flag to "False"
                    processingWorked = false;
                }
                finally
                {
                    // Deleting the message - only if the processing worked
                    if (processingWorked)
                    {
                        appsUrlQueue.DeleteMessage(appUrl);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            _logger.Error(ex);
        }
    } while (true);
}
/// <summary>
/// Bootstraps a single Discord shard: logging, credentials, database, the Discord
/// client with its event handlers, voice support, commands and interactivity.
/// </summary>
/// <param name="ParentId">Id of the parent process/manager (not used inside this constructor)</param>
/// <param name="shardId">Zero-based shard index; must be non-negative</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown when shardId is negative</exception>
public Core(int ParentId, int shardId)
{
    // check if shardId assigned is < 0
    if (shardId < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(shardId));
    }

    // set up credentials
    LogSetup.LoggerSetup(shardId);
    _config = new BotConfig();
    _log = LogManager.GetCurrentClassLogger();
    Credentials = new CoreCredentials();
    _db = new DbService(Credentials);

    var coreConfig = new DiscordConfiguration
    {
        AutoReconnect = true,
        LargeThreshold = 250,
        LogLevel = DSharpPlus.LogLevel.Debug,
        Token = Credentials.Token,
        // BUGFIX: was "Credentials.UseUserToken ? TokenType.Bot : TokenType.Bot" -
        // both branches identical, a no-op. Collapsed to TokenType.Bot to preserve
        // behavior. NOTE(review): given "Selfbot = Credentials.UseUserToken" below,
        // the intent was likely "UseUserToken ? TokenType.User : TokenType.Bot" - confirm.
        TokenType = TokenType.Bot,
        UseInternalLogHandler = false,
        ShardId = shardId,
        ShardCount = Credentials.TotalShards,
        GatewayCompressionLevel = GatewayCompressionLevel.Stream,
        MessageCacheSize = 50,
        AutomaticGuildSync = true,
        DateTimeFormat = "dd-MM-yyyy HH:mm:ss zzz"
    };
    _discord = new DiscordClient(coreConfig);

    // attach Discord events
    _discord.DebugLogger.LogMessageReceived += this.DebugLogger_LogMessageReceived;
    _discord.Ready += this.Discord_Ready;
    _discord.GuildAvailable += this.Discord_GuildAvailable;
    _discord.MessageCreated += this.Discord_MessageCreated;
    _discord.ClientErrored += this.Discord_ClientErrored;
    _discord.SocketErrored += this.Discord_SocketError;
    _discord.GuildCreated += this.Discord_GuildAvailable;
    _discord.VoiceStateUpdated += this.Discord_VoiceStateUpdated;

    // enable voice service
    var voiceConfig = new VoiceNextConfiguration
    {
        VoiceApplication = DSharpPlus.VoiceNext.Codec.VoiceApplication.Music,
        EnableIncoming = false
    };
    _discord.UseVoiceNext(voiceConfig);

    // BUGFIX: removed a duplicate, unused GoogleApiService instance
    // ("var googleService = new GoogleApiService(Credentials);") that was
    // constructed right before this one and never referenced.
    IGoogleApiService googleApiService = new GoogleApiService(Credentials);
    CoreMusicService cms = new CoreMusicService(_discord, _db, Credentials, this, googleApiService);

    // taken from NadeoBot's Service loading
    var depoBuild = new ServiceCollection();
    depoBuild.AddSingleton<DiscordClient>(_discord);
    depoBuild.AddSingleton<CoreCredentials>(Credentials);
    depoBuild.AddSingleton<IGoogleApiService>(googleApiService);
    depoBuild.AddSingleton(_db);
    depoBuild.AddSingleton(cms);
    // add dependency here

    using (var uow = _db.UnitOfWork)
    {
        _config = uow.BotConfig.GetOrCreate();
    }

    // build command configuration
    // see Dsharpplus configuration
    _log.Info($"{_config.DefaultPrefix}");
    var commandConfig = new CommandsNextConfiguration
    {
        StringPrefix = _config.DefaultPrefix,
        EnableDms = true,
        EnableMentionPrefix = true,
        CaseSensitive = true,
        Services = depoBuild.BuildServiceProvider(),
        Selfbot = Credentials.UseUserToken,
        IgnoreExtraArguments = false
    };

    // attach command events
    this.CommandsNextService = _discord.UseCommandsNext(commandConfig);
    this.CommandsNextService.CommandErrored += this.CommandsNextService_CommandErrored;
    this.CommandsNextService.CommandExecuted += this.CommandsNextService_CommandExecuted;
    this.CommandsNextService.RegisterCommands(typeof(CoreCommands).GetTypeInfo().Assembly);
    this.CommandsNextService.SetHelpFormatter<CoreBotHelpFormatter>();

    // interactive service
    var interConfig = new InteractivityConfiguration()
    {
        PaginationBehaviour = TimeoutBehaviour.DeleteMessage,
        // default paginationtimeout (30 seconds)
        PaginationTimeout = TimeSpan.FromSeconds(30),
        // timeout for current action
        Timeout = TimeSpan.FromMinutes(2)
    };

    // attach interactive component
    this.InteractivityService = _discord.UseInteractivity(interConfig);

    //this.CommandsNextService.RegisterCommands<CoreInteractivityModuleCommands>();
    //register commands from coreinteractivitymodulecommands
    //this.CommandsNextService.RegisterCommands(typeof(CoreInteractivityModuleCommands).GetTypeInfo().Assembly);
}
/// <summary>
/// Entry point of the "Categories" worker. Dequeues category page urls from SQS,
/// fetches each page (optionally through proxies), parses the character urls out
/// of the HTML and enqueues them on the characters-url queue.
/// Runs forever; as many instances as needed can run in parallel.
/// </summary>
/// <param name="args">Optional single argument: path of a .txt file with proxies (one per line)</param>
static void Main(string[] args)
{
    // Creating Needed Instances
    RequestsHandler httpClient = new RequestsHandler();
    AppStoreParser parser = new AppStoreParser();

    // Loading Configuration
    LogSetup.InitializeLog("Apple_Store_Categories_Worker.log", "info");
    _logger = LogManager.GetCurrentClassLogger();

    // Loading Config
    _logger.Info("Loading Configurations from App.config");
    LoadConfiguration();

    // Control Variable (Bool - Should the process use proxies?)
    bool shouldUseProxies = false;

    // Checking for the need to use proxies
    if (args != null && args.Length == 1)
    {
        shouldUseProxies = true;

        // Loading proxies from .txt received as argument
        String fPath = args[0];

        // Sanity Check
        if (!File.Exists(fPath))
        {
            _logger.Fatal("Couldnt find proxies on path : " + fPath);
            System.Environment.Exit(-100);
        }

        // Reading Proxies from File
        string[] fLines = File.ReadAllLines(fPath, Encoding.GetEncoding("UTF-8"));

        try
        {
            // Actual Load of Proxies
            ProxiesLoader.Load(fLines.ToList());
        }
        catch (Exception ex)
        {
            _logger.Fatal(ex);
            System.Environment.Exit(-101);
        }
    }

    // AWS Queue Handler
    _logger.Info("Initializing Queues");
    AWSSQSHelper categoriesUrlQueue = new AWSSQSHelper(_categoriesQueueName, _maxMessagesPerDequeue, _awsKey, _awsKeySecret);
    AWSSQSHelper charactersUrlQueue = new AWSSQSHelper(_characterUrlsQueueName, _maxMessagesPerDequeue, _awsKey, _awsKeySecret);

    // Setting Error Flag to No Error ( 0 )
    System.Environment.ExitCode = 0;

    // Initializing Control Variables
    int fallbackWaitTime = 1;

    _logger.Info("Started Processing Category Urls");

    do
    {
        try
        {
            // Dequeueing messages from the Queue
            if (!categoriesUrlQueue.DeQueueMessages())
            {
                Thread.Sleep(_hiccupTime); // Hiccup
                continue;
            }

            // Checking for no message received, and false positives situations
            if (!categoriesUrlQueue.AnyMessageReceived())
            {
                // If no message was found, increases the wait time
                int waitTime;
                if (fallbackWaitTime <= 12)
                {
                    // Exponential increase on the wait time, truncated after 12 retries
                    waitTime = Convert.ToInt32(Math.Pow(2, fallbackWaitTime) * 1000);
                }
                else // Reseting Wait after 12 fallbacks
                {
                    waitTime = 2000;
                    fallbackWaitTime = 0;
                }

                fallbackWaitTime++;

                // Sleeping before next try
                _logger.Info("Fallback (seconds) => " + waitTime);
                Thread.Sleep(waitTime);
                continue;
            }

            // Reseting fallback time
            fallbackWaitTime = 1;

            // Iterating over dequeued Messages
            foreach (var categoryUrl in categoriesUrlQueue.GetDequeuedMessages())
            {
                // Console Feedback
                _logger.Info("Started Parsing Category : " + categoryUrl.Body);

                try
                {
                    // Retries Counter
                    int retries = 0;
                    string htmlResponse;

                    // Retrying if necessary
                    do
                    {
                        // Executing Http Request for the Category Url
                        htmlResponse = httpClient.Get(categoryUrl.Body, shouldUseProxies);

                        if (String.IsNullOrEmpty(htmlResponse))
                        {
                            _logger.Error("Retrying Request for Category Page");
                            retries++;

                            // BUGFIX: the sibling workers sleep between retries;
                            // this one used to busy-retry with no pause at all
                            Thread.Sleep(_hiccupTime);
                        }
                    } while (String.IsNullOrWhiteSpace(htmlResponse) && retries <= _maxRetries);

                    // Checking if retries failed - gives up on this message
                    if (String.IsNullOrWhiteSpace(htmlResponse))
                    {
                        // BUGFIX: removed the explicit DeleteMessage that used to be here;
                        // "continue" runs the finally block below, so the same message
                        // was being deleted twice.
                        continue;
                    }

                    // If the request worked, parses the urls out of the page
                    foreach (string characterUrls in parser.ParseCharacterUrls(htmlResponse))
                    {
                        // Enqueueing Urls
                        charactersUrlQueue.EnqueueMessage(HttpUtility.HtmlDecode(characterUrls));
                    }
                }
                catch (Exception ex)
                {
                    _logger.Error(ex);
                }
                finally
                {
                    // Deleting the message
                    categoriesUrlQueue.DeleteMessage(categoryUrl);
                }
            }
        }
        catch (Exception ex)
        {
            _logger.Error(ex);
        }
    } while (true);
}
/// <summary>
/// Releases the test logger via the shared LogSetup helper.
/// </summary>
public void Dispose() => LogSetup.DisposeLogger(logger);
/// <summary>
/// xUnit-style setup: creates a fresh logger for each test run.
/// </summary>
public IntegrationTest() => logger = LogSetup.CreateLogger();
/// <summary>
/// Verifies that a message published on a channel is delivered only to the clients
/// subscribed to that same channel: clients 1-3 subscribe to "SkeletonEvent",
/// clients 4-5 to "CowEvent"; a "Boo!" on SkeletonEvent must reach only 2 and 3,
/// a "Moo!" on CowEvent must reach only 5.
/// </summary>
public void Should_distribute_message_only_to_clients_that_have_subscribed_to_same_channel()
{
    // Given
    int port = new PortSetup(logger).GetNextPort();
    Logger serverLogger = LogSetup.CreateLogger("Server_");
    serverLogger.MinimumInfoLevelBeforeWrite = InfoLevel.Trace;
    logger.MinimumInfoLevelBeforeWrite = InfoLevel.Trace;
    Server server = Bootstrapper.CreateServer(serverLogger);
    server.ListenForConnectionsInANewThread(port);

    logger.Write<IntegrationTest>("Connecting client 1");
    var client1 = Bootstrapper.CreateClient(logger);
    client1.Connect(Localhost, port);
    var client1SubscribedEvent = new AutoResetEvent(false);
    client1.OnSubscribed += (theEvent) =>
    {
        logger.Write<IntegrationTest>("Client 1 received subscribeevent: " + theEvent);
        client1SubscribedEvent.Set();
    };
    client1.SubscribeTo("SkeletonEvent");

    logger.Write<IntegrationTest>("Connecting client 2");
    var client2 = Bootstrapper.CreateClient(logger);
    client2.Connect(Localhost, port);
    var client2SubscribedEvent = new AutoResetEvent(false);
    client2.OnSubscribed += (theEvent) =>
    {
        logger.Write<IntegrationTest>("Client 2 received subscribeevent: " + theEvent);
        client2SubscribedEvent.Set();
    };
    client2.SubscribeTo("SkeletonEvent");

    logger.Write<IntegrationTest>("Connecting client 3");
    var client3 = Bootstrapper.CreateClient(logger);
    client3.Connect(Localhost, port);
    var client3SubscribedEvent = new AutoResetEvent(false);
    client3.OnSubscribed += (theEvent) =>
    {
        logger.Write<IntegrationTest>("Client 3 received subscribeevent: " + theEvent);
        client3SubscribedEvent.Set();
    };
    client3.SubscribeTo("SkeletonEvent");

    logger.Write<IntegrationTest>("Connecting client 4");
    var client4 = Bootstrapper.CreateClient(logger);
    client4.Connect(Localhost, port);
    var client4SubscribedEvent = new AutoResetEvent(false);
    client4.OnSubscribed += (theEvent) =>
    {
        logger.Write<IntegrationTest>("Client 4 received subscribeevent: " + theEvent);
        client4SubscribedEvent.Set();
    };
    client4.SubscribeTo("CowEvent");

    logger.Write<IntegrationTest>("Connecting client 5");
    var client5 = Bootstrapper.CreateClient(logger);
    client5.Connect(Localhost, port);
    var client5SubscribedEvent = new AutoResetEvent(false);
    client5.OnSubscribed += (theEvent) =>
    {
        logger.Write<IntegrationTest>("Client 5 received subscribeevent: " + theEvent);
        client5SubscribedEvent.Set();
    };
    client5.SubscribeTo("CowEvent");

    // Message handlers: only clients 2, 3 (SkeletonEvent) and 5 (CowEvent)
    // are expected to receive anything; each asserts its expected payload.
    var client2MessageReceived = new AutoResetEvent(false);
    client2.OnMessageReceived += (msg) =>
    {
        logger.Write<IntegrationTest>("Client 2 received message: " + msg);
        client2MessageReceived.Set();
        Assert.Equal("Boo!", msg);
    };

    var client3MessageReceived = new AutoResetEvent(false);
    client3.OnMessageReceived += (msg) =>
    {
        logger.Write<IntegrationTest>("Client 3 received message: " + msg);
        client3MessageReceived.Set();
        Assert.Equal("Boo!", msg);
    };

    var client5MessageReceived = new AutoResetEvent(false);
    client5.OnMessageReceived += (msg) =>
    {
        logger.Write<IntegrationTest>("Client 5 received message: " + msg);
        client5MessageReceived.Set();
        Assert.Equal("Moo!", msg);
    };

    try
    {
        client1SubscribedEvent.WaitAndThrowErrorIfNoSignalIsSet(DefaultWaitTIme, "Client 1 never received SubscribedEvent");
        client2SubscribedEvent.WaitAndThrowErrorIfNoSignalIsSet(DefaultWaitTIme, "Client 2 never received SubscribedEvent");
        client3SubscribedEvent.WaitAndThrowErrorIfNoSignalIsSet(DefaultWaitTIme, "Client 3 never received SubscribedEvent");
        client4SubscribedEvent.WaitAndThrowErrorIfNoSignalIsSet(DefaultWaitTIme, "Client 4 never received SubscribedEvent");
        client5SubscribedEvent.WaitAndThrowErrorIfNoSignalIsSet(DefaultWaitTIme, "Client 5 never received SubscribedEvent");
    }
    catch (ApplicationException)
    {
        logger.Dispose();
        serverLogger.Dispose();
        // BUGFIX: was "throw e;", which resets the stack trace; "throw;" preserves it
        throw;
    }

    // When
    logger.Write<IntegrationTest>("Sending message from client 1.");
    client1.SendMessage("[SkeletonEvent] Boo!");
    logger.Write<IntegrationTest>("Sending message from client 4.");
    client4.SendMessage("[CowEvent] Moo!");

    // Then
    try
    {
        client2MessageReceived.WaitAndThrowErrorIfNoSignalIsSet(DefaultWaitTIme, "Never received message from client 1");
        client3MessageReceived.WaitAndThrowErrorIfNoSignalIsSet(DefaultWaitTIme, "Never received message from client 1");
        client5MessageReceived.WaitAndThrowErrorIfNoSignalIsSet(DefaultWaitTIme, "Never received message from client 4");
    }
    catch (ApplicationException)
    {
        logger.Dispose();
        serverLogger.Dispose();
        // BUGFIX: was "throw e;", which resets the stack trace; "throw;" preserves it
        throw;
    }

    // Finally
    server.Shutdown();
    logger.Write<IntegrationTest>("Integration test done.");
    serverLogger.Dispose();
}
/// <summary>
/// xUnit-style setup: creates a fresh logger for each test run.
/// </summary>
public MessageStreamReaderTest() => _logger = LogSetup.CreateLogger();
/// <summary>
/// Entry point of the "Character Urls" worker. Dequeues character listing urls from
/// SQS, fetches each page (optionally through proxies), parses the numeric app urls
/// out of the listing and enqueues them on the numeric-urls queue, following the
/// listing's pagination until the last page or until no new url shows up.
/// Runs forever; as many instances as needed can run in parallel.
/// </summary>
/// <param name="args">Optional single argument: path of a .txt file with proxies (one per line)</param>
static void Main(string[] args)
{
    // Creating Needed Instances
    RequestsHandler httpClient = new RequestsHandler();
    AppStoreParser parser = new AppStoreParser();

    // Loading Configuration
    LogSetup.InitializeLog("Apple_Store_Urls_Worker.log", "info");
    _logger = LogManager.GetCurrentClassLogger();

    // Loading Config
    _logger.Info("Loading Configurations from App.config");
    LoadConfiguration();

    // Control Variable (Bool - Should the process use proxies?)
    bool shouldUseProxies = false;

    // Checking for the need to use proxies
    if (args != null && args.Length == 1)
    {
        shouldUseProxies = true;

        // Loading proxies from .txt received as argument
        String fPath = args[0];

        // Sanity Check
        if (!File.Exists(fPath))
        {
            _logger.Fatal("Couldnt find proxies on path : " + fPath);
            System.Environment.Exit(-100);
        }

        // Reading Proxies from File
        string[] fLines = File.ReadAllLines(fPath, Encoding.GetEncoding("UTF-8"));

        try
        {
            // Actual Load of Proxies
            ProxiesLoader.Load(fLines.ToList());
        }
        catch (Exception ex)
        {
            _logger.Fatal(ex);
            System.Environment.Exit(-101);
        }
    }

    // AWS Queue Handler
    _logger.Info("Initializing Queues");
    AWSSQSHelper charactersUrlQueue = new AWSSQSHelper(_characterUrlsQueueName, _maxMessagesPerDequeue, _awsKey, _awsKeySecret);
    AWSSQSHelper numericUrlQueue = new AWSSQSHelper(_numericUrlsQueueName, _maxMessagesPerDequeue, _awsKey, _awsKeySecret);

    // Setting Error Flag to No Error ( 0 )
    System.Environment.ExitCode = 0;

    // Initializing Control Variables
    int fallbackWaitTime = 1;

    _logger.Info("Started Processing Character Urls");

    do
    {
        try
        {
            // Dequeueing messages from the Queue
            if (!charactersUrlQueue.DeQueueMessages())
            {
                Thread.Sleep(_hiccupTime); // Hiccup
                continue;
            }

            // Checking for no message received, and false positives situations
            if (!charactersUrlQueue.AnyMessageReceived())
            {
                // If no message was found, increases the wait time
                int waitTime;
                if (fallbackWaitTime <= 12)
                {
                    // Exponential increase on the wait time, truncated after 12 retries
                    waitTime = Convert.ToInt32(Math.Pow(2, fallbackWaitTime) * 1000);
                }
                else // Reseting Wait after 12 fallbacks
                {
                    waitTime = 2000;
                    fallbackWaitTime = 0;
                }

                fallbackWaitTime++;

                // Sleeping before next try
                // BUGFIX: was Console.WriteLine - inconsistent with the logger used everywhere else
                _logger.Info("Fallback (seconds) => " + waitTime);
                Thread.Sleep(waitTime);
                continue;
            }

            // Reseting fallback time
            fallbackWaitTime = 1;

            // Iterating over dequeued Messages
            foreach (var characterUrl in charactersUrlQueue.GetDequeuedMessages())
            {
                // Console Feedback
                _logger.Info("Started Parsing Url : " + characterUrl.Body);

                try
                {
                    // Retries Counter
                    int retries = 0;
                    string htmlResponse;

                    // Retrying if necessary
                    do
                    {
                        // Executing Http Request for the Character Url
                        htmlResponse = httpClient.Get(characterUrl.Body, shouldUseProxies);

                        if (String.IsNullOrEmpty(htmlResponse))
                        {
                            _logger.Info("Retrying Request for Character Page");
                            retries++;

                            // Small Hiccup
                            Thread.Sleep(_hiccupTime);
                        }
                    } while (String.IsNullOrWhiteSpace(htmlResponse) && retries <= _maxRetries);

                    // Checking if retries failed - gives up on this message
                    if (String.IsNullOrWhiteSpace(htmlResponse))
                    {
                        // BUGFIX: removed the explicit DeleteMessage that used to be here;
                        // "continue" runs the finally block below, so the same message
                        // was being deleted twice.
                        continue;
                    }

                    // Hashset of urls processed (to avoid duplicates)
                    HashSet<String> urlsQueued = new HashSet<String>();

                    // Executing Request and Queueing Urls until there's no other Url to be queued
                    do
                    {
                        // Flag to check whether any url was added after the last iteration (avoids endless loop)
                        bool anyNewUrl = false;

                        // If the request worked, parses the Urls out of the page
                        foreach (string numericUrl in parser.ParseNumericUrls(htmlResponse).Select(t => HttpUtility.HtmlDecode(t)))
                        {
                            // Checking if this url was previously queued
                            if (!urlsQueued.Contains(numericUrl))
                            {
                                // Enqueueing Urls
                                // BUGFIX: the url was HtmlDecoded twice (once in the Select above,
                                // once here) - a doubly-encoded url would get corrupted
                                numericUrlQueue.EnqueueMessage(numericUrl);

                                // Adding url to the local hashset
                                urlsQueued.Add(numericUrl);
                                anyNewUrl = true;
                            }
                        }

                        // Checking for the need to perform another http request for the next page
                        if (parser.IsLastPage(htmlResponse) || !anyNewUrl)
                        {
                            break; // Breaks "While" Loop
                        }

                        // Feedback
                        // BUGFIX: second argument was silently dropped by NLog (no placeholder in format)
                        _logger.Info("Urls Queued For This Page : " + urlsQueued.Count);

                        // If it got to this point, it means that there are more pages to be processed
                        // Parsing URL of the "Last" page (the last that's visible)
                        string lastPageUrl = HttpUtility.HtmlDecode(parser.ParseLastPageUrl(htmlResponse));

                        // Executing Http Request for this Url (with retries)
                        retries = 0;
                        do
                        {
                            // HTTP Get for the Page
                            htmlResponse = httpClient.Get(lastPageUrl, shouldUseProxies);

                            if (String.IsNullOrEmpty(htmlResponse))
                            {
                                _logger.Error("Retrying Request for Last Page");
                                retries++;

                                // Small Hiccup
                                Thread.Sleep(_hiccupTime);
                            }
                        } while (String.IsNullOrEmpty(htmlResponse) && retries <= _maxRetries);
                    } while (true);
                }
                catch (Exception ex)
                {
                    _logger.Error(ex);
                }
                finally
                {
                    // Deleting the message
                    charactersUrlQueue.DeleteMessage(characterUrl);
                }
            }
        }
        catch (Exception ex)
        {
            _logger.Error(ex);
        }
    } while (true);
}