/// <summary>
/// Entry point of the app-page worker. Every url dequeued from the app-urls queue is
/// downloaded, parsed into an <c>AppleStoreAppModel</c>, and the model's JSON is pushed
/// to the apps-data queue. The loop never returns; it runs until the process is killed.
/// </summary>
/// <param name="args">
/// Optional. When exactly one argument is supplied it is used as the path of a text file
/// whose lines are handed to <c>ProxiesLoader.Load</c>, and every request is then issued
/// with the proxies flag set. The process exits with -100 when that file does not exist
/// and with -101 when loading the proxies throws.
/// </param>
static void Main (string[] args)
{
    // Creating needed instances
    RequestsHandler httpClient = new RequestsHandler ();
    AppStoreParser  parser     = new AppStoreParser ();

    // Logging setup
    LogSetup.InitializeLog ("Apple_Store_Urls_Worker.log", "info");
    _logger = LogManager.GetCurrentClassLogger ();

    // Loading configuration
    _logger.Info ("Loading Configurations from App.config");
    LoadConfiguration ();

    // Control variable: should the requests go through proxies?
    bool shouldUseProxies = false;

    if (args != null && args.Length == 1)
    {
        shouldUseProxies = true;

        // Proxies are loaded from the text file received as argument
        String fPath = args[0];

        // Sanity check
        if (!File.Exists (fPath))
        {
            _logger.Fatal ("Couldnt find proxies on path : " + fPath);
            System.Environment.Exit (-100);
        }

        string[] fLines = File.ReadAllLines (fPath, Encoding.GetEncoding ("UTF-8"));

        try
        {
            ProxiesLoader.Load (fLines.ToList ());
        }
        catch (Exception ex)
        {
            _logger.Fatal (ex);
            System.Environment.Exit (-101);
        }
    }

    // Queue handlers (input: app urls, output: parsed app data)
    _logger.Info ("Initializing Queues");
    AzureSQSHelper appsUrlQueue  = new AzureSQSHelper (_appUrlsQueueName , _maxMessagesPerDequeue, _azureQueueconn);
    AzureSQSHelper appsDataQueue = new AzureSQSHelper (_appsDataQueueName, _maxMessagesPerDequeue, _azureQueueconn);

    // Setting error flag to "no error" (0)
    System.Environment.ExitCode = 0;

    // Exponent of the idle back-off (wait = 2^fallbackWaitTime seconds)
    int fallbackWaitTime = 1;

    _logger.Info ("Started Processing Individual Apps Urls");

    do
    {
        try
        {
            // Dequeueing messages from the queue
            if (!appsUrlQueue.DeQueueMessages ())
            {
                Thread.Sleep (_hiccupTime); // Hiccup
                continue;
            }

            // Nothing received: back off before polling again
            if (!appsUrlQueue.AnyMessageReceived ())
            {
                int waitTime;

                if (fallbackWaitTime <= 12)
                {
                    // Exponential increase of the wait time (milliseconds)
                    waitTime = Convert.ToInt32 (Math.Pow (2, fallbackWaitTime) * 1000);
                }
                else
                {
                    // Resetting the wait after 12 fallbacks
                    waitTime         = 2000;
                    fallbackWaitTime = 0;
                }

                fallbackWaitTime++;

                // waitTime is in milliseconds; the message is labelled in seconds
                _logger.Info ("Fallback (seconds) => " + (waitTime / 1000));
                Thread.Sleep (waitTime);
                continue;
            }

            // Messages arrived: resetting the back-off
            fallbackWaitTime = 1;

            foreach (var appUrl in appsUrlQueue.GetDequeuedMessages ())
            {
                bool processingWorked = true;

                try
                {
                    int    retries = 0;
                    string htmlResponse;

                    // Downloading the page, retrying while the response is blank.
                    // The same blank-check guards both the retry bookkeeping and the loop
                    // condition, so a whitespace-only response can no longer spin forever
                    // without counting a retry or sleeping.
                    do
                    {
                        htmlResponse = httpClient.Get (appUrl.AsString, shouldUseProxies);

                        if (String.IsNullOrWhiteSpace (htmlResponse))
                        {
                            retries++;

                            // Linear back-off between attempts, capped at 30 seconds
                            int sleepTime = retries * _hiccupTime <= 30000 ? retries * _hiccupTime : 30000;

                            _logger.Info ("Retrying Request for App Page [ " + sleepTime / 1000 + " ]");
                            Thread.Sleep (sleepTime);
                        }

                    } while (String.IsNullOrWhiteSpace (htmlResponse) && retries <= _maxRetries);

                    // Retries exhausted: the url is given up on. processingWorked is still
                    // true here, so the finally block below removes the message from the queue.
                    if (String.IsNullOrWhiteSpace (htmlResponse))
                    {
                        _logger.Error ("Giving up on App Page after retries : " + appUrl.AsString);
                        continue;
                    }

                    // Feedback
                    _logger.Info ("Current page " + appUrl.AsString, "Parsing App Data");

                    // Parsing the app data out of the html page
                    AppleStoreAppModel parsedApp = parser.ParseAppPage (htmlResponse);
                    parsedApp.url = appUrl.AsString;

                    // Enqueueing the app data
                    appsDataQueue.EnqueueMessage (parsedApp.ToJson ());

                    // Little hiccup
                    Thread.Sleep (_hiccupTime);
                }
                catch (Exception ex)
                {
                    _logger.Error (ex);

                    // Keeps the message in the queue
                    processingWorked = false;
                }
                finally
                {
                    // The message is deleted unless an exception was raised while processing it
                    if (processingWorked)
                    {
                        appsUrlQueue.DeleteMessage (appUrl);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            _logger.Error (ex);
        }

    } while (true);
}
/// <summary>
/// Entry point of the categories worker. Every category url dequeued from the categories
/// queue is downloaded, the character urls found on the page are HTML-decoded and pushed
/// to the character-urls queue, and the category message is deleted. The loop never
/// returns; it runs until the process is killed.
/// </summary>
/// <param name="args">
/// Optional. When exactly one argument is supplied it is used as the path of a text file
/// whose lines are handed to <c>ProxiesLoader.Load</c>, and every request is then issued
/// with the proxies flag set. The process exits with -100 when that file does not exist
/// and with -101 when loading the proxies throws.
/// </param>
static void Main (string[] args)
{
    // Creating needed instances
    RequestsHandler httpClient = new RequestsHandler ();
    AppStoreParser  parser     = new AppStoreParser ();

    // Logging setup
    LogSetup.InitializeLog ("Apple_Store_Categories_Worker.log", "info");
    _logger = LogManager.GetCurrentClassLogger ();

    // Loading configuration
    _logger.Info ("Loading Configurations from App.config");
    LoadConfiguration ();

    // Control variable: should the requests go through proxies?
    bool shouldUseProxies = false;

    if (args != null && args.Length == 1)
    {
        shouldUseProxies = true;

        // Proxies are loaded from the text file received as argument
        String fPath = args[0];

        // Sanity check
        if (!File.Exists (fPath))
        {
            _logger.Fatal ("Couldnt find proxies on path : " + fPath);
            System.Environment.Exit (-100);
        }

        string[] fLines = File.ReadAllLines (fPath, Encoding.GetEncoding ("UTF-8"));

        try
        {
            ProxiesLoader.Load (fLines.ToList ());
        }
        catch (Exception ex)
        {
            _logger.Fatal (ex);
            System.Environment.Exit (-101);
        }
    }

    // Queue handlers (input: category urls, output: character urls)
    _logger.Info ("Initializing Queues");
    AzureSQSHelper categoriesUrlQueue = new AzureSQSHelper (_categoriesQueueName   , _maxMessagesPerDequeue, _azureQueueconn);
    AzureSQSHelper charactersUrlQueue = new AzureSQSHelper (_characterUrlsQueueName, _maxMessagesPerDequeue, _azureQueueconn);

    // Setting error flag to "no error" (0)
    System.Environment.ExitCode = 0;

    // Exponent of the idle back-off (wait = 2^fallbackWaitTime seconds)
    int fallbackWaitTime = 1;

    _logger.Info ("Started Processing Category Urls");

    do
    {
        try
        {
            // Dequeueing messages from the queue
            if (!categoriesUrlQueue.DeQueueMessages ())
            {
                Thread.Sleep (_hiccupTime); // Hiccup
                continue;
            }

            // Nothing received: back off before polling again
            if (!categoriesUrlQueue.AnyMessageReceived ())
            {
                int waitTime;

                if (fallbackWaitTime <= 12)
                {
                    // Exponential increase of the wait time (milliseconds)
                    waitTime = Convert.ToInt32 (Math.Pow (2, fallbackWaitTime) * 1000);
                }
                else
                {
                    // Resetting the wait after 12 fallbacks
                    waitTime         = 2000;
                    fallbackWaitTime = 0;
                }

                fallbackWaitTime++;

                // waitTime is in milliseconds; the message is labelled in seconds
                _logger.Info ("Fallback (seconds) => " + (waitTime / 1000));
                Thread.Sleep (waitTime);
                continue;
            }

            // Messages arrived: resetting the back-off
            fallbackWaitTime = 1;

            foreach (var categoryUrl in categoriesUrlQueue.GetDequeuedMessages ())
            {
                // Feedback
                _logger.Info ("Started Parsing Category : " + categoryUrl.AsString);

                try
                {
                    int    retries = 0;
                    string htmlResponse;

                    // Downloading the page, retrying while the response is blank.
                    // The same blank-check guards both the retry counter and the loop
                    // condition, so a whitespace-only response can no longer loop forever.
                    do
                    {
                        htmlResponse = httpClient.Get (categoryUrl.AsString, shouldUseProxies);

                        if (String.IsNullOrWhiteSpace (htmlResponse))
                        {
                            _logger.Error ("Retrying Request for Category Page");
                            retries++;

                            // Small hiccup so consecutive attempts are not fired back to back
                            Thread.Sleep (_hiccupTime);
                        }

                    } while (String.IsNullOrWhiteSpace (htmlResponse) && retries <= _maxRetries);

                    // Retries exhausted: moves on. The finally block below deletes the
                    // message, so it must not be deleted here as well.
                    if (String.IsNullOrWhiteSpace (htmlResponse))
                    {
                        _logger.Error ("Giving up on Category Page after retries : " + categoryUrl.AsString);
                        continue;
                    }

                    // The request worked: queueing every character url found on the page
                    foreach (string characterUrl in parser.ParseCharacterUrls (htmlResponse))
                    {
                        charactersUrlQueue.EnqueueMessage (HttpUtility.HtmlDecode (characterUrl));
                    }
                }
                catch (Exception ex)
                {
                    _logger.Error (ex);
                }
                finally
                {
                    // Single place where the message is deleted, whatever the outcome was
                    categoriesUrlQueue.DeleteMessage (categoryUrl);
                }
            }
        }
        catch (Exception ex)
        {
            _logger.Error (ex);
        }

    } while (true);
}
/// <summary>
/// Entry point of the character-urls worker. Every character url dequeued from the
/// character-urls queue is downloaded and the numeric (page) urls found on it are
/// HTML-decoded, de-duplicated and pushed to the numeric-urls queue. While the parser
/// does not report the last page and new urls keep showing up, the "last visible page"
/// url is followed and processed the same way. The message is deleted once its url has
/// been handled. The loop never returns; it runs until the process is killed.
/// </summary>
/// <param name="args">
/// Optional. When exactly one argument is supplied it is used as the path of a text file
/// whose lines are handed to <c>ProxiesLoader.Load</c>, and every request is then issued
/// with the proxies flag set. The process exits with -100 when that file does not exist
/// and with -101 when loading the proxies throws.
/// </param>
static void Main (string[] args)
{
    // Creating needed instances
    RequestsHandler httpClient = new RequestsHandler ();
    AppStoreParser  parser     = new AppStoreParser ();

    // Logging setup
    // NOTE(review): this is the same log file name the app-page worker uses - confirm it is intended
    LogSetup.InitializeLog ("Apple_Store_Urls_Worker.log", "info");
    _logger = LogManager.GetCurrentClassLogger ();

    // Loading configuration
    _logger.Info ("Loading Configurations from App.config");
    LoadConfiguration ();

    // Control variable: should the requests go through proxies?
    bool shouldUseProxies = false;

    if (args != null && args.Length == 1)
    {
        shouldUseProxies = true;

        // Proxies are loaded from the text file received as argument
        String fPath = args[0];

        // Sanity check
        if (!File.Exists (fPath))
        {
            _logger.Fatal ("Couldnt find proxies on path : " + fPath);
            System.Environment.Exit (-100);
        }

        string[] fLines = File.ReadAllLines (fPath, Encoding.GetEncoding ("UTF-8"));

        try
        {
            ProxiesLoader.Load (fLines.ToList ());
        }
        catch (Exception ex)
        {
            _logger.Fatal (ex);
            System.Environment.Exit (-101);
        }
    }

    // Queue handlers (input: character urls, output: numeric urls)
    _logger.Info ("Initializing Queues");
    AzureSQSHelper charactersUrlQueue = new AzureSQSHelper (_characterUrlsQueueName, _maxMessagesPerDequeue, _azureQueueconn);
    AzureSQSHelper numericUrlQueue    = new AzureSQSHelper (_numericUrlsQueueName  , _maxMessagesPerDequeue, _azureQueueconn);

    // Setting error flag to "no error" (0)
    System.Environment.ExitCode = 0;

    // Exponent of the idle back-off (wait = 2^fallbackWaitTime seconds)
    int fallbackWaitTime = 1;

    _logger.Info ("Started Processing Character Urls");

    do
    {
        try
        {
            // Dequeueing messages from the queue
            if (!charactersUrlQueue.DeQueueMessages ())
            {
                Thread.Sleep (_hiccupTime); // Hiccup
                continue;
            }

            // Nothing received: back off before polling again
            if (!charactersUrlQueue.AnyMessageReceived ())
            {
                int waitTime;

                if (fallbackWaitTime <= 12)
                {
                    // Exponential increase of the wait time (milliseconds)
                    waitTime = Convert.ToInt32 (Math.Pow (2, fallbackWaitTime) * 1000);
                }
                else
                {
                    // Resetting the wait after 12 fallbacks
                    waitTime         = 2000;
                    fallbackWaitTime = 0;
                }

                fallbackWaitTime++;

                // waitTime is in milliseconds; the message is labelled in seconds
                _logger.Info ("Fallback (seconds) => " + (waitTime / 1000));
                Thread.Sleep (waitTime);
                continue;
            }

            // Messages arrived: resetting the back-off
            fallbackWaitTime = 1;

            foreach (var characterUrl in charactersUrlQueue.GetDequeuedMessages ())
            {
                // Feedback
                _logger.Info ("Started Parsing Url : " + characterUrl.AsString);

                try
                {
                    int    retries = 0;
                    string htmlResponse;

                    // Downloading the page, retrying while the response is blank.
                    // The same blank-check guards both the retry counter and the loop
                    // condition, so a whitespace-only response can no longer loop forever.
                    do
                    {
                        htmlResponse = httpClient.Get (characterUrl.AsString, shouldUseProxies);

                        if (String.IsNullOrWhiteSpace (htmlResponse))
                        {
                            _logger.Info ("Retrying Request for Character Page");
                            retries++;

                            // Small hiccup
                            Thread.Sleep (_hiccupTime);
                        }

                    } while (String.IsNullOrWhiteSpace (htmlResponse) && retries <= _maxRetries);

                    // Retries exhausted: moves on. The finally block below deletes the
                    // message, so it must not be deleted here as well.
                    if (String.IsNullOrWhiteSpace (htmlResponse))
                    {
                        _logger.Error ("Giving up on Character Page after retries : " + characterUrl.AsString);
                        continue;
                    }

                    // Urls already queued for this character (avoids duplicates)
                    HashSet<String> urlsQueued = new HashSet<String> ();

                    // Queueing urls page after page until there is nothing new to queue
                    do
                    {
                        // Tells whether this page contributed any url (avoids an endless loop)
                        bool anyNewUrl = false;

                        // Each url is decoded exactly once, so the value stored in the set
                        // is the very same string that is sent to the queue
                        foreach (string numericUrl in parser.ParseNumericUrls (htmlResponse).Select (t => HttpUtility.HtmlDecode (t)))
                        {
                            // Add returns false when the url was already in the set
                            if (urlsQueued.Add (numericUrl))
                            {
                                numericUrlQueue.EnqueueMessage (numericUrl);
                                anyNewUrl = true;
                            }
                        }

                        // No further page to request, or the page brought nothing new
                        if (parser.IsLastPage (htmlResponse) || !anyNewUrl)
                        {
                            break;
                        }

                        // Feedback
                        _logger.Info ("Urls Queued For This Page : " + urlsQueued.Count, "\n\tProcessing Feedback");

                        // More pages to go: following the url of the last visible page
                        string lastPageUrl = HttpUtility.HtmlDecode (parser.ParseLastPageUrl (htmlResponse));

                        // Http request for that url (with retries)
                        retries = 0;
                        do
                        {
                            htmlResponse = httpClient.Get (lastPageUrl, shouldUseProxies);

                            if (String.IsNullOrWhiteSpace (htmlResponse))
                            {
                                _logger.Error ("Retrying Request for Last Page");
                                retries++;

                                // Small hiccup
                                Thread.Sleep (_hiccupTime);
                            }

                        } while (String.IsNullOrWhiteSpace (htmlResponse) && retries <= _maxRetries);

                        // The next page could not be downloaded: stops paginating instead
                        // of handing a blank document to the parser
                        if (String.IsNullOrWhiteSpace (htmlResponse))
                        {
                            _logger.Error ("Giving up on Last Page after retries : " + lastPageUrl);
                            break;
                        }

                    } while (true);
                }
                catch (Exception ex)
                {
                    _logger.Error (ex);
                }
                finally
                {
                    // Single place where the message is deleted, whatever the outcome was
                    charactersUrlQueue.DeleteMessage (characterUrl);
                }
            }
        }
        catch (Exception ex)
        {
            _logger.Error (ex);
        }

    } while (true);
}
/// <summary>
/// Entry point of the recorder. App-data messages dequeued from the apps-data queue are
/// deserialized into <c>AppleStoreAppModel</c> records (with <c>_id</c> set to the app
/// url), buffered, and written to MongoDB in batches. A queue message is deleted only
/// after the batch holding its record has been inserted; messages that cannot be
/// deserialized are deleted right away. The loop never returns; it runs until the
/// process is killed.
/// </summary>
/// <param name="args">Not used.</param>
static void Main (string[] args)
{
    // Logging setup
    LogSetup.InitializeLog ("Apple_Store_Recorder.log", "info");
    _logger = LogManager.GetCurrentClassLogger ();

    // Loading configuration
    _logger.Info ("Loading Configurations from App.config");
    LoadConfiguration ();

    // Initializing queue
    _logger.Info ("Initializing Queue");
    AzureSQSHelper appsDataQueue = new AzureSQSHelper (_appsDataQueueName, _maxMessagesPerDequeue, _azureQueueconn);

    // Creating MongoDB instance
    _logger.Info ("Loading MongoDB / Creating Instances");
    MongoDBWrapper mongoDB = new MongoDBWrapper ();
    string serverAddr      = String.Join (":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    mongoDB.ConfigureDatabase (Consts.MONGO_USER, Consts.MONGO_PASS, Consts.MONGO_AUTH_DB, serverAddr, 10000, Consts.MONGO_DATABASE, Consts.MONGO_COLLECTION);

    // Setting error flag to "no error" (0)
    System.Environment.ExitCode = 0;

    // Exponent of the idle back-off (wait = 2^fallbackWaitTime seconds)
    int fallbackWaitTime = 1;

    // Records waiting to be inserted, and the queue messages they came from.
    // NOTE(review): assumes a dequeued message can still be deleted after sitting in
    // this buffer (i.e. the queue's visibility timeout outlasts it) - confirm against AzureSQSHelper
    List<AppleStoreAppModel> recordsBuffer  = new List<AppleStoreAppModel> ();
    List<CloudQueueMessage>  messagesBuffer = new List<CloudQueueMessage> ();

    // Insert batch size
    int batchSize = 1000;

    _logger.Info ("Started Recording App Data");

    do
    {
        try
        {
            // Dequeueing messages from the queue
            if (!appsDataQueue.DeQueueMessages ())
            {
                Thread.Sleep (_hiccupTime); // Hiccup
                continue;
            }

            // Nothing received: flush what is pending, then back off before polling again
            if (!appsDataQueue.AnyMessageReceived ())
            {
                // Without this, a partially filled buffer would never be recorded once
                // the queue runs dry. Failures are logged here so the sleep below still happens.
                try
                {
                    FlushBuffers (mongoDB, appsDataQueue, recordsBuffer, messagesBuffer);
                }
                catch (Exception ex)
                {
                    _logger.Error (ex);
                }

                int waitTime;

                if (fallbackWaitTime <= 12)
                {
                    // Exponential increase of the wait time (milliseconds)
                    waitTime = Convert.ToInt32 (Math.Pow (2, fallbackWaitTime) * 1000);
                }
                else
                {
                    // Resetting the wait after 12 fallbacks
                    waitTime         = 2000;
                    fallbackWaitTime = 0;
                }

                fallbackWaitTime++;

                // waitTime is in milliseconds; the message is labelled in seconds
                _logger.Info ("Fallback (seconds) => " + (waitTime / 1000));
                Thread.Sleep (waitTime);
                continue;
            }

            // Messages arrived: resetting the back-off
            fallbackWaitTime = 1;

            foreach (var appDataMessage in appsDataQueue.GetDequeuedMessages ())
            {
                AppleStoreAppModel appData;

                try
                {
                    // Deserializing the message and dumping "url" into "_id"
                    appData     = AppleStoreAppModel.FromJson (appDataMessage.AsString);
                    appData._id = appData.url;
                }
                catch (Exception ex)
                {
                    _logger.Error (ex);

                    // The message cannot be turned into a record: it is removed from the queue
                    appsDataQueue.DeleteMessage (appDataMessage);
                    continue;
                }

                // The message stays in the queue until its record has been inserted
                recordsBuffer.Add  (appData);
                messagesBuffer.Add (appDataMessage);

                // Is it time to batch insert?
                if ((recordsBuffer.Count % batchSize) == 0)
                {
                    try
                    {
                        FlushBuffers (mongoDB, appsDataQueue, recordsBuffer, messagesBuffer);
                    }
                    catch (Exception ex)
                    {
                        // Both buffers are left untouched when the insert fails
                        _logger.Error (ex);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            _logger.Error (ex);
        }

    } while (true);
}

/// <summary>
/// Inserts every buffered record with one batch insert and then deletes the queue
/// messages those records came from. Does nothing when there are no buffered records.
/// </summary>
/// <param name="mongoDB">Wrapper used for the batch insert.</param>
/// <param name="queue">Queue the buffered messages are deleted from.</param>
/// <param name="records">Records to insert; cleared once the insert has returned.</param>
/// <param name="messages">Messages to delete; cleared after the deletion pass.</param>
/// <remarks>
/// An exception thrown by the batch insert propagates to the caller and leaves both
/// buffers untouched. A failure to delete a single message is logged and does not stop
/// the remaining deletions.
/// </remarks>
private static void FlushBuffers (MongoDBWrapper mongoDB, AzureSQSHelper queue, List<AppleStoreAppModel> records, List<CloudQueueMessage> messages)
{
    if (records.Count == 0)
    {
        return;
    }

    // Batch insertion
    mongoDB.BatchInsert<AppleStoreAppModel> (records);
    _logger.Info ("\tApps Recorded : " + records.Count);

    // Cleared right after the insert so a deletion problem can never cause a re-insert
    records.Clear ();

    // Deleting the messages whose records are now stored
    int deletedMessages = 0;
    foreach (CloudQueueMessage msg in messages)
    {
        try
        {
            queue.DeleteMessage (msg);
            deletedMessages++;
        }
        catch (Exception ex)
        {
            _logger.Error (ex);
        }
    }

    _logger.Info ("\tMessages Deleted: " + deletedMessages);
    messages.Clear ();
}