Ejemplo n.º 1
0
        static void Main(string[] args)
        {
            // Creating Needed Instances
            RequestsHandler httpClient = new RequestsHandler ();
            AppStoreParser  parser     = new AppStoreParser ();

            // Setting Up Log
            LogSetup.InitializeLog ("Apple_Store_Crawler.log", "info");
            _logger = LogManager.GetCurrentClassLogger ();

            // Starting Flow
            _logger.Info ("Worker Started");

            // Loading Configuration
            _logger.Info ("Reading Configuration");
            LoadConfiguration ();

            // Control Variable (Bool - Should the process use proxies? )
            bool shouldUseProxies = false;

            // Checking for the need to use proxies
            if (args != null && args.Length == 1)
            {
                // Setting flag to true
                shouldUseProxies = true;

                // Loading proxies from .txt received as argument
                String fPath = args[0];

                // Sanity Check
                if (!File.Exists (fPath))
                {
                    _logger.Fatal ("Couldnt find proxies on path : " + fPath);
                    System.Environment.Exit (-100);
                }

                // Reading Proxies from File
                string[] fLines = File.ReadAllLines (fPath, Encoding.GetEncoding ("UTF-8"));

                try
                {
                    // Actual Load of Proxies
                    ProxiesLoader.Load (fLines.ToList ());
                }
                catch (Exception ex)
                {
                    _logger.Fatal (ex);
                    System.Environment.Exit (-101);
                }
            }

            // AWS Queue Handler
            _logger.Info ("Initializing Queues");
            AzureSQSHelper sqsWrapper = new AzureSQSHelper (_categoriesQueueName, 10, _azureQueueconn);

            // Step 1 - Trying to obtain the root page html (source of all the apps)
            var rootPageResponse = httpClient.GetRootPage (shouldUseProxies);

            // Sanity Check
            if (String.IsNullOrWhiteSpace (rootPageResponse))
            {
                _logger.Info ("Error obtaining Root Page HTMl - Aborting", "Timeout Error");
                return;
            }

            // Step 2 - Extracting Category Urls from the Root Page and queueing their Urls
            foreach (var categoryUrl in parser.ParseCategoryUrls (rootPageResponse))
            {
                // Logging Feedback
                _logger.Info ("Queueing Category : " + categoryUrl);

                // Queueing Category Urls
                sqsWrapper.EnqueueMessage (categoryUrl);
            }

            _logger.Info ("End of Bootstrapping phase");

            //Console.ReadLine();
        }