Ejemplo n.º 1
0
        /// <summary>
        /// Bootstraps the crawl: downloads the root page, extracts the category
        /// urls from it and pushes each one onto the categories queue.
        /// </summary>
        /// <param name="args">Command line arguments (not read by this method).</param>
        static void Main(string[] args)
        {
            // Collaborators used by the bootstrapping flow
            var requests       = new RequestsHandler();
            var storeParser    = new AppStoreParser();

            _logger = new LogWrapper();

            // Configuration must be loaded before the queue is created, since the
            // queue name and AWS credentials are passed to the queue helper below
            _logger.LogMessage("Reading Configuration");
            LoadConfiguration();

            // Queue that receives the category urls
            _logger.LogMessage("Initializing Queues");
            var categoriesQueue = new AWSSQSHelper(_categoriesQueueName, 10, _awsKey, _awsKeySecret);

            // Step 1 - Fetch the html of the root page (source of all the apps)
            var rootHtml = requests.GetRootPage();

            // Without the root page there is nothing to parse, so stop here
            if (String.IsNullOrWhiteSpace(rootHtml))
            {
                _logger.LogMessage("Error obtaining Root Page HTMl - Aborting", "Timeout Error");
                return;
            }

            // Step 2 - Pull the category urls out of the root page and enqueue them
            var categoryUrls = storeParser.ParseCategoryUrls(rootHtml);

            foreach (var url in categoryUrls)
            {
                // Feedback for whoever is watching the log
                _logger.LogMessage("Queueing Category : " + url);

                // Hand the url over to the queue
                categoriesQueue.EnqueueMessage(url);
            }

            _logger.LogMessage("End of Bootstrapping phase");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Bootstraps the crawl: optionally loads a proxy list, downloads the root
        /// page, extracts the category urls from it and pushes each one onto the
        /// categories queue.
        /// </summary>
        /// <param name="args">
        /// When exactly one argument is supplied it is treated as the path of a
        /// text file with proxies, and the root page request is told to use
        /// proxies. With any other argument count no proxies are used.
        /// </param>
        /// <remarks>
        /// Terminates the process with exit code -100 when the proxies file does
        /// not exist and -101 when reading or loading the proxies fails.
        /// </remarks>
        static void Main(string[] args)
        {
            // Creating Needed Instances
            RequestsHandler httpClient = new RequestsHandler();
            AppStoreParser  parser     = new AppStoreParser();

            // Setting Up Log
            LogSetup.InitializeLog("Apple_Store_Crawler.log", "info");
            _logger = LogManager.GetCurrentClassLogger();

            // Starting Flow
            _logger.Info("Worker Started");

            // Loading Configuration
            _logger.Info("Reading Configuration");
            LoadConfiguration();

            // Control Variable (Bool - Should the process use proxies? )
            bool shouldUseProxies = false;

            // Checking for the need to use proxies
            if (args != null && args.Length == 1)
            {
                // Setting flag to true
                shouldUseProxies = true;

                // Path of the proxies .txt received as argument
                String fPath = args[0];

                // Sanity Check
                if (!File.Exists(fPath))
                {
                    _logger.Fatal("Couldnt find proxies on path : " + fPath);
                    System.Environment.Exit(-100);
                }

                try
                {
                    // The read lives inside the try block: the file can still be
                    // locked, unreadable or removed after the existence check, and
                    // that failure should be logged and mapped to the same exit
                    // code as a load failure instead of crashing unhandled
                    string[] fLines = File.ReadAllLines(fPath, Encoding.UTF8);

                    // Actual Load of Proxies
                    ProxiesLoader.Load(fLines.ToList());
                }
                catch (Exception ex)
                {
                    _logger.Fatal(ex);
                    System.Environment.Exit(-101);
                }
            }

            // AWS Queue Handler
            _logger.Info("Initializing Queues");
            AWSSQSHelper sqsWrapper = new AWSSQSHelper(_categoriesQueueName, 10, _awsKey, _awsKeySecret);

            // Step 1 - Trying to obtain the root page html (source of all the apps)
            var rootPageResponse = httpClient.GetRootPage(shouldUseProxies);

            // Sanity Check
            if (String.IsNullOrWhiteSpace(rootPageResponse))
            {
                // Logged as a single message: the (message, argument) overload
                // treats the second string as a format argument, and with no
                // placeholder in the message "Timeout Error" would never be
                // written. Error level is used because the run is being aborted.
                _logger.Error("Error obtaining Root Page HTMl - Aborting (Timeout Error)");
                return;
            }

            // Step 2 - Extracting Category Urls from the Root Page and queueing their Urls
            foreach (var categoryUrl in parser.ParseCategoryUrls(rootPageResponse))
            {
                // Logging Feedback
                _logger.Info("Queueing Category : " + categoryUrl);

                // Queueing Category Urls
                sqsWrapper.EnqueueMessage(categoryUrl);
            }

            _logger.Info("End of Bootstrapping phase");
        }