/// <summary>
/// Entry point of the crawler.
/// Sets up logging, optionally loads HTTP proxies, configures the MongoDB
/// wrapper and its indexes, runs the bootstrapping crawl over every
/// category / search term defined in <c>BootstrapTerms</c>, and finally
/// re-queues every distinct app url found in past collections.
/// </summary>
/// <param name="args">
/// Optional. When exactly one argument is supplied it is treated as the path
/// of a text file with the proxies to load. The process exits with code -100
/// if that file does not exist and with code -101 if loading the proxies throws.
/// </param>
static void Main(string[] args)
{
    // Setting up the log
    LogSetup.InitializeLog("PlayStoreCrawler.log", "info");
    _logger = LogManager.GetCurrentClassLogger();

    // Control flag: should the process use proxies?
    bool isUsingProxies = false;

    // Proxies are only used when exactly one argument (the proxies file path) is received
    if (args != null && args.Length == 1)
    {
        _logger.Info("Loading Proxies from File");
        isUsingProxies = true;

        // Path of the proxies file received as argument
        string proxiesPath = args[0];

        // Sanity check: without the file there is nothing to load, so abort early
        if (!File.Exists(proxiesPath))
        {
            _logger.Fatal("Couldnt find proxies on path : " + proxiesPath);
            System.Environment.Exit(-100);
        }

        // Reading proxies from file
        string[] proxyLines = File.ReadAllLines(proxiesPath, Encoding.UTF8);

        try
        {
            // Actual load of proxies
            ProxiesLoader.Load(proxyLines.ToList());
        }
        catch (Exception ex)
        {
            _logger.Fatal(ex);
            System.Environment.Exit(-101);
        }
    }

    // Configuring MongoDB wrapper
    _logger.Info("Setting up MongoDB Collections and Indexes");
    _mongoDB = new MongoDBWrapper();
    string fullServerAddress = String.Join(":", Consts.MONGO_SERVER, Consts.MONGO_PORT);
    _mongoDB.ConfigureDatabase(
        Consts.MONGO_USER,
        Consts.MONGO_PASS,
        Consts.MONGO_AUTH_DB,
        fullServerAddress,
        Consts.MONGO_TIMEOUT,
        Consts.MONGO_DATABASE,
        Consts.MONGO_COLLECTION);

    // Ensuring the database collections have the proper indexes
    _mongoDB.EnsureIndex("Url");
    _mongoDB.EnsureIndex("IsBusy", Consts.QUEUED_APPS_COLLECTION);
    _mongoDB.EnsureIndex("Url", Consts.QUEUED_APPS_COLLECTION);

    // Main flow
    _logger.Info("Started Bootstrapping Steps");

    // Scraping "Play Store Categories"
    foreach (var categoriesKVP in BootstrapTerms.categoriesAndNames)
    {
        CrawlCategory(categoriesKVP.Key, categoriesKVP.Value, isUsingProxies);
    }

    // Queueing apps that start with each of the characters from "A" to "Z"
    foreach (var character in BootstrapTerms.charactersSearchTerms)
    {
        CrawlStore(character, isUsingProxies);
    }

    // ... Keep adding characters / search terms in order to increase the crawler's reach

    // App categories as search terms
    foreach (var category in BootstrapTerms.categoriesSearchTerms)
    {
        CrawlStore(category, isUsingProxies);
    }

    // Extra "random" search terms to increase even more the crawler's reach
    foreach (var miscTerm in BootstrapTerms.miscSearchTerms)
    {
        CrawlStore(miscTerm, isUsingProxies);
    }

    // Country names as search terms to increase even more the crawler's reach
    foreach (var countryName in BootstrapTerms.countryNames)
    {
        CrawlStore(countryName, isUsingProxies);
    }

    _logger.Info("\n\nBootstrapping Apps of Past Collections");

    // Collecting the distinct app urls over all past collections.
    // The HashSet itself discards duplicates, so no explicit Contains check is needed.
    HashSet<String> appUrls = new HashSet<String>();
    foreach (string collection in _mongoDB.GetHistoryOfCollections())
    {
        _logger.Info("Reading Collection [{0}]", collection);

        appUrls.UnionWith(_mongoDB.FindAllFromCollectionAs<AppModel>(collection).Select(t => t.Url));

        _logger.Info("\t=> Distinct Apps Found {0}", appUrls.Count);
    }

    // Adding distinct apps to the collection of apps to be processed
    const int progressInterval = 10000;
    int appsQueued = 0;
    foreach (string appUrl in appUrls)
    {
        _mongoDB.AddToQueue(appUrl);
        appsQueued++;

        // Count first, then report, so the logged number is an exact multiple of the interval
        if (appsQueued % progressInterval == 0)
        {
            _logger.Info("[Progress] Apps Queued From Past Collections [{0}]", appsQueued);
        }
    }

    // Report the final total when the loop did not end exactly on a progress boundary
    if (appsQueued % progressInterval != 0)
    {
        _logger.Info("[Progress] Apps Queued From Past Collections [{0}]", appsQueued);
    }
}