/// <summary>
/// Scrapes the sub-area codes out of the gig site's <c>areaAbb</c> drop-down and
/// appends those not already present in <c>Program.Cities</c> through
/// <c>SitesLoader.AppendCities</c>.
/// </summary>
/// <remarks>
/// Re-run the crawl after a sub-area scrape so the site text file is updated.
/// </remarks>
private void ScrapeSubAreas()
{
    var sites = Document.DocumentNode.SelectSingleNode("//select[@id='areaAbb']");
    if (sites == null)
    {
        // This page has no sub-area selector; nothing to collect.
        return;
    }

    List<string> newAreas = sites.Descendants("option")
        // The first option is skipped (presumably a placeholder entry - confirm against the page markup).
        .Skip(1)
        // An <option> without a value attribute yields null instead of throwing.
        .Select(n => n.Attributes["value"]?.Value)
        .Where(area => area != null)
        // Avoid appending the same sub-area twice when the page lists it more than once.
        .Distinct()
        .Where(area => !Program.Cities.Contains(area))
        .ToList();

    if (newAreas.Count > 0)
    {
        SitesLoader.AppendCities(newAreas);
    }
}
/// <summary>
/// Verifies that loading <c>alexa1M.txt</c> from the application base directory
/// produces exactly 1,000,000 site URLs.
/// </summary>
public void LoadSitesMustSaveAListOfFilesInMemory()
{
    // Path.Combine handles the directory separator regardless of platform or a trailing separator.
    string sitesPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "alexa1M.txt");

    // The stream stays open until every read of the loader is done, then is released.
    using (FileStream sitesFile = new FileStream(sitesPath, FileMode.Open))
    {
        SitesLoader sitesLoader = new SitesLoader(sitesFile);
        List<string> sites = sitesLoader.SitesURLs;

        // Expected value goes first so a failure message reports the values the right way round.
        Assert.AreEqual(1000000, sites.Count);
    }
}
/// <summary>
/// Verifies that a <c>TechnologyFinder</c> can be constructed from the Magento
/// pattern file and the Magento site URL list found in the application base directory.
/// </summary>
public void TechnologyFinderMustSetupTechnologyOnConstructor()
{
    string baseDirectory = System.AppDomain.CurrentDomain.BaseDirectory;
    string patternsPath = Path.Combine(baseDirectory, "MagentoStringPatterns.txt");
    string sitesPath = Path.Combine(baseDirectory, "MagentoSitesURLs.txt");

    // Both streams are disposed when the test finishes, even if construction throws.
    using (FileStream patternsFile = new FileStream(patternsPath, FileMode.Open))
    {
        ITechnology technology = new Technology("Magento", patternsFile);

        using (FileStream sitesFile = new FileStream(sitesPath, FileMode.Open))
        {
            SitesLoader sitesLoader = new SitesLoader(sitesFile);
            TechnologyFinder finder = new TechnologyFinder(technology, sitesLoader.SitesURLs);

            Assert.IsNotNull(finder);
        }
    }
}
/// <summary>
/// Runs the crawler over the Magento site URL list, expects 7 sites to be reported
/// as using the technology, and then writes the found sites.
/// </summary>
public void TechnologyFinderRunCrawlerOnMagento()
{
    string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
    string patternsPath = Path.Combine(baseDirectory, "MagentoStringPatterns.txt");
    string sitesPath = Path.Combine(baseDirectory, "MagentoSitesURLs.txt");

    // Keep both streams open for the whole crawl and release them afterwards,
    // including when the assertion fails.
    using (FileStream patternsFile = new FileStream(patternsPath, FileMode.Open))
    {
        ITechnology technology = new Technology("Magento", patternsFile);

        using (FileStream sitesFile = new FileStream(sitesPath, FileMode.Open))
        {
            SitesLoader sitesLoader = new SitesLoader(sitesFile);
            TechnologyFinder finder = new TechnologyFinder(technology, sitesLoader.SitesURLs);

            finder.RunCrawler();

            Assert.AreEqual(7, finder.SiteURLsWithTheTechnology.Count);
            finder.WriteFoundSites();
        }
    }
}
/// <summary>
/// Program entry point: loads the city list, crawls every city for gigs while
/// printing progress to the console, and finally calls <c>CreateHTML</c>.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Load the city text file.
    Cities = SitesLoader.LoadCities();

    // Scrape each city for gigs.
    foreach (var city in Cities)
    {
        Console.WriteLine($"[*] crawling {city}");

        // Constructing a Gig for the city yields its Site and Gigs, reported below.
        var cityGigs = new Gig(city);
        Console.WriteLine($"[*] crawled {cityGigs.Site} @ {cityGigs.Gigs.Count} gigs");

        // Record the result and keep the running totals up to date.
        Gigs.Add(cityGigs);
        GigCount += cityGigs.Gigs.Count;
        Console.WriteLine($"[*] {Gigs.Count} cities crawled @ {GigCount} gigs");

        // Pause one second between cities.
        Thread.Sleep(1000);
    }

    CreateHTML();
}
/// <summary>
/// Runs the technology crawler for the technology named by <paramref name="id"/> and
/// renders the sites found to use it.
/// </summary>
/// <param name="id">
/// Technology name; used as the prefix of the <c>StringPatterns.txt</c> and
/// <c>SitesURLs.txt</c> files under <c>~/App_Data/</c>.
/// </param>
/// <returns>The view populated with a <c>TechnologyViewModel</c>.</returns>
/// <exception cref="System.ArgumentException">
/// <paramref name="id"/> is null, empty, or contains characters that are not valid in a file name.
/// </exception>
public ActionResult Technology(string id)
{
    string technologyName = id;

    // The id comes from the request and ends up in a file path, so reject anything
    // that is not a plain file-name fragment (path separators, drive colons, ...).
    if (string.IsNullOrEmpty(technologyName)
        || technologyName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
    {
        throw new System.ArgumentException("Invalid technology name.", "id");
    }

    string appDataPath = Server.MapPath("~/App_Data/");
    string patternsFilePath = Path.Combine(appDataPath, technologyName + "StringPatterns.txt");
    string sitesFilePath = Path.Combine(appDataPath, technologyName + "SitesURLs.txt");

    TechnologyViewModel model = new TechnologyViewModel();

    // Both files are released once the crawl is finished, even if it throws.
    using (FileStream patternsFile = new FileStream(patternsFilePath, FileMode.Open))
    {
        // Pass the actual technology name, not the literal text "technologyName".
        ITechnology technology = new Technology(technologyName, patternsFile);

        using (FileStream sitesFile = new FileStream(sitesFilePath, FileMode.Open))
        {
            SitesLoader sitesLoader = new SitesLoader(sitesFile);
            TechnologyFinder finder = new TechnologyFinder(technology, sitesLoader.SitesURLs);
            finder.RunCrawler();

            model.Name = technology.Name;
            model.URLs = finder.SiteURLsWithTheTechnology;
        }
    }

    ViewData["Title"] = technologyName;
    ViewData["Message"] = "It's the " + technologyName + " message to view page";

    return View(model);
}
/// <summary>
/// Verifies that a <c>SitesLoader</c> can be constructed from <c>alexa1M.txt</c>
/// in the application base directory.
/// </summary>
public void LoadSitesFromFileMustSucessfullReadSitesFile()
{
    // Path.Combine handles the directory separator regardless of platform or a trailing separator.
    string sitesPath = Path.Combine(System.AppDomain.CurrentDomain.BaseDirectory, "alexa1M.txt");

    // Release the file handle when the test is done, pass or fail.
    using (FileStream sitesFile = new FileStream(sitesPath, FileMode.Open))
    {
        SitesLoader sitesLoader = new SitesLoader(sitesFile);

        Assert.IsNotNull(sitesLoader);
    }
}
/// <summary>
/// Builds the Shopify technology definition and the Shopify site loader from the
/// <c>ShopifyStringPatterns.txt</c> and <c>ShopifySitesURLs.txt</c> files in the
/// application base directory.
/// </summary>
/// <param name="technology">Receives the <c>Technology</c> named "Shopify".</param>
/// <param name="sitesLoader">Receives the loader created over the Shopify site URL file.</param>
private static void SetupShopifyTechnology(out ITechnology technology, out SitesLoader sitesLoader)
{
    string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;

    // Path.Combine handles the directory separator regardless of platform or a trailing separator.
    string patternsPath = Path.Combine(baseDirectory, "ShopifyStringPatterns.txt");
    string sitesPath = Path.Combine(baseDirectory, "ShopifySitesURLs.txt");

    // NOTE(review): the streams are handed to the returned objects and are not disposed
    // here; whether Technology / SitesLoader read them eagerly or later is not visible
    // from this method - confirm who owns and closes them.
    FileStream patternsFile = new FileStream(patternsPath, FileMode.Open);
    technology = new Technology("Shopify", patternsFile);

    FileStream sitesFile = new FileStream(sitesPath, FileMode.Open);
    sitesLoader = new SitesLoader(sitesFile);
}