Пример #1
0
        /// <summary>
        /// Reads the option values of the <c>areaAbb</c> drop-down in the loaded
        /// document and appends every area that is not already present in
        /// <c>Program.Cities</c> via <c>SitesLoader.AppendCities</c>.
        /// </summary>
        /// <remarks>
        /// Re-run the crawl after a sub-area scrape so the updated site text file is used.
        /// </remarks>
        private void ScrapeSubAreas()
        {
            // scrape out sub area from gig site
            var sites = Document.DocumentNode.SelectSingleNode("//select[@id='areaAbb']");

            // The page has no sub-area selector: nothing to collect.
            if (sites == null)
            {
                return;
            }

            // The first <option> is skipped (presumably a placeholder entry - confirm
            // against the page markup). Options without a "value" attribute are ignored
            // instead of dereferencing a null attribute, and blank or repeated values
            // are dropped so they are never appended to the city list.
            List<string> newAreas = sites.Descendants("option")
                                         .Skip(1)
                                         .Select(option => option.Attributes["value"])
                                         .Where(attribute => attribute != null)
                                         .Select(attribute => attribute.Value)
                                         .Where(area => !string.IsNullOrWhiteSpace(area))
                                         .Distinct()
                                         .Where(area => !Program.Cities.Contains(area))
                                         .ToList();

            if (newAreas.Count > 0)
            {
                SitesLoader.AppendCities(newAreas);
            }
        }
        /// <summary>
        /// Verifies that a <c>SitesLoader</c> built from <c>alexa1M.txt</c> (located in
        /// the application base directory) exposes exactly 1,000,000 site URLs.
        /// </summary>
        public void LoadSitesMustSaveAListOfFilesInMemory()
        {
            // Path.Combine inserts exactly one separator regardless of whether
            // BaseDirectory already ends with one.
            string sitesPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "alexa1M.txt");

            // The stream stays open for the whole test body and is released afterwards,
            // so the file handle is not leaked between tests.
            using (FileStream sitesFile = new FileStream(sitesPath, FileMode.Open))
            {
                SitesLoader sitesLoader = new SitesLoader(sitesFile);

                List<string> sites = sitesLoader.SitesURLs;

                // Expected value first, actual second, so a failure message reads correctly.
                Assert.AreEqual(1000000, sites.Count);
            }
        }
 /// <summary>
 /// Verifies that a <c>TechnologyFinder</c> can be constructed from the Magento
 /// pattern file and the Magento site URL file found in the application base directory.
 /// </summary>
 public void TechnologyFinderMustSetupTechnologyOnConstructor()
 {
     string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
     string patternsPath = Path.Combine(baseDirectory, "MagentoStringPatterns.txt");
     string sitesPath = Path.Combine(baseDirectory, "MagentoSitesURLs.txt");

     // Both streams stay open until the assertions are done and are then disposed,
     // so no file handle outlives the test.
     using (FileStream patternsFile = new FileStream(patternsPath, FileMode.Open))
     using (FileStream sitesFile = new FileStream(sitesPath, FileMode.Open))
     {
         ITechnology technology = new Technology("Magento", patternsFile);
         SitesLoader sitesLoader = new SitesLoader(sitesFile);
         TechnologyFinder finder = new TechnologyFinder(technology, sitesLoader.SitesURLs);
         Assert.IsNotNull(finder);
     }
 }
        /// <summary>
        /// Runs the crawler for the Magento technology over the URLs in
        /// <c>MagentoSitesURLs.txt</c>, expects 7 matching sites, and then asks the
        /// finder to write the found sites.
        /// </summary>
        public void TechnologyFinderRunCrawlerOnMagento()
        {
            string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
            string patternsPath = Path.Combine(baseDirectory, "MagentoStringPatterns.txt");
            string sitesPath = Path.Combine(baseDirectory, "MagentoSitesURLs.txt");

            // Keep both input streams open for the whole crawl and release them
            // deterministically once the test is over.
            using (FileStream patternsFile = new FileStream(patternsPath, FileMode.Open))
            using (FileStream sitesFile = new FileStream(sitesPath, FileMode.Open))
            {
                ITechnology technology = new Technology("Magento", patternsFile);
                SitesLoader sitesLoader = new SitesLoader(sitesFile);

                TechnologyFinder finder = new TechnologyFinder(technology, sitesLoader.SitesURLs);
                finder.RunCrawler();
                Assert.AreEqual(7, finder.SiteURLsWithTheTechnology.Count);
                finder.WriteFoundSites();
            }
        }
Пример #5
0
        /// <summary>
        /// Entry point: loads the city list, builds one <c>Gig</c> per city while
        /// logging progress and accumulating totals, then calls <c>CreateHTML</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // Populate the shared city list from SitesLoader before visiting anything.
            Cities = SitesLoader.LoadCities();

            // Visit every city in turn.
            foreach (var site in Cities)
            {
                Console.WriteLine($"[*] crawling {site}");

                var gcg = new Gig(site);
                Console.WriteLine($"[*] crawled {gcg.Site} @ {gcg.Gigs.Count} gigs");

                // Fold this city's result into the running totals before reporting them.
                GigCount += gcg.Gigs.Count;
                Gigs.Add(gcg);
                Console.WriteLine($"[*] {Gigs.Count} cities crawled @ {GigCount} gigs");

                // Wait one second between cities (presumably to throttle requests - confirm).
                Thread.Sleep(1000);
            }

            CreateHTML();
        }
        /// <summary>
        /// Runs the technology finder for the technology named by <paramref name="id"/>
        /// and renders the sites on which it was found.
        /// </summary>
        /// <param name="id">
        /// Technology name. It selects the files <c>{id}StringPatterns.txt</c> and
        /// <c>{id}SitesURLs.txt</c> under <c>~/App_Data/</c>.
        /// </param>
        /// <returns>
        /// The view bound to a <c>TechnologyViewModel</c>, or a 404 result when
        /// <paramref name="id"/> is empty, contains characters that are not valid in a
        /// file name, or either data file does not exist.
        /// </returns>
        public ActionResult Technology(string id)
        {
            string technologyName = id;

            // The name comes from the request and ends up in a file path: reject anything
            // that is not a plain file-name fragment (e.g. path separators) so it cannot
            // address files outside App_Data.
            if (string.IsNullOrEmpty(technologyName)
                || technologyName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
            {
                return HttpNotFound();
            }

            string dataDirectory = Server.MapPath("~/App_Data/");
            string patternsFilePath = Path.Combine(dataDirectory, technologyName + "StringPatterns.txt");
            string sitesFilePath = Path.Combine(dataDirectory, technologyName + "SitesURLs.txt");

            // Fully qualified because inside a controller the bare name File refers to
            // the controller's File(...) result helper.
            if (!System.IO.File.Exists(patternsFilePath) || !System.IO.File.Exists(sitesFilePath))
            {
                return HttpNotFound();
            }

            // Open read-only and dispose deterministically: a leaked read/write handle
            // would keep the file locked and make the next request for it fail.
            using (FileStream patternsFile = new FileStream(patternsFilePath, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (FileStream sitesFile = new FileStream(sitesFilePath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                // Pass the requested name itself, not the literal text "technologyName".
                ITechnology technology = new Technology(technologyName, patternsFile);
                SitesLoader sitesLoader = new SitesLoader(sitesFile);

                TechnologyFinder finder = new TechnologyFinder(technology, sitesLoader.SitesURLs);
                finder.RunCrawler();

                TechnologyViewModel model = new TechnologyViewModel();
                model.Name = technology.Name;
                model.URLs = finder.SiteURLsWithTheTechnology;

                ViewData["Title"] = technologyName;
                ViewData["Message"] = "It's the " + technologyName + " message to view page";

                return View(model);
            }
        }
 /// <summary>
 /// Verifies that a <c>SitesLoader</c> can be constructed from <c>alexa1M.txt</c>
 /// located in the application base directory.
 /// </summary>
 public void LoadSitesFromFileMustSucessfullReadSitesFile()
 {
     string sitesPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "alexa1M.txt");

     // Dispose the stream once the assertion is done so the handle is not leaked.
     using (FileStream sitesFile = new FileStream(sitesPath, FileMode.Open))
     {
         SitesLoader sitesLoader = new SitesLoader(sitesFile);
         Assert.IsNotNull(sitesLoader);
     }
 }
 /// <summary>
 /// Builds the Shopify fixtures from <c>ShopifyStringPatterns.txt</c> and
 /// <c>ShopifySitesURLs.txt</c> in the application base directory.
 /// </summary>
 /// <param name="technology">Receives a <c>Technology</c> named "Shopify" built from the pattern file.</param>
 /// <param name="sitesLoader">Receives a <c>SitesLoader</c> built from the site URL file.</param>
 private static void SetupShopifyTechnology(out ITechnology technology, out SitesLoader sitesLoader)
 {
     // Path.Combine yields a single separator whether or not BaseDirectory ends with one.
     string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;

     // The streams are intentionally not disposed here: they are handed to the returned
     // objects, and it is not visible from this method whether those read them later.
     FileStream patternsFile = new FileStream(Path.Combine(baseDirectory, "ShopifyStringPatterns.txt"), FileMode.Open);
     technology = new Technology("Shopify", patternsFile);
     FileStream sitesFile = new FileStream(Path.Combine(baseDirectory, "ShopifySitesURLs.txt"), FileMode.Open);
     sitesLoader = new SitesLoader(sitesFile);
 }