/// <summary>
/// Verifies that the mock crawl payload deserializes into a
/// <c>PagesResponse</c> with the expected number of pages.
/// </summary>
public void TestJsonToObject()
{
    // Arrange: locate the JSON fixture shipped alongside the test binaries.
    var mockFile = Path.Combine(Environment.CurrentDirectory, "MockData", "internet1.json");

    // Act: deserialize the fixture into the strongly typed response.
    var response = CrawlHelper.JsonToObject<PagesResponse>(mockFile);

    // Assert: the fixture is known to contain exactly five pages.
    Assert.AreEqual(5, response.Pages.Length);
}
/// <summary>
/// Runs a crawl over the data at <paramref name="loc"/> and returns the result rows.
/// </summary>
/// <param name="loc">Path to the JSON file describing the pages to crawl.</param>
/// <returns>The rows produced by <c>Crawler.ProcessCrawl</c>.</returns>
private List<ResultModel> doCrawl(string loc)
{
    // NOTE(review): the original also called CrawlHelper.JsonToObject<PagesResponse>(loc)
    // and discarded the result — a dead deserialization of the same file that
    // ProcessCrawl reads itself. That unused call has been removed.
    var rows = Crawler.ProcessCrawl(loc);
    return rows;
}
/// <summary>
/// Verifies that every page in the mock payload projects to a seed
/// <c>PageModel</c>, one model per page.
/// </summary>
public void TestToPageModelList()
{
    // Arrange: deserialize the shared JSON fixture.
    var mockFile = Path.Combine(Environment.CurrentDirectory, "MockData", "internet1.json");
    var response = CrawlHelper.JsonToObject<PagesResponse>(mockFile);

    // Act: project each crawled page into a seed-URL model.
    var models = response.Pages
        .Select(page => new PageModel { Address = page.Address, IsSeedURL = true })
        .ToList();

    // Assert: one model per page in the five-page fixture.
    Assert.AreEqual(5, models.Count());
}
/// <summary>
/// Crawls the images for the given profile, marks it as crawled, and
/// persists the updated document back to the profile store.
/// </summary>
/// <param name="profile">The profile to crawl; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="profile"/> is null.</exception>
public async Task CrawlProfileImagesAsync(Profile profile)
{
    // Guard clause: reject a missing profile up front.
    if (profile == null)
    {
        throw new ArgumentNullException(nameof(profile));
    }

    CrawlHelper.CrawlProfile(profile.UserName);

    // Record the new status and replace the stored document by id.
    profile.ProfileStatus = ProfileStatus.CRAWLED;
    await _profiles.ReplaceOneAsync(p => p.Id == profile.Id, profile);
}
/// <summary>
/// Verifies that flattening every page's outbound links into non-seed
/// models and de-duplicating by address yields the expected count.
/// </summary>
public void TestPageModelLinksList()
{
    // Arrange: deserialize the shared JSON fixture.
    var mockFile = Path.Combine(Environment.CurrentDirectory, "MockData", "internet1.json");
    var response = CrawlHelper.JsonToObject<PagesResponse>(mockFile);

    // Act: flatten every link into a non-seed PageModel, then keep the first
    // model seen for each distinct address (duplicates across pages collapse).
    var linkModels = response.Pages
        .SelectMany(page => page.Links
            .Select(link => new PageModel { Address = link, IsSeedURL = false })
            .ToList())
        .GroupBy(model => model.Address)
        .Select(group => group.First())
        .ToList();

    // Assert: the fixture contains seven distinct link addresses.
    Assert.AreEqual(7, linkModels.Count());
}
/// <summary>
/// Looks up a profile by id, crawls its images, marks it as crawled, and
/// persists the updated document back to the profile store.
/// </summary>
/// <param name="id">The id of the profile to crawl.</param>
/// <exception cref="InvalidOperationException">No profile exists with the given id.</exception>
public async Task CrawlProfileImagesAsync(string id)
{
    var profile = await _profiles.Find<Profile>(p => p.Id == id).FirstOrDefaultAsync();
    if (profile == null)
    {
        // InvalidOperationException is more specific than the bare Exception the
        // original threw; callers catching Exception still catch this, so the
        // change is backward-compatible.
        throw new InvalidOperationException($"Cannot find profile with Id: {id}");
    }

    CrawlHelper.CrawlProfile(profile.UserName);

    // Record the new status and replace the stored document by id.
    profile.ProfileStatus = ProfileStatus.CRAWLED;
    await _profiles.ReplaceOneAsync(p => p.Id == id, profile);
}
/// <summary>
/// Crawls the simulated overlay starting from the first node and reports
/// whether the crawl completed successfully.
/// </summary>
/// <param name="log">Whether the crawl helper should log its progress.</param>
/// <param name="secure">Whether to crawl through the node's security overlord.</param>
/// <returns>True when the crawl helper reports success.</returns>
public bool Crawl(bool log, bool secure)
{
    var first = (NodeMapping)Nodes.GetByIndex(0);

    // Only hand the helper a security overlord for secure crawls.
    ProtocolSecurityOverlord overlord = null;
    if (secure)
    {
        overlord = first.BSO;
    }

    var helper = new CrawlHelper(first.Node, Nodes.Count, overlord, log);
    helper.Start();

    // Pump the simulation clock until the crawl signals completion.
    while (helper.Done == 0)
    {
        SimpleTimer.RunStep();
    }

    return helper.Success;
}
/// <summary>
/// Prompts for a site URL, collects every URL for its sitemap, and writes
/// the resulting sitemap.xml to the current user's desktop.
/// </summary>
static void Main(string[] args)
{
    // The original printed this prompt but never read the answer and always
    // crawled the hard-coded default; now the input is honored, with the old
    // default kept as a fallback for an empty line.
    Console.WriteLine("Enter the url of website");
    var input = Console.ReadLine();
    var url = string.IsNullOrWhiteSpace(input)
        ? "https://techgeek.nowfloats.com"
        : input.Trim();

    // Write next to the running user's desktop instead of the original
    // machine-specific path (C:\Users\Ravindra Naik\Desktop\sitemap.xml).
    var path = System.IO.Path.Combine(
        Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory),
        "sitemap.xml");

    var urls = CrawlHelper.GetAllUrlsForSitemap(url);

    var siteMap = new Sitemap.Sitemap();
    foreach (var webUrl in urls)
    {
        siteMap.Add(new SitemapLocation
        {
            ChangeFrequency = SitemapLocation.eChangeFrequency.daily,
            Url = webUrl
        });
    }

    siteMap.WriteSitemapToFile(path);
}
/// <summary>
/// Crawls the simulated overlay starting from the first mapped node and
/// reports whether the crawl completed successfully.
/// </summary>
/// <param name="log">Whether the crawl helper should log its progress.</param>
/// <param name="secure">Whether to crawl through the node's symphony security overlord.</param>
/// <returns>True when the crawl helper reports success.</returns>
public bool Crawl(bool log, bool secure)
{
    NodeMapping first = Nodes.Values[0];

    // Only hand the helper a security overlord for secure crawls.
    SymphonySecurityOverlord overlord = null;
    if (secure)
    {
        overlord = first.Sso;
    }

    var helper = new CrawlHelper(first.Node, Nodes.Count, overlord, log);
    helper.Start();

    // Pump the simulation clock until the crawl signals completion.
    while (helper.Done == 0)
    {
        SimpleTimer.RunStep();
    }

    return helper.Success;
}
/// <summary>
/// Crawls the simulated overlay starting from the first node and reports
/// whether the crawl completed successfully.
/// </summary>
/// <param name="log">Whether the crawl helper should log its progress.</param>
/// <returns>True when the crawl helper reports success.</returns>
protected static bool Crawl(bool log)
{
    var first = (NodeMapping)nodes.GetByIndex(0);

    var helper = new CrawlHelper(first.Node, nodes.Count, log);
    helper.Start();

    // Step the simulation until the crawl signals completion.
    while (helper.Done == 0)
    {
        RunStep();
    }

    return helper.Success;
}
/// <summary>
/// Crawls the simulated overlay starting from the first node and reports
/// whether the crawl completed successfully.
/// </summary>
/// <param name="log">Whether the crawl helper should log its progress.</param>
/// <param name="secure">Whether to crawl through the node's security overlord.</param>
/// <returns>True when the crawl helper reports success.</returns>
protected static bool Crawl(bool log, bool secure)
{
    var first = (NodeMapping)nodes.GetByIndex(0);

    // Only hand the helper a security overlord for secure crawls.
    BrunetSecurityOverlord overlord = null;
    if (secure)
    {
        overlord = first.BSO;
    }

    var helper = new CrawlHelper(first.Node, nodes.Count, overlord, log);
    helper.Start();

    // Step the simulation until the crawl signals completion.
    while (helper.Done == 0)
    {
        RunStep();
    }

    return helper.Success;
}
/// <summary>
/// Crawls the simulated overlay starting from the first node and reports
/// whether the crawl completed successfully.
/// </summary>
/// <param name="log">Whether the crawl helper should log its progress.</param>
/// <param name="secure">Whether to crawl through the node's security overlord.</param>
/// <returns>True when the crawl helper reports success.</returns>
public bool Crawl(bool log, bool secure)
{
    var first = (NodeMapping)Nodes.GetByIndex(0);

    // Pass a security overlord only when a secure crawl was requested.
    var overlord = secure ? first.BSO : (ProtocolSecurityOverlord)null;

    var helper = new CrawlHelper(first.Node, Nodes.Count, overlord, log);
    helper.Start();

    // Pump the simulation clock until the crawl signals completion.
    while (helper.Done == 0)
    {
        SimpleTimer.RunStep();
    }

    return helper.Success;
}