/// <summary>
/// Creates a new instance of the search service.
/// </summary>
/// <param name="scrapingService">Service used to scrape source pages.</param>
/// <param name="blobClient">Azure Blob storage client.</param>
/// <param name="searchClient">Azure Cognitive Search client.</param>
/// <param name="logger">Logger for this service.</param>
public SearchService(ScrapingService scrapingService, CloudBlobClient blobClient, SearchServiceClient searchClient, ILogger<SearchService> logger)
{
    ScrapingService = scrapingService;
    BlobClient = blobClient;
    SearchClient = searchClient;
    Logger = logger;
}
/// <summary>
/// Scrapes a single gallery page and downloads every picture it links to, in parallel.
/// </summary>
/// <param name="scrapingService">Owning service; its Options may override the user/category names.</param>
/// <param name="url">Gallery page URL.</param>
/// <param name="userName">User name parsed so far (overridden by Options.Name when set).</param>
/// <param name="categoryName">Category name parsed so far (overridden by Options.Category when set).</param>
/// <param name="galleryName">Display name of the gallery.</param>
public override void ScrapeUrl(ScrapingService scrapingService, string url, string userName, string categoryName, string galleryName)
{
    // Explicit option overrides take precedence over the values parsed from the page.
    if (!string.IsNullOrEmpty(scrapingService.Options.Name))
    {
        userName = scrapingService.Options.Name;
    }
    if (!string.IsNullOrEmpty(scrapingService.Options.Category))
    {
        categoryName = scrapingService.Options.Category;
    }

    Console.Write($"GALLERY {userName} / {categoryName} / {galleryName} ");

    HtmlDocument htmlDoc = OpenDocument(url);

    // SelectNodes returns null (not an empty collection) when the XPath matches
    // nothing; guard so Parallel.ForEach is never handed a null source.
    var pictures = htmlDoc.DocumentNode.SelectNodes("//div[@id='gallery']//a");
    if (pictures != null)
    {
        Parallel.ForEach(
            pictures,
            node =>
            {
                string href = node.GetAttributeValue("href", string.Empty);
                if (href.StartsWith("/photo/"))
                {
                    DownloadPicture(scrapingService.Options, href, userName, categoryName, galleryName);
                }
            });
    }

    Console.WriteLine();
}
/// <summary>
/// Walks every gallery link on an organizer page and scrapes each distinct gallery once.
/// </summary>
/// <param name="scrapingService">Service used to scrape each discovered gallery URL.</param>
/// <param name="userName">Owner user name forwarded to each gallery scrape.</param>
/// <param name="categoryName">Category name forwarded to each gallery scrape.</param>
/// <param name="htmlDoc">Already-loaded organizer page document.</param>
private void ScrapOrganizerPage(ScrapingService scrapingService, string userName, string categoryName, HtmlDocument htmlDoc)
{
    Console.WriteLine($"ORGANIZER {userName} / {categoryName}");

    // SelectNodes yields null (not empty) when no anchor matches; bail out early.
    var galleries = htmlDoc.DocumentNode.SelectNodes("//a[@class='blk_galleries']");
    if (galleries == null)
    {
        return;
    }

    // HashSet gives O(1) de-duplication instead of List.Contains's linear scans.
    var processedUrls = new HashSet<string>();
    foreach (var node in galleries)
    {
        string href = node.Attributes["href"].Value;

        // Skip navigation pseudo-links, non-gallery links, and duplicates.
        // HashSet.Add returns false when the URL was already processed, so it
        // both tests and records membership in one call.
        if (node.InnerHtml.Contains("Overview") ||
            node.InnerHtml.Contains("[Show All]") ||
            node.InnerHtml.Contains("[Hide All]") ||
            !href.StartsWith("/gallery/") ||
            !processedUrls.Add(href))
        {
            continue;
        }

        // NOTE(review): href starts with "/gallery/", so BASE_URL + "/" + href
        // produces a double slash — confirm the server tolerates this.
        var responseUrl = href.StartsWith(BASE_URL)
            ? GetResponseUrl(href)
            : GetResponseUrl(BASE_URL + "/" + href);

        scrapingService.ScrapeUrl(
            responseUrl + "?view=2",
            userName,
            categoryName,
            node.InnerText.Replace("\t", string.Empty).Replace("\n", string.Empty));
    }
}
/// <summary>
/// Scrapes a user's galleries overview page: extracts the user name from the URL
/// and forwards each gallery link to the scraping service.
/// </summary>
/// <param name="scrapingService">Service used to scrape each gallery URL.</param>
/// <param name="url">Profile galleries URL, expected to contain "profile/&lt;user&gt;/galleries".</param>
/// <param name="userName">Ignored on input; recomputed from <paramref name="url"/>.</param>
/// <param name="categoryName">Unused by this handler.</param>
/// <param name="galleryName">Unused by this handler.</param>
public override void ScrapeUrl(ScrapingService scrapingService, string url, string userName, string categoryName, string galleryName)
{
    // NOTE(review): assumes both "profile/" and "/galleries" occur in url;
    // Substring throws on malformed input — confirm callers guarantee the shape.
    var startIndex = url.IndexOf("profile/") + 8;
    var length = url.IndexOf("/galleries") - startIndex;
    userName = url.Substring(startIndex, length);
    Console.WriteLine($"USER {userName}");

    HtmlDocument htmlDoc = OpenDocument(url);

    // SelectNodes returns null (not an empty collection) when nothing matches;
    // guard to avoid a NullReferenceException on pages with no gallery links.
    var galleries = htmlDoc.DocumentNode.SelectNodes("//a[@class='blk_galleries']");
    if (galleries == null)
    {
        return;
    }

    foreach (var node in galleries)
    {
        // Skip the navigation pseudo-links on the overview page.
        if (node.InnerHtml.Contains("<b>Overview</b>") ||
            node.InnerHtml.Contains("[Show All]") ||
            node.InnerHtml.Contains("[Hide All]"))
        {
            continue;
        }

        var responseUrl = GetResponseUrl(node.Attributes["href"].Value);
        scrapingService.ScrapeUrl(responseUrl, userName, node.InnerText, null);
    }
}
/// <summary>
/// Fetches the live stock price for Toyota (7203.T) and checks it is above 4000.
/// </summary>
public void TestMethod6()
{
    // stock by Toyota
    string code = "7203.T";

    // Block synchronously on the async call. The original Task.Run wrapper added
    // nothing here (test runners have no capturing sync context to deadlock on).
    // NOTE(review): prefer making the test method 'async Task' if the framework
    // version supports it.
    int res = ScrapingService.GetFinancePriceAsync(code).GetAwaiter().GetResult();
    Console.WriteLine($"stock price : {res}");

    // NOTE(review): asserts against a live market price — inherently flaky.
    Assert.IsTrue(res > 4000);
}
/// <summary>
/// Returns scraping results for the repository identified by <paramref name="user"/>/<paramref name="repo"/>.
/// </summary>
/// <param name="user">Repository owner, from the query string.</param>
/// <param name="repo">Repository name, from the query string.</param>
/// <param name="scrapingService">Injected scraping service.</param>
/// <returns>The result list, or 400 Bad Request when scraping fails.</returns>
public async Task<ActionResult<IEnumerable<ViewModel>>> GetAsync([FromQuery] string user, [FromQuery] string repo, [FromServices] ScrapingService scrapingService)
{
    try
    {
        var results = await scrapingService.GetResultRepositoryAsync($"{user}/{repo}");
        return results.ToList();
    }
    catch (Exception)
    {
        // Any scraping failure is surfaced to the client as a 400.
        return BadRequest("It was not possible to obtain results for this repository");
    }
}
/// <summary>
/// Scrapes an organizer page, following pagination links when present.
/// </summary>
/// <param name="scrapingService">Service forwarded to the per-page scrape.</param>
/// <param name="url">Base organizer URL; page suffixes are appended to it.</param>
/// <param name="userName">Owner user name forwarded to the per-page scrape.</param>
/// <param name="categoryName">Category name forwarded to the per-page scrape.</param>
/// <param name="galleryName">Unused by this handler.</param>
public override void ScrapeUrl(ScrapingService scrapingService, string url, string userName, string categoryName, string galleryName)
{
    HtmlDocument htmlDoc = OpenDocument(url);
    var pages = ExtractPages(htmlDoc);

    if (pages.Count == 0)
    {
        // Single-page organizer: scrape the document we already loaded.
        ScrapOrganizerPage(scrapingService, userName, categoryName, htmlDoc);
        return;
    }

    // Multi-page organizer: load and scrape each paginated view.
    foreach (string containedUrl in pages)
    {
        ScrapOrganizerPage(scrapingService, userName, categoryName, OpenDocument(url + containedUrl));
    }
}
/// <summary>
/// Entry point: loads team/member state from JSON files, refreshes ladder and
/// stash data by scraping, recomputes team points, and persists everything back.
/// </summary>
static async Task Main(string[] args)
{
    //CsvReaderService _csvReader = new CsvReaderService();
    //_csvReader.ReadTeamFile();
    LadderService _ladder = new LadderService();   // NOTE(review): never used below
    List<Team> teams = new List<Team>();           // NOTE(review): never used below
    ScrapingService _scrapper = new ScrapingService();

    // Load the four teams and their item lists from JSON on disk.
    Team theFormed = JsonSerializer.Deserialize<Team>(File.ReadAllText(filePathBase + filePathTheFormed));
    Team theTwisted = JsonSerializer.Deserialize<Team>(File.ReadAllText(filePathBase + filePathTheTwisted));
    Team theFeared = JsonSerializer.Deserialize<Team>(File.ReadAllText(filePathBase + filePathTheFeared));
    Team theHidden = JsonSerializer.Deserialize<Team>(File.ReadAllText(filePathBase + filePathTheHidden));
    List<Item> theFormedItems = JsonSerializer.Deserialize<List<Item>>(File.ReadAllText(filePathBase + filePathTheFormedItems));
    List<Item> theTwistedItems = JsonSerializer.Deserialize<List<Item>>(File.ReadAllText(filePathBase + filePathTheTwistedItems));
    List<Item> theFearedItems = JsonSerializer.Deserialize<List<Item>>(File.ReadAllText(filePathBase + filePathTheFearedItems));
    // NOTE(review): despite the "Teams" suffix, this holds the Hidden team's *items*.
    List<Item> theHiddenTeams = JsonSerializer.Deserialize<List<Item>>(File.ReadAllText(filePathBase + filePathTheHiddenItems));
    List<Member> members = JsonSerializer.Deserialize<List<Member>>(File.ReadAllText(filePathBase + "\\BPL3Members.json"));

    // Refresh ladder data for all members and persist it immediately.
    List<Member> updateMember = await GetLadder(members, new List<Team> { theFormed, theTwisted, theFeared, theHidden });
    SerializeJsonService.SerializeJson(updateMember, filePathBase + "\\BPL3Members.json");

    // Scrape each team's stash for updated items.
    TeamItem updateFormed = await _scrapper.GetItems(theFormed.StashUrl, new TeamItem { Team = theFormed, Items = theFormedItems });
    TeamItem updateFearedItems = await _scrapper.GetItems(theFeared.StashUrl, new TeamItem { Team = theFeared, Items = theFearedItems });
    TeamItem updateTwistedItems = await _scrapper.GetItems(theTwisted.StashUrl, new TeamItem { Team = theTwisted, Items = theTwistedItems });
    TeamItem updateHiddenItems = await _scrapper.GetItems(theHidden.StashUrl, new TeamItem { Team = theHidden, Items = theHiddenTeams });

    // NOTE(review): these filters use the original 'members' list, not the
    // freshly updated 'updateMember' — confirm that is intentional.
    List<Member> theTwistedMembers = members.Where(m => m.TeamName == "The Twisted").ToList();
    List<Member> theFearedMembers = members.Where(m => m.TeamName == "The Feared").ToList();
    List<Member> theHiddenMembers = members.Where(m => m.TeamName == "The Hidden").ToList();
    List<Member> theFormedMembers = members.Where(m => m.TeamName == "The Formed").ToList();

    // Flatten per-team point results into one list. Judging by how the indices
    // are consumed below, CalcPoints apparently returns two values per team
    // (level points, then delve points) — TODO confirm against CalcPoints.
    List<int> points = new List<int>();
    points.AddRange(CalcPoints(theTwistedMembers));
    points.AddRange(CalcPoints(theFearedMembers));
    points.AddRange(CalcPoints(theHiddenMembers));
    points.AddRange(CalcPoints(theFormedMembers));

    // Indices are position-coupled to the AddRange order above: Twisted=0/1,
    // Feared=2/3, Hidden=4/5, Formed=6/7.
    theTwisted.LevelPoints = points[0];
    theTwisted.DelvePoints = points[1];
    theTwisted.TotalPoints = theTwisted.LevelPoints + theTwisted.DelvePoints + theTwisted.SetPoints;
    theFeared.LevelPoints = points[2];
    theFeared.DelvePoints = points[3];
    theFeared.TotalPoints = theFeared.LevelPoints + theFeared.DelvePoints + theFeared.SetPoints;
    theHidden.LevelPoints = points[4];
    theHidden.DelvePoints = points[5];
    theHidden.TotalPoints = theHidden.LevelPoints + theHidden.DelvePoints + theHidden.SetPoints;
    theFormed.LevelPoints = points[6];
    theFormed.DelvePoints = points[7];
    theFormed.TotalPoints = theFormed.LevelPoints + theFormed.DelvePoints + theFormed.SetPoints;

    // Persist the updated teams and item lists back to their JSON files.
    SerializeJsonService.SerializeJson(updateFormed.Team, filePathBase + filePathTheFormed);
    SerializeJsonService.SerializeJson(updateFormed.Items, filePathBase + filePathTheFormedItems);
    SerializeJsonService.SerializeJson(updateHiddenItems.Team, filePathBase + filePathTheHidden);
    SerializeJsonService.SerializeJson(updateHiddenItems.Items, filePathBase + filePathTheHiddenItems);
    SerializeJsonService.SerializeJson(updateTwistedItems.Team, filePathBase + filePathTheTwisted);
    SerializeJsonService.SerializeJson(updateTwistedItems.Items, filePathBase + filePathTheTwistedItems);
    SerializeJsonService.SerializeJson(updateFearedItems.Team, filePathBase + filePathTheFeared);
    SerializeJsonService.SerializeJson(updateFearedItems.Items, filePathBase + filePathTheFearedItems);
}
/// <summary>
/// Application entry point: runs a single scrape pass.
/// </summary>
static void Main(string[] args)
{
    var service = new ScrapingService();
    service.Scrape();
}
/// <summary>
/// Demo driver for the Agenty scraping API: queues URLs on an agent, starts a
/// scraping job, polls until it finishes, then downloads the result to disk.
/// </summary>
static void Main(string[] args)
{
    string agentId = "qgddevdqzm";
    var agentyService = new ScrapingService();
    List<string> urls = new List<string>
    {
        "http://books.toscrape.com/catalogue/set-me-free_988/index.html",
        "http://books.toscrape.com/catalogue/starving-hearts-triangular-trade-trilogy-1_990/index.html",
        "http://books.toscrape.com/catalogue/the-black-maria_991/index.html"
    };

    Console.WriteLine("************Starting Agenty Service************");

    // Add URLs to Agent
    Console.WriteLine("****Add URLs to Agent****");
    var response1 = agentyService.AddUrlsToAgent(agentId, urls);
    Console.WriteLine($"StatusCode : {response1.status_code}");
    Console.WriteLine($"Message : {response1.message}");

    // Start the Scraping Job
    Console.WriteLine("****Start the Scraping Job****");
    var response2 = agentyService.StartScrapingAgent(agentId);
    Console.WriteLine($"StatusCode : {response2.status_code}");
    Console.WriteLine($"Message : {response2.message}");
    Console.WriteLine($"Job Id : {response2.job_id}");

    // Poll until the job reaches a terminal state.
    // Contains replaces the original Any(x => x == jobStatus) — same semantics, simpler.
    Console.WriteLine("****Check job status in loop****");
    string[] completedStatus = new string[] { "completed", "stopped", "aborted" };
    string jobStatus = "running";
    while (!completedStatus.Contains(jobStatus))
    {
        var response3 = agentyService.GetJobStatus(response2.job_id);
        jobStatus = response3.status;
        Console.WriteLine($"Job status : {response3.status} - Pages processed: ({response3.pages_processed}/{response3.pages_total})");
        if (!completedStatus.Contains(jobStatus))
        {
            Console.WriteLine($"Rechecking status after 2 second...");
            Thread.Sleep(2000);
        }
    }

    Console.WriteLine("****Download scraping job result****");
    var response4 = agentyService.GetJobResult(response2.job_id);
    Console.WriteLine($"Total rows in result : {response4.total}");

    string localPath = @"Y:\scraping\result.txt";
    var json = JsonConvert.SerializeObject(response4.result);
    // Generic overload avoids the untyped DeserializeObject + runtime cast.
    DataTable table = JsonConvert.DeserializeObject<DataTable>(json);
    WriteToFile(table, localPath);
    Console.WriteLine($"Result download at: {localPath}");
    Console.ReadKey();
}
/// <summary>
/// Creates the ranking service: resolves the shared scraper from DI and compiles
/// the regex used to extract fields from ranking-table HTML rows.
/// </summary>
/// <param name="provider">Service provider used to resolve <see cref="ScrapingService"/>.</param>
public RankingService(IServiceProvider provider) : base(provider)
{
    _scraper = provider.GetRequiredService<ScrapingService>();

    // Matches one ranking-table row. Named capture groups:
    //   name  — 4-12 alphanumeric characters inside <b>…</b>
    //   job   — 4-25 characters of job text following the job-icon cell
    //   level — 1-3 digit level inside <b>…</b>
    // Optional pieces handle the guild-master/islander badge, emblem image,
    // and both <br> / <br/> forms.
    // NOTE(review): parsing HTML with a regex is brittle; if the site markup
    // changes this silently stops matching. Pattern kept byte-identical here.
    _maplerRegex = new Regex(@"<td class=""align-middle"">(<img src=""/static/images/rank/(guild_master|islander)\.png"">(</img>)?(<br>|<br/>))?<b>(?<name>[A-Za-z0-9]{4,12})</b>(<br>|<br/>)\s*(<img src=""/static/images/rank/emblem/\d{8}\.\d{1,2}\s*\.png"">\s*)?\w{0,12}</td>\s*<!--job-->\s*<td class=""align-middle""><img src=""/static/images/rank/[a-zA-Z]{5,8}\.png"">(</img>)?(<br>|<br/>)(?<job>[\w()/\s]{4,25})</td>\s*<!--level & exp -->\s*<td class=""align-middle""><b>(?<level>\d{1,3})</b>(<br>|<br/>)");
}
/// <summary>
/// Creates the scoreboard controller with its logger and scraping dependency.
/// </summary>
/// <param name="log">Logger instance.</param>
/// <param name="scrapingService">Service providing scoreboard data.</param>
public ScoreBoardController(ILogger<ScrapingService> log, ScrapingService scrapingService)
{
    _scrapingService = scrapingService;
    _log = log;
}
/// <summary>
/// Per-test setup: wires a mocked HTTP client into the service under test.
/// </summary>
public ScrapingServiceTests()
{
    var httpClientMock = new Mock<ITvMazeHttpClient>();
    _httpClient = httpClientMock;
    _target = new ScrapingService(httpClientMock.Object);
}
/// <summary>
/// Scrapes the given URL. This base implementation is a deliberate no-op;
/// derived page handlers override it with page-specific scraping logic.
/// </summary>
/// <param name="scrapingService">Owning scraping service.</param>
/// <param name="url">URL of the page to scrape.</param>
/// <param name="userName">User name context for the page.</param>
/// <param name="categoryName">Category context for the page.</param>
/// <param name="galleryName">Gallery context for the page.</param>
public virtual void ScrapeUrl(ScrapingService scrapingService, string url, string userName, string categoryName, string galleryName) { }
/// <summary>
/// Creates the controller with its database context and a fresh scraping service.
/// </summary>
/// <param name="db">MPSP database context.</param>
public ArpenspController(MPSPDbContext db)
{
    scrap = new ScrapingService();
    _db = db;
}
/// <summary>
/// Creates the controller with its database context and a fresh scraping service.
/// </summary>
/// <param name="db">MPSP database context.</param>
public CagedController(MPSPDbContext db)
{
    scrap = new ScrapingService();
    _db = db;
}
/// <summary>
/// Creates the controller with its database context and a configuration-backed
/// scraping service.
/// </summary>
/// <param name="db">MPSP database context.</param>
/// <param name="conf">Application configuration passed to the scraping service.</param>
public InfocrimController(MPSPDbContext db, IConfiguration conf)
{
    scrap = new ScrapingService(conf);
    _db = db;
}
/// <summary>
/// Creates the web-job method host with its injected scraping service.
/// </summary>
/// <param name="scrapingService">Scraping service used by the job methods.</param>
public WebJobMethods(ScrapingService scrapingService)
{
    _scrapingService = scrapingService;
}