/// <summary>
/// Asks the link finder to parse the links of the page identified in the request body.
/// </summary>
/// <param name="page">Request body carrying the <c>PageId</c> of the page to process.</param>
/// <returns>
/// <c>400 Bad Request</c> when no request body could be bound; otherwise <c>202 Accepted</c>
/// once <c>ParseLinksFromPageAsync</c> has completed, whether or not it reported success.
/// </returns>
public async Task<IActionResult> ExtractLinksFromPage([FromBody] PageRequestId page)
{
    // Guard first: the log statement below dereferences page.
    if (page is null)
    {
        return BadRequest("Request body with a page id is required");
    }

    logger?.LogInformation($"Processing Links for {page.PageId}");

    var (isSuccessful, _, errorMessage) = await findLinks.ParseLinksFromPageAsync(page);
    if (!isSuccessful)
    {
        // The response stays 202 for backward compatibility, but the failure is no
        // longer dropped silently.
        logger?.LogWarning("Link extraction failed for page {PageId}: {Error}", page.PageId, errorMessage);
    }

    return Accepted();
}
/// <summary>
/// Accepts a request to process the static content of the page identified in the request body.
/// </summary>
/// <param name="page">Request body carrying the <c>PageId</c> of the page to process.</param>
/// <returns>
/// <c>400 Bad Request</c> when no request body could be bound; otherwise <c>202 Accepted</c>.
/// </returns>
public async Task<IActionResult> ProcessStaticContent([FromBody] PageRequestId page)
{
    // Guard first: the log statement below dereferences page.
    if (page is null)
    {
        return BadRequest("Request body with a page id is required");
    }

    logger?.LogInformation($"Processing Static Content for {page.PageId}");

    // NOTE(review): no processing happens here yet; the delay looks like a placeholder
    // for the real work - confirm before removing.
    // TODO: save the page through the page collector and return CreatedAtAction for it.
    await Task.Delay(100);

    return Accepted();
}
/// <summary>
/// Loads a stored page from the "PageStorage" client, extracts its links, and submits every
/// absolute link whose host equals the page's domain to the "FetchPage" client.
/// </summary>
/// <param name="requestPage">Identifies the stored page to load via its <c>PageId</c>.</param>
/// <returns>
/// <c>IsSuccessful</c> is false when the page could not be loaded or an exception occurred, in
/// which case <c>ErrorMessage</c> carries the reason. <c>LinksFollowedCount</c> is the number of
/// links dispatched for crawling; dispatch is not awaited, so it does not reflect whether the
/// remote service accepted them.
/// </returns>
public async Task<(bool IsSuccessful, int LinksFollowedCount, string ErrorMessage)> ParseLinksFromPageAsync(PageRequestId requestPage)
{
    int linksFollowed = 0;
    try
    {
        var pageStorageClient = httpClientFactory.CreateClient("PageStorage");
        using var result = await pageStorageClient.GetAsync($"api/Pages/{requestPage.PageId}");
        if (!result.IsSuccessStatusCode)
        {
            return (false, linksFollowed, result.ReasonPhrase);
        }

        var page = JsonConvert.DeserializeObject<Model.Page>(await result.Content.ReadAsStringAsync());
        if (page is null)
        {
            // An empty or literal "null" body deserializes to null; report it explicitly
            // instead of failing later with a NullReferenceException.
            return (false, linksFollowed, "Page storage returned an empty page");
        }

        var fetchUrlClient = httpClientFactory.CreateClient("FetchPage");

        // If the link's host matches the page's domain, ship it off to FetchPage.
        foreach (var (href, _) in ParseLinksFromContent(page.RawContent))
        {
            // TODO: requirement needed - the assumption so far is that a link is saved to page
            // storage (api/Pages/{page.Id}/link) even when it cannot be followed due to format issues.
            if (!Uri.TryCreate(href, UriKind.Absolute, out var uri))
            {
                continue;
            }

            // Host names are identifiers, not linguistic text, so compare ordinally.
            if (!uri.Host.Equals(page.Domain, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            logger?.LogInformation($"Crawling to Page -{page.Domain}/{page.ResourceLocation}-");

            // Deliberately not awaited so this method does not block on the crawl of each
            // link; the helper owns the request content and observes its own failures.
            _ = SubmitLinkForCrawlAsync(fetchUrlClient, href);
            linksFollowed++;
        }

        return (true, linksFollowed, null);
    }
    catch (Exception e)
    {
        logger?.LogError(e, "Failed to parse links from page");
        return (false, linksFollowed, e.Message);
    }
}

/// <summary>
/// Posts a single link to <c>api/ProcessUrl</c>, keeping the request content alive until the
/// request completes and logging any failure so that no task fault goes unobserved.
/// </summary>
private async Task SubmitLinkForCrawlAsync(HttpClient fetchUrlClient, string href)
{
    try
    {
        using var stringContent = new StringContent(JsonConvert.SerializeObject(new { url = href }), Encoding.UTF8, "application/json");
        using var response = await fetchUrlClient.PostAsync("api/ProcessUrl", stringContent);
        if (!response.IsSuccessStatusCode)
        {
            logger?.LogWarning("Submitting {Href} for crawling returned {StatusCode}", href, (int)response.StatusCode);
        }
    }
    catch (Exception e)
    {
        // Boundary of a fire-and-forget operation: nothing upstream awaits this task,
        // so the exception must be handled here.
        logger?.LogError(e, "Failed to submit {Href} for crawling", href);
    }
}