/// <summary>
        /// Triggers link extraction for the stored page identified in the request body.
        /// </summary>
        /// <param name="page">Request body carrying the <c>PageId</c> of the page to process.</param>
        /// <returns>
        /// 400 Bad Request when no request body was bound; otherwise 202 Accepted once the
        /// link extraction call has completed. A failed extraction is logged and still
        /// answered with 202, as before.
        /// </returns>
        public async Task<IActionResult> ExtractLinksFromPage([FromBody] PageRequestId page)
        {
            // Guard before dereferencing page.PageId: a missing or unparseable body would
            // otherwise surface as a NullReferenceException (HTTP 500) instead of a 400.
            if (page is null)
            {
                return BadRequest("Request body is missing or could not be parsed");
            }

            logger?.LogInformation("Processing Links for {PageId}", page.PageId);

            // Positional deconstruction of the (success, count, error) result so a failure
            // is no longer silently discarded.
            var (isSuccessful, linksFollowed, errorMessage) = await findLinks.ParseLinksFromPageAsync(page);

            if (isSuccessful)
            {
                logger?.LogInformation("Followed {LinksFollowed} links for {PageId}", linksFollowed, page.PageId);
            }
            else
            {
                logger?.LogWarning("Link extraction failed for {PageId}: {ErrorMessage}", page.PageId, errorMessage);
            }

            return Accepted();
        }
        /// <summary>
        /// Placeholder endpoint for processing the static content of a stored page.
        /// Currently it only logs the request, waits briefly and acknowledges it.
        /// </summary>
        /// <param name="page">Request body carrying the <c>PageId</c> of the page to process.</param>
        /// <returns>
        /// 400 Bad Request when no request body was bound; otherwise 202 Accepted.
        /// </returns>
        public async Task<IActionResult> ProcessStaticContent([FromBody] PageRequestId page)
        {
            // Guard before dereferencing page.PageId: a missing or unparseable body would
            // otherwise surface as a NullReferenceException (HTTP 500) instead of a 400.
            if (page is null)
            {
                return BadRequest("Request body is missing or could not be parsed");
            }

            logger?.LogInformation("Processing Static Content for {PageId}", page.PageId);

            // Stand-in for the real processing work, which is not implemented yet.
            await Task.Delay(100);

            // TODO: persist the processed page and answer with 201 Created pointing at it
            // (an earlier draft called pageCollector.SavePageAsync and CreatedAtAction here).
            return Accepted();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Loads a stored page from the "PageStorage" service, extracts the links from its raw
        /// content and posts every absolute link whose host matches the page's domain to the
        /// "FetchPage" service (<c>api/ProcessUrl</c>).
        /// </summary>
        /// <param name="requestPage">Identifies the stored page whose links should be followed.</param>
        /// <returns>
        /// A tuple of: whether processing completed, how many same-domain links were
        /// successfully handed to the fetch service, and an error description
        /// (<c>null</c> on success).
        /// </returns>
        /// <remarks>
        /// The method never throws: any exception is logged and reported through the returned
        /// tuple. A failure to post one individual link is logged and excluded from the count
        /// but does not fail the whole operation.
        /// </remarks>
        public async Task<(bool IsSuccessful, int LinksFollowedCount, string ErrorMessage)> ParseLinksFromPageAsync(PageRequestId requestPage)
        {
            int linksFollowed = 0;

            if (requestPage is null)
            {
                return (false, linksFollowed, "No page was specified");
            }

            try
            {
                var pageStorageClient = httpClientFactory.CreateClient("PageStorage");

                // Dispose the response so its connection/content buffers are released promptly.
                using var result = await pageStorageClient.GetAsync($"api/Pages/{requestPage.PageId}");

                if (!result.IsSuccessStatusCode)
                {
                    return (false, linksFollowed, result.ReasonPhrase);
                }

                var page = JsonConvert.DeserializeObject<Model.Page>(await result.Content.ReadAsStringAsync());

                // An empty or literal "null" payload deserialises to null.
                if (page is null)
                {
                    return (false, linksFollowed, "Page storage returned an empty page");
                }

                var fetchUrlClient = httpClientFactory.CreateClient("FetchPage");

                // Posts one link to the fetch service. The request content is owned by this
                // function and is only disposed after the POST has completed; previously it
                // was disposed while the un-awaited request could still be sending it.
                async Task<bool> PostLinkAsync(string link)
                {
                    try
                    {
                        using var stringContent = new StringContent(JsonConvert.SerializeObject(new { url = link }), Encoding.UTF8, "application/json");
                        using var response = await fetchUrlClient.PostAsync("api/ProcessUrl", stringContent);

                        if (!response.IsSuccessStatusCode)
                        {
                            logger?.LogWarning("Fetch service rejected link -{Link}-: {ReasonPhrase}", link, response.ReasonPhrase);
                        }

                        return response.IsSuccessStatusCode;
                    }
                    catch (Exception e)
                    {
                        // Best effort per link: one unreachable link must not abort the rest.
                        logger?.LogError(e, "Failed to post link -{Link}- to the fetch service", link);
                        return false;
                    }
                }

                // Fully qualified so the block does not depend on an extra using directive.
                var pendingPosts = new System.Collections.Generic.List<Task<bool>>();

                // if domain link matches ship it off to FetchPage
                foreach (var (href, _) in ParseLinksFromContent(page.RawContent))
                {
                    // Need Requirement here, I'm assuming we save it even if we cannot Follow the link due to format issues
                    //_ = pageStorageClient.PostAsync($"api/Pages/{page.Id}/link");

                    // Host names are not linguistic text, so compare ordinally.
                    if (Uri.TryCreate(href, UriKind.Absolute, out var uri)
                        && uri.Host.Equals(page.Domain, StringComparison.OrdinalIgnoreCase))
                    {
                        logger?.LogInformation(
                            "Crawling to Page -{Link}- found on -{Domain}/{ResourceLocation}-",
                            href, page.Domain, page.ResourceLocation);

                        // Start the POST now (requests still run concurrently) and observe it below.
                        pendingPosts.Add(PostLinkAsync(href));
                    }
                }

                foreach (var delivered in await Task.WhenAll(pendingPosts))
                {
                    if (delivered)
                    {
                        linksFollowed++;
                    }
                }

                return (true, linksFollowed, null);
            }
            catch (Exception e)
            {
                logger?.LogError(e, "Failed to parse links for page {PageId}", requestPage.PageId);
                return (false, linksFollowed, e.Message);
            }
        }