/// <summary>
/// Issues a GET request for <paramref name="nextLink"/> and records the outcome
/// (status code, content type, body, timing) in a <see cref="DownloadResult"/>.
/// </summary>
/// <remarks>
/// The body is only read when the link passes the crawl policy: HTML pages require
/// <c>_followExternalLinks</c> or a URI starting with <c>_seed</c>; any other content type
/// requires <c>_downloadExternalContent</c> or a URI starting with <c>_seed</c>.
/// Results that pass the same policy are appended to <c>_progress</c> and
/// <c>_downloadResults</c>, including when the request failed.
/// Only <see cref="HttpRequestException"/> is handled here; any other exception propagates
/// to the caller after the result has been recorded.
/// </remarks>
/// <param name="nextLink">The link to download.</param>
private async Task DownloadLinkAsync(ParentLink nextLink)
{
    DownloadResult result = new DownloadResult(nextLink);

    // URLs are machine identifiers, so compare ordinally rather than with culture rules.
    bool isDomainResource = result.Uri.StartsWith(_seed, StringComparison.Ordinal);

    bool shouldAddLink(ContentType x) =>
        (x == ContentType.Html && (_followExternalLinks || isDomainResource)) ||
        (x != ContentType.Html && (_downloadExternalContent || isDomainResource));

    // Decodes the body with the charset declared by the server, falling back to UTF-8
    // (a superset of ASCII) when the charset is missing or not supported on this runtime.
    static string decodeBody(byte[] bytes, string charSet)
    {
        Encoding encoding = Encoding.UTF8;
        var name = charSet?.Trim().Trim('"', '\'');
        if (!string.IsNullOrEmpty(name))
        {
            try
            {
                encoding = Encoding.GetEncoding(name);
            }
            catch (ArgumentException)
            {
                // Unknown or unsupported charset name: keep the UTF-8 fallback.
            }
        }

        return encoding.GetString(bytes);
    }

    try
    {
        Stopwatch watch = Stopwatch.StartNew();
        // Dispose the response so its connection and buffers are released promptly.
        using var getResult = await _client.GetAsync(result.Uri);
        watch.Stop();

        result.Status = ((int)getResult.StatusCode).ToString();
        if (getResult.IsSuccessStatusCode)
        {
            result.IsSuccessCode = true;
            var resultContent = getResult.Content;
            var contentTypeHeader = resultContent.Headers.ContentType;
            result.ContentType = GetContentType(contentTypeHeader?.MediaType);

            if (shouldAddLink(result.ContentType))
            {
                var bytes = await resultContent.ReadAsByteArrayAsync();
                result.ContentLengthBytes = bytes.Length;
                result.Content = decodeBody(bytes, contentTypeHeader?.CharSet);
                // Measures the time until GetAsync returned; reading the body is not included.
                result.DownloadTime = watch.ElapsedMilliseconds;
                // The final request URI, which differs from the requested one after redirects.
                result.Redirectedto = getResult.RequestMessage.RequestUri.AbsoluteUri;
            }
        }
    }
    catch (HttpRequestException e)
    {
        result.Exception = e;
        Console.WriteLine($"ERROR: {nextLink.Link}, {e.Message}");
    }
    finally
    {
        // Runs on success and on failure; on failure result.ContentType still holds
        // whatever value DownloadResult was constructed with.
        if (shouldAddLink(result.ContentType))
        {
            _progress.Add(result.ToViewdownloadResult());
            _downloadResults.Add(result);
        }
    }
}
/// <summary>
/// Persists a downloaded page as a new <c>Page</c> row and, when the response has a
/// parent, a <c>PageRelation</c> row linking the parent page to the new page.
/// </summary>
/// <param name="response">The download result to store.</param>
/// <param name="websiteId">Identifier stored in the page's <c>WebsiteId</c>.</param>
/// <param name="sessionId">Identifier stored in the page's <c>SessionId</c>.</param>
/// <returns>The <c>Id</c> of the new page as read after <c>SaveChanges</c> completes.</returns>
/// <remarks>
/// Exceptions raised while saving propagate to the caller unchanged. The previous
/// <c>catch (Exception e) { throw e; }</c> added nothing except resetting the stack trace,
/// so it has been removed.
/// </remarks>
public int SaveLink(DownloadResult response, int websiteId, int sessionId)
{
    using ItsyBitsyDbContext context = new ItsyBitsyDbContext();

    var newPage = new Page()
    {
        SessionId = sessionId,
        StatusCode = response.Status,
        WebsiteId = websiteId,
        Uri = response.Uri,
        ContentType = (byte)response.ContentType,
        DownloadTime = response.DownloadTime,
        ContentLength = response.ContentLengthBytes
    };
    context.Page.Add(newPage);

    if (response.ParentId.HasValue)
    {
        // The relation references the new page by navigation property, so both rows
        // are written by the single SaveChanges call below.
        var newPageRelation = new PageRelation()
        {
            ParentPageId = response.ParentId.Value,
            ChildPage = newPage,
        };
        context.PageRelation.Add(newPageRelation);
    }

    context.SaveChanges();
    return newPage.Id;
}