Example #1
0
        /// <summary>
        /// Downloads the resource behind <paramref name="nextLink"/> and records the outcome.
        /// </summary>
        /// <remarks>
        /// HTML resources are kept when they live under <c>_seed</c> or when
        /// <c>_followExternalLinks</c> is set; any other content type is kept when it lives
        /// under <c>_seed</c> or when <c>_downloadExternalContent</c> is set. The body is only
        /// read and decoded for resources that pass that check. Whatever happens, the result is
        /// appended to <c>_progress</c> and <c>_downloadResults</c> if its (possibly never
        /// assigned) content type passes the same check.
        /// Only <see cref="HttpRequestException"/> is handled here; any other exception
        /// (for example a request timeout) propagates to the caller.
        /// </remarks>
        /// <param name="nextLink">The link to download.</param>
        /// <returns>A task that completes once the download attempt has been recorded.</returns>
        private async Task DownloadLinkAsync(ParentLink nextLink)
        {
            DownloadResult result = new DownloadResult(nextLink);

            // URLs are identifiers, not linguistic text, so compare them ordinally
            // instead of with the current culture's collation rules.
            bool isDomainResource = result.Uri.StartsWith(_seed, StringComparison.Ordinal);

            bool shouldAddLink(ContentType x) => (x == ContentType.Html && (_followExternalLinks || isDomainResource)) ||
            (x != ContentType.Html && (_downloadExternalContent || isDomainResource));

            // Maps the charset announced by the server to an Encoding. Falls back to UTF-8
            // (a superset of ASCII) when the charset is missing, unknown or unsupported.
            Encoding resolveEncoding(string charSet)
            {
                string name = charSet?.Trim().Trim('"');
                if (string.IsNullOrEmpty(name))
                {
                    return Encoding.UTF8;
                }

                try
                {
                    return Encoding.GetEncoding(name);
                }
                catch (ArgumentException)
                {
                    return Encoding.UTF8;
                }
            }

            try
            {
                Stopwatch watch = Stopwatch.StartNew();

                // Dispose the response so its content buffer and connection are released promptly.
                using (var getResult = await _client.GetAsync(result.Uri))
                {
                    watch.Stop();
                    result.Status = ((int)getResult.StatusCode).ToString();

                    if (getResult.IsSuccessStatusCode)
                    {
                        result.IsSuccessCode = true;
                        var resultContent = getResult.Content;
                        result.ContentType = GetContentType(resultContent.Headers.ContentType?.MediaType);

                        if (shouldAddLink(result.ContentType))
                        {
                            var bytes = await resultContent.ReadAsByteArrayAsync();
                            var encoding = resolveEncoding(resultContent.Headers.ContentType?.CharSet);

                            result.ContentLengthBytes = bytes.Length;
                            // Decode with the declared charset; ASCII would turn every
                            // non-ASCII character into '?'.
                            result.Content            = encoding.GetString(bytes);
                            result.DownloadTime       = watch.ElapsedMilliseconds;
                            // The final request URI differs from result.Uri when redirects were followed.
                            result.Redirectedto       = getResult.RequestMessage.RequestUri.AbsoluteUri;
                        }
                    }
                }
            }
            catch (HttpRequestException e)
            {
                result.Exception = e;
                Console.WriteLine($"ERROR: {nextLink.Link}, {e.Message}");
            }
            finally
            {
                // Runs on success and on failure alike; on failure ContentType still holds
                // whatever value the DownloadResult was created with.
                if (shouldAddLink(result.ContentType))
                {
                    _progress.Add(result.ToViewdownloadResult());
                    _downloadResults.Add(result);
                }
            }
        }
Example #2
0
        /// <summary>
        /// Stores a downloaded page and, when the download has a parent, the relation
        /// linking the new page to that parent, in a single <c>SaveChanges</c> call.
        /// </summary>
        /// <param name="response">The download result whose status, URI, content type, download time and length are stored.</param>
        /// <param name="websiteId">Value written to the new page's <c>WebsiteId</c>.</param>
        /// <param name="sessionId">Value written to the new page's <c>SessionId</c>.</param>
        /// <returns>The <c>Id</c> of the new page, read after the changes were saved.</returns>
        public int SaveLink(DownloadResult response, int websiteId, int sessionId)
        {
            // Exceptions are deliberately left to propagate untouched: catching and
            // re-throwing with `throw e;` would discard the original stack trace.
            using ItsyBitsyDbContext context = new ItsyBitsyDbContext();

            var newPage = new Page()
            {
                SessionId     = sessionId,
                StatusCode    = response.Status,
                WebsiteId     = websiteId,
                Uri           = response.Uri,
                ContentType   = (byte)response.ContentType,
                DownloadTime  = response.DownloadTime,
                ContentLength = response.ContentLengthBytes
            };
            context.Page.Add(newPage);

            if (response.ParentId.HasValue)
            {
                // Reference the child by navigation property so the relation is
                // saved together with the page that is being inserted.
                var newPageRelation = new PageRelation()
                {
                    ParentPageId = response.ParentId.Value,
                    ChildPage    = newPage,
                };
                context.PageRelation.Add(newPageRelation);
            }

            context.SaveChanges();

            return newPage.Id;
        }