예제 #1
0
파일: Spider.cs 프로젝트: eg467/webshot-old
 /// <summary>
 /// Looks up the <see cref="UriSources"/> registered for <paramref name="uri"/> in
 /// <c>_uris</c>, creating and registering a new one when no entry exists yet.
 /// </summary>
 /// <param name="uri">The key used for the lookup and for constructing a new entry.</param>
 /// <returns>The already registered entry, or the newly created one.</returns>
 public UriSources GetOrCreateSources(StandardizedUri uri)
 {
     // Cache hit: hand back the existing entry untouched.
     if (_uris.TryGetValue(uri, out var existing))
     {
         return existing;
     }

     // Cache miss: build a fresh entry and remember it under the same key.
     var created = new UriSources(uri);
     _uris[uri] = created;
     return created;
 }
예제 #2
0
파일: Spider.cs 프로젝트: eg467/webshot-old
        /// <summary>
        /// Downloads the page for <paramref name="uri"/>, records the outcome on its
        /// <see cref="UriSources"/> entry and, when <c>_options.FollowLinks</c> is set,
        /// passes every link parsed from the page to <c>FoundLink</c>.
        /// </summary>
        /// <param name="uri">The URI to download and classify.</param>
        /// <returns>A task that completes once the page has been processed.</returns>
        private async Task Visit(StandardizedUri uri)
        {
            UriSources entry = null;
            string markup = string.Empty;

            try
            {
                (Uri landedUri, string body) = await DownloadPageAsync(uri.Standardized);

                markup = body;
                entry = _linkTracker.CombineIfRedirection(uri, landedUri);
            }
            catch (HttpRequestException ex)
            {
                // A failed download is recorded on the entry rather than rethrown.
                entry = _linkTracker.GetOrCreateSources(uri);
                entry.Status = SpiderPageStatus.Error;
                entry.Error = ex.Message;
                Debug.WriteLine($"Error downloading page ({uri}): {ex.Message}");
            }

            // Only entries still marked Unvisited get classified below; any other
            // status (such as the Error assigned in the catch block) is left as is.
            if (entry.Status != SpiderPageStatus.Unvisited)
            {
                return;
            }

            // Non-HTML content is marked Excluded and never scanned for links.
            bool isHtml = IsHtml(markup);
            entry.Status = isHtml ? SpiderPageStatus.Visited : SpiderPageStatus.Excluded;

            if (!isHtml || !_options.FollowLinks)
            {
                return;
            }

            var links = ParseLinks(entry.Uri.Uri, markup);
            links.ForEach(FoundLink);
        }