/// <summary>
/// Extracts the links from <paramref name="target"/>'s content and posts each
/// one to the resolver as a new <c>DownloadTarget</c> whose depth is one more
/// than the depth of <paramref name="target"/>.
/// </summary>
/// <param name="target">
/// The already-downloaded target whose <c>Content</c> is scanned for links.
/// Its <c>Target</c> URI is used as the base for relative links and as the
/// referrer of every new target.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="target"/> is <c>null</c>.
/// </exception>
public void HarvestLinks(DownloadTarget target)
{
    if (target == null)
    {
        throw new ArgumentNullException("target");
    }

    Console.WriteLine("[Harvester] Processing " + target.Target.ToString());

    IEnumerable<string> links = HarvestUrls(target.Content);
    foreach (string link in links)
    {
        // Page content is untrusted: a malformed href must not abort the
        // harvesting of the remaining links, so parse with TryCreate instead
        // of the throwing Uri constructor.
        Uri uri;
        if (!Uri.TryCreate(link, UriKind.RelativeOrAbsolute, out uri))
        {
            Console.WriteLine(
                string.Format(CultureInfo.InvariantCulture, "Url skipped (malformed) {0}", link));
            continue;
        }

        if (!uri.IsAbsoluteUri)
        {
            // Resolve relative links against the address of the page they came from.
            Uri absolute;
            if (!Uri.TryCreate(target.Target, uri, out absolute))
            {
                Console.WriteLine(
                    string.Format(CultureInfo.InvariantCulture, "Url skipped (malformed) {0}", link));
                continue;
            }

            uri = absolute;
        }

        // Declared inside the loop so each posted lambda captures its own target.
        DownloadTarget newTarget = new DownloadTarget(uri, target.Depth + 1);
        newTarget.Referrer = target.Target;

        this.Resolver.Post(resolver => resolver.Process(newTarget));

        Console.WriteLine(
            string.Format(
                CultureInfo.InvariantCulture,
                "Url To Harvest {0} {1}",
                newTarget.Depth,
                newTarget.TargetAddress));
    }

    Console.WriteLine("[Harvester] Processed " + target.Target.ToString());
}
/// <summary>
/// Downloads the content at <paramref name="target"/>'s URI, stores it in
/// <c>target.Content</c>, and posts the target to the harvester.
/// </summary>
/// <remarks>
/// A <see cref="System.Net.WebException"/> raised by the download is logged
/// and swallowed; in that case the target is not posted to the harvester.
/// </remarks>
/// <param name="target">The target to download.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="target"/> is <c>null</c>.
/// </exception>
public void Download(DownloadTarget target)
{
    if (target == null)
    {
        throw new ArgumentNullException("target");
    }

    Console.WriteLine("[Downloader] Processing " + target.Target.ToString());

    try
    {
        // WebClient is IDisposable; release it as soon as the download finishes
        // rather than leaving one undisposed instance per downloaded page.
        using (WebClient client = new WebClient())
        {
            target.Content = client.DownloadString(target.Target);
        }

        Console.WriteLine(
            string.Format(CultureInfo.InvariantCulture, "URL {0} downloaded", target.TargetAddress));
    }
    catch (System.Net.WebException)
    {
        // The format string previously had no {0} placeholder, so the address
        // argument was dropped and the log did not say which URL failed.
        Console.WriteLine(
            string.Format(
                CultureInfo.InvariantCulture,
                "URL {0} could not be downloaded",
                target.TargetAddress));
        return;
    }

    this.Harvester.Post(harvester => harvester.HarvestLinks(target));

    Console.WriteLine("[Downloader] Processed " + target.Target.ToString());
}
/// <summary>
/// Parses <paramref name="partialUri"/> as an absolute URI, wraps it in a new
/// <c>DownloadTarget</c> constructed with <c>1</c> as its second argument
/// (presumably the starting crawl depth), and hands it to the
/// <c>Process(DownloadTarget)</c> overload.
/// </summary>
/// <param name="partialUri">
/// The address to process. Despite the parameter name, the string must parse
/// as an absolute URI.
/// </param>
/// <exception cref="ArgumentException">
/// <paramref name="partialUri"/> cannot be parsed as an absolute URI.
/// </exception>
public void Process(string partialUri)
{
    Console.WriteLine("[Resolver] Dispatching " + partialUri);

    Uri parsedAddress;
    bool isAbsolute = Uri.TryCreate(partialUri, UriKind.Absolute, out parsedAddress);
    if (isAbsolute)
    {
        this.Process(new DownloadTarget(parsedAddress, 1));
        return;
    }

    throw new ArgumentException("Invalid Message Format");
}
/// <summary>
/// Decides whether <paramref name="target"/> should be downloaded and, if so,
/// records its URI in <c>downloadedAddresses</c> and posts it to the downloader.
/// </summary>
/// <remarks>
/// A target is rejected (and only logged) when, checked in this order:
/// its depth is greater than 5; its scheme is neither http nor https; it has
/// a referrer whose host differs from its own; or its URI is already present
/// in <c>downloadedAddresses</c>.
/// </remarks>
/// <param name="target">The candidate target.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="target"/> is <c>null</c>.
/// </exception>
public void Process(DownloadTarget target)
{
    if (target == null)
    {
        throw new ArgumentNullException("target");
    }

    Console.WriteLine("[Resolver] processing " + target.Target.ToString());

    // Pick the message for the first rule the target violates; the else-if
    // chain keeps later rules from being evaluated once one has matched.
    string rejectionFormat = null;
    if (target.Depth > 5)
    {
        rejectionFormat = "URL rejected {0} by max depth";
    }
    else if (!(target.Target.Scheme == Uri.UriSchemeHttp || target.Target.Scheme == Uri.UriSchemeHttps))
    {
        rejectionFormat = "URL rejected {0}: unsupported protocol";
    }
    else if (target.Referrer != null && !string.Equals(target.Target.Host, target.Referrer.Host))
    {
        rejectionFormat = "URL rejected {0}: different host";
    }
    else if (this.downloadedAddresses.Contains(target.Target))
    {
        rejectionFormat = "URL rejected {0}: already downloaded";
    }

    if (rejectionFormat != null)
    {
        string rejection = string.Format(
            CultureInfo.InvariantCulture,
            rejectionFormat,
            target.TargetAddress);
        Console.WriteLine(rejection);
        return;
    }

    // Record the address before posting so it counts as seen from here on.
    this.downloadedAddresses.Add(target.Target);
    this.Downloader.Post(downloader => downloader.Download(target));

    string accepted = string.Format(
        CultureInfo.InvariantCulture,
        "URL accepted: {0}",
        target.TargetAddress);
    Console.WriteLine(accepted);
}