예제 #1
0
        /// <summary>
        /// Extracts the links found in a downloaded target's content and posts each one
        /// to the resolver as a new target one level deeper than <paramref name="target"/>.
        /// </summary>
        /// <param name="target">The downloaded target whose content is scanned for links.</param>
        /// <exception cref="ArgumentNullException"><paramref name="target"/> is null.</exception>
        public void HarvestLinks(DownloadTarget target)
        {
            if (target == null)
            {
                throw new ArgumentNullException("target");
            }

            Console.WriteLine("[Harvester] Processing " + target.Target.ToString());

            IEnumerable<string> links = HarvestUrls(target.Content);

            foreach (string link in links)
            {
                // Use TryCreate instead of the Uri constructors: one malformed (or null)
                // link must not throw and abort harvesting of the remaining links.
                Uri uri;
                bool parsed = Uri.TryCreate(link, UriKind.RelativeOrAbsolute, out uri);

                if (parsed && !uri.IsAbsoluteUri)
                {
                    // Relative links are resolved against the address of the page they came from.
                    Uri resolved;
                    parsed = Uri.TryCreate(target.Target, uri, out resolved);
                    uri = resolved;
                }

                if (!parsed)
                {
                    Console.WriteLine(
                        string.Format(CultureInfo.InvariantCulture, "Url skipped (malformed): {0}", link));
                    continue;
                }

                DownloadTarget newTarget = new DownloadTarget(uri, target.Depth + 1);
                newTarget.Referrer = target.Target;
                this.Resolver.Post(resolver => resolver.Process(newTarget));

                Console.WriteLine(
                    string.Format(CultureInfo.InvariantCulture, "Url To Harvest {0} {1}", newTarget.Depth, newTarget.TargetAddress));
            }

            Console.WriteLine("[Harvester] Processed " + target.Target.ToString());
        }
예제 #2
0
        /// <summary>
        /// Downloads the content at the target's address, stores it on the target, and
        /// posts the target to the harvester. If the download fails with a
        /// <see cref="System.Net.WebException"/>, the failure is logged and the target is dropped.
        /// </summary>
        /// <param name="target">The target to download; its <c>Content</c> is set on success.</param>
        /// <exception cref="ArgumentNullException"><paramref name="target"/> is null.</exception>
        public void Download(DownloadTarget target)
        {
            if (target == null)
            {
                throw new ArgumentNullException("target");
            }

            Console.WriteLine("[Downloader] Processing " + target.Target.ToString());

            try
            {
                // WebClient is IDisposable; dispose it deterministically instead of leaking it.
                using (WebClient client = new WebClient())
                {
                    target.Content = client.DownloadString(target.Target);
                }

                Console.WriteLine(
                    string.Format(CultureInfo.InvariantCulture, "URL {0} downloaded", target.TargetAddress));
            }
            catch (System.Net.WebException)
            {
                // The format string previously had no placeholder, so the address was
                // passed but never printed.
                Console.WriteLine(
                    string.Format(
                        CultureInfo.InvariantCulture,
                        "URL {0} could not be downloaded",
                        target.TargetAddress));

                return;
            }

            this.Harvester.Post(harvester => harvester.HarvestLinks(target));

            Console.WriteLine("[Downloader] Processed " + target.Target.ToString());
        }
예제 #3
0
        /// <summary>
        /// Parses <paramref name="partialUri"/> as an absolute URI, wraps it in a
        /// depth-1 <c>DownloadTarget</c>, and hands it to the target-based overload.
        /// </summary>
        /// <param name="partialUri">Text expected to contain an absolute URI.</param>
        /// <exception cref="ArgumentException">The text is not a valid absolute URI.</exception>
        public void Process(string partialUri)
        {
            Console.WriteLine("[Resolver] Dispatching " + partialUri);

            Uri parsedAddress;
            bool isAbsolute = Uri.TryCreate(partialUri, UriKind.Absolute, out parsedAddress);

            if (!isAbsolute)
            {
                throw new ArgumentException("Invalid Message Format");
            }

            // Targets created from raw text always start at depth 1.
            this.Process(new DownloadTarget(parsedAddress, 1));
        }
예제 #4
0
        /// <summary>
        /// Decides whether a target should be downloaded. A target is rejected (and the
        /// reason logged) when its depth exceeds 5, its scheme is neither http nor https,
        /// its host differs from its referrer's host, or its address is already in
        /// <c>downloadedAddresses</c>. Otherwise the address is recorded and the target is
        /// posted to the downloader.
        /// </summary>
        /// <param name="target">The candidate target.</param>
        /// <exception cref="ArgumentNullException"><paramref name="target"/> is null.</exception>
        public void Process(DownloadTarget target)
        {
            if (target == null)
            {
                throw new ArgumentNullException("target");
            }

            Uri address = target.Target;

            Console.WriteLine("[Resolver] processing " + address.ToString());

            // Pick the first applicable rejection message; checks run in the same order
            // as the filters are listed in the summary.
            string rejectionFormat = null;

            if (target.Depth > 5)
            {
                rejectionFormat = "URL rejected {0} by max depth";
            }
            else if (!IsHttpOrHttps(address.Scheme))
            {
                rejectionFormat = "URL rejected {0}: unsupported protocol";
            }
            else
            {
                Uri referrer = target.Referrer;

                if (referrer != null && address.Host != referrer.Host)
                {
                    rejectionFormat = "URL rejected {0}: different host";
                }
                else if (this.downloadedAddresses.Contains(address))
                {
                    rejectionFormat = "URL rejected {0}: already downloaded";
                }
            }

            if (rejectionFormat != null)
            {
                Console.WriteLine(
                    string.Format(CultureInfo.InvariantCulture, rejectionFormat, target.TargetAddress));
                return;
            }

            this.downloadedAddresses.Add(address);

            this.Downloader.Post(d => d.Download(target));

            Console.WriteLine(
                string.Format(CultureInfo.InvariantCulture, "URL accepted: {0}", target.TargetAddress));
        }

        /// <summary>
        /// Returns true when <paramref name="scheme"/> equals the http or https URI scheme.
        /// </summary>
        private static bool IsHttpOrHttps(string scheme)
        {
            return scheme == Uri.UriSchemeHttp || scheme == Uri.UriSchemeHttps;
        }