Example #1
0
        /// <summary>
        /// Crawls a single page: downloads it, saves every linked resource whose URI ends with one of
        /// <paramref name="extensions"/>, then recurses into the page's other links until
        /// <paramref name="maxDepth"/> is reached.
        /// </summary>
        /// <param name="client">Client used to fetch the page; also handed on to <c>DownloadFiles</c>.</param>
        /// <param name="uri">Address of the page to process. It is recorded in <c>_visitedUris</c>.</param>
        /// <param name="extensions">URI suffixes that mark a link as a downloadable resource.</param>
        /// <param name="directory">Directory passed to <c>DownloadFiles</c> as the save location.</param>
        /// <param name="maxDepth">Maximum recursion depth; a negative value disables the limit.</param>
        /// <param name="currentDepth">Depth of <paramref name="uri"/>; the entry call uses 0.</param>
        private void ExecuteInternal(NetClient client, string uri, string[] extensions, string directory,
                                     int maxDepth, int currentDepth = 0)
        {
            // Record the page before fetching it so links pointing back to it are not followed again.
            _visitedUris.Add(uri);
            var content = WebUtility.HtmlDecode(DownloadString(client, uri, currentDepth));

            // DownloadString yields null when the request fails; nothing to parse in that case.
            if (string.IsNullOrEmpty(content))
            {
                return;
            }

            var resources = ContentParser.ExtractUris(content)
                            .Where(x => x.Value.EndsWithAny(extensions))
                            .Select(x => new UriData
            {
                Value = x.Value.ToAbsoluteUri(uri),
                Label = x.Label.Normailze(100)
            })
                            .Distinct(x => x.Value);

            // The crawl is synchronous, so block here until the asynchronous batch has finished.
            DownloadFiles(client, resources, directory, currentDepth).GetAwaiter().GetResult();

            if (maxDepth >= 0 && currentDepth >= maxDepth)
            {
                return;
            }

            // Left as a deferred query on purpose: the visited check then runs when each candidate is
            // reached, so it also sees pages recorded by the recursive calls made earlier in the loop.
            var sites = ContentParser.ExtractUris(content)
                        .Where(x => x.Value.WithoutExtension())
                        .Select(x => x.Value.ToAbsoluteUri(uri))
                        .Where(x => !_visitedUris.Contains(x, StringComparison.OrdinalIgnoreCase));

            if (!string.IsNullOrEmpty(RecursionTarget))
            {
                // Build the pattern once instead of constructing a new Regex for every candidate link.
                var recursionFilter = new Regex(RecursionTarget, RegexOptions.IgnoreCase);
                sites = sites.Where(x => recursionFilter.IsMatch(x));
            }

            var nextDepth = currentDepth + 1;
            foreach (var s in sites)
            {
                ExecuteInternal(client, s, extensions, directory, maxDepth, nextDepth);
            }
        }
Example #2
0
        /// <summary>
        /// Downloads the given resources into <paramref name="directory"/>, running at most five transfers
        /// at the same time. Files rejected by <c>NameFilter</c>, files that already exist and files outside
        /// the <c>GreaterThan</c>/<c>LessThan</c> size limits are skipped.
        /// </summary>
        /// <param name="client">Shared client, used only for the sequential Content-Length probes.</param>
        /// <param name="uris">Resources to download.</param>
        /// <param name="directory">Directory the files are written to.</param>
        /// <param name="currentDepth">Crawl depth, used only in the progress text.</param>
        /// <returns>A task that completes once every scheduled download has finished.</returns>
        private async Task DownloadFiles(NetClient client, IEnumerable <UriData> uris, string directory, int currentDepth)
        {
            const int maxConcurrency = 5;

            var i = -1;

            var allTasks  = new List <Task>();
            var throttler = new SemaphoreSlim(maxConcurrency);

            // Build the name filter once instead of once per candidate file.
            var nameFilter = string.IsNullOrEmpty(NameFilter)
                                 ? null
                                 : new Regex(NameFilter, RegexOptions.IgnoreCase);

            foreach (var uri in uris)
            {
                var name = LinkLabel && !string.IsNullOrEmpty(uri.Label)
                               ? string.Format("{0}.{1}", uri.Label, uri.Value.Split('.').Last())
                               : uri.Value.Split('/').Last();

                if (nameFilter != null && !nameFilter.IsMatch(name))
                {
                    continue;
                }

                var path = Path.Combine(directory, name);
                if (File.Exists(path))
                {
                    Console.WriteLine(@"""{0}"" already exists, skipping...", name);
                    continue;
                }

                try
                {
                    if (GreaterThan > 0 || LessThan > 0)
                    {
                        var size = DownloadHeader(client, uri.Value, "Content-Length");

                        // Parsed as long so that files above 2 GiB are still subject to the size limits.
                        long contentLength;
                        if (long.TryParse(size, out contentLength))
                        {
                            if (GreaterThan > 0 && contentLength < GreaterThan)
                            {
                                continue;
                            }
                            if (LessThan > 0 && contentLength > LessThan)
                            {
                                continue;
                            }
                        }
                    }

                    // Take a slot only once the file is certain to be downloaded. Every slot taken here
                    // is released by the task below, so skipped files can no longer exhaust the semaphore.
                    await throttler.WaitAsync();

                    // using Task.Run(...) to run the lambda in its own parallel
                    // flow on the threadpool
                    allTasks.Add(
                        Task.Run(async() =>
                    {
                        try
                        {
                            // Each transfer gets its own client; the shared one stays with the loop.
                            using (var localClient = new NetClient()
                            {
                                ProxyData      = ProxyData,
                                Encoding       = Encoding.UTF8,
                                UserAgent      = UserAgent,
                                RequestTimeout = RequestTimeout
                            })
                            {
                                // The counter is shared by concurrently running tasks, so bump it atomically.
                                var file = string.Format(@"[{0}.{1}]: downloading ""{2}""...", currentDepth, Interlocked.Increment(ref i), name);
                                WriteNewLineToconsole("");

                                localClient.DownloadFileCompleted   += DownloadFileCompleted(Console.CursorLeft, Console.CursorTop);
                                localClient.DownloadProgressChanged += UpdateDownloadStatus(Console.CursorLeft, Console.CursorTop, file);

                                WriteNewLineToconsole("");
                                await localClient.DownloadFileTaskAsync(uri.Value, path);
                            }
                        }
                        catch (Exception downloadError)
                        {
                            // Report and carry on: one failed transfer must not fault the whole batch.
                            Console.WriteLine("error: {0}", downloadError.Message);
                        }
                        finally
                        {
                            throttler.Release();
                        }
                    }));
                }
                catch (Exception ex)
                {
                    Console.WriteLine("error: {0}", ex.Message);
                }
            }

            // won't get here until all urls have been put into tasks
            await Task.WhenAll(allTasks);
        }
Example #3
0
        /// <summary>
        /// Entry point of the crawl: makes sure <c>SaveDirectory</c> exists, then processes <c>Uri</c>
        /// through <c>ExecuteInternal</c> with a client configured from this instance's settings.
        /// </summary>
        public void Execute()
        {
            var saveDirectoryMissing = !Directory.Exists(SaveDirectory);
            if (saveDirectoryMissing)
            {
                Directory.CreateDirectory(SaveDirectory);
            }

            var rootClient = new NetClient
            {
                ProxyData = ProxyData,
                Encoding = Encoding.UTF8,
                UserAgent = UserAgent,
                RequestTimeout = RequestTimeout
            };

            // The client is disposed when the crawl ends, whether it completes or throws.
            using (rootClient)
            {
                ExecuteInternal(rootClient, Uri, Extensions, SaveDirectory, RecursionDepth);
            }
        }
Example #4
0
        /// <summary>
        /// Crawls a single page: downloads it, saves every linked resource whose URI ends with one of
        /// <paramref name="extensions"/>, then recurses into the page's other links until
        /// <paramref name="maxDepth"/> is reached.
        /// </summary>
        /// <param name="client">Client used for every request made during the crawl.</param>
        /// <param name="uri">Address of the page to process. It is recorded in <c>_visitedUris</c>.</param>
        /// <param name="extensions">URI suffixes that mark a link as a downloadable resource.</param>
        /// <param name="directory">Directory passed to <c>DownloadFiles</c> as the save location.</param>
        /// <param name="maxDepth">Maximum recursion depth; a negative value disables the limit.</param>
        /// <param name="currentDepth">Depth of <paramref name="uri"/>; the entry call uses 0.</param>
        private void ExecuteInternal(NetClient client, string uri, string[] extensions, string directory,
            int maxDepth, int currentDepth = 0)
        {
            // Record the page before fetching it so links pointing back to it are not followed again.
            _visitedUris.Add(uri);
            var content = WebUtility.HtmlDecode(DownloadString(client, uri, currentDepth));

            // DownloadString yields null when the request fails; nothing to parse in that case.
            if (string.IsNullOrEmpty(content))
            {
                return;
            }

            var resources = ContentParser.ExtractUris(content)
                                         .Where(x => x.Value.EndsWithAny(extensions))
                                         .Select(x => new UriData
                                             {
                                                 Value = x.Value.ToAbsoluteUri(uri),
                                                 Label = x.Label.Normailze(100)
                                             })
                                         .Distinct(x => x.Value);
            DownloadFiles(client, resources, directory, currentDepth);

            if (maxDepth >= 0 && currentDepth >= maxDepth)
            {
                return;
            }

            // Left as a deferred query on purpose: the visited check then runs when each candidate is
            // reached, so it also sees pages recorded by the recursive calls made earlier in the loop.
            var sites = ContentParser.ExtractUris(content)
                                     .Where(x => x.Value.WithoutExtension())
                                     .Select(x => x.Value.ToAbsoluteUri(uri))
                                     .Where(x => !_visitedUris.Contains(x, StringComparison.OrdinalIgnoreCase));

            if (!string.IsNullOrEmpty(RecursionTarget))
            {
                // Build the pattern once instead of constructing a new Regex for every candidate link.
                var recursionFilter = new Regex(RecursionTarget, RegexOptions.IgnoreCase);
                sites = sites.Where(x => recursionFilter.IsMatch(x));
            }

            var nextDepth = currentDepth + 1;
            foreach (var s in sites)
            {
                ExecuteInternal(client, s, extensions, directory, maxDepth, nextDepth);
            }
        }
Example #5
0
 /// <summary>
 /// Announces the request on the console and fetches <paramref name="uri"/> as text.
 /// </summary>
 /// <param name="client">Client that performs the request.</param>
 /// <param name="uri">Address to fetch.</param>
 /// <param name="currentDepth">Crawl depth, used only in the console message.</param>
 /// <returns>The downloaded text, or <c>null</c> when any exception is raised.</returns>
 private string DownloadString(NetClient client, string uri, int currentDepth)
 {
     string page;

     try
     {
         Console.WriteLine(@"[--> {0}]: ""{1}""...", currentDepth, uri);
         page = client.DownloadString(uri);
     }
     catch (Exception failure)
     {
         // Best effort: report the problem and let the caller treat the page as empty.
         Console.WriteLine("error: {0}", failure.Message);
         page = null;
     }

     return page;
 }
Example #6
0
 /// <summary>
 /// Requests <paramref name="uri"/> with the client's <c>HeadOnly</c> flag set and returns one response header.
 /// </summary>
 /// <param name="client">Client to issue the request with; its <c>HeadOnly</c> flag is always reset to false.</param>
 /// <param name="uri">Address to query.</param>
 /// <param name="header">Name of the response header to read, e.g. "Content-Length".</param>
 /// <returns>The header value, or <c>null</c> when no response headers are available.</returns>
 private string DownloadHeader(NetClient client, string uri, string header)
 {
     // NOTE(review): HeadOnly presumably makes the client send a HEAD request - confirm in NetClient.
     client.HeadOnly = true;
     try
     {
         client.DownloadData(uri);
     }
     finally
     {
         // Reset even when the request throws: callers catch the exception and keep using this
         // client, which would otherwise stay in head-only mode for the real downloads.
         client.HeadOnly = false;
     }

     var responseHeaders = client.ResponseHeaders;
     return responseHeaders == null ? null : responseHeaders.Get(header);
 }
Example #7
0
        /// <summary>
        /// Downloads the given resources one after another into <paramref name="directory"/>. Files rejected
        /// by <c>NameFilter</c>, files that already exist and files outside the
        /// <c>GreaterThan</c>/<c>LessThan</c> size limits are skipped; a failed download is reported on the
        /// console and does not stop the remaining ones.
        /// </summary>
        /// <param name="client">Client used for the Content-Length probes and the downloads.</param>
        /// <param name="uris">Resources to download.</param>
        /// <param name="directory">Directory the files are written to.</param>
        /// <param name="currentDepth">Crawl depth, used only in the progress text.</param>
        private void DownloadFiles(NetClient client, IEnumerable<UriData> uris, string directory, int currentDepth)
        {
            var i = -1;

            // Build the name filter once instead of once per candidate file.
            var nameFilter = string.IsNullOrEmpty(NameFilter)
                                 ? null
                                 : new Regex(NameFilter, RegexOptions.IgnoreCase);

            foreach (var uri in uris)
            {
                var name = LinkLabel && !string.IsNullOrEmpty(uri.Label)
                               ? string.Format("{0}.{1}", uri.Label, uri.Value.Split('.').Last())
                               : uri.Value.Split('/').Last();

                if (nameFilter != null && !nameFilter.IsMatch(name))
                {
                    continue;
                }

                var path = Path.Combine(directory, name);
                if (File.Exists(path))
                {
                    Console.WriteLine(@"""{0}"" already exists, skipping...", name);
                    continue;
                }

                try
                {
                    if (GreaterThan > 0 || LessThan > 0)
                    {
                        var size = DownloadHeader(client, uri.Value, "Content-Length");

                        // Parsed as long so that files above 2 GiB are still subject to the size limits.
                        // An absent or unparsable header leaves the file unfiltered.
                        long contentLength;
                        if (long.TryParse(size, out contentLength))
                        {
                            if (GreaterThan > 0 && contentLength < GreaterThan)
                            {
                                continue;
                            }
                            if (LessThan > 0 && contentLength > LessThan)
                            {
                                continue;
                            }
                        }
                    }

                    // The counter only advances for files that are actually attempted.
                    Console.WriteLine(@"[{0}.{1}]: downloading ""{2}""...", currentDepth, ++i, name);
                    client.DownloadFile(uri.Value, path);
                }
                catch (Exception ex)
                {
                    Console.WriteLine("error: {0}", ex.Message);
                }
            }
        }