Exemplo n.º 1
0
        /// <summary>
        /// Creates a logger that fans log items out to the given writers, building one
        /// independent dataflow pipeline per writer:
        /// buffer -> timer-reset transform -> batch -> writer action.
        /// </summary>
        /// <param name="writers">
        /// The writers to feed; must be non-null and contain at least one element
        /// (expressed as code-contract preconditions).
        /// </param>
        /// <remarks>
        /// Every block in every pipeline observes the same cancellation token, taken from
        /// the <c>cts</c> field created here. Each pipeline's entry (buffer) block and exit
        /// (writer) block are recorded in <c>bufferBlocks</c> and <c>writerBlocks</c>.
        /// </remarks>
        public Logger(params AbstractWriter[] writers)
        {
            Contract.Requires(writers != null);
            Contract.Requires(writers.Length >= 1);

            cts = new CancellationTokenSource();

            bufferBlocks = new List<BufferBlock<LogItem>>();
            writerBlocks = new List<ActionBlock<LogItem[]>>();

            foreach (var sink in writers)
            {
                sink.Setup();

                // Entry point of this writer's pipeline.
                var intakeOptions = new DataflowBlockOptions();
                intakeOptions.CancellationToken = cts.Token;

                var intake = new BufferBlock<LogItem>(intakeOptions);

                // Groups items into arrays of the writer's preferred batch size.
                var batchOptions = new GroupingDataflowBlockOptions();
                batchOptions.Greedy = true;
                batchOptions.CancellationToken = cts.Token;

                var batcher = new BatchBlock<LogItem>(sink.BatchSize, batchOptions);

                // When this timer fires it forces out whatever partial batch is pending.
                var flushTimer = new Timer(_ => batcher.TriggerBatch());

                // Pass-through stage: every item re-arms the one-shot flush timer to the
                // writer's timeout, so a partial batch is emitted once items stop arriving.
                var rearmOptions = new ExecutionDataflowBlockOptions();
                rearmOptions.CancellationToken = cts.Token;
                rearmOptions.SingleProducerConstrained = true;

                var rearmStage = new TransformBlock<LogItem, LogItem>(
                    item =>
                    {
                        var dueTime = (int)sink.Timeout.TotalMilliseconds;

                        flushTimer.Change(dueTime, Timeout.Infinite);

                        return item;
                    },
                    rearmOptions);

                // Terminal stage: hands each batch to the writer, one batch at a time.
                var outputOptions = new ExecutionDataflowBlockOptions();
                outputOptions.MaxDegreeOfParallelism = 1;
                outputOptions.CancellationToken = cts.Token;
                outputOptions.SingleProducerConstrained = true;

                var output = new ActionBlock<LogItem[]>(
                    async batch =>
                    {
                        await sink.WriteAsync(batch);
                    },
                    outputOptions);

                // Tear the writer down once its terminal block has finished, however it ended.
                output.Completion.ContinueWith(finished => sink.Teardown());

                rearmStage.LinkTo(batcher);
                intake.LinkTo(rearmStage);
                batcher.LinkTo(output);

                writerBlocks.Add(output);
                bufferBlocks.Add(intake);

                // HandleCompletion is a project extension method not visible here;
                // presumably it forwards completion/faults to the next stage - TODO confirm.
                rearmStage.HandleCompletion(batcher);
                intake.HandleCompletion(rearmStage);
                batcher.HandleCompletion(output);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Retrieves the list of links via <c>GetLinks</c>, scrapes each page for its first
        /// <c>&lt;IMG SRC="..."&gt;</c> reference, and downloads the referenced images into the
        /// configured save path, logging progress and a final summary.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Pages are scraped at most two at a time; downloads run with unbounded parallelism.
        /// When <c>MaxToFetch</c> is greater than zero only that many links are processed.
        /// Unless <c>OverwriteAll</c> is set, images whose target file already exists are
        /// counted as skipped.
        /// </para>
        /// <para>
        /// A single <see cref="HttpClient"/> is shared by all scrape requests and disposed when
        /// the pipeline has finished, instead of allocating (and never disposing) one client
        /// per page.
        /// </para>
        /// <para>
        /// The method is <c>async void</c>, so callers cannot await it or observe exceptions
        /// that escape it; the signature is kept for compatibility with existing callers.
        /// </para>
        /// </remarks>
        public static async void Fetch()
        {
            long fetched = 0;
            long skipped = 0;
            long errored = 0;

            var startedOn = DateTime.UtcNow;

            // Captures the value of the SRC attribute of an IMG tag.
            var imageRegex = new Regex("(?<=<IMG\\sSRC=\").*?(?=\"\\s.*?>)",
                                       RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant | RegexOptions.Compiled);

            List<Link> links;

            try
            {
                links = await GetLinks();

                Status.Info.Log("Parsed {0:N0} URIs", links.Count);

                if (Properties.Settings.Default.MaxToFetch > 0)
                {
                    links = links.Take(Properties.Settings.Default.MaxToFetch).ToList();
                }

                Status.Info.Log("Queued {0:N0} images to be downloaded", links.Count);
            }
            catch (Exception error)
            {
                Status.BadGetUrls.Log(error.Message);

                PressAnyKeyToTerminate();

                return;
            }

            // One client for the whole run: HttpClient is designed to be reused, and creating
            // one per request without disposing it leaks handlers and can exhaust sockets.
            using (var httpClient = new HttpClient())
            {
                // Stage 1: download the page and extract the image URI.
                // Returns null for pages that have no image or that failed to scrape.
                var scraper = new TransformBlock<Link, Link>(
                    async link =>
                    {
                        try
                        {
                            Status.Scraping.Log(link.PageUri.AbsoluteUri);

                            var html = await httpClient.GetStringAsync(link.PageUri);

                            var src = imageRegex.Match(html).Value;

                            if (string.IsNullOrWhiteSpace(src))
                            {
                                Interlocked.Increment(ref skipped);

                                Status.NoImage.Log(link.PageUri.AbsoluteUri);

                                return null;
                            }

                            link.ImageUri = new Uri(Properties.Settings.Default.BaseUri, src);

                            Status.Scraped.Log(link.PageUri.AbsoluteUri);

                            return link;
                        }
                        catch (Exception error)
                        {
                            Interlocked.Increment(ref errored);

                            Status.BadScrape.Log(error.Message);

                            return null;
                        }
                    },
                    new ExecutionDataflowBlockOptions()
                    {
                        MaxDegreeOfParallelism = 2
                    });

                // Stage 2: download the image unless it already exists locally.
                var fetcher = new ActionBlock<Link>(
                    async link =>
                    {
                        try
                        {
                            // Invariant lower-casing keeps file names stable regardless of the
                            // current culture (e.g. the Turkish dotless-i mapping).
                            var fileName = Path.Combine(Properties.Settings.Default.SaveToPath,
                                                        Path.GetFileName(link.ImageUri.AbsoluteUri).ToLowerInvariant());

                            var nameOnly = Path.GetFileName(fileName);

                            if (!Properties.Settings.Default.OverwriteAll && File.Exists(fileName))
                            {
                                Interlocked.Increment(ref skipped);

                                Status.DupImage.Log("{0:MM/dd/yyyy} - {1}", link.Date, nameOnly);

                                return;
                            }

                            Status.Fetching.Log("{0:MM/dd/yyyy} - {1}", link.Date, nameOnly);

                            await link.ImageUri.Download(fileName);

                            Interlocked.Increment(ref fetched);

                            Status.Fetched.Log("{0:MM/dd/yyyy} - {1}", link.Date, nameOnly);
                        }
                        catch (Exception error)
                        {
                            Interlocked.Increment(ref errored);

                            Status.BadFetch.Log(error.Message);
                        }
                    },
                    new ExecutionDataflowBlockOptions()
                    {
                        MaxDegreeOfParallelism = DataflowBlockOptions.Unbounded
                    });

                // Successful scrapes go to the fetcher; null results are drained into a
                // null target so they do not block the scraper's output queue.
                scraper.LinkTo(fetcher, link => link != null);
                scraper.LinkTo(DataflowBlock.NullTarget<Link>());

                // HandleCompletion is a project extension method not visible here;
                // presumably it forwards the scraper's completion to the fetcher - TODO confirm.
                scraper.HandleCompletion(fetcher);

                Status.Info.Log("Fetching APOD's archive list");

                links.ForEach(link => scraper.Post(link));

                scraper.Complete();

                try
                {
                    await fetcher.Completion;

                    Status.Finished.Log("Fetched: {0:N0}, Skipped: {1:N0}, Errors: {2:N0}, Seconds: {3:N2}",
                                        fetched, skipped, errored, (DateTime.UtcNow - startedOn).TotalMilliseconds / 1000.0);
                }
                catch (AggregateException errors)
                {
                    foreach (var error in errors.InnerExceptions)
                    {
                        Status.Failure.Log(error.Message);
                    }
                }
                catch (TaskCanceledException)
                {
                    Status.Cancelled.Log("The process was manually cancelled!");
                }
                catch (Exception error)
                {
                    Status.Failure.Log(error.Message);
                }
            }

            PressAnyKeyToTerminate();
        }