Example #1
0
        /// <summary>
        /// Worker loop: polls <c>queue</c> for links and fetches each one until
        /// <c>running</c> becomes false or cancellation is requested.
        /// </summary>
        /// <param name="obj">Not used by this method.</param>
        private void doStuff(object obj)
        {
            while (running)
            {
                try
                {
                    Thread.Sleep(10);

                    if (cancellationToken.Token.IsCancellationRequested)
                    {
                        break;
                    }

                    // While paused, poll at a slower rate instead of consuming the queue
                    if (config.Paused || config.Paused_Cacher)
                    {
                        Thread.Sleep(500);
                        continue;
                    }

                    if (!queue.TryDequeue(out Link current))
                    {
                        // Nothing queued: back off a little before polling again
                        Thread.Sleep(100);
                        continue;
                    }

                    // Let subscribers veto the link. The event may have no subscribers,
                    // so it is raised null-safely instead of being invoked directly.
                    var args = new ShouldFetchEventArgs(current);
                    ShouldFetch?.Invoke(this, args);
                    if (args.Cancel)
                    {
                        continue;
                    }

                    try
                    {
                        fetch(current);
                    }
                    catch (Exception ex)
                    {
                        // A failing fetch is reported to subscribers (if any) and the loop keeps going
                        FetchFailed?.Invoke(this, new FetchFailEventArgs(current, 0, ex, new HeaderCollection()));
                    }
                }
                catch (ThreadInterruptedException)
                {
                    // Interruption is ignored here; the loop condition is re-evaluated on the next pass
                }
                catch (Exception ex)
                {
                    config.Logger.Error(ex, "ContentCacher:doStuff_Loop exception");
                    throw;
                }
            }
        }
        /// <summary>
        /// Adds the directory described by <paramref name="l"/> to <c>toVisit</c>, unless the
        /// optional <c>options.ShouldFetch</c> callback cancels it.
        /// </summary>
        /// <param name="l">Listing entry whose <c>Uri</c> is to be queued.</param>
        /// <param name="options">Listing options; its <c>ShouldFetch</c> callback may be null.</param>
        private void enqueueDirectory(ListingInfo l, ListingOptions options)
        {
            bool vetoed = false;

            if (options.ShouldFetch != null)
            {
                // The same Uri is passed for both Link constructor arguments
                var fetchArgs = new ShouldFetchEventArgs(new Link(l.Uri, l.Uri));
                options.ShouldFetch(this, fetchArgs);
                vetoed = fetchArgs.Cancel;
            }

            if (!vetoed)
            {
                toVisit.Enqueue(l.Uri);
            }
        }
Example #3
0
        /// <summary>
        /// Worker loop: takes links from <c>queue</c> and fetches them one at a time until
        /// <c>run</c> becomes false or cancellation is requested. <c>IsProcessing</c> is true
        /// only while a link is being fetched.
        /// </summary>
        /// <param name="obj">Not used by this method.</param>
        private void doStuff(object obj)
        {
            IsProcessing = false;
            while (run)
            {
                if (cancellationToken.Token.IsCancellationRequested)
                {
                    break;
                }

                if (IsProcessing)
                {
                    Thread.Sleep(250);
                    continue;
                }

                // While paused, poll at a slower rate instead of consuming the queue
                if (config.Paused || config.Paused_Downloader)
                {
                    Thread.Sleep(500);
                    continue;
                }

                if (queue.TryDequeue(out Link current))
                {
                    // Let subscribers veto the link. The event may have no subscribers,
                    // so it is raised null-safely instead of being invoked directly.
                    var args = new ShouldFetchEventArgs(current);
                    ShouldFetch?.Invoke(this, args);
                    if (args.Cancel)
                    {
                        continue;
                    }

                    IsProcessing = true;
                    try
                    {
                        config.Logger.Information($"[WEB] {current.Uri.UrlWithoutHost()}");
                        current.FetchStart = DateTime.Now;

                        try
                        {
                            fetch(current);
                        }
                        catch (Exception ex)
                        {
                            // A failing fetch is reported to subscribers (if any) and the loop keeps going
                            FetchFailed?.Invoke(this, new FetchFailEventArgs(current, 0, ex, new HeaderCollection()));
                        }
                    }
                    finally
                    {
                        // Always clear the flag, even if logging or a subscriber throws,
                        // so the state never stays stuck at "processing"
                        IsProcessing = false;
                    }
                }

                // Throttle between iterations; never less than 10 ms
                Thread.Sleep(Math.Max(10, config.DownloadDelay));
            }
        }
Example #4
0
        /// <summary>
        /// Timer callback: takes one link from <c>workQueue</c> and, unless a
        /// <c>ShouldFetch</c> subscriber cancels it, discards it according to <c>DiscardMode</c>.
        /// </summary>
        /// <param name="sender">Not used by this method.</param>
        /// <param name="e">Not used by this method.</param>
        private void OnTimedEvent(object sender, ElapsedEventArgs e)
        {
            bool hasWork = workQueue.TryDequeue(out Link link);
            if (!hasWork)
            {
                return;
            }

            if (ShouldFetch != null)
            {
                var fetchArgs = new ShouldFetchEventArgs(link);
                ShouldFetch(this, fetchArgs);
                if (fetchArgs.Cancel)
                {
                    // Cancelled links are neither failed nor completed
                    return;
                }
            }

            var mode = DiscardMode;
            if (mode == DiscardModeTypes.Fail)
            {
                // Report the link as failed, with an empty header collection
                var onFailed = FetchFailed;
                if (onFailed != null)
                {
                    onFailed(this,
                             new FetchFailEventArgs(link,
                                                    0,
                                                    new Exception("NullDownloader:DiscardModeTypes.Fail"),
                                                    new HeaderCollection()));
                }
            }
            else if (mode == DiscardModeTypes.CompleteEmpty)
            {
                // Report the link as completed, with an empty body and empty header collections
                var onCompleted = FetchCompleted;
                if (onCompleted != null)
                {
                    onCompleted(this,
                                new FetchCompleteEventArgs(link,
                                                           new byte[0],
                                                           new HeaderCollection(),
                                                           new HeaderCollection()));
                }
            }
            // DiscardModeTypes.Ignore and any other value: the link is dropped without raising an event
        }
        /// <summary>
        /// ShouldFetch handler: cancels links that should not be downloaded and re-queues
        /// links on the "www.fazenda.df.gov.br" host under the "dec.fazenda.df.gov.br" address.
        /// </summary>
        /// <param name="Sender">Event source; the replacement page is only queued when it is a <c>SimpleSpider</c>.</param>
        /// <param name="args">Event data carrying the link; <c>Cancel</c> is set to skip the link.</param>
        private void spider_ShouldFetch(object Sender, ShouldFetchEventArgs args)
        {
            // The chamber's website returns an error 500 for empty records;
            // for the purposes of this example, most of them are simply skipped here
            var IDs = new string[] { "3070", "3005", "958", "2377",  // General list (year?)
                                     "2338", "1549", "3358", "3437",
                                     "3367", "3036", "2979", "999",
                                     "3463", "3255", "2250", "3456",
                                     "1829", "2924", "3462", "3394",
                                     "2311", "1649", "3357",
                                     // Specific month
                                     "1571", "3269", "3324", "3052",
                                     "3220", "2934", "3061", "1893",
                                     "2919", "1344", "3471", "1627",
                                     "80", "3464", "3362", "3142",
                                     "2317", "3438", "3081", "3472",
                                     "3455", "1161", "2902", "2277",
                                     "2475", "3066", "3417", "3162",
                                     "1946", "2233", "3268" };

            // Computed once instead of once per ID inside the lambda
            string linkText = args.Link.ToString();

            if (IDs.Any(id => linkText.Contains($"={id}&")))
            {
                args.Cancel = true;
            }

            // Some links get redirected; fix them up right here
            if (args.Link.Uri.Host.Contains("www.fazenda.df.gov.br"))
            {
                args.Cancel = true;
                // Pattern match instead of an unchecked "as" cast, so an unexpected sender
                // cannot cause a NullReferenceException inside the handler
                if (Sender is SimpleSpider spider)
                {
                    spider.AddPage(new Uri($"https://dec.fazenda.df.gov.br/ConsultarNFCe.aspx{args.Link.Uri.Query}"), args.Link.SourceUri);
                }
            }

            // ?? do not load
            if (linkText.Contains("jsessionid="))
            {
                args.Cancel = true;
            }
        }
 /// <summary>
 /// ShouldFetch handler that applies the "/login" and "/tag" filters to the link
 /// (presumably cancelling links whose URL contains either fragment; confirm
 /// against <c>CancelIfContains</c>).
 /// </summary>
 private static void Spider_ShouldFetch(object Sender, ShouldFetchEventArgs args)
 {
     // The second filter is applied to whatever the first call returns
     var afterLogin = args.CancelIfContains("/login");
     afterLogin.CancelIfContains("/tag");
 }