Beispiel #1
0
        /// <summary>
        /// Validates a discovered link and, when nothing rejects it, places it on the
        /// <c>qAdded</c> queue.
        /// </summary>
        /// <param name="pageToVisit">URI of the page that was discovered.</param>
        /// <param name="sourcePage">URI handed to the <c>Link</c> constructor together with <paramref name="pageToVisit"/>.</param>
        /// <returns>
        /// The enqueued <c>Link</c>, or <c>null</c> when the link is rejected: its host differs
        /// from <c>BaseUri.Host</c> while host violations are not allowed, it was already
        /// executed, or a <c>ShouldFetch</c> handler set <c>Cancel</c>.
        /// </returns>
        private Link addPage(Uri pageToVisit, Uri sourcePage)
        {
            if (pageToVisit.Host != BaseUri.Host && !Configuration.SpiderAllowHostViolation)
            {
                string host = pageToVisit.Host;
                bool firstViolation = false;

                // Check and insert under one lock so that concurrent callers cannot both
                // treat the same host as new, and the collection is never read while
                // another thread is adding to it.
                lock (hViolated)
                {
                    if (!hViolated.Contains(host)) // ignore the entire domain
                    {
                        hViolated.Add(host);
                        firstViolation = true;
                    }
                }

                // Warn once per host, outside the lock so logging does not extend the critical section.
                if (firstViolation)
                {
                    log.Warning($"[WRN] Host Violation {pageToVisit}");
                }
                return null;
            }

            var lnk = new Link(pageToVisit, sourcePage);

            // Snapshot the delegate so a handler being detached between the null check
            // and the call cannot cause a NullReferenceException.
            var rewriteHandler = FetchRewrite;
            if (rewriteHandler != null)
            {
                var ev = new FetchRewriteEventArgs(pageToVisit);
                rewriteHandler(this, ev);
                // Default Uri Equality ignores Fragment, so compare the string forms instead.
                if (ev.NewUri != null && ev.NewUri.ToString() != pageToVisit.ToString())
                {
                    if (ev.ShowOnLog)
                    {
                        log.Information($"[REW] {pageToVisit} -> {ev.NewUri}");
                    }
                    lnk.ResourceRewritten(ev.NewUri);
                }
            }

            // Single lookup instead of ContainsKey + indexer.
            string movedTo;
            if (SpiderWorkData.Moved301.TryGetValue(lnk.Uri.ToString(), out movedTo))
            {
                lnk.ResourceMoved(new Uri(movedTo));

                // NOTE(review): this repeats the check made right after this block; kept
                // because alreadyExecuted is not visible here - confirm it has no side
                // effects and drop one of the two calls.
                if (alreadyExecuted(lnk.Uri))
                {
                    return null;
                }
            }

            if (alreadyExecuted(lnk.Uri))
            {
                return null;
            }

            // Give subscribers the chance to veto the fetch.
            var args = new ShouldFetchEventArgs(lnk);

            ShouldFetch?.Invoke(this, args);
            if (args.Cancel)
            {
                return null;
            }

            qAdded.Enqueue(lnk);
            return lnk;
        }
Beispiel #2
0
 /// <summary>
 /// Rewrite handler that replaces a URI carrying a fragment with its fragment-less form
 /// while <c>Configuration.Auto_RewriteRemoveFragment</c> is enabled.
 /// </summary>
 /// <param name="Sender">Object raising the event; not used.</param>
 /// <param name="args">
 /// Rewrite arguments. When <c>CurrentUri</c> has a fragment, <c>NewUri</c> receives the
 /// stripped URI and <c>ShowOnLog</c> is set to <c>false</c>.
 /// </param>
 private void fetchRewrite_AutoRewrite(object Sender, FetchRewriteEventArgs args)
 {
     try
     {
         if (Configuration.Auto_RewriteRemoveFragment)
         {
             var current = args.CurrentUri;
             if (current.HasFragment())
             {
                 args.NewUri = current.RemoveFragment();
                 args.ShowOnLog = false;
             }
         }
     }
     catch (Exception failure)
     {
         // Any failure switches the feature off before the error is logged.
         Configuration.Auto_RewriteRemoveFragment = false;
         log.Error(failure, "Failed while auto-removing fragments. Auto-removing disabled");
     }
 }
Beispiel #3
0
 /// <summary>
 /// Rewrite handler that strips the fragment from the URI in <paramref name="args"/>
 /// when <c>Configuration.Auto_RewriteRemoveFragment</c> is switched on.
 /// </summary>
 /// <param name="Sender">Object raising the event; not used.</param>
 /// <param name="args">
 /// Rewrite arguments. If <c>CurrentUri</c> has a fragment, <c>NewUri</c> is set to the
 /// URI without it and <c>ShowOnLog</c> is set to <c>false</c>.
 /// </param>
 private void fetchRewrite_AutoRewrite(object Sender, FetchRewriteEventArgs args)
 {
     try
     {
         if (Configuration.Auto_RewriteRemoveFragment && args.CurrentUri.HasFragment())
         {
             args.NewUri = args.CurrentUri.RemoveFragment();
             args.ShowOnLog = false;
         }
     }
     catch (Exception failure)
     {
         // On any failure: disable the feature, log, then notify OnError subscribers.
         Configuration.Auto_RewriteRemoveFragment = false;
         log.Error(failure, "Failed while auto-removing fragments. Auto-removing disabled");
         OnError?.Invoke(this, new ErrorEventArgs
         {
             Source = FetchEventArgs.EventSource.Scheduler,
             Exception = failure,
         });
     }
 }
Beispiel #4
0
        /// <summary>
        /// Runs a discovered link through the host check, the optional <c>AddPageFilter</c>,
        /// URI rewriting, the <c>Moved301</c> redirect table, the already-executed check and
        /// the <c>ShouldFetch</c> event, and enqueues it on <c>qAdded</c> when nothing rejects it.
        /// </summary>
        /// <param name="pageToVisit">URI of the page that was discovered.</param>
        /// <param name="sourcePage">URI handed to the <c>Link</c> constructor together with <paramref name="pageToVisit"/>.</param>
        /// <returns>The enqueued <c>Link</c>, or <c>null</c> when the link was rejected.</returns>
        /// <remarks>
        /// Exceptions thrown by <c>AddPageFilter</c> or by a <c>ShouldFetch</c> handler are
        /// logged and reported through <c>OnError</c>; processing then continues as if the
        /// callback had accepted the link. Exceptions from <c>FetchRewrite</c> handlers are
        /// not caught here.
        /// </remarks>
        private Link addPage(Uri pageToVisit, Uri sourcePage)
        {
            if (!Configuration.SpiderAllowHostViolation && isHostViolation(pageToVisit, BaseUri))
            {
                lock (hViolated)
                {
                    string host = pageToVisit.Host;
                    // Warn only the first time a host is recorded; later links to it are dropped silently.
                    if (!hViolated.Contains(host)) // ignore the entire domain
                    {
                        hViolated.Add(host);
                        log.Warning($"[WRN] Host Violation {pageToVisit}");
                    }
                }
                return null;
            }

            var lnk = new Link(pageToVisit, sourcePage);

            try
            {
                // Snapshot the delegate so it cannot become null between the check and the call.
                var pageFilter = AddPageFilter;
                if (pageFilter != null && !pageFilter(lnk))
                {
                    return null;
                }
            }
            catch (Exception ex)
            {
                // A failing filter does not reject the link: report the error and carry on.
                log.Error(ex, "AddPageFilter error");
                OnError?.Invoke(this, new ErrorEventArgs()
                {
                    Source = FetchEventArgs.EventSource.Scheduler, Exception = ex
                });
            }

            // Same snapshot pattern for the rewrite event.
            var rewriteHandler = FetchRewrite;
            if (rewriteHandler != null)
            {
                var ev = new FetchRewriteEventArgs(pageToVisit);
                rewriteHandler(this, ev);
                // Default Uri Equality ignores Fragment, so compare the string forms instead.
                if (ev.NewUri != null && ev.NewUri.ToString() != pageToVisit.ToString())
                {
                    if (ev.ShowOnLog)
                    {
                        log.Information($"[REW] {pageToVisit} -> {ev.NewUri}");
                    }
                    lnk.ResourceRewritten(ev.NewUri);
                }
            }

            // Single lookup instead of ContainsKey + indexer.
            string movedTo;
            if (SpiderWorkData.Moved301.TryGetValue(lnk.Uri.ToString(), out movedTo))
            {
                lnk.ResourceMoved(new Uri(movedTo));

                // NOTE(review): this repeats the check made right after this block; kept
                // because alreadyExecuted is not visible here - confirm it has no side
                // effects and drop one of the two calls.
                if (alreadyExecuted(lnk.Uri))
                {
                    return null;
                }
            }

            if (alreadyExecuted(lnk.Uri))
            {
                return null;
            }

            try
            {
                // Give subscribers the chance to veto the fetch.
                var args = new ShouldFetchEventArgs(lnk);
                ShouldFetch?.Invoke(this, args);
                if (args.Cancel)
                {
                    return null;
                }
            }
            catch (Exception ex)
            {
                // A failing handler does not cancel the fetch: report the error and enqueue anyway.
                log.Error(ex, "ShouldFetch error");
                OnError?.Invoke(this, new ErrorEventArgs()
                {
                    Source = FetchEventArgs.EventSource.Scheduler, Exception = ex
                });
            }

            qAdded.Enqueue(lnk);
            return lnk;
        }