Exemplo n.º 1
0
 /// <summary>
 /// Compares this instance with <paramref name="other"/> member by member.
 /// </summary>
 /// <param name="other">The options to compare against; may be <c>null</c>.</param>
 /// <returns>
 /// <c>true</c> when every compared member is equal; otherwise <c>false</c>,
 /// including when <paramref name="other"/> is <c>null</c>.
 /// </returns>
 protected bool Equals(QuerySourceOptions other)
 {
     // A null argument is never equal. Without this guard the member reads
     // below would throw a NullReferenceException instead of returning false.
     if (ReferenceEquals(other, null))
     {
         return false;
     }

     // The set of members compared here is the same set hashed by GetHashCode.
     return string.Equals(ResultStreamName, other.ResultStreamName) &&
            string.Equals(PartitionResultStreamNamePattern, other.PartitionResultStreamNamePattern) &&
            ReorderEvents.Equals(other.ReorderEvents) &&
            ProcessingLag == other.ProcessingLag &&
            IsBiState.Equals(other.IsBiState) &&
            DefinesStateTransform.Equals(other.DefinesStateTransform) &&
            DefinesCatalogTransform.Equals(other.DefinesCatalogTransform) &&
            ProducesResults.Equals(other.ProducesResults) &&
            DefinesFold.Equals(other.DefinesFold) &&
            HandlesDeletedNotifications.Equals(other.HandlesDeletedNotifications) &&
            IncludeLinks.Equals(other.IncludeLinks);
 }
 /// <summary>
 /// Combines the hash codes of the members compared by <c>Equals</c> into a single value.
 /// </summary>
 /// <returns>
 /// A hash built by repeatedly multiplying the running value by 397 and
 /// xor-ing in the next member's hash, with overflow left unchecked.
 /// </returns>
 public override int GetHashCode()
 {
     unchecked
     {
         // The two string members may be null; a null contributes 0.
         int hash = 0;
         if (ResultStreamName != null)
         {
             hash = ResultStreamName.GetHashCode();
         }

         hash *= 397;
         if (PartitionResultStreamNamePattern != null)
         {
             hash ^= PartitionResultStreamNamePattern.GetHashCode();
         }

         // Remaining members are folded in one at a time, in a fixed order.
         hash *= 397;
         hash ^= ReorderEvents.GetHashCode();

         hash *= 397;
         hash ^= ProcessingLag;

         hash *= 397;
         hash ^= IsBiState.GetHashCode();

         hash *= 397;
         hash ^= DefinesStateTransform.GetHashCode();

         hash *= 397;
         hash ^= DefinesCatalogTransform.GetHashCode();

         hash *= 397;
         hash ^= ProducesResults.GetHashCode();

         hash *= 397;
         hash ^= DefinesFold.GetHashCode();

         hash *= 397;
         hash ^= HandlesDeletedNotifications.GetHashCode();

         hash *= 397;
         hash ^= IncludeLinks.GetHashCode();

         return hash;
     }
 }
Exemplo n.º 3
0
        /// <summary>
        /// Core of the scraper: downloads one page, hands it to the observers, and then
        /// recursively scrapes the local links found on that page.
        /// </summary>
        /// <remarks>
        /// Returns without downloading when the end time has passed, when
        /// <c>scrapedUris.TryAdd</c> rejects the URI, or when the robots check disallows the
        /// path. Download exceptions are passed to <c>OnHttpClientException</c> and the page
        /// is then skipped because its HTML is empty.
        /// </remarks>
        /// <param name="uri">
        /// Page to scrape. Expected to be an absolute URI, since <c>PathAndQuery</c> and
        /// <c>Segments</c> are read from it.
        /// </param>
        /// <returns>
        /// A task that completes once this page and every page reached from it has been processed.
        /// </returns>
        async Task DoScrape(Uri uri)
        {
            OnScrape(uri);

            if (endDateTime.HasValue && DateTimeProvider.UtcNow > endDateTime)
            {
                return;
            }
            if (!scrapedUris.TryAdd(uri))
            {
                return;
            }
            if (!DisableRobotsProtocol && !Robots.PathIsAllowed(uri.PathAndQuery))
            {
                return;
            }

            var htmlDoc = new HtmlDoc {
                Uri = uri
            };

            try
            {
                htmlDoc.Html = await httpClient.GetString(uri);
            }
            catch (Exception exception)
            {
                // Best effort: report the failure and fall through; the empty-HTML check below skips the page.
                OnHttpClientException(exception);
            }
            if (string.IsNullOrEmpty(htmlDoc.Html))
            {
                return;
            }

            // Observers are notified when no filter is configured or when the filter matches this URI.
            if (ObserverLinkFilter == null || ObserverLinkFilter.IsMatch(uri.ToString()))
            {
                NotifyObservers(htmlDoc);
            }

            // Base used to resolve links: when the last segment contains a '.', everything
            // after the last '/' of the URI string is cut off; otherwise the URI is used as is.
            var pageUrl = htmlDoc.Uri.ToString();
            var pageBase = htmlDoc.Uri.Segments.Last().Contains('.')
                ? pageUrl.Substring(0, pageUrl.LastIndexOf('/'))
                : pageUrl;

            // URLs are not linguistic text, so compare ordinally rather than with the current culture.
            if (!pageBase.EndsWith("/", StringComparison.Ordinal))
            {
                pageBase += "/";
            }
            var pageBaseUri = new Uri(pageBase);

            // Only use of the CsQuery lib found so far: the HTML string converts to a CQ document.
            CQ cq = htmlDoc.Html;

            // jQuery-like selection: out of the box only the href of every anchor is collected.
            var links = cq["a"].Select(x => x.GetAttribute("href")).Where(x => x != null);

            // External links are not followed: a normalized link must start with the base URI,
            // and links whose string form is longer than 2048 characters are dropped.
            var baseUrl = baseUri.ToString();
            var localLinks = LocalLinks(links)
                .Select(x => NormalizeLink(x, pageBaseUri))
                .Where(x =>
                {
                    var link = x.ToString();
                    return link.StartsWith(baseUrl, StringComparison.Ordinal) && link.Length <= 2048;
                });

            if (IncludeLinks != null)
            {
                localLinks = localLinks.Where(x => IncludeLinks.IsMatch(x.ToString()));
            }
            if (IgnoreLinks != null)
            {
                localLinks = localLinks.Where(x => !IgnoreLinks.IsMatch(x.ToString()));
            }
            if (MaxDepth.HasValue)
            {
                // NOTE(review): the +1 presumably accounts for the leading "/" segment - confirm.
                localLinks = localLinks.Where(x => x.Segments.Length <= MaxDepth + 1);
            }

            // Recursive call to scrape the links found; ToArray starts every child scrape before waiting.
            var tasks = localLinks.Select(DoScrape).ToArray();

            // Await instead of Task.WaitAll so no thread is blocked while the children run.
            // A child failure now surfaces as that child's exception rather than an AggregateException.
            await Task.WhenAll(tasks);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Downloads one page, notifies the observers about it, and recursively scrapes
        /// the local links found in its HTML.
        /// </summary>
        /// <remarks>
        /// The page is skipped when the end time has passed, when <c>scrapedUris.TryAdd</c>
        /// rejects the URI, when the robots check disallows the path, or when the download
        /// yields no HTML. Download exceptions are forwarded to <c>OnHttpClientException</c>.
        /// </remarks>
        /// <param name="uri">
        /// Page to scrape. Expected to be an absolute URI, since <c>PathAndQuery</c> and
        /// <c>Segments</c> are read from it.
        /// </param>
        /// <returns>
        /// A task that completes once this page and all pages reached from it have been processed.
        /// </returns>
        async Task DoScrape(Uri uri)
        {
            OnScrape(uri);

            if (endDateTime.HasValue && DateTimeProvider.UtcNow > endDateTime)
            {
                return;
            }
            if (!scrapedUris.TryAdd(uri))
            {
                return;
            }
            if (!DisableRobotsProtocol && !Robots.PathIsAllowed(uri.PathAndQuery))
            {
                return;
            }

            var htmlDoc = new HtmlDoc {
                Uri = uri
            };

            try
            {
                htmlDoc.Html = await httpClient.GetString(uri);
            }
            catch (Exception exception)
            {
                // Best effort: the failure is reported and the empty-HTML check below ends the scrape of this page.
                OnHttpClientException(exception);
            }
            if (string.IsNullOrEmpty(htmlDoc.Html))
            {
                return;
            }

            // Notify when there is no observer filter, or when the filter matches this URI.
            if (ObserverLinkFilter == null || ObserverLinkFilter.IsMatch(uri.ToString()))
            {
                NotifyObservers(htmlDoc);
            }

            // When the last segment contains a '.', cut the URI string at its last '/'
            // to get the base for resolving links; otherwise use the URI string unchanged.
            var pageUrl = htmlDoc.Uri.ToString();
            var pageBase = htmlDoc.Uri.Segments.Last().Contains('.')
                ? pageUrl.Substring(0, pageUrl.LastIndexOf('/'))
                : pageUrl;

            // Ordinal comparison: URLs must not be compared with culture-specific rules.
            if (!pageBase.EndsWith("/", StringComparison.Ordinal))
            {
                pageBase += "/";
            }
            var pageBaseUri = new Uri(pageBase);

            // The HTML string converts to a CQ document, from which every anchor's href is collected.
            CQ cq = htmlDoc.Html;
            var links = cq["a"].Select(x => x.GetAttribute("href")).Where(x => x != null);

            // Keep only links under the base URI whose string form is at most 2048 characters.
            var baseUrl = baseUri.ToString();
            var localLinks = LocalLinks(links)
                .Select(x => NormalizeLink(x, pageBaseUri))
                .Where(x =>
                {
                    var link = x.ToString();
                    return link.StartsWith(baseUrl, StringComparison.Ordinal) && link.Length <= 2048;
                });

            if (IncludeLinks != null)
            {
                localLinks = localLinks.Where(x => IncludeLinks.IsMatch(x.ToString()));
            }
            if (IgnoreLinks != null)
            {
                localLinks = localLinks.Where(x => !IgnoreLinks.IsMatch(x.ToString()));
            }
            if (MaxDepth.HasValue)
            {
                // NOTE(review): the +1 presumably accounts for the leading "/" segment - confirm.
                localLinks = localLinks.Where(x => x.Segments.Length <= MaxDepth + 1);
            }

            // ToArray forces the query, which starts a recursive scrape for every remaining link.
            var tasks = localLinks.Select(DoScrape).ToArray();

            // Await instead of Task.WaitAll so no thread is blocked while the children run.
            // A child failure now surfaces as that child's exception rather than an AggregateException.
            await Task.WhenAll(tasks);
        }