/// <summary>
/// Extracts text from a downloaded document by spooling the response into a
/// temporary file and reading that file back through a <c>FilterReader</c>.
/// </summary>
/// <param name="crawler">Crawler running the pipeline; not used by this step.</param>
/// <param name="propertyBag">Download result; its <c>Title</c> and <c>Text</c> are set on success.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    const int BufferSize = 0x1000;

    // Only successful downloads are processed.
    if (propertyBag.StatusCode != HttpStatusCode.OK)
    {
        return;
    }

    string fileExtension = MapContentTypeToExtension(propertyBag.ContentType);
    if (fileExtension.IsNullOrEmpty())
    {
        // The content type has no extension mapping, so there is nothing to do.
        return;
    }

    propertyBag.Title = propertyBag.Step.Uri.PathAndQuery;

    using (TempFile scratch = new TempFile())
    {
        // Append the mapped extension to the temp file name before the file is created.
        scratch.FileName = scratch.FileName + "." + fileExtension;

        using (FileStream target = new FileStream(scratch.FileName, FileMode.Create, FileAccess.Write, FileShare.Read, BufferSize))
        {
            using (Stream source = propertyBag.GetResponse())
            {
                source.CopyToStream(target);
            }
        }

        using (FilterReader textReader = new FilterReader(scratch.FileName))
        {
            propertyBag.Text = textReader.ReadToEnd().Trim();
        }
    }
}
/// <summary>
/// Writes the downloaded response to a temporary file, reads its ID3 tags with
/// <c>UltraID3</c> and stores them in the property bag under the <c>MP3_*</c> keys.
/// The work is done synchronously; the returned task is already completed.
/// </summary>
/// <param name="crawler">Crawler running the pipeline; not used by this step.</param>
/// <param name="propertyBag">Download result that receives the tag values.</param>
/// <returns>A completed task.</returns>
public Task ProcessAsync(ICrawler crawler, PropertyBag propertyBag)
{
    const int BufferSize = 0x1000;

    if (propertyBag.StatusCode == HttpStatusCode.OK)
    {
        using (var scratch = new TempFile())
        {
            // Spool the response to disk because the tag reader is given a file name.
            using (var target = new FileStream(scratch.FileName, FileMode.Create, FileAccess.Write, FileShare.Read, BufferSize))
            {
                using (var source = propertyBag.GetResponse())
                {
                    source.CopyTo(target);
                }
            }

            var tags = new UltraID3();
            tags.Read(scratch.FileName);

            propertyBag["MP3_Album"].Value = tags.Album;
            propertyBag["MP3_Artist"].Value = tags.Artist;
            propertyBag["MP3_Comments"].Value = tags.Comments;
            propertyBag["MP3_Duration"].Value = tags.Duration;
            propertyBag["MP3_Genre"].Value = tags.Genre;
            propertyBag["MP3_Title"].Value = tags.Title;
        }
    }

    return Task.CompletedTask;
}
/// <summary>
/// Writes the downloaded response to a temporary file, reads its ID3 tags with
/// <c>UltraID3</c> and stores them in the property bag under the <c>MP3_*</c> keys.
/// </summary>
/// <param name="crawler">Crawler running the pipeline; not used by this step.</param>
/// <param name="propertyBag">Download result that receives the tag values.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    const int BufferSize = 0x1000;

    if (propertyBag.StatusCode == HttpStatusCode.OK)
    {
        using (TempFile scratch = new TempFile())
        {
            // Spool the response to disk because the tag reader is given a file name.
            using (FileStream target = new FileStream(scratch.FileName, FileMode.Create, FileAccess.Write, FileShare.Read, BufferSize))
            {
                using (Stream source = propertyBag.GetResponse())
                {
                    source.CopyToStream(target);
                }
            }

            UltraID3 tags = new UltraID3();
            tags.Read(scratch.FileName);

            propertyBag["MP3_Album"].Value = tags.Album;
            propertyBag["MP3_Artist"].Value = tags.Artist;
            propertyBag["MP3_Comments"].Value = tags.Comments;
            propertyBag["MP3_Duration"].Value = tags.Duration;
            propertyBag["MP3_Genre"].Value = tags.Genre;
            propertyBag["MP3_Title"].Value = tags.Title;
        }
    }
}
/// <summary>
/// Extracts text from a downloaded document by spooling the response into a
/// temporary file and reading that file back through a <c>FilterReader</c>.
/// Only the final read is awaited; the copy to disk is synchronous.
/// </summary>
/// <param name="crawler">Crawler running the pipeline; not used by this step.</param>
/// <param name="propertyBag">Download result; its <c>Title</c> and <c>Text</c> are set on success.</param>
/// <returns>A task that completes when the text has been stored.</returns>
public async Task ProcessAsync(ICrawler crawler, PropertyBag propertyBag)
{
    const int BufferSize = 0x1000;

    // Only successful downloads are processed.
    if (propertyBag.StatusCode != HttpStatusCode.OK)
    {
        return;
    }

    var fileExtension = MapContentTypeToExtension(propertyBag.ContentType);
    if (fileExtension.IsNullOrEmpty())
    {
        // The content type has no extension mapping, so there is nothing to do.
        return;
    }

    propertyBag.Title = propertyBag.Step.Uri.PathAndQuery;

    using (var scratch = new TempFile())
    {
        // Append the mapped extension to the temp file name before the file is created.
        scratch.FileName = scratch.FileName + "." + fileExtension;

        using (var target = new FileStream(scratch.FileName, FileMode.Create, FileAccess.Write, FileShare.Read, BufferSize))
        {
            using (var source = propertyBag.GetResponse())
            {
                source.CopyTo(target);
            }
        }

        using (var textReader = new FilterReader(scratch.FileName))
        {
            var extracted = await textReader.ReadToEndAsync();
            propertyBag.Text = extracted.Trim();
        }
    }
}
/// <summary>
/// Parses the downloaded response into an <c>HtmlDocument</c>.
/// The encoding comes from the property bag's character set, except that
/// "ISO-8859-1" triggers detection from the document content instead.
/// </summary>
/// <param name="propertyBag">Download result supplying the response stream and character set.</param>
/// <returns>The parsed document, or <c>null</c> if any exception occurs while loading.</returns>
public static HtmlDocument LoadFromHtml(PropertyBag propertyBag)
{
    try
    {
        HtmlDocument document = new HtmlDocument();
        document.OptionAddDebuggingAttributes = false;
        document.OptionAutoCloseOnEnd = true;
        document.OptionFixNestedTags = true;
        document.OptionReadEncoding = true;

        using (Stream responseStream = propertyBag.GetResponse())
        {
            Encoding encoding = Encoding.GetEncoding(propertyBag.CharacterSet);
            if (propertyBag.CharacterSet == "ISO-8859-1")
            {
                encoding = document.DetectEncoding(responseStream);
            }

            // Rewind before loading; detection may have consumed part of the stream.
            responseStream.Seek(0, SeekOrigin.Begin);

            if (encoding.IsNull())
            {
                document.Load(responseStream, true);
            }
            else
            {
                document.Load(responseStream, encoding, true);
            }
        }

        return document;
    }
    catch (Exception)
    {
        // Best effort: every failure is reported to the caller as a null document.
        return null;
    }
}
/// <summary>
/// Downloads a crawl step through <c>DownloadAsync</c> and returns the resulting
/// property bag with its response buffered in memory, so that every call to
/// <c>GetResponse</c> yields a fresh <see cref="MemoryStream"/> over the same bytes.
/// </summary>
/// <param name="crawlStep">The step to download.</param>
/// <param name="referrer">The referring step, passed through to <c>DownloadAsync</c>.</param>
/// <param name="method">The download method, passed through to <c>DownloadAsync</c>.</param>
/// <returns>The property bag reported by the download callback.</returns>
/// <exception cref="Exception">
/// Thrown when the download reports an exception; the reported exception is the inner exception.
/// </exception>
private async Task<PropertyBag> DownloadInternalSync(CrawlStep crawlStep, CrawlStep referrer, DownloadMethod method)
{
    PropertyBag result = null;
    Exception ex = null;

    using (var resetEvent = new ManualResetEvent(false))
    {
        await DownloadAsync<object>(crawlStep, referrer, method,
            (RequestState<object> state) =>
            {
                try
                {
                    if (state.Exception.IsNull())
                    {
                        result = state.PropertyBag;
                        if (!result.GetResponse.IsNull())
                        {
                            using (var response = result.GetResponse())
                            {
                                // Snapshot the response bytes so the stream can be re-created on demand.
                                byte[] data;
                                var memory = response as MemoryStream;
                                if (memory != null)
                                {
                                    data = memory.ToArray();
                                }
                                else
                                {
                                    using (var copy = response.CopyToMemory())
                                    {
                                        data = copy.ToArray();
                                    }
                                }

                                result.GetResponse = () => new MemoryStream(data);
                            }
                        }
                    }
                    else
                    {
                        ex = state.Exception;
                    }
                }
                finally
                {
                    // Always signal completion, even if buffering the response throws;
                    // otherwise the WaitOne below could block forever.
                    resetEvent.Set();
                }

                return Task.FromResult(0);
            }, null, null);

        resetEvent.WaitOne();
    }

    if (!ex.IsNull())
    {
        throw new Exception("Error while downloading {0}".FormatWith(crawlStep.Uri), ex);
    }

    return result;
}
/// <summary>
/// Detects the language of the extracted text by calling the Google AJAX
/// language-detection endpoint, and stores the result in the property bag
/// under "Language" (the language code) and "LanguageCulture" (the matching
/// <see cref="CultureInfo"/>).
/// </summary>
/// <param name="crawler">Crawler running the pipeline; validated as non-null.</param>
/// <param name="propertyBag">Download result whose <c>Text</c> is analysed.</param>
/// <remarks>
/// Failures while building the request, downloading or parsing are logged and
/// otherwise ignored; this step is best effort.
/// </remarks>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    AspectF.Define.
        NotNull(crawler, "crawler").
        NotNull(propertyBag, "propertyBag");

    string content = propertyBag.Text;
    if (content.IsNullOrEmpty())
    {
        return;
    }

    string contentLookupText = content.Max(MaxPostSize);

    try
    {
        // The text goes into a query-string value, so it must be percent-encoded;
        // otherwise characters such as '&', '#', '+' or '%' in the page text
        // would truncate or corrupt the q parameter.
        string encodedRequestUrlFragment =
            "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q={0}".FormatWith(
                Uri.EscapeDataString(contentLookupText));

        m_Logger.Verbose("Google language detection using: {0}", encodedRequestUrlFragment);

        IWebDownloader downloader = NCrawlerModule.Container.Resolve<IWebDownloader>();
        PropertyBag result = downloader.Download(new CrawlStep(new Uri(encodedRequestUrlFragment), 0), null, DownloadMethod.GET);
        if (result.IsNull())
        {
            return;
        }

        using (Stream responseReader = result.GetResponse())
        using (StreamReader reader = new StreamReader(responseReader))
        {
            // Only the first line of the response is parsed.
            string json = reader.ReadLine();
            using (MemoryStream ms = new MemoryStream(Encoding.Unicode.GetBytes(json)))
            {
                DataContractJsonSerializer ser = new DataContractJsonSerializer(typeof(LanguageDetector));
                LanguageDetector detector = ser.ReadObject(ms) as LanguageDetector;

                if (!detector.IsNull())
                {
                    CultureInfo culture = CultureInfo.GetCultureInfo(detector.responseData.language);
                    propertyBag["Language"].Value = detector.responseData.language;
                    propertyBag["LanguageCulture"].Value = culture;
                }
            }
        }
    }
    catch (Exception ex)
    {
        m_Logger.Error("Error during google language detection, the error was: {0}", ex.ToString());
    }
}
/// <summary>
/// Reads an XML sitemap from the response and queues every <c>loc</c> entry
/// (sitemap 0.9 namespace) whose value starts with "http://" as a new crawl
/// step one level deeper than the current step.
/// </summary>
/// <param name="crawler">Crawler that receives the new steps.</param>
/// <param name="propertyBag">Download result containing the sitemap.</param>
/// <returns>A task that completes when all steps have been added.</returns>
public async Task ProcessAsync(ICrawler crawler, PropertyBag propertyBag)
{
    if (propertyBag.StatusCode != HttpStatusCode.OK || !IsXmlContent(propertyBag.ContentType))
    {
        return;
    }

    using (var responseStream = propertyBag.GetResponse())
    using (var textReader = new StreamReader(responseStream))
    {
        var sitemap = XDocument.Load(textReader);
        if (sitemap.Root == null)
        {
            return;
        }

        var locName = XName.Get("loc", "http://www.sitemaps.org/schemas/sitemap/0.9");
        var locations = sitemap.Descendants(locName)
            .Where(element => !element.Value.IsNullOrEmpty()
                && element.Value.StartsWith("http://", StringComparison.OrdinalIgnoreCase))
            .Select(element => element.Value);

        foreach (var location in locations)
        {
            // Resolve each entry against the path of the sitemap's own URL.
            var sitemapBase = propertyBag.ResponseUri.GetLeftPart(UriPartial.Path);
            var decoded = ExtendedHtmlUtility.HtmlEntityDecode(location);
            var normalized = NormalizeLink(sitemapBase, decoded);
            if (normalized.IsNullOrEmpty())
            {
                continue;
            }

            var stepProperties = new Dictionary<string, object>
            {
                { Resources.PropertyBagKeyOriginalUrl, location },
                { Resources.PropertyBagKeyOriginalReferrerUrl, propertyBag.ResponseUri }
            };

            await crawler.AddStepAsync(new Uri(normalized), propertyBag.Step.Depth + 1, propertyBag.Step, stepProperties);
        }
    }
}
/// <summary>
/// Reads an XML sitemap from the response and queues every <c>loc</c> entry
/// (sitemap 0.9 namespace) whose value starts with "http://" as a new crawl
/// step one level deeper than the current step.
/// </summary>
/// <param name="crawler">Crawler that receives the new steps.</param>
/// <param name="propertyBag">Download result containing the sitemap.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    if (propertyBag.StatusCode != HttpStatusCode.OK || !IsXmlContent(propertyBag.ContentType))
    {
        return;
    }

    using (Stream responseStream = propertyBag.GetResponse())
    {
        using (StreamReader textReader = new StreamReader(responseStream))
        {
            XDocument sitemap = XDocument.Load(textReader);
            if (sitemap.Root == null)
            {
                return;
            }

            XName locName = XName.Get("loc", "http://www.sitemaps.org/schemas/sitemap/0.9");
            IEnumerable<string> locations = sitemap.Descendants(locName)
                .Where(element => !element.Value.IsNullOrEmpty()
                    && element.Value.StartsWith("http://", StringComparison.OrdinalIgnoreCase))
                .Select(element => element.Value);

            foreach (string location in locations)
            {
                // Resolve each entry against the path of the sitemap's own URL.
                string sitemapBase = propertyBag.ResponseUri.GetLeftPart(UriPartial.Path);
                string decoded = ExtendedHtmlUtility.HtmlEntityDecode(location);
                string normalized = NormalizeLink(sitemapBase, decoded);
                if (normalized.IsNullOrEmpty())
                {
                    continue;
                }

                Dictionary<string, object> stepProperties = new Dictionary<string, object>
                {
                    { Resources.PropertyBagKeyOriginalUrl, location },
                    { Resources.PropertyBagKeyOriginalReferrerUrl, propertyBag.ResponseUri }
                };

                crawler.AddStep(new Uri(normalized), propertyBag.Step.Depth + 1, propertyBag.Step, stepProperties);
            }
        }
    }
}
/// <summary>
/// Stores the trimmed response body as the property bag's text when the
/// download succeeded and the content type is textual.
/// </summary>
/// <param name="crawler">Crawler running the pipeline; not used by this step.</param>
/// <param name="propertyBag">Download result whose <c>Text</c> is set.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    // The content type is only inspected for successful downloads.
    bool isTextResponse = propertyBag.StatusCode == HttpStatusCode.OK
        && IsTextContent(propertyBag.ContentType);

    if (isTextResponse)
    {
        using (Stream responseStream = propertyBag.GetResponse())
        {
            propertyBag.Text = responseStream.ReadToEnd().Trim();
        }
    }
}
/// <summary>
/// Loads the response as HTML, recognises the site type from the response URI
/// and runs <c>Parse</c> over the document. The parsed records are not used
/// by this method.
/// </summary>
/// <param name="crawler">Crawler running the pipeline; not used by this step.</param>
/// <param name="propertyBag">Download result to parse.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    // The response stream is opened up front but never read here; the using
    // block makes sure it is disposed instead of leaked.
    using (propertyBag.GetResponse())
    {
        try
        {
            HtmlDocument htmlDoc = HtmlParse.LoadFromHtml(propertyBag);
            var siteType = HtmlParse.RecogSite(propertyBag.ResponseUri);

            // The result is intentionally discarded; the call is kept in case
            // Parse has effects beyond its return value.
            Parse(htmlDoc, siteType);
        }
        catch (NullReferenceException)
        {
            // Best effort: a null reference during loading or parsing is
            // treated as "nothing to process".
        }
    }
}
/// <summary>
/// Stores the trimmed response body as the property bag's text when the
/// download succeeded and the content type is textual.
/// </summary>
/// <param name="crawler">Crawler running the pipeline; not used by this step.</param>
/// <param name="propertyBag">Download result whose <c>Text</c> is set.</param>
/// <returns>Always 0.</returns>
private async Task<int> ProcessCoreAsync(ICrawler crawler, PropertyBag propertyBag)
{
    // The content type is only inspected for successful downloads.
    var isTextResponse = propertyBag.StatusCode == HttpStatusCode.OK
        && IsTextContent(propertyBag.ContentType);

    if (isTextResponse)
    {
        using (var responseStream = propertyBag.GetResponse())
        {
            var body = await responseStream.ReadToEndAsync().ConfigureAwait(false);
            propertyBag.Text = body.Trim();
        }
    }

    return 0;
}
/// <summary>
/// Extracts the title and the text of a PDF response. The title comes from
/// the document's "Title" info entry when present; the text is the per-page
/// text joined with <see cref="Environment.NewLine"/>.
/// The work is done synchronously; the returned task is already completed.
/// </summary>
/// <param name="crawler">Crawler running the pipeline; validated as non-null.</param>
/// <param name="propertyBag">Download result; its <c>Title</c> and <c>Text</c> are set.</param>
/// <returns>A completed task.</returns>
public Task ProcessAsync(ICrawler crawler, PropertyBag propertyBag)
{
    AspectF.Define.
        NotNull(crawler, "crawler").
        NotNull(propertyBag, "propertyBag");

    if (propertyBag.StatusCode != HttpStatusCode.OK)
    {
        return Task.CompletedTask;
    }

    if (!IsPdfContent(propertyBag.ContentType))
    {
        return Task.CompletedTask;
    }

    using (var input = propertyBag.GetResponse())
    {
        var pdfReader = new PdfReader(input);
        try
        {
            if (pdfReader.Info.TryGetValue("Title", out var title))
            {
                propertyBag.Title = Convert.ToString(title, CultureInfo.InvariantCulture).Trim();
            }

            // A fresh extraction strategy is created for every page: the simple
            // strategy accumulates everything it has rendered, so sharing one
            // instance would make each page's text repeat all earlier pages.
            propertyBag.Text = Enumerable.Range(1, pdfReader.NumberOfPages).
                Select(pageNumber => PdfTextExtractor.GetTextFromPage(pdfReader, pageNumber, new SimpleTextExtractionStrategy())).
                Join(Environment.NewLine);
        }
        finally
        {
            pdfReader.Close();
        }
    }

    return Task.CompletedTask;
}
/// <summary>
/// Extracts the title and the text of a PDF response. The title comes from
/// the document's "Title" info entry when present; the text is the per-page
/// text joined with <see cref="Environment.NewLine"/>.
/// </summary>
/// <param name="crawler">Crawler running the pipeline; validated as non-null.</param>
/// <param name="propertyBag">Download result; its <c>Title</c> and <c>Text</c> are set.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    AspectF.Define.
        NotNull(crawler, "crawler").
        NotNull(propertyBag, "propertyBag");

    if (propertyBag.StatusCode != HttpStatusCode.OK)
    {
        return;
    }

    if (!IsPdfContent(propertyBag.ContentType))
    {
        return;
    }

    using (Stream input = propertyBag.GetResponse())
    {
        PdfReader pdfReader = new PdfReader(input);
        try
        {
            string title;
            if (pdfReader.Info.TryGetValue("Title", out title))
            {
                propertyBag.Title = Convert.ToString(title, CultureInfo.InvariantCulture).Trim();
            }

            // A fresh extraction strategy is created for every page: the simple
            // strategy accumulates everything it has rendered, so sharing one
            // instance would make each page's text repeat all earlier pages.
            propertyBag.Text = Enumerable.Range(1, pdfReader.NumberOfPages).
                Select(pageNumber => PdfTextExtractor.GetTextFromPage(pdfReader, pageNumber, new SimpleTextExtractionStrategy())).
                Join(Environment.NewLine);
        }
        finally
        {
            pdfReader.Close();
        }
    }
}
/// <summary>
/// Loads the response as HTML, recognises the site type from the response URI,
/// parses the document into records and adds each record to <c>DAL.Data</c>,
/// incrementing <c>ci.Count</c> once per record.
/// </summary>
/// <param name="crawler">Crawler running the pipeline; not used by this step.</param>
/// <param name="propertyBag">Download result to parse.</param>
public void Process(NCrawler.Crawler crawler, PropertyBag propertyBag)
{
    // The response stream is opened up front but never read here; the using
    // block makes sure it is disposed instead of leaked.
    using (propertyBag.GetResponse())
    {
        try
        {
            HtmlDocument htmlDoc = HtmlParse.LoadFromHtml(propertyBag);
            var siteType = HtmlParse.RecogSite(propertyBag.ResponseUri);
            var records = Parse(htmlDoc, siteType);
            if (records == null)
            {
                return;
            }

            foreach (var record in records)
            {
                DAL.Data.Add(record);
                ++ci.Count;
            }
        }
        catch (NullReferenceException)
        {
            // Best effort: a null reference during loading, parsing or storing
            // ends processing of this page without reporting an error.
        }
    }
}
/// <summary>
/// Extracts the title and a rough text rendering of a PDF response by
/// tokenising each page's content stream: string tokens are appended to the
/// text, and two specific token patterns are turned into extra spaces.
/// </summary>
/// <param name="crawler">Crawler running the pipeline; validated as non-null.</param>
/// <param name="propertyBag">Download result; its <c>Title</c> and <c>Text</c> are set.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    AspectF.Define.
        NotNull(crawler, "crawler").
        NotNull(propertyBag, "propertyBag");

    if (propertyBag.StatusCode != HttpStatusCode.OK || !IsPdfContent(propertyBag.ContentType))
    {
        return;
    }

    using (Stream pdfStream = propertyBag.GetResponse())
    {
        PdfReader document = new PdfReader(pdfStream);
        try
        {
            // The title is only taken over when it is present and non-empty after trimming.
            object rawTitle = document.Info["Title"];
            if (!rawTitle.IsNull())
            {
                string trimmedTitle = Convert.ToString(rawTitle, CultureInfo.InvariantCulture).Trim();
                if (!trimmedTitle.IsNullOrEmpty())
                {
                    propertyBag.Title = trimmedTitle;
                }
            }

            StringBuilder text = new StringBuilder();

            // Token-based extraction adapted from:
            // http://www.vbforums.com/showthread.php?t=475759
            int pageCount = document.NumberOfPages;
            for (int page = 1; page <= pageCount; page++)
            {
                byte[] pageContent = document.GetPageContent(page);
                if (pageContent.IsNull())
                {
                    continue;
                }

                PRTokeniser tokeniser = new PRTokeniser(pageContent);
                while (tokeniser.NextToken())
                {
                    int kind = tokeniser.TokenType;
                    string value = tokeniser.StringValue;

                    if (kind == PRTokeniser.TK_STRING)
                    {
                        text.Append(tokeniser.StringValue);
                        text.Append(" ");
                    }
                    else if (kind == 1 && value == "-600")
                    {
                        text.Append(" ");
                    }
                    else if (kind == 10 && value == "TJ")
                    {
                        text.Append(" ");
                    }
                }
            }

            propertyBag.Text = text.ToString();
        }
        finally
        {
            document.Close();
        }
    }
}
/// <summary>
/// Downloads and parses the start page's robots.txt, collecting the
/// <c>Disallow</c> entries from the user-agent sections that apply to this
/// crawler (a section applies when its agent contains "*" or this downloader's
/// user agent). The upper-cased entries are stored in <c>m_DenyUrls</c>.
/// </summary>
/// <remarks>
/// Best effort: if robots.txt cannot be downloaded or any exception occurs,
/// <c>m_DenyUrls</c> is left unchanged.
/// </remarks>
private void Initialize()
{
    try
    {
        // robots.txt lives at the root of the start page's own scheme, host and
        // port, so it is resolved against the start URI instead of assuming
        // plain http on the default port.
        Uri robotsUri = new Uri(m_StartPageUri, "/robots.txt");
        PropertyBag robots = m_WebDownloader.Download(new CrawlStep(robotsUri, 0), null, DownloadMethod.GET);

        if (robots == null || robots.StatusCode != HttpStatusCode.OK)
        {
            return;
        }

        string fileContents;
        using (StreamReader stream = new StreamReader(robots.GetResponse(), Encoding.ASCII))
        {
            fileContents = stream.ReadToEnd();
        }

        // Split on both CR and LF regardless of the platform's newline, so
        // files with either line-ending style are handled the same everywhere.
        string[] fileLines = fileContents.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

        bool rulesApply = false;
        List<string> rules = new List<string>();

        foreach (string line in fileLines)
        {
            RobotInstruction ri = new RobotInstruction(line);
            if (ri.Instruction.IsNullOrEmpty())
            {
                continue;
            }

            switch (ri.Instruction[0])
            {
                case 'u': // User-Agent: decides whether the following rules apply to us
                    rulesApply =
                        ri.UrlOrAgent.IndexOf("*", StringComparison.Ordinal) >= 0 ||
                        ri.UrlOrAgent.IndexOf(m_WebDownloader.UserAgent, StringComparison.Ordinal) >= 0;
                    break;

                case 'd': // Disallow
                    if (rulesApply)
                    {
                        rules.Add(ri.UrlOrAgent.ToUpperInvariant());
                    }

                    break;

                default:
                    // '#' comments, 'a' (Allow) and unknown instructions are ignored.
                    break;
            }
        }

        m_DenyUrls = rules.ToArray();
    }
    catch (Exception)
    {
        // Deliberately ignored: a missing or unreadable robots.txt must not
        // prevent crawling.
    }
}
/// <summary>
/// Parses an HTML response and populates the property bag with the parsed
/// document ("HtmlDoc"), the title, the meta name/content pairs ("Meta") and
/// the extracted text, then queues every link and reference found in the
/// document as a new crawl step.
/// </summary>
/// <param name="crawler">Crawler that receives the new steps; validated as non-null.</param>
/// <param name="propertyBag">Download result to process; validated as non-null.</param>
public override void Process(Crawler crawler, PropertyBag propertyBag)
{
    AspectF.Define.
        NotNull(crawler, "crawler").
        NotNull(propertyBag, "propertyBag");

    if (propertyBag.StatusCode != HttpStatusCode.OK)
    {
        return;
    }

    if (!IsHtmlContent(propertyBag.ContentType))
    {
        return;
    }

    HtmlDocument htmlDoc = new HtmlDocument
    {
        OptionAddDebuggingAttributes = false,
        OptionAutoCloseOnEnd = true,
        OptionFixNestedTags = true,
        OptionReadEncoding = true
    };

    using (Stream reader = propertyBag.GetResponse())
    {
        Encoding documentEncoding = htmlDoc.DetectEncoding(reader);

        // Rewind before loading; detection may have consumed part of the stream.
        reader.Seek(0, SeekOrigin.Begin);
        if (!documentEncoding.IsNull())
        {
            htmlDoc.Load(reader, documentEncoding, true);
        }
        else
        {
            htmlDoc.Load(reader, true);
        }
    }

    // Keep the unmodified markup: link stripping below starts from it again.
    string originalContent = htmlDoc.DocumentNode.OuterHtml;
    if (HasTextStripRules || HasSubstitutionRules)
    {
        string content = StripText(originalContent);
        content = Substitute(content, propertyBag.Step);
        using (TextReader tr = new StringReader(content))
        {
            htmlDoc.Load(tr);
        }
    }

    propertyBag["HtmlDoc"].Value = htmlDoc;

    // Extract Title
    HtmlNodeCollection nodes = htmlDoc.DocumentNode.SelectNodes("//title");
    if (!nodes.IsNull())
    {
        propertyBag.Title = string.Join(";", nodes.
            Select(n => n.InnerText).
            ToArray()).Trim();
    }

    // Extract Meta Data
    nodes = htmlDoc.DocumentNode.SelectNodes("//meta[@content and @name]");
    if (!nodes.IsNull())
    {
        propertyBag["Meta"].Value = (
            from entry in nodes
            let name = entry.Attributes["name"]
            let content = entry.Attributes["content"]
            where !name.IsNull() && !name.Value.IsNullOrEmpty() && !content.IsNull() && !content.Value.IsNullOrEmpty()
            select name.Value + ": " + content.Value).ToArray();
    }

    // Extract text
    propertyBag.Text = htmlDoc.ExtractText().Trim();
    if (HasLinkStripRules || HasTextStripRules)
    {
        string content = StripLinks(originalContent);
        using (TextReader tr = new StringReader(content))
        {
            htmlDoc.Load(tr);
        }
    }

    string baseUrl = propertyBag.ResponseUri.GetLeftPart(UriPartial.Path);

    // Extract Head Base: the first well-formed <base href> wins; the response
    // URL's path is appended as the fallback when there is none.
    nodes = htmlDoc.DocumentNode.SelectNodes("//head/base[@href]");
    if (!nodes.IsNull())
    {
        baseUrl = nodes.
            Select(entry => entry.Attributes["href"]).
            Where(href => !href.IsNull() && !href.Value.IsNullOrEmpty() &&
                          Uri.IsWellFormedUriString(href.Value, UriKind.RelativeOrAbsolute)).
            Select(href => href.Value).
            AddToEnd(baseUrl).
            FirstOrDefault();
    }

    // Extract Links
    DocumentWithLinks links = htmlDoc.GetLinks();
    foreach (string link in links.Links.Union(links.References))
    {
        if (link.IsNullOrEmpty())
        {
            continue;
        }

        string decodedLink = ExtendedHtmlUtility.HtmlEntityDecode(link);
        try
        {
            string normalizedLink = NormalizeLink(baseUrl, decodedLink);
            if (normalizedLink.IsNullOrEmpty())
            {
                continue;
            }

            AddStepToCrawler(crawler, propertyBag, normalizedLink, link);
        }
        catch (UriFormatException)
        {
            // A link that is not properly formatted is ignored.
        }
    }
}
/// <summary>
/// Parses an HTML response and populates the property bag with the parsed
/// document ("HtmlDoc"), the title, the meta name/content pairs ("Meta") and
/// the extracted text, then queues every link and reference found in the
/// document as a crawl step one level deeper than the current step.
/// </summary>
/// <param name="crawler">Crawler that receives the new steps; validated as non-null.</param>
/// <param name="propertyBag">Download result to process; validated as non-null.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    AspectF.Define.
        NotNull(crawler, "crawler").
        NotNull(propertyBag, "propertyBag");

    if (propertyBag.StatusCode != HttpStatusCode.OK || !IsHtmlContent(propertyBag.ContentType))
    {
        return;
    }

    HtmlDocument document = new HtmlDocument();
    document.OptionAddDebuggingAttributes = false;
    document.OptionAutoCloseOnEnd = true;
    document.OptionFixNestedTags = true;
    document.OptionReadEncoding = true;

    using (Stream responseStream = propertyBag.GetResponse())
    {
        Encoding detected = document.DetectEncoding(responseStream);

        // Rewind before loading; detection may have consumed part of the stream.
        responseStream.Seek(0, SeekOrigin.Begin);
        if (detected.IsNull())
        {
            document.Load(responseStream, true);
        }
        else
        {
            document.Load(responseStream, detected, true);
        }
    }

    // Keep the unmodified markup: link stripping below starts from it again.
    string rawHtml = document.DocumentNode.OuterHtml;
    if (HasTextStripRules || HasSubstitutionRules)
    {
        string rewritten = Substitute(StripText(rawHtml), propertyBag.Step);
        using (TextReader rewrittenReader = new StringReader(rewritten))
        {
            document.Load(rewrittenReader);
        }
    }

    propertyBag["HtmlDoc"].Value = document;

    // Title: the inner text of all title elements, joined with ';'.
    HtmlNodeCollection titleNodes = document.DocumentNode.SelectNodes("//title");
    if (!titleNodes.IsNull())
    {
        string[] titles = titleNodes.Select(node => node.InnerText).ToArray();
        propertyBag.Title = string.Join(";", titles).Trim();
    }

    // Meta data: "name: content" for every meta element where both are non-empty.
    HtmlNodeCollection metaNodes = document.DocumentNode.SelectNodes("//meta[@content and @name]");
    if (!metaNodes.IsNull())
    {
        propertyBag["Meta"].Value = metaNodes
            .Select(entry => new { name = entry.Attributes["name"], content = entry.Attributes["content"] })
            .Where(pair => !pair.name.IsNull() && !pair.name.Value.IsNullOrEmpty()
                && !pair.content.IsNull() && !pair.content.Value.IsNullOrEmpty())
            .Select(pair => pair.name.Value + ": " + pair.content.Value)
            .ToArray();
    }

    propertyBag.Text = document.ExtractText().Trim();

    if (HasLinkStripRules || HasTextStripRules)
    {
        string withoutLinks = StripLinks(rawHtml);
        using (TextReader strippedReader = new StringReader(withoutLinks))
        {
            document.Load(strippedReader);
        }
    }

    // Links: each non-empty link is decoded, normalised against the response
    // URL's path and queued as a new step.
    DocumentWithLinks found = document.GetLinks();
    foreach (string rawLink in found.Links.Union(found.References))
    {
        if (rawLink.IsNullOrEmpty())
        {
            continue;
        }

        string responseBase = propertyBag.ResponseUri.GetLeftPart(UriPartial.Path);
        string decoded = ExtendedHtmlUtility.HtmlEntityDecode(rawLink);
        string normalized = NormalizeLink(responseBase, decoded);
        if (normalized.IsNullOrEmpty())
        {
            continue;
        }

        Dictionary<string, object> stepProperties = new Dictionary<string, object>
        {
            { Resources.PropertyBagKeyOriginalUrl, rawLink },
            { Resources.PropertyBagKeyOriginalReferrerUrl, propertyBag.ResponseUri }
        };

        crawler.AddStep(new Uri(normalized), propertyBag.Step.Depth + 1, propertyBag.Step, stepProperties);
    }
}
/// <summary>
/// Parses an HTML response and populates the property bag with the parsed
/// document ("HtmlDoc"), the title, the meta name/content pairs ("Meta") and
/// the extracted text, then queues every link and reference found in the
/// document as a crawl step one level deeper than the current step.
/// </summary>
/// <param name="crawler">Crawler that receives the new steps; validated as non-null.</param>
/// <param name="propertyBag">Download result to process; validated as non-null.</param>
/// <returns>A task that completes when all steps have been added.</returns>
public virtual async Task ProcessAsync(ICrawler crawler, PropertyBag propertyBag)
{
    AspectF.Define.
        NotNull(crawler, "crawler").
        NotNull(propertyBag, "propertyBag");

    if (propertyBag.StatusCode != HttpStatusCode.OK)
    {
        return;
    }

    if (!IsHtmlContent(propertyBag.ContentType))
    {
        return;
    }

    var htmlDoc = new HtmlDocument
    {
        OptionAddDebuggingAttributes = false,
        OptionAutoCloseOnEnd = true,
        OptionFixNestedTags = true,
        OptionReadEncoding = true
    };

    using (var reader = propertyBag.GetResponse())
    {
        var documentEncoding = htmlDoc.DetectEncoding(reader);

        // Rewind before loading; detection may have consumed part of the stream.
        reader.Seek(0, SeekOrigin.Begin);
        if (!documentEncoding.IsNull())
        {
            htmlDoc.Load(reader, documentEncoding, true);
        }
        else
        {
            htmlDoc.Load(reader, true);
        }
    }

    // Keep the unmodified markup: link stripping below starts from it again.
    var originalContent = htmlDoc.DocumentNode.OuterHtml;
    if (this.HasTextStripRules || this.HasSubstitutionRules)
    {
        var content = this.StripText(originalContent);
        content = this.Substitute(content, propertyBag.Step);
        using (TextReader tr = new StringReader(content))
        {
            htmlDoc.Load(tr);
        }
    }

    propertyBag["HtmlDoc"].Value = htmlDoc;

    // Extract Title
    var nodes = htmlDoc.DocumentNode.SelectNodes("//title");
    if (!nodes.IsNull())
    {
        propertyBag.Title = string.Join(";", nodes.
            Select(n => n.InnerText).
            ToArray()).Trim();
    }

    // Extract Meta Data
    nodes = htmlDoc.DocumentNode.SelectNodes("//meta[@content and @name]");
    if (!nodes.IsNull())
    {
        propertyBag["Meta"].Value = (
            from entry in nodes
            let name = entry.Attributes["name"]
            let content = entry.Attributes["content"]
            where !name.IsNull() && !name.Value.IsNullOrEmpty() && !content.IsNull() && !content.Value.IsNullOrEmpty()
            select name.Value + ": " + content.Value).ToArray();
    }

    // Extract text
    propertyBag.Text = htmlDoc.ExtractText().Trim();
    if (this.HasLinkStripRules || this.HasTextStripRules)
    {
        var content = this.StripLinks(originalContent);
        using (TextReader tr = new StringReader(content))
        {
            htmlDoc.Load(tr);
        }
    }

    var baseUrl = propertyBag.ResponseUri.GetLeftPath();

    // Extract Head Base: the first well-formed <base href> wins; the response
    // URL's left path is appended as the fallback when there is none.
    nodes = htmlDoc.DocumentNode.SelectNodes("//head/base[@href]");
    if (!nodes.IsNull())
    {
        baseUrl = nodes.
            Select(entry => entry.Attributes["href"]).
            Where(href => !href.IsNull() && !href.Value.IsNullOrEmpty() &&
                          Uri.IsWellFormedUriString(href.Value, UriKind.RelativeOrAbsolute)).
            Select(href => href.Value).
            AddToEnd(baseUrl).
            FirstOrDefault();
    }

    // Extract Links
    var links = htmlDoc.GetLinks();
    foreach (var link in links.Links.Union(links.References))
    {
        if (link.IsNullOrEmpty())
        {
            continue;
        }

        var decodedLink = ExtendedHtmlUtility.HtmlEntityDecode(link);
        var normalizedLink = this.NormalizeLink(baseUrl, decodedLink);
        if (normalizedLink.IsNullOrEmpty())
        {
            continue;
        }

        await crawler.AddStepAsync(new Uri(normalizedLink), propertyBag.Step.Depth + 1,
            propertyBag.Step, new Dictionary<string, object>
            {
                { Resources.PropertyBagKeyOriginalUrl, link },
                { Resources.PropertyBagKeyOriginalReferrerUrl, propertyBag.ResponseUri }
            }).ConfigureAwait(false);
    }
}