/// <summary>
/// Builds a <see cref="UriResourceItem"/> for <paramref name="parentUri"/> and, when its scheme is
/// HTTP or HTTPS, fills its <c>ReferencedUris</c> collection with the links found in the fetched content.
/// </summary>
/// <remarks>
/// Links are taken from the <c>href</c> and <c>src</c> attributes of the HTML document. When the
/// deep-scan checkbox is checked, the raw text is additionally scanned with
/// <c>RegexUtil.FindLinkMatches</c>; if no HTML document could be obtained, deep scan falls back to
/// the generic document content instead. Links that cannot be resolved are skipped.
/// </remarks>
/// <param name="parentUri">The URI to analyze.</param>
/// <returns>The resource item created for <paramref name="parentUri"/>; its referenced URIs are
/// only populated for HTTP/HTTPS URIs.</returns>
private UriResourceItem AnalyzeUri(Uri parentUri)
{
    var uriResourceItem = new UriResourceItem(parentUri);

    bool isWebScheme =
        uriResourceItem.SchemeType == WebToolsCore.Enumerations.UriSchemeType.HTTP ||
        uriResourceItem.SchemeType == WebToolsCore.Enumerations.UriSchemeType.HTTPS;
    if (!isWebScheme)
    {
        // Only HTTP(S) resources are fetched and scanned.
        return uriResourceItem;
    }

    // Analyze elements that could contain hrefs
    var htmlDocumentContent = WebUtil.GetHtmlContent(uriResourceItem.CleanUri);
    if (htmlDocumentContent != default)
    {
        // Analyze links
        foreach (string attributeValue in htmlDocumentContent.GetAllNodes(new[] { "*[@href]", "*[@src]" }).GetAttributeValues(new[] { "href", "src" }))
        {
            TryAddReferencedUri(uriResourceItem, attributeValue);
        }

        // Are we deep scanning?
        if (this.deepScanCheckbox.Checked)
        {
            // Raw regex check over the document text
            foreach (var match in RegexUtil.FindLinkMatches(htmlDocumentContent.Text))
            {
                TryAddReferencedUri(uriResourceItem, match);
            }
        }
    }
    else if (this.deepScanCheckbox.Checked)
    {
        // The current link isn't an HTML-parsable document; with deep scan enabled,
        // fall back to a regex scan of the generic document content.
        // NOTE(review): this fetch uses parentUri while every other call uses
        // uriResourceItem.CleanUri - confirm that the difference is intentional.
        var genericDocumentContent = WebUtil.GetDocumentContent(parentUri);
        if (genericDocumentContent != default)
        {
            foreach (var match in RegexUtil.FindLinkMatches(genericDocumentContent))
            {
                TryAddReferencedUri(uriResourceItem, match);
            }
        }
    }

    return uriResourceItem;
}

/// <summary>
/// Resolves <paramref name="link"/> against the item's clean URI and adds the result to
/// <c>ReferencedUris</c> unless it is already present. Any failure is traced and the link is skipped.
/// </summary>
/// <param name="uriResourceItem">The item whose referenced URIs are being collected.</param>
/// <param name="link">The raw link text to resolve.</param>
private static void TryAddReferencedUri(UriResourceItem uriResourceItem, string link)
{
    try
    {
        var childUri = UriUtil.ToAbsoluteUri(uriResourceItem.CleanUri, link);

        // If we don't already have this uri, let's add it
        if (!uriResourceItem.ReferencedUris.Contains(childUri))
        {
            uriResourceItem.ReferencedUris.Add(childUri);
        }
    }
    catch (Exception ex)
    {
        // Deliberately best-effort: one bad link must not abort the whole scan,
        // but leave a trace so the failure can be diagnosed in debug builds.
        System.Diagnostics.Debug.WriteLine($"AnalyzeUri: skipped link '{link}': {ex.Message}");
    }
}