Пример #1
0
        /// <summary>
        /// Wraps <paramref name="parentUri"/> in a <see cref="UriResourceItem"/> and, when its scheme is
        /// HTTP or HTTPS, collects the URIs referenced by the document at that location.
        /// </summary>
        /// <param name="parentUri">The URI to wrap and scan for outgoing references.</param>
        /// <returns>
        /// The item created for <paramref name="parentUri"/>. This method only adds entries to its
        /// <c>ReferencedUris</c> for HTTP/HTTPS URIs; each resolved URI is added at most once.
        /// </returns>
        /// <remarks>
        /// Link extraction is best-effort: a link that fails to resolve or to be added is skipped and
        /// reported to debug output instead of aborting the scan.
        /// </remarks>
        private UriResourceItem AnalyzeUri(Uri parentUri)
        {
            var uriResourceItem = new UriResourceItem(parentUri);

            bool isWebScheme =
                uriResourceItem.SchemeType == WebToolsCore.Enumerations.UriSchemeType.HTTP ||
                uriResourceItem.SchemeType == WebToolsCore.Enumerations.UriSchemeType.HTTPS;

            // Only web resources are fetched and scanned; anything else is returned as-is.
            if (!isWebScheme)
            {
                return uriResourceItem;
            }

            var htmlDocumentContent = WebUtil.GetHtmlContent(uriResourceItem.CleanUri);
            if (htmlDocumentContent != default)
            {
                // Structured pass: every element carrying an href or src attribute.
                foreach (string attributeValue in htmlDocumentContent.GetAllNodes(new[] { "*[@href]", "*[@src]" }).GetAttributeValues(new[] { "href", "src" }))
                {
                    try
                    {
                        var childUri = UriUtil.ToAbsoluteUri(uriResourceItem.CleanUri, attributeValue);
                        if (!uriResourceItem.ReferencedUris.Contains(childUri))
                        {
                            uriResourceItem.ReferencedUris.Add(childUri);
                        }
                    }
                    catch (Exception ex)
                    {
                        // One bad link must not abort the scan of the remaining links.
                        ReportSkippedLink(attributeValue, ex);
                    }
                }

                // Deep scan: additionally run the raw regex search over the document text.
                if (this.deepScanCheckbox.Checked)
                {
                    foreach (var match in RegexUtil.FindLinkMatches(htmlDocumentContent.Text))
                    {
                        try
                        {
                            var childUri = UriUtil.ToAbsoluteUri(uriResourceItem.CleanUri, match);
                            if (!uriResourceItem.ReferencedUris.Contains(childUri))
                            {
                                uriResourceItem.ReferencedUris.Add(childUri);
                            }
                        }
                        catch (Exception ex)
                        {
                            ReportSkippedLink(match, ex);
                        }
                    }
                }
            }
            else if (this.deepScanCheckbox.Checked)
            {
                // No HTML content was returned (presumably the resource is not a parsable HTML
                // document), so a deep scan falls back to a regex search over the generic content.
                // NOTE(review): this fetch uses parentUri while the HTML fetch above uses
                // uriResourceItem.CleanUri - confirm the difference is intentional.
                var genericDocumentContent = WebUtil.GetDocumentContent(parentUri);
                if (genericDocumentContent != default)
                {
                    foreach (var match in RegexUtil.FindLinkMatches(genericDocumentContent))
                    {
                        try
                        {
                            var childUri = UriUtil.ToAbsoluteUri(uriResourceItem.CleanUri, match);
                            if (!uriResourceItem.ReferencedUris.Contains(childUri))
                            {
                                uriResourceItem.ReferencedUris.Add(childUri);
                            }
                        }
                        catch (Exception ex)
                        {
                            ReportSkippedLink(match, ex);
                        }
                    }
                }
            }

            return uriResourceItem;
        }

        /// <summary>
        /// Reports a link that was skipped during analysis so the failure is visible while debugging.
        /// </summary>
        /// <param name="link">The raw link value that could not be processed.</param>
        /// <param name="error">The exception raised while processing <paramref name="link"/>.</param>
        private static void ReportSkippedLink(object link, Exception error)
        {
            // Debug.WriteLine is conditional on the DEBUG symbol, so builds without it keep
            // skipping bad links silently, exactly as before.
            System.Diagnostics.Debug.WriteLine($"AnalyzeUri: skipped link '{link}': {error.GetType().Name}: {error.Message}");
        }