Beispiel #1
0
        /// <summary>
        /// Rewrites the <c>src</c> attribute of every script node returned by
        /// <c>KHtml.GetScriptsNodeCollection()</c> to a placeholder and records the script.
        /// </summary>
        /// <remarks>
        /// For each script node the <c>src</c> value is resolved against the base tag's Href
        /// (when <c>KHtml.BaseTag.Exists</c>) or against <c>RootUrl</c>. Scripts without an existing
        /// placeholder are added to <c>Resources.UniqueScriptsDictionary</c>; hosts that differ from
        /// <c>RootUrl.Host</c> are added to <c>Resources.ExternalDomains</c>.
        /// Exceptions are never propagated: they are reported through <c>ErrorLogMethod</c>,
        /// per node where possible so one bad node does not stop the loop.
        /// </remarks>
        public void IdentifyJavaScript()
        {
            try
            {
                HtmlNodeCollection scriptNodes = KHtml.GetScriptsNodeCollection();
                if (scriptNodes == null)
                {
                    ErrorLogMethod(LOGTYPE.INFORMATION, $"No Script Tag found for url : {RootUrl.AbsoluteUri}", null);
                    return;
                }

                foreach (var node in scriptNodes)
                {
                    try
                    {
                        // Only "script" elements are handled here; anything else is skipped.
                        if (node.Name != "script")
                        {
                            continue;
                        }

                        const string attributeName = "src";
                        string attributeValue = node.GetAttributeValue(attributeName, String.Empty);

                        // An empty src (e.g. an inline script) would resolve to the base Uri itself,
                        // so the page's own address would be registered as a script and written into
                        // the node. There is nothing to rewrite for such nodes.
                        if (String.IsNullOrEmpty(attributeValue))
                        {
                            continue;
                        }

                        // Resolve against the base tag when the document declares one.
                        Uri absoluteUri = null;
                        if (KHtml.BaseTag.Exists)
                        {
                            Uri.TryCreate(KHtml.BaseTag.Href, attributeValue, out absoluteUri);
                        }
                        else
                        {
                            Uri.TryCreate(RootUrl, attributeValue, out absoluteUri);
                        }

                        // Guard before any dereference: TryCreate leaves the Uri null on failure.
                        if (absoluteUri == null)
                        {
                            ErrorLogMethod(LOGTYPE.ERROR, $"Unable to create absolute Uri for script src : {attributeValue}, for Url : {RootUrl.AbsoluteUri}", null);
                            continue;
                        }

                        absoluteUri = Utils.GenerateUriToProcess(absoluteUri, Resources.IgnoreFileNameChangeRegex);
                        string placeHolder = KHtmlParser.CheckUrlPresentInAssetOrNot(absoluteUri, Resources);

                        // "IGNORE" marks a url that must be left exactly as it is in the document.
                        if (placeHolder != null && placeHolder.Equals("IGNORE", StringComparison.InvariantCultureIgnoreCase))
                        {
                            continue;
                        }

                        if (placeHolder == null)
                        {
                            // First time this script is seen: create its placeholder and register it.
                            placeHolder = String.Format("[Kitsune_{0}]", absoluteUri.AbsoluteUri);

                            AssetDetails javaScriptFileLink = new AssetDetails
                            {
                                LinkUrl     = absoluteUri.AbsoluteUri,
                                PlaceHolder = placeHolder
                            };

                            Resources.UniqueScriptsDictionary.TryAdd(absoluteUri.AbsoluteUri, javaScriptFileLink);
                        }

                        node.SetAttributeValue(attributeName, placeHolder);

                        // Track hosts other than the site's own.
                        if (!absoluteUri.Host.Equals(RootUrl.Host, StringComparison.OrdinalIgnoreCase))
                        {
                            Resources.ExternalDomains.Add(absoluteUri.Host.ToUpper());
                        }
                    }
                    catch (Exception ex)
                    {
                        ErrorLogMethod(LOGTYPE.ERROR, $"Error while processing the Node(IdentifyingJavaScript Method) for Url : {RootUrl.AbsoluteUri}", ex);
                    }
                }
            }
            catch (Exception ex)
            {
                ErrorLogMethod(LOGTYPE.ERROR, "Error While finding the script tag.", ex);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Resolves an asset url found on <paramref name="node"/> and returns the placeholder
        /// that stands for it.
        /// </summary>
        /// <param name="node">Node the url belongs to; only its <c>Name</c> is read.</param>
        /// <param name="url">Raw url value; it is HTML-decoded before being resolved.</param>
        /// <returns>
        /// The existing or newly created placeholder. <c>null</c> when the url is null/empty,
        /// cannot be resolved, is marked "IGNORE", belongs to a script or iframe node
        /// (no placeholder is created for those), or when an exception occurred.
        /// </returns>
        /// <remarks>
        /// The url is resolved against the base tag's Href when <c>KHtml.BaseTag.Exists</c>,
        /// otherwise against <c>RootUrl</c>. New assets are added to
        /// <c>Resources.UniqueAssetsDictionary</c>; hosts different from <c>RootUrl.Host</c> are
        /// added to <c>Resources.ExternalDomains</c>. Exceptions are logged, not propagated.
        /// </remarks>
        public string ProcessAssetAndCreatePlaceHolder(HtmlNode node, string url)
        {
            try
            {
                if (String.IsNullOrEmpty(url))
                {
                    ErrorLogMethod(LOGTYPE.INFORMATION, "Url was null or Empty", null);
                    return null;
                }

                string decodedUrl  = WebUtility.HtmlDecode(url);
                string elementName = node.Name.ToLower();

                // Resolve against the base tag when the document declares one.
                Uri resolvedUri;
                if (!KHtml.BaseTag.Exists)
                {
                    Uri.TryCreate(RootUrl, decodedUrl, out resolvedUri);
                }
                else
                {
                    Uri.TryCreate(KHtml.BaseTag.Href, decodedUrl, out resolvedUri);
                }

                if (resolvedUri == null)
                {
                    return null;
                }

                string assetPlaceHolder = null;

                // Script and iframe urls get no placeholder here; only their host is recorded below.
                bool isScriptOrFrame = elementName.Equals("script") || elementName.Equals("iframe");
                if (!isScriptOrFrame)
                {
                    resolvedUri      = Utils.GenerateUriToProcess(resolvedUri, Resources.IgnoreFileNameChangeRegex);
                    assetPlaceHolder = KHtmlParser.CheckUrlPresentInAssetOrNot(resolvedUri, Resources);

                    bool isIgnored = assetPlaceHolder != null
                                     && assetPlaceHolder.Equals("IGNORE", StringComparison.InvariantCultureIgnoreCase);
                    if (isIgnored)
                    {
                        return null;
                    }

                    if (assetPlaceHolder == null)
                    {
                        // First occurrence of this asset: create and register its placeholder.
                        assetPlaceHolder = String.Format("[Kitsune_{0}]", resolvedUri.AbsoluteUri);

                        AssetDetails details = new AssetDetails
                        {
                            LinkUrl     = resolvedUri.AbsoluteUri,
                            PlaceHolder = assetPlaceHolder
                        };

                        //TODO : what if try add fails
                        Resources.UniqueAssetsDictionary.TryAdd(resolvedUri.AbsoluteUri, details);
                    }
                }

                // Track hosts other than the site's own.
                string host = resolvedUri.Host;
                if (!host.Equals(RootUrl.Host))
                {
                    Resources.ExternalDomains.Add(host.ToUpper());
                }

                return assetPlaceHolder;
            }
            catch (Exception ex)
            {
                ErrorLogMethod(LOGTYPE.ERROR, String.Format("Error while processing the Url: {0}", url), ex);
                return null;
            }
        }
Beispiel #3
0
        /// <summary>
        /// Rewrites same-host links (anchor <c>href</c> and GET-form <c>action</c>) to placeholders
        /// and queues newly discovered pages.
        /// </summary>
        /// <remarks>
        /// Nodes come from <c>KHtml.GetAnchorsNodeCollection()</c>. A link is processed only when it
        /// is non-empty, does not start with "#", its scheme is in <c>AllowedScheme</c> and its host
        /// equals <c>RootUrl.Host</c>. Pages without an existing placeholder are added to
        /// <c>Resources.UniqueWebpagesDictionary</c> and enqueued on <c>UniqueWebPageQueue</c>.
        /// Exceptions are never propagated: they are reported through <c>ErrorLogMethod</c>,
        /// per node where possible so one bad node does not stop the loop.
        /// NOTE(review): links are always resolved against <c>RootUrl</c>; the base tag is not
        /// consulted in this method - confirm that is intended.
        /// </remarks>
        public void IdentifyAndUpdateWebpagesList()
        {
            try
            {
                HtmlNodeCollection anchorNodes = KHtml.GetAnchorsNodeCollection();
                if (anchorNodes == null)
                {
                    ErrorLogMethod(LOGTYPE.INFORMATION, $"No Links found in {RootUrl.AbsoluteUri}", null);
                    return;
                }

                foreach (var node in anchorNodes)
                {
                    try
                    {
                        string attributeValue = String.Empty;
                        string attributeName  = String.Empty;

                        switch (node.Name)
                        {
                            case "a":
                                attributeName  = "href";
                                attributeValue = node.GetAttributeValue("href", string.Empty);
                                break;

                            case "form":
                                // Only forms submitted with GET and having an action are treated as links.
                                string formMethod = node.GetAttributeValue("method", string.Empty);
                                string formAction = node.GetAttributeValue("action", string.Empty);
                                if (string.IsNullOrEmpty(formAction)
                                    || string.IsNullOrEmpty(formMethod)
                                    || !formMethod.Equals("get", StringComparison.InvariantCultureIgnoreCase))
                                {
                                    continue;
                                }
                                attributeName  = "action";
                                attributeValue = formAction;
                                break;

                            default:
                                continue;
                        }

                        //  TODO: check whether the value should be HTML-decoded (WebUtility.HtmlDecode) first

                        // Empty values and values starting with "#" are left untouched.
                        if (string.IsNullOrEmpty(attributeValue) || attributeValue.StartsWith("#", StringComparison.Ordinal))
                        {
                            continue;
                        }

                        Uri absoluteUri;
                        if (!Uri.TryCreate(RootUrl, attributeValue, out absoluteUri))
                        {
                            // Caught by the per-node handler below, which logs it and moves on.
                            throw new UriFormatException(String.Format("Unable to create absoluteUri for RootUrl : {0} and relativeUri : {1}"
                                                                       , RootUrl, attributeValue));
                        }

                        // Links with a scheme outside AllowedScheme are left untouched.
                        if (!AllowedScheme.Contains(absoluteUri.Scheme.ToLower()))
                        {
                            continue;
                        }

                        // Links to another host are left untouched.
                        if (!absoluteUri.Host.Equals(RootUrl.Host, StringComparison.OrdinalIgnoreCase))
                        {
                            continue;
                        }

                        // Remember the fragment before the Uri is transformed; it is re-appended
                        // after the placeholder below.
                        // NOTE(review): if Utils.GenerateUriToProcess keeps the fragment, it ends up
                        // both inside the placeholder and appended after it - confirm.
                        string fragment = absoluteUri.Fragment;

                        absoluteUri = Utils.GenerateUriToProcess(absoluteUri, Resources.IgnoreFileNameChangeRegex);
                        string placeHolder = KHtmlParser.CheckUrlPresentInAssetOrNot(absoluteUri, Resources);

                        // "IGNORE" marks a url that must be left exactly as it is in the document.
                        if (placeHolder != null && placeHolder.Equals("IGNORE", StringComparison.InvariantCultureIgnoreCase))
                        {
                            continue;
                        }

                        if (placeHolder == null)
                        {
                            // First time this page is seen: create its placeholder, register and queue it.
                            placeHolder = String.Format("[kitsune_{0}]", absoluteUri.AbsoluteUri);
                            AssetDetails linkMap = new AssetDetails
                            {
                                PlaceHolder = placeHolder,
                                LinkUrl     = absoluteUri.AbsoluteUri
                            };

                            Resources.UniqueWebpagesDictionary.TryAdd(absoluteUri.AbsoluteUri, linkMap);    //  HACK: What if it is unable to add
                            UniqueWebPageQueue.Enqueue(absoluteUri.AbsoluteUri);
                        }

                        node.SetAttributeValue(attributeName, placeHolder + fragment);
                    }
                    catch (Exception ex)
                    {
                        ErrorLogMethod(LOGTYPE.ERROR, "Error while processing the node.", ex);
                    }
                }
            }
            catch (Exception ex)
            {
                // Pass the exception along so the cause of the failure is not lost.
                ErrorLogMethod(LOGTYPE.ERROR, $"Error Message : Error while finding links in {RootUrl.AbsoluteUri}", ex);
            }
        }