/// <summary>
/// Iterates over the nodes returned by <c>KHtml.GetScriptsNodeCollection()</c>, registers every
/// referenced script in <c>Resources.UniqueScriptsDictionary</c> and replaces the node's
/// <c>src</c> attribute with the script's placeholder.
/// </summary>
/// <remarks>
/// Relative <c>src</c> values are resolved against <c>KHtml.BaseTag.Href</c> when a base tag
/// exists, otherwise against <c>RootUrl</c>. Hosts that differ from <c>RootUrl.Host</c> are added
/// (upper-cased) to <c>Resources.ExternalDomains</c>. Exceptions are logged through
/// <c>ErrorLogMethod</c> and are not rethrown; a failure on one node does not stop the loop.
/// </remarks>
public void IdentifyJavaScript()
{
    try
    {
        HtmlNodeCollection scriptNodes = KHtml.GetScriptsNodeCollection();
        if (scriptNodes == null)
        {
            ErrorLogMethod(LOGTYPE.INFORMATION, $"No Script Tag found for url : {RootUrl.AbsoluteUri}", null);
            return;
        }

        foreach (var node in scriptNodes)
        {
            try
            {
                // Only <script> nodes are handled here.
                if (node.Name != "script")
                {
                    continue;
                }

                const string attributeName = "src";
                string attributeValue = node.GetAttributeValue(attributeName, String.Empty);

                // A script without a src has nothing to register. Resolving an empty value
                // against the base would yield the base Uri itself, so skip it explicitly
                // (the sibling methods in this file skip empty values as well).
                if (String.IsNullOrEmpty(attributeValue))
                {
                    continue;
                }

                // Create the absolute Uri, honouring the base tag when present.
                Uri absoluteUri;
                if (KHtml.BaseTag.Exists)
                {
                    Uri.TryCreate(KHtml.BaseTag.Href, attributeValue, out absoluteUri);
                }
                else
                {
                    Uri.TryCreate(RootUrl, attributeValue, out absoluteUri);
                }

                // TryCreate leaves the out value null on failure. Stop here so the code below
                // never dereferences a null Uri, and log what actually went wrong.
                if (absoluteUri == null)
                {
                    ErrorLogMethod(LOGTYPE.ERROR, $"Unable to create absolute Uri for script src '{attributeValue}', for Url : {RootUrl.AbsoluteUri}", null);
                    continue;
                }

                // Process the Uri.
                absoluteUri = Utils.GenerateUriToProcess(absoluteUri, Resources.IgnoreFileNameChangeRegex);
                string placeHolder = KHtmlParser.CheckUrlPresentInAssetOrNot(absoluteUri, Resources);

                // An "IGNORE" result means the node is left untouched.
                if (String.Equals(placeHolder, "IGNORE", StringComparison.InvariantCultureIgnoreCase))
                {
                    continue;
                }

                if (placeHolder == null)
                {
                    // No placeholder returned for this Uri: create one and register the script.
                    placeHolder = $"[Kitsune_{absoluteUri.AbsoluteUri}]";
                    AssetDetails javaScriptFileLink = new AssetDetails
                    {
                        LinkUrl = absoluteUri.AbsoluteUri,
                        PlaceHolder = placeHolder
                    };
                    Resources.UniqueScriptsDictionary.TryAdd(absoluteUri.AbsoluteUri, javaScriptFileLink);
                }

                node.SetAttributeValue(attributeName, placeHolder);

                // Add to External Domain.
                if (!absoluteUri.Host.Equals(RootUrl.Host, StringComparison.OrdinalIgnoreCase))
                {
                    Resources.ExternalDomains.Add(absoluteUri.Host.ToUpper());
                }
            }
            catch (Exception ex)
            {
                ErrorLogMethod(LOGTYPE.ERROR, $"Error while processing the Node(IdentifyingJavaScript Method) for Url : {RootUrl.AbsoluteUri}", ex);
            }
        }
    }
    catch (Exception ex)
    {
        ErrorLogMethod(LOGTYPE.ERROR, "Error While finding the script tag.", ex);
    }
}
/// <summary>
/// Resolves <paramref name="url"/> to an absolute Uri and returns the placeholder that stands in
/// for that asset, registering a new entry in <c>Resources.UniqueAssetsDictionary</c> when
/// <c>KHtmlParser.CheckUrlPresentInAssetOrNot</c> returns no placeholder.
/// </summary>
/// <param name="node">The node the url was read from; its tag name decides whether a placeholder is produced.</param>
/// <param name="url">The raw attribute value. It is HTML-decoded before being resolved.</param>
/// <returns>
/// The placeholder for the asset, or <c>null</c> when the url is null/empty, cannot be resolved,
/// belongs to a <c>script</c> or <c>iframe</c> node, is marked "IGNORE", or an exception occurred.
/// </returns>
/// <remarks>
/// Hosts that differ from <c>RootUrl.Host</c> are added (upper-cased) to
/// <c>Resources.ExternalDomains</c>, including for <c>script</c> and <c>iframe</c> nodes.
/// Exceptions are logged through <c>ErrorLogMethod</c> and are not rethrown.
/// </remarks>
public string ProcessAssetAndCreatePlaceHolder(HtmlNode node, string url)
{
    try
    {
        if (String.IsNullOrEmpty(url))
        {
            ErrorLogMethod(LOGTYPE.INFORMATION, "Url was null or Empty", null);
            return null;
        }

        string urlToProcess = WebUtility.HtmlDecode(url);
        string tagName = node.Name;

        // Resolve against the base tag when the document declares one.
        Uri uri;
        if (KHtml.BaseTag.Exists)
        {
            Uri.TryCreate(KHtml.BaseTag.Href, urlToProcess, out uri);
        }
        else
        {
            Uri.TryCreate(RootUrl, urlToProcess, out uri);
        }

        if (uri == null)
        {
            return null;
        }

        // Ordinal, case-insensitive tag match: lower-casing with the current culture and then
        // comparing ordinally fails for upper-case 'I' under Turkish cultures ("IFRAME", "SCRIPT").
        bool isScriptOrIframe =
            String.Equals(tagName, "script", StringComparison.OrdinalIgnoreCase) ||
            String.Equals(tagName, "iframe", StringComparison.OrdinalIgnoreCase);

        string placeHolder = null;
        if (!isScriptOrIframe)
        {
            uri = Utils.GenerateUriToProcess(uri, Resources.IgnoreFileNameChangeRegex);
            placeHolder = KHtmlParser.CheckUrlPresentInAssetOrNot(uri, Resources);

            // An "IGNORE" result means the caller gets no placeholder at all.
            if (String.Equals(placeHolder, "IGNORE", StringComparison.InvariantCultureIgnoreCase))
            {
                return null;
            }

            if (placeHolder == null)
            {
                placeHolder = $"[Kitsune_{uri.AbsoluteUri}]";
                AssetDetails assetLink = new AssetDetails
                {
                    LinkUrl = uri.AbsoluteUri,
                    PlaceHolder = placeHolder
                };
                // TODO : what if try add fails
                Resources.UniqueAssetsDictionary.TryAdd(uri.AbsoluteUri, assetLink);
            }
        }

        // Adding to the externalDomain list (done for script/iframe nodes as well).
        if (!uri.Host.Equals(RootUrl.Host, StringComparison.OrdinalIgnoreCase))
        {
            Resources.ExternalDomains.Add(uri.Host.ToUpper());
        }

        return placeHolder;
    }
    catch (Exception ex)
    {
        ErrorLogMethod(LOGTYPE.ERROR, $"Error while processing the Url: {url}", ex);
        return null;
    }
}
/// <summary>
/// Iterates over the nodes returned by <c>KHtml.GetAnchorsNodeCollection()</c>, and for every
/// same-host link with an allowed scheme replaces the link attribute with a placeholder,
/// registering newly seen pages in <c>Resources.UniqueWebpagesDictionary</c> and
/// <c>UniqueWebPageQueue</c>.
/// </summary>
/// <remarks>
/// Handles the <c>href</c> of <c>a</c> nodes and the <c>action</c> of <c>form</c> nodes whose
/// <c>method</c> attribute is "get". Empty values and values starting with '#' are skipped.
/// Exceptions are logged through <c>ErrorLogMethod</c> and are not rethrown; a failure on one
/// node does not stop the loop.
/// </remarks>
public void IdentifyAndUpdateWebpagesList()
{
    try
    {
        HtmlNodeCollection anchorNodes = KHtml.GetAnchorsNodeCollection();
        if (anchorNodes == null)
        {
            ErrorLogMethod(LOGTYPE.INFORMATION, $"No Links found in {RootUrl.AbsoluteUri}", null);
            return;
        }

        foreach (var node in anchorNodes)
        {
            try
            {
                // Get the attribute value.
                string attributeName;
                string attributeValue;
                switch (node.Name)
                {
                    case "a":
                        attributeName = "href";
                        attributeValue = node.GetAttributeValue("href", string.Empty);
                        break;

                    case "form":
                        string formMethod = node.GetAttributeValue("method", string.Empty);
                        string formAction = node.GetAttributeValue("action", string.Empty);

                        // Only forms with an action and an explicit "get" method are treated as links.
                        if (string.IsNullOrEmpty(formAction)
                            || !string.Equals(formMethod, "get", StringComparison.InvariantCultureIgnoreCase))
                        {
                            continue;
                        }

                        attributeName = "action";
                        attributeValue = formAction;
                        break;

                    default:
                        continue;
                }

                // TODO: check why to decode the value (WebUtility.HtmlDecode) before processing.

                // Skip empty values and pure in-page fragments ("#...").
                if (string.IsNullOrEmpty(attributeValue)
                    || attributeValue.StartsWith("#", StringComparison.Ordinal))
                {
                    continue;
                }

                // NOTE(review): unlike the script/asset methods in this file, relative links are
                // resolved against RootUrl even when a base tag exists - confirm this is intended.
                Uri absoluteUri;
                if (!Uri.TryCreate(RootUrl, attributeValue, out absoluteUri))
                {
                    // Caught by the per-node handler below, which logs it.
                    throw new UriFormatException($"Unable to create absoluteUri for RootUrl : {RootUrl} and relativeUri : {attributeValue}");
                }

                // Links with a scheme that is not allowed are left untouched.
                if (!AllowedScheme.Contains(absoluteUri.Scheme.ToLowerInvariant()))
                {
                    continue;
                }

                // Links to a different host are left untouched.
                if (!absoluteUri.Host.Equals(RootUrl.Host, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Process the new Url found.
                // The fragment is remembered so it can be re-appended after the placeholder.
                // NOTE(review): the Uri handed to GenerateUriToProcess still carries the fragment;
                // unless that helper removes it, "page#a" and "page#b" are registered as separate
                // pages - verify against Utils.GenerateUriToProcess.
                string fragment = absoluteUri.Fragment;

                absoluteUri = Utils.GenerateUriToProcess(absoluteUri, Resources.IgnoreFileNameChangeRegex);
                string placeHolder = KHtmlParser.CheckUrlPresentInAssetOrNot(absoluteUri, Resources);

                // An "IGNORE" result means the node is left untouched.
                if (string.Equals(placeHolder, "IGNORE", StringComparison.InvariantCultureIgnoreCase))
                {
                    continue;
                }

                if (placeHolder == null)
                {
                    // Create a new one. The prefix is lower-case here ("kitsune"), unlike the
                    // "[Kitsune_...]" placeholders of the script/asset methods; kept unchanged.
                    placeHolder = $"[kitsune_{absoluteUri.AbsoluteUri}]";
                    AssetDetails linkMap = new AssetDetails
                    {
                        PlaceHolder = placeHolder,
                        LinkUrl = absoluteUri.AbsoluteUri
                    };
                    Resources.UniqueWebpagesDictionary.TryAdd(absoluteUri.AbsoluteUri, linkMap);
                    // HACK: What if it is unable to add
                    UniqueWebPageQueue.Enqueue(absoluteUri.AbsoluteUri);
                }

                node.SetAttributeValue(attributeName, placeHolder + fragment);
            }
            catch (Exception ex)
            {
                ErrorLogMethod(LOGTYPE.ERROR, "Error while processing the node.", ex);
            }
        }
    }
    catch (Exception ex)
    {
        // Pass the exception along so the underlying failure is not lost.
        ErrorLogMethod(LOGTYPE.ERROR, $"Error Message : Error while finding links in {RootUrl.AbsoluteUri}", ex);
    }
}