/// <summary>
/// Adds the entries of <c>KrawlContext.Resources.IncludeStaticAssetList</c> to the resources.
/// Each non-empty entry is resolved against <c>KrawlContext.RootUri</c>; when
/// <c>KHtmlParser.CheckUrlPresentInAssetOrNot</c> returns <c>null</c> or "IGNORE" for it,
/// a new <c>[kitsune_...]</c> placeholder is registered in the styles, scripts or assets
/// dictionary, chosen by the file extension of the entry.
/// </summary>
/// <remarks>
/// Best effort: a failure on a single entry is logged and the remaining entries are still
/// processed. Failures outside the per-entry handling are logged as well; nothing is
/// propagated to the caller.
/// </remarks>
public void ProcessIncludeList()
{
    try
    {
        if (KrawlContext.Resources.IncludeStaticAssetList == null)
        {
            return;
        }

        foreach (var path in KrawlContext.Resources.IncludeStaticAssetList)
        {
            try
            {
                if (String.IsNullOrEmpty(path))
                {
                    continue;
                }

                Uri absoluteUri = new Uri(KrawlContext.RootUri, path);
                string placeHolder = KHtmlParser.CheckUrlPresentInAssetOrNot(absoluteUri, KrawlContext.Resources);

                // Anything other than null / "IGNORE" is left untouched.
                if (placeHolder != null && !placeHolder.Equals("IGNORE", StringComparison.InvariantCultureIgnoreCase))
                {
                    continue;
                }

                placeHolder = String.Format("[kitsune_{0}]", absoluteUri.AbsoluteUri);
                AssetDetails linkMap = new AssetDetails
                {
                    PlaceHolder = placeHolder,
                    LinkUrl = absoluteUri.AbsoluteUri
                };

                // Null-coalesce BEFORE lower-casing (the original dereferenced the result first),
                // and lower-case once, culture-independently, so ".CSS"/".JS" match on any locale.
                string fileExtension = (Path.GetExtension(path) ?? String.Empty).ToLowerInvariant();

                switch (fileExtension)
                {
                    case ".css":
                        KrawlContext.Resources.UniqueStylesDictionary.TryAdd(absoluteUri.AbsoluteUri, linkMap);
                        break;
                    case ".js":
                        KrawlContext.Resources.UniqueScriptsDictionary.TryAdd(absoluteUri.AbsoluteUri, linkMap);
                        break;
                    default:
                        KrawlContext.Resources.UniqueAssetsDictionary.TryAdd(absoluteUri.AbsoluteUri, linkMap);
                        break;
                }
            }
            catch (Exception ex)
            {
                //Log Error
                KrawlContext.ErrorLogMethod(LOGTYPE.ERROR, $"Error Message : Error adding the include_asset_list_file path : {path}", ex);
            }
        }
    }
    catch (Exception ex)
    {
        // Previously swallowed silently. Report it, but keep the method non-throwing:
        // if the logger itself is what failed there is nothing further we can do here.
        try
        {
            KrawlContext.ErrorLogMethod(LOGTYPE.ERROR, "Error Message : Error processing the include_asset_list", ex);
        }
        catch (Exception)
        {
            // Intentionally ignored: logging is unavailable and this method is best effort.
        }
    }
}
/// <summary>
/// Process the given Html file: builds a <c>KHtmlParser</c> for the document, wired to the
/// resources, web page queue and error logger of <c>KrawlContext</c>, runs
/// <c>Parse()</c>, then calls <c>IdentifyInternalStyles()</c>.
/// </summary>
/// <param name="uri">Uri passed to the parser together with the document; must not be null.</param>
/// <param name="htmlDocument">The Html document to parse.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="uri"/> is null.</exception>
/// <remarks>
/// Exceptions raised while constructing the parser or during <c>Parse()</c> propagate to the
/// caller with their original stack trace. A failure in <c>IdentifyInternalStyles()</c> is
/// logged and not rethrown.
/// </remarks>
public void ProcessHtml(Uri uri, KHtmlDocument htmlDocument)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri), "Error : Uri cannot be null");
    }

    // No outer try/catch: the former `catch (Exception ex) { throw ex; }` only reset the
    // stack trace of whatever was thrown below.

    #region Parse Html
    KHtmlParser htmlParser = new KHtmlParser(uri, htmlDocument)
    {
        Resources = KrawlContext.Resources,
        UniqueWebPageQueue = KrawlContext.UniqueWebPageQueue,
        ErrorLogMethod = KrawlContext.ErrorLogMethod
    };
    htmlParser.Parse();
    #endregion

    #region Parse Css
    //TODO Parse inner styles
    try
    {
        htmlParser.IdentifyInternalStyles();
    }
    catch (Exception ex)
    {
        // Inner-style parsing is non-fatal: report it and carry on.
        KrawlContext.ErrorLogMethod(LOGTYPE.ERROR, "Error while parsing inner styles", ex);
    }
    #endregion
}