/// <summary>
/// Creates an <see cref="AnalyzerContext"/> and fills it from the supplied arguments.
/// </summary>
/// <param name="HtmlSource">Html text stored on the context.</param>
/// <param name="BaseUrl">Url used as <c>AbsoluteUrl</c> when the context's Dom reports no base URI.</param>
/// <param name="ObjectId">Id stored on the context.</param>
/// <param name="ObjectType">Object type byte stored on the context.</param>
/// <param name="SiteDb">Site database stored on the context.</param>
/// <param name="OriginalImortUrl">Optional original import url; only assigned when not null or empty.</param>
/// <returns>The populated context.</returns>
public static AnalyzerContext GetContext(string HtmlSource, string BaseUrl, Guid ObjectId, byte ObjectType, Repository.SiteDb SiteDb, string OriginalImortUrl = "")
{
    var result = new AnalyzerContext
    {
        SiteDb = SiteDb,
        ObjectId = ObjectId,
        ObjectType = ObjectType,
        HtmlSource = HtmlSource
    };

    // A base URI reported by the document takes precedence over the caller supplied url.
    string documentBase = result.Dom.baseURI;
    result.AbsoluteUrl = string.IsNullOrEmpty(documentBase) ? BaseUrl : documentBase;

    if (!string.IsNullOrEmpty(OriginalImortUrl))
    {
        result.OriginalImportUrl = OriginalImortUrl;
    }

    return result;
}
/// <summary>
/// Builds a context through <c>GetContext</c> (using the manager's SiteDb), attaches the
/// download manager and runs the analyzers on it.
/// </summary>
/// <param name="HtmlSource">Html text to analyze.</param>
/// <param name="BaseUrl">Base url forwarded to <c>GetContext</c>.</param>
/// <param name="ObjectId">Id forwarded to <c>GetContext</c>.</param>
/// <param name="ObjectType">Object type byte forwarded to <c>GetContext</c>.</param>
/// <param name="manager">Download manager; its <c>SiteDb</c> is read, so it must not be null.</param>
/// <param name="OriginalImortUrl">Optional original import url forwarded to <c>GetContext</c>.</param>
/// <returns>The context returned by the context-based <c>Execute</c> overload.</returns>
public static AnalyzerContext Execute(string HtmlSource, string BaseUrl, Guid ObjectId, byte ObjectType, DownloadManager manager, string OriginalImortUrl = "")
{
    var prepared = GetContext(HtmlSource, BaseUrl, ObjectId, ObjectType, manager.SiteDb, OriginalImortUrl);
    prepared.DownloadManager = manager;

    var analyzed = Execute(prepared);
    return analyzed;
}
/// <summary>
/// Normalizes the url of every element in <c>Context.Dom.Links</c>: urls on the same host as
/// <c>Context.OriginalImportUrl</c> are replaced by <c>UrlHelper.RelativePath</c> of the combined url,
/// all other urls are replaced by the combined absolute url. A change covering the element's
/// open tag is recorded whenever the url actually differs.
/// </summary>
/// <param name="Context">Analyzer context providing the Dom, the urls and the change list.</param>
public void Execute(AnalyzerContext Context)
{
    foreach (var link in Context.Dom.Links.item)
    {
        string rawUrl = Service.DomUrlService.GetLinkOrSrc(link);
        if (string.IsNullOrEmpty(rawUrl))
        {
            continue;
        }

        string fullUrl = UrlHelper.Combine(Context.AbsoluteUrl, rawUrl);
        bool sameHost = UrlHelper.isSameHost(fullUrl, Context.OriginalImportUrl);

        // NOTE(review): the result is never read; the call is kept only so the call sequence
        // stays as it was. Candidate for removal once confirmed to be side-effect free.
        var constType = Service.ConstTypeService.GetConstTypeByUrl(fullUrl);

        string targetUrl = sameHost ? UrlHelper.RelativePath(fullUrl, sameHost) : fullUrl;
        if (rawUrl == targetUrl)
        {
            continue;
        }

        string openTag = Kooboo.Sites.Service.DomService.GetOpenTag(link);
        string rewrittenTag = openTag.Replace(rawUrl, targetUrl);

        Context.Changes.Add(new AnalyzerUpdate()
        {
            StartIndex = link.location.openTokenStartIndex,
            EndIndex = link.location.openTokenEndIndex,
            NewValue = rewrittenTag
        });
    }
}
/// <summary>
/// Runs every analyzer returned by <c>getAnalyzers</c> against the context, appends the change
/// that removes the base tag, and, when any change was recorded, rewrites
/// <c>context.HtmlSource</c> through <c>ParseChanges</c>.
/// </summary>
/// <param name="context">Context to analyze; it is modified in place.</param>
/// <returns>The same context instance that was passed in.</returns>
public static AnalyzerContext Execute(AnalyzerContext context)
{
    foreach (var analyzer in getAnalyzers())
    {
        analyzer.Execute(context);
    }

    AppendRemoveBaseHrefChange(context);

    bool hasChanges = context.Changes.Count > 0;
    if (hasChanges)
    {
        context.HtmlSource = ParseChanges(context.HtmlSource, context.Changes);
    }

    return context;
}
/// <summary>
/// Records a change that replaces the document's first "base" element with an empty string.
/// The removed range starts at the open tag and ends at the later of the open tag end and the
/// end tag end.
/// </summary>
/// <param name="context">Context whose Dom is searched and whose change list is appended to.</param>
private static void AppendRemoveBaseHrefChange(AnalyzerContext context)
{
    var baseElement = context.Dom.documentElement.getOneElementByTagName("base");
    if (baseElement == null)
    {
        return;
    }

    var openEnd = baseElement.location.openTokenEndIndex;
    var closeEnd = baseElement.location.endTokenEndIndex;

    // Only extend past the open tag when an end tag position lies behind it.
    var removeEnd = closeEnd > openEnd ? closeEnd : openEnd;

    context.Changes.Add(new AnalyzerUpdate()
    {
        StartIndex = baseElement.location.openTokenStartIndex,
        EndIndex = removeEnd,
        NewValue = string.Empty
    });
}
/// <summary>
/// Processes the css text of every embedded style element. The text is handed to
/// <c>CssManager.ProcessResource</c> by reference; when it comes back different, a change
/// replacing the element's content (between open tag and end tag) is recorded.
/// </summary>
/// <param name="context">Analyzer context providing the Dom, the download manager and the change list.</param>
private void ProcessInPage(AnalyzerContext context)
{
    HTMLCollection styleElements = context.Dom.getElementsByTagName("style");

    foreach (var styleElement in styleElements.item)
    {
        string processedCss = styleElement.InnerHtml;
        if (string.IsNullOrEmpty(processedCss))
        {
            continue;
        }

        CssManager.ProcessResource(ref processedCss, context.AbsoluteUrl, context.DownloadManager, context.ObjectId);

        if (styleElement.InnerHtml == processedCss)
        {
            // Nothing was rewritten, the source stays as it is.
            continue;
        }

        var update = new AnalyzerUpdate()
        {
            StartIndex = styleElement.location.openTokenEndIndex + 1,
            EndIndex = styleElement.location.endTokenStartIndex - 1,
            NewValue = processedCss
        };

        // Skip ranges that are empty or inverted.
        if (update.EndIndex > update.StartIndex)
        {
            context.Changes.Add(update);
        }
    }
}
/// <summary>
/// Handles "link" elements whose rel attribute contains "stylesheet": rewrites the url in the
/// open tag to the value returned by <c>UrlHelper.RelativePath</c> when it differs, and queues a
/// style download task for every such link that has a url.
/// </summary>
/// <param name="Context">Analyzer context providing the Dom, the download manager and the change list.</param>
private void ProcessExternal(AnalyzerContext Context)
{
    HTMLCollection linkElements = Context.Dom.getElementsByTagName("link");

    foreach (var linkElement in linkElements.item)
    {
        bool isStylesheet = linkElement.hasAttribute("rel")
            && linkElement.getAttribute("rel").ToLower().Contains("stylesheet");
        if (!isStylesheet)
        {
            continue;
        }

        string href = Service.DomUrlService.GetLinkOrSrc(linkElement);
        if (string.IsNullOrEmpty(href))
        {
            continue;
        }

        string absoluteUrl = UrlHelper.Combine(Context.AbsoluteUrl, href);
        bool sameHost = UrlHelper.isSameHost(Context.OriginalImportUrl, absoluteUrl);
        string localUrl = UrlHelper.RelativePath(absoluteUrl, sameHost);

        if (href != localUrl)
        {
            string openTag = Kooboo.Sites.Service.DomService.GetOpenTag(linkElement);
            string rewrittenTag = openTag.Replace(href, localUrl);

            Context.Changes.Add(new AnalyzerUpdate()
            {
                StartIndex = linkElement.location.openTokenStartIndex,
                EndIndex = linkElement.location.openTokenEndIndex,
                NewValue = rewrittenTag
            });
        }

        // The download is queued whether or not the tag had to be rewritten.
        Context.DownloadManager.AddTask(new Download.DownloadTask()
        {
            AbsoluteUrl = absoluteUrl,
            RelativeUrl = localUrl,
            ConstType = ConstObjectType.Style,
            OwnerObjectId = Context.ObjectId
        });
    }
}
/// <summary>
/// Computes the url replacement for a single element.
/// </summary>
/// <param name="element">Element whose link or src value is inspected.</param>
/// <param name="Context">Analyzer context providing the absolute and original import urls.</param>
/// <returns>
/// <c>null</c> when the element has no url; otherwise a dictionary that is empty or holds one
/// entry mapping the current url to its replacement (relative path for same-host urls,
/// absolute url otherwise).
/// </returns>
private Dictionary<string, string> ReplaceLinks(Kooboo.Dom.Element element, AnalyzerContext Context)
{
    string rawUrl = Service.DomUrlService.GetLinkOrSrc(element);
    if (string.IsNullOrEmpty(rawUrl))
    {
        return null;
    }

    var replacements = new Dictionary<string, string>();

    string fullUrl = UrlHelper.Combine(Context.AbsoluteUrl, rawUrl);
    bool sameHost = UrlHelper.isSameHost(fullUrl, Context.OriginalImportUrl);

    // NOTE(review): the result is never read; the call is kept only so the call sequence
    // stays as it was. Candidate for removal once confirmed to be side-effect free.
    var constType = Service.ConstTypeService.GetConstTypeByUrl(fullUrl);

    string targetUrl = sameHost ? UrlHelper.RelativePath(fullUrl, sameHost) : fullUrl;
    if (rawUrl != targetUrl)
    {
        replacements.Add(rawUrl, targetUrl);
    }

    return replacements;
}
/// <summary>
/// Runs the style processing steps: embedded style elements first, then external stylesheet
/// links. Inline style attributes are not processed by this method.
/// </summary>
/// <param name="Context">Analyzer context passed on to both steps.</param>
public void Execute(AnalyzerContext Context)
{
    this.ProcessInPage(Context);
    this.ProcessExternal(Context);
}
/// <summary>
/// Handles "script" elements that carry a src attribute. Scripts on the same host as
/// <c>Context.OriginalImportUrl</c> get their url rewritten to a relative path (passed through
/// <c>TransferHelper.TrimQuestionMark</c>) and a script download task is queued. Scripts on
/// other hosts and scripts without a src attribute are left untouched.
/// </summary>
/// <param name="Context">Analyzer context providing the Dom, the download manager and the change list.</param>
public void Execute(AnalyzerContext Context)
{
    HTMLCollection scriptElements = Context.Dom.getElementsByTagName("script");

    foreach (var scriptElement in scriptElements.item)
    {
        if (!scriptElement.hasAttribute("src"))
        {
            // Embedded script: nothing to do here.
            continue;
        }

        string src = Service.DomUrlService.GetLinkOrSrc(scriptElement);
        if (string.IsNullOrEmpty(src))
        {
            // A src attribute without a usable value is not treated as a script reference.
            continue;
        }

        string absoluteUrl = UrlHelper.Combine(Context.AbsoluteUrl, src);
        bool sameHost = Kooboo.Lib.Helper.UrlHelper.isSameHost(Context.OriginalImportUrl, absoluteUrl);
        if (!sameHost)
        {
            continue;
        }

        string localUrl = TransferHelper.TrimQuestionMark(UrlHelper.RelativePath(absoluteUrl, sameHost));

        if (src != localUrl)
        {
            string openTag = Kooboo.Sites.Service.DomService.GetOpenTag(scriptElement);
            string rewrittenTag = openTag.Replace(src, localUrl);

            Context.Changes.Add(new AnalyzerUpdate()
            {
                StartIndex = scriptElement.location.openTokenStartIndex,
                EndIndex = scriptElement.location.openTokenEndIndex,
                NewValue = rewrittenTag
            });
        }

        Context.DownloadManager.AddTask(new Download.DownloadTask()
        {
            AbsoluteUrl = absoluteUrl,
            RelativeUrl = localUrl,
            ConstType = ConstObjectType.Script,
            OwnerObjectId = Context.ObjectId
        });
    }
}
/// <summary>
/// Handles the image urls returned by <c>GetImageUrls</c> for <c>Context.Dom.images</c>.
/// Base64 data URIs are stored as images in the site database, get a route of the form
/// "/image/base64/page/{id}" and have their url replaced by that route. All other urls are
/// rewritten to the relative path (passed through <c>EnsureUrlWithoutQuestionMark</c>) and an
/// image download task is queued.
/// </summary>
/// <param name="Context">Analyzer context providing the Dom, the site database, the download manager and the change list.</param>
public void Execute(AnalyzerContext Context)
{
    var imageUrls = GetImageUrls(Context.Dom.images.item);

    foreach (var entry in imageUrls)
    {
        string src = entry.Value;
        if (string.IsNullOrEmpty(src))
        {
            continue;
        }

        if (Kooboo.Lib.Utilities.DataUriService.isDataUri(src))
        {
            var parsed = Kooboo.Lib.Utilities.DataUriService.PraseDataUri(src);
            if (parsed == null || !parsed.isBase64)
            {
                // TODO: other encodings are not implemented yet.
                continue;
            }

            // The image id is derived from the complete data URI text.
            Guid imageId = src.ToHashGuid();
            Image storedImage = new Image
            {
                Extension = UrlHelper.GetImageExtensionFromMine(parsed.MineType),
                ContentBytes = Convert.FromBase64String(parsed.DataString),
                Id = imageId,
                Name = imageId.ToString()
            };

            string routeUrl = "/image/base64/page/" + storedImage.Id.ToString();

            Context.SiteDb.Routes.AddOrUpdate(routeUrl, ConstObjectType.Image, storedImage.Id, Context.DownloadManager.UserId);
            Context.SiteDb.Images.AddOrUpdate(storedImage, Context.DownloadManager.UserId);

            // NOTE(review): OuterHtml is the replacement source while the recorded range only
            // covers the open token - presumably identical for img elements; confirm.
            string dataUriTag = entry.Key.OuterHtml;
            string routedTag = dataUriTag.Replace(src, routeUrl);

            Context.Changes.Add(new AnalyzerUpdate()
            {
                StartIndex = entry.Key.location.openTokenStartIndex,
                EndIndex = entry.Key.location.openTokenEndIndex,
                NewValue = routedTag
            });

            continue;
        }

        string absoluteUrl = Kooboo.Lib.Helper.UrlHelper.Combine(Context.AbsoluteUrl, src);
        bool sameHost = Kooboo.Lib.Helper.UrlHelper.isSameHost(absoluteUrl, Context.OriginalImportUrl);
        string localUrl = EnsureUrlWithoutQuestionMark(UrlHelper.RelativePath(absoluteUrl, sameHost));

        if (src != localUrl)
        {
            string originalTag = entry.Key.OuterHtml;
            string rewrittenTag = originalTag.Replace(src, localUrl);

            Context.Changes.Add(new AnalyzerUpdate()
            {
                StartIndex = entry.Key.location.openTokenStartIndex,
                EndIndex = entry.Key.location.openTokenEndIndex,
                NewValue = rewrittenTag
            });
        }

        Context.DownloadManager.AddTask(new Download.DownloadTask()
        {
            AbsoluteUrl = absoluteUrl,
            RelativeUrl = localUrl,
            ConstType = ConstObjectType.Image,
            OwnerObjectId = Context.ObjectId
        });
    }
}
/// <summary>
/// Computes the url replacements for the inline "style" attribute of an element.
/// Every url found by <c>Service.CssService.GetUrlInfos</c> is handed to the matching
/// <c>CssManager</c> method (import rule, data URI or regular file); a replacement is recorded
/// when the returned url differs from the original one.
/// </summary>
/// <param name="element">Element whose style attribute is inspected.</param>
/// <param name="Context">Analyzer context providing the absolute url, the download manager and the object id.</param>
/// <returns>
/// <c>null</c> when the element has no style attribute value; otherwise a dictionary (possibly
/// empty) mapping each original url to its replacement.
/// </returns>
private Dictionary<string, string> ReplaceInlineCssUrl(Kooboo.Dom.Element element, AnalyzerContext Context)
{
    string csstext = element.getAttribute("style");
    if (string.IsNullOrEmpty(csstext))
    {
        return null;
    }

    Dictionary<string, string> replace = new Dictionary<string, string>();

    var urlInfos = Service.CssService.GetUrlInfos(csstext);
    foreach (var item in urlInfos)
    {
        // Empty urls and urls starting with '#' are skipped.
        if (string.IsNullOrEmpty(item.PureUrl) || item.PureUrl.Trim().ToLower().StartsWith("#"))
        {
            continue;
        }

        string newurl;
        if (item.isImportRule)
        {
            newurl = CssManager.AddImport(item.PureUrl, Context.AbsoluteUrl, Context.DownloadManager, Context.ObjectId);
        }
        else if (Kooboo.Lib.Utilities.DataUriService.isDataUri(item.PureUrl))
        {
            newurl = CssManager.ParseDataUri(item.PureUrl, Context.DownloadManager);
        }
        else
        {
            newurl = CssManager.DownloadCssFile(item.PureUrl, Context.AbsoluteUrl, Context.DownloadManager, Context.ObjectId);
        }

        if (newurl != item.PureUrl)
        {
            // Indexer instead of Add: the same url may occur more than once in a single style
            // attribute, and Dictionary.Add throws ArgumentException on a duplicate key.
            replace[item.PureUrl] = newurl;
        }
    }

    return replace;
}
/// <summary>
/// Rewrites urls in two passes. First pass: every element in <c>Context.Dom.Links</c> gets the
/// replacements from <c>ReplaceLinks</c> and <c>ReplaceInlineCssUrl</c> applied to its open tag.
/// Second pass: every element with a "style" attribute that was not matched (via
/// <c>isEqualNode</c>) by an element of the first pass gets its inline css replacements applied.
/// </summary>
/// <param name="Context">Analyzer context providing the Dom and the change list.</param>
public void Execute(AnalyzerContext Context)
{
    var handledLinks = new List<Kooboo.Dom.Element>();

    foreach (var link in Context.Dom.Links.item)
    {
        handledLinks.Add(link);

        var merged = new Dictionary<string, string>();
        var linkReplacements = ReplaceLinks(link, Context);
        var cssReplacements = ReplaceInlineCssUrl(link, Context);

        if (linkReplacements != null)
        {
            foreach (var pair in linkReplacements)
            {
                merged[pair.Key] = pair.Value;
            }
        }

        // Css replacements are merged last, so they win on identical keys.
        if (cssReplacements != null)
        {
            foreach (var pair in cssReplacements)
            {
                merged[pair.Key] = pair.Value;
            }
        }

        if (merged.Count == 0)
        {
            continue;
        }

        string linkOpenTag = Kooboo.Sites.Service.DomService.GetOpenTag(link);
        string rewrittenLinkTag = merged.Aggregate(linkOpenTag, (text, pair) => text.Replace(pair.Key, pair.Value));

        Context.Changes.Add(new AnalyzerUpdate()
        {
            StartIndex = link.location.openTokenStartIndex,
            EndIndex = link.location.openTokenEndIndex,
            NewValue = rewrittenLinkTag
        });
    }

    // Inline css on the remaining elements; anything already handled as a link is excluded.
    HTMLCollection styledElements = Context.Dom.getElementByAttribute("style");
    foreach (var styled in styledElements.item)
    {
        if (handledLinks.Any(done => done.isEqualNode(styled)))
        {
            continue;
        }

        var cssUpdates = ReplaceInlineCssUrl(styled, Context);
        if (cssUpdates == null || cssUpdates.Count == 0)
        {
            continue;
        }

        string styledOpenTag = Kooboo.Sites.Service.DomService.GetOpenTag(styled);
        string rewrittenStyledTag = cssUpdates.Aggregate(styledOpenTag, (text, pair) => text.Replace(pair.Key, pair.Value));

        Context.Changes.Add(new AnalyzerUpdate()
        {
            StartIndex = styled.location.openTokenStartIndex,
            EndIndex = styled.location.openTokenEndIndex,
            NewValue = rewrittenStyledTag
        });
    }
}
/// <summary>
/// Handles "embed" elements. Urls that <c>isDownloadAble</c> rejects are only rewritten to
/// their absolute form. Downloadable urls on the same host as <c>Context.OriginalImportUrl</c>
/// are rewritten to a relative path and a file download task is queued; downloadable urls on
/// other hosts are left untouched.
/// </summary>
/// <param name="Context">Analyzer context providing the Dom, the download manager and the change list.</param>
public void Execute(AnalyzerContext Context)
{
    HTMLCollection embeds = Context.Dom.getElementsByTagName("embed");

    foreach (var embed in embeds.item)
    {
        string src = Kooboo.Sites.Service.DomUrlService.GetLinkOrSrc(embed);
        if (string.IsNullOrEmpty(src))
        {
            continue;
        }

        string absoluteUrl = UrlHelper.Combine(Context.AbsoluteUrl, src);

        string targetUrl;
        bool queueDownload;

        if (!isDownloadAble(absoluteUrl))
        {
            // Not going to be downloaded: only make the url absolute.
            targetUrl = absoluteUrl;
            queueDownload = false;
        }
        else
        {
            bool sameHost = UrlHelper.isSameHost(Context.OriginalImportUrl, absoluteUrl);
            if (!sameHost)
            {
                continue;
            }

            targetUrl = UrlHelper.RelativePath(absoluteUrl, sameHost);
            queueDownload = true;
        }

        if (src != targetUrl)
        {
            string openTag = Kooboo.Sites.Service.DomService.GetOpenTag(embed);
            string rewrittenTag = openTag.Replace(src, targetUrl);

            Context.Changes.Add(new AnalyzerUpdate()
            {
                StartIndex = embed.location.openTokenStartIndex,
                EndIndex = embed.location.openTokenEndIndex,
                NewValue = rewrittenTag
            });
        }

        if (queueDownload)
        {
            Context.DownloadManager.AddTask(new Download.DownloadTask()
            {
                AbsoluteUrl = absoluteUrl,
                RelativeUrl = targetUrl,
                ConstType = ConstObjectType.CmsFile,
                OwnerObjectId = Context.ObjectId
            });
        }
    }
}
/// <summary>
/// Handles "script" elements.
/// Scripts with a src attribute: when the url is on the same host as
/// <c>Context.OriginalImportUrl</c>, it is rewritten to a relative path (passed through
/// <c>TransferHelper.TrimQuestionMark</c>) and a script download task is queued.
/// Embedded scripts: short scripts (fewer than 200 characters) that look like an https
/// redirect or that assign <c>window.top.location</c> are replaced by a placeholder script.
/// </summary>
/// <param name="Context">Analyzer context providing the Dom, the download manager and the change list.</param>
public void Execute(AnalyzerContext Context)
{
    HTMLCollection scripts = Context.Dom.getElementsByTagName("script");

    foreach (var item in scripts.item)
    {
        if (item.hasAttribute("src"))
        {
            string srcurl = Service.DomUrlService.GetLinkOrSrc(item);
            if (string.IsNullOrEmpty(srcurl))
            {
                // A src attribute without a usable value is not treated as a script reference.
                continue;
            }

            string fullurl = UrlHelper.Combine(Context.AbsoluteUrl, srcurl);
            bool issamehost = Kooboo.Lib.Helper.UrlHelper.isSameHost(Context.OriginalImportUrl, fullurl);
            if (!issamehost)
            {
                continue;
            }

            string relativeurl = UrlHelper.RelativePath(fullurl, issamehost);
            relativeurl = TransferHelper.TrimQuestionMark(relativeurl);

            if (srcurl != relativeurl)
            {
                string oldstring = Kooboo.Sites.Service.DomService.GetOpenTag(item);
                string newstring = oldstring.Replace(srcurl, relativeurl);

                Context.Changes.Add(new AnalyzerUpdate()
                {
                    StartIndex = item.location.openTokenStartIndex,
                    EndIndex = item.location.openTokenEndIndex,
                    NewValue = newstring
                });
            }

            Context.DownloadManager.AddTask(new Download.DownloadTask()
            {
                AbsoluteUrl = fullurl,
                RelativeUrl = relativeurl,
                ConstType = ConstObjectType.Script,
                OwnerObjectId = Context.ObjectId
            });

            continue;
        }

        // Example of a targeted script:
        // <script>if (document.location.protocol != "https:") {document.location = document.URL.replace(/^http:/i, "https:");}</script>
        string text = item.InnerHtml;
        if (string.IsNullOrWhiteSpace(text) || text.Length >= 200)
        {
            continue;
        }

        // Invariant lower-casing: this is a non-linguistic match, so it must not depend on the
        // current culture (e.g. Turkish casing of 'I').
        var lower = text.ToLowerInvariant();

        var hasProtocolOperator = lower.Contains("document.location.protocol")
            && lower.Contains("document.url.replace")
            && lower.Contains("https");

        // Dots are escaped so they only match literal '.' characters.
        var hasLocationOperator = Regex.IsMatch(lower, @"window\.top\.location\s*=");

        if (hasProtocolOperator || hasLocationOperator)
        {
            // The whole element, open tag through end tag, is replaced.
            Context.Changes.Add(new AnalyzerUpdate()
            {
                StartIndex = item.location.openTokenStartIndex,
                EndIndex = item.location.endTokenEndIndex,
                NewValue = "<script>/* https redirect removed */</script>"
            });
        }
    }
}