Example #1
0
        /// <summary>
        /// Walks every "link" element of the analyzed document, and for each one whose
        /// <c>rel</c> attribute mentions "stylesheet" records a tag rewrite (when the URL
        /// changes) and queues the stylesheet for download.
        /// </summary>
        /// <param name="Context">Analyzer context supplying the DOM, the page URLs, the change list and the download manager.</param>
        private void ProcessExternal(AnalyzerContext Context)
        {
            HTMLCollection linkTags = Context.Dom.getElementsByTagName("link");

            foreach (var link in linkTags.item)
            {
                // Skip link elements that are not stylesheet references.
                bool isStylesheet = link.hasAttribute("rel") && link.getAttribute("rel").ToLower().Contains("stylesheet");
                if (!isStylesheet)
                {
                    continue;
                }

                string href = Service.DomUrlService.GetLinkOrSrc(link);
                if (string.IsNullOrEmpty(href))
                {
                    continue;
                }

                // NOTE(review): TrimQuestionMark presumably strips the query string - confirm against TransferHelper.
                href = TransferHelper.TrimQuestionMark(href);

                string absoluteUrl = UrlHelper.Combine(Context.AbsoluteUrl, href);
                bool   sameHost    = UrlHelper.isSameHost(Context.OriginalImportUrl, absoluteUrl);
                string relativeUrl = UrlHelper.RelativePath(absoluteUrl, sameHost);

                // Only record a change when the reference actually differs from what is in the tag.
                if (href != relativeUrl)
                {
                    string openTag = Kooboo.Sites.Service.DomService.GetOpenTag(link);

                    var update = new AnalyzerUpdate();
                    update.StartIndex = link.location.openTokenStartIndex;
                    update.EndIndex   = link.location.openTokenEndIndex;
                    update.NewValue   = openTag.Replace(href, relativeUrl);
                    Context.Changes.Add(update);
                }

                var task = new Download.DownloadTask();
                task.AbsoluteUrl   = absoluteUrl;
                task.RelativeUrl   = relativeUrl;
                task.ConstType     = ConstObjectType.Style;
                task.OwnerObjectId = Context.ObjectId;
                Context.DownloadManager.AddTask(task);
            }
        }
Example #2
0
        /// <summary>
        /// Finds every "script" element with a <c>src</c> attribute that points to the same host as
        /// the original import URL, records a tag rewrite to the relative URL (when it differs) and
        /// queues the script for download.
        /// </summary>
        /// <param name="Context">Analyzer context supplying the DOM, the page URLs, the change list and the download manager.</param>
        public void Execute(AnalyzerContext Context)
        {
            HTMLCollection scripts = Context.Dom.getElementsByTagName("script");

            foreach (var item in scripts.item)
            {
                // Inline scripts (no src attribute) are intentionally left untouched here; the code
                // that used to store them as embedded Script records in SiteDb.Scripts was disabled.
                if (!item.hasAttribute("src"))
                {
                    continue;
                }

                string srcurl = Service.DomUrlService.GetLinkOrSrc(item);
                if (string.IsNullOrEmpty(srcurl))
                {
                    // A script tag whose src yields no URL is not treated as an external script.
                    continue;
                }

                string fullurl = UrlHelper.Combine(Context.AbsoluteUrl, srcurl);

                // Scripts served from another host keep their original reference and are not downloaded.
                bool issamehost = Kooboo.Lib.Helper.UrlHelper.isSameHost(Context.OriginalImportUrl, fullurl);
                if (!issamehost)
                {
                    continue;
                }

                string relativeurl = UrlHelper.RelativePath(fullurl, issamehost);
                // NOTE(review): TrimQuestionMark presumably strips the query string - confirm against TransferHelper.
                relativeurl = TransferHelper.TrimQuestionMark(relativeurl);

                if (srcurl != relativeurl)
                {
                    string oldstring = Kooboo.Sites.Service.DomService.GetOpenTag(item);
                    string newstring = oldstring.Replace(srcurl, relativeurl);
                    Context.Changes.Add(new AnalyzerUpdate()
                    {
                        StartIndex = item.location.openTokenStartIndex,
                        EndIndex   = item.location.openTokenEndIndex,
                        NewValue   = newstring
                    });
                }

                Context.DownloadManager.AddTask(new Download.DownloadTask()
                {
                    AbsoluteUrl   = fullurl,
                    RelativeUrl   = relativeurl,
                    ConstType     = ConstObjectType.Script,
                    OwnerObjectId = Context.ObjectId
                });
            }
        }
Example #3
0
        /// <summary>
        /// Downloads <paramref name="rootUrl"/>, adds it to <paramref name="list"/> as a tree node and
        /// then crawls the same-host links found in its HTML, adding one node per downloaded page.
        /// </summary>
        /// <param name="urlContentDictionary">Visited URLs mapped to their downloaded content (empty string when the download failed).</param>
        /// <param name="rootUrl">Absolute URL of the page to start from.</param>
        /// <param name="list">Accumulated tree nodes; appended to and returned.</param>
        /// <param name="level">Depth of the caller; incremented on entry, so the first page is level 1.</param>
        /// <param name="maxLevel">Links of a page are followed further only while the page's level is below this value.</param>
        /// <param name="maxPages">Crawling of child links stops once <paramref name="list"/> holds this many nodes.</param>
        /// <param name="callback">Invoked with <paramref name="list"/> after every node that is added.</param>
        /// <returns>The same <paramref name="list"/> instance that was passed in.</returns>
        private List <JsTreeDataItem> GetPageLinks(Dictionary <string, string> urlContentDictionary, string rootUrl, List <JsTreeDataItem> list, int level, int maxLevel, int maxPages, Action <List <JsTreeDataItem> > callback)
        {
            level++;

            // Check for an already visited page before paying for the download.
            if (urlContentDictionary.ContainsKey(rootUrl))
            {
                return(list);
            }

            var content = DownloadHelper.DownloadUrl(rootUrl);
            if (content == null)
            {
                return(list);
            }

            var html = content.ContentString;
            urlContentDictionary[rootUrl] = html;

            var rootItem = new JsTreeDataItem
            {
                Id     = rootUrl.ToHashGuid().ToString(),
                Parent = level == 1 ? "#" : rootUrl.ToHashGuid().ToString(),
                Text   = rootUrl
            };
            if (level == 1)
            {
                // The start page is always part of the selection and cannot be deselected.
                rootItem.TreeState = new JsTreeState
                {
                    Selected = true,
                    Disabled = true
                };
            }
            list.Add(rootItem);
            callback(list);

            CollectChildPageLinks(urlContentDictionary, rootUrl, html, list, level, maxLevel, maxPages, callback);
            return(list);
        }

        /// <summary>
        /// Downloads every not-yet-visited same-host link found in <paramref name="html"/>, adds it as a
        /// child node of <paramref name="pageUrl"/> and, while <paramref name="level"/> is below
        /// <paramref name="maxLevel"/>, descends into the downloaded page using the content already fetched.
        /// </summary>
        private void CollectChildPageLinks(Dictionary <string, string> urlContentDictionary, string pageUrl, string html, List <JsTreeDataItem> list, int level, int maxLevel, int maxPages, Action <List <JsTreeDataItem> > callback)
        {
            var links    = TransferHelper.GetLinkElements(html);
            var pageUri  = new Uri(pageUrl);
            var pageHost = pageUri.Host;

            IEnumerable <string> urls = links.Where(link => link.attributes != null &&
                                                    link.attributes.Any(attr => attr.name.Equals("href", StringComparison.OrdinalIgnoreCase) &&
                                                                        attr.value != null &&
                                                                        !attr.value.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase) &&
                                                                        !attr.value.StartsWith("#", StringComparison.OrdinalIgnoreCase)
                                                                        )).Select(link =>
            {
                var href = link.attributes.FirstOrDefault(it => it.name.Equals("href", StringComparison.OrdinalIgnoreCase));
                if (href == null || href.value == null)
                {
                    return(null);
                }

                // A malformed href must not abort the whole crawl; it is simply skipped.
                Uri currentUri;
                if (!Uri.TryCreate(href.value, UriKind.RelativeOrAbsolute, out currentUri))
                {
                    return(null);
                }

                // Absolute links are only followed when they stay on the same host as the page.
                if (currentUri.IsAbsoluteUri && !currentUri.Host.Equals(pageHost, StringComparison.OrdinalIgnoreCase))
                {
                    return(null);
                }

                Uri absoluteUri;
                if (!Uri.TryCreate(pageUri, href.value, out absoluteUri))
                {
                    return(null);
                }
                return(absoluteUri.AbsoluteUri);
            }).Where(url => !String.IsNullOrEmpty(url))
                                        .Distinct(StringComparer.OrdinalIgnoreCase)
                                        .ToList();

            foreach (var url in urls)
            {
                if (CancelToken.IsCancellationRequested)
                {
                    break;
                }
                if (urlContentDictionary.ContainsKey(url))
                {
                    continue;
                }

                // Download the linked page itself (not the parent page again).
                var subContent = DownloadHelper.DownloadUrl(url);
                if (subContent == null)
                {
                    // Remember the failure so the same URL is not requested again.
                    urlContentDictionary[url] = String.Empty;
                    continue;
                }

                urlContentDictionary[url] = subContent.ContentString;
                list.Add(new JsTreeDataItem
                {
                    Id     = url.ToHashGuid().ToString(),
                    Parent = pageUrl.ToHashGuid().ToString(),
                    Text   = url
                });
                callback(list);

                if (list.Count >= maxPages)
                {
                    break;
                }

                if (level < maxLevel)
                {
                    // Descend with the content already downloaded; the page is registered above,
                    // so it is neither downloaded nor listed a second time.
                    CollectChildPageLinks(urlContentDictionary, url, subContent.ContentString, list, level + 1, maxLevel, maxPages, callback);

                    // Deeper levels may have filled the quota; stop this level as well.
                    if (list.Count >= maxPages)
                    {
                        break;
                    }
                }
            }
        }
Example #4
0
        /// <summary>
        /// Tries to download a resource that is requested by <paramref name="RelativeUrl"/> but was not
        /// fetched earlier, using the URLs in the site's transfer history as candidate bases.
        /// </summary>
        /// <param name="siteDb">Site database providing the transfer tasks, the failure log and the web site settings.</param>
        /// <param name="RelativeUrl">Relative URL of the resource to fetch.</param>
        /// <returns>
        /// The site object created from the download, or <c>null</c> when continue-download is disabled,
        /// there is no transfer history, the URL failed within the last 30 minutes, the download may not
        /// start, or the download did not return status 200.
        /// </returns>
        public static async Task <SiteObject> continueDownload(SiteDb siteDb, string RelativeUrl)
        {
            if (!siteDb.WebSite.ContinueDownload)
            {
                return(null);
            }

            var history = siteDb.TransferTasks.History().ToList();
            if (history.Count == 0)
            {
                return(null);
            }

            // Track failed history: a URL that failed within the last 30 minutes is not retried.
            Guid downloadid             = RelativeUrl.ToHashGuid();
            DownloadFailTrack failtrack = siteDb.DownloadFailedLog.Get(downloadid);

            if (failtrack != null)
            {
                DateTime retryThreshold = DateTime.Now.AddMinutes(-30);
                if (failtrack.HistoryTime.Any(o => o > retryThreshold))
                {
                    return(null);
                }
            }
            else
            {
                failtrack    = new DownloadFailTrack();
                failtrack.Id = downloadid;
            }

            // Ask for permission to download only after all cheap rejections above, so that no early
            // return can leave a started download without the matching ReleaseDownload call.
            var oktoDownload = await siteDb.TransferTasks.CanStartDownload(RelativeUrl);
            if (!oktoDownload)
            {
                return(null);
            }

            try
            {
                string          fullurl  = string.Empty;
                DownloadContent download = null;

                // NOTE(review): GetPossibleHostName presumably extracts a host name embedded in the relative URL - confirm.
                string hostname = TransferHelper.GetPossibleHostName(RelativeUrl);

                if (!string.IsNullOrEmpty(hostname))
                {
                    var findurl = history.Find(o => o.EndsWith(hostname, StringComparison.OrdinalIgnoreCase));

                    if (!string.IsNullOrEmpty(findurl))
                    {
                        string newrelative = RelativeUrl.Replace(hostname + "/", "");
                        fullurl = UrlHelper.Combine(findurl, newrelative);
                        var cookiecontianer = siteDb.TransferTasks.GetCookieContainerByFullUrl(fullurl);
                        download = await DownloadHelper.DownloadUrlAsync(fullurl, cookiecontianer);
                    }
                }

                if (download == null)
                {
                    // Fall back to trying the relative URL against every base URL in the history.
                    foreach (var item in history)
                    {
                        fullurl = UrlHelper.Combine(item, RelativeUrl);
                        var cookiecontianer = siteDb.TransferTasks.GetCookieContainerByFullUrl(fullurl);
                        download = await DownloadHelper.DownloadUrlAsync(fullurl, cookiecontianer);

                        if (download != null)
                        {
                            break;
                        }
                    }
                }

                // 301 and 302 are converted to 200 by the downloader, so comparing against 200 is safe.
                if (download != null && download.StatusCode == 200)
                {
                    DownloadManager downloadManager = new DownloadManager()
                    {
                        SiteDb = siteDb
                    };
                    SiteObject downloadobject = TransferHelper.AddDownload(downloadManager, download, fullurl, false, true, fullurl);

                    if (downloadobject is Page || downloadobject is View)
                    {
                        siteDb.TransferPages.AddOrUpdate(new TransferPage()
                        {
                            absoluteUrl = fullurl, PageId = downloadobject.Id
                        });
                    }

                    // For continue download content...
                    Continue.ContinueTask.Convert(siteDb, downloadobject);

                    return(downloadobject);
                }
            }
            finally
            {
                // Always pair CanStartDownload with ReleaseDownload, including when an exception escapes.
                siteDb.TransferTasks.ReleaseDownload(RelativeUrl);
            }

            // Download failed: remember the time so the URL is not retried for a while.
            // (A former follow-up that registered a route for URLs with more than five failures,
            // siteDb.Routes.EnsureExists, was disabled; its unused file-type mapping has been removed.)
            failtrack.HistoryTime.Add(DateTime.Now);
            siteDb.DownloadFailedLog.AddOrUpdate(failtrack);

            return(null);
        }
Example #5
0
        /// <summary>
        /// Processes every "script" element of the analyzed document. External scripts on the same host
        /// as the original import URL get a tag rewrite to the relative URL (when it differs) and are
        /// queued for download. Short inline scripts that force an https redirect or assign
        /// <c>window.top.location</c> are replaced with an empty placeholder script.
        /// </summary>
        /// <param name="Context">Analyzer context supplying the DOM, the page URLs, the change list and the download manager.</param>
        public void Execute(AnalyzerContext Context)
        {
            HTMLCollection scripts = Context.Dom.getElementsByTagName("script");

            foreach (var item in scripts.item)
            {
                if (item.hasAttribute("src"))
                {
                    string srcurl = Service.DomUrlService.GetLinkOrSrc(item);

                    if (string.IsNullOrEmpty(srcurl))
                    {
                        // A script tag whose src yields no URL is not treated as an external script.
                        continue;
                    }

                    string fullurl = UrlHelper.Combine(Context.AbsoluteUrl, srcurl);

                    bool issamehost = Kooboo.Lib.Helper.UrlHelper.isSameHost(Context.OriginalImportUrl, fullurl);

                    if (issamehost)
                    {
                        string relativeurl = UrlHelper.RelativePath(fullurl, issamehost);
                        // NOTE(review): TrimQuestionMark presumably strips the query string - confirm against TransferHelper.
                        relativeurl = TransferHelper.TrimQuestionMark(relativeurl);

                        if (srcurl != relativeurl)
                        {
                            string oldstring = Kooboo.Sites.Service.DomService.GetOpenTag(item);
                            string newstring = oldstring.Replace(srcurl, relativeurl);
                            Context.Changes.Add(new AnalyzerUpdate()
                            {
                                StartIndex = item.location.openTokenStartIndex,
                                EndIndex   = item.location.openTokenEndIndex,
                                NewValue   = newstring
                            });
                        }

                        Context.DownloadManager.AddTask(new Download.DownloadTask()
                        {
                            AbsoluteUrl   = fullurl,
                            RelativeUrl   = relativeurl,
                            ConstType     = ConstObjectType.Script,
                            OwnerObjectId = Context.ObjectId
                        });
                    }
                }
                else
                {
                    // Inline script. Only short redirect snippets are touched, for example:
                    //   if (document.location.protocol != "https:") {document.location = document.URL.replace(/^http:/i, "https:");}
                    // Other inline scripts are left as they are; the code that used to store them as
                    // embedded Script records in SiteDb.Scripts was disabled.
                    string text = item.InnerHtml;
                    if (!string.IsNullOrWhiteSpace(text) && text.Length < 200)
                    {
                        // Invariant lower-casing keeps the match independent of the server culture.
                        var lower = text.ToLowerInvariant();
                        var hasProtocolOperator = lower.Contains("document.location.protocol") && lower.Contains("document.url.replace") && lower.Contains("https");

                        // Match an assignment to window.top.location only: dots are literal and
                        // "==" / "===" comparisons are excluded by the negative lookahead.
                        var hasLocationOperator = Regex.IsMatch(lower, @"window\.top\.location\s*=(?!=)");

                        if (hasProtocolOperator || hasLocationOperator)
                        {
                            // Replace the whole element, from the open tag through the end tag.
                            Context.Changes.Add(new AnalyzerUpdate()
                            {
                                StartIndex = item.location.openTokenStartIndex,
                                EndIndex   = item.location.endTokenEndIndex,
                                NewValue   = "<script>/* https redirect removed */</script>"
                            });
                        }
                    }
                }
            }
        }
Example #6
0
        /// <summary>
        /// Tries to download a resource that is requested by <paramref name="RelativeUrl"/> but was not
        /// fetched earlier, using the URLs in the site's transfer history as candidate bases.
        /// </summary>
        /// <param name="siteDb">Site database providing the transfer tasks, the failure log and the web site settings.</param>
        /// <param name="RelativeUrl">Relative URL of the resource to fetch.</param>
        /// <returns>
        /// The site object created from the download, or <c>null</c> when continue-download is disabled,
        /// the URL ends with "favicon.ico", there is no transfer history, the URL failed within the last
        /// 30 minutes or more than three times, the download may not start, or the download did not
        /// return status 200.
        /// </returns>
        public static async Task <SiteObject> continueDownload(SiteDb siteDb, string RelativeUrl)
        {
            if (!siteDb.WebSite.ContinueDownload)
            {
                return(null);
            }

            // favicon.ico is never fetched on demand. Rejecting it here, before CanStartDownload,
            // ensures no early return happens after a download has been marked as started.
            if (RelativeUrl.EndsWith("favicon.ico"))
            {
                return(null);
            }

            var history = siteDb.TransferTasks.History().ToList();
            if (history.Count == 0)
            {
                return(null);
            }

            // The first history entry is used as the original import URL.
            string orgimport = history[0];

            // Track failed history: skip URLs that failed recently or too often.
            Guid downloadid = RelativeUrl.ToHashGuid();
            DownloadFailTrack failtrack = siteDb.DownloadFailedLog.Get(downloadid);

            if (failtrack != null)
            {
                DateTime retryThreshold = DateTime.Now.AddMinutes(-30);
                if (failtrack.HistoryTime.Any(o => o > retryThreshold))
                {
                    return(null);
                }

                if (failtrack.HistoryTime.Count() > 3)
                {
                    return(null);
                }
            }
            else
            {
                failtrack    = new DownloadFailTrack();
                failtrack.Id = downloadid;
            }

            var oktoDownload = await siteDb.TransferTasks.CanStartDownload(RelativeUrl);
            if (!oktoDownload)
            {
                return(null);
            }

            SiteObject downloadobject = null;
            bool       succeeded      = false;

            try
            {
                string          fullurl  = string.Empty;
                DownloadContent download = null;

                // NOTE(review): GetPossibleHostName presumably extracts a host name embedded in the relative URL - confirm.
                string hostname = TransferHelper.GetPossibleHostName(RelativeUrl);

                if (!string.IsNullOrEmpty(hostname))
                {
                    var    findurl     = history.Find(o => o.EndsWith(hostname, StringComparison.OrdinalIgnoreCase));
                    string newrelative = RelativeUrl.Replace(hostname + "/", "");

                    if (!string.IsNullOrEmpty(findurl))
                    {
                        fullurl = UrlHelper.Combine(findurl, newrelative);
                    }
                    else
                    {
                        // Host not in the history: build the URL with the protocol of the original import.
                        var protocol = OrgProtocol(orgimport);
                        fullurl = protocol + hostname + newrelative;
                    }

                    var cookiecontianer = siteDb.TransferTasks.GetCookieContainerByFullUrl(fullurl);
                    download = await DownloadHelper.DownloadUrlAsync(fullurl, cookiecontianer);
                }

                if (download == null)
                {
                    // Fall back to trying the relative URL against every base URL in the history.
                    foreach (var item in history)
                    {
                        fullurl = UrlHelper.Combine(item, RelativeUrl);
                        var cookiecontianer = siteDb.TransferTasks.GetCookieContainerByFullUrl(fullurl);
                        download = await DownloadHelper.DownloadUrlAsync(fullurl, cookiecontianer);

                        if (download != null)
                        {
                            break;
                        }
                    }
                }

                // 301 and 302 are converted to 200 by the downloader, so comparing against 200 is safe.
                if (download != null && download.StatusCode == 200)
                {
                    DownloadManager downloadManager = new DownloadManager()
                    {
                        SiteDb = siteDb
                    };
                    downloadobject = TransferHelper.AddDownload(downloadManager, download, fullurl, false, true, orgimport);

                    if (downloadobject is Page || downloadobject is View)
                    {
                        siteDb.TransferPages.AddOrUpdate(new TransferPage()
                        {
                            absoluteUrl = fullurl, PageId = downloadobject.Id
                        });
                    }

                    succeeded = true;
                }
            }
            finally
            {
                // Always pair CanStartDownload with ReleaseDownload, including when an exception escapes.
                siteDb.TransferTasks.ReleaseDownload(RelativeUrl);
            }

            if (succeeded)
            {
                // For continue download content; runs after the release, as before.
                Continue.ContinueTask.Convert(siteDb, downloadobject);
                return(downloadobject);
            }

            // Download failed: remember the time so the URL is not retried for a while.
            failtrack.HistoryTime.Add(DateTime.Now);
            siteDb.DownloadFailedLog.AddOrUpdate(failtrack);

            return(null);
        }