/// <summary>
/// Replaces the current document with the given HTML and an optional stylesheet.
/// </summary>
/// <param name="htmlSource">the html to load; when null or empty no new document is created</param>
/// <param name="baseCssData">optional: the stylesheet to start from; <c>CssUtils.DefaultCssData</c> is used when not given</param>
public void SetHtml(string htmlSource, CssData baseCssData = null)
{
    // Release the previous document first; the selection handler is only
    // torn down together with an existing root.
    if (_root != null)
    {
        _root.Dispose();
        _root = null;

        if (_selectionHandler != null)
        {
            _selectionHandler.Dispose();
        }

        _selectionHandler = null;
    }

    if (string.IsNullOrEmpty(htmlSource))
    {
        return;
    }

    _cssData = baseCssData ?? CssUtils.DefaultCssData;

    // The parser receives the stylesheet by reference and may replace it.
    _root = DomParser.GenerateCssTree(htmlSource, this, ref _cssData);
    if (_root == null)
    {
        return;
    }

    _root.HtmlContainer = this;
    _selectionHandler = new SelectionHandler(_root);
}
/// <summary>
/// Clears the container, stores the given resource server and, when the server
/// provides non-empty HTML, builds the CSS box tree and selection handler from it.
/// </summary>
/// <param name="resourceServer">source of the HTML and the optional base stylesheet</param>
public void SetResoureServer(IResourceServer resourceServer)
{
    Clear();
    _resourceServer = resourceServer;

    if (string.IsNullOrEmpty(resourceServer.Html))
    {
        return;
    }

    _loadComplete = false;

    // Fall back to the adapter's default stylesheet when the server supplies none.
    _cssData = resourceServer.CssData ?? _adapter.DefaultCssData;

    var parser = new DomParser(_cssParser);
    var cssHolder = new DomParser.CssDataWithChanged { cssData = _cssData };
    _root = parser.GenerateCssTree(resourceServer.Html, this, cssHolder);

    // Only adopt the holder's stylesheet when the parser flagged it as changed.
    if (cssHolder.cssDataChanged)
    {
        _cssData = cssHolder.cssData;
    }

    if (_root == null)
    {
        return;
    }

    _selectionHandler = new SelectionHandler(_root);
}
/// <summary>
/// Parses a minimal HTML document as "text/html" and checks that the title is exposed.
/// </summary>
public static void ParseHtml()
{
    DomParser parser = new DomParser();

    var doc = (HtmlDocument)parser.ParseFromString(
        "<html><title>qwe</title></html>",
        "text/html");

    Assert.AreEqual("qwe", doc.Title);
}
/// <summary>
/// SelectFirst must return a matching node even when several elements match the XPath.
/// </summary>
public void SelectFirstTest_MultiResult()
{
    DomParser parser = new DomParser();
    GeckoDomDocument doc = parser.ParseFromString(
        "<html><body><span id='myspan'>hello world</span><span id='myspan'>hello world</span><span id='myspan'>hello world</span></body></html>");

    GeckoNode firstMatch = doc.SelectFirst(@".//*[@id='myspan']");

    Assert.AreEqual("hello world", firstMatch.TextContent);
}
/// <summary>
/// SelectSingle must return the one element that matches the XPath.
/// </summary>
public void SelectSingleTest()
{
    DomParser parser = new DomParser();
    GeckoDomDocument doc = parser.ParseFromString(
        "<html><body><span id='myspan'>hello world</span></body></html>");

    var match = doc.SelectSingle(@".//*[@id='myspan']");

    Assert.NotNull(match);
    Assert.AreEqual("hello world", match.TextContent);
}
/// <summary>
/// A valid document string must be parsed into a DOM that can be queried by element id.
/// </summary>
public void ParseFromString_AValidDocumentString_CreatesNavigatableDOM()
{
    DomParser parser = new DomParser();
    GeckoDomDocument doc = parser.ParseFromString(
        "<html><body><span id='myspan'>hello world</span></body></html>");

    var found = doc.GetElementById("myspan");

    Assert.NotNull(found);
    Assert.AreEqual("hello world", found.TextContent);
}
/// <summary>
/// SelectSingle must throw a GeckoDomException when more than one element matches the XPath.
/// </summary>
public void SelectSingleTest_MultiResult()
{
    DomParser parser = new DomParser();
    GeckoDomDocument doc = parser.ParseFromString(
        "<html><body><span id='myspan'>hello world</span><span id='myspan'>hello world</span><span id='myspan'>hello world</span></body></html>");

    // The result is irrelevant here; only the exception matters.
    Assert.Throws<GeckoDomException>(() => { doc.SelectSingle(@".//*[@id='myspan']"); });
}
/// <summary>
/// Looks up the element identified by <c>KoobooId</c> in the DOM parsed from the object's body.
/// </summary>
/// <param name="domObject">object whose <c>Body</c> is parsed</param>
/// <returns>
/// The matching element, or null when <paramref name="domObject"/> is null, <c>KoobooId</c> is
/// blank, nothing matches, or the match is not an <c>Element</c>.
/// </returns>
internal virtual Element GetElement(IDomObject domObject)
{
    bool canResolve = domObject != null && !string.IsNullOrWhiteSpace(KoobooId);
    if (!canResolve)
    {
        return null;
    }

    var dom = DomParser.CreateDom(domObject.Body);

    // "as" already yields null for a null or non-element node.
    return Service.DomService.GetElementByKoobooId(dom, KoobooId) as Element;
}
/// <summary>
/// Clears the container and loads the given HTML with an optional stylesheet.
/// </summary>
/// <param name="htmlSource">the html to load; when null or empty the container is only cleared</param>
/// <param name="baseCssData">optional: the stylesheet to start from; the adapter's default is used when not given</param>
public void SetHtml(string htmlSource, CssData baseCssData = null)
{
    Clear();

    if (string.IsNullOrEmpty(htmlSource))
    {
        return;
    }

    _cssData = baseCssData ?? _adapter.DefaultCssData;

    // The parser receives the stylesheet by reference and may replace it.
    var parser = new DomParser(_cssParser);
    _root = parser.GenerateCssTree(htmlSource, this, ref _cssData);

    if (_root == null)
    {
        return;
    }

    _selectionHandler = new SelectionHandler(_root);
}
/// <summary>
/// Looks up the element identified by <c>KoobooId</c> in the culture-specific content of an HTML block.
/// The culture-specific markup is stored in <c>_currentCultureDom</c> before it is parsed.
/// </summary>
/// <param name="domObject">the object to read from; it is cast to <c>HtmlBlock</c></param>
/// <returns>
/// The matching element, or null when <paramref name="domObject"/> is null, <c>KoobooId</c> is
/// blank, nothing matches, or the match is not an <c>Element</c>.
/// </returns>
internal override Element GetElement(IDomObject domObject)
{
    // Null guard added so this override returns null for a null argument
    // instead of failing on the cast/dereference below.
    if (domObject == null || string.IsNullOrWhiteSpace(KoobooId))
    {
        return null;
    }

    _currentCultureDom = ((HtmlBlock)domObject).GetValue(_culture).ToString();

    var doc = DomParser.CreateDom(_currentCultureDom);
    var node = Service.DomService.GetElementByKoobooId(doc, KoobooId);

    // "as" already yields null for a null or non-element node.
    return node as Element;
}
/// <summary>
/// Returns a copy of the HTML in which a Kooboo id attribute has been inserted
/// into the open tag of every element that has a recorded open-tag position.
/// </summary>
/// <param name="html">the markup to annotate</param>
/// <returns>the annotated markup; null or empty input is returned unchanged</returns>
public static string ApplyKoobooId(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    var doc = DomParser.CreateDom(html);
    var walker = doc.createNodeIterator(doc.documentElement, enumWhatToShow.ELEMENT, null);
    var output = new StringBuilder();

    // Index of the first source character that has not been copied to the output yet.
    int copiedUpTo = 0;

    for (var node = walker.nextNode(); node != null; node = walker.nextNode())
    {
        string koobooid = GetKoobooId(node);

        var element = node as Element;
        if (element == null || element.location.openTokenEndIndex <= 0)
        {
            continue;
        }

        // Insert just before the closing '>' of the open tag, or before the
        // '/' when a self-closing tag is written as "<x />".
        int insertAt = node.location.openTokenEndIndex;
        if (IsSelfCloseTag(element.tagName) && element.ownerDocument.HtmlSource[insertAt - 1] == '/')
        {
            insertAt = insertAt - 1;
        }

        output.Append(doc.HtmlSource.Substring(copiedUpTo, insertAt - copiedUpTo));
        output.Append(" ").Append(Kooboo.Sites.SiteConstants.KoobooIdAttributeName).AppendFormat("=\"{0}\"", koobooid);
        copiedUpTo = insertAt;
    }

    // Copy whatever follows the last annotated tag.
    if (copiedUpTo < html.Length)
    {
        output.Append(doc.HtmlSource.Substring(copiedUpTo));
    }

    return output.ToString();
}
/// <summary>
/// Clears the container and loads the given HTML with an optional stylesheet.
/// </summary>
/// <param name="htmlSource">the html to load; when null or empty the container is only cleared</param>
/// <param name="baseCssData">optional: the stylesheet to start from; the adapter's default is used when not given</param>
public void SetHtml(string htmlSource, CssData baseCssData = null)
{
    Clear();

    if (string.IsNullOrEmpty(htmlSource))
    {
        return;
    }

    _loadComplete = false;
    _cssData = baseCssData ?? _adapter.DefaultCssData;

    // The parser receives the stylesheet by reference and may replace it.
    var parser = new DomParser(_cssParser);
    _root = parser.GenerateCssTree(htmlSource, this, ref _cssData);

    if (_root == null)
    {
        return;
    }

    // Selection and image download helpers are only created for a parsed document.
    _selectionHandler = new SelectionHandler(_root);
    _imageDownloader = new ImageDownloader();
}
/// <summary>
/// Clears the container and loads the given HTML with an optional stylesheet.
/// </summary>
/// <param name="htmlSource">the html to load; when null or empty the container is only cleared</param>
/// <param name="baseCssData">optional: the stylesheet to start from; the adapter's default is used when not given</param>
public void SetHtml(string htmlSource, CssData baseCssData = null)
{
    Clear();

    if (string.IsNullOrEmpty(htmlSource))
    {
        return;
    }

    _loadComplete = false;
    _cssData = baseCssData ?? _adapter.DefaultCssData;

    // The parser receives the stylesheet by reference and may replace it.
    var parser = new DomParser(_cssParser);
    _root = parser.GenerateCssTree(htmlSource, this, ref _cssData);

    if (_root == null)
    {
        return;
    }

    // The image downloader is created with the container's ImagesSecurityProtocol value.
    _selectionHandler = new SelectionHandler(_root);
    _imageDownloader = new ImageDownloader(ImagesSecurityProtocol);
}
/// <summary>
/// Creates a container from the given document, bridge object and optional stylesheet.
/// </summary>
/// <param name="htmlSource">the html to init with; validated by <c>ArgChecker.AssertArgNotNullOrEmpty</c></param>
/// <param name="bridge">used to resolve external references in html code (property, method calls)</param>
/// <param name="baseCssData">optional: the stylesheet to init with; <c>CssUtils.DefaultCssData</c> is used when not given</param>
public HtmlContainer(string htmlSource, object bridge, CssData baseCssData = null)
{
    ArgChecker.AssertArgNotNullOrEmpty(htmlSource, "htmlSource");

    _bridge = bridge;
    _cssData = baseCssData ?? CssUtils.DefaultCssData;

    // NOTE(review): presumably unreachable after the argument check above; kept
    // because the checker's behavior is not visible here.
    if (htmlSource == null)
    {
        return;
    }

    // The parser receives the stylesheet by reference and may replace it.
    _root = DomParser.GenerateCssTree(htmlSource, ref _cssData, bridge);
    if (_root == null)
    {
        return;
    }

    _root.HtmlContainer = this;
    _selectionHandler = new SelectionHandler(_root);
}
/// <summary>
/// Determines the layout name of a page: the explicit <c>LayoutName</c> when set,
/// otherwise the id of the first "layout" tag found in the page body.
/// </summary>
/// <param name="page">the page to inspect</param>
/// <returns>the layout name, or null when the page is null or no layout tag exists</returns>
public static string GetLayoutName(Page page)
{
    if (page == null)
    {
        return null;
    }

    if (!string.IsNullOrEmpty(page.LayoutName))
    {
        return page.LayoutName;
    }

    // No explicit name: fall back to a <layout> tag in the body.
    var bodyDom = DomParser.CreateDom(page.Body);
    var layoutTags = bodyDom.getElementsByTagName("layout");

    if (layoutTags == null || layoutTags.item.Count() == 0)
    {
        return null;
    }

    return layoutTags.item[0].id;
}
/// <summary>
/// Loads a page asynchronously, retrying up to <c>MAX_ATTEMPTS</c> times.
/// </summary>
/// <remarks>
/// Automatic redirects are disabled. A 301 (Moved) response is followed manually and
/// resets the attempt counter. A 200 (OK) response is read into <c>Source</c> and parsed
/// into <c>Document</c>. Any other status, or any exception, counts as a failed attempt;
/// every retry is preceded by a 15 second pause.
/// </remarks>
/// <param name="__url">Address of the page</param>
/// <returns>True when the document was loaded successfully</returns>
public async Task<bool> LoadPageAsync(string __url)
{
    int attemptsRemain = MAX_ATTEMPTS;
    int attemptsAmount;
    string logMessage;

    ResetData();
    Url = __url;

    // Request setup
    CookieContainer cookie = new CookieContainer();
    HttpClientHandler handler = new HttpClientHandler
    {
        CookieContainer = cookie,
        UseCookies = true,
        AllowAutoRedirect = false
    };

    // Disposing the client also disposes the handler it was constructed with.
    using (HttpClient client = new HttpClient(handler))
    {
        client.DefaultRequestHeaders.CacheControl = new CacheControlHeaderValue() { MaxAge = TimeSpan.Zero };
        client.DefaultRequestHeaders.TryAddWithoutValidation("User-Agent", "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:19.0) Gecko/20100101 Firefox/19.0");

        // Keep loading until success or until the attempts run out
        while (attemptsRemain > 0 && !Success)
        {
            // Pause after a failed attempt
            if (attemptsRemain < MAX_ATTEMPTS)
            {
                attemptsAmount = MAX_ATTEMPTS - attemptsRemain;
                logMessage = @"Повторная загрузка страницы {0} - {1}/{2}: Error";
                SiteWorker.CURRENT_INSTANCE.Log = String.Format(logMessage, Url, attemptsAmount.ToString(), MAX_ATTEMPTS.ToString());
                await Task.Delay(15000);
            }

            attemptsRemain--;

            // Load
            try
            {
                using (HttpResponseMessage response = await client.GetAsync(Url))
                {
                    if (response != null)
                    {
                        attemptsAmount = MAX_ATTEMPTS - attemptsRemain;
                        switch (response.StatusCode)
                        {
                            case HttpStatusCode.OK:
                                Source = await response.Content.ReadAsStringAsync();
                                Document = await DomParser.ParseAsync(Source);
                                Success = true;
                                logMessage = @"Загрузка страницы {0} - {1}/{2}: Ok";
                                SiteWorker.CURRENT_INSTANCE.Log = String.Format(logMessage, Url, attemptsAmount.ToString(), MAX_ATTEMPTS.ToString());
                                break;

                            case HttpStatusCode.Moved:
                                Uri location = response.Headers.Location;
                                if (location == null)
                                {
                                    // A redirect without a target cannot be followed;
                                    // treat it as a failed attempt.
                                    break;
                                }

                                // Resolve the target before resetting the counter: Location may be
                                // relative, and a failure here must not restore the attempts,
                                // otherwise the loop would never end.
                                Url = new Uri(new Uri(Url), location).AbsoluteUri;
                                attemptsRemain = MAX_ATTEMPTS;
                                logMessage = @"Перемещена страница {0} - {1}/{2}: Moved";
                                SiteWorker.CURRENT_INSTANCE.Log = String.Format(logMessage, Url, attemptsAmount.ToString(), MAX_ATTEMPTS.ToString());
                                break;
                        }
                    }
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: a failed request or parse is just a failed
                // attempt, and the retry branch above logs it on the next iteration.
            }
        }
    }

    return Success;
}
/// <summary>
/// Rebuilds the data model used for code completion.
/// </summary>
/// <param name="inspector">inspector passed to the new <c>DomParser</c></param>
/// <param name="msbuild">optional MSBuild instance passed to the new <c>DomParser</c></param>
public void codeCompletionInit(IInspector inspector, IMSBuild msbuild = null)
{
    var completionModel = new DomParser(inspector, msbuild);
    dom = completionModel;

    Log.Trace("Code completion has been initialized for '{0}'", Name);
}
/// <summary>
/// Evaluates the HTML to be rendered into a list of render tasks.
/// </summary>
/// <remarks>
/// Command comments among the document's child nodes are handled first. The document
/// element is then walked (elements and comments) and every configured evaluator is
/// offered each renderable node. Static markup between evaluated nodes is emitted as
/// content tasks. Tasks collected as "append" tasks (including layout commands) are
/// added after all other tasks.
/// </remarks>
/// <param name="html">the markup to evaluate</param>
/// <param name="options">evaluation options; a default instance is used when null</param>
/// <returns>the render tasks; an empty list when <paramref name="html"/> is null or empty</returns>
public static List<IRenderTask> Evaluate(string html, EvaluatorOption options)
{
    if (options == null)
    {
        options = new EvaluatorOption();
    }

    List<IRenderTask> tasklist = new List<IRenderTask>();
    List<IRenderTask> appendtask = new List<IRenderTask>();

    if (string.IsNullOrEmpty(html))
    {
        return tasklist;
    }

    var doc = DomParser.CreateDom(html);

    // Index of the first source character that has not been emitted yet.
    int currentindex = 0;
    int totallen = html.Length;

    // Handle command comments at the top level of the document.
    foreach (var item in doc.childNodes.item)
    {
        var comment = item as Comment;
        if (comment == null || !Kooboo.Sites.Render.Commands.CommandManager.IsCommand(comment))
        {
            continue;
        }

        var command = Kooboo.Sites.Render.Commands.CommandParser.ParseCommand(comment);
        bool isLayout = string.Equals(command.Name, "layout", System.StringComparison.OrdinalIgnoreCase);

        if (isLayout && (options.IgnoreEvaluators & EnumEvaluator.LayoutCommand) == EnumEvaluator.LayoutCommand)
        {
            continue;
        }

        // A comment that ends before the document element: emit the markup in front
        // of it and move the cursor past the comment.
        if (comment.location.endTokenEndIndex <= doc.documentElement.location.openTokenStartIndex)
        {
            tasklist.Add(new ContentRenderTask(doc.HtmlSource.Substring(currentindex, comment.location.openTokenStartIndex - currentindex)));
            currentindex = comment.location.endTokenEndIndex + 1;
        }

        var commandtask = new CommandRenderTask(comment, options);
        if (isLayout)
        {
            // Layout commands are deferred to the end of the task list.
            commandtask.ClearBefore = true;
            appendtask.Add(commandtask);
        }
        else
        {
            tasklist.Add(commandtask);
        }
    }

    var iterator = doc.createNodeIterator(doc.documentElement, enumWhatToShow.ELEMENT | enumWhatToShow.COMMENT, null);
    var nextnode = iterator.nextNode();

    List<EvaluatorResponse> responseList = new List<EvaluatorResponse>();

    List<IEvaluator> evaluators;
    if (options.Evaluators != null)
    {
        evaluators = options.Evaluators;
    }
    else
    {
        evaluators = EvaluatorContainer.DefaultList;
    }

    while (nextnode != null)
    {
        if (!ShouldTryRender(nextnode))
        {
            nextnode = iterator.nextNode();
            continue;
        }

        foreach (var item in evaluators)
        {
            var response = item.Evaluate(nextnode, options);
            if (response != null)
            {
                responseList.Add(response);
                if (response.StopNextEvaluator)
                {
                    break;
                }
            }
        }

        int len = nextnode.location.openTokenStartIndex - currentindex;

        // A document parse error may put the node's openTokenStartIndex before
        // currentindex; rendering it would repeat content, so the node is skipped.
        if (responseList.Count == 0 || len < 0)
        {
            // Drop responses of a skipped node so they are not applied to the next one.
            responseList.Clear();
            nextnode = iterator.nextNode();
            continue;
        }

        var element = nextnode as Element;
        bool IsSelfClosed = element == null ? false : Service.DomService.IsSelfCloseTag(element.tagName);
        bool OmitTag = responseList.Any(o => o.OmitTag);

        if (len > 0)
        {
            tasklist.Add(new ContentRenderTask(doc.HtmlSource.Substring(currentindex, len)));
        }

        var bindings = GetBinding(responseList);
        if (bindings != null)
        {
            tasklist.AddRange(bindings);
        }

        var attributes = GetAttribute(responseList);
        var contenttask = GetContent(responseList);

        if (IsSelfClosed)
        {
            if (attributes != null && !OmitTag)
            {
                tasklist.Add(new ContentRenderTask(GetHalfOpenTag(element)));
                tasklist.AddRange(attributes);
                tasklist.Add(new ContentRenderTask("/>"));
            }
            else if (contenttask != null)
            {
                tasklist.AddRange(contenttask);
            }
        }
        else
        {
            if (!OmitTag)
            {
                if (attributes != null)
                {
                    tasklist.Add(new ContentRenderTask(GetHalfOpenTag(element)));
                    tasklist.AddRange(attributes);
                    tasklist.Add(new ContentRenderTask(">"));
                }
                else
                {
                    tasklist.Add(new ContentRenderTask(GetHalfOpenTag(element) + ">"));
                }
            }

            if (contenttask != null)
            {
                tasklist.AddRange(contenttask);
            }

            if (!OmitTag && contenttask != null)
            {
                tasklist.Add(new ContentRenderTask("</" + element.tagName + ">"));
            }
        }

        var endbinding = GetEndBinding(responseList);
        if (endbinding != null)
        {
            tasklist.AddRange(endbinding);
        }

        var append = GetAppend(responseList);
        if (append != null)
        {
            appendtask.AddRange(append);
        }

        if (IsFakeHeader(element))
        {
            currentindex = nextnode.location.openTokenStartIndex;
            nextnode = iterator.NextSibling(nextnode);
        }
        else if (contenttask != null)
        {
            // The content was replaced, so skip the whole element and its children.
            currentindex = nextnode.location.endTokenEndIndex + 1;

            // On a document parse error endTokenEndIndex may be zero; never move
            // the cursor to before the end of the open tag.
            if (nextnode.location.endTokenEndIndex + 1 < nextnode.location.openTokenEndIndex + 1)
            {
                currentindex = nextnode.location.openTokenEndIndex + 1;
            }

            nextnode = iterator.NextSibling(nextnode);
        }
        else
        {
            // Only the open tag was rewritten; continue with the element's children.
            currentindex = nextnode.location.openTokenEndIndex + 1;
            nextnode = iterator.nextNode();
        }

        responseList.Clear();
    }

    // Emit the remaining markup. Comparing against totallen (not totallen - 1)
    // keeps a final single character and makes one-character input produce a task.
    if (currentindex < totallen)
    {
        tasklist.Add(new ContentRenderTask(doc.HtmlSource.Substring(currentindex, totallen - currentindex)));
    }

    if (appendtask.Count > 0)
    {
        tasklist.AddRange(appendtask);
    }

    RenderHelper.OptimizeTask(tasklist);
    return tasklist;
}
/// <summary>
/// Splits markup into language tasks: literal fragments (flag false) and
/// multilingual keys (flag true) found in text nodes and in the "title" and
/// "placeholder" attributes of elements.
/// </summary>
/// <param name="input">the markup to split</param>
/// <returns>the task list; an empty list when <paramref name="input"/> is null or empty</returns>
public static List<LanguageTask> ParseDom(string input)
{
    List<LanguageTask> tasklist = new List<LanguageTask>();

    if (string.IsNullOrEmpty(input))
    {
        return tasklist;
    }

    var doc = DomParser.CreateDom(input);

    // Index of the first source character that has not been emitted yet.
    int currentindex = 0;
    int totallen = input.Length;

    var iterator = doc.createNodeIterator(doc.documentElement, enumWhatToShow.TEXT | enumWhatToShow.ELEMENT, null);
    var nextnode = iterator.nextNode();

    while (nextnode != null)
    {
        if (nextnode.nodeType == enumNodeType.TEXT)
        {
            var textnode = nextnode as Kooboo.Dom.Text;
            if (textnode != null && MultiLingualHelper.IsMultilingualKey(textnode.data))
            {
                string key = textnode.data;

                // Literal markup in front of the key, then the key itself.
                int len = nextnode.location.openTokenStartIndex - currentindex;
                if (len > 0)
                {
                    tasklist.Add(new LanguageTask(input.Substring(currentindex, len), false));
                }

                tasklist.Add(new LanguageTask(key, true));
                currentindex = nextnode.location.endTokenEndIndex + 1;
            }
        }
        else if (nextnode.nodeType == enumNodeType.ELEMENT)
        {
            var el = nextnode as Element;

            // These elements are skipped together with their children.
            if (el.tagName == "script" || el.tagName == "link" || el.tagName == "style" || el.tagName == "meta")
            {
                nextnode = iterator.NextSibling(nextnode);
                continue;
            }

            string placeholder = el.getAttribute("placeholder");
            string title = el.getAttribute("title");

            if (MultiLingualHelper.IsMultilingualKey(title) || MultiLingualHelper.IsMultilingualKey(placeholder))
            {
                int len = nextnode.location.openTokenStartIndex - currentindex;
                string substring = string.Empty;
                if (len > 0)
                {
                    substring = input.Substring(currentindex, len);
                }

                // Both attributes are removed before the half-open tag is built and
                // are emitted again below as separate tasks.
                el.removeAttribute("placeholder");
                el.removeAttribute("title");
                substring += Helper.DomHelper.GetHalfOpenTag(el);
                tasklist.Add(new LanguageTask(substring, false));

                if (!string.IsNullOrEmpty(title))
                {
                    string titleatt = " title=\"";
                    tasklist.Add(new LanguageTask(titleatt, false));
                    tasklist.Add(new LanguageTask(title, true));
                    tasklist.Add(new LanguageTask("\"", false));
                }

                if (!string.IsNullOrWhiteSpace(placeholder))
                {
                    string placeatt = " placeholder=\"";
                    tasklist.Add(new LanguageTask(placeatt, false));
                    tasklist.Add(new LanguageTask(placeholder, true));
                    tasklist.Add(new LanguageTask("\"", false));
                }

                if (Helper.DomHelper.IsSelfCloseTag(el.tagName))
                {
                    tasklist.Add(new LanguageTask(" />", false));
                }
                else
                {
                    tasklist.Add(new LanguageTask(">", false));
                }

                currentindex = nextnode.location.openTokenEndIndex + 1;
            }
        }

        nextnode = iterator.nextNode();
    }

    // Emit the remaining markup. Comparing against totallen (not totallen - 1)
    // keeps a final single character and makes one-character input produce a task.
    if (currentindex < totallen)
    {
        tasklist.Add(new LanguageTask(doc.HtmlSource.Substring(currentindex, totallen - currentindex), false));
    }

    OptimizeTask(tasklist);
    return tasklist;
}
/// <summary>
/// Parses the markup and delegates to the <c>GetRelativeLinks</c> overload that takes the parsed DOM.
/// </summary>
/// <param name="HtmlSource">the markup to parse</param>
/// <returns>the result of the DOM-based overload</returns>
public static List<string> GetRelativeLinks(string HtmlSource)
{
    var dom = DomParser.CreateDom(HtmlSource);
    return GetRelativeLinks(dom);
}
/// <summary>
/// Parses the markup and returns the items of the document's <c>Links</c> collection.
/// </summary>
/// <param name="htmlSource">the markup to parse</param>
/// <returns>the link elements of the parsed document</returns>
public static List<Element> GetLinkElements(string htmlSource)
{
    var links = DomParser.CreateDom(htmlSource).Links;
    return links.item;
}