/// <summary>
/// Scans the supplied HTML for elements accepted by the smart content predicate and
/// collects the extension data registered for each referenced smart content id.
/// </summary>
/// <param name="content">HTML to scan.</param>
/// <returns>The extension data found, one entry per distinct smart content id.</returns>
public IExtensionData[] CalculateReferencedExtensionData(string content)
{
    // Keyed by smart content id, so repeated references collapse into one entry.
    Hashtable referenced = new Hashtable();
    ContentSourceManager.SmartContentPredicate smartContentPredicate = new ContentSourceManager.SmartContentPredicate();
    SimpleHtmlParser parser = new SimpleHtmlParser(content);

    Element element;
    while ((element = parser.Next()) != null)
    {
        if (!smartContentPredicate.IsMatch(element))
        {
            continue;
        }

        BeginTag beginTag = element as BeginTag;
        Attr idAttr = beginTag.GetAttribute("id");
        if (idAttr == null)
        {
            // Synchronized WP posts will strip ID attrs (bug 488143)
            continue;
        }

        string sourceId;
        string contentId;
        ContentSourceManager.ParseContainingElementId(idAttr.Value, out sourceId, out contentId);

        IExtensionData extensionData = GetExtensionData(contentId);
        if (extensionData != null)
        {
            referenced[contentId] = extensionData;
        }
    }

    return (IExtensionData[])ArrayHelper.CollectionToArray(referenced.Values, typeof(IExtensionData));
}
/// <summary>
/// Emits markup for <paramref name="tag"/> under the name <paramref name="tagName"/>.
/// A begin tag is written with only those attributes whose upper-cased name is in
/// PreserveAttributes (plus a trailing "/" when the tag is Complete). Any other tag is
/// written as a closing tag, unless its upper-cased name equals HTMLTokens.Br or
/// HTMLTokens.Img.
/// </summary>
/// <param name="tagName">Name to emit; may differ from the name carried by the tag.</param>
/// <param name="tag">The tag being emitted.</param>
private void EmitTagAndAttributes(string tagName, Tag tag)
{
    BeginTag opening = tag as BeginTag;
    if (opening == null)
    {
        string upperName = tagName.ToUpper(CultureInfo.InvariantCulture);
        bool suppressClosingTag = upperName == HTMLTokens.Br || upperName == HTMLTokens.Img;
        if (!suppressClosingTag)
        {
            Emit("</" + tagName + ">");
        }
        return;
    }

    Emit("<" + tagName);
    foreach (Attr attribute in opening.Attributes)
    {
        string upperAttributeName = attribute.Name.ToUpper(CultureInfo.InvariantCulture);
        if (PreserveAttributes.Contains(upperAttributeName))
        {
            Emit(" " + attribute.ToString());
        }
    }

    if (opening.Complete)
    {
        Emit("/");
    }
    Emit(">");
}
/// <summary>
/// Builds a Textarea for <paramref name="parentForm"/> from a textarea begin tag, using the
/// tag's "name" attribute and the text collected from the parser up to the closing tag.
/// </summary>
/// <param name="parentForm">Form passed to the Textarea constructor.</param>
/// <param name="textareaTag">The textarea begin tag that was just parsed.</param>
private void HandleTextarea(HtmlForm parentForm, BeginTag textareaTag)
{
    string fieldName = textareaTag.GetAttributeValue("name");
    string fieldText = parser.CollectTextUntil("textarea");

    // The instance is intentionally discarded; presumably the constructor attaches the
    // control to parentForm -- confirm against Textarea.
    new Textarea(parentForm, fieldName, fieldText);
}
/// <summary>
/// Looks, from <paramref name="startIndex"/> onward, for a begin tag whose next non-null
/// element is an end tag with the same name. The first such pair found is cleared (both
/// slots set to null).
/// </summary>
/// <param name="startIndex">Index at which to start looking for begin tags.</param>
/// <param name="els">Element array; cleared slots are represented by null.</param>
/// <returns>true if a pair was removed (the caller should call again); otherwise false.</returns>
private static bool FindAndRemoveEmptyTag(int startIndex, Element[] els)
{
    int lastCandidate = els.Length - 1;
    for (int openIndex = startIndex; openIndex < lastCandidate; openIndex++)
    {
        BeginTag opening = els[openIndex] as BeginTag;
        if (opening == null)
        {
            continue;
        }

        // Skip over slots that were already cleared.
        int closeIndex = openIndex + 1;
        while (closeIndex < els.Length && els[closeIndex] == null)
        {
            closeIndex++;
        }

        if (closeIndex == els.Length)
        {
            continue; // nothing but cleared slots after this begin tag
        }

        EndTag closing = els[closeIndex] as EndTag;
        if (closing != null && closing.NameEquals(opening.Name))
        {
            // It's a match, delete start and end tags and restart
            els[openIndex] = null;
            els[closeIndex] = null;
            return true;
        }
    }

    return false;
}
/// <summary>
/// Matches a meta begin tag whose "name" attribute is exactly "generator" and whose
/// "content" attribute equals "blogger" (compared case-insensitively, invariant culture).
/// </summary>
/// <param name="e">Element to test.</param>
/// <returns>true if the element is such a tag; otherwise false.</returns>
public bool IsMatch(Element e)
{
    BeginTag metaTag = e as BeginTag;
    if (metaTag == null
        || !metaTag.NameEquals("meta")
        || metaTag.GetAttributeValue("name") != "generator")
    {
        return false;
    }

    string generator = metaTag.GetAttributeValue("content");
    return generator != null
        && CaseInsensitiveComparer.DefaultInvariant.Compare("blogger", generator) == 0;
}
/// <summary>
/// Extracts the size from the first embed or object begin tag in <paramref name="input"/>
/// that carries both a width and a height attribute.
/// </summary>
/// <param name="input">HTML to inspect; may be null or empty.</param>
/// <returns>
/// The parsed size, or a size built from _width/_height when the input is empty, no such
/// tag is found, or anything throws while parsing.
/// </returns>
Size FindSizeAttribute(string input)
{
    Size fallback = new Size(_width, _height);
    if (string.IsNullOrEmpty(input))
    {
        return fallback;
    }

    try
    {
        RequiredAttribute[] dimensions = new RequiredAttribute[]
        {
            new RequiredAttribute("width"),
            new RequiredAttribute("height")
        };
        IElementPredicate embedOrObject = new OrPredicate(
            new BeginTagPredicate("embed", dimensions),
            new BeginTagPredicate("object", dimensions));

        HtmlExtractor extractor = new HtmlExtractor(input);
        if (!extractor.Seek(embedOrObject).Success)
        {
            return fallback;
        }

        BeginTag sizedTag = (BeginTag)extractor.Element;
        int parsedWidth = Convert.ToInt32(sizedTag.GetAttributeValue("width"), CultureInfo.InvariantCulture);
        int parsedHeight = Convert.ToInt32(sizedTag.GetAttributeValue("height"), CultureInfo.InvariantCulture);
        return new Size(parsedWidth, parsedHeight);
    }
    catch (Exception ex)
    {
        // Non-numeric values (e.g. percentages) end up here; fall back to the defaults.
        Trace.Fail("Exception thrown while trying to find video size: " + ex);
    }

    return fallback;
}
/// <summary>
/// Runs the upload work for a URL reference and, when the matching supporting file has an
/// upload URI for the uploader's destination context, returns that URI in place of the
/// original reference.
/// </summary>
/// <param name="tag">The tag that owns the reference (not inspected here).</param>
/// <param name="reference">The reference to transform.</param>
/// <returns>The absolute upload URI when available; otherwise the original reference.</returns>
internal string Transform(BeginTag tag, string reference)
{
    if (!UrlHelper.IsUrl(reference))
    {
        return reference;
    }

    Uri localReferenceUri = new Uri(reference);

    /*
     * If we need to drop a hint to the photo uploader about
     * whether Lightbox-like preview is enabled, so that we know to link to
     * the image itself rather than the photo "self" page on photos.live.com;
     * this is where we would figure that out (by looking at the tag) and
     * pass that info through to the DoUploadWork call.
     */
    const bool isLightboxCloneEnabled = false;
    _referenceFixer._fileUploadWorker.DoUploadWork(reference, _uploader, isLightboxCloneEnabled);

    ISupportingFile supportingFile = _fileService.GetFileByUri(localReferenceUri);
    if (supportingFile == null)
    {
        return reference;
    }

    Uri uploadUri = supportingFile.GetUploadInfo(_uploader.DestinationContext).UploadUri;
    return uploadUri != null ? UrlHelper.SafeToAbsoluteUri(uploadUri) : reference;
}
/// <summary>
/// Filters a begin tag: blanks the value of every illegal attribute, increments the
/// suspend depth for tags whose name matches IllegalTagTreeName, and forwards the tag to
/// the base class only when it is legal and no suspension is active.
/// </summary>
/// <param name="tag">The begin tag reported by the parser.</param>
protected override void OnBeginTag(BeginTag tag)
{
    bool dropPartialTag = FlagIsSet(Flag.RemovePartialTags) && tag.Unterminated;
    if (dropPartialTag)
    {
        return;
    }

    // Illegal attributes are kept on the tag but have their values emptied.
    foreach (Attr candidate in tag.Attributes)
    {
        if (IsIllegalAttribute(candidate))
        {
            candidate.Value = string.Empty;
        }
    }

    if (tag.NameEquals("script"))
    {
        Debug.WriteLine("Script tag");
    }

    if (IsRegexMatch(IllegalTagTreeName, tag.Name))
    {
        suspendTagDepth++;
        return;
    }

    bool isLegal = !IsIllegalTag(tag);
    if (isLegal && suspendTagDepth == 0)
    {
        PushStartTag(tag.Name);
        base.OnBeginTag(tag);
    }
}
/// <summary>
/// Replaces each preserve span (a span with class PRESERVE_CLASS and an id of the form
/// "preserve" + key) whose key is present in the preserved map with the stored value.
/// Spans with a missing or unknown id are copied through unchanged.
/// </summary>
/// <param name="html">HTML that may contain preserve spans.</param>
/// <returns>The HTML with the recognized preserve spans replaced.</returns>
public string RestorePreserved(string html)
{
    StringBuilder restored = new StringBuilder();
    HtmlExtractor extractor = new HtmlExtractor(html);
    string spanPattern = "<span class='" + PRESERVE_CLASS + "'>";

    // Index into html up to which text has already been copied (or deliberately skipped).
    int copiedUpTo = 0;

    while (extractor.Seek(spanPattern).Success)
    {
        // Copy everything that precedes the span; the span itself stays pending.
        int spanStart = extractor.Element.Offset;
        restored.Append(html, copiedUpTo, spanStart - copiedUpTo);
        copiedUpTo = spanStart;

        BeginTag spanTag = (BeginTag)extractor.Element;
        string elementId = spanTag.GetAttributeValue("id");
        Match idMatch = Regex.Match(elementId ?? "", @"^preserve([a-zA-Z0-9]+)$");
        if (!idMatch.Success)
        {
            continue;
        }

        string preservedValue;
        if (!preserved.TryGetValue(idMatch.Groups[1].Value, out preservedValue))
        {
            continue;
        }

        restored.Append(preservedValue);

        // Skip the span's current content; resume copying where the extractor stops.
        extractor.CollectTextUntil("span");
        copiedUpTo = extractor.Element == null ? html.Length : extractor.Parser.Position;
    }

    restored.Append(html, copiedUpTo, html.Length - copiedUpTo);
    return restored.ToString();
}
/// <summary>
/// Rewrites the URL carried by an anchor's href or an image's src through ConvertUrl and
/// returns the updated tag text. Every other element is delegated to the base class.
/// </summary>
/// <param name="el">Element being processed.</param>
/// <returns>The replacement text for the element.</returns>
protected override string Replace(Element el)
{
    BeginTag beginTag = el as BeginTag;
    if (beginTag != null)
    {
        // Pick the URL-bearing attribute for the tags handled here.
        string urlAttributeName = null;
        if (beginTag.NameEquals("a"))
        {
            urlAttributeName = "href";
        }
        else if (beginTag.NameEquals("img"))
        {
            urlAttributeName = "src";
        }

        if (urlAttributeName != null)
        {
            Attr urlAttribute = beginTag.GetAttribute(urlAttributeName);
            if (urlAttribute != null && urlAttribute.Value != null)
            {
                urlAttribute.Value = ConvertUrl(urlAttribute.Value);
                return beginTag.ToString();
            }
        }
    }

    return base.Replace(el);
}
/// <summary>
/// Determines whether the blog appears to be hosted on Blogger, based on the homepage URL,
/// a "blogger" generator meta tag, or an Atom service.post link pointing at a
/// blogger.com / blogspot.com host.
/// </summary>
/// <returns>true if any of the Blogger indicators is present; otherwise false.</returns>
public bool IsBlogger()
{
    // Blogspot homepages are matched over both http and https, consistent with the
    // blogger.com check and the atom link checks below.
    if (Regex.IsMatch(homepageUrl, @"^https?://.+\.blogspot\.com($|/)", RegexOptions.IgnoreCase)
        || Regex.IsMatch(homepageUrl, @"^http(s)?://(www\.)?blogger\.com($|/)", RegexOptions.IgnoreCase)
        || new HtmlExtractor(html).Seek(new BloggerGeneratorCriterion()).Success)
    {
        return true;
    }

    HtmlExtractor ex = new HtmlExtractor(html);
    while (ex.Seek("<link href rel='service.post' type='application/atom+xml'>").Success)
    {
        BeginTag bt = (BeginTag)ex.Element;
        string atomHref = bt.GetAttributeValue("href");
        if (atomHref == null)
        {
            // An href attribute without a value tells us nothing; keep looking instead of
            // failing on the string checks below.
            continue;
        }

        // these obsolete Blogger atom links can't be used, but are
        // still a good indication that it's Blogger
        if (atomHref.StartsWith("https://www.blogger.com/atom/", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }

        // any other blogger or blogspot atom link will be considered a match
        if (Regex.IsMatch(atomHref, @"^https?\:\/\/.+\.blog(ger|spot)\.com\/.*", RegexOptions.IgnoreCase))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Increments the preserve-linebreaks depth when the tag is a whitespace-preserving tag,
/// then forwards the tag to the base class.
/// </summary>
/// <param name="tag">The begin tag reported by the parser.</param>
protected override void OnBeginTag(BeginTag tag)
{
    bool preservesWhitespace = IsPreserveWhitespaceTag(tag.Name);
    if (preservesWhitespace)
    {
        preserveLinebreaksDepth += 1;
    }

    base.OnBeginTag(tag);
}
/// <summary>
/// Overwrites the value of an existing attribute on <paramref name="tag"/>. Nothing
/// happens when the attribute is absent or the replacement value is null.
/// </summary>
/// <param name="tag">Tag whose attribute is updated.</param>
/// <param name="attributeName">Name of the attribute to update.</param>
/// <param name="valueToChangeTo">Replacement value; null leaves the attribute untouched.</param>
private void ModifyMetaDataAttribute(BeginTag tag, string attributeName, string valueToChangeTo)
{
    Attr target = tag.GetAttribute(attributeName);
    if (target == null || valueToChangeTo == null)
    {
        return;
    }

    target.Value = valueToChangeTo;
}
/// <summary>
/// Replaces the content of a meta tag with the corresponding value from _metaData (when
/// one is configured) and records the upper-cased meta name in _emittedMetaData.
/// The meta name is taken from the name attribute, or from http-equiv when name is absent.
/// </summary>
/// <param name="tag">The meta begin tag to update.</param>
private void ModifyMetaDataAsNecessary(BeginTag tag)
{
    if (_metaData == null)
    {
        return;
    }

    Attr nameAttr = tag.GetAttribute(HTMLTokens.Name);
    if (nameAttr == null)
    {
        nameAttr = tag.GetAttribute(HTMLTokens.HttpEquiv);
    }

    // An attribute can be present without a value (e.g. <meta name>); there is no meta
    // name to act on in that case, and upper-casing a null value would throw.
    if (nameAttr == null || nameAttr.Value == null)
    {
        return;
    }

    string metaName = nameAttr.Value.ToUpper(CultureInfo.InvariantCulture);
    switch (metaName)
    {
        case (HTMLTokens.Author):
            ModifyMetaDataAttribute(tag, HTMLTokens.Content, _metaData.Author);
            break;

        case (HTMLTokens.ContentType):
            // NOTE(review): emits "text/html; " followed by Charset verbatim, so Charset
            // presumably already includes the "charset=" prefix -- confirm.
            ModifyMetaDataAttribute(tag, HTMLTokens.Content, string.Format(CultureInfo.InvariantCulture, "text/html; {0}", _metaData.Charset));
            break;

        case (HTMLTokens.Charset):
            ModifyMetaDataAttribute(tag, HTMLTokens.Content, _metaData.Charset);
            break;

        case (HTMLTokens.Description):
            ModifyMetaDataAttribute(tag, HTMLTokens.Content, _metaData.Description);
            break;

        case (HTMLTokens.Generator):
            ModifyMetaDataAttribute(tag, HTMLTokens.Content, _metaData.Generator);
            break;

        case (HTMLTokens.CopyRight):
            ModifyMetaDataAttribute(tag, HTMLTokens.Content, _metaData.CopyRight);
            break;

        case (HTMLTokens.Keywords):
            ModifyMetaDataAttribute(tag, HTMLTokens.Content, _metaData.KeywordString);
            break;

        case (HTMLTokens.Pragma):
            ModifyMetaDataAttribute(tag, HTMLTokens.Content, _metaData.Pragma);
            break;

        case (HTMLTokens.Robots):
            ModifyMetaDataAttribute(tag, HTMLTokens.Content, _metaData.Robots);
            break;
    }

    // Recorded for every named meta tag, whether or not a case above matched.
    _emittedMetaData.Add(metaName);
}
/// <summary>
/// Detects which known image viewer a page uses by testing the src of each script tag
/// against every viewer's pattern.
/// </summary>
/// <param name="html">HTML of the page to inspect.</param>
/// <param name="sourceUrl">URL of the page, used to resolve relative script URLs.</param>
/// <returns>The first viewer whose pattern matches a script URL, or null if none does.</returns>
public static ImageViewer DetectImageViewer(string html, string sourceUrl)
{
    List<ImageViewer> knownViewers = imageViewers;

    // Regexes are only built when first requested via Value (i.e. once a script tag with
    // a non-empty src is found). Index i in this list corresponds to knownViewers[i].
    LazyLoader<List<Regex>> lazyPatterns = new LazyLoader<List<Regex>>(delegate
    {
        List<Regex> compiled = new List<Regex>(knownViewers.Count);
        for (int n = 0; n < knownViewers.Count; n++)
        {
            compiled.Add(new Regex(knownViewers[n].Pattern, RegexOptions.CultureInvariant));
        }
        return compiled;
    });

    HtmlExtractor extractor = new HtmlExtractor(html);
    while (extractor.Seek("<script src>").Success)
    {
        BeginTag scriptTag = (BeginTag)extractor.Element;
        string src = scriptTag.GetAttributeValue("src");
        if (String.IsNullOrEmpty(src))
        {
            continue;
        }

        try
        {
            if (!UrlHelper.IsUrl(src))
            {
                // We need absolute URLs.
                src = UrlHelper.EscapeRelativeURL(sourceUrl, src);
            }

            Uri srcUri = new Uri(src);
            if (srcUri.IsAbsoluteUri)
            {
                // WinLive 248276: We want just the path portion since there could be an additional query or
                // fragment on the URL that our regexs can't handle.
                src = srcUri.GetLeftPart(UriPartial.Path);
            }
        }
        catch (UriFormatException)
        {
            // We'll just use the regex on the raw attribute value.
        }

        List<Regex> patterns = lazyPatterns.Value;
        int viewerIndex = 0;
        foreach (Regex pattern in patterns)
        {
            if (pattern.IsMatch(src))
            {
                return knownViewers[viewerIndex];
            }
            viewerIndex++;
        }
    }

    return null;
}
/// <summary>
/// Remembers the body begin tag when it is encountered, then forwards the tag to the
/// base class.
/// </summary>
/// <param name="tag">The begin tag reported by the parser.</param>
protected override void OnBeginTag(BeginTag tag)
{
    bool isBodyTag = tag.NameEquals(HTMLTokens.Body);
    if (isBodyTag)
    {
        bodyBeginTag = tag;
    }

    base.OnBeginTag(tag);
}
/// <summary>
/// Checks whether the tag's name matches any entry of _permittedBeforeBody.
/// </summary>
/// <param name="tag">Tag to check.</param>
/// <returns>true if the tag name is in the permitted list; otherwise false.</returns>
private bool TagPermittedAboveBody(BeginTag tag)
{
    bool permitted = false;
    foreach (string candidateName in _permittedBeforeBody)
    {
        if (tag.NameEquals(candidateName))
        {
            permitted = true;
            break;
        }
    }

    return permitted;
}
/// <summary>
/// Registers the supporting file behind a URL reference via AddReference when that file
/// exists and is marked Embedded. The reference itself is never modified.
/// </summary>
/// <param name="tag">The tag that owns the reference (not inspected here).</param>
/// <param name="reference">The reference to examine.</param>
/// <returns>The unchanged <paramref name="reference"/>.</returns>
private string AddHtmlReference(BeginTag tag, string reference)
{
    if (!UrlHelper.IsUrl(reference))
    {
        return reference;
    }

    ISupportingFile file = _editingContext.SupportingFileService.GetFileByUri(new Uri(reference));
    bool isEmbeddedFile = file != null && file.Embedded;
    if (isEmbeddedFile)
    {
        AddReference(file);
    }

    return reference;
}
/// <summary>
/// Creates the form control that corresponds to an input tag, chosen by the tag's
/// trimmed, lower-cased "type" attribute. Missing or unrecognized types (as well as
/// "text" and "password") produce a Textbox.
/// </summary>
/// <param name="parentForm">Form passed to the control constructor.</param>
/// <param name="inputTag">The input begin tag.</param>
private void HandleInput(HtmlForm parentForm, BeginTag inputTag)
{
    string rawType = inputTag.GetAttributeValue("type");
    string type = rawType == null ? null : rawType.Trim().ToLowerInvariant();

    string name = inputTag.GetAttributeValue("name");
    string value = inputTag.GetAttributeValue("value");

    // Controls are constructed and discarded; presumably each constructor attaches the
    // control to parentForm -- confirm against the control classes.
    if (type == "checkbox")
    {
        int ignoredIndex;
        bool isChecked = inputTag.GetAttribute("checked", true, 0, out ignoredIndex) != null;
        new Checkbox(parentForm, name, value, isChecked);
    }
    else if (type == "radio")
    {
        int ignoredIndex;
        bool isChecked = inputTag.GetAttribute("checked", true, 0, out ignoredIndex) != null;
        new Radio(parentForm, name, value, isChecked);
    }
    else if (type == "submit")
    {
        new SubmitButton(parentForm, name, value);
    }
    else if (type == "image")
    {
        new ImageButton(parentForm, name, value, inputTag.GetAttributeValue("src"));
    }
    else if (type == "hidden")
    {
        new Hidden(parentForm, name, value);
    }
    else
    {
        // "password", "text", a missing type, and anything unrecognized.
        new Textbox(parentForm, name, value);
    }
}
/// <summary>
/// Wraps every object, embed, noembed and script element of <paramref name="html"/> in a
/// span carrying PRESERVE_CLASS and a generated "preserve..." id, storing the original
/// markup in the preserved map under that id. Smart content divs are copied through
/// together with their content; all other markup is copied through as is.
/// </summary>
/// <param name="html">HTML to scan.</param>
/// <returns>The HTML with the preserved elements wrapped.</returns>
public string ScanAndPreserve(string html)
{
    StringBuilder output = new StringBuilder(html.Length);
    SimpleHtmlParser parser = new SimpleHtmlParser(html);

    for (Element element = parser.Next(); element != null; element = parser.Next())
    {
        BeginTag beginTag = element as BeginTag;
        if (beginTag == null)
        {
            output.Append(html, element.Offset, element.Length);
            continue;
        }

        if (beginTag.NameEquals("div"))
        {
            string cssClass = beginTag.GetAttributeValue("class");
            bool isSmartContent = cssClass == ContentSourceManager.EDITABLE_SMART_CONTENT
                || cssClass == ContentSourceManager.SMART_CONTENT;
            if (isSmartContent)
            {
                // Copy the whole div without looking inside it.
                output.Append(html, element.Offset, element.Length);
                output.Append(parser.CollectHtmlUntil("div"));
                output.Append("</div>");
                continue;
            }
        }

        bool mustPreserve = beginTag.NameEquals("object")
            || beginTag.NameEquals("embed")
            || beginTag.NameEquals("noembed")
            || beginTag.NameEquals("script");
        if (!mustPreserve)
        {
            output.Append(html, element.Offset, element.Length);
            continue;
        }

        string innerHtml = parser.CollectHtmlUntil(beginTag.Name);
        string preservedMarkup = beginTag.RawText + innerHtml + "</" + beginTag.Name + ">";
        string preserveId = Guid.NewGuid().ToString("N");
        preserved[preserveId] = preservedMarkup;

        output.AppendFormat("<span id=\"preserve{0}\" class=\"{1}\">", preserveId, PRESERVE_CLASS);
        output.Append(preservedMarkup);
        output.Append("</span>");
    }

    return output.ToString();
}
/// <summary>
/// Records the local path of a file reference in _supportingFileScratchList when
/// ContainsFile reports that the path is contained. The reference is never transformed.
/// </summary>
/// <param name="tag">The tag that owns the reference (not inspected here).</param>
/// <param name="reference">A reference that must be parseable as a URI.</param>
/// <returns>The unchanged <paramref name="reference"/>.</returns>
private string EnumerateLocalFileReference(BeginTag tag, string reference)
{
    Uri referenceUri = new Uri(reference);
    string localPath = referenceUri.LocalPath;

    // check if the local file is contained in the supporting file directory
    if (ContainsFile(localPath))
    {
        _supportingFileScratchList.Add(localPath);
    }

    // don't transform the url
    return reference;
}
/// <summary>
/// Trims trailing insignificant markup from <paramref name="html"/>: finds the index at
/// which cleanup should begin, removes empty begin/end tag pairs from there on, drops
/// leftover p begin tags, and concatenates the remaining elements.
/// </summary>
/// <param name="html">HTML to trim.</param>
/// <param name="onlyTrimParagraphs">
/// Selects FindCleanupIndexForParagraphTrim (true) or
/// FindLastVisibleElementAndRemoveWhitespace (false) to locate the cleanup start.
/// </param>
/// <returns>The trimmed HTML.</returns>
public static string Trim(string html, bool onlyTrimParagraphs)
{
    Element[] els = Elements(html);

    // First, go backwards over the list, deleting all <br> and whitespace.
    // Stop as soon as significant content is encountered.
    int lastKeptIndex = onlyTrimParagraphs
        ? FindCleanupIndexForParagraphTrim(els)
        : FindLastVisibleElementAndRemoveWhitespace(els);

    // The index where whitespace cleanup should begin.
    int cleanupStart = lastKeptIndex + 1;

    // Remove empty pairs of invisible tags, e.g. <b></b>. Removing one pair may expose
    // another, e.g. <p><i></i></p>, so repeat until nothing more is removed.
    bool removedPair;
    do
    {
        removedPair = FindAndRemoveEmptyTag(cleanupStart, els);
    }
    while (removedPair);

    // Remove extra unmatched <p> begin tags.
    for (int index = cleanupStart; index < els.Length; index++)
    {
        BeginTag candidate = els[index] as BeginTag;
        if (candidate == null || !candidate.NameEquals("p"))
        {
            continue;
        }
        els[index] = null;
    }

    // Concatenate all the elements that are left.
    StringBuilder output = new StringBuilder(html.Length);
    for (int index = 0; index < els.Length; index++)
    {
        Element remaining = els[index];
        if (remaining != null)
        {
            output.Append(remaining.RawText);
        }
    }

    return output.ToString();
}
/// <summary>
/// Clones active smart content contained in the provided HTML, and disables unknown smart content.
/// Smart content elements that carry no id attribute are passed through unchanged.
/// </summary>
/// <param name="html">HTML about to be inserted into the editor.</param>
/// <param name="sourceContext">Context used to look up and clone smart content.</param>
/// <returns>The HTML with smart content ids rewritten to point at the clones.</returns>
public static string PrepareSmartContentHtmlForEditorInsertion(string html, IContentSourceSidebarContext sourceContext)
{
    StringBuilder output = new StringBuilder();
    ContentSourceManager.SmartContentPredicate predicate = new ContentSourceManager.SmartContentPredicate();
    SimpleHtmlParser p = new SimpleHtmlParser(html);
    for (Element el; null != (el = p.Next());)
    {
        if (predicate.IsMatch(el))
        {
            BeginTag bt = el as BeginTag;
            Attr idAttr = bt.GetAttribute("id");

            // Synchronized WP posts will strip ID attrs (bug 488143); without an id there
            // is nothing to look up or clone, so the element is emitted untouched.
            if (idAttr != null)
            {
                String contentSourceId, contentItemId;
                ContentSourceManager.ParseContainingElementId(idAttr.Value, out contentSourceId, out contentItemId);

                ISmartContent smartContent = sourceContext.FindSmartContent(contentItemId);
                if (smartContent != null)
                {
                    String newId = Guid.NewGuid().ToString();
                    sourceContext.CloneSmartContent(contentItemId, newId);

                    if (RefreshableContentManager.ContentSourcesWithRefreshableContent.Contains(contentSourceId))
                    {
                        IExtensionData extensionData = sourceContext.FindExtentsionData(newId);
                        Debug.Assert(extensionData != null);

                        // Since we just made a new id for the smart content just about to be inserted
                        // we want to give it a chance to get a callback because its callback might have happened while
                        // it was on the clipboard(in the event of cut). This means the refreshable content manager doesnt know
                        // to watch out for this smart content on paste, it only knows to look out for who created it. Thus
                        // we just force the callback, and if it didnt need it, nothing will happen.
                        // The null check keeps release builds (where the assert is compiled out) from crashing.
                        if (extensionData != null && extensionData.RefreshCallBack == null)
                        {
                            extensionData.RefreshCallBack = DateTime.UtcNow;
                        }
                    }

                    idAttr.Value = ContentSourceManager.MakeContainingElementId(contentSourceId, newId);
                }
                else
                {
                    ContentSourceManager.RemoveSmartContentAttributes(bt);
                }
            }
        }
        output.Append(el.ToString());
    }
    return output.ToString();
}
/// <summary>
/// For tags listed in LightWeightHTMLDocument.AllUrlElements, replaces a non-URL value of
/// the tag's URL attribute with the result of escaping it relative to _baseUrl. The tag
/// is always forwarded to the base class.
/// </summary>
/// <param name="tag">The begin tag reported by the parser; may be null.</param>
protected override void OnBeginTag(BeginTag tag)
{
    if (tag != null)
    {
        string lookupKey = tag.Name.ToUpper(CultureInfo.InvariantCulture);
        if (LightWeightHTMLDocument.AllUrlElements.ContainsKey(lookupKey))
        {
            // The table maps an upper-cased tag name to the attribute that holds its URL.
            string urlAttributeName = (string)LightWeightHTMLDocument.AllUrlElements[lookupKey];
            Attr urlAttribute = tag.GetAttribute(urlAttributeName);
            if (urlAttribute != null)
            {
                string currentUrl = urlAttribute.Value;
                if (!UrlHelper.IsUrl(currentUrl))
                {
                    urlAttribute.Value = UrlHelper.EscapeRelativeURL(_baseUrl, currentUrl);
                }
            }
        }
    }

    base.OnBeginTag(tag);
}
/// <summary>
/// Emits a begin tag when its upper-cased name is in TagsToPreserve, or emits it under
/// the mapped replacement name when it is a key of ReplaceTags. Also sets _inTitle when a
/// title tag that is not Complete opens. Other tags produce no output.
/// </summary>
/// <param name="tag">The begin tag reported by the parser.</param>
protected override void OnBeginTag(BeginTag tag)
{
    bool opensTitle = tag.NameEquals(HTMLTokens.Title) && !tag.Complete;
    if (opensTitle)
    {
        _inTitle = true;
    }

    string upperName = tag.Name.ToUpper(CultureInfo.InvariantCulture);
    if (TagsToPreserve.Contains(upperName))
    {
        EmitTagAndAttributes(tag.Name, tag);
        return;
    }

    if (ReplaceTags.ContainsKey(upperName))
    {
        string replacementName = (string)ReplaceTags[upperName];
        EmitTagAndAttributes(replacementName, tag);
    }
}
/// <summary>
/// Determines whether a begin tag must be dropped: either its name matches IllegalTagName,
/// or styles are being removed and the tag is a link element that references a stylesheet.
/// </summary>
/// <param name="tag">The tag to test.</param>
/// <returns>true if the tag is illegal; otherwise false.</returns>
private bool IsIllegalTag(BeginTag tag)
{
    if (IsRegexMatch(IllegalTagName, tag.Name))
    {
        return true;
    }

    if (FlagIsSet(Flag.RemoveStyles) && tag.NameEquals("link"))
    {
        //if this link element is a stylesheet, it is illegal
        Attr relAttr = tag.GetAttribute("rel");
        if (relAttr != null && relAttr.Value != null)
        {
            // rel holds a whitespace-separated list of link types, so values such as
            // "alternate stylesheet" must be recognized too, not only a lone "stylesheet".
            // A null separator array makes Split break on any whitespace.
            string[] linkTypes = relAttr.Value.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
            foreach (string linkType in linkTypes)
            {
                if (string.Equals(linkType, "stylesheet", StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }
        }
    }

    return false;
}
/// <summary>
/// Detects a meaningless tag such as &lt;p&gt;&lt;/p&gt;, &lt;a href="..."&gt;&lt;/a&gt; or
/// &lt;a href="..."&gt; &lt;/a&gt; and, when found, consumes the rest of it (whitespace text
/// and end tag) from the parser.
/// </summary>
/// <param name="htmlParser">Parser positioned just after <paramref name="bt"/>.</param>
/// <param name="bt">The begin tag that was just read.</param>
/// <returns>true if the tag was meaningless and its remainder was consumed; otherwise false.</returns>
private static bool RemoveMeaninglessTags(SimpleHtmlParser htmlParser, BeginTag bt)
{
    // A <p> without any attributes (or residue), immediately followed by its end tag.
    bool isBareParagraph = bt.NameEquals("p") && bt.Attributes.Length == 0 && !bt.HasResidue;
    if (isBareParagraph)
    {
        EndTag closingParagraph = htmlParser.Peek(0) as EndTag;
        if (closingParagraph != null && closingParagraph.NameEquals("p"))
        {
            // eat up the end tag
            htmlParser.Next();
            return true;
        }
    }

    // An <a> without a style/id/name attribute, but with an href... meaning the link is not useful
    bool isPlainLink = bt.NameEquals("a")
        && bt.GetAttribute("name") == null
        && bt.GetAttributeValue("style") == null
        && bt.GetAttributeValue("id") == null
        && bt.GetAttributeValue("href") != null;
    if (!isPlainLink)
    {
        return false;
    }

    // Number of upcoming elements to swallow: the end tag, plus a whitespace-only text
    // node when one sits between the begin and end tags.
    int elementsToConsume = 1;
    Element upcoming = htmlParser.Peek(0);
    if (upcoming is Text && HtmlUtils.UnEscapeEntities(upcoming.RawText, HtmlUtils.UnEscapeMode.NonMarkupText).Trim().Length == 0)
    {
        upcoming = htmlParser.Peek(1);
        elementsToConsume = 2;
    }

    EndTag closingAnchor = upcoming as EndTag;
    if (closingAnchor == null || !closingAnchor.NameEquals("a"))
    {
        return false;
    }

    for (int consumed = 0; consumed < elementsToConsume; consumed++)
    {
        htmlParser.Next();
    }
    return true;
}
/// <summary>
/// For tags listed in LightWeightHTMLDocument.AllUrlElements, passes the tag's URL
/// attribute through _referenceFixer, stores the result, and notifies _referenceFixed
/// when the value changed. The tag is always forwarded to the base class.
/// </summary>
/// <param name="tag">The begin tag reported by the parser; may be null.</param>
protected override void OnBeginTag(BeginTag tag)
{
    if (tag != null)
    {
        string lookupKey = tag.Name.ToUpper(CultureInfo.InvariantCulture);
        if (LightWeightHTMLDocument.AllUrlElements.ContainsKey(lookupKey))
        {
            string urlAttributeName = (string)LightWeightHTMLDocument.AllUrlElements[lookupKey];
            Attr urlAttribute = tag.GetAttribute(urlAttributeName);
            if (urlAttribute != null)
            {
                string originalReference = urlAttribute.Value;
                string fixedReference = _referenceFixer(tag, originalReference);
                urlAttribute.Value = fixedReference;

                bool referenceChanged = originalReference != fixedReference;
                if (referenceChanged && _referenceFixed != null)
                {
                    //notify the reference fixed callback that a reference was fixed.
                    _referenceFixed(originalReference, fixedReference);
                }
            }
        }
    }

    base.OnBeginTag(tag);
}
/// <summary>
/// Applies _fixer to references that are file URIs and returns every other reference
/// (including anything UrlHelper.IsUrl rejects) unchanged.
/// </summary>
/// <param name="tag">The tag that owns the reference; passed through to _fixer.</param>
/// <param name="reference">The reference to examine.</param>
/// <returns>The fixed reference for file URIs; otherwise the original reference.</returns>
public string FixReferences(BeginTag tag, string reference)
{
    // protect against unexpected/empty input
    bool isUrl = UrlHelper.IsUrl(reference);
    if (!isUrl)
    {
        return reference;
    }

    Uri referenceUri = new Uri(reference);
    return referenceUri.IsFile ? _fixer(tag, reference) : reference;
}
/// <summary>
/// Tracks list nesting: counts open ul and ol tags, and sets hasIncompleteList when an li
/// begin tag appears while neither list level is at least 1. The tag is then forwarded to
/// the base class.
/// </summary>
/// <param name="tag">The begin tag reported by the parser.</param>
protected override void OnBeginTag(BeginTag tag)
{
    if (tag.NameEquals(HTMLTokens.Ul))
    {
        unorderedListLevel++;
    }
    else if (tag.NameEquals(HTMLTokens.Ol))
    {
        orderedListLevel++;
    }
    else
    {
        bool outsideAnyList = unorderedListLevel < 1 && orderedListLevel < 1;
        if (outsideAnyList && tag.NameEquals(HTMLTokens.Li))
        {
            hasIncompleteList = true;
        }
    }

    base.OnBeginTag(tag);
}