/// <summary>
/// Changes the escape mode of the document. The escape mode decides whether a character that the output
/// character set cannot represent is written as a named escape or as a numbered escape.
/// </summary>
/// <param name="escapeMode">the escape mode to apply from now on</param>
/// <returns>these output settings, so that calls can be chained</returns>
public virtual iText.StyledXmlParser.Jsoup.Nodes.OutputSettings EscapeMode(Entities.EscapeMode escapeMode) {
    this.escapeMode = escapeMode;
    return this;
}
/// <summary>
/// Appends <paramref name="str"/> to <paramref name="accum"/>, replacing markup-significant characters and
/// characters that the output charset cannot encode with character references.
/// </summary>
/// <remarks>
/// The method is deliberately one long single pass: splitting it up would cause the input to be rescanned
/// and extra StringBuilder instances to be created.
/// <para>
/// Escaping rules applied to characters below the supplementary range:
/// the ampersand is always escaped; the non-breaking space (U+00A0) is written as a named reference unless
/// the escape mode is xhtml, in which case the numeric reference is used; the less-than sign is escaped in
/// character data and, for xhtml, in attribute values as well; the greater-than sign is escaped only outside
/// attribute values; the double quote is escaped only inside attribute values. Any other character is copied
/// verbatim when the charset can encode it, otherwise it is written as a named reference taken from the
/// escape mode's map or, when the map has no entry, as a hexadecimal numeric reference.
/// </para>
/// <para>
/// Supplementary code points are copied verbatim when the encoder can encode them and are otherwise written
/// as hexadecimal numeric references.
/// </para>
/// </remarks>
/// <param name="accum">the builder that receives the escaped text</param>
/// <param name="str">the text to escape</param>
/// <param name="outputSettings">supplies the escape mode and the output charset</param>
/// <param name="inAttribute">true when the text is an attribute value, false when it is character data</param>
/// <param name="normaliseWhite">
/// when true, every run of whitespace is collapsed into a single space character
/// </param>
/// <param name="stripLeadingWhite">
/// when true (and <paramref name="normaliseWhite"/> is true), whitespace that precedes the first
/// non-whitespace character is dropped entirely
/// </param>
internal static void Escape(StringBuilder accum, String str, OutputSettings outputSettings, bool inAttribute,
    bool normaliseWhite, bool stripLeadingWhite) {
    bool lastWasWhite = false;
    bool reachedNonWhite = false;
    Entities.EscapeMode escapeMode = outputSettings.EscapeMode();
    System.Text.Encoding encoder = outputSettings.Charset();
    Entities.CoreCharset coreCharset = GetCoreCharsetByName(outputSettings.Charset().Name());
    IDictionary<char, String> map = escapeMode.GetMap();
    int length = str.Length;
    int codePoint;
    // The step is the number of UTF-16 units of the code point just read, so surrogate pairs advance by two.
    for (int offset = 0; offset < length; offset += iText.IO.Util.TextUtil.CharCount(codePoint)) {
        codePoint = str.CodePointAt(offset);
        if (normaliseWhite) {
            if (iText.StyledXmlParser.Jsoup.Helper.StringUtil.IsWhitespace(codePoint)) {
                // Drop leading whitespace (when requested) and every whitespace after the first of a run.
                if ((stripLeadingWhite && !reachedNonWhite) || lastWasWhite) {
                    continue;
                }
                accum.Append(' ');
                lastWasWhite = true;
                continue;
            }
            else {
                lastWasWhite = false;
                reachedNonWhite = true;
            }
        }
        // surrogate pairs, split implementation for efficiency on single char common case
        // (saves creating strings, char[]):
        if (codePoint < iText.IO.Util.TextUtil.CHARACTER_MIN_SUPPLEMENTARY_CODE_POINT) {
            char c = (char)codePoint;
            // html specific and required escapes:
            switch (c) {
                case '&': {
                    accum.Append("&amp;");
                    break;
                }

                case (char)0xA0: {
                    // XML predefines no "nbsp" entity, so xhtml output needs the numeric form.
                    if (escapeMode != Entities.EscapeMode.xhtml) {
                        accum.Append("&nbsp;");
                    }
                    else {
                        accum.Append("&#xa0;");
                    }
                    break;
                }

                case '<': {
                    // escape when in character data or when in a xml attribute val; not needed in html attr val
                    if (!inAttribute || escapeMode == Entities.EscapeMode.xhtml) {
                        accum.Append("&lt;");
                    }
                    else {
                        accum.Append(c);
                    }
                    break;
                }

                case '>': {
                    if (!inAttribute) {
                        accum.Append("&gt;");
                    }
                    else {
                        accum.Append(c);
                    }
                    break;
                }

                case '"': {
                    if (inAttribute) {
                        accum.Append("&quot;");
                    }
                    else {
                        accum.Append(c);
                    }
                    break;
                }

                default: {
                    if (CanEncode(coreCharset, c, encoder)) {
                        accum.Append(c);
                    }
                    else {
                        // Prefer the named reference of the current escape mode; fall back to a numeric one.
                        String entityName;
                        if (map.TryGetValue(c, out entityName)) {
                            accum.Append('&').Append(entityName).Append(';');
                        }
                        else {
                            accum.Append("&#x").Append(JavaUtil.IntegerToHexString(codePoint)).Append(';');
                        }
                    }
                    break;
                }
            }
        }
        else {
            String c = new String(iText.IO.Util.TextUtil.ToChars(codePoint));
            if (encoder.CanEncode(c)) {
                // uses fallback encoder for simplicity
                accum.Append(c);
            }
            else {
                accum.Append("&#x").Append(JavaUtil.IntegerToHexString(codePoint)).Append(';');
            }
        }
    }
}
/// <summary>
/// Updates the document's escape mode. When the output character set has no representation for a character,
/// the escape mode selects between emitting a named escape and emitting a numbered escape.
/// </summary>
/// <param name="escapeMode">the escape mode that replaces the current one</param>
/// <returns>this output settings instance, to allow call chaining</returns>
public virtual OutputSettings EscapeMode(Entities.EscapeMode escapeMode) {
    this.escapeMode = escapeMode;
    return this;
}