/// <summary>
/// Parses the styles nested inside a group and adds each non-null parsed item to
/// <paramref name="group"/>'s <c>Styles</c> collection.
/// </summary>
/// <remarks>
/// The parser's current string enumerator (<c>_str</c>) is temporarily replaced with an
/// enumerator over just the group's inner content, and is always restored afterwards,
/// even if parsing the inner content throws.
/// </remarks>
/// <param name="group">The style group that receives the parsed inner styles.</param>
/// <param name="grpStart">The start offset of the group's inner content within the current string.</param>
/// <param name="grpLen">The length of the group's inner content.</param>
private void ParseInnerStyles(StyleGroup group, int grpStart, int grpLen)
{
    var content = _str.Substring(grpStart, grpLen);
    var enumerator = new StringEnumerator(content);
    var prev = _str;

    //Swap in the inner content so that MoveNext / Current operate on it alone.
    _str = enumerator;

    try
    {
        while (this.MoveNext())
        {
            var style = this.Current;

            //Check for null BEFORE logging - the log message dereferences the item.
            if (null == style)
            {
                continue;
            }

            if (this._log.ShouldLog(TraceLevel.Verbose))
            {
                this._log.Add(TraceLevel.Verbose, "CSS", "Found css style for " + style.ToString() + " parsing inner contents");
            }

            group.Styles.Add(style);
        }
    }
    finally
    {
        //Always put the outer enumerator back so a failure here does not corrupt the parser state.
        _str = prev;
    }
}
/// <summary>
/// Returns the character that has been escaped at the current position (just after the ampersand).
/// </summary>
/// <remarks>
/// Numeric references are read as decimal (e.g. <c>&amp;#65;</c>) or hexadecimal
/// (e.g. <c>&amp;#x41;</c>); any other entity is looked up in the parser settings'
/// <c>HTMLEntityMappings</c>. If the entity is unterminated, too long, unknown, cannot be parsed,
/// or its value does not fit in a single <see cref="char"/>, the enumerator is put back to where
/// it started and the ampersand itself is returned so the text is output as is.
/// </remarks>
/// <param name="src">The enumerator, positioned on the character immediately after the ampersand.</param>
/// <returns>
/// The un-escaped character (with <paramref name="src"/> moved past the terminator),
/// or the entity start marker if the entity could not be resolved (with <paramref name="src"/> unmoved).
/// </returns>
private char ReadHtmlEscapedChar(StringEnumerator src)
{
    //Max limit of HTML Entities - beyond this we assume we are missing a terminating ;
    const int MaxEntityLength = 10;

    //Fall back if not found is just to output as is.
    //So we need to remember where we were.
    int curPos = src.Offset;
    int ampersandPos = curPos - 1;
    char cur = src.Current;
    bool terminated = true;

    while (cur != HTMLEntityEndMarker)
    {
        if (!src.MoveNext())
        {
            terminated = false;
            break;
        }
        if (src.Offset > curPos + MaxEntityLength)
        {
            terminated = false;
            break;
        }
        cur = src.Current;
    }

    if (terminated)
    {
        int length = 1 + src.Offset - ampersandPos;
        string entity = src.Substring(ampersandPos, length);

        //Anything shorter than 3 characters cannot be an entity, so falls through to the reset below.
        if (entity.Length >= 3)
        {
            char found;

            if (entity[1] == HTMLEntityNumberMarker) //we have the character number
            {
                //Hexadecimal references are marked with x (or X). The doubled '#' form is
                //still accepted because earlier versions of this method treated it as hex.
                char radixMarker = entity[2];
                bool isHex = (radixMarker == 'x' || radixMarker == 'X' || radixMarker == '#');

                int charNum;
                bool parsed;

                if (isHex)
                {
                    parsed = int.TryParse(entity.Substring(3, entity.Length - 4),
                        System.Globalization.NumberStyles.HexNumber,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out charNum);
                }
                else
                {
                    parsed = int.TryParse(entity.Substring(2, entity.Length - 3),
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out charNum);
                }

                //Only accept values that fit in one char - a cast would silently
                //truncate anything else into an unrelated character.
                if (parsed && charNum >= char.MinValue && charNum <= char.MaxValue)
                {
                    src.MoveNext(); //past the ;
                    return (char)charNum;
                }
            }
            else if (this.Parser.Settings.HTMLEntityMappings.TryGetValue(entity, out found))
            {
                src.MoveNext(); //past the ;
                return found;
            }
        }
    }

    //Hit the end of the string or the character limit before a terminator, or we
    //don't know this one - so just go back to the original offset and return the ampersand.
    src.Offset = curPos;
    return HTMLEntityStartMarker;
}
/// <summary>
/// Replaces the numeric character references in a string (decimal such as <c>&amp;#65;</c>,
/// or hexadecimal such as <c>&amp;#x41;</c>) with the characters they represent.
/// </summary>
/// <remarks>
/// Named entities are not resolved by this method. Any ampersand that does not start a
/// terminated, parsable numeric reference is copied to the output unchanged, along with the
/// text that follows it, so no input characters are lost.
/// </remarks>
/// <param name="value">The string to un-escape. A null or empty value is returned as is.</param>
/// <returns>The un-escaped string, or <paramref name="value"/> itself when it contains no ampersand.</returns>
public static string UnEscapeHtmlString(string value)
{
    if (string.IsNullOrEmpty(value))
    {
        return value;
    }

    int ampersandPos = value.IndexOf(HTMLEntityStartMarker);
    if (ampersandPos < 0)
    {
        return value;
    }

    //The terminator must be found within this many characters after the ampersand.
    const int MaxEntityLength = 10;

    StringBuilder buffer = new StringBuilder(value.Length);
    int copyFrom = 0; //start of the text that has not yet been written to the buffer

    while (ampersandPos >= 0)
    {
        //Literal text between the last thing we wrote and this ampersand.
        buffer.Append(value, copyFrom, ampersandPos - copyFrom);

        //Look for the terminator, but no further than the entity limit or the end of the string.
        int remaining = value.Length - (ampersandPos + 1);
        int scanCount = remaining < MaxEntityLength ? remaining : MaxEntityLength;
        int endPos = scanCount > 0 ? value.IndexOf(HTMLEntityEndMarker, ampersandPos + 1, scanCount) : -1;

        string replacement;
        if (endPos >= 0 && TryReadNumericEntity(value, ampersandPos, endPos, out replacement))
        {
            buffer.Append(replacement);
            copyFrom = endPos + 1; //past the ;
        }
        else
        {
            //Not something we can resolve - output the ampersand as is and carry on after it.
            buffer.Append(HTMLEntityStartMarker);
            copyFrom = ampersandPos + 1;
        }

        ampersandPos = value.IndexOf(HTMLEntityStartMarker, copyFrom);
    }

    //Whatever is left after the last ampersand sequence.
    buffer.Append(value, copyFrom, value.Length - copyFrom);

    return buffer.ToString();
}

/// <summary>
/// Attempts to read the numeric character reference that starts at <paramref name="ampersandPos"/>
/// and is terminated at <paramref name="endPos"/> within <paramref name="value"/>.
/// </summary>
/// <param name="value">The string containing the reference.</param>
/// <param name="ampersandPos">The index of the ampersand that starts the reference.</param>
/// <param name="endPos">The index of the terminator that ends the reference.</param>
/// <param name="text">Set to the character(s) the reference represents, or null if it could not be read.</param>
/// <returns>True if the reference was numeric and represents a valid character code, otherwise false.</returns>
private static bool TryReadNumericEntity(string value, int ampersandPos, int endPos, out string text)
{
    text = null;

    //The shortest numeric reference is 4 characters: ampersand, number marker, one digit, terminator.
    int entityLength = 1 + endPos - ampersandPos;
    if (entityLength <= 3 || value[ampersandPos + 1] != HTMLEntityNumberMarker)
    {
        return false;
    }

    int digitsStart = ampersandPos + 2;
    System.Globalization.NumberStyles style = System.Globalization.NumberStyles.Integer;

    if (value[digitsStart] == 'x' || value[digitsStart] == 'X')
    {
        //we are using the hex codes rather than decimal
        style = System.Globalization.NumberStyles.HexNumber;
        digitsStart++;
    }

    int charNum;
    if (!int.TryParse(value.Substring(digitsStart, endPos - digitsStart), style,
            System.Globalization.CultureInfo.InvariantCulture, out charNum))
    {
        return false;
    }

    if (charNum < 0 || charNum > 0x10FFFF)
    {
        //Not a character code at all - a cast would silently wrap it into something unrelated.
        return false;
    }

    if (charNum <= char.MaxValue)
    {
        text = ((char)charNum).ToString();
    }
    else
    {
        //Above the basic plane a single char cannot hold the value, so emit the surrogate pair.
        text = char.ConvertFromUtf32(charNum);
    }

    return true;
}