/// <summary>
/// Checks that <c>Advance()</c> skips exactly one character without returning it.
/// </summary>
public void Advance()
{
    var reader = new CharacterReader("One Two Three");

    var first = reader.Consume();
    Assert.AreEqual('O', first);

    // Skipping 'n' means the next consumed character must be 'e'.
    reader.Advance();
    var afterSkip = reader.Consume();
    Assert.AreEqual('e', afterSkip);
}
/// <summary>
/// Checks <c>ConsumeTo(string)</c>: it returns the text preceding the next occurrence of the
/// target, and returns the whole remainder when the target does not occur.
/// </summary>
public void ConsumeToString()
{
    var reader = new CharacterReader("One Two Two Four");

    var beforeFirstTwo = reader.ConsumeTo("Two");
    Assert.AreEqual("One ", beforeFirstTwo);
    Assert.AreEqual('T', reader.Consume());

    var beforeSecondTwo = reader.ConsumeTo("Two");
    Assert.AreEqual("wo ", beforeSecondTwo);
    Assert.AreEqual('T', reader.Consume());

    // "Qux" is not present, so the rest of the input is returned.
    var remainder = reader.ConsumeTo("Qux");
    Assert.AreEqual("wo Four", remainder);
}
/// <summary>
/// Checks <c>ConsumeTo(char)</c>: it stops in front of the target character, returns an empty
/// string when already positioned on it, and returns the remainder when the target is absent.
/// </summary>
public void ConsumeToChar()
{
    var reader = new CharacterReader("One Two Three");

    var upToTwo = reader.ConsumeTo('T');
    Assert.AreEqual("One ", upToTwo);

    // Already positioned on the 'T' of "Two": nothing to consume.
    var nothing = reader.ConsumeTo('T');
    Assert.AreEqual("", nothing);
    Assert.AreEqual('T', reader.Consume());

    var upToThree = reader.ConsumeTo('T');
    Assert.AreEqual("wo ", upToThree);
    Assert.AreEqual('T', reader.Consume());

    // No further 'T' exists, so this consumes to the end of the input.
    var tail = reader.ConsumeTo('T');
    Assert.AreEqual("hree", tail);
}
/// <summary>
/// Checks <c>ConsumeToAny</c>: it stops in front of whichever of the given characters comes
/// first, leaving that character available for <c>Matches</c> and <c>Consume</c>.
/// </summary>
public void ConsumeToAny()
{
    var reader = new CharacterReader("One &bar; qux");

    var beforeAmp = reader.ConsumeToAny('&', ';');
    Assert.AreEqual("One ", beforeAmp);

    // The reader now sits on the ampersand that starts "&bar;".
    Assert.IsTrue(reader.Matches('&'));
    Assert.IsTrue(reader.Matches("&bar;"));
    Assert.AreEqual('&', reader.Consume());

    var entityName = reader.ConsumeToAny('&', ';');
    Assert.AreEqual("bar", entityName);
    Assert.AreEqual(';', reader.Consume());

    // Neither stop character remains, so the rest of the input is returned.
    var tail = reader.ConsumeToAny('&', ';');
    Assert.AreEqual(" qux", tail);
}
/// <summary>
/// Checks that <c>RewindToMark()</c> returns the reader to the position recorded by
/// <c>Mark()</c>, even after the input has been fully consumed.
/// </summary>
public void Mark()
{
    var reader = new CharacterReader("one");

    // Move past 'o' and remember the position in front of 'n'.
    reader.Consume();
    reader.Mark();

    Assert.AreEqual('n', reader.Consume());
    Assert.AreEqual('e', reader.Consume());
    Assert.IsTrue(reader.IsEmpty());

    // Rewinding must make 'n' the next character again.
    reader.RewindToMark();
    Assert.AreEqual('n', reader.Consume());
}
// In the data state: gather characters until a character reference or tag is found.
internal override void Read(Tokeniser t, CharacterReader r)
{
    // Look at the current character without consuming it; each branch decides how to advance.
    char upcoming = r.Current();

    if (upcoming == '&')
    {
        t.AdvanceTransition(TokeniserState.CharacterReferenceInData);
    }
    else if (upcoming == '<')
    {
        t.AdvanceTransition(TokeniserState.TagOpen);
    }
    else if (upcoming == TokeniserState.nullChar)
    {
        // Reported as an error, but the character itself is emitted
        // (NOT the replacement character, oddly).
        t.Error(this);
        t.Emit(r.Consume());
    }
    else if (upcoming == TokeniserState.eof)
    {
        t.Emit(new Token.EOF());
    }
    else
    {
        // Plain text: emit everything up to the next character that needs special handling.
        string text = r.ConsumeToAny('&', '<', TokeniserState.nullChar);
        t.Emit(text);
    }
}
/// <summary>
/// Checks that <c>Consume()</c> returns the current character and moves the position forward,
/// that <c>Current()</c> does not move it, and that consuming past the end keeps returning
/// <c>CharacterReader.EOF</c>.
/// </summary>
public void Consume()
{
    var reader = new CharacterReader("one");

    // Start of input.
    Assert.AreEqual(0, reader.Pos());
    Assert.AreEqual('o', reader.Current());
    Assert.AreEqual('o', reader.Consume());

    // Peeking with Current() must leave the position unchanged.
    Assert.AreEqual(1, reader.Pos());
    Assert.AreEqual('n', reader.Current());
    Assert.AreEqual(1, reader.Pos());

    Assert.AreEqual('n', reader.Consume());
    Assert.AreEqual('e', reader.Consume());
    Assert.IsTrue(reader.IsEmpty());

    // Consuming at the end is repeatable and always yields EOF.
    var firstEof = reader.Consume();
    Assert.AreEqual(CharacterReader.EOF, firstEof);
    Assert.IsTrue(reader.IsEmpty());
    var secondEof = reader.Consume();
    Assert.AreEqual(CharacterReader.EOF, secondEof);
}
/// <summary>
/// Checks that <c>ConsumeLetterThenDigitSequence()</c> returns a run of letters followed by
/// digits and stops at the first character that is neither.
/// </summary>
public void ConsumeLetterThenDigitSequence()
{
    var reader = new CharacterReader("One12 Two &bar; qux");

    var lettersAndDigits = reader.ConsumeLetterThenDigitSequence();
    Assert.AreEqual("One12", lettersAndDigits);
    Assert.AreEqual(' ', reader.Consume());

    // A run with letters only is returned as well.
    var lettersOnly = reader.ConsumeLetterThenDigitSequence();
    Assert.AreEqual("Two", lettersOnly);

    var remainder = reader.ConsumeToEnd();
    Assert.AreEqual(" &bar; qux", remainder);
}
/// <summary>
/// Checks that <c>MatchesAny</c> reports whether the current character is one of the
/// supplied characters.
/// </summary>
public void MatchesAny()
{
    char[] whitespace = { ' ', '\n', '\t' };
    var reader = new CharacterReader("One\nTwo\tThree");

    // Positioned on 'O': not whitespace.
    Assert.IsFalse(reader.MatchesAny(whitespace));

    var word = reader.ConsumeToAny(whitespace);
    Assert.AreEqual("One", word);

    // Positioned on the newline.
    Assert.IsTrue(reader.MatchesAny(whitespace));
    Assert.AreEqual('\n', reader.Consume());

    // Positioned on 'T' again.
    Assert.IsFalse(reader.MatchesAny(whitespace));
}
/// <summary>
/// Checks that <c>Matches</c> is a case-sensitive prefix test against the unread input and
/// fails for sequences longer than what remains.
/// </summary>
public void Matches()
{
    var reader = new CharacterReader("One Two Three");

    // From the start of the input.
    Assert.IsTrue(reader.Matches('O'));
    Assert.IsTrue(reader.Matches("One Two Three"));
    Assert.IsTrue(reader.Matches("One"));
    Assert.IsFalse(reader.Matches("one"));

    // After consuming 'O' the prefix shifts by one character.
    Assert.AreEqual('O', reader.Consume());
    Assert.IsFalse(reader.Matches("One"));
    Assert.IsTrue(reader.Matches("ne Two Three"));
    Assert.IsFalse(reader.Matches("ne Two Three Four"));

    // Once everything is consumed nothing matches any more.
    var remainder = reader.ConsumeToEnd();
    Assert.AreEqual("ne Two Three", remainder);
    Assert.IsFalse(reader.Matches("ne"));
}
/// <summary>
/// Checks that <c>NextIndexOf(char)</c> returns the offset of the character relative to the
/// current position, or -1 when it does not occur in the unread input.
/// </summary>
public void NextIndexOfChar()
{
    string input = "blah blah";
    var reader = new CharacterReader(input);

    Assert.AreEqual(-1, reader.NextIndexOf('x'));
    Assert.AreEqual(3, reader.NextIndexOf('h'));

    string pulled = reader.ConsumeTo('h');
    Assert.AreEqual("bla", pulled);

    // Step over the 'h'; offsets are now measured from the following space.
    reader.Consume();
    Assert.AreEqual(2, reader.NextIndexOf('l'));

    var remainder = reader.ConsumeToEnd();
    Assert.AreEqual(" blah", remainder);
    Assert.AreEqual(-1, reader.NextIndexOf('x'));
}
/// <summary>
/// Checks that <c>MatchesIgnoreCase</c> is a case-insensitive prefix test, while
/// <c>Matches(char)</c> remains case-sensitive.
/// </summary>
public void MatchesIgnoreCase()
{
    var reader = new CharacterReader("One Two Three");

    // Single-character prefixes, either case.
    Assert.IsTrue(reader.MatchesIgnoreCase("O"));
    Assert.IsTrue(reader.MatchesIgnoreCase("o"));

    // The char overload of Matches stays case-sensitive.
    Assert.IsTrue(reader.Matches('O'));
    Assert.IsFalse(reader.Matches('o'));

    // Longer prefixes with mixed casing.
    Assert.IsTrue(reader.MatchesIgnoreCase("One Two Three"));
    Assert.IsTrue(reader.MatchesIgnoreCase("ONE two THREE"));
    Assert.IsTrue(reader.MatchesIgnoreCase("One"));
    Assert.IsTrue(reader.MatchesIgnoreCase("one"));

    // After consuming 'O' the prefix shifts by one character.
    Assert.AreEqual('O', reader.Consume());
    Assert.IsFalse(reader.MatchesIgnoreCase("One"));
    Assert.IsTrue(reader.MatchesIgnoreCase("NE Two Three"));
    Assert.IsFalse(reader.MatchesIgnoreCase("ne Two Three Four"));

    // Once everything is consumed nothing matches any more.
    var remainder = reader.ConsumeToEnd();
    Assert.AreEqual("ne Two Three", remainder);
    Assert.IsFalse(reader.MatchesIgnoreCase("ne"));
}
/// <summary>
/// Consumes the whole reader and returns its text with character references replaced by the
/// characters they resolve to.
/// </summary>
/// <param name="inAttribute">Passed through unchanged to <c>ConsumeCharacterReference</c>.</param>
/// <returns>The unescaped string read from the reader.</returns>
public string UnescapeEntities(bool inAttribute)
{
    var unescaped = new StringBuilder();

    while (!_reader.IsEmpty())
    {
        // Copy literal text up to the next ampersand (or to the end of the input).
        unescaped.Append(_reader.ConsumeTo('&'));

        if (!_reader.Matches('&'))
        {
            continue;
        }

        // Step over the ampersand and try to resolve what follows as a reference.
        _reader.Consume();
        char? resolved = ConsumeCharacterReference(null, inAttribute);

        if (resolved == null)
        {
            // Not a recognised reference: keep the ampersand as literal text.
            unescaped.Append('&');
        }
        else
        {
            unescaped.Append(resolved);
        }
    }

    return unescaped.ToString();
}
/// <summary>
/// Checks that <c>Unconsume()</c> steps the reader back by one consumed character, including
/// stepping back over the last real character and over a consumed EOF.
/// </summary>
public void Unconsume()
{
    var reader = new CharacterReader("one");

    // Consume 'o', then step back onto it.
    Assert.AreEqual('o', reader.Consume());
    Assert.AreEqual('n', reader.Current());
    reader.Unconsume();
    Assert.AreEqual('o', reader.Current());

    // Read the whole input.
    Assert.AreEqual('o', reader.Consume());
    Assert.AreEqual('n', reader.Consume());
    Assert.AreEqual('e', reader.Consume());
    Assert.IsTrue(reader.IsEmpty());

    // Stepping back from the end makes the final 'e' readable again.
    reader.Unconsume();
    Assert.IsFalse(reader.IsEmpty());
    Assert.AreEqual('e', reader.Current());
    Assert.AreEqual('e', reader.Consume());
    Assert.IsTrue(reader.IsEmpty());

    // Un-consuming a consumed EOF leaves the reader empty, still reporting EOF.
    var pastEnd = reader.Consume();
    Assert.AreEqual(CharacterReader.EOF, pastEnd);
    reader.Unconsume();
    Assert.IsTrue(reader.IsEmpty());
    Assert.AreEqual(CharacterReader.EOF, reader.Current());
}
/// <summary>
/// Handles the character that follows a '/' inside a tag.
/// '&gt;' marks the pending tag as self-closing and emits it; end of input is reported and
/// tokenising returns to the data state; anything else is an error and is handed back to the
/// BeforeAttributeName state.
/// </summary>
/// <param name="t">Tokeniser holding the pending tag and receiving transitions and errors.</param>
/// <param name="r">Reader positioned just after the '/'.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    char c = r.Consume();
    switch (c)
    {
        case '>':
            t.tagPending.selfClosing = true;
            t.EmitTagPending();
            t.Transition(TokeniserState.Data);
            break;

        case TokeniserState.eof:
            t.EofError(this);
            t.Transition(TokeniserState.Data);
            break;

        default:
            t.Error(this);
            // The character belongs to whatever follows the stray '/', so put it back for
            // the next state to reconsume. Without this, the first character of the next
            // attribute name is silently lost (e.g. "<a /href=x>" would lose the 'h').
            r.Unconsume();
            t.Transition(TokeniserState.BeforeAttributeName);
            break;
    }
}
/// <summary>
/// Handles the character that follows "--!" inside a comment. Unless the comment ends here,
/// the literal "--!" is appended to the pending comment's data.
/// </summary>
/// <param name="t">Tokeniser holding the pending comment.</param>
/// <param name="r">Reader supplying the next character.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();
    switch (next)
    {
        case '>':
            // The comment is complete.
            t.EmitCommentPending();
            t.Transition(TokeniserState.Data);
            break;

        case TokeniserState.eof:
            // Input ended: report it, but still emit what was collected.
            t.EofError(this);
            t.EmitCommentPending();
            t.Transition(TokeniserState.Data);
            break;

        case '-':
            t.commentPending.data.Append("--!");
            t.Transition(TokeniserState.CommentEndDash);
            break;

        case TokeniserState.nullChar:
            // Null characters are replaced rather than stored.
            t.Error(this);
            t.commentPending.data.Append("--!").Append(TokeniserState.replacementChar);
            t.Transition(TokeniserState.Comment);
            break;

        default:
            t.commentPending.data.Append("--!").Append(next);
            t.Transition(TokeniserState.Comment);
            break;
    }
}
/// <summary>
/// Handles the first character after the DOCTYPE keyword. Whitespace moves on to
/// BeforeDoctypeName; '&gt;' or end of input emits an empty doctype flagged as force-quirks;
/// anything else is an error and also moves on to BeforeDoctypeName.
/// </summary>
/// <param name="t">Tokeniser receiving errors, the pending doctype and transitions.</param>
/// <param name="r">Reader supplying the next character.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();
    switch (next)
    {
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case ' ':
            t.Transition(TokeniserState.BeforeDoctypeName);
            break;

        case TokeniserState.eof:
        case '>':
            // End of input is reported first and then handled exactly like '>'.
            if (next == TokeniserState.eof)
            {
                t.EofError(this);
            }

            // Catch an invalid <!DOCTYPE> that carries no name.
            t.Error(this);
            t.CreateDoctypePending();
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(TokeniserState.Data);
            break;

        default:
            t.Error(this);
            t.Transition(TokeniserState.BeforeDoctypeName);
            break;
    }
}
/// <summary>
/// Skips whitespace in front of the doctype name and creates the pending doctype as soon as
/// the name begins (or the input ends).
/// </summary>
/// <param name="t">Tokeniser that owns the pending doctype.</param>
/// <param name="r">Reader supplying the next character.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    // A letter starts the name; it is left unconsumed for the DoctypeName state to read.
    if (r.MatchesLetter())
    {
        t.CreateDoctypePending();
        t.Transition(TokeniserState.DoctypeName);
        return;
    }

    char next = r.Consume();
    switch (next)
    {
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case ' ':
            // ignore whitespace
            break;

        case TokeniserState.eof:
            t.EofError(this);
            t.CreateDoctypePending();
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(TokeniserState.Data);
            break;

        case TokeniserState.nullChar:
            // The name starts with the replacement character instead of the null.
            t.Error(this);
            t.CreateDoctypePending();
            t.doctypePending.name.Append(TokeniserState.replacementChar);
            t.Transition(TokeniserState.DoctypeName);
            break;

        default:
            // Any other character becomes the first character of the name.
            t.CreateDoctypePending();
            t.doctypePending.name.Append(next);
            t.Transition(TokeniserState.DoctypeName);
            break;
    }
}
/// <summary>
/// Handles what follows a completed doctype identifier: whitespace is skipped, '&gt;' emits
/// the doctype, end of input emits it flagged as force-quirks, and any other character is an
/// error that switches to BogusDoctype.
/// </summary>
/// <param name="t">Tokeniser that owns the pending doctype.</param>
/// <param name="r">Reader supplying the next character.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();
    switch (next)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            // Whitespace is skipped; the state does not change.
            break;

        case TokeniserState.eof:
            t.EofError(this);
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(TokeniserState.Data);
            break;

        case '>':
            t.EmitDoctypePending();
            t.Transition(TokeniserState.Data);
            break;

        default:
            // Unexpected content; note that forceQuirks is left untouched here.
            t.Error(this);
            t.Transition(TokeniserState.BogusDoctype);
            break;
    }
}
/// <summary>
/// Decides what a '&lt;' seen inside script data introduces: a possible end tag ('/'), a
/// possible escape start ('!'), or just a literal '&lt;'.
/// </summary>
/// <param name="t">Tokeniser receiving emitted text and transitions.</param>
/// <param name="r">Reader positioned after the '&lt;'.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();

    if (next == '/')
    {
        t.CreateTempBuffer();
        t.Transition(TokeniserState.ScriptDataEndTagOpen);
    }
    else if (next == '!')
    {
        t.Emit("<!");
        t.Transition(TokeniserState.ScriptDataEscapeStart);
    }
    else
    {
        // Only the '<' is emitted; the consumed character is handed back to ScriptData.
        t.Emit("<");
        r.Unconsume();
        t.Transition(TokeniserState.ScriptData);
    }
}
/// <summary>
/// Reads the body of a single-quoted attribute value: bulk-appends ordinary text, then
/// handles the closing quote, a character reference, a null character or end of input.
/// </summary>
/// <param name="t">Tokeniser that owns the pending tag.</param>
/// <param name="r">Reader positioned inside the attribute value.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    // Bulk-read everything up to the next character that needs special handling.
    string chunk = r.ConsumeToAny('\'', '&', TokeniserState.nullChar);
    if (chunk.Length > 0)
    {
        t.tagPending.AppendAttributeValue(chunk);
    }

    // No default branch is needed: ConsumeToAny above has already taken every other character.
    char next = r.Consume();
    switch (next)
    {
        case '&':
            char[] resolved = t.ConsumeCharacterReference('\'', true);
            if (resolved == null)
            {
                // Not a valid reference: keep the ampersand literally.
                t.tagPending.AppendAttributeValue('&');
            }
            else
            {
                t.tagPending.AppendAttributeValue(resolved);
            }
            break;

        case '\'':
            t.Transition(TokeniserState.AfterAttributeValue_quoted);
            break;

        case TokeniserState.eof:
            t.EofError(this);
            t.Transition(TokeniserState.Data);
            break;

        case TokeniserState.nullChar:
            t.Error(this);
            t.tagPending.AppendAttributeValue(TokeniserState.replacementChar);
            break;
    }
}
/// <summary>
/// Handles the position in front of an attribute name inside a tag: skips whitespace, reacts
/// to '/', '=', '&gt;' and end of input, and otherwise starts a new attribute on the pending
/// tag before switching to the AttributeName state.
/// </summary>
/// <param name="t">Tokeniser that owns the pending tag.</param>
/// <param name="r">Reader supplying the next character.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    char c = r.Consume();
    switch (c)
    {
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case ' ':
            // ignore
            break;

        case '/':
            t.Transition(TokeniserState.SelfClosingStartTag);
            break;

        case '=':
            t.Transition(TokeniserState.BeforeAttributeValue);
            break;

        case '>':
            t.EmitTagPending();
            t.Transition(TokeniserState.Data);
            break;

        case TokeniserState.nullChar:
            t.Error(this);
            // Start a fresh attribute first, as every other branch that enters AttributeName
            // does; otherwise the replacement character would be appended to the name of the
            // attribute that is still pending from earlier in the tag.
            t.tagPending.NewAttribute();
            t.tagPending.AppendAttributeName(TokeniserState.replacementChar);
            t.Transition(TokeniserState.AttributeName);
            break;

        case TokeniserState.eof:
            t.EofError(this);
            t.Transition(TokeniserState.Data);
            break;

        case '"':
        case '\'':
        case '<':
            // Invalid first character for a name: report it, but keep it as part of the name.
            t.Error(this);
            t.tagPending.NewAttribute();
            t.tagPending.AppendAttributeName(c);
            t.Transition(TokeniserState.AttributeName);
            break;

        default:
            // A-Z, anything else: hand the character back so AttributeName reads the whole name.
            t.tagPending.NewAttribute();
            r.Unconsume();
            t.Transition(TokeniserState.AttributeName);
            break;
    }
}
/// <summary>
/// Collects the name of a candidate end tag. Letters are appended to the pending tag name
/// (lower-cased) and, unchanged, to the data buffer. A terminating whitespace, '/' or
/// '&gt;' only closes the tag when <c>IsAppropriateEndTagToken()</c> is true; every other
/// situation is delegated to <c>AnythingElse</c>.
/// </summary>
/// <param name="t">Tokeniser that owns the pending tag and the data buffer.</param>
/// <param name="r">Reader positioned inside the candidate end tag name.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        string name = r.ConsumeLetterSequence();
        // Tag names are ASCII identifiers: lower-case them culture-invariantly so that the
        // result does not depend on the current culture (e.g. 'I' becoming a dotless 'ı'
        // under tr-TR, which would stop "TITLE" from matching "title").
        t.tagPending.AppendTagName(name.ToLowerInvariant());
        t.dataBuffer.Append(name);
        return;
    }

    char c = r.Consume();
    switch (c)
    {
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case ' ':
            if (t.IsAppropriateEndTagToken())
            {
                t.Transition(TokeniserState.BeforeAttributeName);
            }
            else
            {
                this.AnythingElse(t, r);
            }
            break;

        case '/':
            if (t.IsAppropriateEndTagToken())
            {
                t.Transition(TokeniserState.SelfClosingStartTag);
            }
            else
            {
                this.AnythingElse(t, r);
            }
            break;

        case '>':
            if (t.IsAppropriateEndTagToken())
            {
                t.EmitTagPending();
                t.Transition(TokeniserState.Data);
            }
            else
            {
                this.AnythingElse(t, r);
            }
            break;

        default:
            this.AnythingElse(t, r);
            break;
    }
}
/// <summary>
/// Handles the position in front of a doctype system identifier: skips whitespace, enters
/// the matching quoted-identifier state on a quote, and treats everything else as an error
/// that flags the pending doctype as force-quirks.
/// </summary>
/// <param name="t">Tokeniser that owns the pending doctype.</param>
/// <param name="r">Reader supplying the next character.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();
    switch (next)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            // Whitespace is skipped; the state does not change.
            break;

        case '\'':
            // Opening single quote of the system identifier.
            t.Transition(TokeniserState.DoctypeSystemIdentifier_singleQuoted);
            break;

        case '"':
            // Opening double quote of the system identifier.
            t.Transition(TokeniserState.DoctypeSystemIdentifier_doubleQuoted);
            break;

        case TokeniserState.eof:
            t.EofError(this);
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(TokeniserState.Data);
            break;

        case '>':
            // The doctype closed before any identifier was given.
            t.Error(this);
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(TokeniserState.Data);
            break;

        default:
            t.Error(this);
            t.doctypePending.forceQuirks = true;
            t.Transition(TokeniserState.BogusDoctype);
            break;
    }
}
/// <summary>
/// Handles RawtextEndTagName, ScriptDataEndTagName, and ScriptDataEscapedEndTagName.
/// </summary>
/// <remarks>
/// The three states share this body and differ only in the state they fall back to.
/// Letters are appended to the pending tag name (lower-cased) and, unchanged, to the data
/// buffer. When the tag is not an appropriate end tag, the input is exhausted, or an
/// unexpected character follows the name, the buffered text is emitted as "&lt;/" + buffer
/// and the tokeniser moves to <paramref name="elseTransition"/>.
/// </remarks>
/// <param name="t">Tokeniser that owns the pending tag and the data buffer.</param>
/// <param name="r">Reader positioned inside the candidate end tag name.</param>
/// <param name="elseTransition">State to enter when this turns out not to be a usable end tag.</param>
private static void HandleDataEndTag(Tokeniser t, CharacterReader r, TokeniserState elseTransition)
{
    if (r.MatchesLetter())
    {
        string name = r.ConsumeLetterSequence();
        // Tag names are ASCII identifiers: lower-case them culture-invariantly so that the
        // result does not depend on the current culture (e.g. "SCRIPT" lower-cases to
        // "scrıpt" under tr-TR with a culture-sensitive ToLower()).
        t.tagPending.AppendTagName(name.ToLowerInvariant());
        t.dataBuffer.Append(name);
        return;
    }

    bool needsExitTransition = false;
    if (t.IsAppropriateEndTagToken() && !r.IsEmpty())
    {
        char c = r.Consume();
        switch (c)
        {
            case '\t':
            case '\n':
            case '\r':
            case '\f':
            case ' ':
                t.Transition(BeforeAttributeName);
                break;

            case '/':
                t.Transition(SelfClosingStartTag);
                break;

            case '>':
                t.EmitTagPending();
                t.Transition(Data);
                break;

            default:
                // Not a valid terminator: keep the character with the buffered text.
                t.dataBuffer.Append(c);
                needsExitTransition = true;
                break;
        }
    }
    else
    {
        needsExitTransition = true;
    }

    if (needsExitTransition)
    {
        // Give the buffered characters back as plain text, prefixed by the "</" already read.
        t.Emit("</" + t.dataBuffer.ToString());
        t.Transition(elseTransition);
    }
}
/// <summary>
/// Skips the remainder of a malformed doctype: every character is discarded until '&gt;' or
/// end of input, at which point the pending doctype is emitted and tokenising returns to the
/// data state. Note that this handler does NOT set force-quirks.
/// </summary>
/// <param name="t">Tokeniser that owns the pending doctype.</param>
/// <param name="r">Reader supplying the next character.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();

    // Anything other than the terminator or end of input is ignored.
    bool finished = next == '>' || next == TokeniserState.eof;
    if (!finished)
    {
        return;
    }

    t.EmitDoctypePending();
    t.Transition(TokeniserState.Data);
}
/// <summary>
/// Handles the character that directly follows a quoted attribute value. Whitespace, '/' and
/// '&gt;' are the expected continuations; any other character is an error and is handed back
/// for the BeforeAttributeName state to read again.
/// </summary>
/// <param name="t">Tokeniser that owns the pending tag.</param>
/// <param name="r">Reader positioned after the closing quote.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();
    switch (next)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            t.Transition(TokeniserState.BeforeAttributeName);
            break;

        case '>':
            t.EmitTagPending();
            t.Transition(TokeniserState.Data);
            break;

        case '/':
            t.Transition(TokeniserState.SelfClosingStartTag);
            break;

        case TokeniserState.eof:
            t.EofError(this);
            t.Transition(TokeniserState.Data);
            break;

        default:
            // Missing separator: report it and let the next state reconsume the character.
            t.Error(this);
            r.Unconsume();
            t.Transition(TokeniserState.BeforeAttributeName);
            break;
    }
}
/// <summary>
/// Handles the character after a single '-' in escaped script data: a second '-' moves to
/// ScriptDataEscapedDashDash, '&lt;' to ScriptDataEscapedLessthanSign, and anything else is
/// emitted before returning to ScriptDataEscaped.
/// </summary>
/// <param name="t">Tokeniser receiving emitted characters and transitions.</param>
/// <param name="r">Reader supplying the next character.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    // End of input is detected up front, before anything is consumed.
    if (r.IsEmpty())
    {
        t.EofError(this);
        t.Transition(TokeniserState.Data);
        return;
    }

    char next = r.Consume();
    switch (next)
    {
        case '<':
            t.Transition(TokeniserState.ScriptDataEscapedLessthanSign);
            break;

        case '-':
            t.Emit(next);
            t.Transition(TokeniserState.ScriptDataEscapedDashDash);
            break;

        case TokeniserState.nullChar:
            // Null characters are emitted as the replacement character.
            t.Error(this);
            t.Emit(TokeniserState.replacementChar);
            t.Transition(TokeniserState.ScriptDataEscaped);
            break;

        default:
            t.Emit(next);
            t.Transition(TokeniserState.ScriptDataEscaped);
            break;
    }
}
/// <summary>
/// Reads an unquoted attribute value: bulk-appends ordinary text, then handles the character
/// that stopped the bulk read (whitespace, '&amp;', '&gt;', null, end of input, or one of the
/// characters that are invalid in an unquoted value).
/// </summary>
/// <param name="t">Tokeniser that owns the pending tag.</param>
/// <param name="r">Reader positioned inside the attribute value.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    // Bulk-read everything up to the next character that needs special handling.
    string chunk = r.ConsumeToAny('\t', '\n', '\r', '\f', ' ', '&', '>', TokeniserState.nullChar, '"', '\'', '<', '=', '`');
    if (chunk.Length > 0)
    {
        t.tagPending.AppendAttributeValue(chunk);
    }

    // No default branch is needed: ConsumeToAny above has already taken every other character.
    char next = r.Consume();
    switch (next)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            // Whitespace ends the value.
            t.Transition(TokeniserState.BeforeAttributeName);
            break;

        case '>':
            t.EmitTagPending();
            t.Transition(TokeniserState.Data);
            break;

        case '&':
            char[] resolved = t.ConsumeCharacterReference('>', true);
            if (resolved == null)
            {
                // Not a valid reference: keep the ampersand literally.
                t.tagPending.AppendAttributeValue('&');
            }
            else
            {
                t.tagPending.AppendAttributeValue(resolved);
            }
            break;

        case TokeniserState.eof:
            t.EofError(this);
            t.Transition(TokeniserState.Data);
            break;

        case TokeniserState.nullChar:
            t.Error(this);
            t.tagPending.AppendAttributeValue(TokeniserState.replacementChar);
            break;

        case '"':
        case '\'':
        case '<':
        case '=':
        case '`':
            // Invalid in an unquoted value: report it, but keep the character.
            t.Error(this);
            t.tagPending.AppendAttributeValue(next);
            break;
    }
}
/// <summary>
/// Handles the character after "--" in double-escaped script data. Every character except
/// end of input is emitted (null as the replacement character); '&lt;' and '&gt;' change
/// state, further dashes stay here, and everything else returns to ScriptDataDoubleEscaped.
/// </summary>
/// <param name="t">Tokeniser receiving emitted characters and transitions.</param>
/// <param name="r">Reader supplying the next character.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();
    switch (next)
    {
        case TokeniserState.eof:
            t.EofError(this);
            t.Transition(TokeniserState.Data);
            break;

        case TokeniserState.nullChar:
            t.Error(this);
            t.Emit(TokeniserState.replacementChar);
            t.Transition(TokeniserState.ScriptDataDoubleEscaped);
            break;

        case '-':
            // Another dash: emit it and remain in this state.
            t.Emit(next);
            break;

        case '>':
            t.Emit(next);
            t.Transition(TokeniserState.ScriptData);
            break;

        case '<':
            t.Emit(next);
            t.Transition(TokeniserState.ScriptDataDoubleEscapedLessthanSign);
            break;

        default:
            t.Emit(next);
            t.Transition(TokeniserState.ScriptDataDoubleEscaped);
            break;
    }
}
/// <summary>
/// Handles the position in front of an attribute value: skips whitespace and selects the
/// double-quoted, single-quoted or unquoted value state based on the first significant
/// character.
/// </summary>
/// <param name="t">Tokeniser that owns the pending tag.</param>
/// <param name="r">Reader supplying the next character.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();
    switch (next)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            // ignore
            break;

        case '\'':
            t.Transition(TokeniserState.AttributeValue_singleQuoted);
            break;

        case '"':
            t.Transition(TokeniserState.AttributeValue_doubleQuoted);
            break;

        case '>':
            // The tag closed without a value: report it and emit the tag as it stands.
            t.Error(this);
            t.EmitTagPending();
            t.Transition(TokeniserState.Data);
            break;

        case TokeniserState.eof:
            t.EofError(this);
            t.Transition(TokeniserState.Data);
            break;

        case TokeniserState.nullChar:
            t.Error(this);
            t.tagPending.AppendAttributeValue(TokeniserState.replacementChar);
            t.Transition(TokeniserState.AttributeValue_unquoted);
            break;

        case '<':
        case '=':
        case '`':
            // Invalid start of an unquoted value: report it, but keep the character.
            t.Error(this);
            t.tagPending.AppendAttributeValue(next);
            t.Transition(TokeniserState.AttributeValue_unquoted);
            break;

        case '&':
        default:
            // '&' and every remaining character start an unquoted value; the character is
            // handed back so the unquoted-value state reads it itself.
            r.Unconsume();
            t.Transition(TokeniserState.AttributeValue_unquoted);
            break;
    }
}
/// <summary>
/// Collects the doctype name. Letter runs are appended lower-cased; whitespace ends the name,
/// '&gt;' emits the doctype, end of input emits it flagged as force-quirks, and any other
/// character is appended as-is (null as the replacement character).
/// </summary>
/// <param name="t">Tokeniser that owns the pending doctype.</param>
/// <param name="r">Reader positioned inside the doctype name.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        string name = r.ConsumeLetterSequence();
        // Lower-case culture-invariantly so that the stored name does not depend on the
        // current culture (e.g. "HTML" must not be affected by tr-TR casing rules).
        t.doctypePending.name.Append(name.ToLowerInvariant());
        return;
    }

    char c = r.Consume();
    switch (c)
    {
        case '>':
            t.EmitDoctypePending();
            t.Transition(TokeniserState.Data);
            break;

        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case ' ':
            t.Transition(TokeniserState.AfterDoctypeName);
            break;

        case TokeniserState.nullChar:
            t.Error(this);
            t.doctypePending.name.Append(TokeniserState.replacementChar);
            break;

        case TokeniserState.eof:
            t.EofError(this);
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(TokeniserState.Data);
            break;

        default:
            // Non-letter name character: keep it unchanged and stay in this state.
            t.doctypePending.name.Append(c);
            break;
    }
}
/// <summary>
/// Shared handler for the script-data double-escape start/end states. Letters are emitted
/// unchanged and buffered lower-cased; when the name is terminated by whitespace, '/' or
/// '&gt;', the tokeniser moves to <paramref name="primary"/> if the buffered name is
/// "script" and to <paramref name="fallback"/> otherwise.
/// </summary>
/// <param name="t">Tokeniser that owns the data buffer and receives emitted text.</param>
/// <param name="r">Reader positioned inside the candidate tag name.</param>
/// <param name="primary">State entered when the buffered name equals "script".</param>
/// <param name="fallback">State entered in every other case.</param>
private static void HandleDataDoubleEscapeTag(Tokeniser t, CharacterReader r, TokeniserState primary, TokeniserState fallback)
{
    if (r.MatchesLetter())
    {
        string name = r.ConsumeLetterSequence();
        // The buffer is later compared with "script", so it must be lower-cased
        // culture-invariantly: under tr-TR a culture-sensitive ToLower() turns "SCRIPT"
        // into "scrıpt" (dotless i) and the comparison would never succeed.
        t.dataBuffer.Append(name.ToLowerInvariant());
        t.Emit(name);
        return;
    }

    char c = r.Consume();
    switch (c)
    {
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case ' ':
        case '/':
        case '>':
            if (t.dataBuffer.ToString().Equals("script"))
            {
                t.Transition(primary);
            }
            else
            {
                t.Transition(fallback);
            }

            // The terminating character is emitted after the transition has been made.
            t.Emit(c);
            break;

        default:
            // Not a name terminator: hand the character back for the fallback state to read.
            r.Unconsume();
            t.Transition(fallback);
            break;
    }
}
/// <summary>
/// Reads the body of a single-quoted doctype system identifier, appending characters until
/// the closing quote. A premature '&gt;' or end of input emits the doctype flagged as
/// force-quirks.
/// </summary>
/// <param name="t">Tokeniser that owns the pending doctype.</param>
/// <param name="r">Reader positioned inside the identifier.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Consume();
    switch (next)
    {
        case '\'':
            // Closing quote: the identifier is complete.
            t.Transition(TokeniserState.AfterDoctypeSystemIdentifier);
            break;

        case TokeniserState.eof:
            t.EofError(this);
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(TokeniserState.Data);
            break;

        case '>':
            // The doctype closed while the identifier was still open.
            t.Error(this);
            t.doctypePending.forceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(TokeniserState.Data);
            break;

        case TokeniserState.nullChar:
            t.Error(this);
            t.doctypePending.systemIdentifier.Append(TokeniserState.replacementChar);
            break;

        default:
            t.doctypePending.systemIdentifier.Append(next);
            break;
    }
}
/// <summary>
/// Collects a tag name. Reached from '&lt;' or '&lt;/' in data, so a start or end tag is
/// already pending. The name is appended lower-cased; whitespace, '/' and '&gt;' end it.
/// </summary>
/// <param name="t">Tokeniser that owns the pending tag.</param>
/// <param name="r">Reader positioned on the tag name.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    // The previous TagOpen state did NOT consume, so the current character is a letter.
    // Lower-case culture-invariantly: tag names are ASCII identifiers and must not depend on
    // the current culture (e.g. "TITLE" becoming "tıtle" under tr-TR).
    string tagName = r.ConsumeToAny('\t', '\n', '\r', '\f', ' ', '/', '>', TokeniserState.nullChar).ToLowerInvariant();
    t.tagPending.AppendTagName(tagName);

    // No default branch is needed: ConsumeToAny above has already taken every other character.
    switch (r.Consume())
    {
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case ' ':
            t.Transition(TokeniserState.BeforeAttributeName);
            break;

        case '/':
            t.Transition(TokeniserState.SelfClosingStartTag);
            break;

        case '>':
            t.EmitTagPending();
            t.Transition(TokeniserState.Data);
            break;

        case TokeniserState.nullChar:
            // replacement
            t.tagPending.AppendTagName(TokeniserState.replacementStr);
            break;

        case TokeniserState.eof:
            // should emit pending tag?
            t.EofError(this);
            t.Transition(TokeniserState.Data);
            break;
    }
}
/// <summary>
/// Collects an attribute name (reached from the before-attribute-name state). The name is
/// appended lower-cased to the pending tag; the character that ends the bulk read decides
/// the next state.
/// </summary>
/// <param name="t">Tokeniser that owns the pending tag.</param>
/// <param name="r">Reader positioned on the attribute name.</param>
internal override void Read(Tokeniser t, CharacterReader r)
{
    string name = r.ConsumeToAny('\t', '\n', '\r', '\f', ' ', '/', '=', '>', TokeniserState.nullChar, '"', '\'', '<');
    // Lower-case culture-invariantly so that attribute names do not depend on the current
    // culture (e.g. "ID" becoming "ıd" under tr-TR).
    t.tagPending.AppendAttributeName(name.ToLowerInvariant());

    // No default branch is needed: ConsumeToAny above has already taken every other character.
    char c = r.Consume();
    switch (c)
    {
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case ' ':
            t.Transition(TokeniserState.AfterAttributeName);
            break;

        case '/':
            t.Transition(TokeniserState.SelfClosingStartTag);
            break;

        case '=':
            t.Transition(TokeniserState.BeforeAttributeValue);
            break;

        case '>':
            t.EmitTagPending();
            t.Transition(TokeniserState.Data);
            break;

        case TokeniserState.nullChar:
            t.Error(this);
            t.tagPending.AppendAttributeName(TokeniserState.replacementChar);
            break;

        case TokeniserState.eof:
            t.EofError(this);
            t.Transition(TokeniserState.Data);
            break;

        case '"':
        case '\'':
        case '<':
            // Invalid inside a name: report it, but keep the character as part of the name.
            t.Error(this);
            t.tagPending.AppendAttributeName(c);
            break;
    }
}