/// <summary>
/// Verifies that Consume hands back characters in order and advances Position,
/// that Current peeks without advancing, and that an exhausted reader keeps
/// returning EOF.
/// </summary>
public void consume()
{
    var reader = new CharacterReader("one");

    // Nothing consumed yet: Current peeks at the first character.
    Assert.AreEqual(0, reader.Position);
    Assert.AreEqual('o', reader.Current());

    Assert.AreEqual('o', reader.Consume());
    Assert.AreEqual(1, reader.Position);

    // Peeking must leave the position untouched.
    Assert.AreEqual('n', reader.Current());
    Assert.AreEqual(1, reader.Position);

    Assert.AreEqual('n', reader.Consume());
    Assert.AreEqual('e', reader.Consume());
    Assert.IsTrue(reader.IsEmpty());

    // Consuming past the end yields EOF every time and the reader stays empty.
    Assert.AreEqual(CharacterReader.EOF, reader.Consume());
    Assert.IsTrue(reader.IsEmpty());
    Assert.AreEqual(CharacterReader.EOF, reader.Consume());
}
/// <summary>
/// Utility method that drains the reader, replacing every character reference
/// that can be resolved with its character and leaving unresolved ampersands as-is.
/// </summary>
/// <param name="inAttribute">Passed through unchanged to <c>ConsumeCharacterReference</c> for each ampersand found.</param>
/// <returns>Unescaped string from reader</returns>
public string UnescapeEntities(bool inAttribute)
{
    StringBuilder unescaped = new StringBuilder();

    while (!_reader.IsEmpty())
    {
        // Copy everything up to (not including) the next ampersand.
        unescaped.Append(_reader.ConsumeTo('&'));

        if (!_reader.Matches('&'))
        {
            continue;
        }

        // Step over the ampersand, then try to resolve what follows it.
        _reader.Consume();
        char? resolved = ConsumeCharacterReference(null, inAttribute);

        if (resolved.HasValue)
        {
            unescaped.Append(resolved.Value);
        }
        else
        {
            // Not a reference: keep the literal ampersand.
            unescaped.Append('&');
        }
    }

    return unescaped.ToString();
}
/// <summary>
/// In the data state: gathers characters until a character reference or a tag is found.
/// Dispatches on the reader's current (not yet consumed) character.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    switch (r.Current())
    {
        case _eof:
            t.Emit(new Token.EOF());
            break;

        case _nullChar:
            t.Error(this);
            // NOT replacement character (oddly?): the consumed character itself is emitted.
            t.Emit(r.Consume());
            break;

        case '<':
            t.AdvanceTransition(TagOpen);
            break;

        case '&':
            t.AdvanceTransition(CharacterReferenceInData);
            break;

        default:
            // Plain text: take everything up to the next special character in one go.
            string text = r.ConsumeToAny('&', '<', _nullChar);
            t.Emit(text);
            break;
    }
}
/// <summary>
/// Verifies that MatchesIgnoreCase compares the upcoming input case-insensitively,
/// while Matches(char) stays case-sensitive, before and after consuming input.
/// </summary>
public void matchesIgnoreCase()
{
    var reader = new CharacterReader("One Two Three");

    // Single-character prefixes: string form ignores case, char form does not.
    Assert.IsTrue(reader.MatchesIgnoreCase("O"));
    Assert.IsTrue(reader.MatchesIgnoreCase("o"));
    Assert.IsTrue(reader.Matches('O'));
    Assert.IsFalse(reader.Matches('o'));

    // Whole-input and prefix matches in mixed case.
    Assert.IsTrue(reader.MatchesIgnoreCase("One Two Three"));
    Assert.IsTrue(reader.MatchesIgnoreCase("ONE two THREE"));
    Assert.IsTrue(reader.MatchesIgnoreCase("One"));
    Assert.IsTrue(reader.MatchesIgnoreCase("one"));

    // After consuming 'O' the match is made against the remaining input.
    Assert.AreEqual('O', reader.Consume());
    Assert.IsFalse(reader.MatchesIgnoreCase("One"));
    Assert.IsTrue(reader.MatchesIgnoreCase("NE Two Three"));
    Assert.IsFalse(reader.MatchesIgnoreCase("ne Two Three Four"));

    // Once everything is consumed nothing matches any more.
    Assert.AreEqual("ne Two Three", reader.ConsumeToEnd());
    Assert.IsFalse(reader.MatchesIgnoreCase("ne"));
}
/// <summary>
/// Verifies that ConsumeLetterThenDigitSequence returns a run of letters followed by
/// a run of digits and stops at the first character that fits neither.
/// </summary>
public void consumeLetterThenDigitSequence()
{
    var reader = new CharacterReader("One12 Two &bar; qux");

    Assert.AreEqual("One12", reader.ConsumeLetterThenDigitSequence());
    Assert.AreEqual(' ', reader.Consume());

    // Letters only, no trailing digits.
    Assert.AreEqual("Two", reader.ConsumeLetterThenDigitSequence());

    // The rest of the input is left untouched.
    Assert.AreEqual(" &bar; qux", reader.ConsumeToEnd());
}
/// <summary>
/// Verifies that Advance skips exactly one character.
/// </summary>
public void advance()
{
    var reader = new CharacterReader("One Two Three");

    Assert.AreEqual('O', reader.Consume());

    // Skips over 'n', so the next consumed character is 'e'.
    reader.Advance();

    Assert.AreEqual('e', reader.Consume());
}
/// <summary>
/// Verifies that ConsumeTo(char) returns the text before the next occurrence of the
/// target without consuming the target, and returns the remainder when there is none.
/// </summary>
public void consumeToChar()
{
    var reader = new CharacterReader("One Two Three");

    Assert.AreEqual("One ", reader.ConsumeTo('T'));

    // Already positioned on the 'T' of "Two": nothing to consume.
    Assert.AreEqual("", reader.ConsumeTo('T'));
    Assert.AreEqual('T', reader.Consume());

    Assert.AreEqual("wo ", reader.ConsumeTo('T'));
    Assert.AreEqual('T', reader.Consume());

    // No further 'T' in the input: consumes to the end.
    Assert.AreEqual("hree", reader.ConsumeTo('T'));
}
/// <summary>
/// Verifies that RewindToMark restores the position recorded by Mark.
/// </summary>
public void mark()
{
    var reader = new CharacterReader("one");

    // Mark is taken after the first character has been consumed.
    reader.Consume();
    reader.Mark();

    Assert.AreEqual('n', reader.Consume());
    Assert.AreEqual('e', reader.Consume());
    Assert.IsTrue(reader.IsEmpty());

    // Rewinding puts the reader back on 'n'.
    reader.RewindToMark();
    Assert.AreEqual('n', reader.Consume());
}
/// <summary>
/// Reached from a tag name (<c>&lt;xxx</c>): consumes one character and decides whether
/// an attribute begins, the tag finishes, or the character is skipped.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            // ignore whitespace
            break;

        case '>':
            t.EmitTagPending();
            t.Transition(Data);
            break;

        case '/':
            t.Transition(SelfClosingStartTag);
            break;

        case _eof:
            t.EofError(this);
            t.Transition(Data);
            break;

        case _nullChar:
            // Reported as an error, then handed back so the attribute-name state sees it.
            t.Error(this);
            t.TagPending.NewAttribute();
            r.Unconsume();
            t.Transition(AttributeName);
            break;

        case '"':
        case '\'':
        case '<':
        case '=':
            // Reported as an error, but the character still becomes the start of the name.
            t.Error(this);
            t.TagPending.NewAttribute();
            t.TagPending.AppendAttributeName(ch);
            t.Transition(AttributeName);
            break;

        default:
            // A-Z, anything else: start an attribute and let the next state re-read the character.
            t.TagPending.NewAttribute();
            r.Unconsume();
            t.Transition(AttributeName);
            break;
    }
}
/// <summary>
/// Consumes one character while waiting for the opening quote of a doctype system
/// identifier. Whitespace is skipped; anything other than a quote is an error that
/// forces quirks mode on the pending doctype.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            // Whitespace: stay in this state.
            break;

        case '"':
            // Opening double quote of the system identifier.
            t.Transition(DoctypeSystemIdentifierDoubleQuoted);
            break;

        case '\'':
            // Opening single quote of the system identifier.
            t.Transition(DoctypeSystemIdentifierSingleQuoted);
            break;

        case '>':
            // Doctype closed before any system identifier was seen.
            t.Error(this);
            t.DoctypePending.ForceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(Data);
            break;

        case _eof:
            t.EofError(this);
            t.DoctypePending.ForceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(Data);
            break;

        default:
            t.Error(this);
            t.DoctypePending.ForceQuirks = true;
            t.Transition(BogusDoctype);
            break;
    }
}
/// <summary>
/// Accumulates the name of the pending doctype. Runs of letters are appended
/// lower-cased in one step; other characters are handled one at a time.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        // Fast path: take the whole letter run at once and stay in this state.
        string letters = r.ConsumeLetterSequence();
        t.DoctypePending.Name.Append(letters.ToLowerInvariant());
        return;
    }

    char ch = r.Consume();
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            // Whitespace ends the name.
            t.Transition(AfterDoctypeName);
            break;

        case '>':
            t.EmitDoctypePending();
            t.Transition(Data);
            break;

        case _eof:
            t.EofError(this);
            t.DoctypePending.ForceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(Data);
            break;

        case _nullChar:
            // Reported as an error and replaced in the name.
            t.Error(this);
            t.DoctypePending.Name.Append(_replacementChar);
            break;

        default:
            t.DoctypePending.Name.Append(ch);
            break;
    }
}
/// <summary>
/// Waits for the first character of a doctype name. Whitespace is skipped; any other
/// input creates a new pending doctype and either starts its name or, at end of input,
/// emits it with quirks forced.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        // The letter is left unconsumed so the name state reads it.
        t.CreateDoctypePending();
        t.Transition(DoctypeName);
        return;
    }

    char c = r.Consume();
    switch (c)
    {
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case ' ':
            // ignore whitespace
            break;

        case _nullChar:
            t.Error(this);
            // Create the doctype before writing to it, as every other branch that
            // touches DoctypePending does; otherwise the replacement character would be
            // appended to whatever pending doctype (if any) was left from earlier input.
            t.CreateDoctypePending();
            t.DoctypePending.Name.Append(_replacementChar);
            t.Transition(DoctypeName);
            break;

        case _eof:
            t.EofError(this);
            t.CreateDoctypePending();
            t.DoctypePending.ForceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(Data);
            break;

        default:
            t.CreateDoctypePending();
            t.DoctypePending.Name.Append(c);
            t.Transition(DoctypeName);
            break;
    }
}
/// <summary>
/// Consumes the character that follows a <c>--!</c> sequence inside a comment. Unless the
/// comment is closed or input ends, the literal <c>--!</c> is put back into the comment data.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();
    switch (ch)
    {
        case '>':
            // Comment closed.
            t.EmitCommentPending();
            t.Transition(Data);
            break;

        case _eof:
            t.EofError(this);
            t.EmitCommentPending();
            t.Transition(Data);
            break;

        case '-':
            // The dash itself is not appended here; only the preceding "--!" is.
            t.CommentPending.Data.Append("--!");
            t.Transition(CommentEndDash);
            break;

        case _nullChar:
            t.Error(this);
            t.CommentPending.Data.Append("--!").Append(_replacementChar);
            t.Transition(Comment);
            break;

        default:
            t.CommentPending.Data.Append("--!").Append(ch);
            t.Transition(Comment);
            break;
    }
}
/// <summary>
/// Consumes the character after a <c>/</c> inside a tag. A <c>&gt;</c> marks the pending
/// tag as self-closing and emits it; any other character is an error and is handed back
/// to be re-read as the start of an attribute.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    char c = r.Consume();
    switch (c)
    {
        case '>':
            t.TagPending.IsSelfClosing = true;
            t.EmitTagPending();
            t.Transition(Data);
            break;

        case _eof:
            t.EofError(this);
            t.Transition(Data);
            break;

        default:
            t.Error(this);
            // Put the character back so the next state processes it; without this the
            // first character after a stray '/' (e.g. the 'b' in "<a /b>") was dropped.
            r.Unconsume();
            t.Transition(BeforeAttributeName);
            break;
    }
}
/// <summary>
/// Consumes the character following a quoted attribute value: whitespace leads to the
/// next attribute, <c>/</c> and <c>&gt;</c> finish the tag, and anything else is an error
/// that is handed back to be re-read as the start of an attribute.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            t.Transition(BeforeAttributeName);
            break;

        case '>':
            t.EmitTagPending();
            t.Transition(Data);
            break;

        case '/':
            t.Transition(SelfClosingStartTag);
            break;

        case _eof:
            t.EofError(this);
            t.Transition(Data);
            break;

        default:
            // Unexpected character: report it, then let the next state re-read it.
            t.Error(this);
            r.Unconsume();
            t.Transition(BeforeAttributeName);
            break;
    }
}
/// <summary>
/// Collects a tag name into the data buffer (lower-cased) while emitting the original
/// text, then picks the next script-data state depending on whether the buffered name
/// is exactly "script".
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        // Buffer the lower-cased letters for the later comparison; emit them as written.
        string letters = r.ConsumeLetterSequence();
        t.DataBuffer.Append(letters.ToLowerInvariant());
        t.Emit(letters);
        return;
    }

    char ch = r.Consume();
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case '/':
        case '>':
            bool bufferedScript = t.DataBuffer.ToString().Equals("script");
            if (!bufferedScript)
            {
                t.Transition(ScriptDataDoubleEscaped);
            }
            else
            {
                t.Transition(ScriptDataEscaped);
            }

            // The terminating character is emitted after the transition.
            t.Emit(ch);
            break;

        default:
            // Not a name terminator: hand the character back and stay double-escaped.
            r.Unconsume();
            t.Transition(ScriptDataDoubleEscaped);
            break;
    }
}
/// <summary>
/// Reads one character of a single-quoted doctype system identifier, appending it to the
/// pending doctype until the closing quote, a premature <c>&gt;</c>, or end of input.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();
    switch (ch)
    {
        case '\'':
            // Closing quote.
            t.Transition(AfterDoctypeSystemIdentifier);
            break;

        case '>':
            // Doctype closed before the identifier's closing quote.
            t.Error(this);
            t.DoctypePending.ForceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(Data);
            break;

        case _eof:
            t.EofError(this);
            t.DoctypePending.ForceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(Data);
            break;

        case _nullChar:
            t.Error(this);
            t.DoctypePending.SystemIdentifier.Append(_replacementChar);
            break;

        default:
            t.DoctypePending.SystemIdentifier.Append(ch);
            break;
    }
}
/// <summary>
/// Consumes one character after a doctype system identifier: whitespace is skipped,
/// <c>&gt;</c> emits the pending doctype, and anything else is an error that moves to
/// the bogus-doctype state.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            // Whitespace: stay in this state.
            break;

        case '>':
            t.EmitDoctypePending();
            t.Transition(Data);
            break;

        case _eof:
            t.EofError(this);
            t.DoctypePending.ForceQuirks = true;
            t.EmitDoctypePending();
            t.Transition(Data);
            break;

        default:
            // NOT force quirks
            t.Error(this);
            t.Transition(BogusDoctype);
            break;
    }
}
/// <summary>
/// Discards characters one at a time until <c>&gt;</c> or end of input, then emits the
/// pending doctype and returns to the data state.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();
    switch (ch)
    {
        case _eof:
            t.EmitDoctypePending();
            t.Transition(Data);
            break;

        case '>':
            t.EmitDoctypePending();
            t.Transition(Data);
            break;

        default:
            // ignore char
            break;
    }
}
/// <summary>
/// Verifies that NextIndexOf(char) reports the offset of the next occurrence relative
/// to the current position, and -1 when the character does not occur.
/// </summary>
public void nextIndexOfChar()
{
    string input = "blah blah";
    var reader = new CharacterReader(input);

    Assert.AreEqual(-1, reader.NextIndexOf('x'));
    Assert.AreEqual(3, reader.NextIndexOf('h'));

    string pulled = reader.ConsumeTo('h');
    Assert.AreEqual("bla", pulled);

    // Step over the 'h'; offsets are now measured from the following space.
    reader.Consume();
    Assert.AreEqual(2, reader.NextIndexOf('l'));

    Assert.AreEqual(" blah", reader.ConsumeToEnd());
    Assert.AreEqual(-1, reader.NextIndexOf('x'));
}
/// <summary>
/// Reached from a less-than sign (optionally followed by a slash) in data, with a start
/// or end tag already pending. Appends the lower-cased tag name to the pending tag and
/// then acts on the character that terminated it.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    // The previous TagOpen state did NOT consume, so the reader is on a letter here.
    string rawName = r.ConsumeToAny('\t', '\n', '\r', '\f', ' ', '/', '>', _nullChar);
    t.TagPending.AppendTagName(rawName.ToLowerInvariant());

    char terminator = r.Consume();
    switch (terminator)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            t.Transition(BeforeAttributeName);
            break;

        case '>':
            t.EmitTagPending();
            t.Transition(Data);
            break;

        case '/':
            t.Transition(SelfClosingStartTag);
            break;

        case _nullChar:
            // replacement; stays in this state to keep reading the name
            t.TagPending.AppendTagName(_replacementStr);
            break;

        case _eof:
            // should Emit pending tag?
            t.EofError(this);
            t.Transition(Data);
            break;

        // no default: every other character was taken by ConsumeToAny above
    }
}
/// <summary>
/// Verifies that ConsumeTo(string) returns the text before the next occurrence of the
/// target sequence, and the whole remainder when the sequence is absent.
/// </summary>
public void consumeToString()
{
    var reader = new CharacterReader("One Two Two Four");

    Assert.AreEqual("One ", reader.ConsumeTo("Two"));
    Assert.AreEqual('T', reader.Consume());

    // From inside the first "Two", the next match is the second "Two".
    Assert.AreEqual("wo ", reader.ConsumeTo("Two"));
    Assert.AreEqual('T', reader.Consume());

    // "Qux" never occurs: everything left is returned.
    Assert.AreEqual("wo Four", reader.ConsumeTo("Qux"));
}
/// <summary>
/// Consumes the character after a less-than sign in script data: a slash begins a
/// possible end tag, an exclamation mark begins a possible escape, and anything else
/// means the less-than sign was plain script text.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();
    switch (ch)
    {
        case '!':
            t.Emit("<!");
            t.Transition(ScriptDataEscapeStart);
            break;

        case '/':
            t.CreateTempBuffer();
            t.Transition(ScriptDataEndTagOpen);
            break;

        default:
            // Emit the pending '<' as text and let the script-data state re-read the character.
            t.Emit("<");
            r.Unconsume();
            t.Transition(ScriptData);
            break;
    }
}
/// <summary>
/// Verifies that ConsumeToAny returns the text before the first of the given characters
/// without consuming that character, and the remainder when none of them occurs.
/// </summary>
public void consumeToAny()
{
    var reader = new CharacterReader("One &bar; qux");

    Assert.AreEqual("One ", reader.ConsumeToAny('&', ';'));

    // The reader is left sitting on the delimiter.
    Assert.IsTrue(reader.Matches('&'));
    Assert.IsTrue(reader.Matches("&bar;"));
    Assert.AreEqual('&', reader.Consume());

    Assert.AreEqual("bar", reader.ConsumeToAny('&', ';'));
    Assert.AreEqual(';', reader.Consume());

    // No delimiter left in the input.
    Assert.AreEqual(" qux", reader.ConsumeToAny('&', ';'));
}
/// <summary>
/// Handles the character after a single dash in escaped script data. End of input is
/// detected up front with IsEmpty rather than through the switch.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.IsEmpty())
    {
        t.EofError(this);
        t.Transition(Data);
        return;
    }

    char ch = r.Consume();
    switch (ch)
    {
        case '<':
            // Nothing is emitted here.
            t.Transition(ScriptDataEscapedLessThanSign);
            break;

        case '-':
            t.Emit(ch);
            t.Transition(ScriptDataEscapedDashDash);
            break;

        case _nullChar:
            t.Error(this);
            t.Emit(_replacementChar);
            t.Transition(ScriptDataEscaped);
            break;

        default:
            t.Emit(ch);
            t.Transition(ScriptDataEscaped);
            break;
    }
}
/// <summary>
/// Verifies that Matches compares the upcoming input case-sensitively, for both the
/// char and string overloads, before and after consuming input.
/// </summary>
public void matches()
{
    var reader = new CharacterReader("One Two Three");

    Assert.IsTrue(reader.Matches('O'));
    Assert.IsTrue(reader.Matches("One Two Three"));
    Assert.IsTrue(reader.Matches("One"));
    Assert.IsFalse(reader.Matches("one")); // case matters

    // After consuming 'O' the match is made against the remaining input.
    Assert.AreEqual('O', reader.Consume());
    Assert.IsFalse(reader.Matches("One"));
    Assert.IsTrue(reader.Matches("ne Two Three"));
    Assert.IsFalse(reader.Matches("ne Two Three Four"));

    // Once everything is consumed nothing matches any more.
    Assert.AreEqual("ne Two Three", reader.ConsumeToEnd());
    Assert.IsFalse(reader.Matches("ne"));
}
/// <summary>
/// Reads the name of a candidate end tag. Letters are appended to the pending tag
/// (lower-cased) and to the data buffer (as written). Once the name ends, the tag is
/// only honoured when the tokeniser reports it as the appropriate end tag; otherwise
/// handling is delegated to <c>AnythingElse</c>.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        string letters = r.ConsumeLetterSequence();
        t.TagPending.AppendTagName(letters.ToLowerInvariant());
        t.DataBuffer.Append(letters);
        return;
    }

    // Not the appropriate end tag, or nothing left to read: fall back without consuming.
    if (!t.IsAppropriateEndTagToken() || r.IsEmpty())
    {
        AnythingElse(t, r);
        return;
    }

    char ch = r.Consume();
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            t.Transition(BeforeAttributeName);
            break;

        case '>':
            t.EmitTagPending();
            t.Transition(Data);
            break;

        case '/':
            t.Transition(SelfClosingStartTag);
            break;

        default:
            // Keep the consumed character in the buffer before falling back.
            t.DataBuffer.Append(ch);
            AnythingElse(t, r);
            break;
    }
}
/// <summary>
/// Verifies that MatchesAny is true only while the current character is one of the
/// given characters.
/// </summary>
public void matchesAny()
{
    var whitespace = new[] { ' ', '\n', '\t' };
    var reader = new CharacterReader("One\nTwo\tThree");

    // Positioned on 'O'.
    Assert.IsFalse(reader.MatchesAny(whitespace));

    // Positioned on the newline after "One".
    Assert.AreEqual("One", reader.ConsumeToAny(whitespace));
    Assert.IsTrue(reader.MatchesAny(whitespace));

    // Positioned on 'T'.
    Assert.AreEqual('\n', reader.Consume());
    Assert.IsFalse(reader.MatchesAny(whitespace));
}
/// <summary>
/// Handles one character after two dashes in double-escaped script data. Every
/// character except end of input is emitted (a null character as the replacement
/// character); only the follow-up state differs.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();
    switch (ch)
    {
        case _eof:
            t.EofError(this);
            t.Transition(Data);
            break;

        case _nullChar:
            t.Error(this);
            t.Emit(_replacementChar);
            t.Transition(ScriptDataDoubleEscaped);
            break;

        case '-':
            // Another dash: emit it and stay in this state.
            t.Emit(ch);
            break;

        case '>':
            t.Emit(ch);
            t.Transition(ScriptData);
            break;

        case '<':
            t.Emit(ch);
            t.Transition(ScriptDataDoubleEscapedLessthanSign);
            break;

        default:
            t.Emit(ch);
            t.Transition(ScriptDataDoubleEscaped);
            break;
    }
}
/// <summary>
/// Verifies that Unconsume steps back one character, including from the end of the
/// input, and that unconsuming after an EOF read leaves the reader empty.
/// </summary>
public void unconsume()
{
    var reader = new CharacterReader("one");

    Assert.AreEqual('o', reader.Consume());
    Assert.AreEqual('n', reader.Current());

    // Step back onto 'o'.
    reader.Unconsume();
    Assert.AreEqual('o', reader.Current());

    Assert.AreEqual('o', reader.Consume());
    Assert.AreEqual('n', reader.Consume());
    Assert.AreEqual('e', reader.Consume());
    Assert.IsTrue(reader.IsEmpty());

    // Stepping back from the end makes the last character available again.
    reader.Unconsume();
    Assert.IsFalse(reader.IsEmpty());
    Assert.AreEqual('e', reader.Current());
    Assert.AreEqual('e', reader.Consume());

    Assert.IsTrue(reader.IsEmpty());

    // After an EOF read, Unconsume still leaves the reader empty and on EOF.
    Assert.AreEqual(CharacterReader.EOF, reader.Consume());
    reader.Unconsume();
    Assert.IsTrue(reader.IsEmpty());
    Assert.AreEqual(CharacterReader.EOF, reader.Current());
}
/// <summary>
/// Reads an unquoted attribute value. The run of ordinary characters is appended to the
/// pending tag in one step; the character that ended the run is then handled individually.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    string run = r.ConsumeToAny('\t', '\n', '\r', '\f', ' ', '&', '>', _nullChar, '"', '\'', '<', '=', '`');
    if (run.Length > 0)
    {
        t.TagPending.AppendAttributeValue(run);
    }

    char ch = r.Consume();
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            // Whitespace ends the value.
            t.Transition(BeforeAttributeName);
            break;

        case '>':
            t.EmitTagPending();
            t.Transition(Data);
            break;

        case '&':
            // Append the resolved reference, or a literal ampersand when none was recognised.
            char? resolved = t.ConsumeCharacterReference('>', true);
            t.TagPending.AppendAttributeValue(resolved ?? '&');
            break;

        case _eof:
            t.EofError(this);
            t.Transition(Data);
            break;

        case _nullChar:
            t.Error(this);
            t.TagPending.AppendAttributeValue(_replacementChar);
            break;

        case '"':
        case '\'':
        case '<':
        case '=':
        case '`':
            // Reported as an error, but the character is still kept in the value.
            t.Error(this);
            t.TagPending.AppendAttributeValue(ch);
            break;

        // no default: every other character was taken by ConsumeToAny above
    }
}