// Entered from the data state after an '&'. Emits the resolved character
// reference, or a literal ampersand when none could be consumed, and then
// hands control back to the data state.
public override void Read(Tokeniser t, CharacterReader r)
{
    char? reference = t.ConsumeCharacterReference(null, false);

    // No reference consumed: the '&' is passed through as plain text.
    char emitted = reference.HasValue ? reference.Value : '&';
    t.Emit(emitted);

    t.Transition(Data);
}
// Consumes a three-character input one character at a time, checking the
// reported position, the peeked character, and the result of reading past
// the end of the input.
public void consume()
{
    CharacterReader reader = new CharacterReader("one");

    // Nothing has been consumed yet.
    Assert.AreEqual(0, reader.Position);
    Assert.AreEqual('o', reader.Current());

    Assert.AreEqual('o', reader.Consume());
    Assert.AreEqual(1, reader.Position);

    // Current() is expected to peek without moving the position.
    Assert.AreEqual('n', reader.Current());
    Assert.AreEqual(1, reader.Position);

    Assert.AreEqual('n', reader.Consume());
    Assert.AreEqual('e', reader.Consume());
    Assert.IsTrue(reader.IsEmpty());

    // Reading past the end is expected to keep yielding EOF.
    Assert.AreEqual(CharacterReader.EOF, reader.Consume());
    Assert.IsTrue(reader.IsEmpty());
    Assert.AreEqual(CharacterReader.EOF, reader.Consume());
}
protected ParseErrorList _errors; // null when not tracking errors

/// <summary>
/// Prepares the builder for a new parse: creates the document, the character
/// reader over <paramref name="input"/>, the tokeniser and an empty element
/// stack, and records the base URI and error list.
/// </summary>
/// <param name="input">Text to parse; must not be null.</param>
/// <param name="baseUri">Base URI given to the new document; must not be null.</param>
/// <param name="errors">Error list handed to the tokeniser; stored as-is (may be null when errors are not tracked).</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> or <paramref name="baseUri"/> is null.
/// </exception>
protected virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
{
    // ArgumentNullException(string) interprets its argument as the parameter
    // name, so the (paramName, message) overload is used to keep both correct.
    if (input == null)
    {
        throw new ArgumentNullException("input", "String input must not be null");
    }
    if (baseUri == null)
    {
        throw new ArgumentNullException("baseUri", "BaseURI must not be null");
    }

    _doc = new Document(baseUri);
    _reader = new CharacterReader(input);
    _errors = errors;
    _tokeniser = new Tokeniser(_reader, errors);
    _stack = new DescendableLinkedList<Element>();
    this._baseUri = baseUri;
}
protected ParseErrorList _errors; // null when not tracking errors

/// <summary>
/// Prepares the builder for a new parse: creates the document, the character
/// reader over <paramref name="input"/>, the tokeniser and an empty element
/// stack, and records the base URI and error list.
/// </summary>
/// <param name="input">Text to parse; must not be null.</param>
/// <param name="baseUri">Base URI given to the new document; must not be null.</param>
/// <param name="errors">Error list handed to the tokeniser; stored as-is (may be null when errors are not tracked).</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> or <paramref name="baseUri"/> is null.
/// </exception>
protected virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
{
    // ArgumentNullException(string) interprets its argument as the parameter
    // name, so the (paramName, message) overload is used to keep both correct.
    if (input == null)
    {
        throw new ArgumentNullException("input", "String input must not be null");
    }
    if (baseUri == null)
    {
        throw new ArgumentNullException("baseUri", "BaseURI must not be null");
    }

    _doc = new Document(baseUri);
    _reader = new CharacterReader(input);
    _errors = errors;
    _tokeniser = new Tokeniser(_reader, errors);
    _stack = new DescendableLinkedList<Element>();
    this._baseUri = baseUri;
}
// Data state: plain text is gathered and emitted in runs until a character
// reference ('&'), a tag ('<'), a null character or the end of input shows up.
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Current();

    if (next == '&')
    {
        t.AdvanceTransition(CharacterReferenceInData);
    }
    else if (next == '<')
    {
        t.AdvanceTransition(TagOpen);
    }
    else if (next == _nullChar)
    {
        // Reported as an error, but the character itself is emitted
        // unchanged rather than swapped for the replacement character.
        t.Error(this);
        t.Emit(r.Consume());
    }
    else if (next == _eof)
    {
        t.Emit(new Token.EOF());
    }
    else
    {
        // Emit everything up to the next character that needs special handling.
        string text = r.ConsumeToAny('&', '<', _nullChar);
        t.Emit(text);
    }
}
/// <summary>
/// Handles data in title, textarea etc: text is emitted in runs, '&amp;' and
/// '&lt;' switch to their dedicated states, and a null character is reported
/// and replaced.
/// </summary>
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Current();

    if (next == '&')
    {
        t.AdvanceTransition(CharacterReferenceInRcData);
        return;
    }

    if (next == '<')
    {
        t.AdvanceTransition(RcDataLessThanSign);
        return;
    }

    if (next == _nullChar)
    {
        // Skip the null character and emit the replacement character instead.
        t.Error(this);
        r.Advance();
        t.Emit(_replacementChar);
        return;
    }

    if (next == _eof)
    {
        t.Emit(new Token.EOF());
        return;
    }

    // Ordinary text: emit everything up to the next special character.
    string text = r.ConsumeToAny('&', '<', _nullChar);
    t.Emit(text);
}
// ConsumeTo(char) is expected to return the text before the next occurrence
// of the character, leaving the character itself unconsumed.
public void consumeToChar()
{
    CharacterReader reader = new CharacterReader("One Two Three");

    Assert.AreEqual("One ", reader.ConsumeTo('T'));

    // Already positioned on the 'T' of "Two": nothing to consume.
    Assert.AreEqual("", reader.ConsumeTo('T'));
    Assert.AreEqual('T', reader.Consume());

    Assert.AreEqual("wo ", reader.ConsumeTo('T'));
    Assert.AreEqual('T', reader.Consume());

    // No further 'T' remains, so the rest of the input is consumed.
    Assert.AreEqual("hree", reader.ConsumeTo('T'));
}
// Fallback path: emits "</" followed by the current contents of the
// tokeniser's data buffer as text, then returns to the ScriptDataEscaped
// state. The reader argument is not used here.
private void AnythingElse(Tokeniser t, CharacterReader r)
{
    string buffered = t.DataBuffer.ToString();
    t.Emit("</" + buffered);

    t.Transition(ScriptDataEscaped);
}
// Exercises Unconsume() in the middle of the input, after the last
// character, and after an EOF read.
public void unconsume()
{
    CharacterReader reader = new CharacterReader("one");

    // Stepping back after the first character returns to it.
    Assert.AreEqual('o', reader.Consume());
    Assert.AreEqual('n', reader.Current());
    reader.Unconsume();
    Assert.AreEqual('o', reader.Current());

    // Run through the whole input.
    Assert.AreEqual('o', reader.Consume());
    Assert.AreEqual('n', reader.Consume());
    Assert.AreEqual('e', reader.Consume());
    Assert.IsTrue(reader.IsEmpty());

    // Stepping back from the end exposes the last character again.
    reader.Unconsume();
    Assert.IsFalse(reader.IsEmpty());
    Assert.AreEqual('e', reader.Current());
    Assert.AreEqual('e', reader.Consume());
    Assert.IsTrue(reader.IsEmpty());

    // After an EOF read, one Unconsume() is expected to leave the reader empty.
    Assert.AreEqual(CharacterReader.EOF, reader.Consume());
    reader.Unconsume();
    Assert.IsTrue(reader.IsEmpty());
    Assert.AreEqual(CharacterReader.EOF, reader.Current());
}
// MatchesAny is expected to be true only while the current character is one
// of the supplied characters.
public void matchesAny()
{
    char[] whitespace = new char[] { ' ', '\n', '\t' };
    CharacterReader reader = new CharacterReader("One\nTwo\tThree");

    // Starts on 'O', which is not in the set.
    Assert.IsFalse(reader.MatchesAny(whitespace));

    // After consuming up to the newline, the reader sits on a member of the set.
    Assert.AreEqual("One", reader.ConsumeToAny(whitespace));
    Assert.IsTrue(reader.MatchesAny(whitespace));

    // Past the newline the reader is on 'T' again.
    Assert.AreEqual('\n', reader.Consume());
    Assert.IsFalse(reader.MatchesAny(whitespace));
}
// Compares case-insensitive prefix matching with the case-sensitive
// Matches(char), before and after consuming input.
public void matchesIgnoreCase()
{
    CharacterReader reader = new CharacterReader("One Two Three");

    // Single characters: only the case-insensitive form accepts 'o'.
    Assert.IsTrue(reader.MatchesIgnoreCase("O"));
    Assert.IsTrue(reader.MatchesIgnoreCase("o"));
    Assert.IsTrue(reader.Matches('O'));
    Assert.IsFalse(reader.Matches('o'));

    // Whole input and prefixes, in various casings.
    Assert.IsTrue(reader.MatchesIgnoreCase("One Two Three"));
    Assert.IsTrue(reader.MatchesIgnoreCase("ONE two THREE"));
    Assert.IsTrue(reader.MatchesIgnoreCase("One"));
    Assert.IsTrue(reader.MatchesIgnoreCase("one"));

    // After consuming the first character, matching starts at "ne ...".
    Assert.AreEqual('O', reader.Consume());
    Assert.IsFalse(reader.MatchesIgnoreCase("One"));
    Assert.IsTrue(reader.MatchesIgnoreCase("NE Two Three"));
    Assert.IsFalse(reader.MatchesIgnoreCase("ne Two Three Four"));

    // Once everything is consumed nothing matches.
    Assert.AreEqual("ne Two Three", reader.ConsumeToEnd());
    Assert.IsFalse(reader.MatchesIgnoreCase("ne"));
}
// A letter run followed by a digit run is expected to be consumed as one
// token; a letter run with no digits is consumed on its own.
public void consumeLetterThenDigitSequence()
{
    CharacterReader reader = new CharacterReader("One12 Two &bar; qux");

    Assert.AreEqual("One12", reader.ConsumeLetterThenDigitSequence());
    Assert.AreEqual(' ', reader.Consume());

    Assert.AreEqual("Two", reader.ConsumeLetterThenDigitSequence());

    // The remainder is left untouched.
    Assert.AreEqual(" &bar; qux", reader.ConsumeToEnd());
}
// ConsumeToAny is expected to stop in front of the first of the given
// characters, or at the end of the input when none of them remain.
public void consumeToAny()
{
    char[] stops = new char[] { '&', ';' };
    CharacterReader reader = new CharacterReader("One &bar; qux");

    // Stops in front of the ampersand.
    Assert.AreEqual("One ", reader.ConsumeToAny(stops));
    Assert.IsTrue(reader.Matches('&'));
    Assert.IsTrue(reader.Matches("&bar;"));
    Assert.AreEqual('&', reader.Consume());

    // Stops in front of the semicolon.
    Assert.AreEqual("bar", reader.ConsumeToAny(stops));
    Assert.AreEqual(';', reader.Consume());

    // Neither character remains: the rest of the input is returned.
    Assert.AreEqual(" qux", reader.ConsumeToAny(stops));
}
/// <summary>
/// Creates a tokeniser that stores the given reader and error list for later use.
/// </summary>
/// <param name="reader">Character reader assigned to the tokeniser.</param>
/// <param name="errors">Error list assigned to the tokeniser; stored without validation.</param>
public Tokeniser(CharacterReader reader, ParseErrorList errors)
{
    _reader = reader;
    _errors = errors;
}
// Entered after a tag name: skips whitespace, then either finishes the tag
// or opens a new attribute on the pending tag.
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();

    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            // Whitespace before an attribute name is ignored.
            break;

        case '>':
            t.EmitTagPending();
            t.Transition(Data);
            break;

        case '/':
            t.Transition(SelfClosingStartTag);
            break;

        case _eof:
            t.EofError(this);
            t.Transition(Data);
            break;

        case '=':
        case '<':
        case '\'':
        case '"':
            // Error, but the character still becomes the first character
            // of the new attribute's name.
            t.Error(this);
            t.TagPending.NewAttribute();
            t.TagPending.AppendAttributeName(ch);
            t.Transition(AttributeName);
            break;

        case _nullChar:
            // Error; the character is pushed back so the AttributeName
            // state processes it.
            t.Error(this);
            t.TagPending.NewAttribute();
            r.Unconsume();
            t.Transition(AttributeName);
            break;

        default:
            // A-Z, anything else: start an attribute and let the
            // AttributeName state re-read the character.
            t.TagPending.NewAttribute();
            r.Unconsume();
            t.Transition(AttributeName);
            break;
    }
}
// Collects a run of letters into the data buffer (lower-cased) while emitting
// them unchanged. On a terminating character, the buffered name decides the
// next state: "script" goes to ScriptDataEscaped, anything else to
// ScriptDataDoubleEscaped.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        string letters = r.ConsumeLetterSequence();
        t.DataBuffer.Append(letters.ToLowerInvariant());
        t.Emit(letters);
        return;
    }

    char ch = r.Consume();
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
        case '/':
        case '>':
            bool bufferIsScript = t.DataBuffer.ToString().Equals("script");
            if (bufferIsScript)
            {
                t.Transition(ScriptDataEscaped);
            }
            else
            {
                t.Transition(ScriptDataDoubleEscaped);
            }

            // The terminating character is emitted after the state change.
            t.Emit(ch);
            break;

        default:
            // Not a terminator: push it back for the next state to handle.
            r.Unconsume();
            t.Transition(ScriptDataDoubleEscaped);
            break;
    }
}
// On a '/', emits it, resets the temp buffer and advances into
// ScriptDataDoubleEscapeEnd; on anything else, returns to
// ScriptDataDoubleEscaped without consuming.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.Matches('/'))
    {
        t.Transition(ScriptDataDoubleEscaped);
        return;
    }

    t.Emit('/');
    t.CreateTempBuffer();
    t.AdvanceTransition(ScriptDataDoubleEscapeEnd);
}
// Consumes one character. End of input and null are handled as errors;
// every other character is emitted, and the next state depends on whether
// it was '-', '<', '>' or something else.
public override void Read(Tokeniser t, CharacterReader r)
{
    char ch = r.Consume();

    if (ch == _eof)
    {
        t.EofError(this);
        t.Transition(Data);
        return;
    }

    if (ch == _nullChar)
    {
        // Null is reported and replaced before dropping back.
        t.Error(this);
        t.Emit(_replacementChar);
        t.Transition(ScriptDataDoubleEscaped);
        return;
    }

    // All remaining characters are emitted first.
    t.Emit(ch);

    if (ch == '<')
    {
        t.Transition(ScriptDataDoubleEscapedLessthanSign);
    }
    else if (ch == '>')
    {
        t.Transition(ScriptData);
    }
    else if (ch != '-')
    {
        // A '-' stays in this state; anything else drops back.
        t.Transition(ScriptDataDoubleEscaped);
    }
}
// Peeks at the current character: '-' and '<' are emitted and move to their
// own states, null is reported and replaced, end of input is an error, and
// any other text is emitted in a run up to the next '-', '<' or null.
public override void Read(Tokeniser t, CharacterReader r)
{
    char next = r.Current();

    if (next == '-')
    {
        t.Emit(next);
        t.AdvanceTransition(ScriptDataDoubleEscapedDash);
        return;
    }

    if (next == '<')
    {
        t.Emit(next);
        t.AdvanceTransition(ScriptDataDoubleEscapedLessthanSign);
        return;
    }

    if (next == _nullChar)
    {
        // Skip the null and emit the replacement character in its place.
        t.Error(this);
        r.Advance();
        t.Emit(_replacementChar);
        return;
    }

    if (next == _eof)
    {
        t.EofError(this);
        t.Transition(Data);
        return;
    }

    string text = r.ConsumeToAny('-', '<', _nullChar);
    t.Emit(text);
}
// ConsumeTo(string) is expected to return the text before the next
// occurrence of the sequence, or the rest of the input when it is absent.
public void consumeToString()
{
    CharacterReader reader = new CharacterReader("One Two Two Four");

    // Stops in front of the first "Two".
    Assert.AreEqual("One ", reader.ConsumeTo("Two"));
    Assert.AreEqual('T', reader.Consume());

    // Stops in front of the second "Two".
    Assert.AreEqual("wo ", reader.ConsumeTo("Two"));
    Assert.AreEqual('T', reader.Consume());

    // "Qux" never occurs, so everything left is consumed.
    Assert.AreEqual("wo Four", reader.ConsumeTo("Qux"));
}
// Advance() is expected to skip exactly one character.
public void advance()
{
    CharacterReader reader = new CharacterReader("One Two Three");

    Assert.AreEqual('O', reader.Consume());

    // Skips the 'n', so the next consumed character is 'e'.
    reader.Advance();
    Assert.AreEqual('e', reader.Consume());
}
// If a letter follows, starts a pending tag (created with 'false') whose name
// begins with that letter in lower case, records the original letter in the
// data buffer, and advances to ScriptDataEscapedEndTagName. Otherwise emits
// "</" as text and returns to ScriptDataEscaped.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (!r.MatchesLetter())
    {
        t.Emit("</");
        t.Transition(ScriptDataEscaped);
        return;
    }

    t.CreateTagPending(false);

    char letter = r.Current();
    t.TagPending.AppendTagName(char.ToLowerInvariant(letter));
    t.DataBuffer.Append(letter);

    t.AdvanceTransition(ScriptDataEscapedEndTagName);
}
// ConsumeLetterSequence is expected to return a run of letters and stop at
// the first non-letter.
public void consumeLetterSequence()
{
    CharacterReader reader = new CharacterReader("One &bar; qux");

    Assert.AreEqual("One", reader.ConsumeLetterSequence());

    // Skip ahead to the entity name.
    Assert.AreEqual(" &", reader.ConsumeTo("bar;"));

    // The letter run ends in front of the semicolon.
    Assert.AreEqual("bar", reader.ConsumeLetterSequence());
    Assert.AreEqual("; qux", reader.ConsumeToEnd());
}
// Matches is expected to be a case-sensitive prefix check against the
// remaining input.
public void matches()
{
    CharacterReader reader = new CharacterReader("One Two Three");

    // At the start of the input.
    Assert.IsTrue(reader.Matches('O'));
    Assert.IsTrue(reader.Matches("One Two Three"));
    Assert.IsTrue(reader.Matches("One"));
    Assert.IsFalse(reader.Matches("one"));

    // After consuming the first character.
    Assert.AreEqual('O', reader.Consume());
    Assert.IsFalse(reader.Matches("One"));
    Assert.IsTrue(reader.Matches("ne Two Three"));
    Assert.IsFalse(reader.Matches("ne Two Three Four"));

    // With the input exhausted nothing matches.
    Assert.AreEqual("ne Two Three", reader.ConsumeToEnd());
    Assert.IsFalse(reader.Matches("ne"));
}
// RewindToMark() is expected to restore the position saved by Mark().
public void mark()
{
    CharacterReader reader = new CharacterReader("one");

    // Mark the position just after the first character.
    reader.Consume();
    reader.Mark();

    Assert.AreEqual('n', reader.Consume());
    Assert.AreEqual('e', reader.Consume());
    Assert.IsTrue(reader.IsEmpty());

    // Rewinding makes the character after the mark readable again.
    reader.RewindToMark();
    Assert.AreEqual('n', reader.Consume());
}
// Checks ContainsIgnoreCase against all-upper-case, all-lower-case and
// mixed-case occurrences in the input.
public void containsIgnoreCase()
{
    CharacterReader reader = new CharacterReader("One TWO three");

    Assert.IsTrue(reader.ContainsIgnoreCase("two"));
    Assert.IsTrue(reader.ContainsIgnoreCase("three"));

    // weird one: does not find one, because it scans for consistent case only
    Assert.IsFalse(reader.ContainsIgnoreCase("one"));
}
// Handles the character after a '<': a letter advances to
// ScriptDataDoubleEscapeStart, a '/' advances to ScriptDataEscapedEndTagOpen,
// and anything else emits the '<' as text and returns to ScriptDataEscaped.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        t.CreateTempBuffer();

        // The buffer gets the lower-cased letter; the emitted text keeps
        // the original casing.
        char letter = r.Current();
        t.DataBuffer.Append(char.ToLowerInvariant(letter));
        t.Emit("<" + letter);

        t.AdvanceTransition(ScriptDataDoubleEscapeStart);
        return;
    }

    if (r.Matches('/'))
    {
        t.CreateTempBuffer();
        t.AdvanceTransition(ScriptDataEscapedEndTagOpen);
        return;
    }

    t.Emit('<');
    t.Transition(ScriptDataEscaped);
}
// NextIndexOf(char) is expected to return the offset from the current
// position to the next occurrence, or -1 when there is none.
public void nextIndexOfChar()
{
    string source = "blah blah";
    CharacterReader reader = new CharacterReader(source);

    Assert.AreEqual(-1, reader.NextIndexOf('x'));
    Assert.AreEqual(3, reader.NextIndexOf('h'));

    string beforeH = reader.ConsumeTo('h');
    Assert.AreEqual("bla", beforeH);

    // Step over the 'h'; the offset is now measured from the new position.
    reader.Consume();
    Assert.AreEqual(2, reader.NextIndexOf('l'));

    Assert.AreEqual(" blah", reader.ConsumeToEnd());
    Assert.AreEqual(-1, reader.NextIndexOf('x'));
}
// NextIndexOf(string) is expected to return the offset from the current
// position to the next occurrence of the sequence, or -1 when there is none.
public void nextIndexOfString()
{
    string source = "One Two something Two Three Four";
    CharacterReader reader = new CharacterReader(source);

    Assert.AreEqual(-1, reader.NextIndexOf("Foo"));
    Assert.AreEqual(4, reader.NextIndexOf("Two"));

    // After moving past the first "Two", the second one is found instead.
    Assert.AreEqual("One Two ", reader.ConsumeTo("something"));
    Assert.AreEqual(10, reader.NextIndexOf("Two"));

    // With everything consumed there is nothing left to find.
    Assert.AreEqual("something Two Three Four", reader.ConsumeToEnd());
    Assert.AreEqual(-1, reader.NextIndexOf("Two"));
}
// ConsumeToEnd() on a fresh reader is expected to return the whole input and
// leave the reader empty.
public void consumeToEnd()
{
    string source = "one two three";
    CharacterReader reader = new CharacterReader(source);

    string consumed = reader.ConsumeToEnd();

    Assert.AreEqual(source, consumed);
    Assert.IsTrue(reader.IsEmpty());
}
// Collects the letters of a possible end tag name into the pending tag
// (lower-cased) and the data buffer (as written). When the letters stop, the
// tag is completed only if it is an appropriate end tag and input remains;
// otherwise the AnythingElse fallback runs.
public override void Read(Tokeniser t, CharacterReader r)
{
    if (r.MatchesLetter())
    {
        string letters = r.ConsumeLetterSequence();
        t.TagPending.AppendTagName(letters.ToLowerInvariant());
        t.DataBuffer.Append(letters);
        return;
    }

    // Not an appropriate end tag, or nothing left to read: take the fallback.
    if (!t.IsAppropriateEndTagToken() || r.IsEmpty())
    {
        AnythingElse(t, r);
        return;
    }

    char ch = r.Consume();
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            t.Transition(BeforeAttributeName);
            break;

        case '/':
            t.Transition(SelfClosingStartTag);
            break;

        case '>':
            t.EmitTagPending();
            t.Transition(Data);
            break;

        default:
            // Keep the unexpected character in the buffer before falling back.
            t.DataBuffer.Append(ch);
            AnythingElse(t, r);
            break;
    }
}