コード例 #1
0
ファイル: TokeniserState.cs プロジェクト: fengweijp/NSoup
            /// <summary>
            /// Reached from an '&amp;' in data: emits the character reference the tokeniser
            /// consumed, or a literal '&amp;' when none was returned, then goes back to Data.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                // A null result means no character reference was consumed.
                char? reference = t.ConsumeCharacterReference(null, false);
                char output = reference.HasValue ? reference.Value : '&';

                t.Emit(output);
                t.Transition(Data);
            }
コード例 #2
0
 /// <summary>
 /// Verifies that Consume returns the current character and moves the position on,
 /// that Current leaves the position alone, and that an exhausted reader keeps
 /// returning EOF.
 /// </summary>
 public void consume()
 {
     var reader = new CharacterReader("one");

     // Fresh reader sits on the first character.
     Assert.AreEqual(0, reader.Position);
     Assert.AreEqual('o', reader.Current());

     // Consuming moves forward by one; peeking with Current does not.
     Assert.AreEqual('o', reader.Consume());
     Assert.AreEqual(1, reader.Position);
     Assert.AreEqual('n', reader.Current());
     Assert.AreEqual(1, reader.Position);

     Assert.AreEqual('n', reader.Consume());
     Assert.AreEqual('e', reader.Consume());

     // Past the end: the reader stays empty and repeatedly yields EOF.
     Assert.IsTrue(reader.IsEmpty());
     Assert.AreEqual(CharacterReader.EOF, reader.Consume());
     Assert.IsTrue(reader.IsEmpty());
     Assert.AreEqual(CharacterReader.EOF, reader.Consume());
 }
コード例 #3
0
ファイル: TreeBuilder.cs プロジェクト: fengweijp/NSoup
        protected ParseErrorList _errors; // null when not tracking errors

        /// <summary>
        /// Prepares the builder for a new parse: creates the document, the character
        /// reader, the tokeniser and the element stack, and stores the base URI and
        /// the error list.
        /// </summary>
        /// <param name="input">Text to parse; must not be null.</param>
        /// <param name="baseUri">Base URI passed to the new document; must not be null.</param>
        /// <param name="errors">Error list handed to the tokeniser; null when not tracking errors.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="input"/> or <paramref name="baseUri"/> is null.
        /// </exception>
        protected virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
        {
            // ArgumentNullException(string) interprets its single argument as the
            // parameter name, so the (paramName, message) overload is used to keep
            // the descriptive text as the exception message.
            if (input == null)
            {
                throw new ArgumentNullException("input", "String input must not be null");
            }
            if (baseUri == null)
            {
                throw new ArgumentNullException("baseUri", "BaseURI must not be null");
            }

            _doc = new Document(baseUri);
            _reader = new CharacterReader(input);
            _errors = errors;
            _tokeniser = new Tokeniser(_reader, errors);
            _stack = new DescendableLinkedList<Element>();
            this._baseUri = baseUri;
        }
コード例 #4
0
        protected ParseErrorList _errors;                 // null when not tracking errors

        /// <summary>
        /// Prepares the builder for a new parse: creates the document, the character
        /// reader, the tokeniser and the element stack, and stores the base URI and
        /// the error list.
        /// </summary>
        /// <param name="input">Text to parse; must not be null.</param>
        /// <param name="baseUri">Base URI passed to the new document; must not be null.</param>
        /// <param name="errors">Error list handed to the tokeniser; null when not tracking errors.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="input"/> or <paramref name="baseUri"/> is null.
        /// </exception>
        protected virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
        {
            // The one-argument ArgumentNullException constructor takes the parameter
            // name, not a message; pass both so the message text is reported correctly.
            if (input == null)
            {
                throw new ArgumentNullException("input", "String input must not be null");
            }
            if (baseUri == null)
            {
                throw new ArgumentNullException("baseUri", "BaseURI must not be null");
            }

            _doc          = new Document(baseUri);
            _reader       = new CharacterReader(input);
            _errors       = errors;
            _tokeniser    = new Tokeniser(_reader, errors);
            _stack        = new DescendableLinkedList<Element>();
            this._baseUri = baseUri;
        }
コード例 #5
0
ファイル: TokeniserState.cs プロジェクト: fengweijp/NSoup
 /// <summary>
 /// Data state: gathers characters until a character reference or tag is found.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char current = r.Current();

     if (current == '&')
     {
         t.AdvanceTransition(CharacterReferenceInData);
     }
     else if (current == '<')
     {
         t.AdvanceTransition(TagOpen);
     }
     else if (current == _nullChar)
     {
         // Reported as an error, but the character itself is emitted
         // (NOT the replacement character, oddly?).
         t.Error(this);
         t.Emit(r.Consume());
     }
     else if (current == _eof)
     {
         t.Emit(new Token.EOF());
     }
     else
     {
         // Plain text: take everything up to the next '&', '<' or null character.
         string text = r.ConsumeToAny('&', '<', _nullChar);
         t.Emit(text);
     }
 }
コード例 #6
0
ファイル: TokeniserState.cs プロジェクト: fengweijp/NSoup
 /// <summary>
 /// Handles data in title, textarea etc.: like plain data, except that a null
 /// character is replaced by the replacement character.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char current = r.Current();

     if (current == '&')
     {
         t.AdvanceTransition(CharacterReferenceInRcData);
         return;
     }

     if (current == '<')
     {
         t.AdvanceTransition(RcDataLessThanSign);
         return;
     }

     if (current == _nullChar)
     {
         // Skip the null character and emit the replacement character in its place.
         t.Error(this);
         r.Advance();
         t.Emit(_replacementChar);
         return;
     }

     if (current == _eof)
     {
         t.Emit(new Token.EOF());
         return;
     }

     // Anything else: emit the run up to the next '&', '<' or null character.
     string text = r.ConsumeToAny('&', '<', _nullChar);
     t.Emit(text);
 }
コード例 #7
0
 /// <summary>
 /// Verifies that ConsumeTo(char) returns the text before the next occurrence of the
 /// character without consuming it, and reads to the end when there is no further match.
 /// </summary>
 public void consumeToChar()
 {
     var reader = new CharacterReader("One Two Three");

     Assert.AreEqual("One ", reader.ConsumeTo('T'));
     Assert.AreEqual("", reader.ConsumeTo('T')); // already on the 'T' of "Two"
     Assert.AreEqual('T', reader.Consume());

     Assert.AreEqual("wo ", reader.ConsumeTo('T'));
     Assert.AreEqual('T', reader.Consume());

     Assert.AreEqual("hree", reader.ConsumeTo('T')); // no more 'T': consumes to the end
 }
コード例 #8
0
ファイル: TokeniserState.cs プロジェクト: fengweijp/NSoup
 /// <summary>
 /// Fallback branch: emits "&lt;/" followed by the contents of the data buffer and
 /// transitions to ScriptDataEscaped. The reader argument is not used here.
 /// </summary>
 private void AnythingElse(Tokeniser t, CharacterReader r)
 {
     string buffered = t.DataBuffer.ToString();
     t.Emit("</" + buffered);
     t.Transition(ScriptDataEscaped);
 }
コード例 #9
0
        /// <summary>
        /// Verifies that Unconsume steps the reader back one character, including from
        /// the end of input, and what Current/IsEmpty report after unconsuming an EOF.
        /// </summary>
        public void unconsume()
        {
            var reader = new CharacterReader("one");

            // Step forward one character, then back again.
            Assert.AreEqual('o', reader.Consume());
            Assert.AreEqual('n', reader.Current());
            reader.Unconsume();
            Assert.AreEqual('o', reader.Current());

            // Read everything, then step back from the end onto the last character.
            Assert.AreEqual('o', reader.Consume());
            Assert.AreEqual('n', reader.Consume());
            Assert.AreEqual('e', reader.Consume());
            Assert.IsTrue(reader.IsEmpty());
            reader.Unconsume();
            Assert.IsFalse(reader.IsEmpty());
            Assert.AreEqual('e', reader.Current());
            Assert.AreEqual('e', reader.Consume());
            Assert.IsTrue(reader.IsEmpty());

            // Consuming EOF and unconsuming it leaves the reader empty, still on EOF.
            Assert.AreEqual(CharacterReader.EOF, reader.Consume());
            reader.Unconsume();
            Assert.IsTrue(reader.IsEmpty());
            Assert.AreEqual(CharacterReader.EOF, reader.Current());
        }
コード例 #10
0
 /// <summary>
 /// Verifies that MatchesAny is true only when the current character is one of the
 /// supplied characters.
 /// </summary>
 public void matchesAny()
 {
     var separators = new[] { ' ', '\n', '\t' };
     var reader = new CharacterReader("One\nTwo\tThree");

     // On 'O': not a separator.
     Assert.IsFalse(reader.MatchesAny(separators));

     // After consuming "One" the reader sits on '\n'.
     Assert.AreEqual("One", reader.ConsumeToAny(separators));
     Assert.IsTrue(reader.MatchesAny(separators));

     // Past the newline, on 'T' again.
     Assert.AreEqual('\n', reader.Consume());
     Assert.IsFalse(reader.MatchesAny(separators));
 }
コード例 #11
0
 /// <summary>
 /// Verifies that MatchesIgnoreCase compares the upcoming text without regard to case,
 /// while Matches(char) stays case-sensitive.
 /// </summary>
 public void matchesIgnoreCase()
 {
     var reader = new CharacterReader("One Two Three");

     // Single character: the string overload ignores case, the char overload does not.
     Assert.IsTrue(reader.MatchesIgnoreCase("O"));
     Assert.IsTrue(reader.MatchesIgnoreCase("o"));
     Assert.IsTrue(reader.Matches('O'));
     Assert.IsFalse(reader.Matches('o'));

     // Whole input and prefixes, in any case.
     Assert.IsTrue(reader.MatchesIgnoreCase("One Two Three"));
     Assert.IsTrue(reader.MatchesIgnoreCase("ONE two THREE"));
     Assert.IsTrue(reader.MatchesIgnoreCase("One"));
     Assert.IsTrue(reader.MatchesIgnoreCase("one"));

     // After consuming 'O' the match is taken from the new position.
     Assert.AreEqual('O', reader.Consume());
     Assert.IsFalse(reader.MatchesIgnoreCase("One"));
     Assert.IsTrue(reader.MatchesIgnoreCase("NE Two Three"));
     Assert.IsFalse(reader.MatchesIgnoreCase("ne Two Three Four")); // longer than the remaining input

     // Nothing left to match once the input is consumed.
     Assert.AreEqual("ne Two Three", reader.ConsumeToEnd());
     Assert.IsFalse(reader.MatchesIgnoreCase("ne"));
 }
コード例 #12
0
 /// <summary>
 /// Verifies that ConsumeLetterThenDigitSequence returns a run of letters followed by
 /// digits and stops at the first other character.
 /// </summary>
 public void consumeLetterThenDigitSequence()
 {
     var reader = new CharacterReader("One12 Two &bar; qux");

     Assert.AreEqual("One12", reader.ConsumeLetterThenDigitSequence());
     Assert.AreEqual(' ', reader.Consume());

     // Letters only, no trailing digits.
     Assert.AreEqual("Two", reader.ConsumeLetterThenDigitSequence());
     Assert.AreEqual(" &bar; qux", reader.ConsumeToEnd());
 }
コード例 #13
0
 /// <summary>
 /// Verifies that ConsumeToAny returns the text before the first of the given
 /// characters, leaves that character unconsumed, and reads to the end when none remain.
 /// </summary>
 public void consumeToAny()
 {
     var reader = new CharacterReader("One &bar; qux");

     // Stops on the '&' without consuming it.
     Assert.AreEqual("One ", reader.ConsumeToAny('&', ';'));
     Assert.IsTrue(reader.Matches('&'));
     Assert.IsTrue(reader.Matches("&bar;"));
     Assert.AreEqual('&', reader.Consume());

     // Stops on the ';'.
     Assert.AreEqual("bar", reader.ConsumeToAny('&', ';'));
     Assert.AreEqual(';', reader.Consume());

     // Neither character is left, so the remainder is returned.
     Assert.AreEqual(" qux", reader.ConsumeToAny('&', ';'));
 }
コード例 #14
0
ファイル: Tokeniser.cs プロジェクト: fengweijp/NSoup
 /// <summary>
 /// Creates a tokeniser that keeps the given reader and error list.
 /// </summary>
 /// <param name="reader">Character reader stored for later use.</param>
 /// <param name="errors">Parse error list stored for later use.</param>
 public Tokeniser(CharacterReader reader, ParseErrorList errors)
 {
     _reader = reader;
     _errors = errors;
 }
コード例 #15
0
ファイル: TokeniserState.cs プロジェクト: fengweijp/NSoup
 /// <summary>
 /// Reached after a tag name (&lt;xxx): skips whitespace, finishes or self-closes the
 /// tag, or starts a new attribute on the pending tag.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char c = r.Consume();

     if (c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == ' ')
     {
         return; // ignore whitespace
     }

     if (c == '/')
     {
         t.Transition(SelfClosingStartTag);
     }
     else if (c == '>')
     {
         t.EmitTagPending();
         t.Transition(Data);
     }
     else if (c == _eof)
     {
         t.EofError(this);
         t.Transition(Data);
     }
     else if (c == '"' || c == '\'' || c == '<' || c == '=')
     {
         // Reported as an error, but the character still becomes the first
         // character of the new attribute's name.
         t.Error(this);
         t.TagPending.NewAttribute();
         t.TagPending.AppendAttributeName(c);
         t.Transition(AttributeName);
     }
     else
     {
         // A-Z, anything else, and the null character (which is also an error):
         // start an attribute and let AttributeName re-read this character.
         if (c == _nullChar)
         {
             t.Error(this);
         }

         t.TagPending.NewAttribute();
         r.Unconsume();
         t.Transition(AttributeName);
     }
 }
コード例 #16
0
ファイル: TokeniserState.cs プロジェクト: fengweijp/NSoup
            /// <summary>
            /// Collects a run of letters into the data buffer (lower-cased) while emitting
            /// them unchanged; on whitespace, '/' or '&gt;' picks the next state depending on
            /// whether the buffer reads "script".
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                if (r.MatchesLetter())
                {
                    string letters = r.ConsumeLetterSequence();
                    t.DataBuffer.Append(letters.ToLowerInvariant());
                    t.Emit(letters);
                    return;
                }

                char c = r.Consume();
                bool endsName = c == '\t' || c == '\n' || c == '\r' || c == '\f'
                                || c == ' ' || c == '/' || c == '>';

                if (!endsName)
                {
                    // Not a terminator: put the character back for the next state.
                    r.Unconsume();
                    t.Transition(ScriptDataDoubleEscaped);
                    return;
                }

                if (t.DataBuffer.ToString() == "script")
                {
                    t.Transition(ScriptDataEscaped);
                }
                else
                {
                    t.Transition(ScriptDataDoubleEscaped);
                }

                t.Emit(c);
            }
コード例 #17
0
ファイル: TokeniserState.cs プロジェクト: fengweijp/NSoup
 /// <summary>
 /// On '/', emits it, creates a fresh temp buffer and advances to
 /// ScriptDataDoubleEscapeEnd; otherwise returns to ScriptDataDoubleEscaped.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (!r.Matches('/'))
     {
         t.Transition(ScriptDataDoubleEscaped);
         return;
     }

     t.Emit('/');
     t.CreateTempBuffer();
     t.AdvanceTransition(ScriptDataDoubleEscapeEnd);
 }
コード例 #18
0
ファイル: TokeniserState.cs プロジェクト: fengweijp/NSoup
 /// <summary>
 /// Consumes one character and emits it (or the replacement character for a null
 /// character), choosing the next state from the character read; end of input is
 /// reported as an error and returns to Data.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char c = r.Consume();

     if (c == _eof)
     {
         t.EofError(this);
         t.Transition(Data);
         return;
     }

     if (c == _nullChar)
     {
         t.Error(this);
         t.Emit(_replacementChar);
         t.Transition(ScriptDataDoubleEscaped);
         return;
     }

     // Every remaining character is emitted as-is; only the follow-up state differs.
     t.Emit(c);

     if (c == '-')
     {
         return; // stay in this state
     }

     if (c == '<')
     {
         t.Transition(ScriptDataDoubleEscapedLessthanSign);
     }
     else if (c == '>')
     {
         t.Transition(ScriptData);
     }
     else
     {
         t.Transition(ScriptDataDoubleEscaped);
     }
 }
コード例 #19
0
ファイル: TokeniserState.cs プロジェクト: fengweijp/NSoup
 /// <summary>
 /// Looks at the current character: '-' and '&lt;' are emitted and advance to their
 /// follow-up states, a null character is replaced, end of input is an error, and
 /// any other text is emitted up to the next '-', '&lt;' or null character.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     char c = r.Current();

     if (c == '-' || c == '<')
     {
         t.Emit(c);

         if (c == '-')
         {
             t.AdvanceTransition(ScriptDataDoubleEscapedDash);
         }
         else
         {
             t.AdvanceTransition(ScriptDataDoubleEscapedLessthanSign);
         }
     }
     else if (c == _nullChar)
     {
         // Skip the null character and emit the replacement character instead.
         t.Error(this);
         r.Advance();
         t.Emit(_replacementChar);
     }
     else if (c == _eof)
     {
         t.EofError(this);
         t.Transition(Data);
     }
     else
     {
         string text = r.ConsumeToAny('-', '<', _nullChar);
         t.Emit(text);
     }
 }
コード例 #20
0
 /// <summary>
 /// Verifies that ConsumeTo(string) returns the text before the next occurrence of
 /// the sequence, and the rest of the input when the sequence is not found.
 /// </summary>
 public void consumeToString()
 {
     var reader = new CharacterReader("One Two Two Four");

     Assert.AreEqual("One ", reader.ConsumeTo("Two"));
     Assert.AreEqual('T', reader.Consume());

     // Searching again from inside the first "Two" finds the second one.
     Assert.AreEqual("wo ", reader.ConsumeTo("Two"));
     Assert.AreEqual('T', reader.Consume());

     // "Qux" never occurs, so the remainder is consumed.
     Assert.AreEqual("wo Four", reader.ConsumeTo("Qux"));
 }
コード例 #21
0
 /// <summary>
 /// Verifies that Advance skips a single character.
 /// </summary>
 public void advance()
 {
     var reader = new CharacterReader("One Two Three");

     Assert.AreEqual('O', reader.Consume());
     reader.Advance(); // skips the 'n'
     Assert.AreEqual('e', reader.Consume());
 }
コード例 #22
0
ファイル: TokeniserState.cs プロジェクト: fengweijp/NSoup
 /// <summary>
 /// When a letter follows, creates a pending tag (argument <c>false</c>) seeded with
 /// that letter and advances to ScriptDataEscapedEndTagName; otherwise emits "&lt;/"
 /// and returns to ScriptDataEscaped.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (!r.MatchesLetter())
     {
         t.Emit("</");
         t.Transition(ScriptDataEscaped);
         return;
     }

     t.CreateTagPending(false);

     // The tag name gets the lower-cased letter; the data buffer keeps it as written.
     t.TagPending.AppendTagName(char.ToLowerInvariant(r.Current()));
     t.DataBuffer.Append(r.Current());

     t.AdvanceTransition(ScriptDataEscapedEndTagName);
 }
コード例 #23
0
 /// <summary>
 /// Verifies that ConsumeLetterSequence returns a run of letters and stops at the
 /// first non-letter.
 /// </summary>
 public void consumeLetterSequence()
 {
     var reader = new CharacterReader("One &bar; qux");

     Assert.AreEqual("One", reader.ConsumeLetterSequence());
     Assert.AreEqual(" &", reader.ConsumeTo("bar;"));

     // Stops before the ';'.
     Assert.AreEqual("bar", reader.ConsumeLetterSequence());
     Assert.AreEqual("; qux", reader.ConsumeToEnd());
 }
コード例 #24
0
 /// <summary>
 /// Creates a tokeniser that keeps the given reader and error list.
 /// </summary>
 /// <param name="reader">Character reader stored for later use.</param>
 /// <param name="errors">Parse error list stored for later use.</param>
 public Tokeniser(CharacterReader reader, ParseErrorList errors)
 {
     _reader = reader;
     _errors = errors;
 }
コード例 #25
0
 /// <summary>
 /// Verifies that Matches compares the upcoming text case-sensitively from the
 /// current position.
 /// </summary>
 public void matches()
 {
     var reader = new CharacterReader("One Two Three");

     // From the start: exact-case prefixes match, a different case does not.
     Assert.IsTrue(reader.Matches('O'));
     Assert.IsTrue(reader.Matches("One Two Three"));
     Assert.IsTrue(reader.Matches("One"));
     Assert.IsFalse(reader.Matches("one"));

     // After consuming 'O' the comparison starts at 'n'.
     Assert.AreEqual('O', reader.Consume());
     Assert.IsFalse(reader.Matches("One"));
     Assert.IsTrue(reader.Matches("ne Two Three"));
     Assert.IsFalse(reader.Matches("ne Two Three Four")); // longer than the remaining input

     // Nothing matches once the input is used up.
     Assert.AreEqual("ne Two Three", reader.ConsumeToEnd());
     Assert.IsFalse(reader.Matches("ne"));
 }
コード例 #26
0
 /// <summary>
 /// Verifies that RewindToMark returns the reader to the position recorded by Mark.
 /// </summary>
 public void mark()
 {
     var reader = new CharacterReader("one");

     // Mark after the first character.
     reader.Consume();
     reader.Mark();

     Assert.AreEqual('n', reader.Consume());
     Assert.AreEqual('e', reader.Consume());
     Assert.IsTrue(reader.IsEmpty());

     // Rewinding makes 'n' the next character again.
     reader.RewindToMark();
     Assert.AreEqual('n', reader.Consume());
 }
コード例 #27
0
 /// <summary>
 /// Verifies ContainsIgnoreCase, including the case it does not find.
 /// </summary>
 public void containsIgnoreCase()
 {
     var reader = new CharacterReader("One TWO three");

     Assert.IsTrue(reader.ContainsIgnoreCase("two"));
     Assert.IsTrue(reader.ContainsIgnoreCase("three"));

     // weird one: does not find one, because it scans for consistent case only
     Assert.IsFalse(reader.ContainsIgnoreCase("one"));
 }
コード例 #28
0
ファイル: TokeniserState.cs プロジェクト: fengweijp/NSoup
 /// <summary>
 /// Three-way branch on the current character: a letter starts a possible double
 /// escape, '/' starts a possible end tag, and anything else emits '&lt;' and returns
 /// to ScriptDataEscaped.
 /// </summary>
 public override void Read(Tokeniser t, CharacterReader r)
 {
     if (r.MatchesLetter())
     {
         // Buffer the lower-cased letter, emit "<" plus the letter as written.
         t.CreateTempBuffer();
         t.DataBuffer.Append(char.ToLowerInvariant(r.Current()));
         t.Emit("<" + r.Current());
         t.AdvanceTransition(ScriptDataDoubleEscapeStart);
         return;
     }

     if (r.Matches('/'))
     {
         t.CreateTempBuffer();
         t.AdvanceTransition(ScriptDataEscapedEndTagOpen);
         return;
     }

     t.Emit('<');
     t.Transition(ScriptDataEscaped);
 }
コード例 #29
0
        /// <summary>
        /// Verifies that NextIndexOf(char) returns the offset of the next occurrence
        /// relative to the current position, or -1 when there is none.
        /// </summary>
        public void nextIndexOfChar()
        {
            string text = "blah blah";
            var reader = new CharacterReader(text);

            Assert.AreEqual(-1, reader.NextIndexOf('x'));
            Assert.AreEqual(3, reader.NextIndexOf('h'));

            string consumed = reader.ConsumeTo('h');
            Assert.AreEqual("bla", consumed);

            // Step over the 'h'; the next 'l' is two characters ahead.
            reader.Consume();
            Assert.AreEqual(2, reader.NextIndexOf('l'));

            Assert.AreEqual(" blah", reader.ConsumeToEnd());
            Assert.AreEqual(-1, reader.NextIndexOf('x'));
        }
コード例 #30
0
        /// <summary>
        /// Verifies that NextIndexOf(string) returns the offset of the next occurrence
        /// relative to the current position, or -1 when there is none.
        /// </summary>
        public void nextIndexOfString()
        {
            string text = "One Two something Two Three Four";
            var reader = new CharacterReader(text);

            Assert.AreEqual(-1, reader.NextIndexOf("Foo"));
            Assert.AreEqual(4, reader.NextIndexOf("Two"));

            // Once the first "Two" is behind the reader, the second one is found.
            Assert.AreEqual("One Two ", reader.ConsumeTo("something"));
            Assert.AreEqual(10, reader.NextIndexOf("Two"));

            Assert.AreEqual("something Two Three Four", reader.ConsumeToEnd());
            Assert.AreEqual(-1, reader.NextIndexOf("Two"));
        }
コード例 #31
0
 /// <summary>
 /// Verifies that ConsumeToEnd returns the whole remaining input and leaves the
 /// reader empty.
 /// </summary>
 public void consumeToEnd()
 {
     string text = "one two three";
     var reader = new CharacterReader(text);

     string remainder = reader.ConsumeToEnd();

     Assert.AreEqual(text, remainder);
     Assert.IsTrue(reader.IsEmpty());
 }
コード例 #32
0
ファイル: TokeniserState.cs プロジェクト: fengweijp/NSoup
            /// <summary>
            /// Accumulates the letters of an end-tag name; once a non-letter is reached,
            /// either finishes the tag (when it is the appropriate end tag and input
            /// remains) or falls back to AnythingElse.
            /// </summary>
            public override void Read(Tokeniser t, CharacterReader r)
            {
                if (r.MatchesLetter())
                {
                    // Tag name is stored lower-cased; the data buffer keeps the text as written.
                    string letters = r.ConsumeLetterSequence();
                    t.TagPending.AppendTagName(letters.ToLowerInvariant());
                    t.DataBuffer.Append(letters);
                    return;
                }

                if (!t.IsAppropriateEndTagToken() || r.IsEmpty())
                {
                    AnythingElse(t, r);
                    return;
                }

                char c = r.Consume();

                if (c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == ' ')
                {
                    t.Transition(BeforeAttributeName);
                }
                else if (c == '/')
                {
                    t.Transition(SelfClosingStartTag);
                }
                else if (c == '>')
                {
                    t.EmitTagPending();
                    t.Transition(Data);
                }
                else
                {
                    // Unexpected character: add it to the buffer before falling back.
                    t.DataBuffer.Append(c);
                    AnythingElse(t, r);
                }
            }