Inheritance: Microsoft.Languages.Core.Text.CharacterStream
Esempio n. 1
0
        /// <summary>
        /// Skips leading whitespace and classifies the run of characters that
        /// follows, reporting the range that was scanned.
        /// </summary>
        /// <param name="cs">Character stream to examine.</param>
        /// <param name="tagEnd">Stream position at which scanning stops.</param>
        /// <param name="range">
        /// Receives the range from the entry position to the position where
        /// scanning stopped, or a default <c>TextRange</c> when the stream was
        /// already at its end or at <paramref name="tagEnd"/> on entry.
        /// </param>
        /// <returns>
        /// <c>None</c> when nothing is available on entry; <c>Tag</c> or
        /// <c>Equals</c> when the first non-whitespace character is a tag
        /// delimiter or '='; <c>Number</c> when every scanned character is a
        /// decimal or '.'; <c>Letters</c> when every scanned character is an ANSI
        /// letter, '_' or '-'; <c>Identifier</c> for any other run that starts
        /// with an ANSI letter; <c>Unknown</c> otherwise.
        /// </returns>
        public static NextTokenType PeekNextToken(HtmlCharStream cs, int tagEnd, out ITextRange range) {
            int origin = cs.Position;

            // Nothing to look at.
            if (cs.IsEndOfStream() || origin == tagEnd) {
                range = new TextRange();
                return NextTokenType.None;
            }

            while (cs.IsWhiteSpace()) {
                cs.MoveToNextChar();
            }

            // Only whitespace was left.
            // NOTE(review): unlike the main path below, this exit leaves the
            // stream positioned after the whitespace instead of rewinding to
            // the entry position - confirm callers expect that.
            if (cs.IsEndOfStream() || cs.Position == tagEnd) {
                range = TextRange.FromBounds(origin, cs.Position);
                return NextTokenType.Unknown;
            }

            NextTokenType result = NextTokenType.Unknown;

            if (cs.IsAtTagDelimiter()) {
                result = NextTokenType.Tag;
            } else if (cs.CurrentChar == '=') {
                result = NextTokenType.Equals;
            } else {
                bool startsWithLetter = cs.IsAnsiLetter();
                int nameChars = 0;
                int numericChars = 0;
                int scanned = 0;

                // Consume characters until a separator, the end of the stream
                // or the tag end is reached, counting each character class.
                while (true) {
                    bool atBoundary = cs.IsEndOfStream()
                        || cs.IsWhiteSpace()
                        || cs.IsAtTagDelimiter()
                        || cs.CurrentChar == '='
                        || cs.Position >= tagEnd;

                    if (atBoundary) {
                        break;
                    }

                    if (cs.IsAnsiLetter() || cs.CurrentChar == '_' || cs.CurrentChar == '-') {
                        nameChars++;
                    } else if (cs.IsDecimal() || cs.CurrentChar == '.') {
                        numericChars++;
                    }

                    cs.MoveToNextChar();
                    scanned++;
                }

                if (scanned > 0) {
                    if (numericChars == scanned) {
                        result = NextTokenType.Number;
                    } else if (nameChars == scanned) {
                        result = NextTokenType.Letters;
                    } else if (startsWithLetter) {
                        result = NextTokenType.Identifier;
                    }
                }
            }

            range = TextRange.FromBounds(origin, cs.Position);

            // Rewind: on this path the stream is left where it was on entry.
            cs.Position = origin;
            return result;
        }
Esempio n. 2
0
        public void HtmlTokenizer_GetNameToken_BasicTest() {
            // A plain name with no ':' in it.
            var stream = new HtmlCharStream("foo");
            var tokenizer = new HtmlTokenizer(stream);

            NameToken token = tokenizer.GetNameToken();

            // The token covers all three input characters.
            Assert.Equal(3, token.Length);
            Assert.Equal(0, token.Start);
            Assert.Equal(3, token.End);

            // The name part is the entire token.
            Assert.Equal(3, token.NameRange.Length);
            Assert.Equal(0, token.NameRange.Start);
            Assert.Equal(3, token.NameRange.End);

            // The prefix range is empty.
            Assert.Equal(0, token.PrefixRange.Start);
            Assert.Equal(0, token.PrefixRange.End);
        }
Esempio n. 3
0
        public void CharStream_BasicTest() {
            // Layout: "abcd" at 0-3, quoted "foo" at 4-8, CR at 9, LF at 10, '<' at 11.
            string input = "abcd\"foo\"\r\n<a href=";
            var stream = new HtmlCharStream(input);

            Assert.Equal('a', stream.CurrentChar);

            // Forward and backward relative moves.
            stream.Advance(2);
            Assert.False(stream.IsEndOfStream());
            Assert.Equal('c', stream.CurrentChar);

            stream.Advance(-1);
            Assert.False(stream.IsEndOfStream());
            Assert.Equal('b', stream.CurrentChar);

            // Advancing beyond the text stops at the end, where CurrentChar is 0.
            stream.Advance(input.Length);
            Assert.True(stream.IsEndOfStream());
            Assert.Equal(0, stream.CurrentChar);

            // Going back by the full length returns to the first character.
            stream.Advance(-input.Length);
            Assert.False(stream.IsEndOfStream());
            Assert.Equal('a', stream.CurrentChar);

            // Look-ahead inside the text.
            Assert.Equal('d', stream.LookAhead(3));
            Assert.Equal('\"', stream.LookAhead(4));

            // Look-ahead outside the text yields 0 in both directions.
            Assert.Equal(0, stream.LookAhead(input.Length));
            Assert.Equal(0, stream.LookAhead(-1));

            Assert.Equal(input.Length, stream.DistanceFromEnd);
            stream.Advance(1);
            Assert.Equal(input.Length - 1, stream.DistanceFromEnd);

            // Opening quote versus the first character inside the string.
            stream.Position = 4;
            Assert.True(stream.IsAtString());
            stream.Position = 5;
            Assert.False(stream.IsAtString());

            // CR and LF both count as whitespace.
            stream.Position = 9;
            Assert.True(stream.IsWhiteSpace());
            stream.MoveToNextChar();
            Assert.True(stream.IsWhiteSpace());

            // The '<' that follows the line break.
            stream.MoveToNextChar();
            Assert.True(stream.IsAtTagDelimiter());
        }
Esempio n. 4
0
        public void HtmlTokenizer_GetNameToken_MissingNameTest() {
            // A prefix followed by ':' with nothing after it.
            var stream = new HtmlCharStream("foo:");
            var tokenizer = new HtmlTokenizer(stream);

            NameToken token = tokenizer.GetNameToken();

            // The token includes the trailing colon.
            Assert.Equal(4, token.Length);
            Assert.Equal(0, token.Start);
            Assert.Equal(4, token.End);

            // The prefix is the three characters before the colon.
            Assert.True(token.HasPrefix());
            Assert.Equal(0, token.PrefixRange.Start);
            Assert.Equal(3, token.PrefixRange.End);

            // No name part is present.
            Assert.False(token.HasName());
            Assert.Equal(0, token.NameRange.Length);

            // Not a qualified name, yet the qualified range spans the whole token.
            Assert.False(token.HasQualifiedName());
            Assert.Equal(0, token.QualifiedName.Start);
            Assert.Equal(4, token.QualifiedName.End);
        }
Esempio n. 5
0
        public void HtmlCharStream_IsNameCharTest() {
            // An empty stream is at its end right away.
            var emptyStream = new HtmlCharStream(new TextStream(""));
            Assert.True(emptyStream.IsEndOfStream());
            Assert.Equal(0, emptyStream.Length);

            // Five characters: '<' at 0, 'h' at 1, digits at 2-4.
            var cs = new HtmlCharStream(new TextStream("<h123"));
            Assert.Equal(0, cs.Position);
            Assert.False(cs.IsEndOfStream());
            cs.Position = 5;
            Assert.True(cs.IsEndOfStream());
            cs.Position = 0;
            Assert.False(cs.IsEndOfStream());

            // Single-step and relative movement.
            cs.MoveToNextChar();
            Assert.Equal(1, cs.Position);

            cs.Advance(2);
            Assert.Equal(3, cs.Position);

            cs.Advance(-2);
            Assert.Equal(1, cs.Position);

            // A large advance ends up at the end of the stream.
            cs.Advance(1000);
            Assert.True(cs.IsEndOfStream());

            cs.Position = 0;
            Assert.True(cs.IsAtTagDelimiter());
            Assert.Equal('<', cs.CurrentChar);
            Assert.Equal('h', cs.NextChar);

            // 'h' is a name character and may also start a name.
            cs.Position = 1;
            Assert.False(cs.IsAtTagDelimiter());
            Assert.True(cs.IsNameChar());
            Assert.True(HtmlCharStream.IsNameStartChar(cs.CurrentChar));

            // '1' is a name character but may not start a name.
            cs.Position = 2;
            Assert.False(cs.IsAtTagDelimiter());
            Assert.True(cs.IsNameChar());
            Assert.False(HtmlCharStream.IsNameStartChar(cs.CurrentChar));
        }
Esempio n. 6
0
 /// <summary>
 /// Creates a tokenizer over the given character stream, together with a
 /// <c>StringClosure</c> bound to that same stream.
 /// </summary>
 /// <param name="cs">Character stream the tokenizer reads from.</param>
 public HtmlTokenizer(HtmlCharStream cs) {
     this._cs = cs;
     this._stringClosure = new StringClosure(cs);
 }
Esempio n. 7
0
 /// <summary>
 /// Builds a <c>StringClosure</c> over a new character stream created from
 /// the supplied text.
 /// </summary>
 /// <param name="text">Text to wrap in an <c>HtmlCharStream</c>.</param>
 /// <returns>The newly created closure.</returns>
 private StringClosure CreateStringClosure(string text) {
     return new StringClosure(new HtmlCharStream(text));
 }
Esempio n. 8
0
        public void HtmlTokenizer_SkipWhitespaceTest() {
            // Layout: 3 spaces, "abc" at 3-5, 2 tabs, "def" at 8-10, CR LF space, "gh" at 14.
            var stream = new HtmlCharStream("   abc\t\tdef\r\n gh");
            var tokenizer = new HtmlTokenizer(stream);

            // Leading spaces are skipped.
            tokenizer.SkipWhitespace();
            Assert.Equal(3, stream.Position);

            // Calling again on a non-whitespace character does not move.
            tokenizer.SkipWhitespace();
            Assert.Equal(3, stream.Position);

            // Step over "abc", then skip the tabs.
            stream.Advance(3);
            tokenizer.SkipWhitespace();
            Assert.Equal(8, stream.Position);

            // Step over "def", then skip the line break and the space.
            stream.Advance(3);
            tokenizer.SkipWhitespace();
            Assert.Equal(14, stream.Position);
        }
Esempio n. 9
0
 /// <summary>
 /// Creates a string closure that works on the given character stream.
 /// </summary>
 /// <param name="cs">Character stream stored for later use by this instance.</param>
 public StringClosure(HtmlCharStream cs) {
     this._cs = cs;
 }
Esempio n. 10
0
        /// <summary>
        /// Parse text from a text provider within a given range.
        /// Raises <c>ParsingStarting</c> before and <c>ParsingComplete</c> after
        /// the parse and, when statistics are enabled, records the elapsed
        /// time and throughput in <c>Stats</c>.
        /// </summary>
        /// <param name="textProvider">Text provider</param>
        /// <param name="range">Range to parse</param>
        public void Parse(ITextProvider textProvider, ITextRange range) {
            // The start time is captured only when statistics are enabled on entry.
            DateTime? timeStart = null;

            if (Stats.Enabled) {
                timeStart = DateTime.UtcNow;
            }

            // Invoke through a local copy so that a handler being removed
            // between the null check and the call cannot cause a
            // NullReferenceException.
            var parsingStarting = ParsingStarting;
            if (parsingStarting != null) {
                parsingStarting(this, new HtmlParserRangeEventArgs(range));
            }

            DocType = DocType.Undefined;

            _cs = new HtmlCharStream(textProvider, range);
            _tokenizer = new HtmlTokenizer(_cs);
            _softRangeEnd = range.End;

            // Presumably drives the actual parse; defined outside this block.
            OnTextState();

            var parsingComplete = ParsingComplete;
            if (parsingComplete != null) {
                parsingComplete(this, new HtmlParserRangeEventArgs(range));
            }

            // Require a recorded start time as well: statistics may have been
            // switched on while parsing was in progress, in which case there
            // is nothing to measure against.
            if (Stats.Enabled && timeStart.HasValue) {
                Stats.ParseTime = DateTime.UtcNow - timeStart.Value;

                // The system clock is coarse, so a fast parse can measure as
                // zero (or, after a clock adjustment, negative) elapsed time.
                // Dividing by that would produce Infinity/NaN, whose conversion
                // to int is not well defined; report 0 instead and clamp large
                // rates to int.MaxValue.
                double elapsedMs = Stats.ParseTime.TotalMilliseconds;
                double rate = elapsedMs > 0
                    ? 1000.0 * (double)_cs.Length / elapsedMs + 0.5
                    : 0.0;

                Stats.CharactersPerSecond = rate >= int.MaxValue ? int.MaxValue : (int)rate;
            }
        }