Example #1
        }              // func ScanLevel

        private SimpleToken ScanSimpleTokenNonWhiteSpace(ref int iStart)
        {
            iStart = iOffset;
            SimpleToken t = ScanSimpleToken();

            return(t == SimpleToken.WhiteSpace ? ScanSimpleTokenNonWhiteSpace(ref iStart) : t);
        }         // func ScanSimpleTokenNonWhiteSpace
        public void parse_returns_false_on_invalid_string(string token)
        {
            SimpleToken tkn;
            var         parsed = SimpleToken.TryParse(token, out tkn);

            parsed.Should().Be(false, "because we passed incorrect token string");
            tkn.Should().BeNull();
        }
Example #3
        /**
         * Print all tokens.
         * @param tokenReader
         */
        public static void dump(SimpleTokenReader tokenReader)
        {
            Console.WriteLine("text\ttype");
            IToken token = null;

            while ((token = tokenReader.read()) != null)
            {
                Console.WriteLine(token.getText() + "\t\t" + token.getType());
            }
        }
        public void to_string_returns_serialized_claims(string[] input, string expected)
        {
            var claims = input
                            .Select(i => i.Split('|'))
                            .Select(s => new Claim(s[0], s[1]))
                            .ToArray();

            var actual = new SimpleToken(claims).ToString();

            Assert.Equal(expected, actual);
        }
        public void parse_returns_correct_claims_on_valid_string(string token, string[] claims)
        {
            var expected = claims
                           .Where((x, i) => i % 2 == 0)
                           .Zip(claims.Where((x, i) => i % 2 != 0), Tuple.Create)
                           .Select(pair => new Claim(pair.Item1, pair.Item2));
            SimpleToken tkn;
            var         parsed = SimpleToken.TryParse(token, out tkn);

            parsed.Should().Be(true, "because we passed correct token string");
            tkn.Should().BeEquivalentTo(expected);
        }
        public void parse_returns_correct_claims_on_valid_string(string[] input)
        {
            var claims = input
                            .Select(i => i.Split('|'))
                            .Select(s => new Claim(s[0], s[1]))
                            .ToArray();

            var expected = new SimpleToken(claims);
            SimpleToken actual;
            var result = SimpleToken.TryParse(expected.ToString(), out actual);

            Assert.True(result);
            Assert.True(expected.SequenceEqual(actual, new ClaimComparer()));
        }
Example #7
        protected virtual Core.Tokenization.Token CreateToken(string s,
                                                              System.Text.RegularExpressions.GroupCollection groups)
        {
            Token t = null;

            if (_Type == TokenType.OtherTextPlaceable)
            {
                t = new GenericPlaceableToken(s, TokenClassName, _AutoSubstitutable);
            }
            else
            {
                t = new SimpleToken(s, _Type);
            }
            return(t);
        }
        public void ToComplexEntity_WhenSimpleEntity_ExpectCorrectMap()
        {
            // Arrange
            var mockPropertyMapper = new Mock<IPropertyGetSettersTyped<Token>>();
            var mockClaimsMapper = new Mock<IMapper<SimpleClaim, Claim>>();
            var mockClientMapper = new Mock<IMapper<SimpleClient, Client>>();

            mockClaimsMapper.Setup(r => r.ToComplexEntity(It.IsAny<SimpleClaim>())).Returns(new Claim("Val1", "Val2"));
            mockClientMapper.Setup(r => r.ToComplexEntity(It.IsAny<SimpleClient>())).Returns(new Client());

            mockPropertyMapper.Setup(r => r.GetSetters(It.IsAny<Type>()))
                .Returns(new Dictionary<string, TypedSetter<Token>>());

            var tokenMappers = new TokenMapper<Token>(mockPropertyMapper.Object, mockClaimsMapper.Object, mockClientMapper.Object);

            var simpleEntity = new SimpleToken
            {
                Claims = new List<SimpleClaim>(),
                Client = new SimpleClient(),
                Type = "Type",
                CreationTime = new DateTimeOffset(new DateTime(2016, 1, 1)),
                Issuer = "Issuer",
                Version = 1,
                Audience = "Audience",
                Lifetime = 1,
            };

            // Act
            var stopwatch = Stopwatch.StartNew();
            var complexEntity = tokenMappers.ToComplexEntity(simpleEntity);
            stopwatch.Stop();

            // Assert
            this.WriteTimeElapsed(stopwatch);

            Assert.That(complexEntity, Is.Not.Null);

            Assert.That(complexEntity.Claims, Is.Not.Null);
            Assert.That(complexEntity.Client, Is.Not.Null);
            Assert.That(complexEntity.Type, Is.EqualTo("Type"));
            Assert.That(complexEntity.CreationTime, Is.EqualTo(new DateTimeOffset(new DateTime(2016, 1, 1))));
            Assert.That(complexEntity.Issuer, Is.EqualTo("Issuer"));
            Assert.That(complexEntity.Version, Is.EqualTo(1));
            Assert.That(complexEntity.Audience, Is.EqualTo("Audience"));
            Assert.That(complexEntity.Lifetime, Is.EqualTo(1));
        }
        public override Core.Tokenization.Token Recognize(string s, int from, bool allowTokenBundles, ref int consumedLength)
        {
            if (String.IsNullOrEmpty(s) || from >= s.Length)
            {
                return(null);
            }

            consumedLength = 0;
            int originalStart = from;

            if (Core.CharacterProperties.IsCJKPunctuation(s[from]))
            {
                while (from < s.Length && Core.CharacterProperties.IsCJKPunctuation(s[from]))
                {
                    ++consumedLength;
                    ++from;
                    if (_JUSTONE)
                    {
                        break;
                    }
                }
                Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.GeneralPunctuation);
                return(t);
            }

            if (Core.CharacterProperties.IsCJKChar(s[from]))
            {
                while (from < s.Length && Core.CharacterProperties.IsCJKChar(s[from]))
                {
                    ++consumedLength;
                    ++from;
                    if (_JUSTONE)
                    {
                        break;
                    }
                }
                Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.CharSequence);
                return(t);
            }

            // TODO CJK punctuation etc.

            return(base.Recognize(s, from, allowTokenBundles, ref consumedLength));
        }
Example #10
        public override Core.Tokenization.Token Recognize(string s, int from, bool allowTokenBundles, ref int consumedLength)
        {
            if (String.IsNullOrEmpty(s) || from >= s.Length)
            {
                return(null);
            }

            consumedLength = 0;
            int originalStart = from;

            // splitting off all punctuation may exaggerate a bit - wait for user feedback
            if (System.Char.IsPunctuation(s[from]))
            {
                while (from < s.Length && System.Char.IsPunctuation(s[from]))
                {
                    ++consumedLength;
                    ++from;
                    if (_JUSTONE)
                    {
                        break;
                    }
                }
                Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.GeneralPunctuation);
                return(t);
            }

            if (Core.CharacterProperties.IsInBlock(s[from], Core.UnicodeBlock.Thai))
            {
                while (from < s.Length && Core.CharacterProperties.IsInBlock(s[from], Core.UnicodeBlock.Thai))
                {
                    ++consumedLength;
                    ++from;
                    if (_JUSTONE)
                    {
                        break;
                    }
                }
                Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.CharSequence);
                return(t);
            }

            return(base.Recognize(s, from, allowTokenBundles, ref consumedLength));
        }
        public override Core.Tokenization.Token Recognize(string s, int from, bool allowTokenBundles, ref int consumedLength)
        {
            if (String.IsNullOrEmpty(s) || from >= s.Length)
            {
                return(null);
            }

            consumedLength = 0;
            int originalStart = from;

            if (_DefaultPunctCharset.Contains(s[from]))
            {
                while (from < s.Length && _DefaultPunctCharset.Contains(s[from]))
                {
                    ++consumedLength;
                    ++from;
                }
                Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.GeneralPunctuation);
                return(t);
            }

            System.Text.RegularExpressions.Match m = _DefaultWordRegex.Match(s, from);
            if (m != null && m.Success && m.Index == from)
            {
                consumedLength = m.Length;
                Token t = new SimpleToken(m.Value, TokenType.Word);
                return(t);
            }

            /*
             *      AUTOMATON PUNCT [U+3000-U+303FU+3200-U+32FFU+FF01-U+FF0FU+FF1A-U+FF20U+FF3B-U+FF3DU+FF5B-U+FF64]
             *      NFA WORD [U+30A0-U+30FFU+FF65-U+FF9F]+
             *      NFA WORD [U+3040-U+3091U+3093-U+309F]+
             *      NFA WORD [U+3092]
             *      NFA WORD [U+4E00-U+9FFF]+
             *      NFA WORD [U+FF21-U+FF3AU+FF41-U+FF5A]+
             */

            return(base.Recognize(s, from, allowTokenBundles, ref consumedLength));
        }
Example #12
        public override Core.Tokenization.Token Recognize(string s, int from, bool allowTokenBundles, ref int consumedLength)
        {
            if (String.IsNullOrEmpty(s) || from >= s.Length)
            {
                return(null);
            }

            consumedLength = 0;
            int originalStart = from;

            if (_DefaultPunctCharset.Contains(s[from]))
            {
                while (from < s.Length && _DefaultPunctCharset.Contains(s[from]))
                {
                    ++consumedLength;
                    ++from;
                }
                Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.GeneralPunctuation);
                return(t);
            }

            if (s[from] >= 0x4e00 && s[from] <= 0x9fff)
            {
                while (from < s.Length && s[from] >= 0x4e00 && s[from] <= 0x9fff)
                {
                    ++consumedLength;
                    ++from;
                }
                Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.CharSequence);
                return(t);
            }

            // TODO CJK punctuation etc.

            return(base.Recognize(s, from, allowTokenBundles, ref consumedLength));
        }
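
The hard-coded range in the branch above, 0x4E00 through 0x9FFF, is the Unicode "CJK Unified Ideographs" block. Below is a small self-contained check of that test; the sample characters are illustrative values chosen for this sketch, not taken from the original code.

using System;

class CjkRangeCheck
{
    // U+4E00..U+9FFF is the "CJK Unified Ideographs" block used by the recognizer above.
    static bool IsCjkUnifiedIdeograph(char c) => c >= 0x4e00 && c <= 0x9fff;

    static void Main()
    {
        Console.WriteLine(IsCjkUnifiedIdeograph('中'));   // True  ('中' is U+4E2D)
        Console.WriteLine(IsCjkUnifiedIdeograph('A'));    // False ('A' is U+0041)
    }
}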
Example #13
 public void VisitSimpleToken(SimpleToken token)
 {
 }
Example #14
        private List <Core.Tokenization.Token> TokenizeInternal(string s,
                                                                int currentRun,
                                                                bool createWhitespaceTokens,
                                                                bool allowTokenBundles)
        {
            List <Token> result = new List <Token>();

            int p    = 0;
            int sLen = s.Length;

            while (p < sLen)
            {
                int start = p;

                while (p < sLen && System.Char.IsWhiteSpace(s, p))
                {
                    ++p;
                }

                if (p > start)
                {
                    if (createWhitespaceTokens)
                    {
                        Token t = new SimpleToken(s.Substring(start, p - start), TokenType.Whitespace);
                        t.Span = new SegmentRange(currentRun, start, p - 1);
                        result.Add(t);
                    }
                    start = p;
                }
                if (p >= sLen)
                {
                    break;
                }

                // test which recognizer claims the longest prefix

                Recognizer winningRecognizer = null;
                int        winningLength     = 0;
                Token      winningToken      = null;

                const bool allowBundlesOfDifferentType = false;

                for (int r = 0; r < _Parameters.Count; ++r)
                {
                    Recognizer rec            = _Parameters[r];
                    int        consumedLength = 0;
                    Token      t = rec.Recognize(s, start, allowTokenBundles, ref consumedLength);

                    if (t != null)
                    {
                        if (winningRecognizer == null ||
                            (winningLength < consumedLength && !(winningRecognizer.OverrideFallbackRecognizer && rec.IsFallbackRecognizer)))
                        {
                            winningToken      = t;
                            winningRecognizer = rec;
                            winningLength     = consumedLength;
                            p = start + consumedLength;
                        }
                        else if (allowTokenBundles && allowBundlesOfDifferentType)
                        {
                            Core.Tokenization.TokenBundle winningBundle
                                = winningToken as Core.Tokenization.TokenBundle;

                            if (winningBundle == null)
                            {
                                winningBundle = new TokenBundle(winningToken, winningRecognizer.Priority);
                                winningToken  = winningBundle;
                            }
                            else
                            {
                                winningBundle.Add(t, winningRecognizer.Priority);
                            }

                            System.Diagnostics.Debug.Assert(winningLength == consumedLength);
                            System.Diagnostics.Debug.Assert(p == start + consumedLength);
                        }
                        else if (winningRecognizer.Priority < rec.Priority)
                        {
                            // same length, but lower priority - highest prio wins
                            winningToken      = t;
                            winningRecognizer = rec;
                            winningLength     = consumedLength;
                            p = start + consumedLength;
                        }
                    }
                }

                if (winningToken == null)
                {
                    // none of the recognizers claimed any input, or there were no recognizers set up.
                    // ultimate fallback required: group by same Unicode category
                    // TODO scanning on just the category is too fine - we may want to group coarser categories together
                    System.Globalization.UnicodeCategory cat = System.Char.GetUnicodeCategory(s, start);
                    while (p < sLen && System.Char.GetUnicodeCategory(s, p) == cat)
                    {
                        ++p;
                    }
                    winningLength = p - start;
                    // TODO distinguish result token type depending on the category
                    winningToken      = new SimpleToken(s.Substring(start, p - start), TokenType.Word);
                    winningRecognizer = null;
                }
                else if (winningToken is TokenBundle)
                {
                    // convert single-element token bundles to single tokens
                    TokenBundle tb = winningToken as TokenBundle;
                    if (tb.Count == 1)
                    {
                        winningToken = tb[0].Token;
                    }
                }

                System.Diagnostics.Debug.Assert(winningLength > 0);
                System.Diagnostics.Debug.Assert(winningToken != null);

                winningToken.Span = new SegmentRange(currentRun, start, p - 1);

                result.Add(winningToken);
            }

            return(result);
        }
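
The recognizer loop above picks a winner by "longest claimed prefix wins, with priority breaking ties". The sketch below isolates that selection strategy in a self-contained form; the Rec class, the recognizer lambdas and the sample input are simplified stand-ins for illustration, not the real Recognizer types from this codebase.

using System;
using System.Collections.Generic;

static class LongestMatchDemo
{
    // Simplified stand-in for a recognizer: returns how many characters it can
    // claim starting at 'start' (0 means no match).
    class Rec
    {
        public string Name;
        public int Priority;
        public Func<string, int, int> Recognize;
    }

    static int CountWhile(string s, int start, Func<char, bool> pred)
    {
        int i = start;
        while (i < s.Length && pred(s[i])) i++;
        return i - start;
    }

    static void Main()
    {
        var recognizers = new List<Rec>
        {
            new Rec { Name = "Number", Priority = 1, Recognize = (s, i) => CountWhile(s, i, char.IsDigit) },
            new Rec { Name = "Word",   Priority = 2, Recognize = (s, i) => CountWhile(s, i, char.IsLetter) },
            new Rec { Name = "Punct",  Priority = 0, Recognize = (s, i) => char.IsPunctuation(s[i]) ? 1 : 0 },
        };

        string input = "abc123,x";
        for (int p = 0; p < input.Length; )
        {
            Rec winner = null;
            int winLen = 0;

            foreach (var rec in recognizers)
            {
                int len = rec.Recognize(input, p);
                if (len == 0) continue;

                // Longest claimed prefix wins; on equal length, the higher priority wins.
                if (winner == null || len > winLen || (len == winLen && rec.Priority > winner.Priority))
                {
                    winner = rec;
                    winLen = len;
                }
            }

            if (winner == null) winLen = 1;   // ultimate fallback: consume a single character
            Console.WriteLine((winner == null ? "Unknown" : winner.Name) + "\t'" + input.Substring(p, winLen) + "'");
            p += winLen;
        }
    }
}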
Example #15
        /// <summary>
        /// Extracts the next token found in the program source.
        /// </summary>
        public IToken NextToken()
        {
            if (SourcePosition > Source.Length)
            {
                throw new CompilerException(CurrentLine, CurrentLinePosition, "Read beyond the Source end");
            }

            var inComment = false;

            while (CurrentChar != C_EOF)
            {
                // Skip white chars.
                while (IsWhite(CurrentChar))
                {
                    NextChar();
                }

                //if (CurrentChar == '{')
                //{
                //    SkipComment();

                //    continue;
                //}

                //if (CurrentChar == '(')
                //{
                //    if (PeakChar() == '*')
                //    {
                //        NextChar();

                //        SkipComment();

                //        continue;
                //    }
                //}

                if (IsLetter(CurrentChar))
                {
                    return(CurrentToken = ParseIdent());
                }

                if (IsDigit(CurrentChar))
                {
                    return(CurrentToken = ParseNumber(1));
                }

                if (CurrentChar == '\'')
                {
                    return(CurrentToken = ParseString());
                }

                switch (CurrentChar)
                {
                case '{':
                {
                    SkipComment();

                    continue;
                }

                case '+':
                {
                    NextChar();

                    if (IsDigit(CurrentChar))
                    {
                        return(CurrentToken = ParseNumber(1));
                    }

                    return(CurrentToken = new SimpleToken(TokenCode.TOK_ADD_OP));
                }

                case '-':
                {
                    NextChar();

                    if (IsDigit(CurrentChar))
                    {
                        return(CurrentToken = ParseNumber(-1));
                    }

                    return(CurrentToken = new SimpleToken(TokenCode.TOK_SUB_OP));
                }

                case '*': NextChar(); return(CurrentToken = new SimpleToken(TokenCode.TOK_MUL_OP));

                case '/': NextChar(); return(CurrentToken = new SimpleToken(TokenCode.TOK_DIV_OP));

                case '=': NextChar(); return(CurrentToken = new SimpleToken(TokenCode.TOK_EQ_OP));

                case '<':
                {
                    NextChar();

                    if (CurrentChar == '>')
                    {
                        NextChar();

                        return(CurrentToken = new SimpleToken(TokenCode.TOK_NEQ_OP));         // '<>'
                    }
                    else if (CurrentChar == '=')
                    {
                        NextChar();

                        return(CurrentToken = new SimpleToken(TokenCode.TOK_LE_OP));         // '<='
                    }

                    return(CurrentToken = new SimpleToken(TokenCode.TOK_LT_OP));         // '<'
                }

                case '>':
                {
                    NextChar();

                    if (CurrentChar == '=')
                    {
                        NextChar();

                        return(CurrentToken = new SimpleToken(TokenCode.TOK_GE_OP));         // '>='
                    }

                    return(CurrentToken = new SimpleToken(TokenCode.TOK_GT_OP));         // '>'
                }

                case ';': NextChar(); return(CurrentToken = new SimpleToken(TokenCode.TOK_SEP));

                case ',': NextChar(); return(CurrentToken = new SimpleToken(TokenCode.TOK_LIST_SEP));

                case ':':
                {
                    NextChar();

                    if (CurrentChar == '=')
                    {
                        NextChar();

                        return(CurrentToken = new SimpleToken(TokenCode.TOK_ASGN_OP));
                    }

                    return(CurrentToken = new SimpleToken(TokenCode.TOK_DDOT));
                }

                case '(':
                {
                    NextChar();

                    if (CurrentChar == '*')
                    {
                        NextChar();

                        SkipComment();

                        continue;
                    }
                    else
                    {
                        return(CurrentToken = new SimpleToken(TokenCode.TOK_LBRA));
                    }
                }

                case ')': NextChar(); return(CurrentToken = new SimpleToken(TokenCode.TOK_RBRA));

                case '.': NextChar(); return(CurrentToken = new SimpleToken(TokenCode.TOK_PROG_END));

                case '\0': return(CurrentToken = new SimpleToken(TokenCode.TOK_EOF));

                default:
                    throw new CompilerException(CurrentLine, CurrentLinePosition, $"Unknown character '{CurrentChar}' found.");
                }
            }

            if (inComment)
            {
                throw new CompilerException(CurrentLine, CurrentLinePosition, "An end of comment expected.");
            }

            return(CurrentToken = new SimpleToken(TokenCode.TOK_EOF));
        }
Example #16
 public void VisitSimpleToken(SimpleToken token)
 {
     this._sb.Append(token.Text);
 }
Example #17
		public void VisitSimpleToken(SimpleToken token)
		{
			_plainText += token.Text;
		}
        public void TestFixtureSetup()
        {
            var database = RedisHelpers.ConnectionMultiplexer.GetDatabase();

            var claim1 = new SimpleClaim { Type = "Type1", Value = "Value1" };
            var claim2 = new SimpleClaim { Type = "Type2", Value = "Value2" };

            var client = new SimpleClient
            {
                Claims = new List<SimpleClaim> { claim1, claim2 },
                DataBag = new Dictionary<string, object> { { "AppId", 12 } }
            };

            var token = new SimpleToken
            {
                Claims = new List<SimpleClaim> { claim1, claim2 },
                Client = client,
                Type = "Type",
                CreationTime = new DateTimeOffset(new DateTime(2016, 1, 1)),
                Version = 1,
                Issuer = "Issuer",
                Lifetime = 120,
                Audience = "Audience"
            };

            var settings = new JsonSettingsFactory(new CustomMappersConfiguration { ClientMapper = CustomMapperFactory.CreateClientMapper<CustomClient>() }).Create();

            var serialized = JsonConvert.SerializeObject(token, settings);

            database.StringSet("DEFAULT_THS_Existing", serialized);
            database.StringSet("DEFAULT_THS_Delete", serialized);
        }
        public void simple_token_yields_passed_claims(List<Claim> claims)
        {
            var sut = new SimpleToken(claims.ToArray());

            sut.Should().Equal(claims);
        }
        public void token_is_a_collection_of_claims()
        {
            var tkn = new SimpleToken();

            Assert.IsAssignableFrom <IEnumerable <Claim> >(tkn);
        }
        public void token_returns_passed_claims(List <Claim> claims)
        {
            var tkn = new SimpleToken(claims);

            tkn.Should().BeEquivalentTo(claims);
        }
        public void simple_token_is_collection_of_claims()
        {
            var sut = new SimpleToken();

            Assert.IsAssignableFrom<IEnumerable<Claim>>(sut);
        }
Example #23
        /**
         * The finite state machine enters its initial state.
         * It never actually stays in this state; it immediately moves on to another state.
         * The initial state is entered when parsing starts, and again whenever a token has
         * been completed: the finished token is recorded here and a new token is begun.
         * @param ch
         * @return
         */
        private DfaState initToken(char ch)
        {
            if (tokenText.ToString() != "")
            {
                token.text = tokenText.ToString();
                tokens.Add(token);

                tokenText.Close();
                tokenText = new StringWriter();
                token     = new SimpleToken();
            }

            DfaState newState = DfaState.Initial;

            if (isAlpha(ch))
            {
                if (ch == 'i')
                {
                    newState = DfaState.Id_int1;
                }
                else
                {
                    newState = DfaState.Id;
                }
                token.type = TokenType.Identifier;
                tokenText.Write(ch);
            }
            else if (isDigit(ch))
            {
                newState   = DfaState.IntLiteral;
                token.type = TokenType.IntLiteral;
                tokenText.Write(ch);
            }
            else if (ch == '>')
            {
                newState   = DfaState.GT;
                token.type = TokenType.GT;
                tokenText.Write(ch);
            }
            else if (ch == '+')
            {
                newState   = DfaState.Plus;
                token.type = TokenType.Plus;
                tokenText.Write(ch);
            }
            else if (ch == '-')
            {
                newState   = DfaState.Minus;
                token.type = TokenType.Minus;
                tokenText.Write(ch);
            }
            else if (ch == '*')
            {
                newState   = DfaState.Star;
                token.type = TokenType.Star;
                tokenText.Write(ch);
            }
            else if (ch == '/')
            {
                newState   = DfaState.Slash;
                token.type = TokenType.Slash;
                tokenText.Write(ch);
            }
            else if (ch == ';')
            {
                newState   = DfaState.SemiColon;
                token.type = TokenType.SemiColon;
                tokenText.Write(ch);
            }
            else if (ch == '(')
            {
                newState   = DfaState.LeftParen;
                token.type = TokenType.LeftParen;
                tokenText.Write(ch);
            }
            else if (ch == ')')
            {
                newState   = DfaState.RightParen;
                token.type = TokenType.RightParen;
                tokenText.Write(ch);
            }
            else if (ch == '=')
            {
                newState   = DfaState.Assignment;
                token.type = TokenType.Assignment;
                tokenText.Write(ch);
            }
            else
            {
                newState = DfaState.Initial;
            }
            return(newState);
        }
 public void VisitSimpleToken(SimpleToken token)
 {
     // not required with this implementation
 }
Example #25
        }         // func GetLineStateData

        public bool ScanTokenAndProvideInfoAboutIt(TokenInfo tokenInfo, ref int lineState)
        {
RedoScan:
            SimpleToken token = SimpleToken.Unknown;
            int iStart = iOffset;

RedoLineState:
            if ((lineState & StateFlag) == 0)
            {
                if (token == SimpleToken.Unknown)
                {
                    token = ScanSimpleTokenNonWhiteSpace(ref iStart);
                }

                if (token == SimpleToken.Identifier)
                {
                    // local var : typedef
                    // const var typeof typedef
                    // const var : typedef
                    // function name.a:a(a : typedef, a : typedef) : typedef
                    // do (a : typedef,
                    // for a : typedef,
                    // foreach a : typedef
                    string sValue = GetValue(iStart, iOffset);
                    if (sValue == "local" || sValue == "foreach" || sValue == "for")
                    {
                        SetLineStateExtented(ref lineState, 1);
                    }
                    else if (sValue == "const")
                    {
                        SetLineStateExtented(ref lineState, 3);
                    }
                    else if (sValue == "function")
                    {
                        SetLineStateExtented(ref lineState, 5);
                    }
                    else if (sValue == "do")
                    {
                        SetLineStateExtented(ref lineState, 8);
                    }
                    else if (sValue == "cast")
                    {
                        SetLineStateExtented(ref lineState, 13);
                    }
                }
                goto EmitToken;
            }
            else if ((lineState & (StringFlag | CommentFlag)) != 0)             // Block (String, Comment)
            {
                #region -- block --
                if (iOffset >= sLine.Length)
                {
                    token = SimpleToken.Eof;
                }
                else
                {
                    int iLevel = GetLineStateData(lineState);
                    token = (lineState & StringFlag) == StringFlag ? SimpleToken.String : SimpleToken.LineComment;                     // Emit part
                    while (iOffset < sLine.Length)
                    {
                        if (sLine[iOffset] == ']' && iOffset + iLevel + 1 < sLine.Length && sLine[iOffset + iLevel + 1] == ']')
                        {
                            // check for equals
                            bool lValid = true;
                            for (int i = iOffset + 1; i <= iOffset + iLevel; i++)
                            {
                                if (sLine[i] != '=')
                                {
                                    lValid = false;
                                    break;
                                }
                            }
                            if (lValid)
                            {
                                iOffset  += iLevel + 2;
                                lineState = lineState & (ParserFlag | TypeFlag);
                                break;
                            }
                        }
                        iOffset++;
                    }
                }
                goto EmitToken;
                #endregion
            }
            else if ((lineState & TypeFlag) != 0)             // typedef parser: identifier.identifier[identifier,identifier]
            {
                #region -- typedef --
                int iLevel = GetLineStateData(lineState);
                if (token == SimpleToken.Unknown)
                {
                    token = ScanSimpleTokenNonWhiteSpace(ref iStart);
                }
                if (token != SimpleToken.Eof)
                {
                    switch ((lineState & TypeFlag) >> 6)
                    {
                    case 1:
                        if (token == SimpleToken.Identifier)
                        {
                            token = SimpleToken.Type;
                            SetLineStateType(ref lineState, 2);
                        }
                        else
                        {
                            SetLineStateType(ref lineState, 0);
                            goto RedoLineState;
                        }
                        break;

                    case 2:
                        if (token == SimpleToken.Dot)
                        {
                            SetLineStateType(ref lineState, 1);
                        }
                        else if (token == SimpleToken.Comma)
                        {
                            if (iLevel == 0)
                            {
                                SetLineStateType(ref lineState, 0);
                                goto RedoLineState;
                            }
                            else
                            {
                                SetLineStateType(ref lineState, 1);
                            }
                        }
                        else if (token == SimpleToken.BraceSquareOpen)
                        {
                            iLevel++;
                            if (iLevel > 0x7FFFFF)
                            {
                                throw new OverflowException();
                            }

                            SetLineStateData(ref lineState, iLevel);
                            SetLineStateType(ref lineState, 1);
                        }
                        else if (token == SimpleToken.BraceSquareClose)
                        {
                            iLevel--;
                            if (iLevel < 0)
                            {
                                SetLineStateType(ref lineState, 0);
                                goto RedoLineState;
                            }
                            else
                            {
                                SetLineStateData(ref lineState, iLevel);
                            }
                        }
                        else
                        {
                            SetLineStateType(ref lineState, 0);
                            goto RedoLineState;
                        }
                        break;
                    }
                }
                goto EmitToken;
                #endregion
            }
            else if ((lineState & ParserFlag) != 0)             // extended parser
            {
                if (token == SimpleToken.Unknown)
                {
                    token = ScanSimpleTokenNonWhiteSpace(ref iStart);
                }
                if (token != SimpleToken.Eof)
                {
                    switch ((lineState & ParserFlag) >> 2)
                    {
                        #region -- 1, 12 -- local var : typedef, var : typedef, for, foreach--
                    case 1:
                        if (token == SimpleToken.Identifier)                                 // identifier
                        {
                            SetLineStateExtented(ref lineState, 2);
                        }
                        else
                        {
                            SetLineStateExtented(ref lineState, 0);
                        }
                        break;

                    case 2:
                        if (token == SimpleToken.Colon)
                        {
                            SetLineStateExtented(ref lineState, 12);
                            SetLineStateType(ref lineState, 1);
                        }
                        else if (token == SimpleToken.Comma)
                        {
                            SetLineStateExtented(ref lineState, 1);
                        }
                        else
                        {
                            SetLineStateExtented(ref lineState, 0);
                        }
                        break;

                    case 12:
                        if (token == SimpleToken.Comma)
                        {
                            SetLineStateExtented(ref lineState, 1);
                        }
                        else
                        {
                            SetLineStateExtented(ref lineState, 0);
                        }
                        break;

                        #endregion
                        #region -- 3 -- const c typeof typedef, const c : typedef --
                    case 3:
                        if (token == SimpleToken.Identifier)
                        {
                            SetLineStateExtented(ref lineState, 4);
                        }
                        else
                        {
                            SetLineStateExtented(ref lineState, 0);
                        }
                        break;

                    case 4:
                        if ((token == SimpleToken.Identifier && GetValue(iStart, iOffset) == "typeof") ||
                            token == SimpleToken.Colon)
                        {
                            SetLineStateType(ref lineState, 1);
                        }
                        SetLineStateExtented(ref lineState, 0);
                        break;

                        #endregion
                        #region -- 5,14 -- function m.m:m (a : typedef, b : typedef) : typedef --
                    case 5:
                        if (token == SimpleToken.Identifier)
                        {
                            SetLineStateExtented(ref lineState, 6);
                        }
                        else if (token == SimpleToken.BraceOpen)
                        {
                            token = SimpleToken.Braces;
                            SetLineStateExtented(ref lineState, 9);
                        }
                        else
                        {
                            SetLineStateExtented(ref lineState, 0);
                        }
                        break;

                    case 6:
                        if (token == SimpleToken.Dot)
                        {
                            SetLineStateExtented(ref lineState, 5);
                        }
                        else if (token == SimpleToken.Colon)
                        {
                            SetLineStateExtented(ref lineState, 7);
                        }
                        else if (token == SimpleToken.BraceOpen)
                        {
                            token = SimpleToken.Braces;
                            SetLineStateExtented(ref lineState, 9);
                        }
                        else
                        {
                            SetLineStateExtented(ref lineState, 0);
                        }
                        break;

                    case 7:
                        if (token == SimpleToken.Identifier)
                        {
                            SetLineStateExtented(ref lineState, 8);
                        }
                        else
                        {
                            SetLineStateExtented(ref lineState, 0);
                        }
                        break;

                    case 8:
                        if (token == SimpleToken.BraceOpen)
                        {
                            token = SimpleToken.Braces;
                            SetLineStateExtented(ref lineState, 9);
                        }
                        else
                        {
                            SetLineStateExtented(ref lineState, 0);
                        }
                        break;

                    case 9:                             // argument list: a : typedef,
                        if (token == SimpleToken.Identifier)
                        {
                            SetLineStateExtented(ref lineState, 10);
                        }
                        else if (token == SimpleToken.BraceClose)
                        {
                            SetLineStateExtented(ref lineState, 14);
                        }
                        else
                        {
                            SetLineStateExtented(ref lineState, 0);
                        }
                        break;

                    case 10:
                        if (token == SimpleToken.Colon)
                        {
                            SetLineStateType(ref lineState, 1);
                            SetLineStateExtented(ref lineState, 11);
                        }
                        else if (token == SimpleToken.Comma)
                        {
                            SetLineStateExtented(ref lineState, 9);
                        }
                        else if (token == SimpleToken.BraceClose)
                        {
                            SetLineStateExtented(ref lineState, 14);
                        }
                        else
                        {
                            SetLineStateExtented(ref lineState, 0);
                        }
                        break;

                    case 11:
                        if (token == SimpleToken.Comma)
                        {
                            SetLineStateExtented(ref lineState, 9);
                        }
                        else if (token == SimpleToken.BraceClose)
                        {
                            SetLineStateExtented(ref lineState, 14);
                        }
                        else
                        {
                            SetLineStateExtented(ref lineState, 0);
                        }
                        break;

                    case 14:
                        if (token == SimpleToken.Colon)
                        {
                            SetLineStateType(ref lineState, 1);
                        }
                        SetLineStateExtented(ref lineState, 0);
                        break;

                        #endregion
                        #region -- 13 -- cast(typedef --
                    case 13:
                        if (token == SimpleToken.BraceOpen)
                        {
                            SetLineStateType(ref lineState, 1);
                            SetLineStateExtented(ref lineState, 0);
                        }
                        break;
                        #endregion
                    }
                }
                goto EmitToken;
            }

            throw new InvalidOperationException();

EmitToken:
            switch (token)
            {
            case SimpleToken.Unknown:
                goto RedoScan;

            case SimpleToken.Eof:
                return(false);

            case SimpleToken.WhiteSpace:
                tokenInfo.Color   = TokenColor.Text;
                tokenInfo.Type    = TokenType.WhiteSpace;
                tokenInfo.Trigger = TokenTriggers.None;
                break;

            case SimpleToken.Comment:
                tokenInfo.Color   = TokenColor.Comment;
                tokenInfo.Type    = TokenType.Comment;
                tokenInfo.Trigger = TokenTriggers.None;
                break;

            case SimpleToken.LineComment:
                tokenInfo.Color   = TokenColor.Comment;
                tokenInfo.Type    = TokenType.LineComment;
                tokenInfo.Trigger = TokenTriggers.None;
                break;

            case SimpleToken.String:
                tokenInfo.Color   = TokenColor.String;
                tokenInfo.Type    = TokenType.String;
                tokenInfo.Trigger = TokenTriggers.None;
                break;

            case SimpleToken.Number:
                tokenInfo.Color   = TokenColor.Number;
                tokenInfo.Type    = TokenType.Literal;
                tokenInfo.Trigger = TokenTriggers.None;
                break;

            case SimpleToken.BraceOpen:
            case SimpleToken.BraceSquareOpen:
                tokenInfo.Color   = OperatorColor;
                tokenInfo.Type    = TokenType.Operator;
                tokenInfo.Trigger = TokenTriggers.MatchBraces | TokenTriggers.ParameterStart;
                break;

            case SimpleToken.Comma:
                tokenInfo.Color   = OperatorColor;
                tokenInfo.Type    = TokenType.Operator;
                tokenInfo.Trigger = TokenTriggers.MatchBraces | TokenTriggers.ParameterNext;
                break;

            case SimpleToken.Dot:
            case SimpleToken.Colon:
                tokenInfo.Color   = OperatorColor;
                tokenInfo.Type    = TokenType.Delimiter;
                tokenInfo.Trigger = TokenTriggers.MemberSelect;
                break;

            case SimpleToken.Operator:
                tokenInfo.Color   = OperatorColor;
                tokenInfo.Type    = TokenType.Operator;
                tokenInfo.Trigger = TokenTriggers.None;
                break;

            case SimpleToken.BraceClose:
            case SimpleToken.BraceSquareClose:
            case SimpleToken.Braces:
                tokenInfo.Color   = OperatorColor;
                tokenInfo.Type    = TokenType.WhiteSpace;
                tokenInfo.Trigger = TokenTriggers.MatchBraces;
                break;

            case SimpleToken.Identifier:
                if (IsKeyword(sLine, iStart, iOffset))
                {
                    tokenInfo.Color   = TokenColor.Keyword;
                    tokenInfo.Type    = TokenType.Keyword;
                    tokenInfo.Trigger = TokenTriggers.None;
                }
                else
                {
                    tokenInfo.Color   = TokenColor.Identifier;
                    tokenInfo.Type    = TokenType.Text;
                    tokenInfo.Trigger = TokenTriggers.None;
                }
                break;

            case SimpleToken.Type:
                tokenInfo.Color   = TypeColor;
                tokenInfo.Type    = TokenType.Text;
                tokenInfo.Trigger = TokenTriggers.None;
                break;

            case SimpleToken.LongStringStart:
                lineState = (ScanLevel() << DataShift) | (lineState & StateFlag) | StringFlag;
                goto RedoLineState;

            case SimpleToken.LongCommentStart:
                lineState = (ScanLevel() << DataShift) | (lineState & StateFlag) | CommentFlag;
                goto RedoLineState;
            }

            tokenInfo.StartIndex = iStart;
            tokenInfo.Color      = tokenInfo.Color;
            tokenInfo.EndIndex   = iOffset - 1;
            return(true);
        }         // func ScanTokenAndProvideInfoAboutIt
        public override Core.Tokenization.Token Recognize(string s, int from, bool allowTokenBundles, ref int consumedLength)
        {
            /*
             * TODO handle some special cases, e.g.
             *
             * "--" in en-US
             * "...." etc. in mid-words
             * "l-xxx" in Maltese (leading and trailing clitics)
             * ta' in Maltese (word ends in non-sep punct)
             * */

            consumedLength = 0;

            if (String.IsNullOrEmpty(s))
            {
                return(null);
            }

            int len = s.Length;
            int pos = from;

            // check for leading whitespace
            while (pos < len && (System.Char.IsWhiteSpace(s, pos) || System.Char.IsSeparator(s, pos)))
            {
                ++pos;
            }

            if (pos > from)
            {
                // found a whitespace token
                consumedLength = pos - from;
                Token t = new SimpleToken(s.Substring(from, consumedLength), TokenType.Whitespace);
                return(t);
            }

            // initial hard token terminators: treat as punctuation token
            if (IsHardTokenTerminator(s, pos))
            {
                consumedLength = 1;
                Token t = new SimpleToken(s.Substring(from, consumedLength), TokenType.GeneralPunctuation);
                return(t);
            }

            // clitics, if defined by the culture, are always separated
            if (_LeadingClitics != null)
            {
                TrieIterator <char, int> iter
                    = _LeadingClitics.GetIterator();
                int cliticLength = 0;

                while (iter != null && pos + cliticLength < len && !iter.IsFinal)
                {
                    if (!iter.Traverse(s[pos + cliticLength]))
                    {
                        break;
                    }
                    ++cliticLength;
                }
                if (iter != null && iter.IsValid && iter.IsFinal)
                {
                    // found a clitic
                    consumedLength = cliticLength;
                    Token t = new SimpleToken(s.Substring(from, cliticLength), TokenType.Word);
                    return(t);
                }
            }

            char c         = s[pos];
            bool lastIsCJK = Core.CharacterProperties.IsCJKChar(c);

            while (pos < len &&
                   !(System.Char.IsWhiteSpace(c) || System.Char.IsSeparator(c) || IsHardTokenTerminator(s, pos)))
            {
                // don't step over critical script changes
                // NOTE default fallback tokenizer will return CJK sequences as one token while
                //  FE fallback tokenizer will split them into single-char char sequences.
                bool currentIsCJK = Core.CharacterProperties.IsCJKChar(c);
                if (currentIsCJK != lastIsCJK)
                {
                    break;
                }

                ++pos;
                if (pos < len)
                {
                    c         = s[pos];
                    lastIsCJK = currentIsCJK;
                }
            }

            int upto = pos;

            // [from, upto[ is now the longest non-whitespace chain. Start separating leading punctuation,
            // including full stops

            // TODO this will put ")." into one token. We may want to split it into two.

            for (pos = from; pos < upto && (IsSeparablePunct(s, pos) || s[pos] == '.'); ++pos)
            {
                ;
            }

            if (pos > from)
            {
                // found a sequence of separable punctuation
                consumedLength = pos - from;
                Token t = new SimpleToken(s.Substring(from, consumedLength), TokenType.GeneralPunctuation);
                return(t);
            }

            // token does not start with separable punctuation - remove separable punctuation from the end
            // and take care of trailing full stop and abbreviations

            // We need to catch situations like "...test)." - here, after the full stop is removed, we need to
            //  check for trailing closing punctuation again, and vice versa as in "test...)." and similar cases.
            bool separated;
            bool isAbbreviation = false;

            do
            {
                separated = false;

                // take care of trailing closing punctuation
                while (upto - 1 > pos && IsSeparablePunct(s, upto - 1))
                {
                    --upto;
                    separated = true;
                }

                // take care of full stop separation
                int trailingFullStops = 0;
                while (upto - 1 - trailingFullStops > pos && s[upto - 1 - trailingFullStops] == '.')
                {
                    ++trailingFullStops;
                }
                if (trailingFullStops > 1)
                {
                    // ellipsis
                    upto     -= trailingFullStops;
                    separated = true;
                }
                else if (trailingFullStops == 1)
                {
                    // single trailing full stop - separate if we aren't looking at a known abbreviation.
                    // TODO add abbreviation heuristics
                    // TODO use specific token type for abbreviations?
                    if (_Resources == null || !_Resources.IsAbbreviation(s.Substring(from, upto - from)))
                    {
                        --upto;
                        separated = true;
                    }
                    else
                    {
                        isAbbreviation = true;
                    }
                }
            } while (separated);

            // treat the remainder as a word
            consumedLength = upto - from;
            Token token = new SimpleToken(s.Substring(from, consumedLength),
                                          isAbbreviation ? TokenType.Abbreviation : TokenType.Word);

            return(token);
        }
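
The do/while block near the end of the method above repeatedly peels trailing closing punctuation and full stops off the candidate word, keeping a single trailing dot only when the whole string is a known abbreviation. The standalone sketch below isolates just that trimming logic; the small Abbreviations set is a stand-in for the _Resources.IsAbbreviation lookup, and IsSeparablePunct is approximated here with char.IsPunctuation.

using System;
using System.Collections.Generic;

static class TrailingPunctDemo
{
    static readonly HashSet<string> Abbreviations = new HashSet<string> { "e.g.", "etc." };

    static string TrimWord(string candidate)
    {
        int upto = candidate.Length;
        bool separated;
        do
        {
            separated = false;

            // peel trailing closing punctuation such as ')' or '"'
            while (upto > 1 && candidate[upto - 1] != '.' && char.IsPunctuation(candidate[upto - 1]))
            {
                --upto;
                separated = true;
            }

            // trailing full stops: an ellipsis is always split off, a single dot
            // only if the remaining text is not a known abbreviation
            int dots = 0;
            while (upto - 1 - dots > 0 && candidate[upto - 1 - dots] == '.') ++dots;
            if (dots > 1)
            {
                upto -= dots;
                separated = true;
            }
            else if (dots == 1 && !Abbreviations.Contains(candidate.Substring(0, upto)))
            {
                --upto;
                separated = true;
            }
        } while (separated);

        return candidate.Substring(0, upto);
    }

    static void Main()
    {
        Console.WriteLine(TrimWord("test)."));    // test
        Console.WriteLine(TrimWord("wait..."));   // wait
        Console.WriteLine(TrimWord("etc."));      // etc.
    }
}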
Example #27
        private SimpleToken token      = null;   // the token currently being parsed

        /**
         * Parse a string into tokens, i.e. translate the string into individual tokens
         * according to the token definitions.
         * This is a finite state automaton that transitions between different states.
         * @param code
         * @return
         */
        public SimpleTokenReader tokenize(string code)
        {
            tokens = new List <IToken>();
            StringReader code_reader = new StringReader(code);

            tokenText = new StringWriter();
            token     = new SimpleToken();
            int      ich   = 0;
            char     ch    = Convert.ToChar(ich);
            DfaState state = DfaState.Initial;

            try
            {
                while ((ich = code_reader.Read()) != -1)
                {
                    ch = Convert.ToChar(ich);
                    switch (state)
                    {
                    case DfaState.Initial:
                        state = initToken(ch);
                        break;

                    case DfaState.Id:
                        if (isAlpha(ch) || (isDigit(ch)))
                        {
                            tokenText.Write(ch);
                        }
                        else
                        {
                            state = initToken(ch);
                        }
                        break;

                    case DfaState.GT:
                        if (ch == '=')
                        {
                            token.type = TokenType.GE;      // change the token type to GE
                            state      = DfaState.GE;
                            tokenText.Write(ch);
                        }
                        else
                        {
                            state = initToken(ch);          // leave the GT state and save the token
                        }
                        break;

                    case DfaState.GE:
                    case DfaState.Assignment:
                    case DfaState.Plus:
                    case DfaState.Minus:
                    case DfaState.Star:
                    case DfaState.Slash:
                    case DfaState.SemiColon:
                    case DfaState.LeftParen:
                    case DfaState.RightParen:
                        state = initToken(ch);              // leave the current state and save the token
                        break;

                    case DfaState.IntLiteral:
                        if (isDigit(ch))
                        {
                            tokenText.Write(ch);           // stay in the integer-literal state
                        }
                        else
                        {
                            state = initToken(ch);          // leave the current state and save the token
                        }
                        break;

                    case DfaState.Id_int1:
                        if (ch == 'n')
                        {
                            state = DfaState.Id_int2;
                            tokenText.Write(ch);
                        }
                        else if (isDigit(ch) || isAlpha(ch))
                        {
                            state = DfaState.Id;        // switch back to the Id state
                            tokenText.Write(ch);
                        }
                        else
                        {
                            state = initToken(ch);
                        }
                        break;

                    case DfaState.Id_int2:
                        if (ch == 't')
                        {
                            state = DfaState.Id_int3;
                            tokenText.Write(ch);
                        }
                        else if (isDigit(ch) || isAlpha(ch))
                        {
                            state = DfaState.Id;        // switch back to the Id state
                            tokenText.Write(ch);
                        }
                        else
                        {
                            state = initToken(ch);
                        }
                        break;

                    case DfaState.Id_int3:
                        if (isBlank(ch))
                        {
                            token.type = TokenType.Int;
                            state      = initToken(ch);
                        }
                        else
                        {
                            state = DfaState.Id;        // switch back to the Id state
                            tokenText.Write(ch);
                        }
                        break;

                    default:
                        Console.WriteLine("Unexpected state: {0}", state);
                        break;
                    }
                }

                if (tokenText.ToString() != "")
                {
                    initToken(ch);
                }
            }
            catch (IOException e)
            {
                Console.WriteLine(
                    "{0}: The write operation could not be performed ",
                    e.GetType().Name);
            }

            return(new SimpleTokenReader(tokens));
        }
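
Taken together with the dump helper from the earlier example, the tokenizer above could be exercised roughly as follows. The SimpleLexer class name is an assumption made for this sketch; the snippets only show the tokenize method itself, not the class that hosts it.

// Hypothetical usage sketch: SimpleLexer is an assumed name for the class that
// hosts tokenize(); SimpleTokenReader and dump(...) appear in the examples above.
var lexer = new SimpleLexer();
SimpleTokenReader reader = lexer.tokenize("int age = 45;");
dump(reader);   // prints each token's text and type: int/Int, age/Identifier, =/Assignment, 45/IntLiteral, ;/SemiColon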