LexCharUtils, Microsoft.ML.Transforms C# (CSharp)のコード例

コード例 #1

0

ファイルを表示

ファイル: Lexer.cs プロジェクト: artemiusgreat/ML-NET

            /// <summary>
            /// Consumes a single line terminator and returns it. A CR that is immediately
            /// followed by an LF is skipped so that the pair yields just the LF; the skipped
            /// CR is appended to <paramref name="sb"/> when a builder is supplied. The
            /// returned character is never appended to <paramref name="sb"/>. A
            /// NewLineToken is queued and the lexer is flagged as being at a line start.
            /// </summary>
            /// <param name="sb">Optional builder that receives a dropped CR.</param>
            /// <returns>The line terminator character that ended the line.</returns>
            private char LexLineTerm(StringBuilder sb = null)
            {
                Contracts.Assert(LexCharUtils.StartKind(ChCur) == LexStartKind.LineTerm);

                int ichStart = _cursor.IchCur;

                bool fCrLf = ChCur == '\r' && ChPeek(1) == '\n';
                if (fCrLf)
                {
                    // Drop the CR, preserving it in the caller's buffer if there is one.
                    if (sb != null)
                    {
                        sb.Append(ChCur);
                    }
                    ChNext();
                }

                char chTerm = ChCur;
                ChNext();

                // A terminator that does not begin the current token sits inside a larger
                // token (block comments and verbatim strings lex their newlines this way).
                bool fNested = _ichMinTok != ichStart;
                if (fNested)
                {
                    _queue.Enqueue(new NewLineToken(GetTextSpan(ichStart, _cursor.IchCur), true));
                }
                else
                {
                    _queue.Enqueue(new NewLineToken(GetSpan(), false));
                }

                _fLineStart = true;
                return chTerm;
            }

コード例 #2

0

ファイルを表示

ファイル: Lexer.cs プロジェクト: artemiusgreat/ML-NET

 /// <summary>
 /// Consumes a run of characters that cannot start any token and returns an
 /// ErrorToken (ErrId.BadChar) describing them. The message lists every skipped
 /// character followed by its Unicode escape in parentheses. The run ends at the
 /// first character whose start kind is not LexStartKind.None, or at end of input.
 /// </summary>
 private Token LexError()
 {
     _sb.Length = 0;
     for (; ;)
     {
         char chBad = ChCur;
         _sb.AppendFormat("{0}({1})", chBad, LexCharUtils.GetUniEscape(chBad));

         // Advance, then stop as soon as something lexable (or the end) is reached.
         LexStartKind kindNext = LexCharUtils.StartKind(ChNext());
         if (kindNext != LexStartKind.None || Eof)
         {
             break;
         }
     }
     return new ErrorToken(GetSpan(), ErrId.BadChar, _sb.ToString());
 }

コード例 #3

0

ファイルを表示

ファイル: Lexer.cs プロジェクト: artemiusgreat/ML-NET

 /// <summary>
 /// Skips over a run of spacing characters. No token is produced for spacing,
 /// so the result is always null.
 /// </summary>
 private Token LexSpace()
 {
     Contracts.Assert(LexCharUtils.StartKind(ChCur) == LexStartKind.Space);

     // The current character starts the run, so advance at least once and keep
     // advancing while the newly reached character is still a space.
     char chReached;
     do
     {
         chReached = ChNext();
     } while (LexCharUtils.IsSpace(chReached));

     return null;
 }

コード例 #4

0

ファイルを表示

ファイル: Lexer.cs プロジェクト: artemiusgreat/ML-NET

            /// <summary>
            /// Starts a new token at the current position and dispatches to the specific
            /// lexing routine selected by the start kind of the current character. Must
            /// not be called at end of input. Spacing, line terminators and a stray
            /// verbatim marker yield null; every other case returns whatever the
            /// dispatched routine returns.
            /// </summary>
            private Token FetchToken()
            {
                Contracts.Assert(!Eof);
                StartTok();

                LexStartKind kind = LexCharUtils.StartKind(ChCur);

                // Spacing and preprocessor starts are the only kinds that leave the
                // line-start flag as it was.
                bool fKeepsLineStart = kind == LexStartKind.Space || kind == LexStartKind.PreProc;
                if (!fKeepsLineStart)
                {
                    _fLineStart = false;
                }

                switch (kind)
                {
                case LexStartKind.Space:
                    return LexSpace();

                case LexStartKind.LineTerm:
                    LexLineTerm();
                    return null;

                case LexStartKind.Comment:
                    return LexComment();

                case LexStartKind.PreProc:
                    return LexPreProc();

                case LexStartKind.Ident:
                    return LexIdent();

                case LexStartKind.Verbatim:
                {
                    // The marker must be followed by a string literal or an identifier.
                    char chAfter = ChPeek(1);
                    if (chAfter == '"')
                    {
                        return LexStrLit();
                    }
                    if (LexCharUtils.StartKind(chAfter) == LexStartKind.Ident)
                    {
                        return LexIdent();
                    }

                    // Otherwise skip the marker and report it.
                    ChNext();
                    ReportError(ErrId.VerbatimLiteralExpected);
                    return null;
                }

                case LexStartKind.StrLit:
                    return LexStrLit();

                case LexStartKind.NumLit:
                    return LexNumLit();

                case LexStartKind.Punc:
                    return LexPunc();

                default:
                    return LexError();
                }
            }

コード例 #5

0

ファイルを表示

ファイル: Lexer.cs プロジェクト: artemiusgreat/ML-NET

            /// <summary>
            /// Accumulates the characters of an identifier into _sb and returns the pooled
            /// string, or null when nothing was accumulated. Unicode escapes are accepted
            /// inside the identifier; accepting one sets <paramref name="fVerbatim"/> to
            /// true. Characters for which LexCharUtils.IsFormat is true are consumed but
            /// not stored. Lexing stops at the first non-identifier character, at a failed
            /// escape, or at an escape that does not decode to an identifier character
            /// (which is reported as ErrId.BadChar).
            /// </summary>
            private NormStr LexIdentCore(ref bool fVerbatim)
            {
                Contracts.Assert(LexCharUtils.IsIdentStart(ChCur));

                _sb.Length = 0;
                while (true)
                {
                    char chIdent;
                    if (ChCur != '\\')
                    {
                        // Plain character: take it only if it can be part of an identifier.
                        chIdent = ChCur;
                        if (!LexCharUtils.IsIdent(chIdent))
                        {
                            break;
                        }
                        ChNext();
                    }
                    else
                    {
                        // Escaped character: only Unicode escapes are allowed here.
                        int ichEsc = _cursor.IchCur;
                        uint uEsc;
                        if (!FLexEscChar(true, out uEsc))
                        {
                            break;
                        }

                        // The value must fit in a single char and be an identifier character.
                        if (uEsc > 0xFFFF || !LexCharUtils.IsIdent((char)uEsc))
                        {
                            ReportError(ichEsc, _cursor.IchCur, ErrId.BadChar, LexCharUtils.GetUniEscape(uEsc));
                            break;
                        }
                        chIdent = (char)uEsc;
                        fVerbatim = true;
                    }

                    Contracts.Assert(LexCharUtils.IsIdent(chIdent));
                    if (LexCharUtils.IsFormat(chIdent))
                    {
                        continue;
                    }
                    _sb.Append(chIdent);
                }

                if (_sb.Length > 0)
                {
                    return _lex._pool.Add(_sb);
                }
                return null;
            }

コード例 #6

0

ファイルを表示

ファイル: Lexer.cs プロジェクト: artemiusgreat/ML-NET

            /// <summary>
            /// Converts the decimal digits held in _sb into an integer literal token.
            /// If the value does not fit in a ulong, ErrId.IntOverflow is reported once
            /// and the token carries ulong.MaxValue.
            /// </summary>
            /// <param name="ilk">The integer literal kind to record on the token.</param>
            /// <returns>An IntLitToken covering the current token span.</returns>
            private Token LexDecInt(IntLitKind ilk)
            {
                // Digits are in _sb.
                Contracts.Assert(_sb.Length > 0);

                const ulong Radix = 10;
                ulong u = 0;

                for (int ich = 0; ich < _sb.Length; ich++)
                {
                    ulong digit = (ulong)LexCharUtils.GetDecVal(_sb[ich]);

                    // u * Radix + digit exceeds ulong.MaxValue exactly when
                    // u > (ulong.MaxValue - digit) / Radix. Testing this up front detects
                    // overflow without throwing and catching an exception.
                    if (u > (ulong.MaxValue - digit) / Radix)
                    {
                        ReportError(ErrId.IntOverflow);
                        u = ulong.MaxValue;
                        break;
                    }
                    u = u * Radix + digit;
                }
                return new IntLitToken(GetSpan(), u, ilk);
            }

コード例 #7

0

ファイルを表示

ファイル: Lexer.cs プロジェクト: artemiusgreat/ML-NET

            /// <summary>
            /// Lexes the longest punctuator (operator) that begins at the current
            /// character. The candidate text is grown one character at a time for as long
            /// as the keyword table recognizes it as a punctuator or a prefix of one, and
            /// the length of the last complete punctuator seen is remembered. If no
            /// complete punctuator is found, the input is handed to LexError.
            /// </summary>
            private Token LexPunc()
            {
                int     cchBest = 0;
                TokKind tidBest = TokKind.None;

                _sb.Length = 0;
                _sb.Append(ChCur);
                while (true)
                {
                    TokKind tidCandidate;
                    if (!_lex._kwt.IsPunctuator(_lex._pool.Add(_sb), out tidCandidate))
                    {
                        // Neither a punctuator nor a prefix of one.
                        break;
                    }

                    if (tidCandidate != TokKind.None)
                    {
                        // A complete punctuator, not merely a prefix: remember it.
                        cchBest = _sb.Length;
                        tidBest = tidCandidate;
                    }

                    // Look at the character just past the candidate without consuming it.
                    char chAhead = ChPeek(_sb.Length);
                    if (!LexCharUtils.IsPunc(chAhead))
                    {
                        break;
                    }
                    _sb.Append(chAhead);
                }

                if (cchBest == 0)
                {
                    return LexError();
                }

                // Consume exactly the characters of the longest match.
                for (int ich = 0; ich < cchBest; ich++)
                {
                    ChNext();
                }
                return KeyToken.Create(GetSpan(), tidBest);
            }

コード例 #8

0

ファイルを表示

ファイル: Lexer.cs プロジェクト: artemiusgreat/ML-NET

            /// <summary>
            /// Lexes the digits of a hex literal, optionally followed by an integer
            /// suffix. Asserts the current character is a hex digit. If the value does not
            /// fit in a ulong, ErrId.IntOverflow is reported once and the token carries
            /// ulong.MaxValue.
            /// </summary>
            /// <returns>An IntLitToken whose kind includes IntLitKind.Hex.</returns>
            private Token LexHexInt()
            {
                Contracts.Assert(LexCharUtils.IsHexDigit(ChCur));

                ulong u         = 0;
                bool  fOverflow = false;

                do
                {
                    // If the top nibble is already occupied, shifting in another digit
                    // would lose bits. Report this only the first time.
                    if ((u & 0xF000000000000000) != 0 && !fOverflow)
                    {
                        ReportError(ErrId.IntOverflow);
                        fOverflow = true;
                    }
                    u = (u << 4) + (ulong)LexCharUtils.GetHexVal(ChCur);
                } while (LexCharUtils.IsHexDigit(ChNext()));

                if (fOverflow)
                {
                    u = ulong.MaxValue;
                }

                // Arguments are evaluated left to right, so the suffix must be lexed
                // before GetSpan() is called; otherwise the span would be taken before
                // the suffix characters are consumed. The decimal path already lexes the
                // suffix first (LexDecInt(LexIntSuffix())).
                // NOTE(review): assumes GetSpan() measures up to the current cursor
                // position - confirm against its definition.
                IntLitKind ilk = LexIntSuffix() | IntLitKind.Hex;
                return new IntLitToken(GetSpan(), u, ilk);
            }

コード例 #9

0

ファイルを表示

ファイル: Lexer.cs プロジェクト: artemiusgreat/ML-NET

            /// <summary>
            /// Lex a comment, starting at a '/' character.
            /// If the '/' is followed by neither '/' nor '*', it is lexed as a punctuator.
            /// A single line comment runs up to (but does not include) the next line
            /// terminator or the end of input. A block comment runs through the closing
            /// "*/"; if the input ends first, ErrId.UnterminatedComment is reported and
            /// the text gathered so far is still returned. The full text of a block
            /// comment, delimiters included, is looked up in the keyword table and, when
            /// it matches, a keyword token is returned instead of a comment token.
            /// </summary>
            private Token LexComment()
            {
                Contracts.Assert(ChCur == '/');
                // Position of the leading '/', used as the start of the range passed to
                // ReportError for an unterminated block comment.
                int ichErr = _cursor.IchCur;

                switch (ChPeek(1))
                {
                default:
                    // Not "//" or "/*": treat the '/' as a punctuator.
                    return(LexPunc());

                case '/':
                    // Single line comment.
                    // Move onto the second '/'; the loop below advances past it.
                    ChNext();
                    _sb.Length = 0;
                    _sb.Append("//");
                    for (; ;)
                    {
                        // Stop on the line terminator (left unconsumed) or at end of input.
                        if (LexCharUtils.IsLineTerm(ChNext()) || Eof)
                        {
                            return(new CommentToken(GetSpan(), _sb.ToString(), 0));
                        }
                        _sb.Append(ChCur);
                    }

                case '*':
                    // Block comment.
                    // Move onto the '*', then past it to the first character of the body.
                    ChNext();
                    _sb.Length = 0;
                    _sb.Append("/*");
                    ChNext();
                    // Number of line terminators seen inside the comment.
                    int lines = 0;
                    for (; ;)
                    {
                        if (Eof)
                        {
                            ReportError(ichErr, _cursor.IchCur, ErrId.UnterminatedComment);
                            break;
                        }
                        char ch = ChCur;
                        if (LexCharUtils.IsLineTerm(ch))
                        {
                            // LexLineTerm consumes the terminator, appends a dropped CR to
                            // _sb, and returns the character that is appended below.
                            ch = LexLineTerm(_sb);
                            lines++;
                        }
                        else
                        {
                            ChNext();
                        }
                        _sb.Append(ch);
                        // The character just consumed was '*' and the next is '/': this
                        // closes the comment.
                        if (ch == '*' && ChCur == '/')
                        {
                            _sb.Append('/');
                            ChNext();
                            break;
                        }
                    }
                    // We support comment keywords.
                    KeyWordTable.KeyWordKind kind;
                    NormStr nstr = _lex._pool.Add(_sb);
                    if (_lex._kwt.IsKeyWord(nstr, out kind))
                    {
                        return(KeyToken.CreateKeyWord(GetSpan(), nstr.ToString(), kind.Kind, kind.IsContextKeyWord));
                    }
                    return(new CommentToken(GetSpan(), _sb.ToString(), lines));
                }
            }

コード例 #10

0

ファイルを表示

ファイル: Lexer.cs プロジェクト: artemiusgreat/ML-NET

            /// <summary>
            /// Lex a character escape, starting at the backslash. Returns true if
            /// successful, in which case <paramref name="u"/> holds the decoded value.
            /// Every path that returns false reports ErrId.BadEscape.
            /// </summary>
            /// <param name="fUniOnly">
            /// When true, only the Unicode forms (\u with 4 hex digits, \U with 8) are
            /// accepted. When false, \x (1 to 4 hex digits) and the simple
            /// single-character escapes are accepted as well.
            /// </param>
            /// <param name="u">Receives the decoded value; 0 for an unrecognized escape.</param>
            private bool FLexEscChar(bool fUniOnly, out uint u)
            {
                Contracts.Assert(ChCur == '\\');

                // Position of the backslash, used as the start of reported error ranges.
                int  ichErr = _cursor.IchCur;
                // True for \u and \U, which require the full digit count.
                bool fUni;
                // Maximum number of hex digits to read.
                int  cchHex;

                // Advance past the backslash and dispatch on the escape letter.
                switch (ChNext())
                {
                case 'u':
                    fUni   = true;
                    cchHex = 4;
                    goto LHex;

                case 'U':
                    fUni   = true;
                    cchHex = 8;
                    goto LHex;

                default:
                    if (!fUniOnly)
                    {
                        switch (ChCur)
                        {
                        default:
                            goto LBad;

                        case 'x':
                        case 'X':
                            // Variable-length hex escape: fewer than 4 digits is allowed.
                            fUni   = false;
                            cchHex = 4;
                            goto LHex;

                        case '\'':
                            u = 0x0027;
                            break;

                        case '"':
                            u = 0x0022;
                            break;

                        case '\\':
                            u = 0x005C;
                            break;

                        case '0':
                            u = 0x0000;
                            break;

                        case 'a':
                            u = 0x0007;
                            break;

                        case 'b':
                            u = 0x0008;
                            break;

                        case 'f':
                            u = 0x000C;
                            break;

                        case 'n':
                            u = 0x000A;
                            break;

                        case 'r':
                            u = 0x000D;
                            break;

                        case 't':
                            u = 0x0009;
                            break;

                        case 'v':
                            u = 0x000B;
                            break;
                        }
                        // Simple escape recognized: consume the escape letter.
                        ChNext();
                        return(true);
                    }
LBad:
                    // Unrecognized escape. The offending character is not consumed.
                    ReportError(ichErr, _cursor.IchCur, ErrId.BadEscape);
                    u = 0;
                    return(false);
                }

LHex:
                bool fRet = true;

                // Consume the 'u', 'U', 'x' or 'X'.
                ChNext();

                u = 0;
                for (int ich = 0; ich < cchHex; ich++)
                {
                    if (!LexCharUtils.IsHexDigit(ChCur))
                    {
                        // Ran out of hex digits early. With no digits at all the escape
                        // fails. A short Unicode escape is reported as an error but still
                        // returns the partial value; a short \x escape is accepted silently.
                        fRet = (ich > 0);
                        if (fUni || !fRet)
                        {
                            ReportError(ichErr, _cursor.IchCur, ErrId.BadEscape);
                        }
                        break;
                    }
                    u = (u << 4) + (uint)LexCharUtils.GetHexVal(ChCur);
                    ChNext();
                }
                return(fRet);
            }

コード例 #11

0

ファイルを表示

ファイル: Lexer.cs プロジェクト: artemiusgreat/ML-NET

            /// <summary>
            /// Lex a string or character literal. Three forms are handled: a verbatim
            /// string (@"..."), a regular string ("...") and a character literal ('...').
            /// Both string forms return a StrLitToken; the character form returns a
            /// CharLitToken carrying the first decoded character, or '\0' if there is none.
            /// </summary>
            private Token LexStrLit()
            {
                // The closing quote character; also selects the token type at the end.
                char chQuote;

                _sb.Length = 0;
                if (ChCur == '@')
                {
                    // Verbatim string: no escapes, line terminators are allowed, and a
                    // doubled quote stands for one quote character.
                    chQuote = '"';
                    ChNext();
                    Contracts.Assert(ChCur == '"');
                    ChNext();
                    for (; ;)
                    {
                        char ch = ChCur;
                        if (ch == '"')
                        {
                            ChNext();
                            // A lone quote closes the string.
                            if (ChCur != '"')
                            {
                                break;
                            }
                            // Doubled quote: skip the second one; a single '"' is appended below.
                            ChNext();
                        }
                        else if (LexCharUtils.IsLineTerm(ch))
                        {
                            // LexLineTerm consumes the terminator, appends a dropped CR to
                            // _sb, and returns the character that is appended below.
                            ch = LexLineTerm(_sb);
                        }
                        else if (Eof)
                        {
                            ReportError(ErrId.UnterminatedString);
                            break;
                        }
                        else
                        {
                            ChNext();
                        }
                        _sb.Append(ch);
                    }
                }
                else
                {
                    // Regular string or character literal: escapes are processed and the
                    // literal may not span a line terminator.
                    Contracts.Assert(ChCur == '"' || ChCur == '\'');
                    chQuote = ChCur;

                    ChNext();
                    for (; ;)
                    {
                        char ch = ChCur;
                        // Stop on the closing quote, at end of input, or at a line terminator.
                        if (ch == chQuote || Eof || LexCharUtils.IsLineTerm(ch))
                        {
                            break;
                        }
                        if (ch == '\\')
                        {
                            uint u;
                            if (!FLexEscChar(false, out u))
                            {
                                // Bad escape: FLexEscChar has already reported it. Resume
                                // lexing at the current position.
                                continue;
                            }
                            if (u < 0x10000)
                            {
                                ch = (char)u;
                            }
                            else
                            {
                                // The value does not fit in one char: emit two chars, chT
                                // here and ch below.
                                // NOTE(review): presumably ConvertToSurrogatePair reports its
                                // own error when it returns false - confirm.
                                char chT;
                                if (!ConvertToSurrogatePair(u, out chT, out ch))
                                {
                                    continue;
                                }
                                _sb.Append(chT);
                            }
                        }
                        else
                        {
                            ChNext();
                        }
                        _sb.Append(ch);
                    }

                    // The loop also exits at a line terminator or end of input; both are
                    // reported as ErrId.NewlineInConst.
                    if (ChCur != chQuote)
                    {
                        ReportError(ErrId.NewlineInConst);
                    }
                    else
                    {
                        ChNext();
                    }
                }

                if (chQuote == '"')
                {
                    return(new StrLitToken(GetSpan(), _sb.ToString()));
                }

                // Character literal: exactly one char is expected.
                if (_sb.Length != 1)
                {
                    ReportError(_sb.Length == 0 ? ErrId.CharConstEmpty : ErrId.CharConstTooLong);
                }
                return(new CharLitToken(GetSpan(), _sb.Length > 0 ? _sb[0] : '\0'));
            }

コード例 #12

0

ファイルを表示

ファイル: Lexer.cs プロジェクト: artemiusgreat/ML-NET

            /// <summary>
            /// Lexes a numeric literal, or a punctuator when the current character is a
            /// dot that does not begin a number. Asserts that the start kind of the
            /// current character is LexStartKind.NumLit (a digit or '.'). Hex literals go
            /// to LexHexInt; a decimal literal with a dot, an exponent or a real suffix
            /// goes to LexRealNum; anything else goes to LexDecInt.
            /// </summary>
            private Token LexNumLit()
            {
                Contracts.Assert(LexCharUtils.StartKind(ChCur) == LexStartKind.NumLit);
                Contracts.Assert(LexCharUtils.IsDigit(ChCur) || ChCur == '.');

                // The very common case first: a dot that is not followed by a digit is
                // just a Dot punctuator.
                if (ChCur == '.' && !LexCharUtils.IsDigit(ChPeek(1)))
                {
                    return LexPunc();
                }

                // Hex literal. "0x" followed by a non-hex-digit is a 0 followed by an
                // identifier, so at least one hex digit is required here.
                if (ChCur == '0')
                {
                    char chRadix = ChPeek(1);
                    if ((chRadix == 'x' || chRadix == 'X') && LexCharUtils.IsHexDigit(ChPeek(2)))
                    {
                        // Step over "0x" so that LexHexInt starts on the first hex digit.
                        ChNext();
                        ChNext();
                        return LexHexInt();
                    }
                }

                // From here on it is a decimal literal, possibly floating point.
                Contracts.Assert(LexCharUtils.IsDigit(ChCur) || ChCur == '.' && LexCharUtils.IsDigit(ChPeek(1)));
                bool fDot = ChCur == '.';
                bool fExp = false;

                _sb.Length = 0;
                _sb.Append(ChCur);

                // Digits, with at most one dot that must itself be followed by a digit.
                while (true)
                {
                    bool fAtDot = ChNext() == '.';
                    if (fAtDot)
                    {
                        if (fDot || !LexCharUtils.IsDigit(ChPeek(1)))
                        {
                            break;
                        }
                        fDot = true;
                    }
                    else if (!LexCharUtils.IsDigit(ChCur))
                    {
                        break;
                    }
                    _sb.Append(ChCur);
                }

                // Optional exponent: 'e' or 'E', an optional sign, then at least one digit.
                char chExp = ChCur;
                if (chExp == 'e' || chExp == 'E')
                {
                    char chAfter = ChPeek(1);
                    bool fSigned = chAfter == '+' || chAfter == '-';
                    if (LexCharUtils.IsDigit(chAfter) || (fSigned && LexCharUtils.IsDigit(ChPeek(2))))
                    {
                        fExp = true;
                        _sb.Append(chExp);
                        // The sign, or the first exponent digit when there is no sign.
                        _sb.Append(ChNext());
                        for (char chDigit = ChNext(); LexCharUtils.IsDigit(chDigit); chDigit = ChNext())
                        {
                            _sb.Append(chDigit);
                        }
                    }
                }

                bool fReal = fDot || fExp;
                char chSuf = LexRealSuffix(fReal);

                // Integer type: no dot, no exponent and no real suffix.
                if (!fReal && chSuf == '\0')
                {
                    return LexDecInt(LexIntSuffix());
                }
                return LexRealNum(chSuf);
            }

C# (CSharp) Microsoft.ML.Transforms LexCharUtilsの例