} // func ScanLevel

/// <summary>
/// Scans the next token, skipping any whitespace tokens, and records the
/// offset at which the returned token starts.
/// </summary>
/// <param name="iStart">Receives the start offset of the returned token.</param>
/// <returns>The first scanned token that is not <c>SimpleToken.WhiteSpace</c>.</returns>
private SimpleToken ScanSimpleTokenNonWhiteSpace(ref int iStart)
{
	iStart = iOffset;
	SimpleToken t = ScanSimpleToken();
	// Recurse while whitespace is returned; iStart is re-assigned on every
	// recursion, so it ends up at the start of the token finally returned.
	return(t == SimpleToken.WhiteSpace ? ScanSimpleTokenNonWhiteSpace(ref iStart) : t);
} // func ScanSimpleTokenNonWhiteSpace
/// <summary>
/// TryParse must report failure and yield a null token for a malformed token string.
/// </summary>
public void parse_returns_false_on_invalid_string(string token)
{
    var succeeded = SimpleToken.TryParse(token, out SimpleToken tkn);

    succeeded.Should().Be(false, "because we passed incorrect token string");
    tkn.Should().BeNull();
}
/**
 * Prints every token remaining in the reader, one per line, after a header row.
 * @param tokenReader reader whose tokens are consumed and printed
 */
public static void dump(SimpleTokenReader tokenReader)
{
    Console.WriteLine("text\ttype");
    for (IToken token = tokenReader.read(); token != null; token = tokenReader.read())
    {
        Console.WriteLine(token.getText() + "\t\t" + token.getType());
    }
}
/// <summary>
/// ToString must serialize the token's claims ("type|value" inputs) to the expected string.
/// </summary>
public void to_string_returns_serialized_claims(string[] input, string expected)
{
    var claims = new List<Claim>();
    foreach (var entry in input)
    {
        var parts = entry.Split('|');
        claims.Add(new Claim(parts[0], parts[1]));
    }

    var actual = new SimpleToken(claims.ToArray()).ToString();

    Assert.Equal(expected, actual);
}
/// <summary>
/// TryParse must succeed on a well-formed token string and yield claims matching
/// the alternating type/value pairs in <paramref name="claims"/>.
/// </summary>
public void parse_returns_correct_claims_on_valid_string(string token, string[] claims)
{
    // Pair up even-index entries (types) with odd-index entries (values);
    // a trailing unpaired entry is ignored, matching Zip's truncation.
    var expected = new List<Claim>();
    for (var i = 0; i + 1 < claims.Length; i += 2)
    {
        expected.Add(new Claim(claims[i], claims[i + 1]));
    }

    SimpleToken tkn;
    var parsed = SimpleToken.TryParse(token, out tkn);

    parsed.Should().Be(true, "because we passed correct token string");
    tkn.Should().BeEquivalentTo(expected);
}
/// <summary>
/// Round-trips a token through ToString/TryParse and verifies the claims survive intact.
/// </summary>
public void parse_returns_correct_claims_on_valid_string(string[] input)
{
    var claims = new List<Claim>();
    foreach (var entry in input)
    {
        var parts = entry.Split('|');
        claims.Add(new Claim(parts[0], parts[1]));
    }
    var expected = new SimpleToken(claims.ToArray());

    SimpleToken actual;
    var result = SimpleToken.TryParse(expected.ToString(), out actual);

    Assert.True(result);
    Assert.True(expected.SequenceEqual(actual, new ClaimComparer()));
}
/// <summary>
/// Builds the token for a regex match: a placeable token when this recognizer's
/// type is OtherTextPlaceable, otherwise a plain SimpleToken of that type.
/// </summary>
/// <param name="s">The matched text.</param>
/// <param name="groups">Capture groups of the match (unused here; available to overrides).</param>
protected virtual Core.Tokenization.Token CreateToken(string s, System.Text.RegularExpressions.GroupCollection groups)
{
    return _Type == TokenType.OtherTextPlaceable
        ? (Token)new GenericPlaceableToken(s, TokenClassName, _AutoSubstitutable)
        : new SimpleToken(s, _Type);
}
/// <summary>
/// Maps a fully-populated SimpleToken through TokenMapper.ToComplexEntity and
/// verifies every scalar property plus the mocked claim/client mappers' outputs
/// are carried over to the resulting complex Token.
/// </summary>
public void ToComplexEntity_WhenSimpleEntity_ExpectCorrectMap()
{
    // Arrange
    var mockPropertyMapper = new Mock<IPropertyGetSettersTyped<Token>>();
    var mockClaimsMapper = new Mock<IMapper<SimpleClaim, Claim>>();
    var mockClientMapper = new Mock<IMapper<SimpleClient, Client>>();
    mockClaimsMapper.Setup(r => r.ToComplexEntity(It.IsAny<SimpleClaim>())).Returns(new Claim("Val1", "Val2"));
    mockClientMapper.Setup(r => r.ToComplexEntity(It.IsAny<SimpleClient>())).Returns(new Client());
    // No typed setters: the mapper falls back to the explicitly-mapped properties only.
    mockPropertyMapper.Setup(r => r.GetSetters(It.IsAny<Type>()))
        .Returns(new Dictionary<string, TypedSetter<Token>>());
    var tokenMappers = new TokenMapper<Token>(mockPropertyMapper.Object, mockClaimsMapper.Object, mockClientMapper.Object);
    var simpleEntity = new SimpleToken
    {
        Claims = new List<SimpleClaim>(),
        Client = new SimpleClient(),
        Type = "Type",
        CreationTime = new DateTimeOffset(new DateTime(2016, 1, 1)),
        Issuer = "Issuer",
        Version = 1,
        Audience = "Audience",
        Lifetime = 1,
    };

    // Act
    // Timing is informational only (written out below); nothing is asserted on it.
    var stopwatch = Stopwatch.StartNew();
    var complexEntity = tokenMappers.ToComplexEntity(simpleEntity);
    stopwatch.Stop();

    // Assert
    this.WriteTimeElapsed(stopwatch);
    Assert.That(complexEntity, Is.Not.Null);
    Assert.That(complexEntity.Claims, Is.Not.Null);
    Assert.That(complexEntity.Client, Is.Not.Null);
    Assert.That(complexEntity.Type, Is.EqualTo("Type"));
    Assert.That(complexEntity.CreationTime, Is.EqualTo(new DateTimeOffset(new DateTime(2016, 1, 1))));
    Assert.That(complexEntity.Issuer, Is.EqualTo("Issuer"));
    Assert.That(complexEntity.Version, Is.EqualTo(1));
    Assert.That(complexEntity.Audience, Is.EqualTo("Audience"));
    Assert.That(complexEntity.Lifetime, Is.EqualTo(1));
}
/// <summary>
/// Recognizes a maximal run of CJK punctuation (GeneralPunctuation token) or CJK
/// characters (CharSequence token) starting at <paramref name="from"/>; anything
/// else is delegated to the base recognizer.
/// </summary>
/// <param name="s">Input text.</param>
/// <param name="from">Start position within <paramref name="s"/>.</param>
/// <param name="allowTokenBundles">Passed through to the base recognizer.</param>
/// <param name="consumedLength">Receives the number of characters consumed.</param>
/// <returns>The recognized token, or null for empty input / out-of-range start.</returns>
public override Core.Tokenization.Token Recognize(string s, int from, bool allowTokenBundles, ref int consumedLength)
{
    if (String.IsNullOrEmpty(s) || from >= s.Length)
    {
        return(null);
    }
    consumedLength = 0;
    int originalStart = from;
    if (Core.CharacterProperties.IsCJKPunctuation(s[from]))
    {
        // Consume the run; _JUSTONE limits the token to a single character.
        while (from < s.Length && Core.CharacterProperties.IsCJKPunctuation(s[from]))
        {
            ++consumedLength;
            ++from;
            if (_JUSTONE)
            {
                break;
            }
        }
        Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.GeneralPunctuation);
        return(t);
    }
    if (Core.CharacterProperties.IsCJKChar(s[from]))
    {
        // Consume the run; _JUSTONE limits the token to a single character.
        while (from < s.Length && Core.CharacterProperties.IsCJKChar(s[from]))
        {
            ++consumedLength;
            ++from;
            if (_JUSTONE)
            {
                break;
            }
        }
        Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.CharSequence);
        return(t);
    }
    // TODO CJK punctuation etc.
    return(base.Recognize(s, from, allowTokenBundles, ref consumedLength));
}
/// <summary>
/// Recognizes a maximal run of punctuation (GeneralPunctuation token) or of Thai
/// characters (CharSequence token) starting at <paramref name="from"/>; anything
/// else is delegated to the base recognizer.
/// </summary>
/// <param name="s">Input text.</param>
/// <param name="from">Start position within <paramref name="s"/>.</param>
/// <param name="allowTokenBundles">Passed through to the base recognizer.</param>
/// <param name="consumedLength">Receives the number of characters consumed.</param>
/// <returns>The recognized token, or null for empty input / out-of-range start.</returns>
public override Core.Tokenization.Token Recognize(string s, int from, bool allowTokenBundles, ref int consumedLength)
{
    if (String.IsNullOrEmpty(s) || from >= s.Length)
    {
        return(null);
    }
    consumedLength = 0;
    int originalStart = from;
    // splitting off all punctuation may exaggerate a bit - wait for user feedback
    if (System.Char.IsPunctuation(s[from]))
    {
        // Consume the run; _JUSTONE limits the token to a single character.
        while (from < s.Length && System.Char.IsPunctuation(s[from]))
        {
            ++consumedLength;
            ++from;
            if (_JUSTONE)
            {
                break;
            }
        }
        Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.GeneralPunctuation);
        return(t);
    }
    if (Core.CharacterProperties.IsInBlock(s[from], Core.UnicodeBlock.Thai))
    {
        // Consume the run of Thai-block characters; _JUSTONE limits it to one.
        while (from < s.Length && Core.CharacterProperties.IsInBlock(s[from], Core.UnicodeBlock.Thai))
        {
            ++consumedLength;
            ++from;
            if (_JUSTONE)
            {
                break;
            }
        }
        Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.CharSequence);
        return(t);
    }
    return(base.Recognize(s, from, allowTokenBundles, ref consumedLength));
}
/// <summary>
/// Recognizes a run of characters from the default punctuation charset
/// (GeneralPunctuation token) or a word matched by the default word regex
/// (Word token) at <paramref name="from"/>; otherwise delegates to the base
/// recognizer.
/// </summary>
/// <param name="s">Input text.</param>
/// <param name="from">Start position within <paramref name="s"/>.</param>
/// <param name="allowTokenBundles">Passed through to the base recognizer.</param>
/// <param name="consumedLength">Receives the number of characters consumed.</param>
/// <returns>The recognized token, or null for empty input / out-of-range start.</returns>
public override Core.Tokenization.Token Recognize(string s, int from, bool allowTokenBundles, ref int consumedLength)
{
    if (String.IsNullOrEmpty(s) || from >= s.Length)
    {
        return(null);
    }
    consumedLength = 0;
    int originalStart = from;
    if (_DefaultPunctCharset.Contains(s[from]))
    {
        while (from < s.Length && _DefaultPunctCharset.Contains(s[from]))
        {
            ++consumedLength;
            ++from;
        }
        Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.GeneralPunctuation);
        return(t);
    }
    // The word regex must match exactly at the current position to count.
    System.Text.RegularExpressions.Match m = _DefaultWordRegex.Match(s, from);
    if (m != null && m.Success && m.Index == from)
    {
        consumedLength = m.Length;
        Token t = new SimpleToken(m.Value, TokenType.Word);
        return(t);
    }
    /*
     * AUTOMATON PUNCT [U+3000-U+303FU+3200-U+32FFU+FF01-U+FF0FU+FF1A-U+FF20U+FF3B-U+FF3DU+FF5B-U+FF64]
     * NFA WORD [U+30A0-U+30FFU+FF65-U+FF9F]+
     * NFA WORD [U+3040-U+3091U+3093-U+309F]+
     * NFA WORD [U+3092]
     * NFA WORD [U+4E00-U+9FFF]+
     * NFA WORD [U+FF21-U+FF3AU+FF41-U+FF5A]+
     */
    return(base.Recognize(s, from, allowTokenBundles, ref consumedLength));
}
/// <summary>
/// Recognizes a run of default-punctuation characters (GeneralPunctuation token)
/// or a run of CJK Unified Ideographs, U+4E00..U+9FFF (CharSequence token), at
/// <paramref name="from"/>; otherwise delegates to the base recognizer.
/// </summary>
/// <param name="s">Input text.</param>
/// <param name="from">Start position within <paramref name="s"/>.</param>
/// <param name="allowTokenBundles">Passed through to the base recognizer.</param>
/// <param name="consumedLength">Receives the number of characters consumed.</param>
/// <returns>The recognized token, or null for empty input / out-of-range start.</returns>
public override Core.Tokenization.Token Recognize(string s, int from, bool allowTokenBundles, ref int consumedLength)
{
    if (String.IsNullOrEmpty(s) || from >= s.Length)
    {
        return(null);
    }
    consumedLength = 0;
    int originalStart = from;
    if (_DefaultPunctCharset.Contains(s[from]))
    {
        while (from < s.Length && _DefaultPunctCharset.Contains(s[from]))
        {
            ++consumedLength;
            ++from;
        }
        Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.GeneralPunctuation);
        return(t);
    }
    // CJK Unified Ideographs block.
    if (s[from] >= 0x4e00 && s[from] <= 0x9fff)
    {
        while (from < s.Length && s[from] >= 0x4e00 && s[from] <= 0x9fff)
        {
            ++consumedLength;
            ++from;
        }
        Token t = new SimpleToken(s.Substring(originalStart, consumedLength), TokenType.CharSequence);
        return(t);
    }
    // TODO CJK punctuation etc.
    return(base.Recognize(s, from, allowTokenBundles, ref consumedLength));
}
/// <summary>Visitor callback for simple tokens; intentionally a no-op in this visitor.</summary>
public void VisitSimpleToken(SimpleToken token) { }
/// <summary>
/// Tokenizes <paramref name="s"/> by letting every registered recognizer compete
/// for the longest match at each position; whitespace runs are skipped (or
/// emitted as tokens), and an ultimate fallback groups unclaimed characters by
/// Unicode category.
/// </summary>
/// <param name="s">Text of the current segment run.</param>
/// <param name="currentRun">Run index stored in each token's span.</param>
/// <param name="createWhitespaceTokens">If true, whitespace runs become Whitespace tokens.</param>
/// <param name="allowTokenBundles">If true, recognizers may produce token bundles.</param>
/// <returns>The recognized tokens, each with its span assigned.</returns>
private List <Core.Tokenization.Token> TokenizeInternal(string s, int currentRun, bool createWhitespaceTokens, bool allowTokenBundles)
{
    List <Token> result = new List <Token>();
    int p = 0;
    int sLen = s.Length;
    while (p < sLen)
    {
        int start = p;
        // Skip (and optionally emit) leading whitespace.
        while (p < sLen && System.Char.IsWhiteSpace(s, p))
        {
            ++p;
        }
        if (p > start)
        {
            if (createWhitespaceTokens)
            {
                Token t = new SimpleToken(s.Substring(start, p - start), TokenType.Whitespace);
                t.Span = new SegmentRange(currentRun, start, p - 1);
                result.Add(t);
            }
            start = p;
        }
        if (p >= sLen)
        {
            break;
        }
        // test which recognizer claims the longest prefix
        Recognizer winningRecognizer = null;
        int winningLength = 0;
        Token winningToken = null;
        const bool allowBundlesOfDifferentType = false;
        for (int r = 0; r < _Parameters.Count; ++r)
        {
            Recognizer rec = _Parameters[r];
            int consumedLength = 0;
            Token t = rec.Recognize(s, start, allowTokenBundles, ref consumedLength);
            if (t != null)
            {
                // A longer match wins, unless the current winner overrides fallback
                // recognizers and this candidate is a fallback.
                if (winningRecognizer == null || (winningLength < consumedLength && !(winningRecognizer.OverrideFallbackRecognizer && rec.IsFallbackRecognizer)))
                {
                    winningToken = t;
                    winningRecognizer = rec;
                    winningLength = consumedLength;
                    p = start + consumedLength;
                }
                else if (allowTokenBundles && allowBundlesOfDifferentType)
                {
                    // Disabled path (allowBundlesOfDifferentType is const false):
                    // would collect equal-length matches into a bundle.
                    Core.Tokenization.TokenBundle winningBundle = winningToken as Core.Tokenization.TokenBundle;
                    if (winningBundle == null)
                    {
                        winningBundle = new TokenBundle(winningToken, winningRecognizer.Priority);
                        winningToken = winningBundle;
                    }
                    else
                    {
                        winningBundle.Add(t, winningRecognizer.Priority);
                    }
                    System.Diagnostics.Debug.Assert(winningLength == consumedLength);
                    System.Diagnostics.Debug.Assert(p == start + consumedLength);
                }
                else if (winningRecognizer.Priority < rec.Priority)
                {
                    // same length, but lower priority - highest prio wins
                    winningToken = t;
                    winningRecognizer = rec;
                    winningLength = consumedLength;
                    p = start + consumedLength;
                }
            }
        }
        if (winningToken == null)
        {
            // none of the recognizers claimed any input, or there were no recognizers set up.
            // ultimate fallback required: group by same Unicode category
            // TODO scanning on just the category is too fine - we may want to group coarser categories together
            System.Globalization.UnicodeCategory cat = System.Char.GetUnicodeCategory(s, start);
            while (p < sLen && System.Char.GetUnicodeCategory(s, p) == cat)
            {
                ++p;
            }
            winningLength = p - start;
            // TODO distinguish result token type depending on the category
            winningToken = new SimpleToken(s.Substring(start, p - start), TokenType.Word);
            winningRecognizer = null;
        }
        else if (winningToken is TokenBundle)
        {
            // convert single-element token bundles to single tokens
            TokenBundle tb = winningToken as TokenBundle;
            if (tb.Count == 1)
            {
                winningToken = tb[0].Token;
            }
        }
        System.Diagnostics.Debug.Assert(winningLength > 0);
        System.Diagnostics.Debug.Assert(winningToken != null);
        winningToken.Span = new SegmentRange(currentRun, start, p - 1);
        result.Add(winningToken);
    }
    return(result);
}
/// <summary>
/// Extracts the next token found in the program source. Whitespace is skipped;
/// '{ ... }' and '(* ... *)' comments are consumed and scanning continues.
/// </summary>
/// <returns>The next token; TOK_EOF once the source is exhausted.</returns>
/// <exception cref="CompilerException">
/// Thrown when reading beyond the source end or on an unknown character.
/// </exception>
public IToken NextToken()
{
    if (SourcePosition > Source.Length)
    {
        throw new CompilerException(CurrentLine, CurrentLinePosition, "Read beyond the Source end");
    }

    while (CurrentChar != C_EOF)
    {
        // Skip white chars.
        while (IsWhite(CurrentChar))
        {
            NextChar();
        }

        if (IsLetter(CurrentChar))
        {
            return CurrentToken = ParseIdent();
        }

        if (IsDigit(CurrentChar))
        {
            return CurrentToken = ParseNumber(1);
        }

        if (CurrentChar == '\'')
        {
            return CurrentToken = ParseString();
        }

        switch (CurrentChar)
        {
            case '{':
            {
                // '{ ... }' style comment.
                SkipComment();
                continue;
            }

            case '+':
            {
                NextChar();
                if (IsDigit(CurrentChar))
                {
                    // Explicitly signed number literal, e.g. "+123".
                    return CurrentToken = ParseNumber(1);
                }
                return CurrentToken = new SimpleToken(TokenCode.TOK_ADD_OP);
            }

            case '-':
            {
                NextChar();
                if (IsDigit(CurrentChar))
                {
                    // Negative number literal, e.g. "-123".
                    return CurrentToken = ParseNumber(-1);
                }
                return CurrentToken = new SimpleToken(TokenCode.TOK_SUB_OP);
            }

            case '*':
                NextChar();
                return CurrentToken = new SimpleToken(TokenCode.TOK_MUL_OP);

            case '/':
                NextChar();
                return CurrentToken = new SimpleToken(TokenCode.TOK_DIV_OP);

            case '=':
                NextChar();
                return CurrentToken = new SimpleToken(TokenCode.TOK_EQ_OP);

            case '<':
            {
                NextChar();
                if (CurrentChar == '>')
                {
                    NextChar();
                    return CurrentToken = new SimpleToken(TokenCode.TOK_NEQ_OP); // '<>'
                }
                else if (CurrentChar == '=')
                {
                    NextChar();
                    return CurrentToken = new SimpleToken(TokenCode.TOK_LE_OP); // '<='
                }
                return CurrentToken = new SimpleToken(TokenCode.TOK_LT_OP); // '<'
            }

            case '>':
            {
                NextChar();
                if (CurrentChar == '=')
                {
                    NextChar();
                    return CurrentToken = new SimpleToken(TokenCode.TOK_GE_OP); // '>='
                }
                return CurrentToken = new SimpleToken(TokenCode.TOK_GT_OP); // '>'
            }

            case ';':
                NextChar();
                return CurrentToken = new SimpleToken(TokenCode.TOK_SEP);

            case ',':
                NextChar();
                return CurrentToken = new SimpleToken(TokenCode.TOK_LIST_SEP);

            case ':':
            {
                NextChar();
                if (CurrentChar == '=')
                {
                    NextChar();
                    return CurrentToken = new SimpleToken(TokenCode.TOK_ASGN_OP); // ':='
                }
                return CurrentToken = new SimpleToken(TokenCode.TOK_DDOT);
            }

            case '(':
            {
                NextChar();
                if (CurrentChar == '*')
                {
                    // '(* ... *)' style comment.
                    NextChar();
                    SkipComment();
                    continue;
                }
                else
                {
                    return CurrentToken = new SimpleToken(TokenCode.TOK_LBRA);
                }
            }

            case ')':
                NextChar();
                return CurrentToken = new SimpleToken(TokenCode.TOK_RBRA);

            case '.':
                NextChar();
                return CurrentToken = new SimpleToken(TokenCode.TOK_PROG_END);

            case '\0':
                return CurrentToken = new SimpleToken(TokenCode.TOK_EOF);

            default:
                throw new CompilerException(CurrentLine, CurrentLinePosition, $"Unknown character '{CurrentChar}' found.");
        }
    }

    return CurrentToken = new SimpleToken(TokenCode.TOK_EOF);
}
/// <summary>Appends the token's text to the string builder.</summary>
public void VisitSimpleToken(SimpleToken token) => this._sb.Append(token.Text);
/// <summary>Accumulates the token's text onto the plain-text result.</summary>
public void VisitSimpleToken(SimpleToken token) => _plainText += token.Text;
/// <summary>
/// Seeds the Redis test database: serializes a fully-populated SimpleToken
/// (two claims, client with a data bag) and stores it under the keys
/// "DEFAULT_THS_Existing" and "DEFAULT_THS_Delete" used by the store tests.
/// </summary>
public void TestFixtureSetup()
{
    var database = RedisHelpers.ConnectionMultiplexer.GetDatabase();
    var claim1 = new SimpleClaim { Type = "Type1", Value = "Value1" };
    var claim2 = new SimpleClaim { Type = "Type2", Value = "Value2" };
    var client = new SimpleClient
    {
        Claims = new List<SimpleClaim> { claim1, claim2 },
        DataBag = new Dictionary<string, object> { { "AppId", 12 } }
    };
    var token = new SimpleToken
    {
        Claims = new List<SimpleClaim> { claim1, claim2 },
        Client = client,
        Type = "Type",
        CreationTime = new DateTimeOffset(new DateTime(2016, 1, 1)),
        Version = 1,
        Issuer = "Issuer",
        Lifetime = 120,
        Audience = "Audience"
    };
    // Serializer settings include the custom client mapper so the client
    // round-trips as CustomClient.
    var settings = new JsonSettingsFactory(new CustomMappersConfiguration
    {
        ClientMapper = CustomMapperFactory.CreateClientMapper<CustomClient>()
    }).Create();
    var serialized = JsonConvert.SerializeObject(token, settings);
    database.StringSet("DEFAULT_THS_Existing", serialized);
    database.StringSet("DEFAULT_THS_Delete", serialized);
}
/// <summary>A token built from a set of claims must enumerate exactly those claims, in order.</summary>
public void simple_token_yields_passed_claims(List<Claim> claims)
{
    var token = new SimpleToken(claims.ToArray());

    token.Should().Equal(claims);
}
/// <summary>A SimpleToken must be usable wherever an IEnumerable of Claim is expected.</summary>
public void token_is_a_collection_of_claims()
{
    var token = new SimpleToken();

    Assert.IsAssignableFrom<IEnumerable<Claim>>(token);
}
/// <summary>A token constructed from a claim list must yield an equivalent claim set.</summary>
public void token_returns_passed_claims(List<Claim> claims)
{
    var token = new SimpleToken(claims);

    token.Should().BeEquivalentTo(claims);
}
/// <summary>A SimpleToken must be assignable to IEnumerable of Claim.</summary>
public void simple_token_is_collection_of_claims()
{
    var token = new SimpleToken();

    Assert.IsAssignableFrom<IEnumerable<Claim>>(token);
}
/**
 * Enters the initial state of the finite state machine.
 * The machine never actually stays in this state: it immediately transitions
 * to another one. It is entered when parsing starts, and again whenever a
 * token has been fully parsed; at that point the finished token is recorded
 * and a fresh token is started.
 * @param ch the current character
 * @return the next DFA state
 */
private DfaState initToken(char ch)
{
    // Flush the token accumulated so far, if any.
    if (tokenText.ToString() != "")
    {
        token.text = tokenText.ToString();
        tokens.Add(token);
        tokenText.Close();
        tokenText = new StringWriter();
        token = new SimpleToken();
    }
    DfaState newState = DfaState.Initial;
    if (isAlpha(ch))
    {
        // 'i' may start the keyword "int", so it gets its own state.
        if (ch == 'i')
        {
            newState = DfaState.Id_int1;
        }
        else
        {
            newState = DfaState.Id;
        }
        token.type = TokenType.Identifier;
        tokenText.Write(ch);
    }
    else if (isDigit(ch))
    {
        newState = DfaState.IntLiteral;
        token.type = TokenType.IntLiteral;
        tokenText.Write(ch);
    }
    else if (ch == '>')
    {
        newState = DfaState.GT;
        token.type = TokenType.GT;
        tokenText.Write(ch);
    }
    else if (ch == '+')
    {
        newState = DfaState.Plus;
        token.type = TokenType.Plus;
        tokenText.Write(ch);
    }
    else if (ch == '-')
    {
        newState = DfaState.Minus;
        token.type = TokenType.Minus;
        tokenText.Write(ch);
    }
    else if (ch == '*')
    {
        newState = DfaState.Star;
        token.type = TokenType.Star;
        tokenText.Write(ch);
    }
    else if (ch == '/')
    {
        newState = DfaState.Slash;
        token.type = TokenType.Slash;
        tokenText.Write(ch);
    }
    else if (ch == ';')
    {
        newState = DfaState.SemiColon;
        token.type = TokenType.SemiColon;
        tokenText.Write(ch);
    }
    else if (ch == '(')
    {
        newState = DfaState.LeftParen;
        token.type = TokenType.LeftParen;
        tokenText.Write(ch);
    }
    else if (ch == ')')
    {
        newState = DfaState.RightParen;
        token.type = TokenType.RightParen;
        tokenText.Write(ch);
    }
    else if (ch == '=')
    {
        newState = DfaState.Assignment;
        token.type = TokenType.Assignment;
        tokenText.Write(ch);
    }
    else
    {
        // Unrecognized characters are skipped: stay in the initial state.
        newState = DfaState.Initial;
    }
    return(newState);
}
/// <summary>Visitor callback for simple tokens.</summary>
public void VisitSimpleToken(SimpleToken token)
{
    // not required with this implementation
}
} // func GetLineStateData

/// <summary>
/// Scans one token from the current line and fills <paramref name="tokenInfo"/>
/// with its color/type/trigger and span. The method is a goto-driven state
/// machine: the low bits of <paramref name="lineState"/> select between normal
/// scanning, long string/comment continuation, typedef parsing, and an
/// extended declaration parser (local/const/function/do/cast).
/// </summary>
/// <param name="tokenInfo">Receives the classification of the scanned token.</param>
/// <param name="lineState">Carries scanner state across lines; updated in place.</param>
/// <returns>true when a token was emitted; false at end of line/input.</returns>
public bool ScanTokenAndProvideInfoAboutIt(TokenInfo tokenInfo, ref int lineState)
{
RedoScan:
	SimpleToken token = SimpleToken.Unknown;
	int iStart = iOffset;
RedoLineState:
	if ((lineState & StateFlag) == 0)
	{
		if (token == SimpleToken.Unknown)
		{
			token = ScanSimpleTokenNonWhiteSpace(ref iStart);
		}
		if (token == SimpleToken.Identifier)
		{
			// Keywords below switch the scanner into the extended parser states:
			// local var : typedef
			// const var typeof typedef
			// const var : typedef
			// function name.a:a(a : typedef, a : typedef) : typedef
			// do (a : typedef,
			// for a : typedef,
			// foreach a : typedef
			string sValue = GetValue(iStart, iOffset);
			if (sValue == "local" || sValue == "foreach" || sValue == "for")
			{
				SetLineStateExtented(ref lineState, 1);
			}
			else if (sValue == "const")
			{
				SetLineStateExtented(ref lineState, 3);
			}
			else if (sValue == "function")
			{
				SetLineStateExtented(ref lineState, 5);
			}
			else if (sValue == "do")
			{
				SetLineStateExtented(ref lineState, 8);
			}
			else if (sValue == "cast")
			{
				SetLineStateExtented(ref lineState, 13);
			}
		}
		goto EmitToken;
	}
	else if ((lineState & (StringFlag | CommentFlag)) != 0) // Block (String, Comment)
	{
		#region -- block --
		if (iOffset >= sLine.Length)
		{
			token = SimpleToken.Eof;
		}
		else
		{
			// iLevel is the number of '=' signs in the long-bracket delimiter.
			int iLevel = GetLineStateData(lineState);
			token = (lineState & StringFlag) == StringFlag ? SimpleToken.String : SimpleToken.LineComment;
			// Emit part
			while (iOffset < sLine.Length)
			{
				if (sLine[iOffset] == ']' && iOffset + iLevel + 1 < sLine.Length && sLine[iOffset + iLevel + 1] == ']')
				{
					// check for equals
					bool lValid = true;
					for (int i = iOffset + 1; i <= iOffset + iLevel; i++)
					{
						if (sLine[i] != '=')
						{
							lValid = false;
							break;
						}
					}
					if (lValid)
					{
						// Closing bracket found: consume it and clear the block flags.
						iOffset += iLevel + 2;
						lineState = lineState & (ParserFlag | TypeFlag);
						break;
					}
				}
				iOffset++;
			}
		}
		goto EmitToken;
		#endregion
	}
	else if ((lineState & TypeFlag) != 0) // typedef parser idenfifier.idenfier[identifier,identifier]
	{
		#region -- typedef --
		// iLevel tracks the square-bracket nesting depth of the type expression.
		int iLevel = GetLineStateData(lineState);
		if (token == SimpleToken.Unknown)
		{
			token = ScanSimpleTokenNonWhiteSpace(ref iStart);
		}
		if (token != SimpleToken.Eof)
		{
			switch ((lineState & TypeFlag) >> 6)
			{
				case 1:
					if (token == SimpleToken.Identifier)
					{
						token = SimpleToken.Type;
						SetLineStateType(ref lineState, 2);
					}
					else
					{
						SetLineStateType(ref lineState, 0);
						goto RedoLineState;
					}
					break;
				case 2:
					if (token == SimpleToken.Dot)
					{
						SetLineStateType(ref lineState, 1);
					}
					else if (token == SimpleToken.Comma)
					{
						if (iLevel == 0)
						{
							SetLineStateType(ref lineState, 0);
							goto RedoLineState;
						}
						else
						{
							SetLineStateType(ref lineState, 1);
						}
					}
					else if (token == SimpleToken.BraceSquareOpen)
					{
						iLevel++;
						if (iLevel > 0x7FFFFF)
						{
							throw new OverflowException();
						}
						SetLineStateData(ref lineState, iLevel);
						SetLineStateType(ref lineState, 1);
					}
					else if (token == SimpleToken.BraceSquareClose)
					{
						iLevel--;
						if (iLevel < 0)
						{
							SetLineStateType(ref lineState, 0);
							goto RedoLineState;
						}
						else
						{
							SetLineStateData(ref lineState, iLevel);
						}
					}
					else
					{
						SetLineStateType(ref lineState, 0);
						goto RedoLineState;
					}
					break;
			}
		}
		goto EmitToken;
		#endregion
	}
	else if ((lineState & ParserFlag) != 0) // extented Parser
	{
		if (token == SimpleToken.Unknown)
		{
			token = ScanSimpleTokenNonWhiteSpace(ref iStart);
		}
		if (token != SimpleToken.Eof)
		{
			switch ((lineState & ParserFlag) >> 2)
			{
				#region -- 1, 12 -- local var : typedef, var : typedef, for, foreach--
				case 1:
					if (token == SimpleToken.Identifier) // identifier
					{
						SetLineStateExtented(ref lineState, 2);
					}
					else
					{
						SetLineStateExtented(ref lineState, 0);
					}
					break;
				case 2:
					if (token == SimpleToken.Colon)
					{
						SetLineStateExtented(ref lineState, 12);
						SetLineStateType(ref lineState, 1);
					}
					else if (token == SimpleToken.Comma)
					{
						SetLineStateExtented(ref lineState, 1);
					}
					else
					{
						SetLineStateExtented(ref lineState, 0);
					}
					break;
				case 12:
					if (token == SimpleToken.Comma)
					{
						SetLineStateExtented(ref lineState, 1);
					}
					else
					{
						SetLineStateExtented(ref lineState, 0);
					}
					break;
				#endregion
				#region -- 3 -- const c typeof typedef, const c : typedef --
				case 3:
					if (token == SimpleToken.Identifier)
					{
						SetLineStateExtented(ref lineState, 4);
					}
					else
					{
						SetLineStateExtented(ref lineState, 0);
					}
					break;
				case 4:
					if ((token == SimpleToken.Identifier && GetValue(iStart, iOffset) == "typeof") || token == SimpleToken.Colon)
					{
						SetLineStateType(ref lineState, 1);
					}
					SetLineStateExtented(ref lineState, 0);
					break;
				#endregion
				#region -- 5,14 -- function m.m:m (a : typedef, b : typedef) : typedef --
				case 5:
					if (token == SimpleToken.Identifier)
					{
						SetLineStateExtented(ref lineState, 6);
					}
					else if (token == SimpleToken.BraceOpen)
					{
						token = SimpleToken.Braces;
						SetLineStateExtented(ref lineState, 9);
					}
					else
					{
						SetLineStateExtented(ref lineState, 0);
					}
					break;
				case 6:
					if (token == SimpleToken.Dot)
					{
						SetLineStateExtented(ref lineState, 5);
					}
					else if (token == SimpleToken.Colon)
					{
						SetLineStateExtented(ref lineState, 7);
					}
					else if (token == SimpleToken.BraceOpen)
					{
						token = SimpleToken.Braces;
						SetLineStateExtented(ref lineState, 9);
					}
					else
					{
						SetLineStateExtented(ref lineState, 0);
					}
					break;
				case 7:
					if (token == SimpleToken.Identifier)
					{
						SetLineStateExtented(ref lineState, 8);
					}
					else
					{
						SetLineStateExtented(ref lineState, 0);
					}
					break;
				case 8:
					if (token == SimpleToken.BraceOpen)
					{
						token = SimpleToken.Braces;
						SetLineStateExtented(ref lineState, 9);
					}
					else
					{
						SetLineStateExtented(ref lineState, 0);
					}
					break;
				case 9: // argument list: a : typedef,
					if (token == SimpleToken.Identifier)
					{
						SetLineStateExtented(ref lineState, 10);
					}
					else if (token == SimpleToken.BraceClose)
					{
						SetLineStateExtented(ref lineState, 14);
					}
					else
					{
						SetLineStateExtented(ref lineState, 0);
					}
					break;
				case 10:
					if (token == SimpleToken.Colon)
					{
						SetLineStateType(ref lineState, 1);
						SetLineStateExtented(ref lineState, 11);
					}
					else if (token == SimpleToken.Comma)
					{
						SetLineStateExtented(ref lineState, 9);
					}
					else if (token == SimpleToken.BraceClose)
					{
						SetLineStateExtented(ref lineState, 14);
					}
					else
					{
						SetLineStateExtented(ref lineState, 0);
					}
					break;
				case 11:
					if (token == SimpleToken.Comma)
					{
						SetLineStateExtented(ref lineState, 9);
					}
					else if (token == SimpleToken.BraceClose)
					{
						SetLineStateExtented(ref lineState, 14);
					}
					else
					{
						SetLineStateExtented(ref lineState, 0);
					}
					break;
				case 14:
					if (token == SimpleToken.Colon)
					{
						SetLineStateType(ref lineState, 1);
					}
					SetLineStateExtented(ref lineState, 0);
					break;
				#endregion
				#region -- 13 -- cast(typedef --
				case 13:
					if (token == SimpleToken.BraceOpen)
					{
						SetLineStateType(ref lineState, 1);
						SetLineStateExtented(ref lineState, 0);
					}
					break;
				#endregion
			}
		}
		goto EmitToken;
	}
	throw new InvalidOperationException();

EmitToken:
	// Map the scanned token to editor color/type/trigger information.
	switch (token)
	{
		case SimpleToken.Unknown:
			goto RedoScan;
		case SimpleToken.Eof:
			return(false);
		case SimpleToken.WhiteSpace:
			tokenInfo.Color = TokenColor.Text;
			tokenInfo.Type = TokenType.WhiteSpace;
			tokenInfo.Trigger = TokenTriggers.None;
			break;
		case SimpleToken.Comment:
			tokenInfo.Color = TokenColor.Comment;
			tokenInfo.Type = TokenType.Comment;
			tokenInfo.Trigger = TokenTriggers.None;
			break;
		case SimpleToken.LineComment:
			tokenInfo.Color = TokenColor.Comment;
			tokenInfo.Type = TokenType.LineComment;
			tokenInfo.Trigger = TokenTriggers.None;
			break;
		case SimpleToken.String:
			tokenInfo.Color = TokenColor.String;
			tokenInfo.Type = TokenType.String;
			tokenInfo.Trigger = TokenTriggers.None;
			break;
		case SimpleToken.Number:
			tokenInfo.Color = TokenColor.Number;
			tokenInfo.Type = TokenType.Literal;
			tokenInfo.Trigger = TokenTriggers.None;
			break;
		case SimpleToken.BraceOpen:
		case SimpleToken.BraceSquareOpen:
			tokenInfo.Color = OperatorColor;
			tokenInfo.Type = TokenType.Operator;
			tokenInfo.Trigger = TokenTriggers.MatchBraces | TokenTriggers.ParameterStart;
			break;
		case SimpleToken.Comma:
			tokenInfo.Color = OperatorColor;
			tokenInfo.Type = TokenType.Operator;
			tokenInfo.Trigger = TokenTriggers.MatchBraces | TokenTriggers.ParameterNext;
			break;
		case SimpleToken.Dot:
		case SimpleToken.Colon:
			tokenInfo.Color = OperatorColor;
			tokenInfo.Type = TokenType.Delimiter;
			tokenInfo.Trigger = TokenTriggers.MemberSelect;
			break;
		case SimpleToken.Operator:
			tokenInfo.Color = OperatorColor;
			tokenInfo.Type = TokenType.Operator;
			tokenInfo.Trigger = TokenTriggers.None;
			break;
		case SimpleToken.BraceClose:
		case SimpleToken.BraceSquareClose:
		case SimpleToken.Braces:
			tokenInfo.Color = OperatorColor;
			tokenInfo.Type = TokenType.WhiteSpace;
			tokenInfo.Trigger = TokenTriggers.MatchBraces;
			break;
		case SimpleToken.Identifier:
			if (IsKeyword(sLine, iStart, iOffset))
			{
				tokenInfo.Color = TokenColor.Keyword;
				tokenInfo.Type = TokenType.Keyword;
				tokenInfo.Trigger = TokenTriggers.None;
			}
			else
			{
				tokenInfo.Color = TokenColor.Identifier;
				tokenInfo.Type = TokenType.Text;
				tokenInfo.Trigger = TokenTriggers.None;
			}
			break;
		case SimpleToken.Type:
			tokenInfo.Color = TypeColor;
			tokenInfo.Type = TokenType.Text;
			tokenInfo.Trigger = TokenTriggers.None;
			break;
		case SimpleToken.LongStringStart:
			lineState = (ScanLevel() << DataShift) | (lineState & StateFlag) | StringFlag;
			goto RedoLineState;
		case SimpleToken.LongCommentStart:
			lineState = (ScanLevel() << DataShift) | (lineState & StateFlag) | CommentFlag;
			goto RedoLineState;
	}
	tokenInfo.StartIndex = iStart;
	// NOTE(review): self-assignment below is dead code — looks like a leftover
	// or a missing intended assignment; confirm before removing.
	tokenInfo.Color = tokenInfo.Color;
	tokenInfo.EndIndex = iOffset - 1;
	return(true);
} // func ScanTokenAndProvideInfoAboutIt
/// <summary>
/// Word-level fallback recognizer: returns, in order of precedence, a run of
/// whitespace/separators, a single hard token terminator (punctuation), a
/// culture-defined leading clitic, a leading run of separable punctuation, or
/// the remaining word (with trailing punctuation/full stops split off and
/// abbreviations detected via the culture resources).
/// </summary>
/// <param name="s">Input text.</param>
/// <param name="from">Start position within <paramref name="s"/>.</param>
/// <param name="allowTokenBundles">Unused by this implementation.</param>
/// <param name="consumedLength">Receives the number of characters consumed.</param>
/// <returns>The recognized token, or null for empty input.</returns>
public override Core.Tokenization.Token Recognize(string s, int from, bool allowTokenBundles, ref int consumedLength)
{
    /*
     * TODO handle some special cases, e.g.
     * * "--" in en-US
     * "...." etc. in mid-words
     * "l-xxx" in Maltese (leading and trailing clitics)
     * ta' in Maltese (word ends in non-sep punct)
     *
     */
    consumedLength = 0;
    if (String.IsNullOrEmpty(s))
    {
        return(null);
    }
    int len = s.Length;
    int pos = from;
    // check for leading whitespace
    while (pos < len && (System.Char.IsWhiteSpace(s, pos) || System.Char.IsSeparator(s, pos)))
    {
        ++pos;
    }
    if (pos > from)
    {
        // found a whitespace token
        consumedLength = pos - from;
        Token t = new SimpleToken(s.Substring(from, consumedLength), TokenType.Whitespace);
        return(t);
    }
    // initial hard token terminators: treat as punctuation token
    if (IsHardTokenTerminator(s, pos))
    {
        consumedLength = 1;
        Token t = new SimpleToken(s.Substring(from, consumedLength), TokenType.GeneralPunctuation);
        return(t);
    }
    // clitics, if defined by the culture, are always separated
    if (_LeadingClitics != null)
    {
        // Walk the clitic trie as far as the input matches.
        TrieIterator <char, int> iter = _LeadingClitics.GetIterator();
        int cliticLength = 0;
        while (iter != null && pos + cliticLength < len && !iter.IsFinal)
        {
            if (!iter.Traverse(s[pos + cliticLength]))
            {
                break;
            }
            ++cliticLength;
        }
        if (iter != null && iter.IsValid && iter.IsFinal)
        {
            // found a clitic
            consumedLength = cliticLength;
            Token t = new SimpleToken(s.Substring(from, cliticLength), TokenType.Word);
            return(t);
        }
    }
    char c = s[pos];
    bool lastIsCJK = Core.CharacterProperties.IsCJKChar(c);
    while (pos < len && !(System.Char.IsWhiteSpace(c) || System.Char.IsSeparator(c) || IsHardTokenTerminator(s, pos)))
    {
        // don't step over critical script changes
        // NOTE default fallback tokenizer will return CJK sequences as one token while
        // FE fallback tokenizer will split them into single-char char sequences.
        bool currentIsCJK = Core.CharacterProperties.IsCJKChar(c);
        if (currentIsCJK != lastIsCJK)
        {
            break;
        }
        ++pos;
        if (pos < len)
        {
            c = s[pos];
            lastIsCJK = currentIsCJK;
        }
    }
    int upto = pos;
    // [from, upto[ is now the longest non-whitespace chain. Start separating leading punctuation,
    // including full stops
    // TODO this will put ")." into one token. We may want to split it into two.
    for (pos = from; pos < upto && (IsSeparablePunct(s, pos) || s[pos] == '.'); ++pos)
    {
        ;
    }
    if (pos > from)
    {
        // found a sequence of separable punctuation
        consumedLength = pos - from;
        Token t = new SimpleToken(s.Substring(from, consumedLength), TokenType.GeneralPunctuation);
        return(t);
    }
    // token does not start with separable punctuation - remove separable punctuation from the end
    // and take care of trailing full stop and abbreviations
    // We need to catch situations like "...test)." - here, after the full stop is removed, we need to
    // check for trailing closing punctuation again, and vice versa as in "test...)." and similar cases.
    bool separated;
    bool isAbbreviation = false;
    do
    {
        separated = false;
        // take care of trailing closing punctuation
        while (upto - 1 > pos && IsSeparablePunct(s, upto - 1))
        {
            --upto;
            separated = true;
        }
        // take care of full stop separation
        int trailingFullStops = 0;
        while (upto - 1 - trailingFullStops > pos && s[upto - 1 - trailingFullStops] == '.')
        {
            ++trailingFullStops;
        }
        if (trailingFullStops > 1)
        {
            // ellipsis
            upto -= trailingFullStops;
            separated = true;
        }
        else if (trailingFullStops == 1)
        {
            // single trailing full stop - separate if we aren't looking at a known abbreviation.
            // TODO add abbreviation heuristics
            // TODO use specific token type for abbreviations?
            if (_Resources == null || !_Resources.IsAbbreviation(s.Substring(from, upto - from)))
            {
                --upto;
                separated = true;
            }
            else
            {
                isAbbreviation = true;
            }
        }
    } while (separated);
    // treat the remainder as a word
    consumedLength = upto - from;
    Token token = new SimpleToken(s.Substring(from, consumedLength), isAbbreviation ? TokenType.Abbreviation : TokenType.Word);
    return(token);
}
private SimpleToken token = null; // the token currently being parsed

/**
 * Parses a string into tokens (i.e. translates the string into individual
 * tokens according to the token definitions).
 * This is a finite state automaton transitioning between states.
 * @param code the source text to tokenize
 * @return a reader over the recognized tokens
 */
public SimpleTokenReader tokenize(string code)
{
    tokens = new List <IToken>();
    StringReader code_reader = new StringReader(code);
    tokenText = new StringWriter();
    token = new SimpleToken();
    int ich = 0;
    char ch = Convert.ToChar(ich);
    DfaState state = DfaState.Initial;
    try
    {
        while ((ich = code_reader.Read()) != -1)
        {
            ch = Convert.ToChar(ich);
            switch (state)
            {
                case DfaState.Initial:
                    state = initToken(ch);
                    break;
                case DfaState.Id:
                    if (isAlpha(ch) || (isDigit(ch)))
                    {
                        tokenText.Write(ch);
                    }
                    else
                    {
                        state = initToken(ch);
                    }
                    break;
                case DfaState.GT:
                    if (ch == '=')
                    {
                        token.type = TokenType.GE; // convert to GE
                        state = DfaState.GE;
                        tokenText.Write(ch);
                    }
                    else
                    {
                        state = initToken(ch); // leave the GT state and save the token
                    }
                    break;
                case DfaState.GE:
                case DfaState.Assignment:
                case DfaState.Plus:
                case DfaState.Minus:
                case DfaState.Star:
                case DfaState.Slash:
                case DfaState.SemiColon:
                case DfaState.LeftParen:
                case DfaState.RightParen:
                    state = initToken(ch); // leave the current state and save the token
                    break;
                case DfaState.IntLiteral:
                    if (isDigit(ch))
                    {
                        tokenText.Write(ch); // stay in the integer-literal state
                    }
                    else
                    {
                        state = initToken(ch); // leave the current state and save the token
                    }
                    break;
                case DfaState.Id_int1:
                    // Saw 'i'; watching for the keyword "int".
                    if (ch == 'n')
                    {
                        state = DfaState.Id_int2;
                        tokenText.Write(ch);
                    }
                    else if (isDigit(ch) || isAlpha(ch))
                    {
                        state = DfaState.Id; // switch back to the Id state
                        tokenText.Write(ch);
                    }
                    else
                    {
                        state = initToken(ch);
                    }
                    break;
                case DfaState.Id_int2:
                    if (ch == 't')
                    {
                        state = DfaState.Id_int3;
                        tokenText.Write(ch);
                    }
                    else if (isDigit(ch) || isAlpha(ch))
                    {
                        state = DfaState.Id; // switch back to the Id state
                        tokenText.Write(ch);
                    }
                    else
                    {
                        state = initToken(ch);
                    }
                    break;
                case DfaState.Id_int3:
                    // "int" only counts as a keyword if followed by a blank.
                    if (isBlank(ch))
                    {
                        token.type = TokenType.Int;
                        state = initToken(ch);
                    }
                    else
                    {
                        state = DfaState.Id; // switch back to the Id state
                        tokenText.Write(ch);
                    }
                    break;
                default:
                    Console.WriteLine("Unexpected state: {0}", state);
                    break;
            }
        }
        // Flush the final token if any text remains.
        if (tokenText.ToString() != "")
        {
            initToken(ch);
        }
    }
    catch (IOException e)
    {
        Console.WriteLine( "{0}: The write operation could not be performed ", e.GetType().Name);
    }
    return(new SimpleTokenReader(tokens));
}