/// <summary>
/// Scanner rule that tries to consume an identifier from the start of
/// <paramref name="source"/>. An identifier is one ASCII letter followed by
/// any number of ASCII letters or digits.
/// </summary>
/// <param name="source">
/// Remaining input. On success the matched identifier is removed from its front;
/// on failure it is left untouched.
/// </param>
/// <param name="scannerState">
/// Scanner position. On success its character index is advanced by the length
/// of the identifier.
/// </param>
/// <returns>
/// A <see cref="RuleMatchInfo"/> wrapping an identifier token on success;
/// otherwise <see cref="RuleMatchInfo.Fail"/>.
/// </returns>
public static RuleMatchInfo MatchIdentifier(ref string source, ScannerState scannerState)
{
    var match = Regex.Match(source, "^([A-Za-z][A-Za-z0-9]*)");
    if (!match.Success)
    {
        return RuleMatchInfo.Fail;
    }

    // The pattern is anchored with '^', so the whole match is exactly the identifier.
    var id = match.Value;
    source = source.Remove(0, id.Length);
    scannerState.AdvanceCharIndex(id.Length);

    // CharIndex is read after AdvanceCharIndex, so the token carries the post-advance index.
    return new RuleMatchInfo(
        new ClassDiagramToken(scannerState.LineIndex, scannerState.CharIndex, TokenType.Identifier, id));
}
/// <summary>
/// Scanner rule that tries to consume a number (one or more ASCII digits) from
/// the start of <paramref name="source"/>.
/// </summary>
/// <remarks>
/// The digits must be followed by a word boundary (<c>\b</c>), so input such as
/// "0adsf" is rejected instead of being split into a number and an identifier.
/// </remarks>
/// <param name="source">
/// Remaining input. On success the matched digits are removed from its front;
/// on failure it is left untouched.
/// </param>
/// <param name="scannerState">
/// Scanner position. On success its character index is advanced by the length
/// of the number.
/// </param>
/// <returns>
/// A <see cref="RuleMatchInfo"/> wrapping a number token on success;
/// otherwise <see cref="RuleMatchInfo.Fail"/>.
/// </returns>
public static RuleMatchInfo MatchNumbers(ref string source, ScannerState scannerState)
{
    var match = Regex.Match(source, @"^([0-9]+)\b");
    if (!match.Success)
    {
        return RuleMatchInfo.Fail;
    }

    // Found a number. \b is zero-width, so the whole match is exactly the digit run.
    var numberString = match.Value;
    source = source.Remove(0, numberString.Length);
    scannerState.AdvanceCharIndex(numberString.Length);

    // CharIndex is read after AdvanceCharIndex, so the token carries the post-advance index.
    return new RuleMatchInfo(
        new ClassDiagramToken(scannerState.LineIndex, scannerState.CharIndex, TokenType.Number, numberString));
}
/// <summary>
/// Splits <paramref name="source"/> into a stream of tokens.
/// </summary>
/// <remarks>
/// At each position the scanner first looks for a keyword, then tries every
/// entry of <c>Rules</c> in order. If neither succeeds, the unrecognised text
/// up to the next space, newline or tab is dropped without emitting a token;
/// if no such character remains, the rest of the input becomes a single
/// <c>TokenType.Unknown</c> token and scanning stops.
/// Keyword prefixes are probed from length 1 upwards, so the shortest keyword
/// that prefixes the input is the one that is taken.
/// </remarks>
/// <param name="source">The text to scan.</param>
/// <returns>The tokens found, in source order.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> is <c>null</c>.
/// </exception>
public ClassDiagramTokenStream Parse(string source)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    var tokens = new ClassDiagramTokenStream();

    // Characters that end a run of unrecognised input.
    // TODO win/lin? ('\r' is not in the set)
    var separators = new[] { ' ', '\n', '\t' };

    // TrimStart runs once up front and again after every completed iteration
    // (including 'continue'); 'break' leaves the loop without running it.
    for (TrimStart(ref source); source.Length >= 1; TrimStart(ref source))
    {
        // 1) Keywords: probe prefixes of increasing length, first hit wins.
        string keyword = null;
        for (var length = 1; length <= LongestKeyword && source.Length >= length; length++)
        {
            var candidate = source.Substring(0, length);
            if (Keywords.Contains(candidate))
            {
                keyword = candidate;
                break;
            }
        }

        if (keyword != null)
        {
            var tokenType = keyword.FromDisplayString();
            tokens.Add(new ClassDiagramToken(
                _scannerState.LineIndex,
                _scannerState.AdvanceCharIndex(keyword.Length),
                tokenType));
            source = source.Remove(0, keyword.Length);
            continue;
        }

        // 2) No keyword: try the additional rules in order. A successful rule
        //    consumes its own text from 'source' and updates the scanner state.
        var ruleMatched = false;
        foreach (var rule in Rules)
        {
            var result = rule(ref source, _scannerState);
            if (!result.IsSuccess)
            {
                continue;
            }

            tokens.Add(result.Token);
            ruleMatched = true;
            break;
        }

        if (ruleMatched)
        {
            continue;
        }

        // 3) Nothing matched -> error recovery.
        // TODO cannot flag a token "invalid"
        var separatorIndex = source.IndexOfAny(separators);
        if (separatorIndex < 0)
        {
            // No whitespace left: emit everything that remains as one Unknown token and stop.
            tokens.Add(new ClassDiagramToken(
                _scannerState.LineIndex,
                _scannerState.CharIndex + source.Length,
                TokenType.Unknown,
                source));
            break;
        }

        // Drop the unrecognised characters up to (not including) the separator and keep scanning.
        source = source.Remove(0, separatorIndex);
        _scannerState.AdvanceCharIndex(separatorIndex);
    }

    return tokens;
}