/// <summary>
/// Scanner rule that tries to read an identifier from the very beginning of
/// <paramref name="source"/>. An identifier is one ASCII letter followed by any
/// number of ASCII letters or digits.
/// </summary>
/// <param name="source">
/// Remaining input. On success the matched identifier is cut off its front;
/// on failure it is left unchanged.
/// </param>
/// <param name="scannerState">
/// Scanner position. On success its char index is advanced by the identifier length.
/// </param>
/// <returns>
/// A <see cref="RuleMatchInfo"/> wrapping an <c>Identifier</c> token on success,
/// otherwise <see cref="RuleMatchInfo.Fail"/>.
/// </returns>
public static RuleMatchInfo MatchIdentifier(ref string source, ScannerState scannerState)
            {
                var identifierMatch = Regex.Match(source, "^([A-Za-z][A-Za-z0-9]*)");
                if (!identifierMatch.Success)
                {
                    return RuleMatchInfo.Fail;
                }

                // Consume the identifier from the input and move the scanner past it.
                var name     = identifierMatch.Value;
                var consumed = name.Length;
                source = source.Substring(consumed);
                scannerState.AdvanceCharIndex(consumed);

                // The token position is read after the scanner has been advanced.
                var token = new ClassDiagramToken(
                    scannerState.LineIndex,
                    scannerState.CharIndex,
                    TokenType.Identifier,
                    name);

                return new RuleMatchInfo(token);
            }
            // Note: input such as "0adsf" must NOT be scanned as a number; the trailing \b in the
            // pattern requires a word boundary directly after the digits.
            /// <summary>
            /// Scanner rule that tries to read an unsigned integer literal from the very
            /// beginning of <paramref name="source"/>. The digits must end at a word boundary,
            /// so digits immediately followed by a letter (e.g. "0adsf") do not match.
            /// </summary>
            /// <param name="source">
            /// Remaining input. On success the matched digits are cut off its front;
            /// on failure it is left unchanged.
            /// </param>
            /// <param name="scannerState">
            /// Scanner position. On success its char index is advanced by the number of digits.
            /// </param>
            /// <returns>
            /// A <see cref="RuleMatchInfo"/> wrapping a <c>Number</c> token on success,
            /// otherwise <see cref="RuleMatchInfo.Fail"/>.
            /// </returns>
            public static RuleMatchInfo MatchNumbers(ref string source, ScannerState scannerState)
            {
                var numberMatch = Regex.Match(source, @"^([0-9]+)\b");
                if (!numberMatch.Success)
                {
                    return RuleMatchInfo.Fail;
                }

                // \b is zero-width, so the overall match text consists of the digits only.
                var digits     = numberMatch.Value;
                var digitCount = digits.Length;
                source = source.Substring(digitCount);
                scannerState.AdvanceCharIndex(digitCount);

                // The token position is read after the scanner has been advanced.
                var token = new ClassDiagramToken(
                    scannerState.LineIndex,
                    scannerState.CharIndex,
                    TokenType.Number,
                    digits);

                return new RuleMatchInfo(token);
            }
        /// <summary>
        /// Splits <paramref name="source"/> into a stream of tokens.
        /// </summary>
        /// <remarks>
        /// Each pass over the remaining input tries, in this order:
        /// <list type="number">
        /// <item>a keyword prefix (prefixes are probed from length 1 upwards, so the
        /// shortest matching keyword wins);</item>
        /// <item>the entries of <c>Rules</c>, first successful rule wins;</item>
        /// <item>error recovery: the unrecognised text up to the next blank, newline or tab
        /// is dropped without producing a token; if there is no such character, the rest of
        /// the input becomes a single <c>Unknown</c> token and scanning stops.</item>
        /// </list>
        /// <c>TrimStart</c> (defined elsewhere; presumably strips leading whitespace) is
        /// called before the first pass and after every completed pass.
        /// </remarks>
        /// <param name="source">Text to tokenize; must not be <c>null</c>.</param>
        /// <returns>The tokens recognised in <paramref name="source"/>, in input order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
        public ClassDiagramTokenStream Parse(string source)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }

            var result = new ClassDiagramTokenStream();

            // The loop header calls TrimStart before the first pass and again after each pass
            // that finishes normally or via "continue"; leaving via "break" skips that call.
            for (TrimStart(ref source); source.Length > 0; TrimStart(ref source))
            {
                // --- 1) keywords -------------------------------------------------------------
                var consumedKeyword = false;
                for (var length = 1; length <= LongestKeyword && length <= source.Length; length++)
                {
                    var prefix = source.Substring(0, length);
                    if (!Keywords.Contains(prefix))
                    {
                        continue;
                    }

                    var keywordType = prefix.FromDisplayString();

                    // Read the line first, then advance; the value returned by
                    // AdvanceCharIndex is what gets stored as the token's char position.
                    var line   = _scannerState.LineIndex;
                    var column = _scannerState.AdvanceCharIndex(length);
                    result.Add(new ClassDiagramToken(line, column, keywordType));

                    source          = source.Substring(length);
                    consumedKeyword = true;
                    break;
                }

                if (consumedKeyword)
                {
                    continue;
                }

                // --- 2) additional rules -----------------------------------------------------
                var ruleMatched = false;
                foreach (var rule in Rules)
                {
                    var outcome = rule(ref source, _scannerState);
                    if (!outcome.IsSuccess)
                    {
                        continue;
                    }

                    result.Add(outcome.Token);
                    ruleMatched = true;
                    break;
                }

                if (ruleMatched)
                {
                    continue;
                }

                // --- 3) nothing matched: error recovery --------------------------------------
                // TODO: there is currently no way to flag a token as "invalid".
                // TODO: '\r' is not part of this set (Windows vs. Linux line endings?).
                var firstWhitespace = source.IndexOfAny(new[] { ' ', '\n', '\t' });
                if (firstWhitespace < 0)
                {
                    // No whitespace left: emit everything that remains as one Unknown token and stop.
                    result.Add(new ClassDiagramToken(
                                   _scannerState.LineIndex,
                                   _scannerState.CharIndex + source.Length,
                                   TokenType.Unknown,
                                   source));
                    break;
                }

                // Drop the unrecognised run up to (not including) the whitespace and keep scanning.
                source = source.Remove(0, firstWhitespace);
                _scannerState.AdvanceCharIndex(firstWhitespace);
            }

            return result;
        }