/// <summary>
        /// Splits a regular-expression string into a flat list of tokens.
        /// </summary>
        /// <remarks>
        /// <c>(</c>, <c>)</c>, <c>*</c> and <c>|</c> map to their dedicated token types.
        /// Everything else becomes a <see cref="CharacterClassToken"/>:
        /// a backslash escape and a plain character both produce a backslash followed by
        /// the character, while <c>[...]</c> produces the raw text between the brackets
        /// with any escapes inside it left untouched.
        /// </remarks>
        /// <param name="regexString">The regular expression to tokenize.</param>
        /// <returns>The tokens in the order they occur in <paramref name="regexString"/>.</returns>
        /// <exception cref="RegexParseException">
        /// The input ends with a backslash that escapes nothing, or a character class
        /// opened with <c>[</c> is never closed.
        /// </exception>
        public List<Token> Convert(string regexString)
        {
            var result = new List<Token>();
            var length = regexString.Length;

            // The index is also advanced inside the '\\' and '[' branches, which consume
            // more than one input character per token.
            for (var currentCharIndex = 0; currentCharIndex < length; currentCharIndex++)
            {
                var   currentChar = regexString[currentCharIndex];
                Token toAdd;
                switch (currentChar)
                {
                case '(':
                    toAdd = new LeftBracketToken();
                    break;

                case ')':
                    toAdd = new RightBracketToken();
                    break;

                case '*':
                    toAdd = new StarToken();
                    break;

                case '|':
                    toAdd = new SumToken();
                    break;

                case '\\':
                    // Move on to the escaped character; it must exist.
                    currentCharIndex++;
                    if (currentCharIndex == length)
                    {
                        throw new RegexParseException("Backslash at the end of input escaping nothing.");
                    }

                    toAdd = new CharacterClassToken($"\\{regexString[currentCharIndex]}");
                    break;

                case '[':
                    // Step past '[' and scan for the first unescaped ']'.
                    currentCharIndex++;
                    var startingIndex = currentCharIndex;

                    // A backslash as the very last input character advances the index by two,
                    // i.e. to length + 1. The bounds are therefore checked with '<' / '>='
                    // rather than '!=' / '==', otherwise the indexer below would be read out
                    // of range instead of reporting the unclosed class.
                    while (currentCharIndex < length && regexString[currentCharIndex] != ']')
                    {
                        // Escape inside character class: skip the escaped character as well,
                        // so that an escaped ']' does not terminate the class.
                        if (regexString[currentCharIndex] == '\\')
                        {
                            currentCharIndex++;
                        }

                        currentCharIndex++;
                    }

                    if (currentCharIndex >= length)
                    {
                        throw new RegexParseException($"Character class not properly closed in {regexString}");
                    }

                    // Body of the class without the surrounding brackets.
                    var value = regexString.Substring(startingIndex, currentCharIndex - startingIndex);
                    toAdd = new CharacterClassToken(value);
                    break;

                default:
                    // A plain character is stored in escaped form, the same shape the
                    // '\\' branch produces.
                    toAdd = new CharacterClassToken($"\\{currentChar}");
                    break;
                }

                result.Add(toAdd);
            }

            return result;
        }
Beispiel #2
0
        /// <summary>
        /// Builds a regex from the body of a character-class token by expanding the body
        /// into one regex per character and passing them to <c>RegexUtils.Sum</c>.
        /// </summary>
        /// <remarks>
        /// A backslash makes the following character literal. An unescaped <c>-</c> between
        /// two characters denotes an inclusive range and is expanded into every character
        /// from the start to the end character.
        /// </remarks>
        /// <param name="token">The token whose <c>Value</c> holds the character-class body.</param>
        /// <returns>The result of <c>RegexUtils.Sum</c> over all expanded characters.</returns>
        /// <exception cref="RegexParserInternalException">
        /// The body ends with a backslash, contains an unescaped minus at its beginning or
        /// end, or contains a range whose start character is greater than its end character.
        /// </exception>
        private Regex CreateRegexFromCharacterClassToken(CharacterClassToken token)
        {
            var chars   = token.Value;
            var regexes = new List<Regex>();

            for (var index = 0; index < chars.Length; index++)
            {
                var currentChar = chars[index];
                switch (currentChar)
                {
                case '\\':
                    // Escape: take the next character literally.
                    index++;
                    if (index == chars.Length)
                    {
                        throw new RegexParserInternalException("Character class: escape (\\) at the end of body.");
                    }

                    regexes.Add(chars[index].ToRegex());
                    break;

                case '-':
                    var previousIndex = index - 1;
                    var nextIndex     = index + 1;
                    if (nextIndex == chars.Length)
                    {
                        throw new RegexParserInternalException(
                                  $"Character class: unescaped minus at the end of body: {chars}");
                    }

                    // The end of the range may itself be escaped; step over the backslash.
                    if (chars[nextIndex] == '\\')
                    {
                        nextIndex++;
                    }

                    if (nextIndex == chars.Length)
                    {
                        throw new RegexParserInternalException(
                                  $"Character class: backslash after minus at the end of body: {chars}");
                    }

                    if (previousIndex < 0)
                    {
                        throw new RegexParserInternalException(
                                  $"Character class: unescaped minus at the beginning of body: {chars}");
                    }

                    // The start character was already added by the previous iteration;
                    // drop it because the range expansion below adds it again.
                    regexes.RemoveAt(regexes.Count - 1);

                    var startCharacter = chars[previousIndex];
                    var endCharacter   = chars[nextIndex];
                    if (startCharacter > endCharacter)
                    {
                        throw new RegexParserInternalException(
                                  $"Character class: start character > end character: {startCharacter} > {endCharacter}");
                    }

                    // Count in int rather than char: a char counter wraps from '\uffff' back
                    // to '\0' when the range ends at char.MaxValue, so the loop condition
                    // would never become false.
                    for (int code = startCharacter; code <= endCharacter; code++)
                    {
                        regexes.Add(((char)code).ToRegex());
                    }

                    // Resume after the end character of the range.
                    index = nextIndex;
                    break;

                default:
                    regexes.Add(currentChar.ToRegex());
                    break;
                }
            }

            return RegexUtils.Sum(regexes.ToArray());
        }