/// <summary>
/// Splits a regular expression string into a flat list of tokens.
/// </summary>
/// <remarks>
/// <c>(</c>, <c>)</c>, <c>*</c> and <c>|</c> map to their dedicated token types.
/// A backslash followed by a character, and any other plain character, become a
/// <see cref="CharacterClassToken"/> whose value is a backslash followed by that character.
/// A bracketed class <c>[...]</c> becomes a <see cref="CharacterClassToken"/> whose value is
/// the raw text between the brackets (escapes inside the class are kept verbatim).
/// </remarks>
/// <param name="regexString">The regular expression text to tokenize.</param>
/// <returns>The tokens in the order they appear in <paramref name="regexString"/>.</returns>
/// <exception cref="RegexParseException">
/// The input ends with a dangling backslash, or a character class is opened with
/// <c>[</c> but never closed (including when the input ends right after an escape
/// inside the class).
/// </exception>
public List<Token> Convert(string regexString)
{
    var result = new List<Token>();
    var length = regexString.Length;
    var currentCharIndex = 0;

    while (currentCharIndex < length)
    {
        var currentChar = regexString[currentCharIndex];
        Token toAdd;

        switch (currentChar)
        {
            case '(':
                toAdd = new LeftBracketToken();
                break;

            case ')':
                toAdd = new RightBracketToken();
                break;

            case '*':
                toAdd = new StarToken();
                break;

            case '|':
                toAdd = new SumToken();
                break;

            case '\\':
                // Step onto the escaped character; it must exist.
                currentCharIndex++;
                if (currentCharIndex == length)
                {
                    throw new RegexParseException("Backslash at the end of input escaping nothing.");
                }

                toAdd = new CharacterClassToken($"\\{regexString[currentCharIndex]}");
                break;

            case '[':
                // Skip the opening bracket; the class body starts right after it.
                currentCharIndex++;
                var startingIndex = currentCharIndex;

                // Use '<' rather than '!=': an escape on the last input character moves the
                // index two steps, past 'length', and '!=' would then index out of range.
                while (currentCharIndex < length && regexString[currentCharIndex] != ']')
                {
                    // Escape inside character class: skip the escaped character as well,
                    // so that an escaped ']' does not terminate the class.
                    if (regexString[currentCharIndex] == '\\')
                    {
                        currentCharIndex++;
                    }

                    currentCharIndex++;
                }

                if (currentCharIndex >= length)
                {
                    throw new RegexParseException($"Character class not properly closed in {regexString}");
                }

                // currentCharIndex now points at the closing ']', which is excluded from the value.
                var value = regexString.Substring(startingIndex, currentCharIndex - startingIndex);
                toAdd = new CharacterClassToken(value);
                break;

            default:
                // A plain character is stored in escaped form so it is read back as a literal.
                toAdd = new CharacterClassToken($"\\{currentChar}");
                break;
        }

        result.Add(toAdd);
        currentCharIndex++;
    }

    return result;
}
/// <summary>
/// Builds a <see cref="Regex"/> from the body of a character class token.
/// </summary>
/// <remarks>
/// The token value is scanned left to right:
/// a backslash makes the following character a literal;
/// <c>a-b</c> expands to every character from <c>a</c> to <c>b</c> inclusive
/// (the end of the range may itself be backslash-escaped);
/// any other character is a literal.
/// Each resulting character is turned into a regex with <c>ToRegex()</c> and the
/// collected regexes are passed to <c>RegexUtils.Sum</c>.
/// </remarks>
/// <param name="token">The character class token whose <c>Value</c> is interpreted.</param>
/// <returns>The result of <c>RegexUtils.Sum</c> over the regexes of all listed characters.</returns>
/// <exception cref="RegexParserInternalException">
/// The body ends with a backslash, has an unescaped minus at its beginning or end,
/// ends with a backslash right after a minus, or contains a range whose start
/// character is greater than its end character.
/// </exception>
private Regex CreateRegexFromCharacterClassToken(CharacterClassToken token)
{
    var chars = token.Value;
    var regexes = new List<Regex>();

    for (var index = 0; index < chars.Length; index++)
    {
        var currentChar = chars[index];

        switch (currentChar)
        {
            case '\\':
                // Escaped character: consume the backslash and take the next character literally.
                index++;
                if (index == chars.Length)
                {
                    throw new RegexParserInternalException("Character class: escape (\\) at the end of body.");
                }

                regexes.Add(chars[index].ToRegex());
                break;

            case '-':
                var previousIndex = index - 1;
                var nextIndex = index + 1;

                if (nextIndex == chars.Length)
                {
                    throw new RegexParserInternalException(
                        $"Character class: unescaped minus at the end of body: {chars}");
                }

                // The end of the range may be escaped; step over the backslash.
                if (chars[nextIndex] == '\\')
                {
                    nextIndex++;
                }

                if (nextIndex == chars.Length)
                {
                    throw new RegexParserInternalException(
                        $"Character class: backslash after minus at the end of body: {chars}");
                }

                if (previousIndex < 0)
                {
                    throw new RegexParserInternalException(
                        $"Character class: unescaped minus at the beginning of body: {chars}");
                }

                // The range start was already added as a single literal by the previous
                // iteration; drop it, since the expansion below adds it again.
                regexes.RemoveAt(regexes.Count - 1);

                var startCharacter = chars[previousIndex];
                var endCharacter = chars[nextIndex];
                if (startCharacter > endCharacter)
                {
                    throw new RegexParserInternalException(
                        $"Character class: start character > end character: {startCharacter} > {endCharacter}");
                }

                // Stop on equality *before* incrementing. A "charToAdd <= endCharacter" loop
                // condition never becomes false when endCharacter is the largest character
                // value, because the increment wraps around (or overflows in a checked build).
                // startCharacter <= endCharacter is guaranteed by the check above.
                for (var charToAdd = startCharacter; ; charToAdd++)
                {
                    regexes.Add(charToAdd.ToRegex());
                    if (charToAdd == endCharacter)
                    {
                        break;
                    }
                }

                // Resume after the range end (the loop header adds the final +1).
                index = nextIndex;
                break;

            default:
                regexes.Add(currentChar.ToRegex());
                break;
        }
    }

    return RegexUtils.Sum(regexes.ToArray());
}