public void Tokenizer_GetTokens_ShouldTokenizeOnePartConditional()
{
    // Arrange: a one-part conditional group "(?(expr)yes)".
    const string input = "(?(foo)bar)";
    var expected = new[]
    {
        new Token(TokenType.GroupStart, "(", 0),
        new Token(TokenType.GroupDirectiveStart, "?", 1),
        new Token(TokenType.ConditionalExpressionStart, "(", 2),
        new Token(TokenType.Literal, "f", 3),
        new Token(TokenType.Literal, "o", 4),
        new Token(TokenType.Literal, "o", 5),
        new Token(TokenType.ConditionalExpressionEnd, ")", 6),
        new Token(TokenType.Literal, "b", 7),
        new Token(TokenType.Literal, "a", 8),
        new Token(TokenType.Literal, "r", 9),
        new Token(TokenType.GroupEnd, ")", 10)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeOnePartConditionalWithNestedNamedGroup()
{
    // Arrange: conditional whose "yes" branch is a named capture group.
    const string input = "(?(foo)(?<bar>baz))";
    var expected = new[]
    {
        new Token(TokenType.GroupStart, "(", 0),
        new Token(TokenType.GroupDirectiveStart, "?", 1),
        new Token(TokenType.ConditionalExpressionStart, "(", 2),
        new Token(TokenType.Literal, "f", 3),
        new Token(TokenType.Literal, "o", 4),
        new Token(TokenType.Literal, "o", 5),
        new Token(TokenType.ConditionalExpressionEnd, ")", 6),
        new Token(TokenType.GroupStart, "(", 7),
        new Token(TokenType.GroupDirectiveStart, "?", 8),
        new Token(TokenType.NamedIdentifierStartOrLookBehindMarker, "<", 9),
        new Token(TokenType.Literal, "b", 10),
        new Token(TokenType.Literal, "a", 11),
        new Token(TokenType.Literal, "r", 12),
        new Token(TokenType.NamedIdentifierEnd, ">", 13),
        new Token(TokenType.Literal, "b", 14),
        new Token(TokenType.Literal, "a", 15),
        new Token(TokenType.Literal, "z", 16),
        new Token(TokenType.GroupEnd, ")", 17),
        new Token(TokenType.GroupEnd, ")", 18)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeOrOperator()
{
    // Arrange: alternation between three literal words.
    const string input = "cat|dog|tiger";
    var expected = new[]
    {
        new Token(TokenType.Literal, "c", 0),
        new Token(TokenType.Literal, "a", 1),
        new Token(TokenType.Literal, "t", 2),
        new Token(TokenType.OrOperator, "|", 3),
        new Token(TokenType.Literal, "d", 4),
        new Token(TokenType.Literal, "o", 5),
        new Token(TokenType.Literal, "g", 6),
        new Token(TokenType.OrOperator, "|", 7),
        new Token(TokenType.Literal, "t", 8),
        new Token(TokenType.Literal, "i", 9),
        new Token(TokenType.Literal, "g", 10),
        new Token(TokenType.Literal, "e", 11),
        new Token(TokenType.Literal, "r", 12)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeBalancingGroup()
{
    // Arrange: balancing group "(?<name1-name2>...)" — the '-' separates the two names.
    const string input = "(?<foo-bar>baz)";
    var expected = new[]
    {
        new Token(TokenType.GroupStart, "(", 0),
        new Token(TokenType.GroupDirectiveStart, "?", 1),
        new Token(TokenType.NamedIdentifierStartOrLookBehindMarker, "<", 2),
        new Token(TokenType.Literal, "f", 3),
        new Token(TokenType.Literal, "o", 4),
        new Token(TokenType.Literal, "o", 5),
        new Token(TokenType.BalancingGroupNamedIdentifierSeparator, "-", 6),
        new Token(TokenType.Literal, "b", 7),
        new Token(TokenType.Literal, "a", 8),
        new Token(TokenType.Literal, "r", 9),
        new Token(TokenType.NamedIdentifierEnd, ">", 10),
        new Token(TokenType.Literal, "b", 11),
        new Token(TokenType.Literal, "a", 12),
        new Token(TokenType.Literal, "z", 13),
        new Token(TokenType.GroupEnd, ")", 14)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeAnyCharacterClass()
{
    // Arrange: the '.' wildcard on its own.
    const string input = ".";
    var expected = new[]
    {
        new Token(TokenType.AnyCharacter, ".", 0)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeEndOfStringAssertion()
{
    // Arrange: a literal followed by the '$' anchor.
    const string input = "a$";
    var expected = new[]
    {
        new Token(TokenType.Literal, "a", 0),
        new Token(TokenType.EndOfStringAssertion, "$", 1)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeOneOrMoreQuantifier()
{
    // Arrange: a literal followed by the '+' quantifier.
    const string input = "a+";
    var expected = new[]
    {
        new Token(TokenType.Literal, "a", 0),
        new Token(TokenType.Quantifier, "+", 1)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldReturnParseFailureWhenClosingGroupImmediatelyAfterGroupDirectiveStartToken()
{
    // Arrange: "(?)" is malformed — '?' must be followed by a directive,
    // so the closing paren should surface as a ParseFailure token.
    const string input = "(?)";
    var expected = new[]
    {
        new Token(TokenType.GroupStart, "(", 0),
        new Token(TokenType.GroupDirectiveStart, "?", 1),
        new Token(TokenType.ParseFailure, ")", 2)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeAsciiHexEscapeSequence()
{
    // Arrange: "\xHH" — two hex digits after the 'x' marker.
    const string input = @"\x1f";
    var expected = new[]
    {
        new Token(TokenType.CharacterEscapeMarker, @"\", 0),
        new Token(TokenType.CharacterEscapeHexMarker, "x", 1),
        new Token(TokenType.CharacterEscapeData, "1", 2),
        new Token(TokenType.CharacterEscapeData, "f", 3)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeParametizedMinimumQuantifier()
{
    // Arrange: "{n,}" — minimum-only form of the parametized quantifier.
    const string input = "a{6,}";
    var expected = new[]
    {
        new Token(TokenType.Literal, "a", 0),
        new Token(TokenType.ParametizedQuantifierStart, "{", 1),
        new Token(TokenType.Number, "6", 2),
        new Token(TokenType.ParametizedQuantifierRangeSeparator, ",", 3),
        new Token(TokenType.ParametizedQuantifierEnd, "}", 4)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeGroup()
{
    // Arrange: a plain capturing group of literals.
    const string input = "(foo)";
    var expected = new[]
    {
        new Token(TokenType.GroupStart, "(", 0),
        new Token(TokenType.Literal, "f", 1),
        new Token(TokenType.Literal, "o", 2),
        new Token(TokenType.Literal, "o", 3),
        new Token(TokenType.GroupEnd, ")", 4)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeAsciiHexEscapeSequenceWithTrailingNumbers()
{
    // Arrange: "\x" consumes exactly two hex digits; the trailing '1' is a plain literal.
    const string input = @"ab\x201";
    var expected = new[]
    {
        new Token(TokenType.Literal, "a", 0),
        new Token(TokenType.Literal, "b", 1),
        new Token(TokenType.CharacterEscapeMarker, @"\", 2),
        new Token(TokenType.CharacterEscapeHexMarker, "x", 3),
        new Token(TokenType.CharacterEscapeData, "2", 4),
        new Token(TokenType.CharacterEscapeData, "0", 5),
        new Token(TokenType.Literal, "1", 6)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeCharacterRange()
{
    // Arrange: a character-set range "a-z" inside brackets.
    const string input = "[a-z]";
    var expected = new[]
    {
        new Token(TokenType.CharacterSetStart, "[", 0),
        new Token(TokenType.Character, "a", 1),
        new Token(TokenType.CharacterRangeSeparator, "-", 2),
        new Token(TokenType.Character, "z", 3),
        new Token(TokenType.CharacterSetEnd, "]", 4)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizePositiveLookBehind()
{
    // Arrange: "(?<=...)" — '<' doubles as the look-behind marker here.
    const string input = "(?<=foo)";
    var expected = new[]
    {
        new Token(TokenType.GroupStart, "(", 0),
        new Token(TokenType.GroupDirectiveStart, "?", 1),
        new Token(TokenType.NamedIdentifierStartOrLookBehindMarker, "<", 2),
        new Token(TokenType.PositiveLookBehindMarker, "=", 3),
        new Token(TokenType.Literal, "f", 4),
        new Token(TokenType.Literal, "o", 5),
        new Token(TokenType.Literal, "o", 6),
        new Token(TokenType.GroupEnd, ")", 7)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeNonCapturingGroup()
{
    // Arrange: "(?:...)" non-capturing group.
    const string input = "(?:foo)";
    var expected = new[]
    {
        new Token(TokenType.GroupStart, "(", 0),
        new Token(TokenType.GroupDirectiveStart, "?", 1),
        new Token(TokenType.NonCapturingGroupMarker, ":", 2),
        new Token(TokenType.Literal, "f", 3),
        new Token(TokenType.Literal, "o", 4),
        new Token(TokenType.Literal, "o", 5),
        new Token(TokenType.GroupEnd, ")", 6)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeInlineComment()
{
    // Arrange: "(?#...)" inline comment — its body is emitted as literals.
    const string input = "(?#foo!)";
    var expected = new[]
    {
        new Token(TokenType.GroupStart, "(", 0),
        new Token(TokenType.GroupDirectiveStart, "?", 1),
        new Token(TokenType.CommentStart, "#", 2),
        new Token(TokenType.Literal, "f", 3),
        new Token(TokenType.Literal, "o", 4),
        new Token(TokenType.Literal, "o", 5),
        new Token(TokenType.Literal, "!", 6),
        new Token(TokenType.GroupEnd, ")", 7)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeControlCharacterEscapeSequenceWithTrailingLetters()
{
    // Arrange: "\cX" consumes exactly one control character; the second 'C' is a literal.
    const string input = @"\cCC";
    var expected = new[]
    {
        new Token(TokenType.CharacterEscapeMarker, @"\", 0),
        new Token(TokenType.CharacterEscapeControlMarker, "c", 1),
        new Token(TokenType.CharacterEscapeData, "C", 2),
        new Token(TokenType.Literal, "C", 3)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeEscapedPeriod()
{
    // Arrange: "\." — an escaped metacharacter, not the wildcard.
    const string input = @"\.";
    var expected = new[]
    {
        new Token(TokenType.CharacterEscapeMarker, @"\", 0),
        new Token(TokenType.CharacterEscapeData, ".", 1)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeMultipleLiteralCharacters()
{
    // Arrange: plain literals, one token per character.
    const string input = "abc";
    var expected = new[]
    {
        new Token(TokenType.Literal, "a", 0),
        new Token(TokenType.Literal, "b", 1),
        new Token(TokenType.Literal, "c", 2)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeBasicEscapeSequence()
{
    // Arrange: an escaped '+' embedded between literals.
    const string input = @"ab\+c";
    var expected = new[]
    {
        new Token(TokenType.Literal, "a", 0),
        new Token(TokenType.Literal, "b", 1),
        new Token(TokenType.CharacterEscapeMarker, @"\", 2),
        new Token(TokenType.CharacterEscapeData, "+", 3),
        new Token(TokenType.Literal, "c", 4)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeUnicodeEscapeSequenceWithTrailingNumbers()
{
    // Arrange: "\uHHHH" consumes exactly four hex digits; the remaining "44" are literals.
    const string input = @"\u030a44";
    var expected = new[]
    {
        new Token(TokenType.CharacterEscapeMarker, @"\", 0),
        new Token(TokenType.CharacterEscapeUnicodeMarker, "u", 1),
        new Token(TokenType.CharacterEscapeData, "0", 2),
        new Token(TokenType.CharacterEscapeData, "3", 3),
        new Token(TokenType.CharacterEscapeData, "0", 4),
        new Token(TokenType.CharacterEscapeData, "a", 5),
        new Token(TokenType.Literal, "4", 6),
        new Token(TokenType.Literal, "4", 7)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeNegativeCharacterSet()
{
    // Arrange: "[^...]" — '^' right after '[' negates the set.
    const string input = "[^abc]";
    var expected = new[]
    {
        new Token(TokenType.CharacterSetStart, "[", 0),
        new Token(TokenType.NegativeCharacterSetModifier, "^", 1),
        new Token(TokenType.Character, "a", 2),
        new Token(TokenType.Character, "b", 3),
        new Token(TokenType.Character, "c", 4),
        new Token(TokenType.CharacterSetEnd, "]", 5)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeGroupDirectiveStart()
{
    // Arrange: truncated input ending right after the '?' directive marker.
    const string input = "(?";
    var expected = new[]
    {
        new Token(TokenType.GroupStart, "(", 0),
        new Token(TokenType.GroupDirectiveStart, "?", 1)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}
public void Tokenizer_GetTokens_ShouldTokenizeZeroOrOneQuantifierInGroup()
{
    // Arrange: inside a group, '?' after a literal is the zero-or-one quantifier,
    // not a group directive.
    const string input = "(a?)";
    var expected = new[]
    {
        new Token(TokenType.GroupStart, "(", 0),
        new Token(TokenType.Literal, "a", 1),
        new Token(TokenType.Quantifier, "?", 2),
        new Token(TokenType.GroupEnd, ")", 3)
    };

    // Act
    var tokens = new Tokenizer(input).GetTokens().ToArray();

    // Assert
    CollectionAssert.AreEqual(expected, tokens);
}