// Requesting more look-ahead characters than the input contains must yield an empty result.
public void LookAheadReturnsNothingWhenOutOfRange()
{
    // Arrange: three characters of input, nothing consumed yet
    var state = new SourceScanState("123".AsMemory());

    // Act: ask for one character more than is available
    var lookAhead = state.LookAhead(4);

    // Assert
    lookAhead.IsEmpty.Should().BeTrue();
}
// Smoke test: input without any markup is returned as a single token carrying the whole text.
public void PlainTextSmokeTest()
{
    // Arrange
    var state = new SourceScanState("hello".AsMemory());

    // Act
    var scanned = Scanner.ScanNextToken(ref state);

    // Assert
    var text = scanned.NewValueStr();
    text.Should().Be("hello");
}
// LookAhead(n) on a fresh state exposes the first n characters of the input.
public void LookAheadReturnsStuff()
{
    // Arrange
    var state = new SourceScanState("123".AsMemory());
    var expected = new[] { '1', '2' };

    // Act
    var peeked = state.LookAhead(2).ToArray();

    // Assert
    peeked.Should().BeEquivalentTo(expected);
}
public string Textorize(string htmlInput) { if (string.IsNullOrWhiteSpace(htmlInput)) { return(""); } var source = new SourceScanState(htmlInput.AsMemory()); var htmlTagStack = new Stack <TokenInfo>(32); var state = new TextorizeState(new StringBuilder(htmlInput.Length),
// Each call to Advance hands back the next character of the input, in order.
public void AdvanceReturnsCharacters()
{
    // Arrange
    const string input = "123";
    var state = new SourceScanState(input.AsMemory());

    // Act & Assert: one Advance per input character, compared position by position
    foreach (var expected in input)
    {
        state.Advance().Should().Be(expected);
    }
}
// An input consisting of a single opening tag is scanned as one HtmlOpenTag token spanning the whole input.
public void ReturnsHtmlOpenTagTokenForHtmlOpenTagInText(string input)
{
    // Arrange
    var state = new SourceScanState(input.AsMemory());

    // Act
    var scanned = Scanner.ScanNextToken(ref state);

    // Assert: check the type first (the token text is included in the failure message), then the text
    var kind = scanned.TokenType;
    kind.Should().Be(TokenType.HtmlOpenTag, $"for : {scanned.NewValueStr()}");

    var text = scanned.NewValueStr();
    text.Should().Be(input);
}
// The first token scanned from `input` has the expected type and covers the complete input.
public void TokenTypesTest(string input, TokenType expected)
{
    // Arrange
    var state = new SourceScanState(input.AsMemory());

    // Act
    var scanned = Scanner.ScanNextToken(ref state);

    // Assert: check the type first (the token text is included in the failure message), then the text
    var kind = scanned.TokenType;
    kind.Should().Be(expected, $"for : {scanned.NewValueStr()}");

    var text = scanned.NewValueStr();
    text.Should().Be(input);
}
// Once the only text token has been consumed, the next scan reports end of input.
public void ReturnsEofTokenAsLastToken()
{
    // Arrange
    var state = new SourceScanState("Hello".AsMemory());

    // Act: the first scan consumes the whole text
    var textToken = Scanner.ScanNextToken(ref state);
    textToken.NewValueStr().Should().Be("Hello");

    // ... so the second scan has nothing left to read
    var last = Scanner.ScanNextToken(ref state);

    // Assert
    last.TokenType.Should().Be(TokenType.Eof);
}
// After every character has been consumed, LookAhead has nothing to return.
public void LookAheadWhenAtEndReturnsEmpty()
{
    // Arrange: consume all three characters
    var state = new SourceScanState("123".AsMemory());
    for (var consumed = 0; consumed < 3; consumed++)
    {
        state.Advance();
    }

    // Act
    var lookAhead = state.LookAhead(2);

    // Assert
    lookAhead.IsEmpty.Should().BeTrue();
}
// Advancing past the last character yields the '\0' sentinel.
public void AdvanceReturnsNullCharWhenAtEnd()
{
    // Arrange: consume all three characters
    var state = new SourceScanState("123".AsMemory());
    for (var consumed = 0; consumed < 3; consumed++)
    {
        state.Advance();
    }

    // Act: one more Advance than there are characters
    var beyondEnd = state.Advance();

    // Assert
    beyondEnd.Should().Be('\0');
}
// Peeking when the input has been fully consumed yields the '\0' sentinel.
public void PeekNextAtEndReturnsNullChar()
{
    // Arrange: a single-character input, consumed by one Advance
    var state = new SourceScanState("1".AsMemory());
    state.Advance();

    // Act
    var peeked = state.PeekNext();

    // Assert
    peeked.Should().Be('\0');
}
// IsAtEnd reports true once every character of the input has been consumed.
public void IsAtEndReturnsTrueWhenCurrentIndexAtEnd()
{
    // Arrange
    var state = new SourceScanState("123".AsMemory());

    // Act: one Advance per input character
    for (var consumed = 0; consumed < 3; consumed++)
    {
        state.Advance();
    }

    // Assert
    var atEnd = state.IsAtEnd();
    atEnd.Should().BeTrue();
}
// A malformed tag ("<1") directly followed by a well-formed one ("<p>") is split into separate tokens.
public void ReturnsHtmlTokenAfterInvalidTag()
{
    // Arrange
    var state = new SourceScanState("aaa<1<p>".AsMemory());

    // Token texts in the order the scanner is expected to produce them
    var expectedTexts = new[] { "aaa", "<1", "<p>" };

    // Act & Assert: scan once per expected token and compare its text
    foreach (var expected in expectedTexts)
    {
        var scanned = Scanner.ScanNextToken(ref state);
        scanned.NewValueStr().Should().Be(expected);
    }
}
// A '<' that is the very last character of the input is reported as plain text, not as a tag.
public void EndsInHtmlOpenTagCharacter()
{
    // Arrange
    var state = new SourceScanState("aaa<".AsMemory());

    // Act & Assert: the leading text comes first
    var leading = Scanner.ScanNextToken(ref state);
    leading.TokenType.Should().Be(TokenType.Text, $"for: {leading.NewValueStr()}");
    leading.NewValueStr().Should().Be("aaa");

    // ... followed by the dangling '<' as its own text token
    var dangling = Scanner.ScanNextToken(ref state);
    dangling.TokenType.Should().Be(TokenType.Text, $"for: {dangling.NewValueStr()}");
    dangling.NewValueStr().Should().Be("<");
}
// Scanning any input to exhaustion must terminate with an Eof token.
public void ReturnsEofTokenAsLastTokenForHtml(string input)
{
    // Arrange
    var sourceScanState = new SourceScanState(input.AsMemory());
    Token current;

    // Act: consume tokens until the scanner reports end of input.
    // A do/while guarantees the scanner runs at least once. Starting from default(Token) with a
    // pre-checked loop would skip scanning entirely if TokenType.Eof were the enum's zero value,
    // and the assertion below would then pass without exercising the scanner.
    do
    {
        current = Scanner.ScanNextToken(ref sourceScanState);
    }
    while (current.TokenType != TokenType.Eof);

    // Assert
    current.TokenType.Should().Be(TokenType.Eof);
}
// ReSharper restore InconsistentNaming

/// <summary>
/// Scans a single token from <paramref name="state"/>, beginning at its current position.
/// </summary>
/// <param name="state">
/// The scan state. <c>StartIndex</c> is set to the position where this token begins, the position is
/// advanced past the consumed characters, and <c>IsInTag</c> is set when a tag is opened.
/// </param>
/// <returns>
/// <c>EOF_TOKEN</c> when the input is exhausted; the result of <c>ScanTag</c> while inside a tag;
/// otherwise a token built by <c>CreateToken</c> (HTML entity, invalid open tag, or text).
/// </returns>
public static Token ScanNextToken(ref SourceScanState state)
{
    if (state.IsAtEnd())
    {
        return EOF_TOKEN;
    }

    // The token being scanned starts at the current position.
    state.StartIndex = state.CurrentPos;
    var current = state.Advance();

    // A '&' in plain text: possible HTML entity.
    // ex: hello &gt; world
    //           ^
    if (current == HTML_ENTITY_START)
    {
        // A valid entity ends on ';'.
        // The only spaces that are allowed are before the terminating ';'.
        // The character after the '&' should be either a letter or a '#'.
        var next = state.PeekNext();
        if (char.IsLetter(next) || next == '#')
        {
            state.Advance();
            next = state.PeekNext();
            if (char.IsLetterOrDigit(next))
            {
                // &[a-z][a-z]  or  &#x
                //  ^    ^           ^
                current = state.Advance();

                // Consume the contiguous run of letters/digits.
                while (current != '\0' && char.IsLetterOrDigit(state.PeekNext()))
                {
                    current = state.Advance();

                    // NOTE(review): if Advance() returns the character PeekNext() just reported,
                    // `current` is always a letter or digit here and this branch cannot fire.
                    // Kept as-is; confirm against SourceScanState before removing.
                    if (current == ';')
                    {
                        // End of entity
                        return CreateToken(TokenType.HtmlEntity, HtmlElementType.None, state);
                    }
                }

                // End of contiguous chars, but no ';' found yet.
                // The next non-whitespace character should be a ';'.
                // ex: &gt   ;
                //        ^
                current = AdvanceWhiteSpaces(ref state, current);
                if (state.PeekNext() == ';')
                {
                    state.Advance();

                    // End of entity
                    return CreateToken(TokenType.HtmlEntity, HtmlElementType.None, state);
                }

                if (!char.IsLetterOrDigit(current))
                {
                    // Invalid entity, treat as text
                    return CreateToken(TokenType.Text, HtmlElementType.None, state);
                }

                // Otherwise it is not a valid entity; fall through to the generic handling below.
            }
        }
    }

    if (current == HTML_TAG_START)
    {
        current = state.Advance();

        if (state.IsInTag)
        {
            // We found a '<' inside a tag, ex: "<u<p>>" or "<<p>>".
            // Finish the current tag and start a new one.
            return CreateToken(TokenType.HtmlOpenTag, HtmlElementType.Invalid, state);
        }

        if (state.IsAtEnd())
        {
            // The input ended on a '<' character.
            return CreateToken(TokenType.Text, HtmlElementType.Invalid, state);
        }

        state.IsInTag = true;
    }

    if (state.IsInTag)
    {
        return ScanTag(ref state, current);
    }

    // Not in a tag and we found a '>'.
    // IsInTag is necessarily false here (the in-tag case returned above), so there is nothing to reset.
    // ex: hello > world
    //           ^
    if (current == HTML_TAG_END)
    {
        return CreateToken(TokenType.Text, HtmlElementType.Invalid, state);
    }

    // Plain text: consume up to (not including) the next tag start or entity start.
    while (!state.IsAtEnd()
           && state.PeekNextUnsafe() != HTML_TAG_START
           && state.PeekNextUnsafe() != HTML_ENTITY_START)
    {
        state.Advance();
    }

    return CreateToken(TokenType.Text, HtmlElementType.None, state);
}