// Checks that successive Advance() calls hand back the source characters in order.
public void AdvanceReturnsCharacters()
{
    // Arrange
    const string source = "123";
    var state = new SourceScanState(source.AsMemory());

    // Act & Assert: every call must yield the next character of the source
    foreach (var expected in source)
    {
        state.Advance().Should().Be(expected);
    }
}
// Checks that LookAhead() yields an empty result once every character has been consumed.
public void LookAheadWhenAtEndReturnsEmpty()
{
    // Arrange: consume all three characters of the input
    var state = new SourceScanState("123".AsMemory());
    for (var consumed = 0; consumed < 3; consumed++)
    {
        state.Advance();
    }

    // Act
    var lookAheadIsEmpty = state.LookAhead(2).IsEmpty;

    // Assert
    lookAheadIsEmpty.Should().BeTrue();
}
// Checks that Advance() returns '\0' when called after the whole input was consumed.
public void AdvanceReturnsNullCharWhenAtEnd()
{
    // Arrange: consume all three characters of the input
    var state = new SourceScanState("123".AsMemory());
    for (var consumed = 0; consumed < 3; consumed++)
    {
        state.Advance();
    }

    // Act
    var pastEnd = state.Advance();

    // Assert
    pastEnd.Should().Be('\0');
}
// Checks that IsAtEnd() reports true after every character of the input was consumed.
public void IsAtEndReturnsTrueWhenCurrentIndexAtEnd()
{
    // Arrange
    var state = new SourceScanState("123".AsMemory());

    // Act: consume all three characters
    for (var consumed = 0; consumed < 3; consumed++)
    {
        state.Advance();
    }

    // Assert
    var atEnd = state.IsAtEnd();
    atEnd.Should().BeTrue();
}
// Checks that LookAhead(2), after one character was consumed, yields the two remaining characters.
public void LookAheadTillEndReturnsStuff()
{
    // Arrange: skip the first character so exactly two remain
    var state = new SourceScanState("123".AsMemory());
    state.Advance();
    char[] expected = { '2', '3' };

    // Act
    var remaining = state.LookAhead(2).ToArray();

    // Assert
    remaining.Should().BeEquivalentTo(expected);
}
// Checks that PeekNext() returns '\0' when the single input character was already consumed.
public void PeekNextAtEndReturnsNullChar()
{
    // Arrange: a one-character input, consumed by a single Advance()
    var state = new SourceScanState("1".AsMemory());
    state.Advance();

    // Act
    var peeked = state.PeekNext();

    // Assert
    peeked.Should().Be('\0');
}
// ReSharper restore InconsistentNaming

/// <summary>
/// Scans one token starting at the scanner's current position, advancing
/// <paramref name="state"/> past the characters it consumes.
/// </summary>
/// <param name="state">
/// Scanner state, mutated in place: <c>StartIndex</c> is set to the current position,
/// the position is advanced, and <c>IsInTag</c> is set when a tag is opened.
/// </param>
/// <returns>
/// <c>EOF_TOKEN</c> when the input is exhausted; an <c>HtmlEntity</c> token when an entity
/// terminated by ';' is recognized; the result of <c>ScanTag</c> while inside a tag;
/// an invalid <c>HtmlOpenTag</c> token when the tag-start character appears inside a tag;
/// otherwise a <c>Text</c> token.
/// </returns>
public static Token ScanNextToken(ref SourceScanState state)
{
    if (state.IsAtEnd())
    {
        return EOF_TOKEN;
    }

    // The token begins at the character we are about to consume.
    state.StartIndex = state.CurrentPos;
    var current = state.Advance();

    // Possible HTML entity: the entity-start character (an ampersand, per the
    // original comment) was found.
    if (current == HTML_ENTITY_START)
    {
        // A valid entity ends on ';'. The only spaces that are allowed are the ones
        // right before the terminating ';'.
        // The character after the entity start must be either a letter or a '#'.
        var next = state.PeekNext();
        if (char.IsLetter(next) || next == '#')
        {
            state.Advance();
            next = state.PeekNext();
            if (char.IsLetterOrDigit(next))
            {
                // Entity start + letter + letter/digit, or entity start + '#' + letter/digit.
                current = state.Advance();

                // Consume the contiguous run of letters/digits.
                while (current != '\0' && char.IsLetterOrDigit(state.PeekNext()))
                {
                    current = state.Advance();

                    // NOTE(review): the loop only advances while PeekNext() is a letter or
                    // digit; if PeekNext() returns the character Advance() yields next, this
                    // check can never succeed - confirm before removing.
                    if (current == ';')
                    {
                        // End of entity
                        return CreateToken(TokenType.HtmlEntity, HtmlElementType.None, state);
                    }
                }

                // End of contiguous characters, but no ';' found yet: after optional
                // whitespace the next character should be the terminating ';'.
                current = AdvanceWhiteSpaces(ref state, current);
                if (state.PeekNext() == ';')
                {
                    state.Advance();

                    // End of entity
                    return CreateToken(TokenType.HtmlEntity, HtmlElementType.None, state);
                }

                if (!char.IsLetterOrDigit(current))
                {
                    // Invalid entity, treat what was consumed as text
                    return CreateToken(TokenType.Text, HtmlElementType.None, state);
                }

                // Otherwise it is not a valid entity; fall through and keep scanning.
            }
        }
    }

    if (current == HTML_TAG_START)
    {
        current = state.Advance();
        if (state.IsInTag)
        {
            // We found a '<' inside a tag, ex: "<u<p>> or "<<p>>
            // Finish the current tag and start a new one.
            return CreateToken(TokenType.HtmlOpenTag, HtmlElementType.Invalid, state);
        }

        if (state.IsAtEnd())
        {
            // Input ended right after the tag start (original note: "we ended on a '<'").
            // NOTE(review): this is evaluated after the extra Advance() above, so it
            // presumably also fires when exactly one character follows '<' - confirm.
            return CreateToken(TokenType.Text, HtmlElementType.Invalid, state);
        }

        state.IsInTag = true;
    }

    if (state.IsInTag)
    {
        return ScanTag(ref state, current);
    }

    // Not in a tag and we found a '>', ex: hello > world
    // IsInTag is necessarily false here (the in-tag case returned above), so there is
    // no tag state to reset.
    if (current == HTML_TAG_END)
    {
        return CreateToken(TokenType.Text, HtmlElementType.Invalid, state);
    }

    // Plain text: consume up to (not including) the next tag start or entity start.
    while (!state.IsAtEnd()
           && state.PeekNextUnsafe() != HTML_TAG_START
           && state.PeekNextUnsafe() != HTML_ENTITY_START)
    {
        state.Advance();
    }

    return CreateToken(TokenType.Text, HtmlElementType.None, state);
}