/// <summary>
/// Tokenizes the contents of "simple.html" and writes the resulting tokens
/// and errors to the console, each group framed by begin/end banner lines.
/// </summary>
/// <param name="args">Command-line arguments; not used by this method.</param>
static void Main(string[] args)
{
    var html = File.ReadAllText("simple.html");
    // NOTE(review): the meaning of the second constructor argument is not visible here.
    var htmlTokenizer = new HtmlTokenizer(html, true);

    // Run the tokenizer between its own pair of banners.
    Console.WriteLine("=== <Begin Tokenization> ===");
    var result = htmlTokenizer.Run();
    var producedTokens = result.Tokens;
    var reportedErrors = result.Errors;
    Console.WriteLine("=== <End Tokenization> ===");
    Console.WriteLine();

    // Dump every token, one per line.
    Console.WriteLine("=== <Begin Tokens> ===");
    foreach (var producedToken in producedTokens)
    {
        Console.WriteLine($"{producedToken}");
    }

    Console.WriteLine("=== <End Tokens> ===");
    Console.WriteLine();

    // Dump every error, one per line.
    Console.WriteLine("=== <Begin Errors> ===");
    foreach (var reportedError in reportedErrors)
    {
        Console.WriteLine($"{reportedError}");
    }

    Console.WriteLine("=== <End Errors> ===");
}
/// <summary>
/// Runs the tokenizer over the input of <paramref name="testCase"/> and asserts that
/// the produced tokens (ignoring end-of-file tokens) and errors match the expectations
/// stored in the test case.
/// </summary>
/// <param name="testCase">
/// Supplies the input text, the name of the initial tokenizer state, the expected
/// output tokens and the expected errors.
/// </param>
private static void RunTestWithData(Html5LibTokenizerTestData testCase)
{
    // Arrange
    // Translate the textual state name into the tokenizer state; anything
    // unrecognised (including a missing name) starts in the Data state.
    HtmlTokenizerState startState;
    switch (testCase.InitialState)
    {
        case "PLAINTEXT state":
            startState = HtmlTokenizerState.PlainText;
            break;
        case "RCDATA state":
            startState = HtmlTokenizerState.RcData;
            break;
        case "RAWTEXT state":
            startState = HtmlTokenizerState.RawText;
            break;
        case "Script data state":
            startState = HtmlTokenizerState.ScriptData;
            break;
        case "CDATA section state":
            startState = HtmlTokenizerState.CDataSection;
            break;
        default:
            startState = HtmlTokenizerState.Data;
            break;
    }

    var sut = new HtmlTokenizer(testCase.Input, true, startState);

    // Act
    var result = sut.Run();
    // End-of-file tokens are filtered out before comparing against the expected output.
    var actualTokens = result.Tokens
        .Where(token => token.Type != HtmlTokenType.EndOfFile)
        .ToArray();
    var actualErrors = result.Errors.ToArray();

    // Assert
    // Output / Tokens
    var expectedTokens = testCase.Output;
    actualTokens.Should().HaveCount(expectedTokens.Length);

    for (var position = 0; position < expectedTokens.Length; position++)
    {
        var expected = expectedTokens[position];
        var actualToken = actualTokens[position];

        // The token kinds must agree before the kind-specific comparison is attempted.
        actualToken.Type.Should().Be(expected.TokenType);

        switch (expected.TokenType)
        {
            case HtmlTokenType.Doctype:
                DoctypeTokensMatch(
                    (HtmlDoctypeToken)actualToken,
                    (Html5LibTokenizerTestOutputDoctypeToken)expected);
                break;

            case HtmlTokenType.StartTag:
                StartTagTokensMatch(
                    (HtmlStartTagToken)actualToken,
                    (Html5LibTokenizerTestOutputStartTagToken)expected);
                break;

            case HtmlTokenType.EndTag:
                EndTagTokensMatch(
                    (HtmlEndTagToken)actualToken,
                    (Html5LibTokenizerTestOutputEndTagToken)expected);
                break;

            case HtmlTokenType.Comment:
                CommentTagTokensMatch(
                    (HtmlCommentToken)actualToken,
                    (Html5LibTokenizerTestOutputCommentToken)expected);
                break;

            case HtmlTokenType.Character:
                CharacterTagTokensMatch(
                    (HtmlCharacterToken)actualToken,
                    (Html5LibTokenizerTestOutputCharacterToken)expected);
                break;

            case HtmlTokenType.EndOfFile:
                // Deliberate failure: an expected token of this kind is treated as invalid.
                true.Should().BeFalse("there are no EndOfFile tokens to be expected.");
                break;

            default:
                // Deliberate failure: an unhandled token kind was encountered.
                true.Should().BeFalse("default should not be reached.");
                break;
        }
    }

    // Errors
    var expectedErrors = testCase.Errors;
    actualErrors.Should().HaveCount(expectedErrors.Length);

    for (var i = 0; i < expectedErrors.Length; i++)
    {
        // Each error is compared through its string form against the expected code.
        actualErrors[i].ToString().Should().Be(expectedErrors[i].Code);
    }
}