// Reading an empty input: Read() must report no token and no parse error may be recorded.
public void Empty()
{
    reader = HtmlReaderFactory.FromString("", parseErrors);

    var hasToken = reader.Read();

    Assert.IsFalse(hasToken);
    Assert.AreEqual(0, parseErrors.Count);
}
/// <summary>
/// Sanitizes the given HTML by stripping scripts, styles, and tags
/// that might pose a security concern, and writes the result to <paramref name="writer"/>
/// </summary>
/// <param name="html">The HTML content to sanitize. A <see cref="string"/> or <see cref="Stream"/> can also be used.</param>
/// <param name="writer">Writer which receives the sanitized HTML</param>
/// <param name="settings">Settings controlling what CSS and HTML is permitted in the result</param>
/// <remarks>
/// Sanitization aims to prevent the XSS patterns
/// described on <a href="https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet">XSS Filter Evasion Cheat Sheet</a>
/// </remarks>
public static void Sanitize(TextSource html, XmlWriter writer, HtmlSanitizeSettings settings = null)
{
    using (var source = new HtmlReader(html, false))
    {
        var sanitized = source.Sanitize(settings);
        sanitized.ToHtml(writer);
    }
}
// Alternating self-closing tags and text: expects the token sequence
// Tag(br), Text(a), Tag(p), Text(b), then end of input with no parse errors.
public void TagDataTagData()
{
    const string markup = "<br/>a<p/>b";
    reader = HtmlReaderFactory.FromString(markup, parseErrors);

    // <br/>
    Assert.IsTrue(reader.Read());
    Assert.AreEqual(HtmlTokenKind.Tag, reader.TokenKind);
    Assert.AreEqual("br", reader.Name);
    Assert.IsTrue(reader.SelfClosingElement);

    // a
    Assert.IsTrue(reader.Read());
    Assert.AreEqual(HtmlTokenKind.Text, reader.TokenKind);
    Assert.AreEqual("a", reader.Text);

    // <p/>
    Assert.IsTrue(reader.Read());
    Assert.AreEqual(HtmlTokenKind.Tag, reader.TokenKind);
    Assert.AreEqual("p", reader.Name);
    Assert.IsTrue(reader.SelfClosingElement);

    // b
    Assert.IsTrue(reader.Read());
    Assert.AreEqual(HtmlTokenKind.Text, reader.TokenKind);
    Assert.AreEqual("b", reader.Text);

    // end of input
    Assert.IsFalse(reader.Read());
    Assert.AreEqual(0, parseErrors.Count);
}
/// <summary>
/// Compresses HTML by removing unnecessary whitespace and comments
/// </summary>
/// <param name="html">The HTML content to minify. A <see cref="string"/> or <see cref="Stream"/> can also be used.</param>
/// <param name="writer">Writer which receives the HTML output. For best results, use a <see cref="HtmlTextWriter"/></param>
/// <param name="settings">Settings controlling how the HTML is compressed</param>
public static void Minify(TextSource html, XmlWriter writer, HtmlMinifySettings settings = null)
{
    using (var source = new HtmlReader(html, false))
    {
        var minified = source.Minify(settings);
        minified.ToHtml(writer);
    }
}
/// <summary>
/// Click handler for the Convert button. Validates the source folder, template and
/// destination folder inputs, then runs every supported file found under the source
/// folder through the XML template and saves the result into the destination folder.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void buttonConvert_Click(object sender, EventArgs e)
{
    // Non-short-circuit '&=' on purpose: every input is validated even if an earlier one failed.
    bool isValid = true;
    isValid &= this.IsValidOption(ref this.textBoxSourceHtmlFolder, ref this._textBoxSourceHtmlFolderColor);
    isValid &= this.IsValidOption(ref this.textBoxXmlTemplate, ref this._textBoxXmlTemplate);
    isValid &= this.IsValidOption(ref this.textBoxDestinationXmlFolder, ref this._textBoxDestinationXmlFolder);
    if (!isValid)
    {
        return;
    }

    this.buttonConvert.Enabled = false;
    try
    {
        // Materialize once: the list is needed for both the progress maximum and the loop,
        // and a lazy query would re-run the filter on each enumeration.
        // ToLowerInvariant keeps extension matching independent of the current UI culture.
        string[] supportedFiles = Directory
            .GetFiles(this.textBoxSourceHtmlFolder.Text, "*.*", SearchOption.AllDirectories)
            .Where(file => _SupportedExtensions.Contains(Path.GetExtension(file).ToLowerInvariant()))
            .ToArray();

        progressBarStatus.Maximum = supportedFiles.Length;

        foreach (string sourceFile in supportedFiles)
        {
            // A fresh template is loaded per file because placeholder replacement mutates it.
            XmlTemplateRW template = new XmlTemplateRW();
            template.LoadXmlTemplate(this.textBoxXmlTemplate.Text);

            HtmlReader htmlReader = new HtmlReader();
            if (htmlReader.LoadHtml(sourceFile))
            {
                string destinationFile = Path.Combine(
                    this.textBoxDestinationXmlFolder.Text,
                    Path.GetFileName(sourceFile) + ".xml");

                template.ExecutePlaceholderReplacement(htmlReader);
                template.Save(destinationFile);
            }

            progressBarStatus.PerformStep();
            // Keeps the UI responsive while converting on the UI thread.
            Application.DoEvents();
        }
    }
    finally
    {
        // Always restore the UI, even when a file fails to convert.
        progressBarStatus.Value = 0;
        this.buttonConvert.Enabled = true;
    }
}
// The first link extracted from the valid test HTML must be the expected URL.
public void ShouldReturnLinkFromATag()
{
    var linkReader = new HtmlReader();
    var testHtml = this.GetValidTestHtmlData();

    var links = linkReader.GetLinksFromText(testHtml);

    var firstLink = links.First();
    firstLink.Should().Be("https://www.orf.at");
}
/// <summary>
/// Scans the HTML provided by <c>Reader</c> and collects one result per
/// <c>thead</c> whose following <c>tr</c> carries a class containing "status_".
/// </summary>
/// <returns>The collected results; empty when <c>Reader</c> is null or nothing matched.</returns>
public override List <TestRunImportResultInfo> GetTestRunImportResults()
{
    var imported = new List <TestRunImportResultInfo>();
    if (Reader == null)
    {
        return imported;
    }

    // Every result of this import shares the same timestamp.
    var importTime = DateTime.Now;
    using (var htmlReader = new HtmlReader(Reader))
    {
        // walk over every thead in the document
        while (htmlReader.ReadToFollowing("thead"))
        {
            if (!htmlReader.ReadToFollowing("tr") || htmlReader.Attributes.Count <= 0)
            {
                continue;
            }

            var cssClass = htmlReader.Attributes["class"];
            if (string.IsNullOrEmpty(cssClass) || !cssClass.Contains("status_"))
            {
                continue;
            }

            var info = ParseTrTag(htmlReader);
            if (info == null)
            {
                continue;
            }

            info.RunDate = importTime;
            imported.Add(info);
        }
    }

    return imported;
}
// Round-trips a document whose script element contains an HTML comment.
public void RoundTrip_SgmlReader36()
{
    var html = @"<html> <head> <script language=""JavaScript""> <!-- --></script> </head> <body> <p>hello</p> </body> </html> ";
    var expected = @"<html> <head> <script language=""JavaScript""> <!-- --></script> </head> <body> <p>hello</p> </body> </html> ";

    using (var reader = new HtmlReader(html))
    {
        var actual = reader.ToHtml();
        Assert.Equal(expected, actual);
    }
}
/// <summary>
/// Wires the controller to the account's tab page: looks up the browser and status
/// controls by name, creates the HTTP client and HTML reader, then queues and
/// immediately starts the login worker.
/// </summary>
/// <param name="account">Account this controller plays for.</param>
/// <param name="tabPage">Tab page hosting the "webBrowser" and "statusStrip1" controls.</param>
public GameController(Account account, TabPage tabPage)
{
    tabPage.Text = string.Format("{0} - Speed: {1}", account.AccountName, account.Speed.ToString());
    PlayerAccount = account;

    // Browser hosted on the tab
    var browserMatches = tabPage.Controls.Find("webBrowser", true);
    WebBrowser = (WebBrowser)browserMatches[0];
    WebBrowser.ScriptErrorsSuppressed = true;

    // Progress widgets hosted on the tab's status strip
    var statusStrip = (StatusStrip)tabPage.Controls.Find("statusStrip1", true)[0];
    ProgressBar = (ToolStripProgressBar)statusStrip.Items.Find("tspbOpperationProgress", true)[0];
    ProgressLabel = (ToolStripStatusLabel)statusStrip.Items.Find("tsslOpperation", true)[0];
    ProgressLabel.Text = "Starting up, starting login soon.";

    Client = new CustomHttpClient(account);
    HtmlReader = new HtmlReader();
    WorkerPriorityQueue = new SimplePriorityQueue <IWorker, int>();

    // Login is queued first and started right away.
    var login = new LoginWorker(PlayerAccount, Client, HtmlReader, WebBrowser, ProgressLabel, ProgressBar);
    login.Worker.RunWorkerCompleted += Worker_RunWorkerCompleted;
    WorkerPriorityQueue.Enqueue(login, 0);

    WorkerPriorityQueue.Dequeue().StartWork();
}
/// <summary>
/// Builds a result from the element the reader is positioned on. Success is taken
/// from the first "status_*" token of its class attribute ("passed" means success);
/// the name is the text read from the following <c>td</c>.
/// </summary>
/// <param name="htmlReader">Reader positioned on the row to parse.</param>
/// <returns>The parsed result, or null when there is no class attribute or no following <c>td</c>.</returns>
private static TestRunImportResultInfo ParseTrTag(HtmlReader htmlReader)
{
    var cssClass = htmlReader.Attributes["class"];
    if (cssClass == null)
    {
        return null;
    }

    // Only the first status_* token decides the outcome.
    var firstStatus = cssClass
        .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
        .Where(token => token.StartsWith("status_"))
        .Select(token => token.Substring("status_".Length))
        .FirstOrDefault();

    var succeeded = false;
    if (firstStatus != null)
    {
        succeeded = string.Compare(firstStatus, "passed", StringComparison.InvariantCulture) == 0;
    }

    if (!htmlReader.ReadToFollowing("td"))
    {
        return null;
    }

    var caseName = htmlReader.GetInnerTextUpToElement("td", HtmlNodeType.EndElement);
    return new TestRunImportResultInfo
    {
        Name = caseName,
        IsSuccess = succeeded
    };
}
/// <summary>
/// Renders the home page: header, logged-in navigation and home fragments, one
/// "row" div per model, then the home-end and footer fragments.
/// </summary>
/// <returns>The assembled HTML.</returns>
public string Render()
{
    StringBuilder sb = new StringBuilder();
    sb.AppendLine(HtmlReader.Read(Constans.Header));
    sb.AppendLine(HtmlReader.Read(Constans.NavLogged));
    sb.AppendLine(HtmlReader.Read(Constans.Home));

    // NOTE(review): a row div is opened for every model but closed only on every
    // third iteration, so the divs do not balance — confirm the intended layout
    // against the Home/HomeEnd templates.
    int count = 1;
    foreach (var model in this.Model)
    {
        if (count == 3)
        {
            sb.AppendLine("</div>");
            count = 1;
        }

        // Fix: the opening tag used to be written without its closing '>',
        // which turned the model markup that follows into attribute garbage.
        sb.AppendLine("<div class=\"row\">");
        sb.Append(model.ToString());
        count++;
    }

    sb.AppendLine(Environment.NewLine);
    sb.AppendLine(HtmlReader.Read(Constans.HomeEnd));
    sb.AppendLine(HtmlReader.Read(Constans.Footer));

    return sb.ToString();
}
/// <summary>
/// Reads the HTML file via <c>HtmlReader.ReadHtmlFile</c> and returns the result of
/// exporting it to PDF through the report service.
/// </summary>
public async Task <IActionResult> ConvertHtmlToPdf()
{
    _logger.LogInformation("Serving convert html to pdf");

    var html = await HtmlReader.ReadHtmlFile();
    var exportResult = await _reportService.ExportToPdf(html, false);

    return exportResult;
}
/// <summary>
/// Creates a reader over the UTF-8 bytes of <paramref name="html"/> and records
/// every parse error it raises in <paramref name="parseErrors"/>.
/// </summary>
/// <param name="html">Markup to read.</param>
/// <param name="parseErrors">List that receives the arguments of each ParseError event.</param>
/// <returns>The configured reader.</returns>
public static HtmlReader FromString(string html, List <HtmlParseErrorEventArgs> parseErrors)
{
    var utf8Bytes = Encoding.UTF8.GetBytes(html);
    var input = new MemoryStream(utf8Bytes);

    var htmlReader = new HtmlReader(input);
    htmlReader.ParseError += (sender, error) => { parseErrors.Add(error); };

    return htmlReader;
}
// No links may be extracted from the invalid test HTML.
public void ShouldReturnNoLinkFromInvalidATag()
{
    var linkReader = new HtmlReader();
    var testHtml = this.GetInvalidTestHtmlData();

    var links = linkReader.GetLinksFromText(testHtml);

    var linkCount = links.Count();
    linkCount.Should().Be(0);
}
// A small HTML5 page must be rendered back exactly as it was read.
public void RoundTrip_Basic()
{
    const string page = @"<!DOCTYPE html> <html lang=""en""> <head> <meta charset=""utf-8""> <title>The HTML5 Herald</title> <meta name=""description"" content=""The HTML5 Herald""> <meta name=""author"" content=""SitePoint""> <link rel=""stylesheet"" href=""css/styles.css?v=1.0""> <!--[if lt IE 9]> <script src=""https://cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv.js""></script> <![endif]--> </head> <body> <input type=""text"" required> <script src=""js/scripts.js""></script> </body> </html>";

    using (var reader = new HtmlReader(page))
    {
        var actual = reader.ToHtml();
        Assert.Equal(page, actual);
    }
}
/// <summary>
/// Rewinds <paramref name="stream"/> to its start, creates a reader over it, and
/// records every parse error it raises in <paramref name="parseErrors"/>.
/// </summary>
/// <param name="stream">Seekable stream holding the markup.</param>
/// <param name="parseErrors">List that receives the arguments of each ParseError event.</param>
/// <returns>The configured reader.</returns>
public static HtmlReader FromStream(Stream stream, List <HtmlParseErrorEventArgs> parseErrors)
{
    // Always read from the beginning, wherever the caller left the position.
    stream.Seek(0, SeekOrigin.Begin);

    var htmlReader = new HtmlReader(stream);
    htmlReader.ParseError += (sender, error) => { parseErrors.Add(error); };

    return htmlReader;
}
// Asserts that converting `html` to Markdown yields exactly `expected`.
private void CheckConversion(string html, string expected)
{
    using (var source = new HtmlReader(html))
    {
        var markdown = source.ToMarkdown();
        Assert.Equal(expected, markdown);
    }
}
// Reads `html` as UTF-8 through HtmlReader.Read and flattens the resulting chunks
// into "Type:Value" entries joined by ", ".
private string Read(string html)
{
    var utf8Bytes = Encoding.UTF8.GetBytes(html);
    using (var input = new MemoryStream(utf8Bytes))
    {
        // Materialize while the stream is still open.
        var chunks = (IEnumerable <HtmlChunk>)HtmlReader.Read(input).ToList();

        var parts = chunks
            .Select(chunk => chunk.Type + ":" + chunk.Value)
            .ToArray();

        return string.Join(", ", parts);
    }
}
// Sanitizes `input` (using the default settings when none are given), renders it
// with `writerSettings`, and asserts the output equals `expected`.
private void TestSanitize(string input, string expected, HtmlSanitizeSettings settings = null, HtmlWriterSettings writerSettings = null)
{
    using (var source = new HtmlReader(input))
    {
        var effectiveSettings = settings ?? HtmlSanitizeSettings.Default();
        var sanitized = source.Sanitize(effectiveSettings);

        var actual = sanitized.ToHtml(writerSettings);

        Assert.Equal(expected, actual);
    }
}
// Parses `data`, renders it back to HTML, and asserts the output equals `expected`.
private void TestParser(string data, string expected)
{
    using (var source = new HtmlReader(data))
    {
        var actual = source.ToHtml();
        Assert.Equal(expected, actual);
    }
}
// A paragraph containing the plus-minus character must render back unchanged.
public void RoundTrip_EntityName()
{
    var html = @"<p>a±b</p>";
    var expected = @"<p>a±b</p>";

    using (var reader = new HtmlReader(html))
    {
        var actual = reader.ToHtml();
        Assert.Equal(expected, actual);
    }
}
// Reads a single tag and checks the value of its title attribute; no parse errors expected.
public void AttributeValueNamedCharacterReference()
{
    const string markup = "<a title=\"<\">";
    reader = HtmlReaderFactory.FromString(markup, parseErrors);

    // the <a> tag
    Assert.IsTrue(reader.Read());
    var title = reader.GetAttribute("title");
    Assert.AreEqual("<", title);

    // end of input
    Assert.IsFalse(reader.Read());
    Assert.AreEqual(0, parseErrors.Count);
}
// Self-closed head is expanded to an open/close pair and empty-valued prefixed
// attributes are written without a value.
public void RoundTrip_SgmlReader15()
{
    var html = @"<html xmlns=""http://www.w3.org/1999/xhtml""><head /><body><table u1:str="""" x:str=""""></table></body></html>";
    var expected = @"<html xmlns=""http://www.w3.org/1999/xhtml""><head></head><body><table u1:str x:str></table></body></html>";

    using (var reader = new HtmlReader(html))
    {
        var actual = reader.ToHtml();
        Assert.Equal(expected, actual);
    }
}
// Leading text followed by an inline element must render back unchanged.
public void RoundTrip_SgmlReader46()
{
    const string fragment = @"blah <b>foo</b>";

    using (var reader = new HtmlReader(fragment))
    {
        var actual = reader.ToHtml();
        Assert.Equal(fragment, actual);
    }
}
/// <summary>
/// Loads every file in the "../../content" directory and stores its HTML in
/// <c>htmls</c>, keyed by file name (without directory).
/// </summary>
public static void Initialize()
{
    var allContentHtmls = Directory.GetFiles("../../content");
    foreach (var htmlPath in allContentHtmls)
    {
        // Path.GetFileName handles both '\' and '/' separators; the previous manual
        // search for '\' returned the whole path on platforms that use '/'.
        var htmlName = Path.GetFileName(htmlPath);
        htmls.Add(htmlName, HtmlReader.ReadHtml(htmlPath));
    }
}
// Text wrapped in downlevel-revealed conditional markers must not be lost.
public void RoundTrip_SgmlReader22()
{
    var html = @"<html><body>do <![if !supportLists]>not<![endif]> lose this text</body></html>";
    var expected = @"<html><body>do <![if !supportLists]>not<![endif]> lose this text</body></html>";

    using (var reader = new HtmlReader(html))
    {
        var actual = reader.ToHtml();
        Assert.Equal(expected, actual);
    }
}
// Self-closed elements (prefixed or not) are rendered as explicit open/close pairs,
// keeping their prefixed attributes.
public void RoundTrip_SgmlReader24()
{
    var html = @"<html xmlns:n=""http://explicit""><foo attr1=""1"" n:attr2=""2"" /><n:foo attr1=""1"" n:attr2=""2"" /></html>";
    var expected = @"<html xmlns:n=""http://explicit""><foo attr1=""1"" n:attr2=""2""></foo><n:foo attr1=""1"" n:attr2=""2""></n:foo></html>";

    using (var reader = new HtmlReader(html))
    {
        var actual = reader.ToHtml();
        Assert.Equal(expected, actual);
    }
}
// Empty prefixed elements between text runs must render back unchanged.
public void RoundTrip_SgmlReader28()
{
    var html = @"<html xmlns:o=""http://microsoft.com""><body>A<o:p></o:p>B<o:p></o:p></body></html>";
    var expected = @"<html xmlns:o=""http://microsoft.com""><body>A<o:p></o:p>B<o:p></o:p></body></html>";

    using (var reader = new HtmlReader(html))
    {
        var actual = reader.ToHtml();
        Assert.Equal(expected, actual);
    }
}
// An incomplete hexadecimal character reference must render back unchanged.
public void RoundTrip_SgmlReader60()
{
    var html = @"<p>&#x;</p>";
    var expected = @"<p>&#x;</p>";

    using (var reader = new HtmlReader(html))
    {
        var actual = reader.ToHtml();
        Assert.Equal(expected, actual);
    }
}
// An explicit close tag on an img element is dropped when rendering.
public void RoundTrip_VoidElementCloseTag()
{
    var html = @"<p><img src></img></p>";
    var expected = @"<p><img src></p>";

    using (var reader = new HtmlReader(html))
    {
        var actual = reader.ToHtml();
        Assert.Equal(expected, actual);
    }
}
// A malformed tag name containing '=' and quotes is kept verbatim and gets a
// matching close tag in the rendered output.
public void RoundTrip_SgmlReader39()
{
    var html = @"<html><class=""black"">Text………</html>";
    var expected = @"<html><class=""black"">Text………</class=""black""></html>";

    using (var reader = new HtmlReader(html))
    {
        var actual = reader.ToHtml();
        Assert.Equal(expected, actual);
    }
}
/// <summary>
/// Resolves a template placeholder against the loaded HTML.
/// The two internal placeholders map to the file name and directory of the HTML file.
/// Otherwise the number of leading braces selects the lookup: "{{{" outer HTML,
/// "{{" inner HTML, "{" either CSV inner HTML (one argument) or an attribute lookup
/// (two comma-separated arguments) whose comma-separated values are reduced to file names.
/// </summary>
/// <param name="PlaceholderName">Placeholder text, including its braces.</param>
/// <param name="LoadedHtmlReader">Reader holding the loaded HTML document.</param>
/// <returns>The resolved content, or an empty string when no rule applies.</returns>
private string GetPlaceholdContent(string PlaceholderName, HtmlReader LoadedHtmlReader)
{
    if (PlaceholderName == TemplatePlacholderInternal.FileName)
    {
        return Path.GetFileName(LoadedHtmlReader.FullFilePath);
    }

    if (PlaceholderName == TemplatePlacholderInternal.FilePath)
    {
        return Path.GetDirectoryName(LoadedHtmlReader.FullFilePath);
    }

    // Placeholder with all surrounding braces stripped.
    string expression = PlaceholderName.TrimStart(new char[] { '{' }).TrimEnd(new char[] { '}' });

    if (PlaceholderName.StartsWith("{{{"))
    {
        return LoadedHtmlReader.GetOuterHtmlContent(expression);
    }

    if (PlaceholderName.StartsWith("{{"))
    {
        return LoadedHtmlReader.GetInnerHtmlContent(expression);
    }

    if (!PlaceholderName.StartsWith("{"))
    {
        return "";
    }

    string[] arguments = expression.Split(',');
    if (arguments.Length == 1)
    {
        return LoadedHtmlReader.GetCSVInnerHtmlContent(arguments[0]);
    }

    if (arguments.Length == 2)
    {
        // Each comma-separated attribute value is reduced to its file name.
        string attributeValues = LoadedHtmlReader.GetAttributeHtmlContent(arguments[0], arguments[1]);
        string[] fileNames = Array.ConvertAll(attributeValues.Split(','), value => Path.GetFileName(value));
        return String.Join(",", fileNames);
    }

    return "";
}
/// <summary>
/// Walks every element of the loaded XML template and, in each text or CDATA child,
/// replaces every placeholder found with the content resolved from the HTML reader.
/// </summary>
/// <param name="LoadedHtmlReader">Reader holding the HTML the placeholders are resolved against.</param>
public void ExecutePlaceholderReplacement(HtmlReader LoadedHtmlReader)
{
    XmlNodeList allElements = _LoadedXmlDoc.DocumentElement.SelectNodes("//*");
    foreach (XmlNode element in allElements)
    {
        foreach (XmlNode child in element.ChildNodes)
        {
            bool holdsText = child.NodeType == XmlNodeType.Text || child.NodeType == XmlNodeType.CDATA;
            if (!holdsText)
            {
                continue;
            }

            foreach (string placeholder in this.ParseForPlaceholder(child))
            {
                string replacement = this.GetPlaceholdContent(placeholder, LoadedHtmlReader);
                child.InnerText = child.InnerText.Replace(placeholder, replacement);
            }
        }
    }
}
// Crawls the three loop collection pages, gathers the loop names behind their
// "/buy/" links, fetches the details page of each loop to collect the text of its
// "hide" divs, and writes one "<collection>-links.txt" file per collection into the
// current directory.
public void loops()
{
    const string buy = "/buy/";
    const string loops = "{0}/loops/{1}";
    const string api = "{0}/api/loopdetails.aspx?loopname={1}";

    var all = new Dictionary<string, Dictionary<string, IEnumerable<string>>>();
    all.Add("premiumcollection", null);
    all.Add("standardcollection", null);
    all.Add("classiccollection", null);

    // WebClient is IDisposable; release it once all downloads are done.
    using (var webClient = new WebClient())
    {
        // get loop pages
        foreach (string collection in all.Keys.ToArray())
        {
            string url = string.Format(loops, host, collection);

            // load HTML page
            string source = webClient.DownloadString(url);
            Assert.IsTrue(!String.IsNullOrEmpty(source), collection + " access denied");

            // read url "Add to Cart"
            all[collection] = new HtmlReader<Tags.A>().Read(source)
                .Distinct()
                .Where(i => !string.IsNullOrEmpty(i.Url) && i.Url.StartsWith(buy))
                .Select(i => i.Url.Replace(buy, string.Empty))
                .ToArray()
                .GroupBy(x => x, x => string.Empty)
                .ToDictionary(x => x.Key, x => Enumerable.Empty<string>());

            Assert.IsNotNull(all[collection], collection + " links not found");
        }

        // get mp3 links
        foreach (string collection in all.Keys.ToArray())
        {
            // Keys are snapshotted so the dictionary values can be replaced inside the loop.
            foreach (string loopname in all[collection].Keys.ToArray())
            {
                string url = string.Format(api, host, loopname);
                string source = string.Empty;

                try
                {
                    // load HTML page
                    source = webClient.DownloadString(url);
                }
                catch (WebException)
                {
                    // Best effort: a loop whose details page fails to download is skipped.
                    continue;
                }

                if (!string.IsNullOrEmpty(source))
                {
                    all[collection][loopname] = new HtmlReader<Tags.Div>().Read(source)
                        .Where(i => string.CompareOrdinal(i.Class, "hide") == 0)
                        .Distinct()
                        .Select(i => i.Text.Trim())
                        .ToArray();
                }
            }
        }
    }

    // save links
    foreach (string collection in all.Keys.ToArray())
    {
        using (StreamWriter file = new StreamWriter(Path.Combine(Directory.GetCurrentDirectory(), collection + "-links.txt")))
        {
            foreach (string link in all[collection].SelectMany(i => i.Value).Select(i => i))
            {
                file.WriteLine(string.Format("{0}{1}", host, link));
            }
        }
    }
}
// Downloads the ambient production-music page and, for each filter list
// (genre, use, mood, inst), collects the sorted third quoted argument of the
// OnChange handler of every matching checkbox input.
public void productionmusic()
{
    string url = string.Format("{0}/productionmusic/ambient", host);

    var all = new Dictionary<string, string[]>();
    all.Add("genre", null);
    all.Add("use", null);
    all.Add("mood", null);
    all.Add("inst", null);

    // Matches each single-quoted argument inside an OnChange handler.
    Regex split = new Regex(@"'[\s\S][^']*'", RegexOptions.Compiled);

    // load HTML page; WebClient is IDisposable, so release it right after the download
    string page;
    using (var webClient = new WebClient())
    {
        page = webClient.DownloadString(url);
    }

    var source = new HtmlReader<Tags.Input>().Read(page)
        .Where(i => i.OnChange != null && i.Type != null && string.CompareOrdinal(i.Type, "checkbox") == 0)
        .ToArray();

    foreach (var list in all.Keys.ToArray())
    {
        all[list] = source.Where(i => i.OnChange.Contains(list + "_list"))
            .Distinct()
            .Select(i => split.Matches(i.OnChange)[2].Value.Replace("'", string.Empty))
            .OrderBy(i => i)
            .ToArray();

        // Fix: the assertion used to check the key `list`, which is never null, so it
        // could not fail. Check that entries were actually found for this list.
        Assert.IsTrue(all[list].Length > 0, list + " not found");
    }
}