/// <summary>
/// Parses the <c>MissingEndTags1</c> sample, removes every tag whose element
/// name is exactly "img", and reports the resulting text, the last element
/// handed to any callback, and the elapsed time.
/// </summary>
public void TestRemoveImages()
{
    var timer = Stopwatch.StartNew();

    var fixture = new HtmlTestData();
    string html = fixture.MissingEndTags1;
    TagParser = new TagParser(html);

    // Last element seen by any of the three callbacks; reported at the end.
    string lastElement = "";
    // Set once an "img" element name has been seen, cleared after the tag is removed.
    var imagePending = false;

    TagParser.DoElement = item =>
    {
        lastElement = item.Element;
        if (imagePending)
        {
            return;
        }

        imagePending = item.State == State.Name && item.Element == "img";
    };

    TagParser.DoTag = item =>
    {
        lastElement = item.Element;
        if (!imagePending)
        {
            return;
        }

        TagParser.Remove(item.TagPosition, item.Position);
        imagePending = false;
    };

    TagParser.DoText = item =>
    {
        lastElement = item.Element;
    };

    TagParser.Parse();
    timer.Stop();

    ReportDetail(TagParser.Stuff.Text.ToString());
    ReportDetail(lastElement);
    ReportDetail("Elapsed: " + timer.Elapsed);
}
/// <summary>
/// Parses the <c>HTML2</c> sample with a tag parser and a style parser that
/// share the same source. Opening and closing "span" tags are removed, the
/// attribute value "Flieatext" is replaced with "Inhalt", and the style
/// parser is run once a "style" element name has been seen. The resulting
/// text and the elapsed time are reported.
/// </summary>
public void TestingReplace()
{
    var timer = Stopwatch.StartNew();

    var fixture = new HtmlTestData();
    string html = fixture.HTML2;
    TagParser = new TagParser(html);
    StyleParser = new StyleParser(html);

    // Sentinel meaning "no tag is currently marked for deletion".
    const int NoTag = -1;
    int tagToDelete = NoTag;
    bool stylePending = false;
    // Never written to by any callback; reported as an empty detail line.
    string emptyResult = string.Empty;

    // Hand the style parser's state back to the tag parser on every style element.
    StyleParser.DoElement = item =>
    {
        TagParser.Stuff = StyleParser.Stuff;
    };

    TagParser.DoElement = item =>
    {
        if (item.State.Equals(State.Name))
        {
            if (item.Element.Equals("span"))
            {
                tagToDelete = item.TagPosition;
            }
            else if (item.Element.Equals("style"))
            {
                stylePending = true;
            }

            return;
        }

        if (item.State.Equals(State.Endtag))
        {
            if (item.Element.Equals("/span"))
            {
                tagToDelete = item.TagPosition;
            }

            return;
        }

        if (item.State.Equals(State.Value) && item.Element.Equals("Flieatext"))
        {
            TagParser.Replace(item.Origin, item.Position, "Inhalt");
        }
    };

    TagParser.DoTag = item =>
    {
        // Deletion takes precedence over style handling for the same tag.
        if (item.TagPosition == tagToDelete)
        {
            TagParser.Remove(item.TagPosition, item.Position);
            tagToDelete = NoTag;
            return;
        }

        if (stylePending)
        {
            StyleParser.Stuff = TagParser.Stuff;
            StyleParser.Parse();
            stylePending = false;
        }
    };

    // Text nodes are intentionally ignored.
    TagParser.DoText = item => { };

    TagParser.Parse();
    timer.Stop();

    ReportDetail(TagParser.Stuff.Text.ToString());
    ReportDetail(emptyResult);
    ReportDetail("Vergangene Zeit: " + timer.Elapsed);
}
/// <summary>
/// Appends clean-up handlers to <c>TagParser.DoElement</c> and
/// <c>TagParser.DoTag</c> for every enabled <c>Remove*</c> option.
/// </summary>
/// <remarks>
/// Handlers are appended in a fixed order (images, span, font, strong, CDATA,
/// table, style, comment). Tag names are compared case-insensitively with
/// ordinal rules, so matching does not depend on the current culture.
/// </remarks>
/// <exception cref="NotImplementedException">
/// Thrown when <c>RemoveBrBr</c> is set; handlers for the options checked
/// before it have already been attached at that point.
/// </exception>
protected virtual void Compose()
{
    if (RemoveImages)
    {
        // Only the opening element name is matched for images.
        AttachTagRemoval(false, "img");
    }

    if (RemoveSpan)
    {
        AttachTagRemoval(true, "span");
    }

    if (RemoveFonts)
    {
        AttachTagRemoval(true, "font");
    }

    if (RemoveStrong)
    {
        AttachTagRemoval(true, "strong");
    }

    if (RemoveCData)
    {
        AttachCDataRemoval();
    }

    if (RemoveTable)
    {
        AttachTagRemoval(true, "table", "td", "tr", "th");
    }

    // FistLineAsH1 (sic) is deliberately not handled here: the previous branch
    // was disabled with "&& false" and its body was unreachable, so the option
    // has never had any effect.

    if (RemoveBrBr)
    {
        throw new NotImplementedException("RemoveBrBr is not implemented.");
    }

    if (RemoveStyle)
    {
        AttachTagRemoval(true, "style");
    }

    if (RemoveComment)
    {
        AttachCommentRemoval();
    }
}

/// <summary>
/// Appends a DoElement/DoTag handler pair that removes every tag whose
/// element name matches one of <paramref name="names"/>.
/// </summary>
/// <param name="includeEndTags">
/// When true, end tags ("/" followed by the name) are removed as well.
/// </param>
/// <param name="names">Element names to match, without a leading slash.</param>
private void AttachTagRemoval(bool includeEndTags, params string[] names)
{
    string[] endNames = includeEndTags
        ? Array.ConvertAll(names, name => "/" + name)
        : new string[0];

    // Set by the element handler, consumed and cleared by the tag handler.
    var pending = false;

    TagParser.DoElement += stuff =>
    {
        if (pending)
        {
            return;
        }

        pending = (stuff.State == Parser.State.Name && MatchesAnyName(stuff.Element, names))
            || (stuff.State == Parser.State.Endtag && MatchesAnyName(stuff.Element, endNames));
    };

    TagParser.DoTag += stuff =>
    {
        if (!pending)
        {
            return;
        }

        TagParser.Remove(stuff.TagPosition, stuff.Position);
        pending = false;
    };
}

/// <summary>
/// Appends a handler pair that strips the markers of comment tags whose
/// element starts with "![CDATA".
/// </summary>
private void AttachCDataRemoval()
{
    var pending = false;

    TagParser.DoElement += stuff =>
    {
        if (pending)
        {
            return;
        }

        pending = stuff.State == Parser.State.Commenttag
            && stuff.Element.StartsWith("![CDATA", StringComparison.Ordinal);
    };

    TagParser.DoTag += stuff =>
    {
        if (!pending)
        {
            return;
        }

        // NOTE(review): the fixed offsets presumably span the opening and
        // closing CDATA markers together with their surrounding comment
        // delimiters - confirm against the parser's position semantics.
        var headStart = stuff.TagPosition - 2;
        var headEnd = stuff.TagPosition + 11;
        var tailStart = stuff.Position - 5;
        var tailEnd = stuff.Position + 2;

        // The range derived from Position is removed before the range derived
        // from TagPosition, as in the original; presumably so that the earlier
        // offsets are still valid for the second call - confirm.
        TagParser.Remove(tailStart, tailEnd);
        TagParser.Remove(headStart, headEnd);
        pending = false;
    };
}

/// <summary>
/// Appends a handler pair that removes every tag reported with the
/// <c>Commenttag</c> state.
/// </summary>
private void AttachCommentRemoval()
{
    var pending = false;

    TagParser.DoElement += stuff =>
    {
        if (!pending)
        {
            pending = stuff.State == Parser.State.Commenttag;
        }
    };

    TagParser.DoTag += stuff =>
    {
        if (!pending)
        {
            return;
        }

        TagParser.Remove(stuff.TagPosition, stuff.Position);
        pending = false;
    };
}

/// <summary>
/// Returns true when <paramref name="element"/> equals one of
/// <paramref name="names"/>, ignoring case with ordinal comparison.
/// A null element never matches.
/// </summary>
private static bool MatchesAnyName(string element, string[] names)
{
    foreach (var name in names)
    {
        if (string.Equals(element, name, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}