예제 #1
0
        /// <summary>
        /// Runs the tag parser over the <c>MissingEndTags1</c> sample and removes every tag
        /// whose name is exactly <c>img</c> (case-sensitive), then reports the resulting text,
        /// the last element handed to any callback, and the elapsed time.
        /// </summary>
        public void TestRemoveImages()
        {
            // The timer also covers creation of the test data and the parser set-up.
            var timer = Stopwatch.StartNew();

            string html = new HtmlTestData().MissingEndTags1;
            TagParser = new TagParser(html);

            // Last element seen by any of the three callbacks.
            string lastElement = "";
            // Set once an "img" tag name was seen; cleared when that tag has been removed.
            var imagePending = false;

            TagParser.DoElement = stuff => {
                lastElement = stuff.Element;
                // Once flagged, the flag stays set until DoTag consumes it.
                imagePending = imagePending || (stuff.State == State.Name && stuff.Element == "img");
            };

            TagParser.DoTag = stuff => {
                lastElement = stuff.Element;
                if (!imagePending)
                {
                    return;
                }

                TagParser.Remove(stuff.TagPosition, stuff.Position);
                imagePending = false;
            };

            TagParser.DoText = stuff => lastElement = stuff.Element;

            TagParser.Parse();
            timer.Stop();

            var elapsed = timer.Elapsed;

            ReportDetail(TagParser.Stuff.Text.ToString());
            ReportDetail(lastElement);
            ReportDetail("Elapsed: " + elapsed);
        }
예제 #2
0
        /// <summary>
        /// Runs the tag parser over the <c>HTML2</c> sample and, while parsing:
        /// removes opening and closing <c>span</c> tags, hands the shared parse state to the
        /// style parser after a <c>style</c> tag, and replaces the value <c>Flieatext</c>
        /// with <c>Inhalt</c>. Afterwards the resulting text and the elapsed time are reported.
        /// </summary>
        public void TestingReplace()
        {
            // The timer also covers creation of the test data and both parsers.
            var timer = Stopwatch.StartNew();

            string html = new HtmlTestData().HTML2;

            TagParser   = new TagParser(html);
            StyleParser = new StyleParser(html);

            // Tag position of a span tag waiting to be removed; -1 when nothing is pending.
            var pendingRemovalAt = -1;
            // True between seeing a "style" tag name and the end of that tag.
            var styleTagSeen = false;
            // Never assigned by any callback; reported as an empty detail line.
            var result = "";

            // Keep the tag parser working on the state the style parser is using.
            StyleParser.DoElement = stuff => TagParser.Stuff = StyleParser.Stuff;

            TagParser.DoElement = stuff => {
                if (stuff.State.Equals(State.Name))
                {
                    if (stuff.Element.Equals("span"))
                    {
                        pendingRemovalAt = stuff.TagPosition;
                        return;
                    }

                    if (stuff.Element.Equals("style"))
                    {
                        styleTagSeen = true;
                    }

                    return;
                }

                if (stuff.State.Equals(State.Endtag))
                {
                    if (stuff.Element.Equals("/span"))
                    {
                        pendingRemovalAt = stuff.TagPosition;
                    }

                    return;
                }

                if (stuff.State.Equals(State.Value) && stuff.Element.Equals("Flieatext"))
                {
                    TagParser.Replace(stuff.Origin, stuff.Position, "Inhalt");
                }
            };

            TagParser.DoTag = stuff => {
                // A pending span removal takes precedence over style handling.
                if (stuff.TagPosition == pendingRemovalAt)
                {
                    TagParser.Remove(stuff.TagPosition, stuff.Position);
                    pendingRemovalAt = -1;
                    return;
                }

                if (!styleTagSeen)
                {
                    return;
                }

                // Let the style parser continue on the tag parser's current state.
                StyleParser.Stuff = TagParser.Stuff;
                StyleParser.Parse();
                styleTagSeen = false;
            };

            // Text nodes are deliberately ignored.
            TagParser.DoText = stuff => { };

            TagParser.Parse();
            timer.Stop();

            var elapsed = timer.Elapsed;

            ReportDetail(TagParser.Stuff.Text.ToString());
            ReportDetail(result);
            ReportDetail("Vergangene Zeit: " + elapsed);
        }
예제 #3
0
        /// <summary>
        /// Registers clean-up handlers on <c>TagParser</c> for every enabled <c>Remove*</c> option.
        /// </summary>
        /// <remarks>
        /// Each enabled option adds a handler pair with its own flag: the <c>DoElement</c>
        /// handler sets the flag when the current element matches, and the <c>DoTag</c>
        /// handler removes the span from <c>TagPosition</c> to <c>Position</c> and clears the flag.
        /// Tag names are compared ordinally and case-insensitively, so matching does not depend
        /// on the current culture (a culture-sensitive <c>ToLower()</c> breaks "IMG" under tr-TR).
        /// <c>FistLineAsH1</c> is currently ignored: promoting the first paragraph to a heading
        /// is not implemented.
        /// </remarks>
        /// <exception cref="NotImplementedException">
        /// Thrown when <c>RemoveBrBr</c> is set. Handlers for the options checked before it are
        /// already registered at that point; <c>RemoveStyle</c> and <c>RemoveComment</c> are not.
        /// </exception>
        protected virtual void Compose()
        {
            if (RemoveImages)
            {
                var doRemove = false;

                TagParser.DoElement += stuff => {
                    if (!doRemove)
                    {
                        doRemove = stuff.State == Parser.State.Name && IsTagName(stuff.Element, "img");
                    }
                };

                TagParser.DoTag += stuff => {
                    if (doRemove)
                    {
                        TagParser.Remove(stuff.TagPosition, stuff.Position);
                        doRemove = false;
                    }
                };
            }

            if (RemoveSpan)
            {
                var doRemove = false;

                TagParser.DoElement += stuff => {
                    if (!doRemove)
                    {
                        doRemove =
                            stuff.State == Parser.State.Name && IsTagName(stuff.Element, "span") ||
                            stuff.State == Parser.State.Endtag && IsTagName(stuff.Element, "/span");
                    }
                };

                TagParser.DoTag += stuff => {
                    if (doRemove)
                    {
                        TagParser.Remove(stuff.TagPosition, stuff.Position);
                        doRemove = false;
                    }
                };
            }

            if (RemoveFonts)
            {
                var doRemove = false;

                TagParser.DoElement += stuff => {
                    if (!doRemove)
                    {
                        doRemove =
                            stuff.State == Parser.State.Name && IsTagName(stuff.Element, "font") ||
                            stuff.State == Parser.State.Endtag && IsTagName(stuff.Element, "/font");
                    }
                };

                TagParser.DoTag += stuff => {
                    if (doRemove)
                    {
                        TagParser.Remove(stuff.TagPosition, stuff.Position);
                        doRemove = false;
                    }
                };
            }

            if (RemoveStrong)
            {
                var doRemove = false;

                TagParser.DoElement += stuff => {
                    if (!doRemove)
                    {
                        doRemove =
                            stuff.State == Parser.State.Name && IsTagName(stuff.Element, "strong") ||
                            stuff.State == Parser.State.Endtag && IsTagName(stuff.Element, "/strong");
                    }
                };

                TagParser.DoTag += stuff => {
                    if (doRemove)
                    {
                        TagParser.Remove(stuff.TagPosition, stuff.Position);
                        doRemove = false;
                    }
                };
            }

            if (RemoveCData)
            {
                var doRemove = false;

                TagParser.DoElement += stuff => {
                    if (!doRemove)
                    {
                        // The CDATA marker is matched case-sensitively, as a fixed token.
                        doRemove =
                            stuff.State == Parser.State.Commenttag &&
                            stuff.Element.StartsWith("![CDATA", StringComparison.Ordinal);
                    }
                };

                TagParser.DoTag += stuff => {
                    if (doRemove)
                    {
                        // Two separate spans are removed instead of the whole tag.
                        // NOTE(review): the offsets presumably cover the "<![CDATA[" opener and
                        // the "]]>" closer so the enclosed content survives - confirm against
                        // the parser's position conventions.
                        var start1 = stuff.TagPosition - 2;
                        var end1   = stuff.TagPosition + 11;
                        var start2 = stuff.Position - 5;
                        var end2   = stuff.Position + 2;

                        // The span near Position is removed first, presumably so the offsets
                        // of the span near TagPosition are not shifted by the removal.
                        TagParser.Remove(start2, end2);
                        TagParser.Remove(start1, end1);

                        doRemove = false;
                    }
                };
            }

            if (RemoveTable)
            {
                var doRemove = false;

                TagParser.DoElement += stuff => {
                    if (!doRemove)
                    {
                        doRemove =
                            (stuff.State == Parser.State.Name && IsTagName(stuff.Element, "table", "td", "tr", "th")) ||
                            (stuff.State == Parser.State.Endtag && IsTagName(stuff.Element, "/table", "/td", "/tr", "/th"));
                    }
                };

                TagParser.DoTag += stuff => {
                    if (doRemove)
                    {
                        TagParser.Remove(stuff.TagPosition, stuff.Position);
                        doRemove = false;
                    }
                };
            }

            // NOTE: FistLineAsH1 is intentionally not handled here; the feature is not implemented.

            if (RemoveBrBr)
            {
                // Collapsing consecutive <br /> tags is not implemented; fail loudly instead of
                // silently ignoring the option.
                throw new NotImplementedException();
            }

            if (RemoveStyle)
            {
                var doRemove = false;

                TagParser.DoElement += stuff => {
                    if (!doRemove)
                    {
                        doRemove =
                            stuff.State == Parser.State.Name && IsTagName(stuff.Element, "style") ||
                            stuff.State == Parser.State.Endtag && IsTagName(stuff.Element, "/style");
                    }
                };

                TagParser.DoTag += stuff => {
                    if (doRemove)
                    {
                        TagParser.Remove(stuff.TagPosition, stuff.Position);
                        doRemove = false;
                    }
                };
            }

            if (RemoveComment)
            {
                var doRemove = false;

                // NOTE(review): CDATA sections are also reported with State.Commenttag (see the
                // RemoveCData branch), so with both options enabled both DoTag handlers run for
                // the same tag - confirm that this combination is intended.
                TagParser.DoElement += stuff => {
                    if (!doRemove)
                    {
                        doRemove = stuff.State == Parser.State.Commenttag;
                    }
                };

                TagParser.DoTag += stuff => {
                    if (doRemove)
                    {
                        TagParser.Remove(stuff.TagPosition, stuff.Position);
                        doRemove = false;
                    }
                };
            }
        }

        /// <summary>
        /// Tells whether <paramref name="element"/> equals one of <paramref name="names"/>,
        /// using an ordinal, case-insensitive comparison.
        /// </summary>
        /// <param name="element">Element text to test; <c>null</c> never matches.</param>
        /// <param name="names">Candidate tag names, e.g. <c>"td"</c> or <c>"/td"</c>.</param>
        /// <returns><c>true</c> when any candidate matches; otherwise <c>false</c>.</returns>
        private static bool IsTagName(string element, params string[] names)
        {
            foreach (var name in names)
            {
                if (string.Equals(element, name, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }

            return false;
        }