Esempio n. 1
0
        /// <summary>
        /// Parses the <c>MissingEndTags1</c> sample and, for every tag that
        /// <c>TagEnder.Set</c> rejects, inserts the text returned by
        /// <c>TagEnder.CloseTag</c> at the tag position recorded by the most recent
        /// <c>DoElement</c> callback. The resulting text and the elapsed time are
        /// passed to <c>ReportDetail</c>.
        /// </summary>
        public void MissingEndTagsTest()
        {
            // Timing covers test-data creation, parser setup and the parse itself.
            var timer = Stopwatch.StartNew();

            string html = new HtmlTestData().MissingEndTags1;
            TagParser = new TagParser(html);

            var ender            = new TagEnder();
            var previousTagStart = 0;

            // Remember where the latest element started; DoTag inserts there.
            TagParser.DoElement = item => {
                previousTagStart = item.TagPosition;
            };

            TagParser.DoTag = item => {
                var name = ender.Name(item.Tag);
                if (ender.Set(name, item.State))
                {
                    return;
                }

                // Set returned false: insert the closing text produced by the ender.
                TagParser.Insert(previousTagStart, ender.CloseTag(name));
            };

            // Text content is irrelevant for this test.
            TagParser.DoText = item => {};

            TagParser.Parse();

            timer.Stop();

            ReportDetail(TagParser.Stuff.Text.ToString());
            ReportDetail("Elapsed: " + timer.Elapsed);
        }
Esempio n. 2
0
        /// <summary>
        /// Appends to <c>TagParser.DoElement</c> and <c>TagParser.DoTag</c> the
        /// callbacks that strip the markup selected by the <c>Remove*</c> flags.
        /// </summary>
        /// <remarks>
        /// Handlers are appended with <c>+=</c> in a fixed order: images, span, font,
        /// strong, CDATA, table, style, comment. Tag names are compared
        /// case-insensitively using ordinal rules, so matching does not depend on the
        /// current culture.
        /// </remarks>
        /// <exception cref="NotImplementedException">
        /// Thrown when <c>RemoveBrBr</c> is set. Handlers for the flags checked before
        /// it (images through table) have already been registered at that point.
        /// </exception>
        protected virtual void Compose()
        {
            if (RemoveImages)
            {
                // Only the start tag is matched for img.
                RegisterTagRemoval(false, "img");
            }

            if (RemoveSpan)
            {
                RegisterTagRemoval(true, "span");
            }

            if (RemoveFonts)
            {
                RegisterTagRemoval(true, "font");
            }

            if (RemoveStrong)
            {
                RegisterTagRemoval(true, "strong");
            }

            if (RemoveCData)
            {
                var doRemove = false;

                TagParser.DoElement += stuff => {
                    if (!doRemove)
                    {
                        var element = stuff.Element;
                        doRemove =
                            stuff.State == Parser.State.Commenttag &&
                            element != null &&
                            element.StartsWith("![CDATA", StringComparison.Ordinal);
                    }
                };

                TagParser.DoTag += stuff => {
                    if (doRemove)
                    {
                        // NOTE(review): the fixed offsets presumably span the
                        // "<![CDATA[" opener and the "]]>" closer around the payload;
                        // confirm against the parser's position semantics.
                        var start1 = stuff.TagPosition - 2;
                        var end1   = stuff.TagPosition + 11;
                        var start2 = stuff.Position - 5;
                        var end2   = stuff.Position + 2;

                        // The range derived from Position is removed before the one
                        // derived from TagPosition, as in the original ordering.
                        TagParser.Remove(start2, end2);
                        TagParser.Remove(start1, end1);

                        doRemove = false;
                    }
                };
            }

            if (RemoveTable)
            {
                RegisterTagRemoval(true, "table", "td", "tr", "th");
            }

            // FistLineAsH1 is deliberately not handled: the previous implementation
            // was disabled with "&& false" and never ran.

            if (RemoveBrBr)
            {
                throw new NotImplementedException();
            }

            if (RemoveStyle)
            {
                RegisterTagRemoval(true, "style");
            }

            if (RemoveComment)
            {
                var doRemove = false;

                TagParser.DoElement += stuff => {
                    if (!doRemove)
                    {
                        doRemove = stuff.State == Parser.State.Commenttag;
                    }
                };

                TagParser.DoTag += stuff => {
                    if (doRemove)
                    {
                        TagParser.Remove(stuff.TagPosition, stuff.Position);
                        doRemove = false;
                    }
                };
            }
        }

        /// <summary>
        /// Appends a DoElement/DoTag handler pair that removes every tag whose element
        /// name is one of <paramref name="names"/>.
        /// </summary>
        /// <param name="includeEndTags">
        /// When true, end tags (elements reported as "/" + name in the
        /// <c>Endtag</c> state) are removed as well.
        /// </param>
        /// <param name="names">Element names to remove, without a leading slash.</param>
        private void RegisterTagRemoval(bool includeEndTags, params string[] names)
        {
            var endNames = Array.ConvertAll(names, name => "/" + name);

            // Each registration owns its flag, so handler pairs do not interfere.
            var doRemove = false;

            TagParser.DoElement += stuff => {
                if (!doRemove)
                {
                    var element = stuff.Element;
                    doRemove =
                        stuff.State == Parser.State.Name && IsAnyTagName(element, names) ||
                        includeEndTags && stuff.State == Parser.State.Endtag && IsAnyTagName(element, endNames);
                }
            };

            TagParser.DoTag += stuff => {
                if (doRemove)
                {
                    TagParser.Remove(stuff.TagPosition, stuff.Position);
                    doRemove = false;
                }
            };
        }

        /// <summary>
        /// Returns true when <paramref name="element"/> equals one of
        /// <paramref name="candidates"/>, ignoring case with ordinal comparison.
        /// A null element matches nothing.
        /// </summary>
        private static bool IsAnyTagName(string element, string[] candidates)
        {
            return Array.Exists(
                candidates,
                candidate => string.Equals(element, candidate, StringComparison.OrdinalIgnoreCase));
        }