Example #1
0
        /// <summary>
        /// Rearranges the children of the given box so that the children preceding the offending
        /// child are gathered in one block, the offending child is split out, and the children
        /// following it are gathered in another box.
        /// </summary>
        /// <param name="box">the box whose children have to be rearranged</param>
        private static void CorrectBlockInsideInlineImp(CssBox box)
        {
            // with at most one child there is nothing to split, the child only becomes a block
            if (box.Boxes.Count <= 1)
            {
                box.Boxes[0].Display = CssConstants.Block;
                return;
            }

            // move every leading child that holds only inlines (deep check) into a new block
            var leadingBlock = CssBox.CreateBlock(box);
            while (ContainsInlinesOnlyDeep(box.Boxes[0]))
            {
                box.Boxes[0].ParentBox = leadingBlock;
            }
            leadingBlock.SetBeforeBox(box.Boxes[0]);

            // take the second child out of the tree and hand it to the split helper
            var offendingBox = box.Boxes[1];
            offendingBox.ParentBox = null;
            CorrectBlockSplitBadBox(box, offendingBox, leadingBlock);

            // everything from the third child on is collected under a single trailing box
            if (box.Boxes.Count > 2)
            {
                var trailingBox = CssBox.CreateBox(box, null, box.Boxes[2]);
                while (box.Boxes.Count > 3)
                {
                    box.Boxes[3].ParentBox = trailingBox;
                }
            }

            box.Display = CssConstants.Block;
        }
Example #2
0
        /// <summary>
        /// Wraps every run of consecutive inline children in a new block box when the given box
        /// holds a mix of child kinds, then repeats the correction for the children.<br/>
        /// The goal is that a block box has only block siblings and an inline box has only
        /// inline siblings.
        /// </summary>
        /// <param name="box">the current box to correct its sub-tree</param>
        private static void CorrectInlineBoxesParent(CssBox box)
        {
            if (ContainsVariantBoxes(box))
            {
                int index = 0;
                while (index < box.Boxes.Count)
                {
                    var candidate = box.Boxes[index];
                    if (candidate.IsInline)
                    {
                        // the wrapper is created for the first inline of the run; stepping the
                        // index by one before collecting the following inline siblings
                        var wrapper = CssBox.CreateBlock(box, null, candidate);
                        index++;
                        while (index < box.Boxes.Count && box.Boxes[index].IsInline)
                        {
                            box.Boxes[index].ParentBox = wrapper;
                        }
                    }
                    index++;
                }
            }

            // a box holding only inlines needs no further correction below it
            if (DomUtils.ContainsInlinesOnly(box))
            {
                return;
            }

            foreach (var child in box.Boxes)
            {
                CorrectInlineBoxesParent(child);
            }
        }
Example #3
0
        /// <summary>
        /// Ensures block boxes sit only among block boxes and inline boxes only among inline boxes.<br/>
        /// When the given box mixes both kinds of children, each run of adjacent inline children is
        /// moved under a newly created block box; afterwards the children are corrected recursively.
        /// </summary>
        /// <param name="box">the current box to correct its sub-tree</param>
        private static void CorrectInlineBoxesParent(CssBox box)
        {
            // only a box with mixed children needs its inline runs wrapped
            if (ContainsVariantBoxes(box))
            {
                for (int pos = 0; pos < box.Boxes.Count; pos++)
                {
                    if (!box.Boxes[pos].IsInline)
                    {
                        continue;
                    }

                    // create the wrapping block for the inline child found at this position
                    var anonBlock = CssBox.CreateBlock(box, null, box.Boxes[pos]);
                    pos++;

                    // pull the inline children found at the advanced position into the wrapper
                    while (pos < box.Boxes.Count && box.Boxes[pos].IsInline)
                    {
                        box.Boxes[pos].ParentBox = anonBlock;
                    }
                }
            }

            bool onlyInlines = DomUtils.ContainsInlinesOnly(box);
            if (!onlyInlines)
            {
                foreach (var nested in box.Boxes)
                {
                    CorrectInlineBoxesParent(nested);
                }
            }
        }
Example #4
0
        /// <summary>
        /// Rearranges the children of the given box so that the children preceding the offending
        /// child are gathered in one block, the offending child is split out, and the remaining
        /// tail children are gathered in a temporary box.
        /// </summary>
        /// <param name="box">the box whose children have to be rearranged</param>
        /// <returns>the temporary box holding the tail children, or null when no such box was created</returns>
        private static CssBox CorrectBlockInsideInlineImp(CssBox box)
        {
            if (box.Display == CssConstants.Inline)
            {
                box.Display = CssConstants.Block;
            }

            bool needsSplit = box.Boxes.Count > 1 || box.Boxes[0].Boxes.Count > 1;
            if (!needsSplit)
            {
                // nothing to split: the first child only has to stop being inline
                if (box.Boxes[0].Display == CssConstants.Inline)
                {
                    box.Boxes[0].Display = CssConstants.Block;
                }
                return null;
            }

            // move every leading child that holds only inlines (deep check) into a new block
            var leadingBlock = CssBox.CreateBlock(box);
            while (ContainsInlinesOnlyDeep(box.Boxes[0]))
            {
                box.Boxes[0].ParentBox = leadingBlock;
            }
            leadingBlock.SetBeforeBox(box.Boxes[0]);

            // take the second child out of the tree and hand it to the split helper
            var offendingBox = box.Boxes[1];
            offendingBox.ParentBox = null;
            CorrectBlockSplitBadBox(box, offendingBox, leadingBlock);

            // drop the leading block again if nothing ended up inside it
            if (leadingBlock.Boxes.Count < 1)
            {
                leadingBlock.ParentBox = null;
            }

            // the tail starts after the leading block (when it was kept) and the split result
            int tailStart = leadingBlock.ParentBox != null ? 2 : 1;
            if (box.Boxes.Count <= tailStart)
            {
                return null;
            }

            // temporary box for the tail elements; it is returned to the caller
            var tailBox = CssBox.CreateBox(box, null, box.Boxes[tailStart]);
            while (box.Boxes.Count > tailStart + 1)
            {
                box.Boxes[tailStart + 1].ParentBox = tailBox;
            }

            return tailBox;
        }
Example #5
0
        /// <summary>
        /// Parses the source html to css boxes tree structure.<br/>
        /// Text between tags is added through <c>AddTextBox</c>, elements starting with "&lt;!" are
        /// skipped up to the next '&gt;', and every other tag is handed to <c>ParseHtmlTag</c>.
        /// </summary>
        /// <param name="source">the html source to parse</param>
        /// <returns>the root block box of the parsed tree</returns>
        public static CssBox ParseDocument(string source)
        {
            var root = CssBox.CreateBlock();
            var curBox = root;

            int endIdx = 0;
            int startIdx = 0;

            while (startIdx >= 0)
            {
                var tagIdx = source.IndexOf('<', startIdx);
                if (tagIdx >= 0)
                {
                    // add the html text as anon css box to the structure
                    AddTextBox(source, startIdx, tagIdx, ref curBox);

                    if (tagIdx + 1 >= source.Length)
                    {
                        // the '<' is the last character of the source so no tag can follow it;
                        // reading source[tagIdx + 1] would be out of range. Stop scanning and let
                        // the trailing-text handling below pick the character up as plain text.
                        endIdx = tagIdx;
                        break;
                    }

                    if (source[tagIdx + 1] == '!')
                    {
                        // skip the non-element html constructs (<!-- bla -->) (<!doctype bla>)
                        startIdx = source.IndexOf('>', tagIdx + 2);
                        endIdx = startIdx > 0 ? startIdx + 1 : tagIdx + 2;
                    }
                    else
                    {
                        // parse element tag to css box structure
                        endIdx = ParseHtmlTag(source, tagIdx, ref curBox) + 1;
                    }
                }

                // continue after the handled tag, or stop when no further tag was found
                startIdx = tagIdx > -1 && endIdx > 0 ? endIdx : -1;
            }

            // handle pieces of html without proper structure
            if (endIdx < source.Length)
            {
                // there is text after the end of last element
                var endText = new SubString(source, endIdx, source.Length - endIdx);
                if (!endText.IsEmptyOrWhitespace())
                {
                    var abox = CssBox.CreateBox(root);
                    abox.Text = endText;
                }
            }

            return root;
        }
Example #6
0
 /// <summary>
 /// Walks the children of the given box from last to first. An image child whose display is
 /// block is moved into a newly created block and switched to inline; every other child is
 /// processed recursively.
 /// </summary>
 /// <param name="box">the current box to correct its sub-tree</param>
 private static void CorrectImgBoxes(CssBox box)
 {
     int index = box.Boxes.Count;
     while (--index >= 0)
     {
         CssBox current = box.Boxes[index];

         bool isBlockImage = current is CssBoxImage && current.Display == CssConstants.Block;
         if (!isBlockImage)
         {
             // not a block image: descend into its own children
             CorrectImgBoxes(current);
             continue;
         }

         // wrap the image in a new block created under its parent and make the image inline
         CssBox wrapper = CssBox.CreateBlock(current.ParentBox, null, current);
         current.ParentBox = wrapper;
         current.Display = CssConstants.Inline;
     }
 }
Example #7
0
        /// <summary>
        /// Correct the DOM tree recursively by replacing "br" html boxes with anonymous blocks that respect br spec.<br/>
        /// Children are corrected first (depth-first), then every "br" child of the given box is replaced
        /// by a block created with a new "br" tag.<br/>
        /// When the running <paramref name="followingBlock"/> flag is set at the time a "br" is replaced, the
        /// replacement block gets an explicit height of ".95em" so it creates an empty line; otherwise no
        /// height is assigned here and the block only acts as a line break.
        /// </summary>
        /// <param name="box">the current box to correct its sub-tree</param>
        /// <param name="followingBlock">running flag, shared by reference across the whole recursion, that tells
        /// if the br is following a block box (so it should create an empty line) or not (so it only
        /// moves to a new line)</param>
        private static void CorrectLineBreaksBlocks(CssBox box, ref bool followingBlock)
        {
            // entering a block box turns the flag on; it is never turned off here
            followingBlock = followingBlock || box.IsBlock;
            foreach (CssBox childBox in box.Boxes)
            {
                CorrectLineBreaksBlocks(childBox, ref followingBlock);
                // a child that has words clears the flag; a child without words keeps it set or sets it if the child is a block
                followingBlock = childBox.Words.Count == 0 && (followingBlock || childBox.IsBlock);
            }

            // index of the last "br" already handled; boxes at or before it are not treated as "br" again
            int    lastBr = -1;
            CssBox brBox;

            do
            {
                brBox = null;
                // rescan from the first child on every pass, stopping at the first unhandled "br";
                // the boxes passed on the way update the flag again
                for (int i = 0; i < box.Boxes.Count && brBox == null; i++)
                {
                    if (i > lastBr && box.Boxes[i].IsBrElement)
                    {
                        brBox  = box.Boxes[i];
                        lastBr = i;
                    }
                    else if (box.Boxes[i].Words.Count > 0)
                    {
                        // a box with words precedes the next "br"
                        followingBlock = false;
                    }
                    else if (box.Boxes[i].IsBlock)
                    {
                        // a block box without words precedes the next "br"
                        followingBlock = true;
                    }
                }

                if (brBox != null)
                {
                    // create the replacement block (passing the "br" box as the third argument) and detach the original "br" box
                    CssBox anonBlock = CssBox.CreateBlock(box, new HtmlTag("br"), brBox);
                    if (followingBlock)
                    {
                        anonBlock.Height = ".95em"; // TODO: change height to min-height once min-height is supported
                    }
                    brBox.ParentBox = null;
                }
            } while (brBox != null);
        }
Example #8
0
        /// <summary>
        /// Replaces every "br" child of the given box with a block created with a new "br" tag, then
        /// repeats the correction for all children.<br/>
        /// The replacement block gets a height of ".9em" when no box precedes the "br" or when the
        /// preceding box is not displayed inline; otherwise no height is assigned here.
        /// </summary>
        /// <param name="box">the current box to correct its sub-tree</param>
        private static void CorrectLineBreaksBlocks(CssBox box)
        {
            // index of the last "br" already replaced; boxes up to it are never matched again
            int handledUpTo = -1;

            while (true)
            {
                CssBox lineBreak = null;
                CssBox preceding = null;

                // scan from the first child for the next "br" that was not handled yet
                for (int idx = 0; idx < box.Boxes.Count; idx++)
                {
                    bool isNewBreak = idx > handledUpTo
                                      && box.Boxes[idx].HtmlTag != null
                                      && box.Boxes[idx].HtmlTag.Name == "br";
                    if (isNewBreak)
                    {
                        lineBreak = box.Boxes[idx];
                        handledUpTo = idx;
                        break;
                    }

                    preceding = box.Boxes[idx];
                }

                if (lineBreak == null)
                {
                    break;
                }

                var replacement = CssBox.CreateBlock(box, new HtmlTag("br"), lineBreak);
                if (preceding == null || preceding.Display != CssConstants.Inline)
                {
                    replacement.Height = ".9em"; // TODO: change height to min-height once min-height is supported
                }
                lineBreak.ParentBox = null;
            }

            foreach (var nested in box.Boxes)
            {
                CorrectLineBreaksBlocks(nested);
            }
        }
Example #9
0
        /// <summary>
        /// Parses the source html to css boxes tree structure.<br/>
        /// Text between tags is added through <c>AddTextBox</c>, html comments and other "&lt;!" constructs
        /// are skipped, every other tag is handed to <c>ParseHtmlTag</c>, and the content of a style
        /// element is added as a single text box without being parsed for tags.
        /// </summary>
        /// <param name="source">the html source to parse</param>
        /// <returns>the root block box of the parsed tree</returns>
        public static CssBox ParseDocument(string source)
        {
            var root = CssBox.CreateBlock();
            var curBox = root;

            int endIdx = 0;
            int startIdx = 0;

            while (startIdx >= 0)
            {
                var tagIdx = source.IndexOf('<', startIdx);
                if (tagIdx >= 0)
                {
                    // add the html text as anon css box to the structure
                    AddTextBox(source, startIdx, tagIdx, ref curBox);

                    if (tagIdx + 1 >= source.Length)
                    {
                        // the '<' is the last character of the source so no tag can follow it;
                        // reading source[tagIdx + 1] would be out of range. Stop scanning and let
                        // the trailing-text handling below pick the character up as plain text.
                        endIdx = tagIdx;
                        break;
                    }

                    if (source[tagIdx + 1] == '!')
                    {
                        // the bounds check covers a source that ends right after "<!"
                        if (tagIdx + 2 < source.Length && source[tagIdx + 2] == '-')
                        {
                            // skip the html comment elements (<!-- bla -->)
                            startIdx = source.IndexOf("-->", tagIdx + 2, StringComparison.Ordinal);
                            endIdx = startIdx > 0 ? startIdx + 3 : tagIdx + 2;
                        }
                        else
                        {
                            // skip the other non-element html constructs (<!doctype bla>)
                            startIdx = source.IndexOf('>', tagIdx + 2);
                            endIdx = startIdx > 0 ? startIdx + 1 : tagIdx + 2;
                        }
                    }
                    else
                    {
                        // parse element tag to css box structure
                        endIdx = ParseHtmlTag(source, tagIdx, ref curBox) + 1;

                        if (curBox.HtmlTag != null &&
                            curBox.HtmlTag.Name.Equals(HtmlConstants.Style, StringComparison.OrdinalIgnoreCase))
                        {
                            // take everything up to the closing style tag as one text box;
                            // when no closing tag exists endIdx becomes -1, which ends the parsing
                            var styleStartIdx = endIdx;
                            endIdx = source.IndexOf("</style>", endIdx, StringComparison.OrdinalIgnoreCase);
                            if (endIdx > -1)
                            {
                                AddTextBox(source, styleStartIdx, endIdx, ref curBox);
                            }
                        }
                    }
                }

                // continue after the handled tag, or stop when no further tag was found
                startIdx = tagIdx > -1 && endIdx > 0 ? endIdx : -1;
            }

            // handle pieces of html without proper structure
            if (endIdx > -1 && endIdx < source.Length)
            {
                // there is text after the end of last element
                var endText = new SubString(source, endIdx, source.Length - endIdx);
                if (!endText.IsEmptyOrWhitespace())
                {
                    var abox = CssBox.CreateBox(root);
                    abox.Text = endText;
                }
            }

            return root;
        }
Example #10
0
        /// <summary>
        /// Parses the document into a tree of css boxes: html comments are removed first, then every
        /// tag matched by the html tag regex is turned into a box and the text between tags into
        /// anonymous text boxes.
        /// </summary>
        /// <param name="document">the html document to parse</param>
        /// <returns>the root box of the parsed tree</returns>
        public static CssBox ParseDocument(string document)
        {
            document = RemoveHtmlComments(document);

            CssBox root = null;
            CssBox curBox = null;
            int lastEnd = -1; // index of the last character of the previously handled tag

            foreach (Match tagMatch in RegexParserUtils.Match(RegexParserUtils.HtmlTag, document))
            {
                // the text between the previous tag and this one
                string text = String.Empty;
                if (tagMatch.Index > 0)
                {
                    text = document.Substring(lastEnd + 1, tagMatch.Index - lastEnd - 1);
                }

                bool blankText = text.Trim().Length == 0;
                if (!blankText)
                {
                    if (curBox == null)
                    {
                        curBox = CssBox.CreateBlock();
                        root = curBox;
                    }

                    CssBox.CreateBox(curBox).Text = text;
                }

                var tag = ParseHtmlTag(tagMatch.Value);

                if (tag.IsClosing)
                {
                    // handle tags that have no content but whitespace
                    if (blankText && curBox != null && curBox.Boxes.Count == 0 && text.Length > 0)
                    {
                        CssBox.CreateBox(curBox).Text = " ";
                    }

                    // need to find the parent tag to go one level up
                    curBox = DomUtils.FindParent(root, tag.Name, curBox);
                }
                else
                {
                    var tagBox = new CssBox(curBox, tag);
                    if (!tag.IsSingle)
                    {
                        // go one level down, make the new box the current box
                        curBox = tagBox;
                    }
                }

                // the first current box becomes the root and is displayed as block
                if (root == null && curBox != null)
                {
                    root = curBox;
                    root.Display = CssConstants.Block;
                }

                lastEnd = tagMatch.Index + tagMatch.Length - 1;
            }

            if (root == null)
            {
                // no root was established: the whole document becomes one text box
                root = CssBox.CreateBlock();
                CssBox.CreateBox(root).Text = document;
                return root;
            }

            if (lastEnd < document.Length)
            {
                // text after the last tag is added directly under the root
                var trailingText = document.Substring(lastEnd + 1);
                if (trailingText.Trim().Length != 0)
                {
                    CssBox.CreateBox(root).Text = trailingText;
                }
            }

            return root;
        }