Example #1
0
        /// <summary>
        /// Processes the end tag named <paramref name="sTagName"/> by moving
        /// <c>_currentTreeNode</c> and <c>_currentNode</c> back up the tree.
        /// </summary>
        /// <remarks>
        /// When <c>_normalizeXml</c> is set, html/head/body end tags are ignored, a title
        /// end tag only resets <c>_currentNode</c>, and table / definition-list end tags
        /// are resolved against the tracked <c>_table</c> / <c>_definitionList</c> state.
        /// Any end tag not handled there goes through the lookup by name at the bottom.
        /// </remarks>
        /// <param name="sTagName">Name of the tag being ended.</param>
        private void TagEnd(string sTagName)
        {
            if (_normalizeXml)
            {
                HtmlTagType closedType = HtmlTags.GetHtmlTagType(sTagName);

                // html / head / body end tags change nothing.
                if (closedType == HtmlTagType.Html
                    || closedType == HtmlTagType.Head
                    || closedType == HtmlTagType.Body)
                {
                    return;
                }

                // End of title: go back to the current tree position.
                if (closedType == HtmlTagType.Title)
                {
                    _currentNode = _currentTreeNode;
                    return;
                }

                if (closedType == HtmlTagType.Table)
                {
                    // An end of table without a tracked table is ignored.
                    if (_table != null)
                    {
                        _currentTreeNode = GetParentXXNode(_table.Table);
                        _currentNode     = _currentTreeNode;

                        // Resume the table saved on the stack, if there is one.
                        if (_tableStack.Count > 0)
                        {
                            _table = _tableStack.Pop();
                        }
                        else
                        {
                            _table = null;
                        }
                    }
                    return;
                }

                if (closedType == HtmlTagType.DL)
                {
                    // Same scheme as for tables, applied to definition lists.
                    if (_definitionList != null)
                    {
                        _currentTreeNode = GetParentXXNode(_definitionList);
                        _currentNode     = _currentTreeNode;

                        if (_definitionListStack.Count > 0)
                        {
                            _definitionList = _definitionListStack.Pop();
                        }
                        else
                        {
                            _definitionList = null;
                        }
                    }
                    return;
                }

                // Table parts are only handled specially while a table is tracked.
                if (_table != null)
                {
                    if (closedType == HtmlTagType.THead
                        || closedType == HtmlTagType.TBody
                        || closedType == HtmlTagType.TFoot)
                    {
                        _currentTreeNode = _table.Table;
                        _currentNode     = _currentTreeNode;
                        _table.Body      = null;
                        return;
                    }

                    if (closedType == HtmlTagType.ColGroup)
                    {
                        _currentTreeNode = _table.Table;
                        _currentNode     = _currentTreeNode;
                        _table.ColGroup  = null;
                        return;
                    }

                    if (closedType == HtmlTagType.Col)
                    {
                        if (_table.Col != null)
                        {
                            _currentTreeNode = GetParentXXNode(_table.Col);
                            _currentNode     = _currentTreeNode;
                            _table.Col       = null;
                        }
                        return;
                    }

                    if (closedType == HtmlTagType.TR)
                    {
                        if (_table.Row != null)
                        {
                            _currentTreeNode = GetParentXXNode(_table.Row);
                            _currentNode     = _currentTreeNode;
                            _table.Row       = null;
                        }
                        return;
                    }

                    if (closedType == HtmlTagType.TH || closedType == HtmlTagType.TD)
                    {
                        if (_table.Data != null)
                        {
                            _currentTreeNode = GetParentXXNode(_table.Data);
                            _currentNode     = _currentTreeNode;
                            _table.Data      = null;
                        }
                        return;
                    }
                }
            }

            // Generic case: when the lookup by name yields a node, the tree position
            // moves to that node's parent; otherwise the tree position is unchanged.
            // (Presumably the lookup searches from _currentTreeNode upwards - confirm
            // in GetParentXXNodeByName.)
            XXXNode_v2 matchingNode = GetParentXXNodeByName(_currentTreeNode, sTagName);

            if (matchingNode != null)
            {
                _currentTreeNode = GetParentXXNode(matchingNode);
            }
            _currentNode = _currentTreeNode;
        }
Example #2
0
        // Processes the end tag named sTagName: moves gCurrentTreeNode / gCurrentNode
        // back up the tree.
        //
        // With gbNormalizeXml set:
        //   - html / head / body end tags are ignored;
        //   - a title end tag only resets gCurrentNode to gCurrentTreeNode;
        //   - table and dl end tags leave the tracked table / definition list and
        //     restore the one saved on the matching stack, if any;
        //   - table-part end tags (thead, tbody, tfoot, colgroup, col, tr, th, td)
        //     are resolved against gTable while a table is tracked.
        // Everything else goes through the lookup by name at the bottom.
        //
        // Parent access goes through GetParentXXNode(); it replaces the former direct
        // use of the node's ParentNode property.
        private void TagEnd(string sTagName)
        {
            if (gbNormalizeXml)
            {
                switch (HtmlTags.GetHtmlTagType(sTagName))
                {
                case HtmlTagType.Html:
                case HtmlTagType.Head:
                case HtmlTagType.Body:
                    return;

                case HtmlTagType.Title:
                    gCurrentNode = gCurrentTreeNode;
                    return;

                case HtmlTagType.Table:
                    if (gTable != null)
                    {
                        gCurrentTreeNode = GetParentXXNode(gTable.Table);
                        gCurrentNode     = gCurrentTreeNode;
                        if (gTableStack.Count > 0)
                        {
                            gTable = gTableStack.Pop();
                        }
                        else
                        {
                            gTable = null;
                        }
                    }
                    return;

                case HtmlTagType.DL:
                    if (gDefinitionList != null)
                    {
                        gCurrentTreeNode = GetParentXXNode(gDefinitionList);
                        gCurrentNode     = gCurrentTreeNode;
                        if (gDefinitionListStack.Count > 0)
                        {
                            gDefinitionList = gDefinitionListStack.Pop();
                        }
                        else
                        {
                            gDefinitionList = null;
                        }
                    }
                    return;

                // Table parts: without a tracked table, "break" hands the tag over
                // to the generic lookup below.
                case HtmlTagType.THead:
                case HtmlTagType.TBody:
                case HtmlTagType.TFoot:
                    if (gTable == null)
                    {
                        break;
                    }
                    gCurrentTreeNode = gTable.Table;
                    gCurrentNode     = gCurrentTreeNode;
                    gTable.Body      = null;
                    return;

                case HtmlTagType.ColGroup:
                    if (gTable == null)
                    {
                        break;
                    }
                    gCurrentTreeNode = gTable.Table;
                    gCurrentNode     = gCurrentTreeNode;
                    gTable.ColGroup  = null;
                    return;

                case HtmlTagType.Col:
                    if (gTable == null)
                    {
                        break;
                    }
                    if (gTable.Col != null)
                    {
                        gCurrentTreeNode = GetParentXXNode(gTable.Col);
                        gCurrentNode     = gCurrentTreeNode;
                        gTable.Col       = null;
                    }
                    return;

                case HtmlTagType.TR:
                    if (gTable == null)
                    {
                        break;
                    }
                    if (gTable.Row != null)
                    {
                        gCurrentTreeNode = GetParentXXNode(gTable.Row);
                        gCurrentNode     = gCurrentTreeNode;
                        gTable.Row       = null;
                    }
                    return;

                case HtmlTagType.TH:
                case HtmlTagType.TD:
                    if (gTable == null)
                    {
                        break;
                    }
                    if (gTable.Data != null)
                    {
                        gCurrentTreeNode = GetParentXXNode(gTable.Data);
                        gCurrentNode     = gCurrentTreeNode;
                        gTable.Data      = null;
                    }
                    return;
                }
            }

            // Generic case. This call replaces an earlier inline loop that walked up
            // from gCurrentTreeNode through the parents until a node named sTagName
            // was found, then moved the tree position to that node's parent.
            XXXNode_v2 namedAncestor = GetParentXXNodeByName(gCurrentTreeNode, sTagName);

            if (namedAncestor != null)
            {
                gCurrentTreeNode = GetParentXXNode(namedAncestor);
            }
            gCurrentNode = gCurrentTreeNode;
        }
Example #3
0
        /// <summary>
        /// Processes the start of the tag named <paramref name="tagName"/>: creates the
        /// element, attaches it under <c>_currentTreeNode</c> and, where applicable,
        /// makes it the new tree position.
        /// </summary>
        /// <remarks>
        /// With <c>_normalizeXml</c> set, html/head/body and a repeated title produce no
        /// element (<c>_noTag</c> is set to true); the first body tag moves the position to
        /// <c>_bodyNode</c> and the first title tag redirects <c>_currentNode</c> to
        /// <c>_titleNode</c>. Tables and definition lists are tracked in
        /// <c>_table</c> / <c>_definitionList</c>, with enclosing ones kept on their stacks.
        /// </remarks>
        /// <param name="tagName">Name of the tag being started.</param>
        /// <param name="tagEnd">
        /// When false, and the tag's end bound is not forbidden, the new element becomes
        /// the current tree node. Presumably true for a tag that is closed by its own
        /// markup - confirm with the caller.
        /// </param>
        private void TagBegin(string tagName, bool tagEnd)
        {
            _noTag = false;
            HtmlTagType kind    = HtmlTags.GetHtmlTagType(tagName);
            HtmlTag     tagInfo = HtmlTags.GetHtmlTag(kind);

            if (_normalizeXml)
            {
                switch (kind)
                {
                case HtmlTagType.Html:
                case HtmlTagType.Head:
                    _noTag = true;
                    return;

                case HtmlTagType.Body:
                    _noTag = true;
                    if (!_body)
                    {
                        // First body tag: continue building under the body node.
                        _body            = true;
                        _currentTreeNode = _bodyNode;
                        _currentNode     = _currentTreeNode;
                    }
                    return;

                case HtmlTagType.Title:
                    if (_title)
                    {
                        // A title was already seen: this one yields no tag.
                        _noTag = true;
                    }
                    else if (!tagEnd)
                    {
                        _title       = true;
                        _currentNode = _titleNode;
                    }
                    return;
                }

                // Note (change dated 11/01/2015): the automatic switch to _bodyNode on
                // the first tag whose category is not Head has been disabled here.
            }

            _currentNode = CreateElement(tagName);

            if (_normalizeXml)
            {
                if (!tagEnd && kind == HtmlTagType.Table)
                {
                    // Keep the enclosing table, if any, so TagEnd can restore it.
                    if (_table != null)
                    {
                        _tableStack.Push(_table);
                    }
                    _table = new HtmlTable_v2 { Table = _currentNode };
                    AddElement(_currentTreeNode, _currentNode);
                    _currentTreeNode = _currentNode;
                    return;
                }

                // The table helper reports whether it took care of the tag.
                if (TagBeginTableCategory(tagInfo, tagEnd))
                {
                    return;
                }

                if (!tagEnd && kind == HtmlTagType.DL)
                {
                    if (_definitionList != null)
                    {
                        _definitionListStack.Push(_definitionList);
                    }
                    _definitionList = _currentNode;
                    AddElement(_currentTreeNode, _currentNode);
                    _currentTreeNode = _currentNode;
                    return;
                }

                if (TagBeginDefinitionListCategory(tagInfo, tagEnd))
                {
                    return;
                }

                // Disabled, to be re-checked: special handling of a <p> tag that has no
                // closing </p> (the tree position was moved to the parent of the last
                // remembered <p> node). At the very least, that remembered node would
                // have to be cleared whenever one of its parents is closed.
            }

            AddElement(_currentTreeNode, _currentNode);

            if (tagEnd)
            {
                if (_correctionMarkBeginEnd)
                {
                    _currentNode = _currentTreeNode;
                }
            }
            else if (tagInfo.EndBoundType != HtmlBoundType.Forbidden)
            {
                // Only a tag that may have an end tag becomes the new tree position.
                _currentTreeNode = _currentNode;
            }
        }
Example #4
0
        // Processes the start of the tag named sTagName: creates the element, attaches
        // it under gCurrentTreeNode and, when bTagEnd is false and the tag's end bound
        // is not forbidden, makes it the new tree position.
        //
        // With gbNormalizeXml set:
        //   - html / head / body and a repeated title yield no element (gbNoTag = true);
        //   - the first body tag, or the first tag whose category is not Head, moves
        //     the position to gBodyNode;
        //   - the first title tag redirects gCurrentNode to gTitleNode;
        //   - tables and definition lists are tracked in gTable / gDefinitionList,
        //     enclosing ones being kept on their stacks.
        //
        // bTagEnd: presumably true for a tag closed by its own markup - confirm with
        // the caller.
        //
        // Elements are created and attached through CreateElement() / AddElement();
        // these replace the former direct gXmlDocument.CreateElement() and
        // AppendChild() calls.
        private void TagBegin(string sTagName, bool bTagEnd)
        {
            gbNoTag = false;

            // Disabled: sanitizing of sTagName (replacing '-', '!', '[' and ']' by '_',
            // a gReplace-based substitution, and using "_" for an empty name).
            HtmlTagType type       = HtmlTags.GetHtmlTagType(sTagName);
            HtmlTag     descriptor = HtmlTags.GetHtmlTag(type);

            if (gbNormalizeXml)
            {
                bool isBody = type == HtmlTagType.Body;

                if (isBody || type == HtmlTagType.Html || type == HtmlTagType.Head)
                {
                    gbNoTag = true;
                    if (isBody && !gbBody)
                    {
                        gbBody           = true;
                        gCurrentTreeNode = gBodyNode;
                        gCurrentNode     = gCurrentTreeNode;
                    }
                    return;
                }

                if (type == HtmlTagType.Title)
                {
                    if (gbTitle)
                    {
                        // A title was already seen: this one yields no tag.
                        gbNoTag = true;
                    }
                    else if (!bTagEnd)
                    {
                        gbTitle      = true;
                        gCurrentNode = gTitleNode;
                    }
                    return;
                }

                // Body not entered yet and the tag is not a Head-category tag:
                // enter the body first.
                if (!(gbBody || descriptor.TagCategory == HtmlTagCategory.Head))
                {
                    gbBody           = true;
                    gCurrentTreeNode = gBodyNode;
                    gCurrentNode     = gCurrentTreeNode;
                }
            }

            gCurrentNode = CreateElement(sTagName);

            if (gbNormalizeXml)
            {
                bool staysOpen = !bTagEnd;

                if (staysOpen && type == HtmlTagType.Table)
                {
                    // Keep the enclosing table, if any, so TagEnd can restore it.
                    if (gTable != null)
                    {
                        gTableStack.Push(gTable);
                    }
                    gTable = new HtmlTable_v2 { Table = gCurrentNode };
                    AddElement(gCurrentTreeNode, gCurrentNode);
                    gCurrentTreeNode = gCurrentNode;
                    return;
                }

                // The table helper reports whether it took care of the tag.
                if (TagBeginTableCategory(descriptor, bTagEnd))
                {
                    return;
                }

                if (staysOpen && type == HtmlTagType.DL)
                {
                    if (gDefinitionList != null)
                    {
                        gDefinitionListStack.Push(gDefinitionList);
                    }
                    gDefinitionList = gCurrentNode;
                    AddElement(gCurrentTreeNode, gCurrentNode);
                    gCurrentTreeNode = gCurrentNode;
                    return;
                }

                if (TagBeginDefinitionListCategory(descriptor, bTagEnd))
                {
                    return;
                }

                // Disabled, to be re-checked: special handling of a <p> tag that has no
                // closing </p> (gCurrentTreeNode was moved to the parent of gLastPNode).
                // At the very least, gLastPNode would have to be cleared whenever one of
                // its parents is closed.
            }

            AddElement(gCurrentTreeNode, gCurrentNode);

            if (bTagEnd)
            {
                return;
            }

            // Only a tag that may have an end tag becomes the new tree position.
            if (descriptor.EndBoundType != HtmlBoundType.Forbidden)
            {
                gCurrentTreeNode = gCurrentNode;
            }
        }