// Adds the attribute `name` to `parent` in each document currently being built
// (XmlDocument and/or XDocument). An attribute that already exists is left untouched.
// A null value is stored as the empty string.
private void AddAttribute(XXXNode_v2 parent, string name, string value)
{
    string text = value ?? "";

    if (gXmlDocument != null)
    {
        XmlAttributeCollection xmlAttributes = parent.XmlNode.Attributes;
        bool alreadyThere = xmlAttributes.GetNamedItem(name) != null;
        if (!alreadyThere)
        {
            XmlAttribute xmlAttribute = gXmlDocument.CreateAttribute(name);
            xmlAttribute.Value = text;
            xmlAttributes.Append(xmlAttribute);
        }
    }

    if (gXDocument != null)
    {
        XElement owner = (XElement)parent.XNode;
        if (owner.Attribute(name) == null)
        {
            owner.Add(new XAttribute(name, text));
        }
    }
}
// Builds a fresh XmlDocument from the HTML input: creates the document, wraps it in the
// document node used as the root parent, runs GenerateXml() and returns the document.
public XmlDocument GenerateXmlDocument()
{
    XmlDocument document = new XmlDocument();
    _xmlDocument = document;
    _documentNode = new XXXNode_v2 { XmlNode = document };

    GenerateXml();

    return _xmlDocument;
}
// Builds a fresh XDocument from the HTML input: creates the document, wraps it in the
// document node used as the root parent, runs GenerateXml() and returns the document.
public XDocument GenerateXDocument()
{
    XDocument document = new XDocument();
    gXDocument = document;
    gDocumentNode = new XXXNode_v2 { XNode = document };

    GenerateXml();

    return gXDocument;
}
// Appends a comment node with the given text under `parent`, in each document
// currently being built (XmlDocument and/or XDocument).
private void AddComment(XXXNode_v2 parent, string comment)
{
    if (gXmlDocument != null)
    {
        parent.XmlNode.AppendChild(gXmlDocument.CreateComment(comment));
    }

    if (gXDocument != null)
    {
        XElement container = (XElement)parent.XNode;
        container.Add(new XComment(comment));
    }
}
// Returns a new wrapper holding the parent of `node` on each side that is populated:
// XmlNode.ParentNode for the XmlDocument side, XNode.Parent for the XDocument side.
// A side that is null on `node` is left unassigned on the result.
private static XXXNode_v2 GetParentXXNode(XXXNode_v2 node)
{
    XXXNode_v2 parent = new XXXNode_v2();

    XmlNode xmlChild = node.XmlNode;
    if (xmlChild != null)
    {
        parent.XmlNode = xmlChild.ParentNode;
    }

    XNode xChild = node.XNode;
    if (xChild != null)
    {
        parent.XNode = xChild.Parent;
    }

    return parent;
}
// Creates a detached element called `name` for each document currently being built and
// returns both wrapped in one node. The element is not attached to any parent here.
private XXXNode_v2 CreateElement(string name)
{
    XXXNode_v2 created = new XXXNode_v2();

    if (_xmlDocument != null)
    {
        created.XmlNode = _xmlDocument.CreateElement(name);
    }

    if (_xDocument != null)
    {
        created.XNode = new XElement(name);
    }

    return created;
}
// Walks from `node` up through its ancestors (starting with `node` itself) and returns a
// wrapper around the first one whose name equals `name`. Both sides (XmlNode and XNode)
// are searched independently; the result is null when neither side has a match.
// Throws PBException when the XNode side is set but is not an XElement.
private static XXXNode_v2 GetParentXXNodeByName(XXXNode_v2 node, string name)
{
    XXXNode_v2 match = new XXXNode_v2();
    bool matched = false;

    for (XmlNode candidate = node.XmlNode; candidate != null; candidate = candidate.ParentNode)
    {
        if (candidate.Name == name)
        {
            match.XmlNode = candidate;
            matched = true;
            break;
        }
    }

    if (node.XNode != null)
    {
        XElement start = node.XNode as XElement;
        if (start == null)
        {
            throw new PBException("error generating XDocument node is not a XElement");
        }

        for (XElement candidate = start; candidate != null; candidate = candidate.Parent)
        {
            if (candidate.Name == name)
            {
                match.XNode = candidate;
                matched = true;
                break;
            }
        }
    }

    return matched ? match : null;
}
// Builds a fresh XDocument from the HTML input and returns it.
// When _useXDocumentCreator is set, the document is the one owned by a new XDocumentCreator
// and nodes are added through that creator; otherwise a plain XDocument is used.
public XDocument GenerateXDocument()
{
    if (_useXDocumentCreator)
    {
        _xdCreator = new XDocumentCreator();
        _xDocument = _xdCreator.XDocument;
    }
    else
    {
        // Drop any creator left over from a previous run: the Add* helpers route through
        // _xdCreator whenever it is non-null, which would fill the old creator's document
        // instead of the one created here.
        _xdCreator = null;
        _xDocument = new XDocument();
    }

    _documentNode = new XXXNode_v2();
    _documentNode.XNode = _xDocument;

    GenerateXml();

    return _xDocument;
}
// Creates a new element called `element` and appends it under `parent`, on each side of
// `parent` that is populated. Throws PBException when the XNode side is not an XElement.
private void AddElement(XXXNode_v2 parent, string element)
{
    if (parent.XmlNode != null)
    {
        parent.XmlNode.AppendChild(gXmlDocument.CreateElement(element));
    }

    XNode xParent = parent.XNode;
    if (xParent != null)
    {
        XElement container = xParent as XElement;
        if (container == null)
        {
            throw new PBException("error generating XDocument node is not a XElement");
        }
        container.Add(new XElement(element));
    }
}
// Handles a <dt>/<dd> tag while a definition list is open: the current node is attached
// directly under the open list and, unless the tag is self-closing, becomes the current
// tree node. Returns true when the tag was handled here, false otherwise.
private bool TagBeginDefinitionListCategory(HtmlTag tag, bool bTagEnd)
{
    bool insideList = _definitionList != null && tag.TagCategory == HtmlTagCategory.DefinitionList;
    if (!insideList)
    {
        return false;
    }

    HtmlTagType type = tag.TagType;
    if (type != HtmlTagType.DT && type != HtmlTagType.DD)
    {
        return false;
    }

    AddElement(_definitionList, _currentNode);
    if (!bTagEnd)
    {
        _currentTreeNode = _currentNode;
    }
    return true;
}
// Appends a text node under `parent` in each document currently being built.
// Text for which IsSeparator() returns true is ignored.
private void AddText(XXXNode_v2 parent, string text)
{
    if (IsSeparator(text))
    {
        return;
    }

    if (gXmlDocument != null)
    {
        parent.XmlNode.AppendChild(gXmlDocument.CreateTextNode(text));
    }

    if (gXDocument != null)
    {
        XElement container = (XElement)parent.XNode;
        container.Add(new XText(text));
    }
}
// Handles a <dt>/<dd> tag while a definition list is open: the current node is attached
// directly under the open list and, unless the tag is self-closing, becomes the current
// tree node. Returns true when the tag was handled here, false otherwise.
private bool TagBeginDefinitionListCategory(HtmlTag tag, bool bTagEnd)
{
    bool insideList = gDefinitionList != null && tag.TagCategory == HtmlTagCategory.DefinitionList;
    if (!insideList)
    {
        return false;
    }

    HtmlTagType type = tag.TagType;
    if (type != HtmlTagType.DT && type != HtmlTagType.DD)
    {
        return false;
    }

    AddElement(gDefinitionList, gCurrentNode);
    if (!bTagEnd)
    {
        gCurrentTreeNode = gCurrentNode;
    }
    return true;
}
// Appends a comment with the given text under `parent` in each document currently being
// built. On the XDocument side the comment goes through _xdCreator when one is in use.
private void AddComment(XXXNode_v2 parent, string comment)
{
    if (_xmlDocument != null)
    {
        parent.XmlNode.AppendChild(_xmlDocument.CreateComment(comment));
    }

    if (_xDocument == null)
    {
        return;
    }

    if (_xdCreator != null)
    {
        _xdCreator.AddComment((XElement)parent.XNode, comment);
        return;
    }

    XComment xComment = new XComment(comment);
    ((XElement)parent.XNode).Add(xComment);
}
// Attaches `child` under `parent` on each side of `parent` that is populated.
// On the XDocument side the parent may be an XElement or the XDocument itself (root
// element); anything else raises PBException. When _xdCreator is in use the insertion is
// delegated to it.
private void AddElement(XXXNode_v2 parent, XXXNode_v2 child)
{
    if (parent.XmlNode != null)
    {
        parent.XmlNode.AppendChild(child.XmlNode);
    }

    XNode target = parent.XNode;
    if (target == null)
    {
        return;
    }

    XElement targetElement = target as XElement;
    if (targetElement != null)
    {
        if (_xdCreator != null)
        {
            _xdCreator.AddElement(targetElement, (XElement)child.XNode);
        }
        else
        {
            targetElement.Add(child.XNode);
        }
        return;
    }

    XDocument targetDocument = target as XDocument;
    if (targetDocument != null)
    {
        if (_xdCreator != null)
        {
            _xdCreator.AddRootElement((XElement)child.XNode);
        }
        else
        {
            targetDocument.Add(child.XNode);
        }
        return;
    }

    throw new PBException("error generating XDocument node is neither a XElement nor a XDocument");
}
// Appends a text node under `parent` in each document currently being built.
// Text for which IsSeparator() returns true is ignored. On the XDocument side the text
// goes through _xdCreator when one is in use.
private void AddText(XXXNode_v2 parent, string text)
{
    if (IsSeparator(text))
    {
        return;
    }

    if (_xmlDocument != null)
    {
        parent.XmlNode.AppendChild(_xmlDocument.CreateTextNode(text));
    }

    if (_xDocument == null)
    {
        return;
    }

    if (_xdCreator != null)
    {
        _xdCreator.AddText((XElement)parent.XNode, text);
        return;
    }

    XText xText = new XText(text);
    ((XElement)parent.XNode).Add(xText);
}
// Processes an end tag and moves the current node / current tree node accordingly.
// In normalize mode html/head/body end tags are ignored, </title> returns to the current
// tree node, and table / definition-list related end tags restore the position tracked in
// _table / _definitionList. Any other end tag closes the nearest open ancestor with the
// same name, if there is one.
private void TagEnd(string sTagName)
{
    if (_normalizeXml)
    {
        HtmlTagType tagType = HtmlTags.GetHtmlTagType(sTagName);

        if (tagType == HtmlTagType.Html || tagType == HtmlTagType.Head || tagType == HtmlTagType.Body)
        {
            return;
        }

        if (tagType == HtmlTagType.Title)
        {
            _currentNode = _currentTreeNode;
            return;
        }

        if (tagType == HtmlTagType.Table)
        {
            if (_table != null)
            {
                XXXNode_v2 tableParent = GetParentXXNode(_table.Table);
                _currentTreeNode = tableParent;
                _currentNode = tableParent;
                // Resume the enclosing table, if this one was nested.
                _table = _tableStack.Count != 0 ? _tableStack.Pop() : null;
            }
            return;
        }

        if (tagType == HtmlTagType.DL)
        {
            if (_definitionList != null)
            {
                XXXNode_v2 listParent = GetParentXXNode(_definitionList);
                _currentTreeNode = listParent;
                _currentNode = listParent;
                // Resume the enclosing list, if this one was nested.
                _definitionList = _definitionListStack.Count != 0 ? _definitionListStack.Pop() : null;
            }
            return;
        }

        if (_table != null)
        {
            XXXNode_v2 position;
            switch (tagType)
            {
                case HtmlTagType.THead:
                case HtmlTagType.TBody:
                case HtmlTagType.TFoot:
                    position = _table.Table;
                    _currentTreeNode = position;
                    _currentNode = position;
                    _table.Body = null;
                    return;

                case HtmlTagType.ColGroup:
                    position = _table.Table;
                    _currentTreeNode = position;
                    _currentNode = position;
                    _table.ColGroup = null;
                    return;

                case HtmlTagType.Col:
                    if (_table.Col != null)
                    {
                        position = GetParentXXNode(_table.Col);
                        _currentTreeNode = position;
                        _currentNode = position;
                        _table.Col = null;
                    }
                    return;

                case HtmlTagType.TR:
                    if (_table.Row != null)
                    {
                        position = GetParentXXNode(_table.Row);
                        _currentTreeNode = position;
                        _currentNode = position;
                        _table.Row = null;
                    }
                    return;

                case HtmlTagType.TH:
                case HtmlTagType.TD:
                    if (_table.Data != null)
                    {
                        position = GetParentXXNode(_table.Data);
                        _currentTreeNode = position;
                        _currentNode = position;
                        _table.Data = null;
                    }
                    return;
            }
        }
    }

    // Generic case: close the nearest open element with this name, when one exists.
    XXXNode_v2 closed = GetParentXXNodeByName(_currentTreeNode, sTagName);
    if (closed != null)
    {
        _currentTreeNode = GetParentXXNode(closed);
    }
    _currentNode = _currentTreeNode;
}
// Reads the HTML input token by token and builds the XML tree.
//
// Original design notes for gbNormalizeXml = true:
//  - the html, head, title and body tags are created automatically;
//  - html, head, title and body tags found in the input are not taken into account;
//  - only title and meta tags go into the head part, other tags go into the body part;
//  - a meta tag found after the start of the body part stays in the body part;
//  - only the first title tag is used (and placed in head), later ones are ignored.
//
// The reader is always closed, even when generation fails.
private void GenerateXml()
{
    try
    {
        InitXml();
        gTableStack = new Stack<HtmlTable_v2>();
        gTable = null;
        gDefinitionListStack = new Stack<XXXNode_v2>();
        gDefinitionList = null;
        gbNoTag = false;
        gbBody = false;
        gbTitle = false;

        while (gHTMLReader.Read())
        {
            if (gHTMLReader.IsText || gHTMLReader.IsComment)
            {
                // The first real text starts the body part. gBodyNode is only created by
                // InitXml() in normalize mode, so the switch must not happen otherwise
                // (it would make the current node null).
                if (gbNormalizeXml && gHTMLReader.IsText && !gHTMLReader.IsTextSeparator && !gbBody)
                {
                    gbBody = true;
                    gCurrentNode = gCurrentTreeNode = gBodyNode;
                }

                if (!gbGenerateXmlNodeOnly)
                {
                    if (gbReadCommentInText || gHTMLReader.IsText)
                    {
                        AddText(gCurrentNode, gHTMLReader.Value);
                    }
                    else
                    {
                        string s = gCommentCorrection.Replace(gHTMLReader.Value, "-");
                        // Pad a trailing '-' with a space before the text is used as a comment.
                        if (s.EndsWith("-", System.StringComparison.Ordinal))
                        {
                            s += " ";
                        }
                        AddComment(gCurrentNode, s);
                    }
                }
            }
            else if (gHTMLReader.IsDocType)
            {
                // gHtmlNode only exists in normalize mode; without it there is no node to
                // carry the doctype, so the doctype is skipped instead of failing.
                if (gHtmlNode != null)
                {
                    AddAttribute(gHtmlNode, "doctype", gHTMLReader.DocType);
                }
            }
            else if (gHTMLReader.IsProperty)
            {
                if (gbGenerateXmlNodeOnly || gbNoTag)
                {
                    continue;
                }

                try
                {
                    string sPropertyName = gReplace.Replace(gHTMLReader.PropertyName, "");
                    // Invariant casing: names must not depend on the current culture.
                    sPropertyName = sPropertyName.ToLowerInvariant();
                    if (sPropertyName == "")
                    {
                        sPropertyName = "__value";
                    }

                    // Change of 28/01/2014: character 0x03 is invalid in XML; it was found
                    // in the "content" attribute of a meta tag on
                    // http://www.reseau-gesat.com/Gesat/Yvelines,78/Fontenay-le-Fleury,31443/esat-cotra,e1596/
                    string propertyValue = gHTMLReader.PropertyValue;
                    if (propertyValue != null)
                    {
                        propertyValue = propertyValue.Replace("\x03", "");
                    }

                    AddAttribute(gCurrentNode, sPropertyName, propertyValue);

                    if (gHTMLReader.IsMarkBeginEnd)
                    {
                        // Use the same sanitized name the element was created with.
                        TagEnd(GetXmlTagName(gHTMLReader.MarkName));
                    }
                }
                catch (System.Exception ex)
                {
                    // Best effort: a bad attribute must not stop the conversion, but the
                    // failure is reported instead of being silently dropped.
                    System.Diagnostics.Trace.WriteLine("error in GenerateXml() : " + ex.Message);
                }
            }
            else if (gHTMLReader.IsMarkBeginEnd)
            {
                TagBegin(GetXmlTagName(gHTMLReader.MarkName), true);
            }
            else if (gHTMLReader.IsMarkBegin)
            {
                TagBegin(GetXmlTagName(gHTMLReader.MarkName), false);
            }
            else if (gHTMLReader.IsMarkEnd)
            {
                TagEnd(GetXmlTagName(gHTMLReader.MarkName));
            }
        }
    }
    finally
    {
        gHTMLReader.Close();
    }
}

// Turns a raw tag name from the reader into the element name used in the tree:
// lower-cased (culture-invariant), passed through gReplace with "_" as replacement,
// and "_" when nothing is left.
private string GetXmlTagName(string markName)
{
    string tagName = gReplace.Replace(markName.ToLowerInvariant(), "_");
    return tagName == "" ? "_" : tagName;
}
// Places a table-structure tag (thead/tbody/tfoot, colgroup, col, tr, th/td) at the right
// spot of the open table, creating the missing tbody / tr when needed, and records it in
// _table. A self-closing tag clears the matching slot instead of opening it.
// Returns true when the tag was handled here, false otherwise.
private bool TagBeginTableCategory(HtmlTag tag, bool bTagEnd)
{
    bool insideTable = _table != null && tag.TagCategory == HtmlTagCategory.Table;
    if (!insideTable)
    {
        return false;
    }

    HtmlTagType type = tag.TagType;
    // What the table remembers as "open": nothing for a self-closing tag.
    XXXNode_v2 opened = bTagEnd ? null : _currentNode;

    if (type == HtmlTagType.THead || type == HtmlTagType.TBody || type == HtmlTagType.TFoot)
    {
        AddElement(_table.Table, _currentNode);
        _table.Body = opened;
    }
    else if (type == HtmlTagType.ColGroup)
    {
        AddElement(_table.Table, _currentNode);
        _table.ColGroup = opened;
    }
    else if (type == HtmlTagType.Col)
    {
        // A col goes into the open colgroup when there is one, else directly in the table.
        _currentTreeNode = _table.ColGroup != null ? _table.ColGroup : _table.Table;
        AddElement(_currentTreeNode, _currentNode);
        _table.Col = opened;
    }
    else if (type == HtmlTagType.TR)
    {
        if (_table.Body == null)
        {
            _table.Body = CreateElement("tbody");
            AddElement(_table.Table, _table.Body);
        }
        AddElement(_table.Body, _currentNode);
        _table.Row = opened;
    }
    else if (type == HtmlTagType.TH || type == HtmlTagType.TD)
    {
        if (_table.Row == null)
        {
            if (_table.Body == null)
            {
                _table.Body = CreateElement("tbody");
                AddElement(_table.Table, _table.Body);
            }
            _table.Row = CreateElement("tr");
            AddElement(_table.Body, _table.Row);
        }
        AddElement(_table.Row, _currentNode);
        _table.Data = opened;
    }
    else
    {
        return false;
    }

    if (!bTagEnd)
    {
        _currentTreeNode = _currentNode;
    }
    return true;
}
// Processes a start tag (tagEnd = true for a self-closing tag): creates the element and
// attaches it to the tree.
// In normalize mode html/head/body/title tags are not created (the pre-built nodes are
// used), and table / definition-list tags are placed by their dedicated handlers.
private void TagBegin(string tagName, bool tagEnd)
{
    _noTag = false;
    HtmlTagType tagType = HtmlTags.GetHtmlTagType(tagName);
    HtmlTag tag = HtmlTags.GetHtmlTag(tagType);

    if (_normalizeXml)
    {
        switch (tagType)
        {
            case HtmlTagType.Html:
            case HtmlTagType.Head:
                _noTag = true;
                return;

            case HtmlTagType.Body:
                _noTag = true;
                if (!_body)
                {
                    _body = true;
                    _currentTreeNode = _bodyNode;
                    _currentNode = _bodyNode;
                }
                return;

            case HtmlTagType.Title:
                if (_title)
                {
                    // Only the first title is used.
                    _noTag = true;
                }
                else if (!tagEnd)
                {
                    _title = true;
                    _currentNode = _titleNode;
                }
                return;
        }
        // Note: the automatic switch to the body node on the first non-head tag was
        // disabled on 11/01/2015.
    }

    _currentNode = CreateElement(tagName);

    if (_normalizeXml)
    {
        bool opensTable = tagType == HtmlTagType.Table && !tagEnd;
        if (opensTable)
        {
            // Keep the enclosing table aside while the nested one is open.
            if (_table != null)
            {
                _tableStack.Push(_table);
            }
            _table = new HtmlTable_v2();
            _table.Table = _currentNode;
            AddElement(_currentTreeNode, _currentNode);
            _currentTreeNode = _currentNode;
            return;
        }

        if (TagBeginTableCategory(tag, tagEnd))
        {
            return;
        }

        bool opensList = tagType == HtmlTagType.DL && !tagEnd;
        if (opensList)
        {
            // Keep the enclosing list aside while the nested one is open.
            if (_definitionList != null)
            {
                _definitionListStack.Push(_definitionList);
            }
            _definitionList = _currentNode;
            AddElement(_currentTreeNode, _currentNode);
            _currentTreeNode = _currentNode;
            return;
        }

        if (TagBeginDefinitionListCategory(tag, tagEnd))
        {
            return;
        }

        // TODO (from original notes, to be re-checked): handle a <p> that has no </p>;
        // the remembered <p> would at least have to be reset when one of its parents closes.
    }

    AddElement(_currentTreeNode, _currentNode);

    if (tagEnd)
    {
        if (_correctionMarkBeginEnd)
        {
            _currentNode = _currentTreeNode;
        }
    }
    else if (tag.EndBoundType != HtmlBoundType.Forbidden)
    {
        // Tags whose end tag is forbidden never become a parent.
        _currentTreeNode = _currentNode;
    }
}
// Prepares the reader and the document before parsing: creates the root "xml" element and,
// in normalize mode, the html / head / title / body skeleton. On exit in normalize mode the
// current node and current tree node are both the head element.
private void InitXml()
{
    _htmlReader.ReadCommentInText = _readCommentInText;

    if (_xmlDocument != null)
    {
        _xmlDocument.PreserveWhitespace = !_generateXmlNodeOnly;
    }

    bool withLineBreaks = !_generateXmlNodeOnly;

    // root "xml" element
    XXXNode_v2 xmlRoot = CreateElement("xml");
    AddElement(_documentNode, xmlRoot);
    _currentTreeNode = xmlRoot;
    _currentNode = xmlRoot;
    if (withLineBreaks)
    {
        AddText(xmlRoot, "\r\n");
    }

    if (!_normalizeXml)
    {
        return;
    }

    // html element, under the root
    XXXNode_v2 html = CreateElement("html");
    AddElement(xmlRoot, html);
    _currentTreeNode = html;
    _currentNode = html;
    _htmlNode = html;
    if (withLineBreaks)
    {
        AddText(html, "\r\n");
    }

    // head element, under html
    XXXNode_v2 head = CreateElement("head");
    AddElement(html, head);
    _currentNode = head;
    _headNode = head;
    if (withLineBreaks)
    {
        AddText(head, "\r\n");
    }

    // title element, under head
    XXXNode_v2 title = CreateElement("title");
    AddElement(head, title);
    _currentNode = title;
    _titleNode = title;

    // body element, under html
    XXXNode_v2 body = CreateElement("body");
    AddElement(html, body);
    _currentNode = body;
    _bodyNode = body;
    if (withLineBreaks)
    {
        AddText(body, "\r\n");
    }

    // Parsing starts in the head part.
    _currentTreeNode = _headNode;
    _currentNode = _headNode;
}
// Reads the HTML input token by token and builds the XML tree.
//
// Original design notes for _normalizeXml = true:
//  - the html, head, title and body tags are created automatically;
//  - html, head, title and body tags found in the input are not taken into account;
//  - only title and meta tags go into the head part, other tags go into the body part
//    (NOTE(review): the automatic switch to the body part on the first text was disabled
//    on 11/01/2015 — confirm this rule still holds);
//  - a meta tag found after the start of the body part stays in the body part;
//  - only the first title tag is used (and placed in head), later ones are ignored.
//
// The reader is always closed, even when generation fails.
private void GenerateXml()
{
    try
    {
        InitXml();
        _tableStack = new Stack<HtmlTable_v2>();
        _table = null;
        _definitionListStack = new Stack<XXXNode_v2>();
        _definitionList = null;
        _noTag = false;
        _body = false;
        _title = false;

        while (_htmlReader.Read())
        {
            if (_htmlReader.IsText || _htmlReader.IsComment)
            {
                if (!_generateXmlNodeOnly)
                {
                    if (_readCommentInText || _htmlReader.IsText)
                    {
                        AddText(_currentNode, _htmlReader.Value);
                    }
                    else
                    {
                        string s = _commentCorrection.Replace(_htmlReader.Value, "-");
                        // Pad a trailing '-' with a space before the text is used as a comment.
                        if (s.EndsWith("-", StringComparison.Ordinal))
                        {
                            s += " ";
                        }
                        AddComment(_currentNode, s);
                    }
                }
            }
            else if (_htmlReader.IsDocType)
            {
                // _htmlNode is only created by InitXml() in normalize mode; without it there
                // is no node to carry the doctype, so the doctype is skipped instead of failing.
                if (_htmlNode != null)
                {
                    AddAttribute(_htmlNode, "doctype", _htmlReader.DocType);
                }
            }
            else if (_htmlReader.IsProperty)
            {
                if (_generateXmlNodeOnly || _noTag)
                {
                    continue;
                }

                try
                {
                    string sPropertyName = _replace.Replace(_htmlReader.PropertyName, "");
                    // Invariant casing: names must not depend on the current culture.
                    sPropertyName = sPropertyName.ToLowerInvariant();
                    if (sPropertyName == "")
                    {
                        sPropertyName = "__value";
                    }

                    // Change of 28/01/2014: character 0x03 is invalid in XML; it was found
                    // in the "content" attribute of a meta tag on
                    // http://www.reseau-gesat.com/Gesat/Yvelines,78/Fontenay-le-Fleury,31443/esat-cotra,e1596/
                    string propertyValue = _htmlReader.PropertyValue;
                    if (propertyValue != null)
                    {
                        propertyValue = propertyValue.Replace("\x03", "");
                    }

                    AddAttribute(_currentNode, sPropertyName, propertyValue);

                    if (_htmlReader.IsMarkBeginEnd)
                    {
                        // Use the same sanitized name the element was created with.
                        TagEnd(NormalizeTagName(_htmlReader.MarkName));
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a bad attribute is reported and the conversion goes on.
                    Trace.WriteLine("error in HtmlToXml.GenerateXml() : line {0} column {1}", _htmlReader.Line, _htmlReader.Column);
                    Trace.WriteLine(ex.Message);
                }
            }
            else if (_htmlReader.IsMarkBeginEnd)
            {
                TagBegin(NormalizeTagName(_htmlReader.MarkName), true);
            }
            else if (_htmlReader.IsMarkBegin)
            {
                TagBegin(NormalizeTagName(_htmlReader.MarkName), false);
            }
            else if (_htmlReader.IsMarkEnd)
            {
                TagEnd(NormalizeTagName(_htmlReader.MarkName));
            }
        }
    }
    finally
    {
        _htmlReader.Close();
    }
}

// Turns a raw tag name from the reader into the element name used in the tree:
// lower-cased (culture-invariant), passed through _replace with "_" as replacement,
// and "_" when nothing is left.
private string NormalizeTagName(string markName)
{
    string tagName = _replace.Replace(markName.ToLowerInvariant(), "_");
    return tagName == "" ? "_" : tagName;
}
// Prepares the reader and the document before parsing: creates the root "xml" element and,
// in normalize mode, the html / head / title / body skeleton. On exit in normalize mode the
// current node and current tree node are both the head element.
private void InitXml()
{
    gHTMLReader.ReadCommentInText = gbReadCommentInText;

    if (gXmlDocument != null)
    {
        gXmlDocument.PreserveWhitespace = !gbGenerateXmlNodeOnly;
    }

    bool withLineBreaks = !gbGenerateXmlNodeOnly;

    // root "xml" element
    XXXNode_v2 xmlRoot = CreateElement("xml");
    AddElement(gDocumentNode, xmlRoot);
    gCurrentTreeNode = xmlRoot;
    gCurrentNode = xmlRoot;
    if (withLineBreaks)
    {
        AddText(xmlRoot, "\r\n");
    }

    if (!gbNormalizeXml)
    {
        return;
    }

    // html element, under the root
    XXXNode_v2 html = CreateElement("html");
    AddElement(xmlRoot, html);
    gCurrentTreeNode = html;
    gCurrentNode = html;
    gHtmlNode = html;
    if (withLineBreaks)
    {
        AddText(html, "\r\n");
    }

    // head element, under html
    XXXNode_v2 head = CreateElement("head");
    AddElement(html, head);
    gCurrentNode = head;
    gHeadNode = head;
    if (withLineBreaks)
    {
        AddText(head, "\r\n");
    }

    // title element, under head
    XXXNode_v2 title = CreateElement("title");
    AddElement(head, title);
    gCurrentNode = title;
    gTitleNode = title;

    // body element, under html
    XXXNode_v2 body = CreateElement("body");
    AddElement(html, body);
    gCurrentNode = body;
    gBodyNode = body;
    if (withLineBreaks)
    {
        AddText(body, "\r\n");
    }

    // Parsing starts in the head part.
    gCurrentTreeNode = gHeadNode;
    gCurrentNode = gHeadNode;
}
// Processes a start tag (bTagEnd = true for a self-closing tag): creates the element and
// attaches it to the tree.
// In normalize mode html/head/body/title tags are not created (the pre-built nodes are
// used), the first tag outside the head category switches to the body part, and table /
// definition-list tags are placed by their dedicated handlers.
private void TagBegin(string sTagName, bool bTagEnd)
{
    gbNoTag = false;
    HtmlTagType tagType = HtmlTags.GetHtmlTagType(sTagName);
    HtmlTag tag = HtmlTags.GetHtmlTag(tagType);

    if (gbNormalizeXml)
    {
        switch (tagType)
        {
            case HtmlTagType.Html:
            case HtmlTagType.Head:
                gbNoTag = true;
                return;

            case HtmlTagType.Body:
                gbNoTag = true;
                if (!gbBody)
                {
                    gbBody = true;
                    gCurrentTreeNode = gBodyNode;
                    gCurrentNode = gBodyNode;
                }
                return;

            case HtmlTagType.Title:
                if (gbTitle)
                {
                    // Only the first title is used.
                    gbNoTag = true;
                }
                else if (!bTagEnd)
                {
                    gbTitle = true;
                    gCurrentNode = gTitleNode;
                }
                return;
        }

        // First tag that does not belong to the head category: move to the body part.
        bool startsBody = !gbBody && tag.TagCategory != HtmlTagCategory.Head;
        if (startsBody)
        {
            gbBody = true;
            gCurrentTreeNode = gBodyNode;
            gCurrentNode = gBodyNode;
        }
    }

    gCurrentNode = CreateElement(sTagName);

    if (gbNormalizeXml)
    {
        bool opensTable = tagType == HtmlTagType.Table && !bTagEnd;
        if (opensTable)
        {
            // Keep the enclosing table aside while the nested one is open.
            if (gTable != null)
            {
                gTableStack.Push(gTable);
            }
            gTable = new HtmlTable_v2();
            gTable.Table = gCurrentNode;
            AddElement(gCurrentTreeNode, gCurrentNode);
            gCurrentTreeNode = gCurrentNode;
            return;
        }

        if (TagBeginTableCategory(tag, bTagEnd))
        {
            return;
        }

        bool opensList = tagType == HtmlTagType.DL && !bTagEnd;
        if (opensList)
        {
            // Keep the enclosing list aside while the nested one is open.
            if (gDefinitionList != null)
            {
                gDefinitionListStack.Push(gDefinitionList);
            }
            gDefinitionList = gCurrentNode;
            AddElement(gCurrentTreeNode, gCurrentNode);
            gCurrentTreeNode = gCurrentNode;
            return;
        }

        if (TagBeginDefinitionListCategory(tag, bTagEnd))
        {
            return;
        }

        // TODO (from original notes, to be re-checked): handle a <p> that has no </p>;
        // the remembered <p> would at least have to be reset when one of its parents closes.
    }

    AddElement(gCurrentTreeNode, gCurrentNode);

    // Tags whose end tag is forbidden, and self-closing tags, never become a parent.
    bool becomesParent = !bTagEnd && tag.EndBoundType != HtmlBoundType.Forbidden;
    if (becomesParent)
    {
        gCurrentTreeNode = gCurrentNode;
    }
}
// Places a table-structure tag (thead/tbody/tfoot, colgroup, col, tr, th/td) at the right
// spot of the open table, creating the missing tbody / tr when needed, and records it in
// gTable. A self-closing tag clears the matching slot instead of opening it.
// Returns true when the tag was handled here, false otherwise.
private bool TagBeginTableCategory(HtmlTag tag, bool bTagEnd)
{
    bool insideTable = gTable != null && tag.TagCategory == HtmlTagCategory.Table;
    if (!insideTable)
    {
        return false;
    }

    HtmlTagType type = tag.TagType;
    // What the table remembers as "open": nothing for a self-closing tag.
    XXXNode_v2 opened = bTagEnd ? null : gCurrentNode;

    if (type == HtmlTagType.THead || type == HtmlTagType.TBody || type == HtmlTagType.TFoot)
    {
        AddElement(gTable.Table, gCurrentNode);
        gTable.Body = opened;
    }
    else if (type == HtmlTagType.ColGroup)
    {
        AddElement(gTable.Table, gCurrentNode);
        gTable.ColGroup = opened;
    }
    else if (type == HtmlTagType.Col)
    {
        // A col goes into the open colgroup when there is one, else directly in the table.
        gCurrentTreeNode = gTable.ColGroup != null ? gTable.ColGroup : gTable.Table;
        AddElement(gCurrentTreeNode, gCurrentNode);
        gTable.Col = opened;
    }
    else if (type == HtmlTagType.TR)
    {
        if (gTable.Body == null)
        {
            gTable.Body = CreateElement("tbody");
            AddElement(gTable.Table, gTable.Body);
        }
        AddElement(gTable.Body, gCurrentNode);
        gTable.Row = opened;
    }
    else if (type == HtmlTagType.TH || type == HtmlTagType.TD)
    {
        if (gTable.Row == null)
        {
            if (gTable.Body == null)
            {
                gTable.Body = CreateElement("tbody");
                AddElement(gTable.Table, gTable.Body);
            }
            gTable.Row = CreateElement("tr");
            AddElement(gTable.Body, gTable.Row);
        }
        AddElement(gTable.Row, gCurrentNode);
        gTable.Data = opened;
    }
    else
    {
        return false;
    }

    if (!bTagEnd)
    {
        gCurrentTreeNode = gCurrentNode;
    }
    return true;
}
// Processes an end tag and moves the current node / current tree node accordingly.
// In normalize mode html/head/body end tags are ignored, </title> returns to the current
// tree node, and table / definition-list related end tags restore the position tracked in
// gTable / gDefinitionList. Any other end tag closes the nearest open ancestor with the
// same name, if there is one.
private void TagEnd(string sTagName)
{
    if (gbNormalizeXml)
    {
        HtmlTagType tagType = HtmlTags.GetHtmlTagType(sTagName);

        if (tagType == HtmlTagType.Html || tagType == HtmlTagType.Head || tagType == HtmlTagType.Body)
        {
            return;
        }

        if (tagType == HtmlTagType.Title)
        {
            gCurrentNode = gCurrentTreeNode;
            return;
        }

        if (tagType == HtmlTagType.Table)
        {
            if (gTable != null)
            {
                XXXNode_v2 tableParent = GetParentXXNode(gTable.Table);
                gCurrentTreeNode = tableParent;
                gCurrentNode = tableParent;
                // Resume the enclosing table, if this one was nested.
                gTable = gTableStack.Count != 0 ? gTableStack.Pop() : null;
            }
            return;
        }

        if (tagType == HtmlTagType.DL)
        {
            if (gDefinitionList != null)
            {
                XXXNode_v2 listParent = GetParentXXNode(gDefinitionList);
                gCurrentTreeNode = listParent;
                gCurrentNode = listParent;
                // Resume the enclosing list, if this one was nested.
                gDefinitionList = gDefinitionListStack.Count != 0 ? gDefinitionListStack.Pop() : null;
            }
            return;
        }

        if (gTable != null)
        {
            XXXNode_v2 position;
            switch (tagType)
            {
                case HtmlTagType.THead:
                case HtmlTagType.TBody:
                case HtmlTagType.TFoot:
                    position = gTable.Table;
                    gCurrentTreeNode = position;
                    gCurrentNode = position;
                    gTable.Body = null;
                    return;

                case HtmlTagType.ColGroup:
                    position = gTable.Table;
                    gCurrentTreeNode = position;
                    gCurrentNode = position;
                    gTable.ColGroup = null;
                    return;

                case HtmlTagType.Col:
                    if (gTable.Col != null)
                    {
                        position = GetParentXXNode(gTable.Col);
                        gCurrentTreeNode = position;
                        gCurrentNode = position;
                        gTable.Col = null;
                    }
                    return;

                case HtmlTagType.TR:
                    if (gTable.Row != null)
                    {
                        position = GetParentXXNode(gTable.Row);
                        gCurrentTreeNode = position;
                        gCurrentNode = position;
                        gTable.Row = null;
                    }
                    return;

                case HtmlTagType.TH:
                case HtmlTagType.TD:
                    if (gTable.Data != null)
                    {
                        position = GetParentXXNode(gTable.Data);
                        gCurrentTreeNode = position;
                        gCurrentNode = position;
                        gTable.Data = null;
                    }
                    return;
            }
        }
    }

    // Generic case: close the nearest open element with this name, when one exists.
    XXXNode_v2 closed = GetParentXXNodeByName(gCurrentTreeNode, sTagName);
    if (closed != null)
    {
        gCurrentTreeNode = GetParentXXNode(closed);
    }
    gCurrentNode = gCurrentTreeNode;
}