/// <summary>
/// Drives <c>gHTMLReader</c> to the end of its input and feeds every token it
/// yields (text, comment, doctype, attribute, opening/closing tag) to the
/// Add*/TagBegin/TagEnd helpers. The reader is closed when the method exits,
/// whether it completed normally or an exception propagated.
/// </summary>
/// <remarks>
/// Original author's notes for <c>gbNormalizeXml = true</c> (the behaviour itself
/// is not implemented in this method - presumably in TagBegin/TagEnd; verify there):
/// - the html, head, title and body tags are created automatically;
/// - html, head, title and body tags found in the input are ignored;
/// - only title and meta tags go into the head part, all other tags go into the body part;
/// - a meta tag found after the body part has started stays in the body part;
/// - only the first title tag is kept (in the head part), later title tags are ignored.
/// </remarks>
private void GenerateXml()
{
    try
    {
        InitXml();

        // Reset the per-run parsing state.
        gTableStack = new Stack<HtmlTable_v2>();
        gTable = null;
        gDefinitionListStack = new Stack<XXXNode_v2>();
        gDefinitionList = null;
        gbNoTag = false;
        gbBody = false;
        gbTitle = false;

        while (gHTMLReader.Read())
        {
            if (gHTMLReader.IsText || gHTMLReader.IsComment)
            {
                // The first text token that is not a separator switches the
                // current node to the body node.
                if (gHTMLReader.IsText && !gHTMLReader.IsTextSeparator && !gbBody)
                {
                    gbBody = true;
                    gCurrentNode = gCurrentTreeNode = gBodyNode;
                }

                if (!gbGenerateXmlNodeOnly)
                {
                    if (gbReadCommentInText || gHTMLReader.IsText)
                    {
                        // Text, or a comment that is configured to be stored as text.
                        AddText(gCurrentNode, gHTMLReader.Value);
                    }
                    else
                    {
                        // Comment: apply gCommentCorrection, then make sure the
                        // comment body does not end with '-' by appending a space.
                        // (Presumably this keeps the value legal as an XML comment,
                        // where "--" and a trailing '-' are forbidden - confirm
                        // against the gCommentCorrection pattern.)
                        string comment = gCommentCorrection.Replace(gHTMLReader.Value, "-");
                        if (comment.EndsWith("-"))
                        {
                            comment += " ";
                        }
                        AddComment(gCurrentNode, comment);
                    }
                }
            }
            else if (gHTMLReader.IsDocType)
            {
                // The doctype is stored as a "doctype" attribute on the html node.
                AddAttribute(gHtmlNode, "doctype", gHTMLReader.DocType);
            }
            else if (gHTMLReader.IsProperty)
            {
                if (gbGenerateXmlNodeOnly || gbNoTag)
                {
                    continue;
                }

                try
                {
                    // Strip the characters matched by gReplace, then lower-case
                    // the name. Invariant casing is used so the result does not
                    // depend on the current culture (e.g. Turkish 'I' -> dotless i).
                    string propertyName = gReplace.Replace(gHTMLReader.PropertyName, "").ToLowerInvariant();
                    if (propertyName.Length == 0)
                    {
                        propertyName = "__value";
                    }

                    // Change of 28/01/2014: "hexadecimal value 0x03, is an invalid
                    // character" - seen in a <meta name="keywords" content="..."/>
                    // attribute on
                    // http://www.reseau-gesat.com/Gesat/Yvelines,78/Fontenay-le-Fleury,31443/esat-cotra,e1596/
                    string propertyValue = gHTMLReader.PropertyValue;
                    if (propertyValue != null)
                    {
                        propertyValue = propertyValue.Replace("\x03", "");
                    }
                    AddAttribute(gCurrentNode, propertyName, propertyValue);

                    // The reader reports IsMarkBeginEnd on an attribute token as
                    // well; in that case the tag is closed here. The tag name goes
                    // through the same normalization as in TagBegin so that the
                    // opening and closing names match.
                    if (gHTMLReader.IsMarkBeginEnd)
                    {
                        TagEnd(NormalizeTagName(gHTMLReader.MarkName));
                    }
                }
                catch
                {
                    // Deliberate best effort: an attribute that cannot be added
                    // is dropped and parsing continues with the next token.
                    // NOTE(review): this also hides failures raised by TagEnd above.
                }
            }
            else if (gHTMLReader.IsMarkBeginEnd)
            {
                TagBegin(NormalizeTagName(gHTMLReader.MarkName), true);
            }
            else if (gHTMLReader.IsMarkBegin)
            {
                TagBegin(NormalizeTagName(gHTMLReader.MarkName), false);
            }
            else if (gHTMLReader.IsMarkEnd)
            {
                TagEnd(NormalizeTagName(gHTMLReader.MarkName));
            }
        }
    }
    finally
    {
        gHTMLReader.Close();
    }
}

/// <summary>
/// Converts a raw tag name read from the HTML input into the name passed to
/// TagBegin/TagEnd: lower-cased with the invariant culture, every match of
/// <c>gReplace</c> replaced by "_", and "_" when the result is empty.
/// </summary>
/// <param name="markName">Tag name as reported by the reader.</param>
/// <returns>The normalized, never-empty tag name.</returns>
private string NormalizeTagName(string markName)
{
    string tagName = gReplace.Replace(markName.ToLowerInvariant(), "_");
    return tagName.Length == 0 ? "_" : tagName;
}