示例#1
0
        /// <summary>
        /// Drains <c>gHTMLReader</c> and feeds every item it yields (text, comment, doctype,
        /// attribute, opening / self-closing / closing tag) to the XML building helpers
        /// (<c>AddText</c>, <c>AddComment</c>, <c>AddAttribute</c>, <c>TagBegin</c>, <c>TagEnd</c>).
        /// The reader is always closed when the method exits, even if an exception escapes.
        /// </summary>
        /// <remarks>
        /// Notes carried over from the original author about the <c>gbNormalizeXml = true</c> mode
        /// (the corresponding logic is not in this method — presumably in TagBegin/TagEnd):
        ///   - the html, head, title and body tags are created automatically;
        ///   - html, head, title and body tags found in the input are ignored;
        ///   - only title and meta tags go into the head part, every other tag goes into the body part;
        ///   - a meta tag found after the beginning of the body part stays in the body part;
        ///   - only the first title tag is kept (in the head part), the following ones are ignored.
        /// </remarks>
        private void GenerateXml()
        {
            try
            {
                InitXml();

                // Reset the per-document parsing state.
                gTableStack = new Stack<HtmlTable_v2>();
                gTable      = null;

                gDefinitionListStack = new Stack<XXXNode_v2>();
                gDefinitionList      = null;

                gbNoTag = false;
                gbBody  = false;
                gbTitle = false;

                while (gHTMLReader.Read())
                {
                    if (gHTMLReader.IsText || gHTMLReader.IsComment)
                    {
                        // The first real text (not a separator) implicitly starts the body part.
                        if (gHTMLReader.IsText && !gHTMLReader.IsTextSeparator && !gbBody)
                        {
                            gbBody       = true;
                            gCurrentNode = gCurrentTreeNode = gBodyNode;
                        }

                        if (gbGenerateXmlNodeOnly)
                        {
                            continue;
                        }

                        if (gbReadCommentInText || gHTMLReader.IsText)
                        {
                            // With gbReadCommentInText set, comments are stored as plain text too.
                            AddText(gCurrentNode, gHTMLReader.Value);
                        }
                        else
                        {
                            // An XML comment may not contain "--" nor end with "-":
                            // gCommentCorrection rewrites its matches to a single "-"
                            // (presumably runs of dashes — verify against its definition)
                            // and a trailing dash is padded with a space.
                            string commentText = gCommentCorrection.Replace(gHTMLReader.Value, "-");
                            if (commentText.EndsWith("-"))
                            {
                                commentText += " ";
                            }
                            AddComment(gCurrentNode, commentText);
                        }
                    }
                    else if (gHTMLReader.IsDocType)
                    {
                        // The doctype is kept as an attribute of the html node.
                        AddAttribute(gHtmlNode, "doctype", gHTMLReader.DocType);
                    }
                    else if (gHTMLReader.IsProperty)
                    {
                        if (gbGenerateXmlNodeOnly || gbNoTag)
                        {
                            continue;
                        }

                        try
                        {
                            // Strip what gReplace matches from the attribute name; a name
                            // that ends up empty is stored under "__value".
                            // ToLowerInvariant: names must not depend on the current culture
                            // (e.g. "TITLE" lower-cases to "tıtle" under a Turkish culture).
                            string sPropertyName = gReplace.Replace(gHTMLReader.PropertyName, "").ToLowerInvariant();
                            if (sPropertyName.Length == 0)
                            {
                                sPropertyName = "__value";
                            }

                            // Change of 28/01/2014: "hexadecimal value 0x03, is an invalid character",
                            // found in http://www.reseau-gesat.com/Gesat/Yvelines,78/Fontenay-le-Fleury,31443/esat-cotra,e1596/
                            // (meta "keywords" content) — the 0x03 control character is removed.
                            string propertyValue = gHTMLReader.PropertyValue;
                            if (propertyValue != null)
                            {
                                propertyValue = propertyValue.Replace("\x03", "");
                            }
                            AddAttribute(gCurrentNode, sPropertyName, propertyValue);

                            // Attribute of a self-closing tag: close the tag, using the same
                            // normalized name that the TagBegin call sites use.
                            if (gHTMLReader.IsMarkBeginEnd)
                            {
                                TagEnd(NormalizeMarkName(gHTMLReader.MarkName));
                            }
                        }
                        catch
                        {
                            // Deliberate best effort: an attribute that cannot be added
                            // is skipped so that it does not abort the whole document.
                            // NOTE(review): this also hides failures of TagEnd above — confirm
                            // this is intended and consider logging here.
                        }
                    }
                    else if (gHTMLReader.IsMarkBeginEnd)
                    {
                        TagBegin(NormalizeMarkName(gHTMLReader.MarkName), true);
                    }
                    else if (gHTMLReader.IsMarkBegin)
                    {
                        TagBegin(NormalizeMarkName(gHTMLReader.MarkName), false);
                    }
                    else if (gHTMLReader.IsMarkEnd)
                    {
                        TagEnd(NormalizeMarkName(gHTMLReader.MarkName));
                    }
                }
            }
            finally
            {
                gHTMLReader.Close();
            }
        }

        /// <summary>
        /// Converts a tag name read from the HTML source into the name used for the XML node:
        /// lower-cased (culture-invariant), every match of <c>gReplace</c> replaced by "_",
        /// and "_" when nothing is left.
        /// </summary>
        /// <param name="markName">Tag name as returned by <c>gHTMLReader.MarkName</c>.</param>
        /// <returns>The normalized, never empty, tag name.</returns>
        private string NormalizeMarkName(string markName)
        {
            string sTagName = gReplace.Replace(markName.ToLowerInvariant(), "_");
            return sTagName.Length == 0 ? "_" : sTagName;
        }