/// <summary>
/// Advances the node enumerator until a finished print can be exposed through _currentPrint.
/// </summary>
/// <returns>
/// true when _currentPrint was set to a print that has a title and at least one image;
/// false when there is no enumerator or the nodes were exhausted without producing such a print.
/// </returns>
private bool GetNextPrintFromWeb()
{
    _currentPrint = null;
    if (_enumNodes == null)
    {
        return false;
    }

    while (_enumNodes.MoveNext())
    {
        XXNode current = _enumNodes.Current;

        if (current.type == XXNodeType.Text)
        {
            AddTextNode(current as XXNodeText);
            continue;
        }

        if (current.type == XXNodeType.Image)
        {
            XXNodeImage imageNode = current as XXNodeImage;

            // In multi-print mode, an image that is not flagged as following another image
            // closes the working print and opens a new one.
            bool startsNewPrint = _postMultiPrint && !imageNode.followingImage;
            if (startsNewPrint)
            {
                bool workingPrintIsUsable = _workingPrint.title != null && _workingPrint.images.Count > 0;
                if (workingPrintIsUsable)
                {
                    _currentPrint = _workingPrint;
                }
                NewPrint();
            }

            ImageHtml image = new ImageHtml(imageNode.source, imageNode.alt, imageNode.title, imageNode.className);
            if (_loadImage)
            {
                image.Image = Frboard.LoadImageFromWeb(image.Source);
            }
            _workingPrint.images.Add(image);

            // A finished print was captured above: hand it to the caller now,
            // the image just read already belongs to the working print.
            if (_currentPrint != null)
            {
                return true;
            }
            continue;
        }

        if (current.type == XXNodeType.Link)
        {
            AddTextNode(null);
            XXNodeLink linkNode = current as XXNodeLink;
            _workingPrint.downloadLinks.Add(linkNode.text);
        }
    }

    // No more nodes: drop the enumerator so that the next call returns false immediately.
    _enumNodes = null;
    AddTextNode(null);

    if (_workingPrint.title == null || _workingPrint.images.Count <= 0)
    {
        return false;
    }

    _currentPrint = _workingPrint;
    return true;
}
/// <summary>
/// Handles a closing tag by moving gCurrentTreeNode and gCurrentNode back above the element being closed.
/// </summary>
/// <param name="sTagName">Name of the tag that is being closed.</param>
private void TagEnd(string sTagName)
{
    if (gbNormalizeXml)
    {
        HtmlTagTypeEnum tagType = HtmlTag.GetHtmlTagType(sTagName);

        // Closing html, head and body tags are ignored in normalized mode.
        if (tagType == HtmlTagTypeEnum.Html || tagType == HtmlTagTypeEnum.Head || tagType == HtmlTagTypeEnum.Body)
            return;

        if (tagType == HtmlTagTypeEnum.Title)
        {
            gCurrentNode = gCurrentTreeNode;
            return;
        }

        if (tagType == HtmlTagTypeEnum.Table)
        {
            if (gTable != null)
            {
                XXNode tableParent = GetParentXXNode(gTable.Table);
                gCurrentTreeNode = tableParent;
                gCurrentNode = tableParent;
                // Restore the table that was pushed on gTableStack, if there is one.
                gTable = gTableStack.Count != 0 ? gTableStack.Pop() : null;
            }
            return;
        }

        if (tagType == HtmlTagTypeEnum.DL)
        {
            if (gDefinitionList != null)
            {
                XXNode listParent = GetParentXXNode(gDefinitionList);
                gCurrentTreeNode = listParent;
                gCurrentNode = listParent;
                // Restore the definition list that was pushed on gDefinitionListStack, if there is one.
                gDefinitionList = gDefinitionListStack.Count != 0 ? gDefinitionListStack.Pop() : null;
            }
            return;
        }

        HtmlTable table = gTable;
        if (table != null)
        {
            switch (tagType)
            {
                case HtmlTagTypeEnum.THead:
                case HtmlTagTypeEnum.TBody:
                case HtmlTagTypeEnum.TFoot:
                    gCurrentTreeNode = table.Table;
                    gCurrentNode = gCurrentTreeNode;
                    table.Body = null;
                    return;

                case HtmlTagTypeEnum.ColGroup:
                    gCurrentTreeNode = table.Table;
                    gCurrentNode = gCurrentTreeNode;
                    table.ColGroup = null;
                    return;

                case HtmlTagTypeEnum.Col:
                    if (table.Col != null)
                    {
                        gCurrentTreeNode = GetParentXXNode(table.Col);
                        gCurrentNode = gCurrentTreeNode;
                        table.Col = null;
                    }
                    return;

                case HtmlTagTypeEnum.TR:
                    if (table.Row != null)
                    {
                        gCurrentTreeNode = GetParentXXNode(table.Row);
                        gCurrentNode = gCurrentTreeNode;
                        table.Row = null;
                    }
                    return;

                case HtmlTagTypeEnum.TH:
                case HtmlTagTypeEnum.TD:
                    if (table.Data != null)
                    {
                        gCurrentTreeNode = GetParentXXNode(table.Data);
                        gCurrentNode = gCurrentTreeNode;
                        table.Data = null;
                    }
                    return;
            }
        }
    }

    // Generic case: look for an element with that name starting at the current tree node and
    // going up; when one is found, continue from its parent. Otherwise the tree position is kept.
    XXNode closedElement = GetParentXXNodeByName(gCurrentTreeNode, sTagName);
    if (closedElement != null)
        gCurrentTreeNode = GetParentXXNode(closedElement);
    gCurrentNode = gCurrentTreeNode;
}
/// <summary>
/// Attaches a dt or dd element (held in gCurrentNode) directly to the current definition list.
/// </summary>
/// <param name="tag">Description of the tag being opened.</param>
/// <param name="bTagEnd">true when the tag is opened and closed at once.</param>
/// <returns>true when the tag was handled here; false when the caller must handle it.</returns>
private bool TagBeginDefinitionListCategory(HtmlTag tag, bool bTagEnd)
{
    bool insideDefinitionList = gDefinitionList != null && tag.TagCategory == HtmlTagCategoryEnum.DefinitionList;
    if (!insideDefinitionList)
        return false;

    bool isTermOrDescription = tag.TagType == HtmlTagTypeEnum.DT || tag.TagType == HtmlTagTypeEnum.DD;
    if (!isTermOrDescription)
        return false;

    AddElement(gDefinitionList, gCurrentNode);
    if (!bTagEnd)
        gCurrentTreeNode = gCurrentNode;
    return true;
}
/// <summary>
/// Attaches a table-category element (held in gCurrentNode) at its place inside the current table,
/// creating the missing tbody and tr elements when a row or a cell needs them.
/// </summary>
/// <param name="tag">Description of the tag being opened.</param>
/// <param name="bTagEnd">true when the tag is opened and closed at once.</param>
/// <returns>true when the tag was handled here; false when the caller must handle it.</returns>
private bool TagBeginTableCategory(HtmlTag tag, bool bTagEnd)
{
    HtmlTable table = gTable;
    if (table == null || tag.TagCategory != HtmlTagCategoryEnum.Table)
        return false;

    switch (tag.TagType)
    {
        case HtmlTagTypeEnum.THead:
        case HtmlTagTypeEnum.TBody:
        case HtmlTagTypeEnum.TFoot:
            AddElement(table.Table, gCurrentNode);
            table.Body = bTagEnd ? null : gCurrentNode;
            if (!bTagEnd)
                gCurrentTreeNode = gCurrentNode;
            return true;

        case HtmlTagTypeEnum.ColGroup:
            AddElement(table.Table, gCurrentNode);
            table.ColGroup = bTagEnd ? null : gCurrentNode;
            if (!bTagEnd)
                gCurrentTreeNode = gCurrentNode;
            return true;

        case HtmlTagTypeEnum.Col:
            // A col goes into the open colgroup when there is one, otherwise directly into the table.
            gCurrentTreeNode = table.ColGroup != null ? table.ColGroup : table.Table;
            AddElement(gCurrentTreeNode, gCurrentNode);
            table.Col = bTagEnd ? null : gCurrentNode;
            if (!bTagEnd)
                gCurrentTreeNode = gCurrentNode;
            return true;

        case HtmlTagTypeEnum.TR:
            if (table.Body == null)
            {
                // No body section is open: create a tbody to hold the row.
                table.Body = CreateElement("tbody");
                AddElement(table.Table, table.Body);
            }
            AddElement(table.Body, gCurrentNode);
            table.Row = bTagEnd ? null : gCurrentNode;
            if (!bTagEnd)
                gCurrentTreeNode = gCurrentNode;
            return true;

        case HtmlTagTypeEnum.TH:
        case HtmlTagTypeEnum.TD:
            if (table.Row == null)
            {
                // No row is open: create a tr (and a tbody first when needed) to hold the cell.
                if (table.Body == null)
                {
                    table.Body = CreateElement("tbody");
                    AddElement(table.Table, table.Body);
                }
                table.Row = CreateElement("tr");
                AddElement(table.Body, table.Row);
            }
            AddElement(table.Row, gCurrentNode);
            table.Data = bTagEnd ? null : gCurrentNode;
            if (!bTagEnd)
                gCurrentTreeNode = gCurrentNode;
            return true;
    }

    return false;
}
/// <summary>
/// Handles an opening tag: creates the element and attaches it to the tree.
/// In normalized mode html, head, body and title tags get special treatment, and table and
/// definition-list elements are placed by their dedicated helpers.
/// </summary>
/// <param name="sTagName">Name of the tag being opened.</param>
/// <param name="bTagEnd">true when the tag is opened and closed at once.</param>
private void TagBegin(string sTagName, bool bTagEnd)
{
    gbNoTag = false;

    HtmlTagTypeEnum tagType = HtmlTag.GetHtmlTagType(sTagName);
    HtmlTag tag = HtmlTag.GetHtmlTag(tagType);

    if (gbNormalizeXml)
    {
        switch (tagType)
        {
            case HtmlTagTypeEnum.Html:
            case HtmlTagTypeEnum.Head:
                // No element is created for these tags; gbNoTag records that.
                gbNoTag = true;
                return;

            case HtmlTagTypeEnum.Body:
                gbNoTag = true;
                if (!gbBody)
                {
                    gbBody = true;
                    gCurrentTreeNode = gBodyNode;
                    gCurrentNode = gBodyNode;
                }
                return;

            case HtmlTagTypeEnum.Title:
                if (gbTitle)
                {
                    // A title tag was already taken into account: this one is skipped.
                    gbNoTag = true;
                }
                else if (!bTagEnd)
                {
                    gbTitle = true;
                    gCurrentNode = gTitleNode;
                }
                return;
        }

        // The first tag that is not of the Head category switches the tree position to the body node.
        if (!gbBody && tag.TagCategory != HtmlTagCategoryEnum.Head)
        {
            gbBody = true;
            gCurrentTreeNode = gBodyNode;
            gCurrentNode = gBodyNode;
        }
    }

    gCurrentNode = CreateElement(sTagName);

    if (gbNormalizeXml)
    {
        bool isOpening = !bTagEnd;

        if (isOpening && tagType == HtmlTagTypeEnum.Table)
        {
            // Nested table: keep the enclosing table on the stack until this one is closed.
            if (gTable != null)
                gTableStack.Push(gTable);
            gTable = new HtmlTable();
            gTable.Table = gCurrentNode;
            AddElement(gCurrentTreeNode, gCurrentNode);
            gCurrentTreeNode = gCurrentNode;
            return;
        }

        if (TagBeginTableCategory(tag, bTagEnd))
            return;

        if (isOpening && tagType == HtmlTagTypeEnum.DL)
        {
            // Nested definition list: keep the enclosing list on the stack until this one is closed.
            if (gDefinitionList != null)
                gDefinitionListStack.Push(gDefinitionList);
            gDefinitionList = gCurrentNode;
            AddElement(gCurrentTreeNode, gCurrentNode);
            gCurrentTreeNode = gCurrentNode;
            return;
        }

        if (TagBeginDefinitionListCategory(tag, bTagEnd))
            return;

        // Note from the original author (translated): to be re-checked. Handling of a <p> tag
        // without a closing </p> through gLastPNode is disabled; gLastPNode would at least have
        // to be cleared whenever one of its ancestors is closed.
    }

    AddElement(gCurrentTreeNode, gCurrentNode);

    // Go down into the new element only when it can have an end tag and is not closed at once.
    if (!bTagEnd && tag.EndBoundType != HtmlBoundTypeEnum.Forbidden)
        gCurrentTreeNode = gCurrentNode;
}
/// <summary>
/// Prepares the output tree: creates the root xml element and, in normalized mode, the
/// html, head, title and body elements, then leaves the current position on the head element.
/// </summary>
private void InitXml()
{
    gHTMLReader.ReadCommentInText = gbReadCommentInText;

    if (gXmlDocument != null)
        gXmlDocument.PreserveWhitespace = !gbGenerateXmlNodeOnly;

    bool addLineBreaks = !gbGenerateXmlNodeOnly;

    // Create the xml tag.
    XXNode xmlElement = CreateElement("xml");
    AddElement(gDocumentNode, xmlElement);
    gCurrentTreeNode = xmlElement;
    gCurrentNode = xmlElement;
    if (addLineBreaks)
        AddText(xmlElement, "\r\n");

    if (!gbNormalizeXml)
        return;

    // Create the html tag under the xml tag.
    XXNode htmlElement = CreateElement("html");
    AddElement(gCurrentTreeNode, htmlElement);
    gCurrentTreeNode = htmlElement;
    gCurrentNode = htmlElement;
    gHtmlNode = htmlElement;
    if (addLineBreaks)
        AddText(htmlElement, "\r\n");

    // Create the head tag under the html tag.
    XXNode headElement = CreateElement("head");
    AddElement(gCurrentTreeNode, headElement);
    gCurrentNode = headElement;
    gHeadNode = headElement;
    if (addLineBreaks)
        AddText(headElement, "\r\n");

    // Create the title tag under the head tag.
    XXNode titleElement = CreateElement("title");
    AddElement(headElement, titleElement);
    gCurrentNode = titleElement;
    gTitleNode = titleElement;

    // Create the body tag under the html tag (gCurrentTreeNode still designates html).
    XXNode bodyElement = CreateElement("body");
    AddElement(gCurrentTreeNode, bodyElement);
    gCurrentNode = bodyElement;
    gBodyNode = bodyElement;
    if (addLineBreaks)
        AddText(bodyElement, "\r\n");

    // Start in the head element.
    gCurrentTreeNode = gHeadNode;
    gCurrentNode = gHeadNode;
}
/// <summary>
/// Reads every token delivered by gHTMLReader and builds the corresponding tree
/// (text, comments, doctype, attributes and elements). The reader is always closed on exit.
/// </summary>
/// <remarks>
/// Original author's notes on gbNormalizeXml = true (translated):
///  - the html, head, title and body tags are created automatically;
///  - html, head, title and body tags found in the input are not taken into account;
///  - only title and meta tags are put in the head part, other tags are put in the body part;
///  - a meta tag placed after the beginning of the body part stays in the body part;
///  - only the first title tag is taken into account and placed in the head part,
///    the other title tags are not taken into account.
/// </remarks>
private void GenerateXml()
{
    try
    {
        InitXml();
        gTableStack = new Stack<HtmlTable>();
        gTable = null;
        gDefinitionListStack = new Stack<XXNode>();
        gDefinitionList = null;
        gbNoTag = false;
        gbBody = false;
        gbTitle = false;

        while (gHTMLReader.Read())
        {
            if (gHTMLReader.IsText || gHTMLReader.IsComment)
            {
                // The first real text switches the tree position to the body node. This only makes
                // sense in normalized mode: in the code visible here gBodyNode is only assigned by
                // InitXml when gbNormalizeXml is set, and TagBegin guards its own switch to the
                // body node the same way.
                if (gbNormalizeXml && gHTMLReader.IsText && !gHTMLReader.IsTextSeparator && !gbBody)
                {
                    gbBody = true;
                    gCurrentNode = gCurrentTreeNode = gBodyNode;
                }

                if (!gbGenerateXmlNodeOnly)
                {
                    if (gbReadCommentInText || gHTMLReader.IsText)
                    {
                        AddText(gCurrentNode, gHTMLReader.Value);
                    }
                    else
                    {
                        // The comment text is passed through gCommentCorrection and must not end with '-'.
                        string comment = gCommentCorrection.Replace(gHTMLReader.Value, "-");
                        if (comment.EndsWith("-"))
                            comment += " ";
                        AddComment(gCurrentNode, comment);
                    }
                }
            }
            else if (gHTMLReader.IsDocType)
            {
                AddAttribute(gHtmlNode, "doctype", gHTMLReader.DocType);
            }
            else if (gHTMLReader.IsProperty)
            {
                if (gbGenerateXmlNodeOnly || gbNoTag)
                    continue;

                try
                {
                    // Invariant lower-casing: attribute names must not depend on the current culture.
                    string sPropertyName = gReplace.Replace(gHTMLReader.PropertyName, "").ToLowerInvariant();
                    if (sPropertyName == "")
                        sPropertyName = "__value";
                    AddAttribute(gCurrentNode, sPropertyName, gHTMLReader.PropertyValue);

                    // The tag is closed together with this attribute: close it under the same
                    // normalized name that TagBegin received when the element was created.
                    if (gHTMLReader.IsMarkBeginEnd)
                        TagEnd(GetNormalizedTagName(gHTMLReader.MarkName));
                }
                catch
                {
                    // Deliberate best effort: an attribute that cannot be added is skipped
                    // so that one malformed attribute does not abort the whole conversion.
                }
            }
            else if (gHTMLReader.IsMarkBeginEnd)
            {
                TagBegin(GetNormalizedTagName(gHTMLReader.MarkName), true);
            }
            else if (gHTMLReader.IsMarkBegin)
            {
                TagBegin(GetNormalizedTagName(gHTMLReader.MarkName), false);
            }
            else if (gHTMLReader.IsMarkEnd)
            {
                TagEnd(GetNormalizedTagName(gHTMLReader.MarkName));
            }
        }
    }
    finally
    {
        gHTMLReader.Close();
    }
}

/// <summary>
/// Converts a tag name read from the HTML source into the name used for the generated element:
/// lower-cased with the invariant culture, passed through gReplace with "_" as replacement,
/// and "_" when nothing is left.
/// </summary>
/// <param name="rawTagName">Tag name as delivered by the reader.</param>
/// <returns>The normalized, never empty, tag name.</returns>
private string GetNormalizedTagName(string rawTagName)
{
    string tagName = gReplace.Replace(rawTagName.ToLowerInvariant(), "_");
    return tagName == "" ? "_" : tagName;
}
/// <summary>
/// Builds the tree as a new XDocument.
/// </summary>
/// <returns>The document stored in gXDocument once generation has completed.</returns>
public XDocument GenerateXDocument()
{
    XDocument document = new XDocument();
    gXDocument = document;

    // The document itself is the parent under which the root element gets attached.
    gDocumentNode = new XXNode { XNode = document };

    GenerateXml();
    return gXDocument;
}
/// <summary>
/// Searches for an element called <paramref name="name"/>, starting at <paramref name="node"/>
/// itself and going up through its ancestors, in both representations held by the node.
/// </summary>
/// <param name="node">Node where the search starts.</param>
/// <param name="name">Element name to look for.</param>
/// <returns>
/// A new XXNode holding the match found in each representation, or null when neither matched.
/// </returns>
/// <exception cref="PBException">The XNode held by <paramref name="node"/> is not an XElement.</exception>
private static XXNode GetParentXXNodeByName(XXNode node, string name)
{
    XXNode match = new XXNode();
    bool anyMatch = false;

    // XmlDocument representation
    for (XmlNode xmlCandidate = node.XmlNode; xmlCandidate != null; xmlCandidate = xmlCandidate.ParentNode)
    {
        if (xmlCandidate.Name == name)
        {
            match.XmlNode = xmlCandidate;
            anyMatch = true;
            break;
        }
    }

    // XDocument representation
    if (node.XNode != null)
    {
        XElement start = node.XNode as XElement;
        if (start == null)
            throw new PBException("error generating XDocument node is not a XElement");

        for (XElement xCandidate = start; xCandidate != null; xCandidate = xCandidate.Parent)
        {
            if (xCandidate.Name == name)
            {
                match.XNode = xCandidate;
                anyMatch = true;
                break;
            }
        }
    }

    return anyMatch ? match : null;
}
/// <summary>
/// Builds a new XXNode that designates the parent of <paramref name="node"/> in each
/// representation the node holds.
/// </summary>
/// <param name="node">Node whose parent is requested.</param>
/// <returns>A new XXNode, never null; a representation missing from the node is left unset.</returns>
private static XXNode GetParentXXNode(XXNode node)
{
    XXNode parent = new XXNode();

    XmlNode xmlChild = node.XmlNode;
    if (xmlChild != null)
        parent.XmlNode = xmlChild.ParentNode;

    XNode xChild = node.XNode;
    if (xChild != null)
        parent.XNode = xChild.Parent;

    return parent;
}
/// <summary>
/// Appends a comment to <paramref name="parent"/> in every document being generated.
/// </summary>
/// <param name="parent">Node that receives the comment.</param>
/// <param name="comment">Text of the comment.</param>
private void AddComment(XXNode parent, string comment)
{
    if (gXmlDocument != null)
    {
        XmlComment xmlComment = gXmlDocument.CreateComment(comment);
        parent.XmlNode.AppendChild(xmlComment);
    }

    if (gXDocument != null)
    {
        XComment xComment = new XComment(comment);
        XElement container = (XElement)parent.XNode;
        container.Add(xComment);
    }
}
/// <summary>
/// Appends a text node to <paramref name="parent"/> in every document being generated,
/// unless IsSeparator reports the text as a separator.
/// </summary>
/// <param name="parent">Node that receives the text.</param>
/// <param name="text">Text to append.</param>
private void AddText(XXNode parent, string text)
{
    if (!IsSeparator(text))
    {
        if (gXmlDocument != null)
        {
            XmlText xmlText = gXmlDocument.CreateTextNode(text);
            parent.XmlNode.AppendChild(xmlText);
        }

        if (gXDocument != null)
        {
            XText xText = new XText(text);
            XElement container = (XElement)parent.XNode;
            container.Add(xText);
        }
    }
}
/// <summary>
/// Adds an attribute to <paramref name="parent"/> in every document being generated.
/// An attribute that already exists under that name is left untouched.
/// </summary>
/// <param name="parent">Element that receives the attribute.</param>
/// <param name="name">Attribute name.</param>
/// <param name="value">Attribute value; null is stored as an empty string.</param>
private void AddAttribute(XXNode parent, string name, string value)
{
    if (gXmlDocument != null)
    {
        XmlAttributeCollection xmlAttributes = parent.XmlNode.Attributes;
        if (xmlAttributes.GetNamedItem(name) == null)
        {
            XmlAttribute xmlAttribute = gXmlDocument.CreateAttribute(name);
            value = value ?? "";
            xmlAttribute.Value = value;
            parent.XmlNode.Attributes.Append(xmlAttribute);
        }
    }

    if (gXDocument != null)
    {
        XElement owner = (XElement)parent.XNode;
        if (owner.Attribute(name) == null)
        {
            value = value ?? "";
            owner.Add(new XAttribute(name, value));
        }
    }
}
/// <summary>
/// Creates a new element called <paramref name="element"/> and appends it to
/// <paramref name="parent"/> in each representation the parent holds.
/// </summary>
/// <param name="parent">Node that receives the new element.</param>
/// <param name="element">Name of the element to create.</param>
/// <exception cref="PBException">The XNode held by <paramref name="parent"/> is not an XElement.</exception>
private void AddElement(XXNode parent, string element)
{
    if (parent.XmlNode != null)
    {
        XmlElement xmlChild = gXmlDocument.CreateElement(element);
        parent.XmlNode.AppendChild(xmlChild);
    }

    if (parent.XNode == null)
        return;

    XElement container = parent.XNode as XElement;
    if (container == null)
        throw new PBException("error generating XDocument node is not a XElement");

    container.Add(new XElement(element));
}
/// <summary>
/// Appends <paramref name="child"/> to <paramref name="parent"/> in each representation the parent holds.
/// </summary>
/// <param name="parent">Node that receives the child; its XNode may be an XElement or an XDocument.</param>
/// <param name="child">Node to append.</param>
/// <exception cref="PBException">The XNode held by <paramref name="parent"/> is neither an XElement nor an XDocument.</exception>
private void AddElement(XXNode parent, XXNode child)
{
    if (parent.XmlNode != null)
        parent.XmlNode.AppendChild(child.XmlNode);

    XNode target = parent.XNode;
    if (target == null)
        return;

    XElement targetElement = target as XElement;
    if (targetElement != null)
    {
        targetElement.Add(child.XNode);
        return;
    }

    XDocument targetDocument = target as XDocument;
    if (targetDocument != null)
    {
        targetDocument.Add(child.XNode);
        return;
    }

    throw new PBException("error generating XDocument node is neither a XElement nor a XDocument");
}
/// <summary>
/// Creates a detached element called <paramref name="name"/> for every document being generated.
/// </summary>
/// <param name="name">Name of the element.</param>
/// <returns>A new XXNode holding the created element(s); it is not attached to any parent yet.</returns>
private XXNode CreateElement(string name)
{
    XXNode created = new XXNode();

    if (gXmlDocument != null)
        created.XmlNode = gXmlDocument.CreateElement(name);

    if (gXDocument != null)
        created.XNode = new XElement(name);

    return created;
}