/// <summary>
/// Removes empty nodes from data tree
/// </summary>
/// <param name="parent">Node whose direct children (and their descendants) are cleaned</param>
private void CleanData(UfDataNode parent)
{
    // CleanDataNode may remove the child from parent.Nodes. Enumerating the
    // collection with foreach while it is being modified throws
    // InvalidOperationException, so walk it backwards by index instead:
    // removing the current item never shifts the items still to be visited.
    for (int i = parent.Nodes.Count - 1; i >= 0; i--)
    {
        CleanDataNode(parent, parent.Nodes[i]);
    }
}
/// <summary>
/// Recursively writes a data node and all of its descendants as XML elements.
/// </summary>
/// <param name="node">Node to write; a node with an empty name produces no element of its own, only its children</param>
/// <param name="writer">XML writer receiving the output</param>
private void AddNode(UfDataNode node, XmlTextWriter writer)
{
    // Unnamed nodes are transparent containers: no start/end element is written for them.
    bool writesElement = node.Name != string.Empty;

    if (writesElement)
    {
        writer.WriteStartElement(node.Name);

        // Optional attributes are emitted only when they carry information.
        if (!string.IsNullOrEmpty(node.SourceUrl))
        {
            writer.WriteAttributeString("sourceurl", node.SourceUrl);
        }

        if (node.RepresentativeNode)
        {
            writer.WriteAttributeString("representativehcard", "true");
        }

        if (!string.IsNullOrEmpty(node.ElementId))
        {
            writer.WriteAttributeString("id", node.ElementId);
        }

        writer.WriteString(node.Value);
    }

    foreach (UfDataNode descendant in node.Nodes)
    {
        AddNode(descendant, writer);
    }

    if (writesElement)
    {
        writer.WriteEndElement();
    }
}
/// <summary>
/// Appends a value to the array-style node called <paramref name="name"/>, creating that node when none exists.
/// </summary>
/// <param name="name">Name of the container node to append to</param>
/// <param name="value">Value of the new item</param>
/// <param name="sourceurl">Source url recorded on the new item</param>
/// <param name="representativenode">Representative flag recorded on the new item</param>
/// <returns>The newly created item node. When several containers share the name, an item is added to each and the last one is returned.</returns>
public UfDataNode Append(string name, string value, string sourceurl, bool representativenode)
{
    UfDataNode appended = new UfDataNode();
    int position = -1;

    foreach (object entry in this.InnerList)
    {
        UfDataNode container = (UfDataNode)entry;
        if (container.Name != name)
        {
            continue;
        }

        // The item is named after the index it receives inside the container.
        UfDataNode item = new UfDataNode("", value, sourceurl, representativenode);
        position = container.Nodes.Add(item);
        item.Name = position.ToString();
        appended = item;
    }

    if (position != -1)
    {
        return appended;
    }

    // No container with that name yet: create it and store the value as item "0".
    UfDataNode newContainer = new UfDataNode(name, "Array");
    this.InnerList.Add(newContainer);

    UfDataNode firstItem = new UfDataNode("0", value, sourceurl, representativenode);
    newContainer.Nodes.Add(firstItem);
    return firstItem;
}
/// <summary>
/// Reads a UfXtract JSON document and rebuilds it as a UfDataNode structure
/// </summary>
/// <param name="json">JSON text to read</param>
/// <returns>Root node of the rebuilt tree, after CleanData has been run over it</returns>
public UfDataNode Convert(string json)
{
    UfDataNode root = new UfDataNode();
    string currentProperty = "";

    using (JsonReader reader = new JsonTextReader(new StringReader(json)))
    {
        while (reader.Read())
        {
            // Tokens at depth 2 or shallower are ignored.
            if (reader.Depth <= 2)
            {
                continue;
            }

            // The token type is re-read for every test on purpose: CreateArray
            // (and presumably CreateObject) advance the reader, so the current
            // token may differ from one test to the next.
            if (reader.TokenType == JsonToken.PropertyName)
            {
                currentProperty = reader.Value.ToString();
            }

            if (reader.TokenType == JsonToken.StartArray)
            {
                CreateArray(root, reader, currentProperty);
            }

            if (reader.TokenType == JsonToken.StartObject)
            {
                CreateObject(root, reader, currentProperty);
            }

            if (reader.TokenType == JsonToken.String)
            {
                AddString(root, currentProperty, reader.Value);
            }
        }
    }

    CleanData(root);
    return root;
}
/// <summary>
/// Calendar end date optimization: when dtend holds only a time ("T..."),
/// the date from dtstart is placed in front of it.
/// </summary>
/// <param name="node">Node containing 'vevent' data</param>
public static void CalendarDateOptimization(UfDataNode node)
{
    // Look each child up once; the name indexer is a linear search.
    UfDataNode endNode = node.Nodes["dtend"];
    UfDataNode startNode = node.Nodes["dtstart"];
    if (endNode == null || startNode == null)
    {
        return;
    }

    // A missing value cannot be a time-only structure (and must not crash).
    // Ordinal comparison: "T" is a format marker, not linguistic text.
    string endValue = endNode.Value;
    if (string.IsNullOrEmpty(endValue) || !endValue.StartsWith("T", StringComparison.Ordinal))
    {
        return;
    }

    // Get date from dtstart and add to dtend
    ISODateTime isoDateTime = new ISODateTime();
    isoDateTime.Parse(startNode.Value);

    // Has to be complete date structure
    if (isoDateTime.Date > 0)
    {
        if (endValue.Contains("T24"))
        {
            // Hour 24 means midnight at the end of the day: knock over into
            // the next day and rewrite the hour as 00.
            DateTime dateTime = new DateTime(isoDateTime.Year, isoDateTime.Month, isoDateTime.Date);
            dateTime = dateTime.AddDays(1);
            endNode.Value = dateTime.Year + "-"
                + isoDateTime.TwoDigitString(dateTime.Month) + "-"
                + isoDateTime.TwoDigitString(dateTime.Day)
                + endValue.Replace("T24", "T00");
        }
        else
        {
            endNode.Value = isoDateTime.Year + "-"
                + isoDateTime.TwoDigitString(isoDateTime.Month) + "-"
                + isoDateTime.TwoDigitString(isoDateTime.Date)
                + endValue;
        }
    }
}
/// <summary>
/// Converts an XML node (and, recursively, its children) into a UfDataNode under <paramref name="node"/>.
/// </summary>
/// <param name="node">Parent data node; receives the new child, or the text when <paramref name="xmlNode"/> is a text node</param>
/// <param name="xmlNode">XML node being converted</param>
private void CreateNode(UfDataNode node, XmlNode xmlNode)
{
    bool isText = xmlNode.NodeType == XmlNodeType.Text;

    UfDataNode created = new UfDataNode();
    created.Name = xmlNode.Name;

    if (!string.IsNullOrEmpty(xmlNode.Value))
    {
        created.Value = xmlNode.Value;
    }

    // A text node is not added as a child; its text becomes the parent's value.
    if (isText)
    {
        node.Value = xmlNode.InnerText;
    }

    // Carry over the element's id attribute when one is present.
    XmlAttribute idAttribute = xmlNode.Attributes == null ? null : xmlNode.Attributes["id"];
    if (idAttribute != null)
    {
        created.ElementId = idAttribute.Value;
    }

    foreach (XmlNode xmlChild in xmlNode.ChildNodes)
    {
        CreateNode(created, xmlChild);
    }

    if (!isText)
    {
        node.Nodes.Add(created);
    }
}
/// <summary>
/// Renders a UfDataNode structure as very basic HTML wrapped in a div with class "microformats".
/// </summary>
/// <param name="node">Node whose children are rendered</param>
/// <param name="formatDescriber">Microformat format describer; its BaseElement drives the rendering of each child</param>
/// <returns>HTML string</returns>
public string Convert(UfDataNode node, UfFormatDescriber formatDescriber)
{
    StringWriter buffer = new StringWriter();
    UfElementDescriber rootDescriber = formatDescriber.BaseElement;

    using (XhtmlTextWriter html = new XhtmlTextWriter(buffer))
    {
        // Opening tag: <div class="microformats">
        html.WriteBeginTag("div");
        html.WriteAttribute("class", "microformats");
        html.Write(HtmlTextWriter.TagRightChar);

        // Each child starts on its own line.
        foreach (UfDataNode item in node.Nodes)
        {
            html.WriteLine();
            AddNode(item, rootDescriber, html);
        }

        html.WriteEndTag("div");
        html.WriteLine();
    }

    return buffer.ToString();
}
/// <summary>
/// Appends an indented text rendering of a node and its descendants to a string
/// </summary>
/// <param name="output">Text built so far; the method calls itself with an empty string for child nodes</param>
/// <param name="node">The data object</param>
/// <param name="indent">Current indent (number of leading pad characters)</param>
/// <returns>The <paramref name="output"/> text followed by the rendering of <paramref name="node"/></returns>
private string BuildDataString(string output, UfDataNode node, int indent)
{
    // First-level nodes are separated from the previous one by a blank line.
    if (indent == 1)
    {
        output += "\n";
    }

    string padding = string.Empty;
    int remaining = indent;
    while (remaining > 0)
    {
        padding += " ";
        remaining--;
    }

    if (tree)
    {
        // Json data structure: a node either lists its value array or shows its single value.
        if (node.ValueArray.Count > 0)
        {
            output += padding + node.Name + ": \n";
            for (int position = 0; position < node.ValueArray.Count; position++)
            {
                output += padding + "[" + position.ToString() + "]: " + node.ValueArray[position] + "\n";
            }
        }
        else
        {
            output += padding + node.Name + ": " + node.Value + "\n";
        }
    }
    else if (node.Name != string.Empty)
    {
        // Standard data structure: unnamed nodes print nothing themselves.
        output += padding + node.Name + ": " + node.Value + "\n";

        if (node.RepresentativeNode)
        {
            output += padding + " " + "representative-hcard: true\n";
        }

        if (node.SourceUrl != string.Empty)
        {
            output += padding + " " + "source-url: " + node.SourceUrl + "\n";
        }
    }

    foreach (UfDataNode descendant in node.Nodes)
    {
        output += BuildDataString("", descendant, indent + 1);
    }

    return output;
}
/// <summary>
/// Produces a very basic HTML form of a UfDataNode structure.
/// </summary>
/// <param name="node">Node whose child nodes are written out</param>
/// <param name="formatDescriber">Microformat format describer object; its BaseElement is passed to AddNode for every child</param>
/// <returns>HTML string</returns>
public string Convert(UfDataNode node, UfFormatDescriber formatDescriber)
{
    StringWriter target = new StringWriter();
    UfElementDescriber baseDescriber = formatDescriber.BaseElement;

    using (XhtmlTextWriter xhtml = new XhtmlTextWriter(target))
    {
        // Wrapper element around all microformat output.
        xhtml.WriteBeginTag("div");
        xhtml.WriteAttribute("class", "microformats");
        xhtml.Write(HtmlTextWriter.TagRightChar);

        foreach (UfDataNode entry in node.Nodes)
        {
            xhtml.WriteLine();
            AddNode(entry, baseDescriber, xhtml);
        }

        xhtml.WriteEndTag("div");
        xhtml.WriteLine();
    }

    // The StringWriter still returns its text after the using block has ended.
    string html = target.ToString();
    return html;
}
/// <summary>
/// Applies the optimization rules to every direct child of a node
/// </summary>
/// <param name="node">Node being optimized</param>
public static void RunNodeOptimization(UfDataNode node)
{
    // Walk from the last child to the first so that a child removed by
    // OptimizesNode does not shift the children still to be visited.
    int last = node.Nodes.Count - 1;
    for (int index = last; index >= 0; index--)
    {
        OptimizesNode(node, node.Nodes[index]);
    }
}
/// <summary>
/// Adds a value to the ValueArray of the node called <paramref name="name"/>, creating the node when it does not exist.
/// </summary>
/// <param name="name">Name of the node to add the value to</param>
/// <param name="value">Value to add</param>
/// <returns>Index of the node in this collection (the last matching node when several share the name)</returns>
public int AppendArrayList(string name, string value)
{
    int foundAt = -1;
    int position = 0;

    while (position < this.InnerList.Count)
    {
        UfDataNode candidate = (UfDataNode)this.InnerList[position];
        if (candidate.Name == name)
        {
            // Existing node: extend its value list.
            candidate.ValueArray.Add(value);
            foundAt = position;
        }

        position++;
    }

    if (foundAt != -1)
    {
        return foundAt;
    }

    // Nothing matched: start a new node holding just this value.
    UfDataNode created = new UfDataNode();
    created.Name = name;
    created.ValueArray.Add(value);
    return this.InnerList.Add(created);
}
/// <summary>
/// Converts a UfDataNode structure into JSON
/// </summary>
/// <param name="node">Node</param>
/// <param name="formatArray">Array of microformat format describers used to describe the data in node</param>
/// <param name="callBack">JSONP callback function name to wrap the JSON object; null or empty for plain JSON. Parentheses and surrounding whitespace are stripped.</param>
/// <returns>JSON string</returns>
public string Convert(UfDataNode node, ArrayList formatArray, string callBack)
{
    // Sanitise the callback once and use the sanitised value everywhere.
    // Previously the cleaned name was stored in the field but the raw
    // parameter was written to the output, and a null callback crashed.
    this.callBack = (callBack ?? string.Empty).Replace("(", "").Replace(")", "").Trim();
    bool wrapInCallBack = this.callBack != string.Empty;

    // Copy every node two levels below the root whose name matches a
    // format's base element into the output tree.
    foreach (UfFormatDescriber formatDescriber in formatArray)
    {
        foreach (UfDataNode childNode in node.Nodes)
        {
            foreach (UfDataNode grandChildNode in childNode.Nodes)
            {
                if (grandChildNode.Name == formatDescriber.BaseElement.Name)
                {
                    UfDataNode xChild = tree.Nodes.Append(grandChildNode.Name, grandChildNode.Value, grandChildNode.SourceUrl, grandChildNode.RepresentativeNode);
                    if (grandChildNode.Nodes.Count > 0)
                    {
                        AddChildNodes(xChild, grandChildNode, formatDescriber.BaseElement);
                    }
                }
            }
        }
    }

    string output = "";
    if (wrapInCallBack)
    {
        output += this.callBack + "( ";
    }

    output += "{";
    foreach (UfDataNode childNode in tree.Nodes)
    {
        output += BuildDataString(childNode, true, false);
    }

    // Drop the last two characters written by BuildDataString
    // (presumably a trailing ", " separator - TODO confirm).
    if (tree.Nodes.Count > 0)
    {
        output = output.Substring(0, output.Length - 2);
    }

    output += AddUfErrors();
    output += AddReporting(node);

    // End whole block
    output += "}";
    if (wrapInCallBack)
    {
        output += " )";
    }

    return output;
}
/// <summary>
/// Converts a UfXtract XML document into a UfDataNode structure
/// </summary>
/// <param name="xmlDocument">XmlDocument whose root element's children are converted</param>
/// <returns>Root UfDataNode; it has no children when the document has no root element</returns>
public UfDataNode Convert(XmlDocument xmlDocument)
{
    UfDataNode node = new UfDataNode();

    // An XmlDocument that has not been loaded has no DocumentElement;
    // return the empty root instead of failing with a null reference.
    XmlElement root = xmlDocument.DocumentElement;
    if (root == null)
    {
        return node;
    }

    foreach (XmlNode xmlNode in root.ChildNodes)
    {
        CreateNode(node, xmlNode);
    }

    return node;
}
/// <summary>
/// Moves the text value of an org node into an organization-name child, unless that child already exists
/// </summary>
/// <param name="node">Node containing 'org' data</param>
public static void OrgOptimization(UfDataNode node)
{
    // Nothing to do when the structured form is already present.
    if (node.Nodes["organization-name"] != null)
    {
        return;
    }

    UfDataNode organizationName = new UfDataNode("organization-name", node.Value);
    node.Nodes.Add(organizationName);
    node.Value = "";
}
/// <summary>
/// Adds an 'updated' element to an hEntry when it is missing, copying the value of 'published'
/// </summary>
/// <param name="node">Node containing 'hentry' data</param>
public static void UpdatedDateOptimization(UfDataNode node)
{
    bool hasPublished = node.Nodes["published"] != null;
    if (!hasPublished || node.Nodes["updated"] != null)
    {
        return;
    }

    // Reuse the published date as the updated date.
    string publishedValue = node.Nodes["published"].Value;
    node.Nodes.Add(new UfDataNode("updated", publishedValue));
}
/// <summary>
/// Writes a node and its descendants to the response as nested div elements.
/// </summary>
/// <param name="node">Node to write</param>
/// <param name="indent">Text written in front of the node; it is extended for each level of nesting and written as-is (not encoded)</param>
public void WriteNode(UfDataNode node, string indent)
{
    // Name and value originate from parsed pages, so they are HTML-encoded
    // before being written into the response.
    Response.Write("<div>" + indent
        + System.Web.HttpUtility.HtmlEncode(node.Name)
        + " - "
        + System.Web.HttpUtility.HtmlEncode(node.Value));

    indent += " ";
    foreach (UfDataNode childnode in node.Nodes)
    {
        WriteNode(childnode, indent);
    }

    // Close the element opened above; it was previously left open,
    // producing unbalanced markup.
    Response.Write("</div>");
}
/// <summary>
/// Builds a UfDataNode structure from the children of an XML document's root element
/// </summary>
/// <param name="xmlDocument">XmlDocument</param>
/// <returns>UfDataNode</returns>
public UfDataNode Convert(XmlDocument xmlDocument)
{
    UfDataNode root = new UfDataNode();
    XmlNodeList topLevel = xmlDocument.DocumentElement.ChildNodes;

    // Each top-level XML node is converted (recursively) by CreateNode.
    foreach (XmlNode current in topLevel)
    {
        CreateNode(root, current);
    }

    return root;
}
/// <summary>
/// Gets the value of a descendant node using a custom tree expression
/// </summary>
/// <param name="treeExpression">Custom expression of a node tree position ie "n/given-name"</param>
/// <returns>The text value of a node. The string is empty if not found</returns>
public string DescendantValue(string treeExpression)
{
    UfDataNode node = DescendantNode(treeExpression);

    // Honour the documented contract: a missing node (null) or a node
    // without a value both yield an empty string rather than an exception.
    if (node == null || node.Value == null)
    {
        return "";
    }

    return node.Value;
}
/// <summary>
/// Takes a JSON array and adds child UfDataNode nodes to the parent
/// </summary>
/// <param name="node">Parent node; receives the array's container node and any unnamed string items</param>
/// <param name="jsonReader">Reader to consume tokens from (the visible caller invokes this when the current token is StartArray)</param>
/// <param name="name">Name given to the container node and to unnamed string items</param>
private void CreateArray(UfDataNode node, JsonReader jsonReader, string name)
{
    string propertyName = "";
    UfDataNode newNode = new UfDataNode();
    newNode.Name = name;

    // Read one token ahead of the loop. Only a String token is acted on here;
    // any other token type read at this point gets no handling of its own.
    // NOTE(review): that includes StartObject, StartArray and EndArray - confirm this is intended.
    jsonReader.Read();
    if (jsonReader.TokenType == JsonToken.String && propertyName == "")
    {
        // Unnamed string item: added to the parent under the array's name.
        AddString(node, name, jsonReader.Value);
    }
    while (jsonReader.Read())
    {
        if (jsonReader.TokenType == JsonToken.EndArray)
        {
            // End of an array: stop consuming tokens.
            break;
        }
        else
        {
            // The token type is re-tested before every step because the
            // nested CreateArray call advances the reader
            // (CreateObject presumably does too - TODO confirm).
            if (jsonReader.TokenType == JsonToken.PropertyName)
            {
                // Remembered for the value token(s) that follow.
                propertyName = jsonReader.Value.ToString();
            }
            if (jsonReader.TokenType == JsonToken.StartArray)
            {
                CreateArray(newNode, jsonReader, propertyName);
            }
            if (jsonReader.TokenType == JsonToken.StartObject)
            {
                CreateObject(newNode, jsonReader, propertyName);
            }
            if (jsonReader.TokenType == JsonToken.String && propertyName == "")
            {
                // No property name seen yet: the string is an array item and goes to the parent.
                AddString(node, name, jsonReader.Value);
            }
            if (jsonReader.TokenType == JsonToken.String && propertyName != "")
            {
                // Named string: stored on the container node under the last property name read.
                AddString(newNode, propertyName, jsonReader.Value);
            }
        }
    }

    // The container node is always added, even when nothing was placed in it.
    node.Nodes.Add(newNode);
}
/// <summary>
/// Applies the name-specific optimization rules to a node, removes it when it is blank, and otherwise recurses into its children
/// </summary>
/// <param name="parent">Parent node</param>
/// <param name="child">Child node being optimized</param>
public static void OptimizesNode(UfDataNode parent, UfDataNode child)
{
    // Every rule is tested independently and in this order; child.Name is
    // read afresh for each test.
    if (child.Name == "org")
    {
        OrgOptimization(child);
    }

    if (child.Name == "fn")
    {
        NameOptimization(parent, child);
    }

    if (child.Name == "rrule")
    {
        RruleOptimization(child);
    }

    if (child.Name == "geo" || child.Name == "location")
    {
        GeoOptimization(child);
    }

    if (child.Name == "hentry")
    {
        UpdatedDateOptimization(child);
    }

    if (child.Name == "vevent")
    {
        CalendarDateOptimization(child);
    }

    // A node with neither a value nor children carries no information.
    bool isBlank = child.Value == string.Empty && child.Nodes.Count == 0;
    if (isBlank)
    {
        parent.Nodes.Remove(child);
        return;
    }

    // A "value" or "type" child overrides the parent's own text value.
    if (child.Name == "value" || child.Name == "type")
    {
        parent.Value = string.Empty;
    }

    // Backwards, so that removals do not disturb the nodes still to visit.
    for (int index = child.Nodes.Count - 1; index >= 0; index--)
    {
        OptimizesNode(child, child.Nodes[index]);
    }
}
/// <summary>
/// Stores a JSON string on the parent: as its ElementId when the property is "id", otherwise as a new child node
/// </summary>
/// <param name="node">Parent node</param>
/// <param name="propertyName">Name of the JSON property the string belongs to</param>
/// <param name="jsonValue">Value read from the JSON reader</param>
private void AddString(UfDataNode node, string propertyName, object jsonValue)
{
    string text = System.Convert.ToString(jsonValue);

    // "id" is metadata about the parent, not a child of it.
    if (propertyName == "id")
    {
        node.ElementId = text;
        return;
    }

    UfDataNode leaf = new UfDataNode();
    leaf.Name = propertyName;
    leaf.Value = text;
    node.Nodes.Add(leaf);
}
/// <summary>
/// Removes a node from its parent when it has an empty value and no children; otherwise cleans its children the same way
/// </summary>
/// <param name="parent">Node that owns <paramref name="child"/></param>
/// <param name="child">Node being examined</param>
private void CleanDataNode(UfDataNode parent, UfDataNode child)
{
    bool isEmpty = child.Value == string.Empty && child.Nodes.Count == 0;
    if (isEmpty)
    {
        parent.Nodes.Remove(child);
        return;
    }

    // Visit children last-to-first so removals do not shift unvisited items.
    // The emptiness test above runs before this loop, so a node that becomes
    // empty because all of its children were removed is itself kept.
    for (int index = child.Nodes.Count - 1; index >= 0; index--)
    {
        CleanDataNode(child, child.Nodes[index]);
    }
}
/// <summary>
/// Tells whether the collection contains a node with the given name.
/// </summary>
/// <param name="name">Node name to look for</param>
/// <returns>True when at least one node has that name</returns>
public bool Exists(string name)
{
    foreach (object entry in this.InnerList)
    {
        UfDataNode candidate = (UfDataNode)entry;
        if (candidate.Name == name)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Gets the first node in the collection with the given name.
/// </summary>
/// <param name="name">Node name to look for</param>
/// <returns>The first matching node, or null when no node has that name</returns>
public UfDataNode this[string name]
{
    get
    {
        foreach (object entry in this.InnerList)
        {
            UfDataNode candidate = (UfDataNode)entry;
            if (candidate.Name == name)
            {
                return candidate;
            }
        }

        return null;
    }
}
/// <summary>
/// Parses one microformat out of an HTML document and stores the result in the data field.
/// </summary>
/// <param name="htmlDoc">Document to parse</param>
/// <param name="url">Url passed on to the parser together with the document</param>
/// <param name="format">Format describer of the microformat to look for</param>
/// <param name="multiples">True to add the result as a child of the existing data; false to replace the existing data</param>
/// <param name="urlReport">Report entry that receives the page title found by the parser</param>
private void ParseUf(HtmlDocument htmlDoc, string url, UfFormatDescriber format, bool multiples, Url urlReport)
{
    UfParse parser = new UfParse();
    parser.Load(htmlDoc, url, format);

    if (!multiples)
    {
        data = parser.Data;
    }
    else
    {
        data.Nodes.Add(parser.Data);
    }

    urlReport.HtmlPageTitle = parser.HtmlPageTitle;
}
/// <summary>
/// Serialises the children of a UfDataNode that match a format's base element as JSON under a "microformats" object
/// </summary>
/// <param name="node">Node</param>
/// <param name="formatDescriber">Microformat format describer object</param>
/// <returns>JSON string, wrapped in the callback call when the callBack field is not empty</returns>
public string Convert(UfDataNode node, UfFormatDescriber formatDescriber)
{
    // Copy the matching children into the output tree.
    foreach (UfDataNode candidate in node.Nodes)
    {
        if (candidate.Name != formatDescriber.BaseElement.Name)
        {
            continue;
        }

        UfDataNode copied = tree.Nodes.Append(candidate.Name, candidate.Value, candidate.SourceUrl, candidate.RepresentativeNode);
        if (candidate.Nodes.Count > 0)
        {
            AddChildNodes(copied, candidate, formatDescriber.BaseElement);
        }
    }

    string json = "";
    if (callBack != string.Empty)
    {
        json += callBack + "( ";
    }

    json += "{\"microformats\": {";

    foreach (UfDataNode entry in tree.Nodes)
    {
        json += BuildDataString(entry, true, false);
    }

    // Drop the last two characters written by BuildDataString
    // (presumably a trailing separator - TODO confirm).
    if (tree.Nodes.Count > 0)
    {
        json = json.Substring(0, json.Length - 2);
    }

    json += AddUfErrors();
    json += AddReporting(node);

    // Close both the "microformats" object and the outer object.
    json += "}}";

    if (callBack != string.Empty)
    {
        json += " )";
    }

    return json;
}
/// <summary>
/// Adds a new data node to the tree, honouring the element's concatenation and multiples settings
/// </summary>
/// <param name="baseNode">Html node the data was parsed from; its OuterHtml is recorded on the new node</param>
/// <param name="ufData">Parent node</param>
/// <param name="ufNewDataNode">Node to be added</param>
/// <param name="ufElement">The uF element describer</param>
private void AddNewDateNode(HtmlNode baseNode, UfDataNode ufData, UfDataNode ufNewDataNode, UfElementDescriber ufElement)
{
    if (IsDuplicateNode(ufData, ufNewDataNode))
    {
        return;
    }

    ufNewDataNode.OuterHtml = baseNode.OuterHtml;

    // Value/type pair structures keep their text in a "value" child
    // instead of on the node itself.
    bool isValueTypePair = ufElement.Elements["value"] != null && ufElement.Elements["type"] != null;
    if (isValueTypePair)
    {
        UfDataNode valueChild = new UfDataNode("value", ufNewDataNode.Value);
        ufNewDataNode.Nodes.Add(valueChild);
        ufNewDataNode.Value = "";
    }

    if (ufElement.ConcatenateValues)
    {
        // Concatenation: the first instance creates the node, later ones extend its value.
        UfDataNode existing = ufData.Nodes[ufNewDataNode.Name];
        if (existing == null)
        {
            ufData.Nodes.Add(ufNewDataNode);
        }
        else
        {
            existing.Value += ufNewDataNode.Value;
        }
    }
    else if (!ufElement.Multiples)
    {
        // Singular: only the first instance is kept.
        if (ufData.Nodes[ufNewDataNode.Name] == null)
        {
            ufData.Nodes.Add(ufNewDataNode);
        }
    }
    else
    {
        // Multiples: every instance is kept.
        ufData.Nodes.Add(ufNewDataNode);
    }
}
/// <summary>
/// Renders a data object as plain text, followed by the recorded errors and a url report when those are available
/// </summary>
/// <param name="node">Node</param>
/// <returns>String with indented tree structure</returns>
public string Convert(UfDataNode node)
{
    string text = BuildDataString("ufxtract\n", node, 0);

    // Errors section - written only when at least one error was recorded.
    if (errors != null && errors.Count != 0)
    {
        text += "\n\nerrors\n";
        foreach (UfError problem in errors)
        {
            text += "msg: " + problem.Message + "\n";
            text += "url: " + problem.Address + "\n";
            if (problem.Status != 0)
            {
                text += "status: " + problem.Status.ToString() + "\n";
            }
        }
    }

    // Report section - written whenever a Urls object is provided.
    if (urls != null)
    {
        text += "\n\nreport\n";
        foreach (Url visited in urls)
        {
            text += "url: " + visited.Address + "\n";
            text += "status: " + visited.Status.ToString() + "\n";
            text += "millisec: " + visited.LoadTime.Milliseconds.ToString() + "\n\n";
        }

        text += "found: " + node.Nodes.Count.ToString();
    }

    return text;
}
/// <summary>
/// Breaks telephone strings such as "fax:01234 1234567" into value/type child nodes
/// </summary>
/// <param name="node">Node containing 'tel' data; it is renamed to "tel" and its own value is cleared</param>
/// <param name="text">Telephone number string, optionally prefixed with "type:"</param>
public static void TelOptimization(UfDataNode node, string text)
{
    // Split on the FIRST colon only. Splitting on every colon silently
    // dropped whatever followed a second colon inside the number.
    int separator = text.IndexOf(':');

    node.Name = "tel";
    node.Value = "";

    if (separator > 0)
    {
        // Text holds both the type (before the colon) and the value (after it).
        node.Nodes.Add(new UfDataNode("value", text.Substring(separator + 1)));
        node.Nodes.Add(new UfDataNode("type", text.Substring(0, separator)));
    }
    else
    {
        // No type prefix: the whole text is the value.
        node.Nodes.Add(new UfDataNode("value", text));
    }
}
/// <summary>
/// Builds the "parser-information" JSON member describing the parser and the pages it loaded.
/// </summary>
/// <param name="node">Not used by this method</param>
/// <returns>JSON fragment (with a leading comma when other members precede it), or an empty string when no urls were recorded</returns>
private string AddReporting(UfDataNode node)
{
    if (urls == null)
    {
        return string.Empty;
    }

    // errors may be absent; treat that the same as "no errors".
    bool hasErrors = errors != null && errors.Count > 0;

    // A separating comma is needed only when something was written before this member.
    string output = (tree.Nodes.Count > 0 || hasErrors) ? ", " : string.Empty;
    output += "\"parser-information\" : {";
    output += "\"name\" : \"UfXtract\", ";
    output += "\"version\" : \"" + Assembly.GetExecutingAssembly().GetName().Version.ToString() + "\"";

    // Open the "page" array once. It used to be re-opened for every url,
    // which produced invalid JSON for more than one page, and an empty
    // list left a dangling comma after "version".
    if (urls.Count > 0)
    {
        output += ", \"page\" : [";
        for (int i = 0; i < urls.Count; i++)
        {
            output += "{\"url\" : \"" + EncodeJsonText(urls[i].Address) + "\", ";
            output += "\"http-status\" : \"" + urls[i].Status.ToString() + "\", ";
            if (urls[i].HtmlPageTitle != null)
            {
                output += "\"title\" : \"" + EncodeJsonText(urls[i].HtmlPageTitle) + "\", ";
            }

            // NOTE(review): Milliseconds looks like the millisecond component only,
            // not the total load time - confirm against the type of LoadTime.
            output += "\"parse-time\" : \"" + EncodeJsonText(urls[i].LoadTime.Milliseconds.ToString()) + "\"";
            output += (i != urls.Count - 1) ? "}, " : "}] ";
        }
    }

    output += "}";
    return output;
}
/// <summary>
/// Copies the children of a source node into the output node, guided by the element describer, and recurses into them.
/// </summary>
/// <param name="xNode">Output node receiving the copies</param>
/// <param name="node">Source node whose children are copied</param>
/// <param name="ufElement">Describer of the element <paramref name="node"/> represents</param>
private void AddChildNodes(UfDataNode xNode, UfDataNode node, UfElementDescriber ufElement)
{
    // rel/rev style formats are defined by attribute values: copy their children as they are.
    if (ufElement.AttributeValues.Count > 0)
    {
        foreach (UfDataNode source in node.Nodes)
        {
            xNode.Nodes.Add(source.Name, source.Value, source.SourceUrl, source.RepresentativeNode);
        }
    }

    foreach (UfElementDescriber describer in ufElement.Elements)
    {
        foreach (UfDataNode source in node.Nodes)
        {
            bool described = source.Name == describer.Name || source.Name == describer.CompoundName;
            if (!described)
            {
                continue;
            }

            // Elements that allow multiples are appended to an array-style
            // node; singular elements are added directly.
            UfDataNode target;
            if (describer.Multiples)
            {
                target = xNode.Nodes.Append(source.Name, source.Value, source.SourceUrl, source.RepresentativeNode);
            }
            else
            {
                int addedAt = xNode.Nodes.Add(source.Name, source.Value, source.SourceUrl, source.RepresentativeNode);
                target = xNode.Nodes[addedAt];
            }

            AddChildNodes(target, source, describer);
        }
    }
}
/// <summary>
/// Builds the plain-text form of a data object: the indented tree, then an errors block and a report block when data for them exists
/// </summary>
/// <param name="node">Node</param>
/// <returns>String with indented tree structure</returns>
public string Convert(UfDataNode node)
{
    string result = "ufxtract\n";
    result = BuildDataString(result, node, 0);

    // Errors block
    bool hasErrors = errors != null && errors.Count != 0;
    if (hasErrors)
    {
        result += "\n\nerrors\n";
        foreach (UfError item in errors)
        {
            result += "msg: " + item.Message + "\n"
                + "url: " + item.Address + "\n";

            // A status of 0 is not reported.
            if (item.Status != 0)
            {
                result += "status: " + item.Status.ToString() + "\n";
            }
        }
    }

    // Report block
    if (urls == null)
    {
        return result;
    }

    result += "\n\nreport\n";
    foreach (Url page in urls)
    {
        result += "url: " + page.Address + "\n"
            + "status: " + page.Status.ToString() + "\n"
            + "millisec: " + page.LoadTime.Milliseconds.ToString() + "\n\n";
    }

    result += "found: " + node.Nodes.Count.ToString();
    return result;
}
/// <summary>
/// Detects a double value entry by comparing the new node with the last node already added.
/// This can happen as xPath will find the legal use of more than one class/rel attribute on a single element
/// </summary>
/// <param name="ufData">Parent node</param>
/// <param name="ufNewDataNode">Node to be added</param>
/// <returns>True when the new node has a non-empty value equal to the last child's value and the same ParentNodeNames</returns>
private bool IsDuplicateNode(UfDataNode ufData, UfDataNode ufNewDataNode)
{
    // With no children yet there is nothing to duplicate.
    if (!(ufData.Nodes.Count > 0))
    {
        return false;
    }

    UfDataNode previous = ufData.Nodes[ufData.Nodes.Count - 1];

    return ufNewDataNode.Value != ""
        && ufNewDataNode.Value == previous.Value
        && ufNewDataNode.ParentNodeNames == previous.ParentNodeNames;
}
/// <summary>
/// Finds a node by position from all the nodes with the same name
/// </summary>
/// <param name="name">Name to search for</param>
/// <param name="pos">Zero-based position within the nodes that have that name</param>
/// <returns>The matching node, or a new empty node when fewer than pos + 1 nodes have that name or pos is negative</returns>
public UfDataNode GetNameByPosition(string name, int pos)
{
    // The previous test (Count >= pos) let pos == Count and negative
    // positions through, and both then indexed out of range. Counting the
    // matches directly also avoids building a temporary collection.
    if (pos >= 0)
    {
        int matchesSeen = 0;
        for (int i = 0; i < this.InnerList.Count; i++)
        {
            UfDataNode testNode = (UfDataNode)this.InnerList[i];
            if (testNode.Name != name)
            {
                continue;
            }

            if (matchesSeen == pos)
            {
                return testNode;
            }

            matchesSeen++;
        }
    }

    return new UfDataNode();
}
/// <summary>
/// Builds a UfDataNode from an XML node and attaches it to the parent; text nodes set the parent's value instead.
/// </summary>
/// <param name="node">Parent data node</param>
/// <param name="xmlNode">XML node to convert, together with its children</param>
private void CreateNode(UfDataNode node, XmlNode xmlNode)
{
    UfDataNode dataNode = new UfDataNode();
    dataNode.Name = xmlNode.Name;

    string rawValue = xmlNode.Value;
    if (!string.IsNullOrEmpty(rawValue))
    {
        dataNode.Value = rawValue;
    }

    // Text content belongs to the element that contains it.
    bool isTextNode = xmlNode.NodeType == XmlNodeType.Text;
    if (isTextNode)
    {
        node.Value = xmlNode.InnerText;
    }

    // Copy the id attribute, when the node can have attributes and has one.
    if (xmlNode.Attributes != null && xmlNode.Attributes["id"] != null)
    {
        dataNode.ElementId = xmlNode.Attributes["id"].Value;
    }

    foreach (XmlNode nested in xmlNode.ChildNodes)
    {
        CreateNode(dataNode, nested);
    }

    // Everything except text nodes becomes a child of the parent.
    if (!isTextNode)
    {
        node.Nodes.Add(dataNode);
    }
}
/// <summary>
/// Parses UfXtract JSON text into a UfDataNode structure
/// </summary>
/// <param name="json">JSON text</param>
/// <returns>Root of the resulting node tree, after CleanData has been applied</returns>
public UfDataNode Convert(string json)
{
    UfDataNode result = new UfDataNode();
    string lastPropertyName = "";

    using (JsonReader tokens = new JsonTextReader(new StringReader(json)))
    {
        while (tokens.Read())
        {
            // Only tokens nested deeper than level 2 are processed.
            bool deepEnough = tokens.Depth > 2;
            if (deepEnough)
            {
                // Each test reads the current token type again because
                // CreateArray (and presumably CreateObject) move the reader forward.
                if (tokens.TokenType == JsonToken.PropertyName)
                {
                    lastPropertyName = tokens.Value.ToString();
                }

                if (tokens.TokenType == JsonToken.StartArray)
                {
                    CreateArray(result, tokens, lastPropertyName);
                }

                if (tokens.TokenType == JsonToken.StartObject)
                {
                    CreateObject(result, tokens, lastPropertyName);
                }

                if (tokens.TokenType == JsonToken.String)
                {
                    AddString(result, lastPropertyName, tokens.Value);
                }
            }
        }
    }

    CleanData(result);
    return result;
}
/// <summary>
/// Appends a value to the value list of every node with the given name; when there is none, a new node is created for it.
/// </summary>
/// <param name="name">Node name</param>
/// <param name="value">Value to append</param>
/// <returns>Collection index of the last node that received the value, or of the newly created node</returns>
public int AppendArrayList(string name, string value)
{
    int matchedIndex = -1;

    for (int slot = 0; slot < this.InnerList.Count; slot++)
    {
        UfDataNode existing = (UfDataNode)this.InnerList[slot];
        if (existing.Name != name)
        {
            continue;
        }

        existing.ValueArray.Add(value);
        matchedIndex = slot;
    }

    if (matchedIndex == -1)
    {
        // No node carries this name yet.
        UfDataNode fresh = new UfDataNode();
        fresh.Name = name;
        fresh.ValueArray.Add(value);
        matchedIndex = this.InnerList.Add(fresh);
    }

    return matchedIndex;
}
/// <summary>
/// Adds an item to the array-style node with the given name; the node is created (with value "Array") when it is missing.
/// </summary>
/// <param name="name">Name of the array-style node</param>
/// <param name="value">Item value</param>
/// <param name="sourceurl">Item source url</param>
/// <param name="representativenode">Item representative flag</param>
/// <returns>The item node that was added (the last one when several nodes share the name)</returns>
public UfDataNode Append(string name, string value, string sourceurl, bool representativenode)
{
    int itemIndex = -1;
    UfDataNode result = new UfDataNode();

    int slot = 0;
    while (slot < this.InnerList.Count)
    {
        UfDataNode holder = (UfDataNode)this.InnerList[slot];
        if (holder.Name == name)
        {
            // Items are named by their position inside the holder.
            UfDataNode addition = new UfDataNode("", value, sourceurl, representativenode);
            itemIndex = holder.Nodes.Add(addition);
            addition.Name = itemIndex.ToString();
            result = addition;
        }

        slot++;
    }

    if (itemIndex == -1)
    {
        // Create the holder, then place the value in it as its first item.
        UfDataNode holder = new UfDataNode(name, "Array");
        this.InnerList.Add(holder);

        UfDataNode first = new UfDataNode("0", value, sourceurl, representativenode);
        itemIndex = holder.Nodes.Add(first);
        result = first;
    }

    return result;
}
/// <summary>
/// Creates a node from the given details and adds it to the end of the collection.
/// </summary>
/// <param name="name">Node name</param>
/// <param name="value">Node value</param>
/// <param name="sourceurl">Source url recorded on the node</param>
/// <param name="representativenode">Representative flag recorded on the node</param>
/// <returns>Index at which the node was added</returns>
public int Add(string name, string value, string sourceurl, bool representativenode)
{
    return InnerList.Add(new UfDataNode(name, value, sourceurl, representativenode));
}
/// <summary>
/// Creates a name/value node and adds it to the end of the collection.
/// </summary>
/// <param name="name">Node name</param>
/// <param name="value">Node value</param>
/// <returns>Index at which the node was added</returns>
public int Add(string name, string value)
{
    return InnerList.Add(new UfDataNode(name, value));
}
/// <summary>
/// Writes a UfDataNode structure to a stream as UTF-8 XML
/// </summary>
/// <param name="node">Node</param>
/// <param name="stream">Stream XML is added to</param>
public void Convert(UfDataNode node, Stream stream)
{
    // The writer is handed to ConvertIt; this method neither flushes nor disposes it itself.
    ConvertIt(node, new XmlConformWriter(stream, Encoding.UTF8));
}
/// <summary>
/// Inserts a node into the collection at the given position.
/// </summary>
/// <param name="index">Zero-based position at which the node is inserted</param>
/// <param name="newDataNode">Node to insert</param>
public void Insert(int index, UfDataNode newDataNode)
{
    this.InnerList.Insert(index, newDataNode);
}
/// <summary>
/// Finds the html nodes under <paramref name="baseNode"/> that match a microformat element describer,
/// resolves the include patterns on them and hands them to ParseUfElementValue.
/// </summary>
/// <param name="baseNode">Html node to search below</param>
/// <param name="ufElement">The uF element describer being looked for</param>
/// <param name="ufData">Data node that receives the parsed values and the source html</param>
/// <param name="ufTopLevel">Not used by this method</param>
private void ParseUfElement(HtmlNode baseNode, UfElementDescriber ufElement, UfDataNode ufData, bool ufTopLevel)
{
    // Select nodes with required attribute: class, rel or rev
    HtmlNodeCollection nodes = baseNode.SelectNodes(".//@" + ufElement.Attribute);
    if (nodes == null)
    {
        return;
    }

    foreach (HtmlNode node in nodes)
    {
        // Load the attribute class, rel or rev
        HtmlAttribute att = node.Attributes[ufElement.Attribute];
        HtmlAttribute compoundAtt = node.Attributes[ufElement.CompoundAttribute];
        if (att == null)
        {
            continue;
        }

        // Attribute tokens are identifiers, so lower-case them culture-independently.
        string attValue = att.Value.ToLowerInvariant();

        // We are dealing with elemental uf like XFN
        if (ufElement.AttributeValues.Count > 0)
        {
            // Search for any of the attribute values ie friend or contact in rel
            bool anyValueFound = false;
            foreach (UfAttributeValueDescriber avd in ufElement.AttributeValues)
            {
                if (UfHelpers.FindAttributeValue(attValue, avd.Name))
                {
                    anyValueFound = true;
                    break;
                }
            }

            if (anyValueFound)
            {
                ParseUfElementValue(node, ufElement, ufData);
            }

            continue;
        }

        bool nameFound = UfHelpers.FindAttributeValue(attValue, ufElement.Name);

        // Single attribute value ie hcard in class
        bool found = nameFound && ufElement.CompoundName == string.Empty;

        // Dual attribute values, for compound structures ie reviewer in
        // hreview which is a hcard. The compound attribute may be absent
        // from the node; it used to be dereferenced without a check.
        if (!found && nameFound && compoundAtt != null
            && UfHelpers.FindAttributeValue(compoundAtt.Value.ToLowerInvariant(), ufElement.CompoundName))
        {
            found = true;
        }

        if (!found)
        {
            continue;
        }

        if (HasCompound(baseNode, node.ParentNode, false) && !ufElement.RootElement)
        {
            continue;
        }

        // Include pattern: <a class="include" href="#id"> and <object class="include" data="#id">
        AppendIncludeReferenceTargets(node, ".//a[@class[contains(.,'include')]]", "href");
        AppendIncludeReferenceTargets(node, ".//object[@class[contains(.,'include')]]", "data");

        // Table header include pattern, on the cell itself ...
        if (node.Name == "td")
        {
            AppendTableHeaderIncludes(node);
        }

        // ... and on every cell of a matched row.
        if (node.Name == "tr")
        {
            foreach (HtmlNode child in node.ChildNodes)
            {
                if (child.Name == "td")
                {
                    AppendTableHeaderIncludes(child);
                }
            }
        }

        // Adds the Html from which uf is parsed
        foreach (HtmlNode childNode in node.ChildNodes)
        {
            ufData.OuterHtml += childNode.OuterHtml;
        }

        // Recursion. Dont add a second data node for a format description that
        // does not support either multiples or concatenation of values.
        if (ufElement.Multiples || ufElement.ConcatenateValues || ufData.Nodes.Exists(ufElement.Name) == false)
        {
            ParseUfElementValue(node, ufElement, ufData);
        }
    }
}

/// <summary>
/// Resolves include-pattern references below a node: for every reference whose link starts with "#",
/// the first element with that id is appended to the node inside a div.
/// </summary>
/// <param name="node">Node searched for references and extended with the included html</param>
/// <param name="referenceXPath">XPath selecting the reference elements below the node</param>
/// <param name="linkAttribute">Attribute of the reference element holding the "#id" link</param>
private void AppendIncludeReferenceTargets(HtmlNode node, string referenceXPath, string linkAttribute)
{
    HtmlNodeCollection includeRefNodes = node.SelectNodes(referenceXPath);
    if (includeRefNodes == null)
    {
        return;
    }

    foreach (HtmlNode includeRefNode in includeRefNodes)
    {
        string link = GetAttributeValue(includeRefNode, linkAttribute);
        if (!link.StartsWith("#", System.StringComparison.Ordinal))
        {
            continue;
        }

        link = link.Replace("#", "");

        // NOTE(review): link is placed in the XPath unescaped; an id containing
        // a single quote would produce an invalid expression.
        HtmlNodeCollection includeNodes = node.SelectNodes("//*[@id='" + link + "']");
        if (includeNodes != null && includeNodes.Count > 0)
        {
            node.AppendChild(HtmlNode.CreateNode("<div>" + includeNodes[0].OuterHtml + "</div>"));
        }
    }
}

/// <summary>
/// Resolves the table header include pattern for a cell: every node whose id is listed in the
/// cell's headers attribute is appended to the cell inside a div.
/// </summary>
/// <param name="cell">Table cell to extend; nothing happens when it has no headers attribute value</param>
private void AppendTableHeaderIncludes(HtmlNode cell)
{
    string headers = GetAttributeValue(cell, "headers");
    if (headers == string.Empty)
    {
        return;
    }

    // Space separated list of ids; a value without spaces yields a single item.
    string[] headerIds = headers.Split(' ');
    for (int i = 0; i < headerIds.Length; i++)
    {
        HtmlNodeCollection includeNodes = this.startNode.SelectNodes("//node()[@id='" + headerIds[i].Trim() + "']");
        if (includeNodes == null || includeNodes.Count == 0)
        {
            continue;
        }

        // Appends fresh node to avoid overload issues
        foreach (HtmlNode includeNode in includeNodes)
        {
            cell.AppendChild(HtmlNode.CreateNode("<div>" + includeNode.OuterHtml + "</div>"));
        }
    }
}
/// <summary>
/// Inserts a freshly parsed data node under its parent, honouring the
/// element describer's concatenation and multiples settings.
/// </summary>
/// <param name="baseNode">Html node the value was read from; its OuterHtml is copied onto the new node</param>
/// <param name="ufData">Parent node</param>
/// <param name="ufNewDataNode">Node to be added</param>
/// <param name="ufElement">The uF element describer</param>
private void AddNewDateNode(HtmlNode baseNode, UfDataNode ufData, UfDataNode ufNewDataNode, UfElementDescriber ufElement)
{
    // Nothing to do when the previous sibling already carries this value
    if (IsDuplicateNode(ufData, ufNewDataNode))
        return;

    ufNewDataNode.OuterHtml = baseNode.OuterHtml;

    // For value/type pair structures the text moves into a child "value" node
    bool isValueTypePair = ufElement.Elements["value"] != null
        && ufElement.Elements["type"] != null;
    if (isValueTypePair)
    {
        ufNewDataNode.Nodes.Add(new UfDataNode("value", ufNewDataNode.Value));
        ufNewDataNode.Value = "";
    }

    // Plain multiples: every instance is kept
    if (!ufElement.ConcatenateValues && ufElement.Multiples)
    {
        ufData.Nodes.Add(ufNewDataNode);
        return;
    }

    // Concatenating and singular elements both depend on whether a node
    // with this name is already present on the parent
    UfDataNode existing = ufData.Nodes[ufNewDataNode.Name];
    if (existing == null)
    {
        ufData.Nodes.Add(ufNewDataNode);
    }
    else if (ufElement.ConcatenateValues)
    {
        // Concatenation: append to the value collected so far
        existing.Value += ufNewDataNode.Value;
    }
    // Singular with an existing node: only the first instance is taken
}
/// <summary>
/// Appends the source of one NUnit test method, built from an "assert" node,
/// to the supplied string builder.
/// </summary>
/// <param name="node">Node whose "test", "result" and "comment" children are read</param>
/// <param name="stringBuilder">Builder receiving the generated source</param>
/// <param name="assertNumber">Number used in the generated method name</param>
public void BuildAsset(UfDataNode node, StringBuilder stringBuilder, int assertNumber)
{
    UfDataNodes parts = node.Nodes;
    string test = parts["test"].Value;
    string result = parts["result"].Value;
    string comment = parts["comment"].Value;

    // Numbers up to 9 get a leading zero in the method name
    string methodPrefix = (assertNumber > 9) ? "public void Test_" : "public void Test_0";

    stringBuilder.AppendLine(" ");
    stringBuilder.AppendLine("[Test]");
    stringBuilder.AppendLine(methodPrefix + assertNumber.ToString() + "()");
    stringBuilder.AppendLine("{");
    stringBuilder.AppendLine("// " + test);

    BuildAssertResult(stringBuilder, test, result, comment);

    stringBuilder.AppendLine("}");
    stringBuilder.AppendLine(" ");
}
/// <summary>
/// Reports whether the given node is held by this collection
/// </summary>
/// <param name="aDataNode">Node to look for</param>
/// <returns>The result of the inner list's Contains call</returns>
public bool Contains(UfDataNode aDataNode)
{
    bool isPresent = this.InnerList.Contains(aDataNode);
    return isPresent;
}
/// <summary>
/// Gets the position of the given node within this collection
/// </summary>
/// <param name="aDataNode">Node to look for</param>
/// <returns>The result of the inner list's IndexOf call</returns>
public int IndexOf(UfDataNode aDataNode)
{
    int position = this.InnerList.IndexOf(aDataNode);
    return position;
}
/// <summary>
/// Writes the complete XML document: a "microformats" root containing the
/// data nodes, followed by an optional "errors" section and an optional
/// "parser-information" section.
/// </summary>
/// <param name="node">Node whose children are written under the root element</param>
/// <param name="writer">Xml writer receiving the document; it is flushed but not closed</param>
private void ConvertIt(UfDataNode node, XmlTextWriter writer)
{
    writer.Formatting = System.Xml.Formatting.Indented;
    writer.WriteStartDocument(true);
    writer.WriteStartElement("microformats");

    foreach (UfDataNode child in node.Nodes)
    {
        AddNode(child, writer);
    }

    // Errors section - only written when there is something to report
    if (errors != null && errors.Count != 0)
    {
        writer.WriteStartElement("errors");
        foreach (UfError error in errors)
        {
            writer.WriteStartElement("UfError");
            writer.WriteElementString("msg", error.Message);

            writer.WriteStartElement("url");
            // A status of 0 is left out of the output
            if (error.Status != 0)
                writer.WriteAttributeString("status", error.Status.ToString());
            writer.WriteString(error.Address);
            writer.WriteEndElement(); // url

            writer.WriteEndElement(); // UfError
        }
        writer.WriteEndElement(); // errors
    }

    // Parser information - the null test has to come before Count is read,
    // otherwise a missing url list throws instead of being skipped
    if (urls != null && urls.Count > 0)
    {
        writer.WriteStartElement("parser-information");
        writer.WriteElementString("name", "UfXtract");
        writer.WriteElementString("version", Assembly.GetExecutingAssembly().GetName().Version.ToString());

        foreach (Url url in urls)
        {
            writer.WriteStartElement("page");
            writer.WriteElementString("url", url.Address);
            writer.WriteElementString("http-status", url.Status.ToString());
            writer.WriteElementString("title", url.HtmlPageTitle);
            // NOTE(review): if LoadTime is a TimeSpan, Milliseconds is only the
            // 0-999 component; TotalMilliseconds may be what was intended - confirm
            writer.WriteElementString("parse-time", url.LoadTime.Milliseconds.ToString());
            writer.WriteEndElement(); // page
        }

        writer.WriteEndElement(); // parser-information
    }

    writer.WriteEndElement(); // microformats
    writer.WriteEndDocument();
    writer.Flush();
}
/// <summary>
/// Takes the given node out of this collection
/// </summary>
/// <param name="aDataNode">Node to remove; passed straight to the inner list's Remove</param>
public void Remove(UfDataNode aDataNode)
{
    this.InnerList.Remove(aDataNode);
}
/// <summary>
/// Serialises a UfDataNode structure as XML onto a text writer
/// </summary>
/// <param name="node">Node</param>
/// <param name="writer">Text writer that receives the XML</param>
public void Convert(UfDataNode node, TextWriter writer)
{
    // Wrap the caller's writer and hand over to the shared document routine
    ConvertIt(node, new XmlConformWriter(writer));
}
/// <summary>
/// Generates the C# source of an NUnit test fixture for one test page and
/// writes it to a file named "test_" + test name + ".cs" under the path field.
/// </summary>
/// <param name="node">Parsed test page; the first child supplies "summary", "description", "format" and the "assert" nodes</param>
/// <param name="url">Address of the test page, embedded in the generated SetUp method</param>
public void BuildTest(UfDataNode node, string url)
{
    // Find summary, description and format
    UfDataNode suite = node.Nodes[0];
    string summary = suite.Nodes["summary"].Value;
    string description = suite.Nodes["description"].Value;
    string format = suite.Nodes["format"].Value;

    // Computed once so the generated class name and the file name always agree
    string testName = TestName(summary, url);

    StringBuilder stringBuilder = new StringBuilder();
    stringBuilder.AppendLine("using System;");
    stringBuilder.AppendLine("using System.Collections;");
    stringBuilder.AppendLine("using System.Collections.Generic;");
    stringBuilder.AppendLine("using System.Text;");
    stringBuilder.AppendLine("using NUnit.Framework;");
    stringBuilder.AppendLine("using NUnit.Framework.Constraints;");
    stringBuilder.AppendLine("using NUnit.Framework.SyntaxHelpers;");
    stringBuilder.AppendLine("using UfXtract;");
    stringBuilder.AppendLine("using UfXtract.Utilities;");
    stringBuilder.AppendLine(" ");
    stringBuilder.AppendLine("namespace UfXtract.UnitTests." + format);
    stringBuilder.AppendLine("{");
    stringBuilder.AppendLine(" ");
    stringBuilder.AppendLine("[TestFixture]");
    stringBuilder.AppendLine("public class test_" + testName);
    stringBuilder.AppendLine("{");
    stringBuilder.AppendLine("// " + url);
    stringBuilder.AppendLine("// " + summary);
    stringBuilder.AppendLine("// " + description);
    stringBuilder.AppendLine("// Built: " + DateTime.Now.ToLongDateString());
    stringBuilder.AppendLine(" ");
    stringBuilder.AppendLine("UfWebRequest webRequest;");
    stringBuilder.AppendLine("UfDataNodes nodes;");
    stringBuilder.AppendLine(" ");
    stringBuilder.AppendLine("[SetUp]");
    stringBuilder.AppendLine("public void Test_Settup()");
    stringBuilder.AppendLine("{");
    stringBuilder.AppendLine("webRequest = new UfWebRequest();");
    stringBuilder.AppendLine("string url = \"" + url + "#uf\";");

    // Format names are identifiers, so compare them ordinally and ignoring
    // case; a culture-sensitive ToLower() mis-handles 'I' under some cultures
    if (string.Equals(format, "hcard", StringComparison.OrdinalIgnoreCase))
        stringBuilder.AppendLine("webRequest.Load(url, UfFormats.HCard());");
    else if (string.Equals(format, "hcalendar", StringComparison.OrdinalIgnoreCase))
        stringBuilder.AppendLine("webRequest.Load(url, UfFormats.HCalendar());");
    else if (string.Equals(format, "hresume", StringComparison.OrdinalIgnoreCase))
        stringBuilder.AppendLine("webRequest.Load(url, UfFormats.HResume());");
    else if (string.Equals(format, "hrecipe", StringComparison.OrdinalIgnoreCase))
        stringBuilder.AppendLine("webRequest.Load(url, UfFormats.HRecipe());");
    else if (string.Equals(format, "geo", StringComparison.OrdinalIgnoreCase))
        stringBuilder.AppendLine("webRequest.Load(url, UfFormats.Geo());");

    stringBuilder.AppendLine("nodes = webRequest.Data.Nodes;");
    stringBuilder.AppendLine("}");
    stringBuilder.AppendLine(" ");

    // One generated test method per "assert" node, numbered from 1
    int assertNumber = 1;
    foreach (UfDataNode childNode in suite.Nodes)
    {
        if (childNode.Name == "assert")
        {
            BuildAsset(childNode, stringBuilder, assertNumber);
            assertNumber++;
        }
    }

    stringBuilder.AppendLine("}");
    stringBuilder.AppendLine("}");

    string filename = "test_" + testName + ".cs";
    string filepath = path + filename;
    WriteFile(stringBuilder.ToString(), filepath);
}
/// <summary>
/// Appends a node to this collection
/// </summary>
/// <param name="newDataNode">Node to append</param>
/// <returns>The value returned by the inner list's Add call</returns>
public int Add(UfDataNode newDataNode)
{
    int position = this.InnerList.Add(newDataNode);
    return position;
}
/// <summary>
/// Detects a doubled value entry.
/// This can happen because xPath will match the legal use of more than one
/// class/rel attribute on a single element.
/// </summary>
/// <param name="ufData">Parent node</param>
/// <param name="ufNewDataNode">Node to be added</param>
/// <returns>
/// True when the new node has a non-empty value and both its value and its
/// parent node names equal those of the parent's last child
/// </returns>
private bool IsDuplicateNode(UfDataNode ufData, UfDataNode ufNewDataNode)
{
    int siblingCount = ufData.Nodes.Count;
    if (siblingCount <= 0)
        return false;

    // Only the most recently added sibling is compared
    UfDataNode previous = ufData.Nodes[siblingCount - 1];

    return ufNewDataNode.Value != ""
        && ufNewDataNode.Value == previous.Value
        && ufNewDataNode.ParentNodeNames == previous.ParentNodeNames;
}
/// <summary>
/// Walks the test suite groups and loads the test fixture behind each one
/// </summary>
/// <param name="node">Node whose children each carry a "url" and a "format" child</param>
public void FindTestSuite(UfDataNode node)
{
    foreach (UfDataNode suiteNode in node.Nodes)
    {
        UfDataNodes fields = suiteNode.Nodes;
        string url = fields["url"].Value;

        // Not used below; the read is kept so that an entry without a
        // "format" child fails here exactly as it did before
        string declaredFormat = fields["format"].Value;

        LoadTestFixture(url);
        number += 1;
    }
}
/// <summary>
/// Builds the data node for one matched html node and adds it to the parent
/// data node. Structural elements recurse into their child describers first;
/// the value is then taken from the first source that applies, tested in a
/// fixed order (url/text structures, value and value-title excerpting,
/// element specific attributes, and finally the node text).
/// </summary>
/// <param name="baseNode">Html node that matched the element describer</param>
/// <param name="ufElement">The uF element describer</param>
/// <param name="ufData">Parent data node that receives the result</param>
private void ParseUfElementValue(HtmlNode baseNode, UfElementDescriber ufElement, UfDataNode ufData)
{
    // Create a single data node for whatever data insertion is needed.
    UfDataNode ufd = new UfDataNode();
    if (ufElement.CompoundName != string.Empty)
        ufd.ParentNodeNames = ufData.ParentNodeNames + ufElement.CompoundName + " ";
    else
        ufd.ParentNodeNames = ufData.ParentNodeNames + ufElement.Name + " ";

    ufd.ElementId = GetAttributeValue(baseNode, "id");

    // A parent node in the data schema
    if (ufElement.Elements.Count > 0)
    {
        if (ufElement.CompoundName == string.Empty)
        {
            // Add an empty structural node
            ufd.Name = ufElement.Name;
        }
        else
        {
            // This is for compound structures, ie reviewer in hreview is a hcard
            // Need to find a second attribute value to do this
            HtmlAttribute att = baseNode.Attributes[ufElement.CompoundAttribute];
            if (att != null)
            {
                if (UfHelpers.FindAttributeValue(att.Value.ToLower(), ufElement.CompoundName))
                {
                    // Add an empty structural node using compound name
                    ufd.Name = ufElement.CompoundName;
                }
            }
        }

        // Recursion through the dom structure
        foreach (UfElementDescriber ufChildElement in ufElement.Elements)
            ParseUfElement(baseNode, ufChildElement, ufd, false);
    }

    // A value needs to be found
    if (ufElement.Type != UfElementDescriber.PropertyTypes.None)
    {
        // Find child nodes with "value" or "value-title" classes
        HtmlNodeCollection valueNodes = null;
        HtmlNodeCollection valueTitleNodes = null;

        // The value pattern
        if (ufElement.Elements["value"] == null && ufElement.Name != "value")
        {
            valueNodes = baseNode.SelectNodes(".//*[contains(concat(' ', @class, ' '),' value ')]");
        }

        // The value-title pattern is only allowed for some property types ie dates
        // or name properties ie type, duration, geo, latitude and longitude
        if (ufElement.Type == UfElementDescriber.PropertyTypes.Date
            || ufElement.Name == "type"
            || ufElement.Name == "duration"
            || ufElement.Name == "geo"
            || ufElement.Name == "latitude"
            || ufElement.Name == "longitude")
        {
            valueTitleNodes = baseNode.SelectNodes(".//*[contains(concat(' ', @class, ' '),' value-title ')]");
        }

        if (ufElement.Type == UfElementDescriber.PropertyTypes.UrlTextAttribute
            || ufElement.Type == UfElementDescriber.PropertyTypes.UrlTextTag
            || ufElement.Type == UfElementDescriber.PropertyTypes.UrlText)
        {
            string text = UfHelpers.HtmlToText(baseNode, false);
            string link = UfHelpers.GetAbsoluteUrl(GetAttributeValue(baseNode, "href"), this.baseUrl, url);
            string att = GetAttributeValue(baseNode, ufElement.Attribute);

            ufd.Name = ufElement.Name;
            UfDataNode ufd1 = new UfDataNode();
            UfDataNode ufd2 = new UfDataNode();
            UfDataNode ufd3 = new UfDataNode();

            ufd1.Name = "text";
            ufd1.Value = text;
            ufd.Nodes.Add(ufd1);

            ufd2.Name = "link";
            ufd2.Value = link;
            ufd.Nodes.Add(ufd2);

            // Add the attribute value used for XFN like structures
            if (ufElement.Type == UfElementDescriber.PropertyTypes.UrlTextAttribute)
            {
                ufd3.Name = ufElement.Attribute;
                ufd3.Value = att;
                ufd.Nodes.Add(ufd3);
            }

            // Add the tag element of the url
            if (ufElement.Type == UfElementDescriber.PropertyTypes.UrlTextTag)
            {
                ufd3.Name = "tag";
                ufd3.Value = UfHelpers.GetTagFromUrl(link);
                ufd.Nodes.Add(ufd3);
            }

            // These structures are added directly, without going through AddNewDateNode
            if (ufElement.CompoundName == string.Empty)
            {
                ufData.Nodes.Add(ufd);
            }
            else
            {
                HtmlAttribute att1 = baseNode.Attributes[ufElement.CompoundAttribute];
                if (att1 != null)
                {
                    if (UfHelpers.FindAttributeValue(att1.Value.ToLower(), ufElement.CompoundName))
                    {
                        ufd.Name = ufElement.CompoundName;
                        ufData.Nodes.Add(ufd);
                    }
                }
            }
        }
        // The value excerpting pattern
        else if (valueNodes != null)
        {
            string text = string.Empty;
            foreach (HtmlNode node in valueNodes)
            {
                if (node.Name == "img" || node.Name == "area")
                {
                    if (ufElement.Type == UfElementDescriber.PropertyTypes.Date)
                        text += GetAttributeValue(node, "title").Replace(" ", "") + " ";
                    else
                        text += GetAttributeValue(node, "title");
                }
                else if (node.Name == "abbr")
                {
                    if (ufElement.Type == UfElementDescriber.PropertyTypes.Date)
                        text += GetAttributeValue(node, "title").Replace(" ", "") + " ";
                    else
                        text += GetAttributeValue(node, "title");
                }
                else
                {
                    if (ufElement.Type == UfElementDescriber.PropertyTypes.Date)
                        text += UfHelpers.HtmlToText(node, false).Replace(" ", "") + " ";
                    else
                        text += UfHelpers.HtmlToText(node, false) + " ";
                }
            }

            if (ufElement.Type == UfElementDescriber.PropertyTypes.Date)
            {
                // Take the fragmented bits and hand them to the ISO date-time parser
                ISODateTime isoDateTime = new ISODateTime();
                text = isoDateTime.ParseUFFragmented(text);
            }

            ufd.Name = ufElement.Name;
            ufd.Value = text.Trim();
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // The value-title excerpting pattern
        else if (valueTitleNodes != null)
        {
            // Only the first value-title node is used
            string text = GetAttributeValue(valueTitleNodes[0], "title");
            ufd.Name = ufElement.Name;
            ufd.Value = text;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Url from "a" or "link"
        else if ((baseNode.Name == "a" || baseNode.Name == "link")
            && GetAttributeValue(baseNode, "href") != string.Empty
            && ufElement.Type == UfElementDescriber.PropertyTypes.Url)
        {
            string link = UfHelpers.GetAbsoluteUrl(GetAttributeValue(baseNode, "href"), this.baseUrl, url);
            ufd.Name = ufElement.Name;
            ufd.Value = link;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Url from "img" or "area" src attribute
        else if ((baseNode.Name == "img" || baseNode.Name == "area")
            && GetAttributeValue(baseNode, "src") != string.Empty
            && ufElement.Type == UfElementDescriber.PropertyTypes.Url)
        {
            string link = UfHelpers.GetAbsoluteUrl(GetAttributeValue(baseNode, "src"), this.baseUrl, url);
            ufd.Name = ufElement.Name;
            ufd.Value = link;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Email from "a"
        else if (baseNode.Name == "a"
            && GetAttributeValue(baseNode, "href") != string.Empty
            && ufElement.Type == UfElementDescriber.PropertyTypes.Email)
        {
            string address = UfHelpers.CleanEmailAddress(GetAttributeValue(baseNode, "href"));
            ufd.Name = ufElement.Name;
            ufd.Value = address;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Tel from "object"
        else if (baseNode.Name == "object"
            && (GetAttributeValue(baseNode, "data") != "")
            && ufElement.Name == "tel")
        {
            // The helper receives the data node itself; no name or value is assigned here
            UfHelpers.TelOptimization(ufd, GetAttributeValue(baseNode, "data"));
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Date from "time"
        else if (baseNode.Name == "time"
            && GetAttributeValue(baseNode, "datetime") != ""
            && ufElement.Type == UfElementDescriber.PropertyTypes.Date)
        {
            string text = GetAttributeValue(baseNode, "datetime");
            ufd.Name = ufElement.Name;
            ufd.Value = text;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Date from "abbr"
        else if (baseNode.Name == "abbr"
            && GetAttributeValue(baseNode, "title") != string.Empty
            && ufElement.Type == UfElementDescriber.PropertyTypes.Date)
        {
            string text = GetAttributeValue(baseNode, "title");
            ufd.Name = ufElement.Name;
            ufd.Value = text;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Text from "abbr" or "acronym".
        // The tag test is parenthesised so that the title requirement applies to
        // both tags; without it && binds tighter than || and an "abbr" with no
        // title would be given an empty value here instead of reaching the
        // text fallbacks further down.
        else if ((baseNode.Name == "abbr" || baseNode.Name == "acronym")
            && GetAttributeValue(baseNode, "title") != string.Empty)
        {
            string text = GetAttributeValue(baseNode, "title");
            ufd.Name = ufElement.Name;

            // This is for geo being used as a location in hcalendar
            if (ufElement.CompoundName != string.Empty)
                ufd.Name = ufElement.CompoundName;

            ufd.Value = text;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Text from "input"
        else if (baseNode.Name == "input" && GetAttributeValue(baseNode, "value") != string.Empty)
        {
            string text = GetAttributeValue(baseNode, "value");
            ufd.Name = ufElement.Name;
            ufd.Value = text;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Tel from "area"
        else if (baseNode.Name == "area"
            && (GetAttributeValue(baseNode, "href") != "")
            && ufElement.Name == "tel")
        {
            UfHelpers.TelOptimization(ufd, GetAttributeValue(baseNode, "href"));
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Text and url from "area"
        else if (baseNode.Name == "area"
            && (GetAttributeValue(baseNode, "href") != string.Empty
                || GetAttributeValue(baseNode, "alt") != string.Empty))
        {
            if ((ufElement.Type == UfElementDescriber.PropertyTypes.Url
                    || ufElement.Type == UfElementDescriber.PropertyTypes.Email)
                && GetAttributeValue(baseNode, "href") != string.Empty)
            {
                string text = GetAttributeValue(baseNode, "href");
                if (ufElement.Type == UfElementDescriber.PropertyTypes.Email)
                    text = UfHelpers.CleanEmailAddress(text);
                if (ufElement.Type == UfElementDescriber.PropertyTypes.Url)
                    text = UfHelpers.GetAbsoluteUrl(text, this.baseUrl, url);

                ufd.Name = ufElement.Name;
                ufd.Value = text;
                AddNewDateNode(baseNode, ufData, ufd, ufElement);
            }
            else if (GetAttributeValue(baseNode, "alt") != string.Empty)
            {
                string text = GetAttributeValue(baseNode, "alt");
                ufd.Name = ufElement.Name;
                ufd.Value = text;
                AddNewDateNode(baseNode, ufData, ufd, ufElement);
            }
        }
        // Url/Image from "object"
        else if (baseNode.Name == "object"
            && GetAttributeValue(baseNode, "data") != string.Empty
            && (ufElement.Type == UfElementDescriber.PropertyTypes.Url
                || ufElement.Type == UfElementDescriber.PropertyTypes.Image))
        {
            string text = UfHelpers.GetAbsoluteUrl(GetAttributeValue(baseNode, "data"), this.baseUrl, url);
            ufd.Name = ufElement.Name;
            ufd.Value = text;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Image from "img" or "area"
        else if ((baseNode.Name == "img" || baseNode.Name == "area")
            && GetAttributeValue(baseNode, "src") != string.Empty
            && ufElement.Type == UfElementDescriber.PropertyTypes.Image)
        {
            string text = UfHelpers.GetAbsoluteUrl(GetAttributeValue(baseNode, "src"), this.baseUrl, url);
            ufd.Name = ufElement.Name;
            ufd.Value = text;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Text from "img" longdesc attribute
        else if (baseNode.Name == "img" && GetAttributeValue(baseNode, "longdesc") != string.Empty)
        {
            string text = GetAttributeValue(baseNode, "longdesc");
            ufd.Name = ufElement.Name;
            ufd.Value = text;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Text from "img" alt attribute
        else if (baseNode.Name == "img" && GetAttributeValue(baseNode, "alt") != string.Empty)
        {
            string text = GetAttributeValue(baseNode, "alt");
            ufd.Name = ufElement.Name;
            ufd.Value = text;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        // Text for type/value structures with no found children
        else if (ufElement.NodeType == UfElementDescriber.StructureTypes.TypeValuePair)
        {
            // If no children nodes ie type/value were found use the text;
            // the child describers for type and value were run against this same node
            if (ufd.Nodes.Count == 0)
            {
                // Add text from node value
                string text = UfHelpers.HtmlToText(baseNode, false);
                ufd.Name = ufElement.Name;
                ufd.Value = text;
                AddNewDateNode(baseNode, ufData, ufd, ufElement);
            }
            else
            {
                // Add child type/value pair
                ufd.Name = ufElement.Name;
                AddNewDateNode(baseNode, ufData, ufd, ufElement);
            }
        }
        // Text from Html node collection
        else if (ufElement.Type == UfElementDescriber.PropertyTypes.FormattedText)
        {
            string text = UfHelpers.HtmlToText(baseNode, true);
            ufd.Name = ufElement.Name;
            ufd.Value = text;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
        else
        {
            // Text from node value
            string text = UfHelpers.HtmlToText(baseNode, false);
            ufd.Name = ufElement.Name;
            ufd.Value = text;
            AddNewDateNode(baseNode, ufData, ufd, ufElement);
        }
    }
    else
    {
        // No value to extract: the node is added as built so far
        AddNewDateNode(baseNode, ufData, ufd, ufElement);
    }
}
/// <summary>
/// Gets the descendant node using a custom tree expression
/// </summary>
/// <param name="treeExpression">
/// Custom expression of a node tree position, segments separated by '/',
/// ie "n/given-name". A segment may select the n-th child of a given name
/// with a zero based index, ie "tel[1]/value".
/// </param>
/// <returns>
/// The node found, or a new empty node when the expression is null or empty
/// or when any segment cannot be resolved. Never null.
/// </returns>
public UfDataNode DescendantNode(string treeExpression)
{
    if (string.IsNullOrEmpty(treeExpression))
        return new UfDataNode();

    UfDataNode currentNode = this;

    // An expression without '/' splits into a single segment, so single and
    // multi segment expressions share the same lookup and the same miss result
    foreach (string segment in treeExpression.Split('/'))
    {
        currentNode = FindChildBySegment(currentNode, segment);

        // A miss anywhere along the path ends the walk; the caller gets an
        // empty node rather than whichever ancestor had been reached
        if (currentNode == null)
            return new UfDataNode();
    }

    return currentNode;
}

/// <summary>
/// Resolves one tree expression segment, either "name" or "name[index]",
/// against the children of a node. Returns null when nothing matches.
/// </summary>
private static UfDataNode FindChildBySegment(UfDataNode parent, string segment)
{
    // We are looking for a single node
    if (segment.IndexOf('[') <= 0)
        return parent.Nodes[segment];

    // We are looking for a node from an array
    string[] parts = segment.Split('[');
    string propertyName = parts[0];

    int index;
    if (!int.TryParse(parts[1].Replace("]", ""), out index) || index < 0)
        return null;

    // Count only the children carrying the requested name
    int matchesSeen = 0;
    for (int x = 0; x < parent.Nodes.Count; x++)
    {
        UfDataNode candidate = parent.Nodes[x];
        if (candidate.Name == propertyName)
        {
            if (matchesSeen == index)
                return candidate;
            matchesSeen++;
        }
    }

    return null;
}
/// <summary>
/// Writes one data node, and recursively its children, as html: a "div" for
/// describers keyed on the class attribute, an "a" for describers keyed on rel.
/// Nodes with an empty name are skipped.
/// </summary>
/// <param name="node">Data node to write</param>
/// <param name="elementDescriber">Describer of the parent element; its children are searched for one matching the node</param>
/// <param name="writer">Writer receiving the markup</param>
private void AddNode(UfDataNode node, UfElementDescriber elementDescriber, XhtmlTextWriter writer)
{
    if (node.Name == string.Empty)
        return;

    indentNum++;
    writer.Indent = indentNum;

    // Use the child describer matching this node by name or compound name;
    // the last match wins, and the parent describer is kept when none match
    UfElementDescriber describer = elementDescriber;
    foreach (UfElementDescriber candidate in elementDescriber.Elements)
    {
        bool matchesName = node.Name == candidate.Name;
        if (matchesName || node.Name == candidate.CompoundName)
            describer = candidate;
    }

    if (describer.Attribute == "class")
    {
        string cssClass = node.Name;
        if (describer.CompoundName != "")
            cssClass = node.Name + " " + describer.Name;

        // NOTE(review): attribute values are written through the two-argument
        // WriteAttribute overload; confirm whether they need encoding
        writer.WriteBeginTag("div");
        writer.WriteAttribute("class", cssClass);
        if (!string.IsNullOrEmpty(node.ElementId))
            writer.WriteAttribute("id", node.ElementId);
        writer.Write(HtmlTextWriter.TagRightChar);
        writer.WriteEncodedText(node.Value);

        foreach (UfDataNode child in node.Nodes)
        {
            writer.WriteLine();
            AddNode(child, describer, writer);
        }

        // The name is known to be non-empty at this point
        writer.WriteEndTag("div");
        writer.WriteLine();
    }

    if (describer.Attribute == "rel")
    {
        string href = node.DescendantValue("link");

        writer.WriteBeginTag("a");
        writer.WriteAttribute("href", href);
        writer.WriteAttribute("rel", node.Name);
        writer.Write(HtmlTextWriter.TagRightChar);
        writer.WriteEncodedText(node.DescendantValue("text"));
        writer.WriteEndTag("a");
        writer.WriteLine();
    }

    indentNum--;
    writer.Indent = indentNum;
}
/// <summary>
/// Writes one data node, and recursively its children, as an xml element.
/// A node with an empty name produces no element of its own, but its
/// children are still written.
/// </summary>
/// <param name="node">Data node to write</param>
/// <param name="writer">Xml writer receiving the output</param>
private void AddNode(UfDataNode node, XmlTextWriter writer)
{
    bool hasName = node.Name != string.Empty;

    if (hasName)
    {
        writer.WriteStartElement(node.Name);

        // Optional attributes, written only when they carry information
        if (!string.IsNullOrEmpty(node.SourceUrl))
        {
            writer.WriteAttributeString("sourceurl", node.SourceUrl);
        }
        if (node.RepresentativeNode)
        {
            writer.WriteAttributeString("representativehcard", "true");
        }
        if (!string.IsNullOrEmpty(node.ElementId))
        {
            writer.WriteAttributeString("id", node.ElementId);
        }

        writer.WriteString(node.Value);
    }

    foreach (UfDataNode descendant in node.Nodes)
    {
        AddNode(descendant, writer);
    }

    if (hasName)
    {
        writer.WriteEndElement();
    }
}