/// <summary>
/// Loads a single HTML page and runs a microformat parse over it.
/// </summary>
/// <param name="url">The URL of the web page to be parsed</param>
/// <param name="formatDescriber">A format describer for the microformat to be parsed</param>
/// <exception cref="Exception">
/// Thrown when no URL is given, when the page status code is not 200, or when
/// loading/parsing fails. For failures raised while loading or parsing, the
/// original exception is attached as the inner exception.
/// </exception>
public void Load(string url, UfFormatDescriber formatDescriber)
{
    this.formatDescriber = formatDescriber;

    // Reject null and whitespace-only input up front, the same way an empty
    // string is rejected, instead of failing later with a NullReferenceException
    // or handing an empty address to the loader.
    if (url == null || url.Trim().Length == 0)
    {
        throw new Exception("No Url given");
    }

    try
    {
        // Normalise the address before requesting it.
        url = HttpUtility.UrlDecode(url.Trim());

        UfWebPage webPage = LoadHtmlDoc(url);
        if (webPage == null)
        {
            return;
        }

        Url urlReport = new Url();
        urlReport.Address = webPage.Url;
        urlReport.Status = webPage.StatusCode;
        parsedUrls.Add(urlReport);

        // NOTE(review): the timer starts after LoadHtmlDoc has returned, so
        // LoadTime covers the parse step only - confirm this is intended.
        DateTime started = DateTime.Now;

        if (webPage.StatusCode != 200)
        {
            throw new Exception("Could not load url: " + url + " " + webPage.StatusCode);
        }

        if (webPage.Html != null)
        {
            ParseUf(webPage.Html, url, formatDescriber, false, urlReport);
        }

        urlReport.LoadTime = DateTime.Now.Subtract(started);
        Urls.Add(urlReport);
    }
    catch (Exception ex)
    {
        // Keep the message callers already see, but attach the original
        // exception so its type and stack trace are not lost.
        string message = string.IsNullOrEmpty(ex.Message)
            ? "Could not load Url: " + url
            : ex.Message;
        throw new Exception(message, ex);
    }
}
/// <summary>
/// Converts a UfDataNode structure into JSON, optionally wrapped in a JSONP callback.
/// </summary>
/// <param name="node">Node whose grandchildren are matched against the format describers</param>
/// <param name="formatArray">Array of microformat format describers used to describe the data in node</param>
/// <param name="callBack">
/// JSONP callback function name to wrap the JSON object. Parentheses are stripped
/// and the name is trimmed; null or empty means no wrapper is emitted.
/// </param>
/// <returns>JSON string</returns>
public string Convert(UfDataNode node, ArrayList formatArray, string callBack)
{
    // Sanitise once and use the sanitised name everywhere below. Previously the
    // wrapper was built from the raw parameter, so a value such as "foo()" was
    // emitted unchanged, and a null callback caused a NullReferenceException.
    string safeCallBack = (callBack ?? string.Empty).Replace("(", "").Replace(")", "").Trim();
    this.callBack = safeCallBack;
    bool hasCallBack = safeCallBack != string.Empty;

    // Copy every grandchild whose name matches a describer's base element into
    // the output tree, together with its descendants.
    foreach (UfFormatDescriber formatDescriber in formatArray)
    {
        foreach (UfDataNode childNode in node.Nodes)
        {
            foreach (UfDataNode grandChildNode in childNode.Nodes)
            {
                if (grandChildNode.Name == formatDescriber.BaseElement.Name)
                {
                    UfDataNode xChild = tree.Nodes.Append(
                        grandChildNode.Name,
                        grandChildNode.Value,
                        grandChildNode.SourceUrl,
                        grandChildNode.RepresentativeNode);

                    if (grandChildNode.Nodes.Count > 0)
                    {
                        AddChildNodes(xChild, grandChildNode, formatDescriber.BaseElement);
                    }
                }
            }
        }
    }

    System.Text.StringBuilder output = new System.Text.StringBuilder();

    if (hasCallBack)
    {
        output.Append(safeCallBack).Append("( ");
    }

    output.Append("{");

    foreach (UfDataNode childNode in tree.Nodes)
    {
        output.Append(BuildDataString(childNode, true, false));
    }

    // Drop the last two characters of the node output - presumably the trailing
    // separator written by BuildDataString; confirm against that helper.
    if (tree.Nodes.Count > 0)
    {
        output.Length -= 2;
    }

    output.Append(AddUfErrors());
    output.Append(AddReporting(node));

    // End whole block
    output.Append("}");

    if (hasCallBack)
    {
        output.Append(" )");
    }

    return output.ToString();
}