Esempio n. 1
0
        /// <summary>
        /// Loads a single HTML page and runs a microformat parse over it.
        /// </summary>
        /// <remarks>
        /// A report entry for the page is added to <c>parsedUrls</c> as soon as the page object is returned.
        /// The entry is only added to <c>Urls</c> (with its <c>LoadTime</c>) when the status code is 200.
        /// If <c>LoadHtmlDoc</c> returns null the method returns without recording anything.
        /// </remarks>
        /// <param name="url">The URL of the web page to be parsed. It is trimmed and URL-decoded before use.</param>
        /// <param name="formatDescriber">A format describer for the microformat to be parsed</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="url"/> is null, empty or only whitespace.</exception>
        /// <exception cref="Exception">
        /// Thrown when the page returns a status code other than 200, or when loading/parsing fails.
        /// The original failure is available through <see cref="Exception.InnerException"/>.
        /// </exception>
        public void Load(string url, UfFormatDescriber formatDescriber)
        {
            this.formatDescriber = formatDescriber;

            // Reject a missing url up front. Previously a null url reached url.Trim() and the caller
            // received a NullReferenceException message instead of "No Url given".
            if (string.IsNullOrEmpty(url) || url.Trim().Length == 0)
            {
                throw new ArgumentException("No Url given");
            }

            try
            {
                // Check for issues with url
                // NOTE(review): decoding the whole url turns escapes such as %26 into literal characters
                // before the request is made - confirm this is what LoadHtmlDoc expects.
                url = HttpUtility.UrlDecode(url.Trim());

                UfWebPage webPage = LoadHtmlDoc(url);
                if (webPage == null)
                {
                    return;
                }

                Url urlReport = new Url();
                urlReport.Address = webPage.Url;
                urlReport.Status = webPage.StatusCode;
                parsedUrls.Add(urlReport);

                if (webPage.StatusCode != 200)
                {
                    throw new Exception("Could not load url: " + url + " " + webPage.StatusCode);
                }

                // NOTE(review): timing starts after the page has been fetched, so LoadTime covers
                // the ParseUf call only - confirm that is the intended meaning.
                // UtcNow is used so the measured interval is not distorted by a daylight-saving shift.
                DateTime started = DateTime.UtcNow;

                if (webPage.Html != null)
                {
                    ParseUf(webPage.Html, url, formatDescriber, false, urlReport);
                }

                urlReport.LoadTime = DateTime.UtcNow.Subtract(started);
                Urls.Add(urlReport);
            }
            catch (Exception ex)
            {
                // Callers always receive a plain Exception with a non-empty message, as before;
                // the original exception is now preserved as InnerException so its stack trace is not lost.
                string message = string.IsNullOrEmpty(ex.Message)
                    ? "Could not load Url: " + url
                    : ex.Message;
                throw new Exception(message, ex);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Converts a UfDataNode structure into JSON, optionally wrapped in a JSONP callback.
        /// </summary>
        /// <remarks>
        /// For every format describer, each grandchild of <paramref name="node"/> whose name matches the
        /// describer's base element name is appended to <c>tree</c>, and the contents of <c>tree</c> are
        /// then serialised. The output is followed by the results of <c>AddUfErrors</c> and <c>AddReporting</c>.
        /// </remarks>
        /// <param name="node">Node</param>
        /// <param name="formatArray">Array of microformat format describers that describe the data in node</param>
        /// <param name="callBack">
        /// JSONP callback function name to wrap the JSON object. Parentheses and surrounding whitespace
        /// are removed; null or empty means no wrapper is emitted.
        /// </param>
        /// <returns>JSON string</returns>
        public string Convert(UfDataNode node, ArrayList formatArray, string callBack)
        {
            // Sanitise once and use the sanitised value everywhere. Previously the raw parameter was
            // written to the output, so the clean-up below had no effect on the emitted JSONP wrapper,
            // and a null callBack threw a NullReferenceException.
            string safeCallBack = (callBack ?? string.Empty).Replace("(", "").Replace(")", "").Trim();
            this.callBack = safeCallBack;

            foreach (UfFormatDescriber formatDescriber in formatArray)
            {
                foreach (UfDataNode childNode in node.Nodes)
                {
                    foreach (UfDataNode grandChildNode in childNode.Nodes)
                    {
                        if (grandChildNode.Name == formatDescriber.BaseElement.Name)
                        {
                            UfDataNode xChild = tree.Nodes.Append(grandChildNode.Name, grandChildNode.Value, grandChildNode.SourceUrl, grandChildNode.RepresentativeNode);
                            if (grandChildNode.Nodes.Count > 0)
                            {
                                AddChildNodes(xChild, grandChildNode, formatDescriber.BaseElement);
                            }
                        }
                    }
                }
            }

            bool wrapInCallBack = safeCallBack.Length > 0;
            System.Text.StringBuilder output = new System.Text.StringBuilder();

            if (wrapInCallBack)
            {
                output.Append(safeCallBack).Append("( ");
            }

            output.Append("{");

            foreach (UfDataNode childNode in tree.Nodes)
            {
                output.Append(BuildDataString(childNode, true, false));
            }

            // Drop the last two characters written by the loop above.
            // NOTE(review): presumably BuildDataString ends each entry with a two-character
            // separator such as ", " - confirm against BuildDataString.
            if (tree.Nodes.Count > 0)
            {
                output.Length -= 2;
            }

            output.Append(AddUfErrors());
            output.Append(AddReporting(node));

            // End whole block
            output.Append("}");

            if (wrapInCallBack)
            {
                output.Append(" )");
            }

            return output.ToString();
        }