Пример #1
0
        /// <summary>
        /// Initializes an HtmlNode with its type, its owner document and the index it starts at.
        /// </summary>
        /// <param name="type">
        /// Kind of node to create. Comment, Document and Text nodes receive a fixed name and
        /// are made their own end node.
        /// </param>
        /// <param name="ownerdocument">
        /// Document that owns the node. May be null, in which case the node is simply not
        /// registered in any opened-nodes table.
        /// </param>
        /// <param name="index">
        /// Outer start index of the node, also used as the key in the owner's opened-nodes
        /// table. -1 means the node comes from public code rather than from parsing.
        /// </param>
        public HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
        {
            _nodetype = type;
            _ownerdocument = ownerdocument;
            _outerstartindex = index;

            switch (type)
            {
                case HtmlNodeType.Comment:
                    Name = HtmlNodeTypeNameComment;
                    _endnode = this;
                    break;

                case HtmlNodeType.Document:
                    Name = HtmlNodeTypeNameDocument;
                    _endnode = this;
                    break;

                case HtmlNodeType.Text:
                    Name = HtmlNodeTypeNameText;
                    _endnode = this;
                    break;
            }

            // Register still-open nodes with the owner, using the index as the key.
            // The owner is null-checked so that a node built without a document does not
            // fail with a NullReferenceException; -1 (node comes from public) is never registered.
            if ((_ownerdocument != null) &&
                (_ownerdocument._openednodes != null) &&
                !Closed &&
                (-1 != index))
            {
                _ownerdocument._openednodes.Add(index, this);
            }

            // For nodes created from public code (index -1) that are neither comments nor
            // text, innerhtml and outerhtml must be calculated.
            if ((-1 == index) && (type != HtmlNodeType.Comment) && (type != HtmlNodeType.Text))
            {
                _outerchanged = true;
                _innerchanged = true;
            }
        }
Пример #2
0
 /// <summary>
 /// Builds a node by parsing a string of literal HTML.
 /// </summary>
 /// <param name="html">The HTML text to parse.</param>
 /// <returns>The first child of the root node of the document parsed from <paramref name="html"/>.</returns>
 public static HtmlNode CreateNode(string html)
 {
     // REVIEW: this is *not* optimum - a whole throwaway document is parsed to get one node.
     HtmlDocument scratch = new HtmlDocument();
     scratch.LoadHtml(html);

     HtmlNode root = scratch.DocumentNode;
     return root.FirstChild;
 }
Пример #3
0
 /// <summary>
 /// Fetches <paramref name="uri"/> through <c>Get</c> into a new document and records the
 /// resulting status code in <c>_statusCode</c>.
 /// </summary>
 /// <param name="uri">Address of the resource to load.</param>
 /// <param name="method">HTTP method forwarded to <c>Get</c>.</param>
 /// <param name="proxy">Proxy forwarded to <c>Get</c>.</param>
 /// <param name="creds">Credentials forwarded to <c>Get</c>.</param>
 /// <returns>The document that was created for this request.</returns>
 private HtmlDocument LoadUrl(Uri uri, string method, WebProxy proxy, NetworkCredential creds)
 {
     HtmlDocument result = new HtmlDocument
     {
         OptionAutoCloseOnEnd = false,
         OptionFixNestedTags = true
     };

     _statusCode = Get(uri, method, null, result, proxy, creds);
     if (_statusCode != HttpStatusCode.NotModified)
     {
         return result;
     }

     // NotModified: load the document from the cache file instead (read cached encoding)
     result.DetectEncodingAndLoad(GetCachePath(uri));
     return result;
 }
Пример #4
0
        /// <summary>
        /// Transforms <c>Raw</c> into <c>Processed</c>.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. every match of <c>AspTagsStripRegex</c> is replaced by a numbered placeholder
        ///    comment and remembered in <c>AspContent</c>;
        /// 2. condition and foreach tags are removed from the text;
        /// 3. the result is loaded as HTML and the elements targeted by each condition /
        ///    foreach tag (through their <c>for</c> attribute) get ASP code inserted around
        ///    or inside them;
        /// 4. all <c>gasp:id</c> attributes are removed;
        /// 5. placeholders are swapped back for the remembered ASP code.
        /// </remarks>
        public void Parse()
        {
            contentHolderId = -1;
            AspContent = new Dictionary<int, string>();

            // Extract tags
            AspTags = AspTagsStripRegex.Matches(Raw);
            GaspConditions = GaspXPConditionRegex.Matches(Raw);
            GaspForeaches = GaspXPForeachRegex.Matches(Raw);

            // Preprocess the HTML

            // Strip the <% asp code %> (replace with a placeholder)
            Processed = AspTagsStripRegex.Replace(Raw,
                                      me =>
                                      {
                                          AspContent.Add(++contentHolderId, me.Value);
                                          return "<!-- GaspXP[[" + contentHolderId + "]] -->";
                                      });

            // Strip the <condition></condition> tags
            Processed = GaspXPConditionRegex.Replace(Processed, me => "");

            // Strip the <foreach></foreach> tags
            Processed = GaspXPForeachRegex.Replace(Processed, me => "");

            var doc = new HtmlDocument();
            doc.OptionWriteEmptyNodes = true;
            doc.OptionOutputOriginalCase = true;
            doc.OptionAutoCloseOnEnd = true;

            // todo OptionOutputOriginalCase => doesnt seem to work for attributes! (not all?)
            doc.LoadHtml(Processed);

            // Loop through all conditions
            foreach (Match condition in GaspConditions)
            {
                foreach (Match tag in AttributesRegex.Matches(condition.Groups[0].Value))
                {
                    if (tag.Groups[1].Value != "for")
                    {
                        continue;
                    }

                    var elementId = tag.Groups[2].Value;

                    // find the element by 'gasp:id' first.
                    // SelectNodes can return null when nothing matches, so guard every result.
                    var gaspNodes = doc.DocumentNode.SelectNodes("//*", GaspNamespace);
                    if (gaspNodes != null)
                    {
                        foreach (var n in gaspNodes)
                        {
                            if (HasGaspId(n, elementId))
                            {
                                SurroundWithAspIf(n, condition.Groups[2].Value);
                            }
                        }
                    }

                    // then by regular id
                    var idNodes = doc.DocumentNode.SelectNodes("id('" + elementId + "')");
                    if (idNodes != null)
                    {
                        foreach (var n in idNodes)
                        {
                            SurroundWithAspIf(n, condition.Groups[2].Value);
                        }
                    }
                }
            }

            // Loop through all foreaches
            foreach (Match loop in GaspForeaches)
            {
                foreach (Match tag in AttributesRegex.Matches(loop.Groups[0].Value))
                {
                    if (tag.Groups[1].Value != "for")
                    {
                        continue;
                    }

                    var elementId = tag.Groups[2].Value;

                    // name of the iteration variable; "item" unless a 'key' attribute says otherwise
                    string key = "item";
                    foreach (Match keyTag in AttributesRegex.Matches(loop.Groups[0].Value))
                    {
                        if (keyTag.Groups[1].Value == "key")
                        {
                            key = keyTag.Groups[2].Value;
                            break;
                        }
                    }

                    // find the element (first search on 'gaspid')
                    // allows to be applied to multiple elements at once!
                    // NOTE(review): this branch wraps the element in an "if" block rather than a
                    // "foreach" block, exactly like the condition pass above - looks like a
                    // copy-paste; behavior kept as-is, confirm the intent.
                    var gaspNodes = doc.DocumentNode.SelectNodes("//*", GaspNamespace);
                    if (gaspNodes != null)
                    {
                        foreach (var n in gaspNodes)
                        {
                            if (HasGaspId(n, elementId))
                            {
                                SurroundWithAspIf(n, loop.Groups[2].Value);
                            }
                        }
                    }

                    var idNodes = doc.DocumentNode.SelectNodes("id('" + elementId + "')");
                    if (idNodes != null)
                    {
                        foreach (var n in idNodes)
                        {
                            n.InsertBefore(HtmlNode.CreateNode("<% foreach( var " + key + " in (" + loop.Groups[2].Value + ")){%>"), n.FirstChild);
                            n.InsertAfter(HtmlNode.CreateNode("<% } %>"), n.LastChild);

                            // only the first element with this id gets the loop
                            break;
                        }
                    }

                    // only the first 'for' attribute of a foreach tag is handled
                    break;
                }
            }

            // cleanup gaspid's (iterate over a copy of the result list)
            var leftovers = doc.DocumentNode.SelectNodes("//*", GaspNamespace);
            if (leftovers != null)
            {
                foreach (var n in new List<HtmlNode>(leftovers))
                {
                    n.Attributes.Remove("gasp:id");
                }
            }

            /* return the asp code back into the doc */
            Processed = GaspXPContentRegex.Replace(doc.DocumentNode.OuterHtml, m => AspContent[int.Parse(m.Groups[1].Value)]);
        }

        /// <summary>
        /// Tells whether <paramref name="node"/> carries a <c>gasp:id</c> attribute whose value equals <paramref name="elementId"/>.
        /// </summary>
        private static bool HasGaspId(HtmlNode node, string elementId)
        {
            foreach (var attribute in node.Attributes)
            {
                if (attribute.OriginalName == "gasp:id" && attribute.Value == elementId)
                {
                    return true;
                }
            }
            return false;
        }

        /// <summary>
        /// Inserts an ASP <c>if</c> opening block before <paramref name="node"/> and the matching closing block after it, as siblings.
        /// </summary>
        private static void SurroundWithAspIf(HtmlNode node, string conditionCode)
        {
            node.ParentNode.InsertBefore(HtmlNode.CreateNode("<% if(" + conditionCode + "){%>"), node);
            node.ParentNode.InsertAfter(HtmlNode.CreateNode("<% } %>"), node);
        }
Пример #5
0
        /// <summary>
        /// Performs the HTTP request for <paramref name="uri"/>, honoring the cache settings,
        /// and either saves the response to the cache / <paramref name="path"/> or loads it
        /// into <paramref name="doc"/>.
        /// </summary>
        /// <param name="uri">Address to request.</param>
        /// <param name="method">HTTP method assigned to the request.</param>
        /// <param name="path">Optional file that receives a copy of the cached content; null to skip the copy.</param>
        /// <param name="doc">Optional document loaded from the response stream when the cache is not used and the content is HTML.</param>
        /// <param name="proxy">Optional proxy; when given, credentials are set on both the proxy and the request.</param>
        /// <param name="creds">Credentials to use with the proxy; default credentials are used when null.</param>
        /// <returns>
        /// The response status code; <c>NotModified</c> when the content is served from the
        /// cache; <c>ResetContent</c> when the <c>PreRequest</c> handler rejects the request.
        /// </returns>
        /// <exception cref="HtmlWebException">
        /// Cache-only mode is on and no cache file exists, or the server answered NotModified
        /// while the cache is disabled.
        /// </exception>
        private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc, IWebProxy proxy,
            ICredentials creds)
        {
            string cachePath = null;
            bool oldFile = false;

            HttpWebRequest req = WebRequest.Create(uri) as HttpWebRequest;
            req.Method = method;
            req.UserAgent = UserAgent;
            if (proxy != null)
            {
                // same credentials go to the proxy and to the request itself
                ICredentials effective = creds ?? CredentialCache.DefaultCredentials;
                proxy.Credentials = effective;
                req.Credentials = effective;
                req.Proxy = proxy;
            }

            _fromCache = false;
            _requestDuration = 0;
            int tc = Environment.TickCount;
            if (UsingCache)
            {
                cachePath = GetCachePath(req.RequestUri);
                if (File.Exists(cachePath))
                {
                    req.IfModifiedSince = File.GetLastAccessTime(cachePath);
                    oldFile = true;
                }
            }

            if (_cacheOnly)
            {
                if (!File.Exists(cachePath))
                {
                    throw new HtmlWebException("File was not found at cache path: '" + cachePath + "'");
                }

                if (path != null)
                {
                    CopyCachedFileTo(cachePath, path);
                }
                _fromCache = true;
                return HttpStatusCode.NotModified;
            }

            if (_useCookies)
            {
                req.CookieContainer = new CookieContainer();
            }

            if (PreRequest != null)
            {
                // allow our user to change the request at will
                if (!PreRequest(req))
                {
                    return HttpStatusCode.ResetContent;
                }
            }

            HttpWebResponse resp;

            try
            {
                resp = req.GetResponse() as HttpWebResponse;
            }
            catch (WebException we)
            {
                _requestDuration = Environment.TickCount - tc;
                resp = (HttpWebResponse)we.Response;
                if (resp == null)
                {
                    // no response at all: fall back to the cached copy when there is one
                    if (oldFile)
                    {
                        if (path != null)
                        {
                            CopyCachedFileTo(cachePath, path);
                        }
                        return HttpStatusCode.NotModified;
                    }
                    throw;
                }
            }
            catch (Exception)
            {
                _requestDuration = Environment.TickCount - tc;
                throw;
            }

            // From here on the response is always closed, whichever way we leave
            // (NotModified, missing stream, or an exception while saving / parsing).
            try
            {
                // allow our user to get some info from the response
                if (PostResponse != null)
                {
                    PostResponse(req, resp);
                }

                _requestDuration = Environment.TickCount - tc;
                _responseUri = resp.ResponseUri;

                bool html = IsHtmlContent(resp.ContentType);

                // NOTE(review): ContentEncoding reflects the Content-Encoding header (e.g. "gzip"),
                // not the charset; Encoding.GetEncoding throws for such values - confirm whether
                // CharacterSet was intended here.
                Encoding respenc = !string.IsNullOrEmpty(resp.ContentEncoding)
                                       ? Encoding.GetEncoding(resp.ContentEncoding)
                                       : null;

                // read the status before the response gets closed
                HttpStatusCode status = resp.StatusCode;

                if (status == HttpStatusCode.NotModified)
                {
                    if (!UsingCache)
                    {
                        // this should *never* happen...
                        throw new HtmlWebException("Server has send a NotModifed code, without cache enabled.");
                    }

                    _fromCache = true;
                    if (path != null)
                    {
                        CopyCachedFileTo(cachePath, path);
                    }
                    return status;
                }

                Stream s = resp.GetResponseStream();
                if (s == null)
                {
                    return status;
                }

                if (UsingCache)
                {
                    // NOTE: LastModified does not contain milliseconds, so we remove them to the file
                    SaveStream(s, cachePath, RemoveMilliseconds(resp.LastModified), _streamBufferSize);

                    // save headers
                    SaveCacheHeaders(req.RequestUri, resp);

                    if (path != null)
                    {
                        CopyCachedFileTo(cachePath, path);
                    }
                }
                else if ((doc != null) && html)
                {
                    // try to work in-memory
                    if (respenc != null)
                    {
                        doc.Load(s, respenc);
                    }
                    else
                    {
                        doc.Load(s, true);
                    }
                }

                return status;
            }
            finally
            {
                if (resp != null)
                {
                    resp.Close();
                }
            }
        }

        /// <summary>
        /// Copies the cache file to <paramref name="path"/> and gives the copy the cache file's last write time.
        /// </summary>
        private static void CopyCachedFileTo(string cachePath, string path)
        {
            IOLibrary.CopyAlways(cachePath, path);
            // touch the file
            File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
        }
Пример #6
0
 /// <summary>
 /// Loads an HTML document from an Internet resource or from a local file.
 /// </summary>
 /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
 /// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
 /// <param name="proxy">Proxy to use with this request</param>
 /// <param name="credentials">Credentials to use when authenticating</param>
 /// <returns>A new HTML document.</returns>
 /// <exception cref="HtmlWebException">The scheme of <paramref name="url"/> is not http, https or file.</exception>
 public HtmlDocument Load(string url, string method, WebProxy proxy, NetworkCredential credentials)
 {
     Uri uri = new Uri(url);
     HtmlDocument doc;
     if ((uri.Scheme == Uri.UriSchemeHttps) ||
         (uri.Scheme == Uri.UriSchemeHttp))
     {
         doc = LoadUrl(uri, method, proxy, credentials);
     }
     else if (uri.Scheme == Uri.UriSchemeFile)
     {
         doc = new HtmlDocument();
         // NOTE(review): this option used to be assigned false and then immediately true.
         // The dead first assignment was removed and the effective value (true) kept.
         // LoadUrl sets OptionAutoCloseOnEnd = false and OptionFixNestedTags = true instead -
         // confirm whether the same pair was intended here.
         doc.OptionAutoCloseOnEnd = true;
         doc.DetectEncodingAndLoad(url, _autoDetectEncoding);
     }
     else
     {
         throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
     }

     // give subscribers a chance to touch the document before it is returned
     if (PreHandleDocument != null)
     {
         PreHandleDocument(doc);
     }
     return doc;
 }
Пример #7
0
        /// <summary>
        /// Copy constructor: the new navigator takes the document, current node, attribute
        /// index and name table of <paramref name="nav"/>.
        /// </summary>
        /// <param name="nav">Navigator to copy from.</param>
        /// <exception cref="ArgumentNullException"><paramref name="nav"/> is null.</exception>
        private HtmlNodeNavigator(HtmlNodeNavigator nav)
        {
            if (null == nav)
            {
                throw new ArgumentNullException("nav");
            }

            InternalTrace(null);

            _nametable = nav._nametable; // REVIEW: should we do this?
            _attindex = nav._attindex;
            _currentnode = nav._currentnode;
            _doc = nav._doc;
        }
Пример #8
0
        /// <summary>
        /// Creates a navigator over <paramref name="doc"/> positioned on <paramref name="currentNode"/>.
        /// </summary>
        /// <param name="doc">Document to navigate; must be the owner of <paramref name="currentNode"/>.</param>
        /// <param name="currentNode">Node the navigator starts on.</param>
        /// <exception cref="ArgumentNullException"><paramref name="currentNode"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="currentNode"/> is not owned by <paramref name="doc"/>.</exception>
        internal HtmlNodeNavigator(HtmlDocument doc, HtmlNode currentNode)
        {
            if (null == currentNode)
            {
                throw new ArgumentNullException("currentNode");
            }

            HtmlDocument owner = currentNode.OwnerDocument;
            if (owner != doc)
            {
                throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
            }

            InternalTrace(null);

            _doc = doc;

            // Reset runs first; the requested start node is assigned afterwards.
            Reset();
            _currentnode = currentNode;
        }
Пример #9
0
 /// <summary>
 /// Creates an attribute and records the document it belongs to.
 /// </summary>
 /// <param name="ownerdocument">Document stored as the owner of this attribute.</param>
 internal HtmlAttribute(HtmlDocument ownerdocument)
 {
     this._ownerdocument = ownerdocument;
 }