Exemple #1
0
 /// <summary>
 /// Parses a piece of literal HTML and returns the first node it produces.
 /// </summary>
 /// <param name="html">The HTML markup to parse.</param>
 /// <returns>The first child of the root node of a temporary document loaded from <paramref name="html"/>.</returns>
 public static HtmlNode CreateNode(string html)
 {
     // REVIEW: not optimal - a whole throw-away document is built to obtain one node.
     var scratch = new HtmlDocument();
     scratch.LoadHtml(html);

     HtmlNode root = scratch.DocumentNode;
     return root.FirstChild;
 }
        /// <summary>
        /// Downloads the "new topic" page of a section and collects the hidden
        /// <c>auth_key</c>, <c>attach_post_key</c> and <c>s</c> form values into
        /// <c>TemplateReplacements</c>.
        /// </summary>
        /// <remarks>
        /// Returns immediately when the user is not logged in. Otherwise the work is passed to
        /// <c>AsyncHelper.Run</c> and <c>OnReadyChanged</c> is raised once the request has been
        /// processed, whether or not all three values were found.
        /// </remarks>
        /// <param name="sectionId">Value substituted for <c>{0}</c> in <c>NewTopicPath</c>.</param>
        public override void MakeReady(int sectionId)
        {
            if (!this.User.IsLoggedIn) return;

            AsyncHelper.Run(() =>
            {
                string url = this.BaseUrl + this.NewTopicPath.Replace("{0}", sectionId.ToString());
                var doc = new HtmlDocument();
                var replacements = new Dictionary<string, string>();

                HttpWebRequest req = Http.Prepare(url);

                req.Method = "GET";

                HttpResult result = this.AllowRedirects ? Http.HandleRedirects(Http.Request(req), false) : Http.Request(req);

                // Did the request fail? (a missing body counts as a failure too)
                if (result.HasError || result.Data == null || result.Data.Trim().Length == 0)
                {
                    ErrorLog.LogException(result.Error);
                    this.OnReadyChanged(this, new EventArgs());
                    return;
                }

                doc.LoadHtml(result.Data);

                // Extract required data.
                // SelectNodes can return null instead of an empty collection when nothing matches.
                HtmlNodeCollection hiddenInputs = doc.DocumentNode.SelectNodes("//input[@type='hidden']");

                if (hiddenInputs != null)
                {
                    foreach (HtmlNode n in hiddenInputs)
                    {
                        string name = n.GetAttributeValue("name", String.Empty);

                        if (name != "auth_key" && name != "attach_post_key" && name != "s") continue;

                        string value = n.GetAttributeValue("value", (string)null);
                        string key = "[" + name + "]";

                        // Inputs without a value are ignored and the first occurrence of a field
                        // wins, so a repeated form on the page cannot throw a duplicate-key error.
                        if (value != null && !replacements.ContainsKey(key))
                        {
                            replacements.Add(key, value);
                        }
                    }
                }

                // Check if we got the needed info
                if (replacements.Count != 3) replacements.Clear();

                // Done
                this.TemplateReplacements = replacements;
                this.OnReadyChanged(this, new EventArgs());

                if (!this.IsReady)
                {
                    var error = new Exception(String.Format(
                        "MakeReady({0}) failed for site '{1}'.\r\nUrl used: {2}.",
                        sectionId,
                        this.BaseUrl,
                        url
                    ));

                    ErrorLog.LogException(error);
                }
            });
        }
Exemple #3
0
 /// <summary>
 /// Fetches <paramref name="url"/> through <c>Get</c> and parses the returned data as HTML.
 /// </summary>
 /// <param name="url">Address handed to <c>Get</c>.</param>
 /// <returns>A new <c>HtmlDocument</c> loaded from the <c>Data</c> of the result.</returns>
 public static HtmlDocument GetDoc(string url)
 {
     HtmlDocument parsed = new HtmlDocument();
     var markup = Get(url).Data;

     parsed.LoadHtml(markup);
     return parsed;
 }
        /// <summary>
        /// Loads a topic page, follows its "Reply directly to this post" link and rebuilds the
        /// topic title and body from the quote the reply form is pre-filled with.
        /// </summary>
        /// <param name="url">Address of the topic page; also sent as the referer of both requests.</param>
        /// <returns>
        /// A <c>SiteTopic</c> with the decoded title and content, or <c>null</c> when the user is
        /// not logged in, the expected markup is missing, or an exception occurs (it is logged).
        /// </returns>
        public override SiteTopic GetTopic(string url)
        {
            if (!this.User.IsLoggedIn) return null;

            HtmlDocument doc = new HtmlDocument();
            HttpWebRequest req;
            HttpResult result;

            req = Http.Prepare(url);
            req.Method = "GET";
            req.Referer = url;

            try
            {
                result = this.AllowRedirects ? Http.HandleRedirects(Http.Request(req), false) : Http.Request(req);

                // Log the transport error first so it is not lost if loading the body throws
                ErrorLog.LogException(result.Error);
                doc.LoadHtml(result.Data);

                // SelectNodes can return null instead of an empty collection when nothing matches
                HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//a[@title='Reply directly to this post']");

                if (nodes == null || nodes.Count == 0)
                {
                    ErrorLog.LogException(new Exception(String.Format(
                        "GetTopic failed: no reply link found.\r\nUrl used: {0}.",
                        url
                    )));
                    return null;
                }

                string link = HttpUtility.HtmlDecode(nodes[0].GetAttributeValue("href", String.Empty));

                req = Http.Prepare(link);
                req.Method = "GET";
                req.Referer = url;

                result = this.AllowRedirects ? Http.HandleRedirects(Http.Request(req), false) : Http.Request(req);

                ErrorLog.LogException(result.Error);
                doc.LoadHtml(result.Data);

                HtmlNodeCollection titleNodes = doc.DocumentNode.SelectNodes("//h2[@class='maintitle']");
                HtmlNodeCollection postNodes = doc.DocumentNode.SelectNodes("//textarea[@name='Post']");
                string title = null;

                if (titleNodes != null)
                {
                    // The first heading that carries the "Replying to " prefix holds the title
                    foreach (HtmlNode n in titleNodes)
                    {
                        if (n.InnerText.Trim().Contains("Replying to "))
                        {
                            title = n.InnerText.Replace("Replying to ", "");
                            break;
                        }
                    }
                }

                if (title == null || postNodes == null || postNodes.Count == 0)
                {
                    ErrorLog.LogException(new Exception(String.Format(
                        "GetTopic failed: reply form title or text not found.\r\nUrl used: {0}.",
                        link
                    )));
                    return null;
                }

                string content = postNodes[0].InnerText;

                // Drop everything up to and including the first ']' (the opening quote tag)
                content = HttpUtility.HtmlDecode(content.Substring(content.IndexOf(']') + 1)).Trim();

                // Remove the closing tag only when it is really there, instead of blindly
                // cutting the last eight characters
                if (content.EndsWith("[/quote]", StringComparison.OrdinalIgnoreCase))
                {
                    content = content.Substring(0, content.Length - "[/quote]".Length);
                }

                // Fix IPB3 quotes
                // NOTE(review): inside the character class, '-= is a range from ' to =, not three
                // literal characters - confirm this is intended before touching the pattern.
                string pattern = @"(?i)\[quote [\w\d " + '"' + @"'-=]+\]";
                string replace = "[quote]";

                content = Regex.Replace(content, pattern, replace);

                return new SiteTopic(
                    HttpUtility.HtmlDecode(title).Trim(),
                    content.Trim(),
                    0, 0, url
                );
            }
            catch (Exception error)
            {
                ErrorLog.LogException(error);
                return null;
            }
        }
        /// <summary>
        /// Extracts topic addresses from a listing page by reading the <c>href</c> of every
        /// <c>a.topic_title</c> anchor whose id starts with <c>tid-link-</c>.
        /// </summary>
        /// <remarks>
        /// Anchors whose parent markup contains <c>class="topic_prefix"</c> are skipped. Only
        /// absolute <c>http:</c> / <c>https:</c> links that parse as a URI are returned.
        /// </remarks>
        /// <param name="html">Markup of the listing page.</param>
        /// <returns>The collected addresses; an empty array when there are none or parsing fails.</returns>
        public override string[] GetTopicUrls(string html)
        {
            if (html == null || html.Trim().Length == 0) return new string[0];

            var urls = new List<string>();
            var doc = new HtmlDocument();
            Uri uri;

            try
            {
                doc.LoadHtml(html);

                // SelectNodes can return null when nothing matches; an empty listing is not an error
                HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//a[@class='topic_title']");

                if (nodes == null) return new string[0];

                foreach (HtmlNode n in nodes)
                {
                    if (n.ParentNode.InnerHtml.Contains("class=\"topic_prefix\"")) continue;
                    if (!n.GetAttributeValue("id", "").StartsWith("tid-link-", StringComparison.Ordinal)) continue;

                    string rawHref = n.GetAttributeValue("href", "");

                    // Accept https as well as http; testing only "http:" dropped every https link
                    bool isAbsolute = rawHref.StartsWith("http:", StringComparison.OrdinalIgnoreCase) ||
                                      rawHref.StartsWith("https:", StringComparison.OrdinalIgnoreCase);

                    if (!isAbsolute) continue;

                    string href = HttpUtility.HtmlDecode(rawHref).Trim();

                    if (Uri.TryCreate(href, UriKind.Absolute, out uri)) urls.Add(href);
                }

                return urls.ToArray();
            }
            catch (Exception error)
            {
                ErrorLog.LogException(error);
                return new string[0];
            }
        }
        /// <summary>
        /// Extracts topic addresses from a listing page by reading the <c>href</c> of every
        /// anchor whose id starts with <c>thread_title_</c>.
        /// </summary>
        /// <remarks>
        /// Anchors whose parent markup contains <c>Sticky:</c> are skipped. Links that are not
        /// absolute are prefixed with <c>BaseUrl</c> and a slash.
        /// </remarks>
        /// <param name="html">Markup of the listing page.</param>
        /// <returns>The collected addresses; an empty array when there are none or parsing fails.</returns>
        public override string[] GetTopicUrls(string html)
        {
            if (html == null || html.Trim().Length == 0) return new string[0];

            var urls = new List<string>();
            var doc = new HtmlDocument();

            try
            {
                doc.LoadHtml(html);

                // SelectNodes can return null when nothing matches; a page without anchors is not an error
                HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//a");

                if (nodes == null) return new string[0];

                foreach (HtmlNode n in nodes)
                {
                    if (!n.GetAttributeValue("id", "").StartsWith("thread_title_", StringComparison.Ordinal)) continue;
                    if (n.ParentNode.InnerHtml.Contains("Sticky:")) continue;

                    string a = HttpUtility.HtmlDecode(n.GetAttributeValue("href", "")).Trim();

                    if (a.Length == 0) continue;

                    // Treat https links as absolute too; otherwise BaseUrl would be glued in front of them
                    bool isAbsolute = a.StartsWith("http:", StringComparison.OrdinalIgnoreCase) ||
                                      a.StartsWith("https:", StringComparison.OrdinalIgnoreCase);

                    urls.Add(isAbsolute ? a : this.BaseUrl + "/" + a);
                }

                return urls.ToArray();
            }
            catch (Exception error)
            {
                ErrorLog.LogException(error);
                return new string[0];
            }
        }
        /// <summary>
        /// Extracts topic addresses from the last table of a listing page that contains topic
        /// entries (<c>id="tid-link-</c>, <c>topic_toggle_folder</c> and the
        /// <c>&lt;!-- Begin Topic Entry</c> marker).
        /// </summary>
        /// <remarks>
        /// When a first cell of that table contains <c>&lt;b&gt;Forum Topics&lt;/b&gt;</c>, rows
        /// are ignored until the row carrying that marker has been passed. From each remaining
        /// row the first anchor with a <c>tid-link-</c> id and an absolute link is taken.
        /// </remarks>
        /// <param name="html">Markup of the listing page.</param>
        /// <returns>The collected addresses; an empty array when there are none or parsing fails.</returns>
        public override string[] GetTopicUrls(string html)
        {
            if (html == null || html.Trim().Length == 0) return new string[0];

            var allowAdd = true;
            var urls = new List<string>();
            var doc = new HtmlDocument();
            Uri uri;

            try
            {
                doc.LoadHtml(html);

                // SelectNodes can return null when nothing matches, so every result is checked
                HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//table");
                HtmlNode topicTable = null;

                if (tables != null)
                {
                    // Keep the LAST table that looks like a topic listing
                    foreach (HtmlNode t in tables)
                    {
                        string inner = t.InnerHtml;

                        if (inner.Contains("id=\"tid-link-") &&
                            inner.Contains("topic_toggle_folder") &&
                            inner.Contains("<!-- Begin Topic Entry "))
                        {
                            topicTable = t;
                        }
                    }
                }

                if (topicTable == null) return new string[0];

                // A "Forum Topics" separator means the rows before it must be skipped
                HtmlNodeCollection firstCells = topicTable.SelectNodes(".//td[1]");

                if (firstCells != null)
                {
                    foreach (HtmlNode cell in firstCells)
                    {
                        if (cell.InnerHtml.Contains("<b>Forum Topics</b>"))
                        {
                            allowAdd = false;
                            break;
                        }
                    }
                }

                HtmlNodeCollection rows = topicTable.SelectNodes(".//tr");

                if (rows == null) return new string[0];

                foreach (HtmlNode row in rows)
                {
                    if (!allowAdd)
                    {
                        HtmlNodeCollection cells = row.SelectNodes(".//td[1]");

                        if (cells != null && cells.Count > 0 && cells[0].InnerHtml.Contains("<b>Forum Topics</b>"))
                        {
                            allowAdd = true;
                        }

                        continue;
                    }

                    HtmlNodeCollection anchors = row.SelectNodes(".//a");

                    // A row without anchors is simply skipped instead of aborting the whole parse
                    if (anchors == null) continue;

                    foreach (HtmlNode link in anchors)
                    {
                        if (!link.GetAttributeValue("id", "").StartsWith("tid-link-", StringComparison.Ordinal)) continue;

                        string rawHref = link.GetAttributeValue("href", "");

                        // Accept https as well as http
                        bool isAbsolute = rawHref.StartsWith("http:", StringComparison.OrdinalIgnoreCase) ||
                                          rawHref.StartsWith("https:", StringComparison.OrdinalIgnoreCase);

                        if (!isAbsolute) continue;

                        // Only the first matching anchor of a row is considered
                        string href = HttpUtility.HtmlDecode(rawHref).Trim();

                        if (Uri.TryCreate(href, UriKind.Absolute, out uri)) urls.Add(href);

                        break;
                    }
                }

                return urls.ToArray();
            }
            catch (Exception error)
            {
                ErrorLog.LogException(error);
                return new string[0];
            }
        }
        /// <summary>
        /// Loads a topic page, follows the link around its "Reply With Quote" image and rebuilds
        /// the topic body from the quote the reply form is pre-filled with. The title is read
        /// from the <c>span.threadtitle</c> element of the topic page.
        /// </summary>
        /// <param name="url">Address of the topic page; also sent as the referer of both requests.</param>
        /// <returns>
        /// A <c>SiteTopic</c> with the decoded title and content, or <c>null</c> when the user is
        /// not logged in, the expected markup is missing, or an exception occurs (it is logged).
        /// </returns>
        public override SiteTopic GetTopic(string url)
        {
            if (!this.User.IsLoggedIn) return null;

            HtmlDocument doc = new HtmlDocument();
            HttpWebRequest req;
            HttpResult result;

            req = Http.Prepare(url);
            req.Method = "GET";
            req.Referer = url;

            try
            {
                result = this.AllowRedirects ? Http.HandleRedirects(Http.Request(req), false) : Http.Request(req);

                // Log the transport error first so it is not lost if loading the body throws
                ErrorLog.LogException(result.Error);
                doc.LoadHtml(result.Data);

                // SelectNodes can return null instead of an empty collection when nothing matches
                HtmlNodeCollection quoteImages = doc.DocumentNode.SelectNodes("//img[@alt='Reply With Quote']");
                HtmlNodeCollection titleNodes = doc.DocumentNode.SelectNodes("//span[@class='threadtitle']");

                if (quoteImages == null || quoteImages.Count == 0 || quoteImages[0].ParentNode == null ||
                    titleNodes == null || titleNodes.Count == 0)
                {
                    ErrorLog.LogException(new Exception(String.Format(
                        "GetTopic failed: quote link or thread title not found.\r\nUrl used: {0}.",
                        url
                    )));
                    return null;
                }

                string link = HttpUtility.HtmlDecode(quoteImages[0].ParentNode.GetAttributeValue("href", String.Empty));
                string title = HttpUtility.HtmlDecode(titleNodes[0].InnerText).Trim();

                // Treat https links as absolute too; otherwise BaseUrl would be glued in front of them
                bool isAbsolute = link.StartsWith("http:", StringComparison.OrdinalIgnoreCase) ||
                                  link.StartsWith("https:", StringComparison.OrdinalIgnoreCase);

                req = Http.Prepare(isAbsolute ? link : this.BaseUrl + "/" + link);
                req.Method = "GET";
                req.Referer = url;

                result = this.AllowRedirects ? Http.HandleRedirects(Http.Request(req), false) : Http.Request(req);

                ErrorLog.LogException(result.Error);
                doc.LoadHtml(result.Data);

                HtmlNodeCollection postNodes = doc.DocumentNode.SelectNodes("//textarea[@name='message']");

                if (postNodes == null || postNodes.Count == 0)
                {
                    ErrorLog.LogException(new Exception(String.Format(
                        "GetTopic failed: reply form text not found.\r\nUrl used: {0}.",
                        link
                    )));
                    return null;
                }

                string content = postNodes[0].InnerText;

                // Drop everything up to and including the first ']' (the opening quote tag)
                content = HttpUtility.HtmlDecode(content.Substring(content.IndexOf(']') + 1)).Trim();

                // Remove the closing tag only when it is really there, instead of blindly
                // cutting the last eight characters
                if (content.EndsWith("[/quote]", StringComparison.OrdinalIgnoreCase))
                {
                    content = content.Substring(0, content.Length - "[/quote]".Length);
                }

                return new SiteTopic(
                    title.Trim(),
                    content.Trim(),
                    0, 0, url
                );
            }
            catch (Exception error)
            {
                ErrorLog.LogException(error);
                return null;
            }
        }
        /// <summary>
        /// Extracts topic addresses from a listing page by reading the <c>href</c> of every
        /// anchor whose class contains <c>topictitle</c>.
        /// </summary>
        /// <remarks>
        /// When the anchor's grandparent is a <c>tr</c> or <c>td</c>, the topic is skipped if any
        /// image below that element has <c>announce</c> or <c>sticky</c> in its <c>src</c>.
        /// Otherwise it is skipped if the markup of the grandparent's parent contains
        /// <c>announce_</c> or <c>sticky_</c>. Leading '.' and '/' characters are trimmed from
        /// the link, and links that are not absolute are prefixed with <c>BaseUrl</c> and a slash.
        /// </remarks>
        /// <param name="html">Markup of the listing page.</param>
        /// <returns>The collected addresses; an empty array when there are none or parsing fails.</returns>
        public override string[] GetTopicUrls(string html)
        {
            if (html == null || html.Trim().Length == 0) return new string[0];

            var urls = new List<string>();
            var doc = new HtmlDocument();

            try
            {
                doc.LoadHtml(html);

                // SelectNodes can return null when nothing matches; a page without anchors is not an error
                HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes("//a");

                if (anchors == null) return new string[0];

                foreach (HtmlNode a in anchors)
                {
                    if (!a.GetAttributeValue("class", "").Contains("topictitle")) continue;

                    HtmlNode p = (a.ParentNode != null) ? a.ParentNode.ParentNode : null;

                    // Without a grandparent there is no row to inspect; skip rather than fail
                    if (p == null) continue;

                    bool allowAdd;

                    if (String.Equals(p.Name, "tr", StringComparison.OrdinalIgnoreCase) ||
                        String.Equals(p.Name, "td", StringComparison.OrdinalIgnoreCase))
                    {
                        allowAdd = true;

                        // A row without images simply has no announce/sticky marker
                        HtmlNodeCollection images = p.SelectNodes(".//img");

                        if (images != null)
                        {
                            foreach (HtmlNode img in images)
                            {
                                string src = img.GetAttributeValue("src", "");

                                if (src.Contains("announce") || src.Contains("sticky"))
                                {
                                    allowAdd = false;
                                    break;
                                }
                            }
                        }
                    }
                    else
                    {
                        string outer = (p.ParentNode != null) ? p.ParentNode.InnerHtml : null;

                        allowAdd = outer != null && !outer.Contains("announce_") && !outer.Contains("sticky_");
                    }

                    if (!allowAdd) continue;

                    string url = HttpUtility.HtmlDecode(a.GetAttributeValue("href", "")).TrimStart('.', '/');

                    // An empty link would otherwise add the bare site address as a topic
                    if (url.Length == 0) continue;

                    // Treat https links as absolute too; otherwise BaseUrl would be glued in front of them
                    bool isAbsolute = url.StartsWith("http:", StringComparison.OrdinalIgnoreCase) ||
                                      url.StartsWith("https:", StringComparison.OrdinalIgnoreCase);

                    urls.Add(isAbsolute ? url : this.BaseUrl + "/" + url);
                }

                return urls.ToArray();
            }
            catch (Exception error)
            {
                ErrorLog.LogException(error);
                return new string[0];
            }
        }
Exemple #10
0
        /// <summary>
        /// Loads a topic page, follows its first <c>posting.php?mode=quote</c> link and rebuilds
        /// the topic title and body from the subject field and the quote the reply form is
        /// pre-filled with.
        /// </summary>
        /// <param name="url">Address of the topic page; also sent as the referer of both requests.</param>
        /// <returns>
        /// A <c>SiteTopic</c> with the decoded title and content, or <c>null</c> when the user is
        /// not logged in, the expected markup is missing, or an exception occurs (it is logged).
        /// </returns>
        public override SiteTopic GetTopic(string url)
        {
            if (!this.User.IsLoggedIn) return null;

            HtmlDocument doc = new HtmlDocument();
            HttpWebRequest req;
            HttpResult result;

            req = Http.Prepare(url);
            req.Method = "GET";
            req.Referer = url;

            try
            {
                result = this.AllowRedirects ? Http.HandleRedirects(Http.Request(req), false) : Http.Request(req);

                // Log the transport error first so it is not lost if loading the body throws
                ErrorLog.LogException(result.Error);
                doc.LoadHtml(result.Data);

                // SelectNodes can return null instead of an empty collection when nothing matches
                HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//a");
                string link = null;

                if (nodes != null)
                {
                    // The first anchor pointing at the quote form is the one to follow
                    foreach (HtmlNode n in nodes)
                    {
                        string href = HttpUtility.HtmlDecode(n.GetAttributeValue("href", ""));

                        if (href.Contains("posting.php?mode=quote"))
                        {
                            link = href.TrimStart('.', '/');
                            break;
                        }
                    }
                }

                if (link == null)
                {
                    ErrorLog.LogException(new Exception(String.Format(
                        "GetTopic failed: no quote link found.\r\nUrl used: {0}.",
                        url
                    )));
                    return null;
                }

                // Treat https links as absolute too; otherwise BaseUrl would be glued in front of them
                bool isAbsolute = link.StartsWith("http:", StringComparison.OrdinalIgnoreCase) ||
                                  link.StartsWith("https:", StringComparison.OrdinalIgnoreCase);

                req = Http.Prepare(isAbsolute ? link : this.BaseUrl + "/" + link);
                req.Method = "GET";
                req.Referer = url;

                result = this.AllowRedirects ? Http.HandleRedirects(Http.Request(req), false) : Http.Request(req);

                ErrorLog.LogException(result.Error);
                doc.LoadHtml(result.Data);

                HtmlNodeCollection subjectNodes = doc.DocumentNode.SelectNodes("//input[@name='subject']");
                HtmlNodeCollection postNodes = doc.DocumentNode.SelectNodes("//textarea[@name='message']");

                if (subjectNodes == null || subjectNodes.Count == 0 || postNodes == null || postNodes.Count == 0)
                {
                    ErrorLog.LogException(new Exception(String.Format(
                        "GetTopic failed: reply form subject or text not found.\r\nUrl used: {0}.",
                        link
                    )));
                    return null;
                }

                string title = subjectNodes[0].GetAttributeValue("value", String.Empty);
                string content = postNodes[0].InnerText;

                // Keep only what follows the first ':' of the subject
                title = HttpUtility.HtmlDecode(title);
                title = title.Substring(title.IndexOf(':') + 1).Trim();

                // Drop everything up to and including the first ']' (the opening quote tag)
                content = HttpUtility.HtmlDecode(content.Substring(content.IndexOf(']') + 1)).Trim();

                // Remove the closing tag only when it is really there, instead of blindly
                // cutting the last eight characters
                if (content.EndsWith("[/quote]", StringComparison.OrdinalIgnoreCase))
                {
                    content = content.Substring(0, content.Length - "[/quote]".Length);
                }

                return new SiteTopic(
                    title.Trim(),
                    content.Trim(),
                    0, 0, url
                );
            }
            catch (Exception error)
            {
                ErrorLog.LogException(error);
                return null;
            }
        }
Exemple #11
0
        /// <summary>
        /// Extracts topic addresses from a listing page built from <c>table.forumline</c>
        /// elements with <c>a.topictitle</c> anchors.
        /// </summary>
        /// <remarks>
        /// The last <c>forumline</c> table that contains a <c>topictitle</c> class and the text
        /// <c>Replies</c> is inspected. If it holds a <c>row3</c> separator cell, rows are ignored
        /// until a separator mentioning <c>Topics</c> (but not <c>Sticky</c>) with
        /// <c>colspan="6"</c> has been passed, and the first anchor of each later row that is not
        /// marked as announcement or sticky is taken. Without such a separator every
        /// <c>a.topictitle</c> anchor of the page that is not marked is taken. Links that are not
        /// absolute are prefixed with <c>BaseUrl</c> and a slash.
        /// </remarks>
        /// <param name="html">Markup of the listing page.</param>
        /// <returns>The collected addresses; an empty array when there are none or parsing fails.</returns>
        public override string[] GetTopicUrls(string html)
        {
            if (html == null || html.Trim().Length == 0) return new string[0];

            var urls = new List<string>();
            var doc = new HtmlDocument();
            bool allowAdd = false;

            // Treat https links as absolute too; otherwise BaseUrl would be glued in front of them
            Func<string, string> toAbsolute = href =>
                (href.StartsWith("http:", StringComparison.OrdinalIgnoreCase) ||
                 href.StartsWith("https:", StringComparison.OrdinalIgnoreCase))
                    ? href
                    : this.BaseUrl + "/" + href;

            // True when the anchor's parent markup flags it as an announcement or sticky topic
            Func<HtmlNode, bool> isPinned = a =>
                a.ParentNode.InnerHtml.Contains("Announcement:</b>") ||
                a.ParentNode.InnerHtml.Contains("Sticky:</b>");

            try
            {
                doc.LoadHtml(html);

                // SelectNodes can return null when nothing matches, so every result is checked
                HtmlNodeCollection candidates = doc.DocumentNode.SelectNodes("//table[@class='forumline']");
                HtmlNode topicTable = null;

                if (candidates != null)
                {
                    // Keep the LAST table that looks like a topic listing
                    foreach (HtmlNode t in candidates)
                    {
                        string inner = t.InnerHtml;

                        if ((inner.Contains("class=\"topictitle\"") || inner.Contains("class='topictitle'")) &&
                            inner.Contains("Replies"))
                        {
                            topicTable = t;
                        }
                    }
                }

                if (topicTable == null) return new string[0];

                if (topicTable.InnerHtml.Contains("<td class=\"row3\" colspan=\"6\" height=\"21\">"))
                {
                    HtmlNodeCollection rows = topicTable.SelectNodes(".//tr");

                    if (rows == null) return new string[0];

                    foreach (HtmlNode row in rows)
                    {
                        if (!allowAdd)
                        {
                            HtmlNodeCollection markers = row.SelectNodes(".//td[@class='row3']");

                            if (markers != null && markers.Count > 0 &&
                                markers[0].InnerHtml.Contains("Topics") &&
                                !markers[0].InnerHtml.Contains("Sticky") &&
                                markers[0].GetAttributeValue("colspan", "") == "6")
                            {
                                allowAdd = true;
                            }

                            continue;
                        }

                        HtmlNodeCollection titles = row.SelectNodes(".//a[@class='topictitle']");

                        // A row without a title anchor is skipped instead of aborting the whole parse
                        if (titles == null) continue;

                        foreach (HtmlNode a in titles)
                        {
                            if (isPinned(a)) continue;

                            // Only the first eligible anchor of a row is considered
                            string href = HttpUtility.HtmlDecode(a.GetAttributeValue("href", "")).Trim();

                            if (href.Length > 0) urls.Add(toAbsolute(href));

                            break;
                        }
                    }
                }
                else
                {
                    HtmlNodeCollection titles = doc.DocumentNode.SelectNodes("//a[@class='topictitle']");

                    if (titles != null)
                    {
                        foreach (HtmlNode a in titles)
                        {
                            if (isPinned(a)) continue;

                            string href = HttpUtility.HtmlDecode(a.GetAttributeValue("href", "")).Trim();

                            if (href.Length > 0) urls.Add(toAbsolute(href));
                        }
                    }
                }

                return urls.ToArray();
            }
            catch (Exception error)
            {
                ErrorLog.LogException(error);
                return new string[0];
            }
        }
Exemple #12
0
        /// <summary>
        /// Logs the user in by posting the credentials together with the <c>auth_key</c> read
        /// from the login page, then verifies the login through the session cookies.
        /// </summary>
        /// <remarks>
        /// The work is passed to <c>AsyncHelper.Run</c>. <c>LogoutUser</c> is called before the
        /// login request is sent. The login counts as successful when a cookie ending in
        /// <c>member_id</c> holds a positive number and a cookie ending in <c>pass_hash</c> holds
        /// more than one character. <c>User</c> is assigned and <c>OnLogin</c> is raised both
        /// when the login succeeds and when it fails.
        /// </remarks>
        /// <param name="username">Account name to log in with.</param>
        /// <param name="password">Password of the account.</param>
        public override void LoginUser(string username, string password)
        {
            AsyncHelper.Run(() =>
            {
                var loginPath = "/index.php?app=core&module=global&section=login";
                var html = Http.Get(this.BaseUrl + loginPath).Data;
                var doc = new HtmlDocument();

                // LoadHtml rejects null, so a missing body is treated as an empty page
                doc.LoadHtml(html ?? String.Empty);

                // SelectSingleNode returns null when the input is absent. In that case an empty
                // key is sent and the regular "login failed" path below reports the outcome,
                // instead of the worker dying before OnLogin is raised.
                var auth = doc.DocumentNode.SelectSingleNode("//input[@name='auth_key']");
                string authKey = (auth != null) ? auth.GetAttributeValue("value", "") : "";

                string url = this.BaseUrl + this.LoginPath;
                var details = new SiteLoginDetails(false, username, password);
                var data = String.Format(
                    Res.IPBoard_314_Login,
                    details.GetUrlSafeUsername(this.SiteEncoding),
                    details.GetUrlSafePassword(this.SiteEncoding),
                    authKey,
                    HttpUtility.UrlEncode(this.BaseUrl + loginPath, this.SiteEncoding)
                );

                byte[] rawData = this.SiteEncoding.GetBytes(data);
                int check = 0;
                int parse = -1;

                this.LogoutUser();

                HttpWebRequest req = Http.Prepare(url);

                req.Method = "POST";
                req.Referer = url;
                req.ContentType = Res.FormContentType;
                req.ContentLength = rawData.Length;

                // Dispose the request stream even when writing fails
                using (Stream stream = req.GetRequestStream())
                {
                    stream.Write(rawData, 0, rawData.Length);
                }

                HttpResult result = this.AllowRedirects ? Http.HandleRedirects(Http.Request(req), true) : Http.Request(req);

                // Did the request fail?
                if (result.HasError || Http.SessionCookies.Count < 2)
                {
                    ErrorLog.LogException(result.Error);
                    this.User = details;
                    this.OnLogin(this, new LoginEventArgs(details));
                    return;
                }

                if (result.HasResponse)
                {
                    try
                    {
                        this.SiteEncoding = Encoding.GetEncoding(result.Response.CharacterSet);
                    }
                    catch (ArgumentException error)
                    {
                        // Missing or unknown charset: keep the encoding already in use
                        ErrorLog.LogException(error);
                    }
                }

                // Check if we did login
                foreach (Cookie c in Http.GetDomainCookies(req.RequestUri))
                {
                    if (c.Name.EndsWith("member_id", StringComparison.Ordinal))
                    {
                        if (c.Value.Length > 0 && int.TryParse(c.Value, out parse) && parse > 0) check++;
                    }
                    else if (c.Name.EndsWith("pass_hash", StringComparison.Ordinal))
                    {
                        if (c.Value.Length > 1) check++;
                    }
                }

                if (check > 1)
                {
                    details.IsLoggedIn = true;

                    foreach (var c in Http.GetDomainCookies(req.RequestUri))
                    {
                        details.Cookies.Add(c);
                    }
                }
                else
                {
                    var error = new Exception(String.Format(
                        "Login check failed for '{0}'.\r\nCheck count: {1}.",
                        this.BaseUrl,
                        check
                    ));

                    ErrorLog.LogException(error);
                }

                this.User = details;
                this.OnLogin(this, new LoginEventArgs(details));
            });
        }