Example #1
0
        /// <summary>
        /// Posts to the 302-rel endpoint and checks the title of the document that
        /// is finally returned.
        /// </summary>
        public void followsRelativeRedirect()
        {
            // Per the original note, the endpoint redirects to ./ (i.e. /tools/).
            Document landingPage = NSoupClient
                                   .Connect("http://direct.infohound.net/tools/302-rel.pl")
                                   .Post();

            bool titleMatches = landingPage.Title.Contains("HTML Tidy Online");

            Assert.IsTrue(titleMatches);
        }
Example #2
0
        /// <summary>
        /// Requests the sign-in page with GET and prints the cookies found on the response.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Execute() produces the response, not the connection, so the connection is
            // kept un-executed here rather than casting the result of Execute() to IConnection.
            IConnection loginformConnection = NSoupClient.Connect(signInURL).Method(Method.Get);
            IResponse   response            = loginformConnection.Execute();

            // Writing the dictionary object itself would only print its type name,
            // so each cookie is listed as name=value instead.
            foreach (var cookie in response.Cookies())
            {
                System.Console.WriteLine(cookie.Key + "=" + cookie.Value);
            }
        }
Example #3
0
        /// <summary>
        /// Fetches the 302 endpoint with GET and checks the title of the document
        /// that is finally returned.
        /// </summary>
        public void followsTempRedirect()
        {
            // Per the original note, the endpoint redirects to http://jsoup.org
            Document redirected = NSoupClient
                                  .Connect("http://direct.infohound.net/tools/302.pl")
                                  .Get();

            bool titleMentionsJsoup = redirected.Title.Contains("jsoup");

            Assert.IsTrue(titleMentionsJsoup);
        }
Example #4
0
 /// <summary>
 /// Downloads the story page at <c>strStoryUrl</c> and extracts its title, author,
 /// description and cover image URL.
 /// </summary>
 /// <returns>A <c>Stories</c> instance populated from the page.</returns>
 /// <remarks>
 /// Exceptions from the download or from a missing element propagate to the caller
 /// unchanged. The former <c>catch { throw objEx; }</c> wrapper was removed because it
 /// only reset the stack trace of the exception it re-threw.
 /// </remarks>
 public Stories GetInfo()
 {
     Document objDoc = NSoupClient.Connect(strStoryUrl)
                       .UserAgent(ConstValue.USER_AGENT_CHROME)
                       .Get();

     // Use UTF-8 for anything serialised from this document (e.g. Html() below).
     OutputSettings settings = new OutputSettings();
     settings.SetEncoding(Encoding.UTF8);
     objDoc.OutputSettings(settings);

     string  strTitle     = objDoc.Title;
     Element objEleAuthor = objDoc.Select(".info > div > a[itemprop=author]").First;
     string  strAuthor    = objEleAuthor.Text();
     Element objEleDesc   = objDoc.Select("div[itemprop=description]").First;
     // The description is kept as markup, with HTML entities decoded.
     string  strDesc      = System.Web.HttpUtility.HtmlDecode(objEleDesc.Html());
     Element objEleCover  = objDoc.Select("img[itemprop=image]").First;
     string  strCover     = objEleCover.Attr("src");

     return new Stories()
     {
         Author = strAuthor,
         Cover  = strCover,
         Desc   = strDesc,
         Title  = strTitle
     };
 }
Example #5
0
        /// <summary>
        /// With redirect following switched off, the 302 status itself must be
        /// visible on the response.
        /// </summary>
        public void doesntRedirectIfSoConfigured()
        {
            IConnection con = NSoupClient.Connect("http://direct.infohound.net/tools/302.pl").FollowRedirects(false).IgnoreContentType(true);
            IResponse   res = con.Execute();

            // AreEqual (rather than IsTrue on ==) reports the actual status code on failure.
            Assert.AreEqual((System.Net.HttpStatusCode) 302, res.StatusCode());
        }
Example #6
0
        /// <summary>
        /// Fetches the 302-secure endpoint (with one data pair attached) and checks
        /// the title of the document that is finally returned.
        /// </summary>
        public void followsRedirectToHttps()
        {
            // Per the original note, the endpoint redirects to https://www.google.com
            IConnection request = NSoupClient.Connect("http://direct.infohound.net/tools/302-secure.pl");
            request.Data("id", "5");

            Document landingPage = request.Get();
            bool     isGoogle    = landingPage.Title.Contains("Google");

            Assert.IsTrue(isGoogle);
        }
Example #7
0
        /// <summary>
        /// With IgnoreHttpErrors(true) a 404 answer is returned as a normal response
        /// whose body can still be parsed.
        /// </summary>
        public void ignoresExceptionIfSoConfigured()
        {
            IResponse notFound = NSoupClient
                                 .Connect("http://direct.infohound.net/tools/404")
                                 .IgnoreHttpErrors(true)
                                 .Execute();
            Document errorPage = notFound.Parse();

            Assert.AreEqual(System.Net.HttpStatusCode.NotFound, notFound.StatusCode());
            Assert.AreEqual("404 Not Found", errorPage.Select("h1").First.Text());
        }
Example #8
0
        /// <summary>
        /// Checks that a cookie set on the first hit is exposed on the final response
        /// and is echoed back by the page reached through the redirect.
        /// </summary>
        public void redirectsResponseCookieToNextResponse()
        {
            IResponse finalResponse = NSoupClient
                                      .Connect("http://direct.infohound.net/tools/302-cookie.pl")
                                      .Execute();

            // Cookie set on the first hit is present on the final result.
            Assert.AreEqual("asdfg123", finalResponse.Cookie("token"));

            Document echoed = finalResponse.Parse();

            // The redirected hit reported the cookie header it received.
            Assert.AreEqual("token=asdfg123; uid=jhy", ihVal("HTTP_COOKIE", echoed));
        }
Example #9
0
        /// <summary>
        /// Sends a POST to the 302 endpoint and checks that the final response
        /// reports the redirect target URL and the GET method.
        /// </summary>
        public void postRedirectsFetchWithGet()
        {
            IResponse finalResponse = NSoupClient
                                      .Connect("http://direct.infohound.net/tools/302.pl")
                                      .Data("Argument", "Riposte")
                                      .Method(NSoup.Method.Post)
                                      .Execute();

            string landedOn = finalResponse.Url().ToString();

            Assert.AreEqual("http://jsoup.org/", landedOn);
            Assert.AreEqual(NSoup.Method.Get, finalResponse.Method());
        }
Example #10
0
        /// <summary>
        /// Downloads and parses the page at <paramref name="URL"/>, applying the
        /// <c>Timeout</c> value defined elsewhere in this type.
        /// </summary>
        /// <param name="URL">Address of the page to fetch.</param>
        /// <returns>The parsed document.</returns>
        private static Document GetDocumentFromUrl(string URL)
        {
            // Connect, apply the timeout, then fetch in one fluent chain.
            return NSoupClient.Connect(URL)
                   .Timeout(Timeout)
                   .Get();
        }
Example #11
0
        /// <summary>
        /// Runs a Google image search for <paramref name="question"/> and collects the
        /// image URLs found in the result tiles.
        /// </summary>
        /// <param name="question">Raw (un-encoded) search text; commas are removed before searching.</param>
        /// <param name="userAgent">User-Agent header value sent with every request.</param>
        /// <returns>
        /// The image URLs found. If an exception occurs the list gathered so far is
        /// returned and the exception is stored in <c>Error</c>.
        /// </returns>
        public List <String> FindImages(String question, String userAgent)
        {
            List <String> imagesList = new List <String>();

            try
            {
                // Percent-encode the search text so spaces, '&', '#', '+', ... cannot
                // corrupt or extend the query string.
                String googleUrl = "https://www.google.com/search?tbm=isch&q=" + Uri.EscapeDataString(question.Replace(",", ""));

                NSoup.Nodes.Document htmlDoc = NSoupClient.Connect(googleUrl).UserAgent(userAgent).Timeout(10 * 1000).Get();
                // Follow meta-refresh / script based redirects; htmlDoc may be replaced.
                checkForRedirectsOnHTMLDocument(ref htmlDoc, userAgent);

                // Everything between "ou&quot;:&quot;" and "&quot;"; built once, not once per tile.
                Regex originalUrlPattern = new Regex("ou&quot;:&quot;(.*?)&quot;");

                // div with class="y yi" containing div with class="rg_di rg_bx rg_el ivg-i"
                NSoup.Select.Elements div_with_images = htmlDoc.Select("div.y.yi div.rg_di.rg_bx.rg_el.ivg-i");
                foreach (NSoup.Nodes.Element div_with_image in div_with_images)
                {
                    NSoup.Nodes.Element rg_meta_div = div_with_image.Select("div.rg_meta").First();
                    Match match = originalUrlPattern.Match(rg_meta_div.ToString());

                    // Match() never returns null; Success is the meaningful check.
                    if (match.Success && match.Groups[1].Value != String.Empty)
                    {
                        imagesList.Add(match.Groups[1].Value);
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: remember the failure and return what was collected.
                this.Error = ex;
            }

            return imagesList;
        }
Example #12
0
 /// <summary>
 /// Downloads the page at <c>url</c> into <c>doc</c> and fills the statistics
 /// fields from values matched by <c>regularMatchStr</c>.
 /// </summary>
 public static void GetData()
 {
     // First argument shared by every regularMatchStr call below.
     const string statisticsKey = "getStatisticsService";

     // Fetch the Document object directly from the url.
     doc = NSoupClient.Connect(url).Get();

     // NOTE(review): the original comment here spoke of selecting the element with
     // id "artContent" and its <p> tags, which does not match the code below.
     string modifyTime = regularMatchStr(statisticsKey, "modifyTime\":(.+?),");
     updateTime     = ConvertStringToDateTime(modifyTime).ToString();
     confirmedCount = regularMatchStr(statisticsKey, "confirmedCount\":(.+?),");
     suspectedCount = regularMatchStr(statisticsKey, "suspectedCount\":(.+?),");
     deadCount      = regularMatchStr(statisticsKey, "deadCount\":(.+?),");
     curedCount     = regularMatchStr(statisticsKey, "curedCount\":(.+?),");
 }
Example #13
0
        /// <summary>
        /// An unsupported charset name ("UFT8") in the Content-Type header must be
        /// ignored so that parsing falls back to the default charset.
        /// </summary>
        public void handlesDodgyCharset()
        {
            IResponse response = NSoupClient
                                 .Connect("http://direct.infohound.net/tools/bad-charset.pl")
                                 .Execute();

            // Header carries the bogus charset name verbatim ...
            Assert.AreEqual("text/html; charset=UFT8", response.Header("Content-Type"));
            // ... which is not supported, so no charset is reported before parsing.
            Assert.AreEqual(null, response.Charset());

            // Parsing would throw if the unsupported charset were actually used.
            Document parsed = response.Parse();

            Assert.IsTrue(parsed.Text().Contains("Hello!"));
            // After parsing, the default charset is reported.
            Assert.AreEqual("UTF-8", response.Charset());
        }
Example #14
0
        /// <summary>
        /// Posts form data (a repeated key and a non-ASCII pair) plus a cookie to the
        /// echo URL and checks what the server reports back.
        /// </summary>
        public void doesPost()
        {
            // NOTE(review): these non-ASCII literals look mis-encoded (possibly CJK text
            // read with the wrong code page) - confirm the file encoding. Kept verbatim.
            IConnection request = NSoupClient.Connect(echoURL)
                                  .Data("uname", "Jsoup", "uname", "Jonathan", "ח™¾", "ו÷¦ה¸€ה¸‹")
                                  .Cookie("auth", "token");

            Document echoed = request.Post();

            Assert.AreEqual("POST", ihVal("REQUEST_METHOD", echoed));
            // Disabled: current proxy removes gzip on post.
            //Assert.AreEqual("gzip", ihVal("HTTP_ACCEPT_ENCODING", echoed));
            Assert.AreEqual("auth=token", ihVal("HTTP_COOKIE", echoed));
            Assert.AreEqual("ו÷¦ה¸€ה¸‹", ihVal("ח™¾", echoed));
            Assert.AreEqual("Jsoup, Jonathan", ihVal("uname", echoed));
        }
Example #15
0
        /// <summary>
        /// Walks the Trashbox Android catalogue pages from 779 down to 1, turning every
        /// listed application into an <c>AndroidApp</c> (name, tags, Android version)
        /// appended to <c>programms</c>. Progress and errors go through <c>printer</c>;
        /// <c>td</c> is refreshed after every page.
        /// </summary>
        /// <remarks>
        /// The method contains no <c>await</c>, so the former <c>async</c> modifier had no
        /// effect on how the loop runs and was removed. When an error occurs after ten
        /// errors have already been counted, the data gathered so far is written to
        /// <c>./TrashDUMP.json</c> and the loop stops.
        /// </remarks>
        void lookup()
        {
            int exceptions = 0;

            for (int i = 779; i != 0; i--)
            {
                Invoke(new MethodInvoker(() => printer("Добываем страницу номер " + i + "...")));
                try
                {
                    Document d = NSoupClient.Connect("https://trashbox.ru/public/progs/tags/os_android/page_topics/" + i.ToString()).Get();
                    // Re-parse only the topic list container so later selectors are scoped to it.
                    d = NSoupClient.Parse(d.Select("div.div_content_cat_topics").Html());
                    Elements apps = d.Select("div.div_topic_cat_content");
                    foreach (Element app in apps)
                    {
                        List <string> tags    = new List <string>();
                        String        name    = app.Select("span.div_topic_tcapt_content").First().Text();
                        string        version = app.Select("span.div_topic_cat_tag_os_android").First().Text();
                        // Strip the "Android " prefix and the " и выше" suffix from the version label.
                        version = version.Replace("Android ", "").Replace(" и выше", "");
                        Elements _tags = app.Select("div.div_topic_cat_tags a");
                        foreach (Element tag in _tags)
                        {
                            tags.Add(tag.Text());
                        }
                        programms.Add(new AndroidApp(name, tags, version));
                    }
                }
                catch (Exception ex)
                {
                    if (exceptions == 10)
                    {
                        Invoke(new MethodInvoker(() => printer("Случилось 10 ошибок! Завершаем работу...")));
                        td = new TrashboxDump(programms);
                        // File.WriteAllText creates or overwrites the file by itself, so the
                        // separate FileStream open/close that used to precede it was dropped.
                        File.WriteAllText("./TrashDUMP.json", JsonConvert.SerializeObject(td));
                        break;
                    }
                    Invoke(new MethodInvoker(() => printer("Ошибка при добыче страницы " + i + "!")));
                    Invoke(new MethodInvoker(() => printer(ex.Message)));
                    exceptions++;
                }
                finally
                {
                    // Runs for successful, failed and aborting iterations alike.
                    Invoke(new MethodInvoker(() => pb.Value++));
                    td = new TrashboxDump(programms);
                }
            }
            td = new TrashboxDump(programms);
        }
        /// <summary>
        /// Fetches the same XML resource twice - once with an explicitly supplied XML
        /// parser, once with the default parser - and checks the results differ.
        /// </summary>
        public void testSupplyParserToConnection()
        {
            const string resource = "http://direct.infohound.net/tools/jsoup-xml-test.xml";

            Document viaXmlParser  = NSoupClient.Connect(resource)
                                     .Parser(NSoup.Parse.Parser.XmlParser())
                                     .Get();
            Document viaHtmlParser = NSoupClient.Connect(resource).Get();

            string xmlMarkup = TextUtil.StripNewLines(viaXmlParser.Html());

            Assert.AreEqual("<doc><val>One<val>Two</val>Three</val></doc>", xmlMarkup);
            Assert.AreNotSame(viaHtmlParser, viaXmlParser);
            // The HTML parser normalises the document and adds a head ...
            Assert.AreEqual(1, viaHtmlParser.Select("head").Count);
            // ... the XML parser does not.
            Assert.AreEqual(0, viaXmlParser.Select("head").Count);
        }
Example #17
0
        /// <summary>
        /// Issues a GET carrying a query string, an extra data pair, a user agent and a
        /// referrer, then checks what the echo service reports for each.
        /// </summary>
        public void doesGet()
        {
            Document echoed = NSoupClient.Connect(echoURL + "?what=the")
                              .UserAgent("Mozilla")
                              .Referrer("http://example.com")
                              .Data("what", "about & me?")
                              .Get();

            // The upstream expectation ends in "%3F"; here the hex digits are lower-case,
            // which the original comment attributes to HttpUtility.UrlEncode() and
            // accepts as a harmless difference.
            Assert.AreEqual("what=the&what=about+%26+me%3f", ihVal("QUERY_STRING", echoed));
            Assert.AreEqual("the, about & me?", ihVal("what", echoed));
            Assert.AreEqual("Mozilla", ihVal("HTTP_USER_AGENT", echoed));
            Assert.AreEqual("http://example.com", ihVal("HTTP_REFERER", echoed));
        }
Example #18
0
        /// <summary>
        /// Fetching the loop endpoint must fail with an IOException whose message
        /// mentions "Too many redirects".
        /// </summary>
        public void maximumRedirects()
        {
            bool sawRedirectFailure = false;

            try
            {
                // The fetched document is irrelevant; only the failure matters.
                NSoupClient.Connect("http://direct.infohound.net/tools/loop.pl").Get();
            }
            catch (System.IO.IOException redirectFailure)
            {
                Assert.IsTrue(redirectFailure.Message.Contains("Too many redirects"));
                sawRedirectFailure = true;
            }

            Assert.IsTrue(sawRedirectFailure);
        }
Example #19
0
        /// <summary>
        /// Downloads the IETT line list and adds one entry per line to <c>tmhatlar</c>
        /// in the form <c>counter*lineNumber*lineName</c>, advancing the counter <c>i</c>.
        /// </summary>
        private void hatlar()
        {
            IConnection connection = NSoupClient.Connect("http://www.iett.istanbul/tr/main/hatlar").UserAgent("Mozilla");

            connection.Timeout(30000);
            Document document = connection.Get();

            foreach (Element Hat in document.Select("h4.DetailLi_name"))
            {
                // Read the heading text once instead of three times.
                string text  = Hat.Text();
                int    index = text.IndexOf(' ');

                // Text up to the first space is the line number, the rest is the name.
                // Without a space the whole text is the number and the name stays empty
                // (previously Substring(0, -1) threw here).
                string hatNo  = index < 0 ? text.Trim() : text.Substring(0, index).Trim();
                string hatAdi = index < 0 ? string.Empty : text.Substring(index).Trim();

                tmhatlar.Items.Add(i + "*" + hatNo + "*" + hatAdi);
                i++;
            }
        }
Example #20
0
        /// <summary>
        /// Reads the cookies set along a redirect and checks they can be passed, as a
        /// map, to a second request.
        /// </summary>
        public void multiCookieSet()
        {
            IResponse redirected = NSoupClient
                                   .Connect("http://direct.infohound.net/tools/302-cookie.pl")
                                   .Execute();

            // Cookies set by the redirect.
            IDictionary <string, string> jar = redirected.Cookies();

            Assert.AreEqual("asdfg123", jar["token"]);
            Assert.AreEqual("jhy", jar["uid"]);

            // Send the whole map to the echo URL and check the header it received.
            Document echoed       = NSoupClient.Connect(echoURL).Cookies(jar).Get();
            var      cookieHeader = ihVal("HTTP_COOKIE", echoed);

            Assert.AreEqual("token=asdfg123; uid=jhy", cookieHeader);
        }
Example #21
0
        /// <summary>
        /// Downloads the listing page for <c>currentpage</c> and produces one
        /// <c>Post</c> per image found in each post container.
        /// </summary>
        /// <returns>The posts, in page order; one entry per image.</returns>
        public List <Post> LoadPosts()
        {
            var page   = NSoupClient.Connect($"http://pornreactor.cc/{currentpage}").Timeout(10000).Get();
            var result = new List <Post>();

            foreach (var container in page.Select("div.postContainer"))
            {
                // Values shared by every image of this container.
                var href     = container.Select("span.link_wr a.link").First().Attr("href");
                var postLink = "http://pornreactor.cc" + href;
                var nick     = container.Select(".uhead_nick a").First().Text();
                var tagNames = container.Select("h2.taglist b a").Select(tag => tag.Text()).ToList();

                result.AddRange(
                    container.Select(".post_content img")
                    .Select(image => new Post(postLink, nick, image.Attr("src"), tagNames)));
            }

            return result;
        }
Example #22
0
        /// <summary>
        /// Connecting with a non-http(s) scheme must raise InvalidOperationException
        /// with the documented message.
        /// </summary>
        public void exceptOnUnsupportedProtocol()
        {
            bool rejected = false;

            try
            {
                // The result is irrelevant; the call itself is expected to throw.
                NSoupClient.Connect("file://etc/passwd").Get();
            }
            catch (InvalidOperationException unsupported)
            {
                rejected = true;
                Assert.AreEqual("Only http & https protocols supported", unsupported.Message.ToString());
            }
            catch (IOException)
            {
                // Swallowed on purpose: 'rejected' stays false and the final assert fails.
            }

            Assert.IsTrue(rejected);
        }
Example #23
0
        /// <summary>
        /// Returns the jobs for a fixed query ("scala" within 25 of "Baltimore, MD"),
        /// including each posting's summary text. Results are cached in a JSON file
        /// named after the query; when that file exists it is used instead of the network.
        /// </summary>
        /// <returns>The cached or freshly downloaded job list.</returns>
        public static List <FullJob> GetJobs()
        {
            string apiKey = Keys.GetIndeedKey();

            string searchTerms = "scala";
            string location    = "Baltimore, MD";
            int    radius      = 25;

            var cacheFile = $"{searchTerms}_{location}_{radius}.json";

            // Cache hit: nothing to download.
            if (File.Exists(cacheFile))
            {
                return JsonConvert.DeserializeObject <List <FullJob> >(File.ReadAllText(cacheFile));
            }

            var queryUrl = IndeedQueryUtil.BuildBatchQueryFormatUrl(apiKey, searchTerms, location, radius);
            var fetched  = new List <FullJob>();

            foreach (var hit in IndeedQueryUtil.GetAllJobs(queryUrl, apiKey))
            {
                // One-second pause before each posting page is requested.
                Thread.Sleep(1000);

                var posting = NSoupClient.Connect(hit.Url).Timeout(5000).Get();
                var summary = posting.Select("#job_summary").Text;

                fetched.Add(new FullJob
                {
                    JobQueryResult = hit,
                    FullText       = summary
                });
            }

            // Store the result for the next call.
            File.WriteAllText(cacheFile, JsonConvert.SerializeObject(fetched, Formatting.Indented));

            return fetched;
        }
Example #24
0
        /// <summary>
        /// Downloads the novel's menu page, retrying after a three second pause when
        /// the download fails.
        /// </summary>
        /// <param name="novelModel">Supplies <c>MenuUrl</c>, the page to download.</param>
        /// <param name="retryCount">Retries already used; once it exceeds 5 a failure is final.</param>
        /// <param name="document">The downloaded document, or <c>null</c> when every attempt failed.</param>
        /// <remarks>
        /// The retry is a loop rather than recursion, and each failure is shown through
        /// <c>show_message</c> as soon as it happens. Previously every message was held
        /// back until all remaining retries had finished. As before, the final failure
        /// is not reported.
        /// </remarks>
        private void Download_PageContent(NovelModel novelModel, int retryCount, out Document document)
        {
            while (true)
            {
                try
                {
                    document = NSoupClient.Connect(novelModel.MenuUrl).Get();
                    return;
                }
                catch (Exception ex)
                {
                    if (retryCount > 5)
                    {
                        // Out of retries: give up and hand back null.
                        document = null;
                        return;
                    }

                    InvokeScriptFunction(() => { Document.InvokeScript("show_message", new[] { ex.Message, "danger" }); });
                }

                // Wait before the next attempt.
                Thread.Sleep(3 * 1000);
                retryCount++;
            }
        }
Example #25
0
        /// <summary>
        /// After a random 1-9 second pause, downloads the chapter page, strips
        /// <c>&lt;br/&gt;</c> tags and <c>&amp;nbsp;</c> entities from the element with id
        /// "content", and stores the text on the chapter record. Failures are shown
        /// through the page's <c>show_message</c> script function.
        /// </summary>
        /// <param name="chapterModel">Supplies the chapter's <c>Url</c> and <c>Id</c>.</param>
        public void Parse_Chapter(ChapterModel chapterModel)
        {
            // Random.Next(1, 10) yields 1..9.
            int pauseSeconds = new Random().Next(1, 10);

            Thread.Sleep(pauseSeconds * 1000);

            try
            {
                var page     = NSoupClient.Connect(chapterModel.Url).Get();
                var rawHtml  = page.GetElementById("content").Html();
                var withoutBreaks = Regex.Replace(rawHtml, "<br.*?/>", "");
                var text     = Regex.Replace(withoutBreaks, "&nbsp;", " ");

                // Only non-empty content is written back.
                if (text.Length > 0)
                {
                    using (var unitOfWork = new NovelUnitOfWork())
                    {
                        new ChapterDomainService(unitOfWork).Update(chapterModel.Id, chapter =>
                        {
                            chapter.Content         = text;
                            chapter.LastUpdatedTime = DateTime.Now;
                        });
                    }
                }
            }
            catch (Exception failure)
            {
                InvokeScriptFunction(() =>
                {
                    Document.InvokeScript("show_message", new[] { failure.Message, "danger" });
                });
            }
        }
Example #26
0
 /// <summary>
 /// Walks the story's paginated chapter list (<c>{strStoryUrl}/trang-N/</c>) and
 /// returns one <c>Chapters</c> entry per chapter link found.
 /// </summary>
 /// <param name="lngStoryId">Story id stored on every returned chapter.</param>
 /// <returns>All chapters found, in page order.</returns>
 /// <remarks>
 /// Download errors propagate unchanged; the former <c>catch { throw objEx; }</c>
 /// wrapper was removed because it only reset the stack trace. A page without the
 /// pagination element is treated as the last page, and list items without a link
 /// are skipped, instead of failing on a null reference.
 /// </remarks>
 public Chapters[] GetChapters(long lngStoryId)
 {
     List <Chapters> objChapterList = new List <Chapters>();
     int             intPage        = 1;
     bool            blnLastPage;
     do
     {
         Document objDocChapter = NSoupClient.Connect($"{strStoryUrl}/trang-{intPage}/#list-chapter")
                                  .UserAgent(ConstValue.USER_AGENT_CHROME)
                                  .Get();
         Elements arrChapters = objDocChapter.Select(".list-chapter");
         foreach (var objList in arrChapters)
         {
             foreach (var item in objList.Children)
             {
                 Element objChapterElement = item.Select("a").First;
                 if (objChapterElement == null)
                 {
                     // List item without a link: nothing to record.
                     continue;
                 }

                 objChapterList.Add(new Chapters()
                 {
                     StoryId = lngStoryId,
                     Status  = 0,
                     Url     = objChapterElement.Attr("href"),
                     Name    = objChapterElement.Attr("title"),
                 });
             }
         }

         // Check whether the last page has been reached: that is the case when the
         // element just before ".dropup.page-nav" is marked "active". A missing
         // pagination element or sibling also ends the walk.
         Element objEleLi         = objDocChapter.Select(".dropup.page-nav").First;
         Element objEleLiPrevious = objEleLi == null ? null : objEleLi.PreviousElementSibling;
         blnLastPage = objEleLiPrevious == null || objEleLiPrevious.HasClass("active");
         intPage++;
     } while (!blnLastPage);

     return objChapterList.ToArray();
 }
Example #27
0
        /// <summary>
        /// Looks for client-side redirects in <paramref name="htmlDoc"/> - a
        /// meta http-equiv=refresh tag, or a script assigning window.google.gbvu - and,
        /// when one is found, replaces <paramref name="htmlDoc"/> with the document
        /// fetched from the redirect target, then repeats the check on the new document.
        /// </summary>
        /// <param name="htmlDoc">Document to inspect; replaced in place when a redirect is followed.</param>
        /// <param name="userAgent">User-Agent header value used for follow-up requests.</param>
        /// <remarks>
        /// NOTE(review): the recursion has no depth limit, so a page that keeps
        /// redirecting would recurse indefinitely - confirm whether a cap is needed.
        /// </remarks>
        private static void checkForRedirectsOnHTMLDocument(ref NSoup.Nodes.Document htmlDoc, String userAgent)
        {
            //Checking if http-equiv=refresh
            // The elements iterated here were selected from the document as it was when
            // the loop started; htmlDoc itself may be replaced inside the loop.
            foreach (NSoup.Nodes.Element refresh in htmlDoc.Select("html head meta[http-equiv=refresh]"))
            {
                String matcher = refresh.Attr("content");
                // Captures everything after "url=" in the content attribute.
                Regex  regex   = new Regex("url=(.*)");
                var    v       = regex.Match(matcher);

                // With a single capture group Groups.Count is 2; an unsuccessful match
                // is filtered out by the empty-value check below.
                if (v != null && v.Groups.Count == 2)
                {
                    if (v.Groups[1].Value != String.Empty)
                    {
                        //Need to know the base uri:
                        String baseURI    = getBaseURI(htmlDoc);
                        // The captured target is appended to the base URI as-is.
                        String urlToFetch = baseURI + v.Groups[1].ToString();
                        htmlDoc = NSoupClient.Connect(urlToFetch).UserAgent(userAgent).Timeout(10 * 1000).Get();
                        // The freshly fetched page may redirect again.
                        checkForRedirectsOnHTMLDocument(ref htmlDoc, userAgent);
                    }
                }
            }

            //Javascript based redirection handling
            // Runs against whatever htmlDoc refers to after the meta-refresh pass above.
            foreach (NSoup.Nodes.Element javascript in htmlDoc.Select("html head script[type=text/javascript]"))
            {
                String matcher = javascript.ToString();
                // Captures the quoted value assigned to window.google.gbvu.
                // NOTE(review): the dots are unescaped, so they match any character.
                Regex  regex   = new Regex("window.google.gbvu='(.*?)'");
                var    v       = regex.Match(matcher);
                if (v != null && v.Groups.Count == 2)
                {
                    if (v.Groups[1].Value != String.Empty)
                    {
                        //Need to know the base uri:
                        String baseURI    = getBaseURI(htmlDoc);
                        String urlToFetch = baseURI + v.Groups[1].ToString().Replace("\\75", "=").Replace("\\075", "=").Replace("\\x3d", "=").Replace("\\46", "&").Replace("\\046", "&").Replace("\\x26", "&");     //turning the escaped octal/hex codes for '=' and '&' back into characters
                        htmlDoc = NSoupClient.Connect(urlToFetch).UserAgent(userAgent).Timeout(10 * 1000).Get();
                        // The freshly fetched page may redirect again.
                        checkForRedirectsOnHTMLDocument(ref htmlDoc, userAgent);
                    }
                }
            }
        }
Example #28
0
        /// <summary>
        /// Fetching a 404 URL must raise HttpStatusException carrying the status code
        /// and the requested URL.
        /// </summary>
        public void throwsExceptionOnError()
        {
            string missingUrl = "http://direct.infohound.net/tools/404";
            bool   sawHttpError = false;
            IConnection request = NSoupClient.Connect(missingUrl);

            try
            {
                // The document is irrelevant; the call is expected to throw.
                request.Get();
            }
            catch (HttpStatusException httpError)
            {
                sawHttpError = true;

                string described = string.Format("{0}. Status={1}, URL={2}", httpError.Message.ToString(), httpError.StatusCode, httpError.Url);

                Assert.AreEqual("HTTP error fetching URL. Status=404, URL=http://direct.infohound.net/tools/404", described);
                Assert.AreEqual(missingUrl, httpError.Url);
                Assert.AreEqual(404, httpError.StatusCode);
            }
            catch (System.IO.IOException)
            {
                // Swallowed on purpose: the flag stays false and the final assert fails.
            }

            Assert.IsTrue(sawHttpError);
        }
Example #29
0
 /// <summary>
 /// Downloads the front page and parses, as an integer, the text of the first
 /// element matching ".pagination_expanded .current".
 /// </summary>
 /// <returns>The parsed page number.</returns>
 public int GetLastPageNum()
 {
     var frontPage     = NSoupClient.Connect("http://pornreactor.cc/").Timeout(10000).Get();
     var currentMarker = frontPage.Select(".pagination_expanded .current").First();
     var pageLabel     = currentMarker.Text();

     return int.Parse(pageLabel);
 }
Example #30
0
        /// <summary>
        /// For every line listed in <c>tmhatlar</c>, downloads that line's page and
        /// writes its stops, grouped by direction, to <c>Duraklar.xml</c>. <c>durum</c>
        /// is incremented once per processed line.
        /// </summary>
        /// <remarks>
        /// The writer sits in a <c>using</c> block so the file handle is released even
        /// when a download or parse step throws part-way through.
        /// </remarks>
        private void duraklar()
        {
            using (XmlTextWriter yaz = new XmlTextWriter("Duraklar.xml", System.Text.UTF8Encoding.UTF8))
            {
                yaz.Formatting = Formatting.Indented;
                yaz.WriteStartDocument();
                yaz.WriteStartElement("hepsi");

                foreach (string item in tmhatlar.Items)
                {
                    // The second '*'-separated field of the item is the line number.
                    string[] ayir = item.Split('*');

                    IConnection connection = NSoupClient.Connect("http://www.iett.istanbul/tr/main/hatlar/" + ayir[1]).UserAgent("Mozilla");

                    connection.Timeout(600000);

                    Document document = connection.Get();

                    yaz.WriteStartElement("hatid");
                    yaz.WriteAttributeString("h", ayir[1]);
                    foreach (Element yon in document.Select("div.LineMapList ol"))
                    {
                        // Select the stops once; the same list drives the wrapper element
                        // and the per-stop loop.
                        var  duraklarListesi = yon.Select("li");
                        bool durakVar        = duraklarListesi.Count > 0;
                        int  i = 1;

                        if (durakVar)
                        {
                            yaz.WriteStartElement("yon");
                            string yn = "Geliş";
                            if (yon.Attr("data-station-direction") == "Going")
                            {
                                yn = "Gidiş";
                            }
                            yaz.WriteAttributeString("y", yn);
                        }
                        foreach (Element Durak in duraklarListesi)
                        {
                            yaz.WriteStartElement("durak");
                            yaz.WriteAttributeString("durakno", i.ToString());
                            yaz.WriteAttributeString("lat", Durak.Attr("data-station-lat"));
                            yaz.WriteAttributeString("lng", Durak.Attr("data-station-lng"));
                            yaz.WriteAttributeString("isim", Durak.Attr("data-station-name"));
                            yaz.WriteEndElement();

                            i++;
                        }
                        if (durakVar)
                        {
                            // Closes the "yon" element opened above.
                            yaz.WriteEndElement();
                        }
                    }
                    // Closes "hatid".
                    yaz.WriteEndElement();
                    durum++;
                }

                // Closes "hepsi"; disposing the writer then flushes and closes the file.
                yaz.WriteEndElement();
            }

            MessageBox.Show("Duraklar XML kayit edildi.");
        }