/// <summary>
/// Posts to an endpoint that answers with a relative 302 redirect and checks
/// that the document finally returned is the redirect target.
/// </summary>
public void followsRelativeRedirect()
{
    // The endpoint redirects to ./ - /tools/
    const string redirectingUrl = "http://direct.infohound.net/tools/302-rel.pl";

    Document landingPage = NSoupClient.Connect(redirectingUrl).Post();

    bool titleMatches = landingPage.Title.Contains("HTML Tidy Online");
    Assert.IsTrue(titleMatches);
}
/// <summary>
/// Fetches the sign-in page with a GET request and prints every cookie the
/// server set, one "name=value" pair per line.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Execute() already yields the response; the previous code cast that result to
    // IConnection, which a response object is not expected to support.
    IResponse response = NSoupClient.Connect(signInURL).Method(Method.Get).Execute();

    // Writing the dictionary itself would only print its type name, so list the entries.
    foreach (var cookie in response.Cookies())
    {
        System.Console.WriteLine(cookie.Key + "=" + cookie.Value);
    }
}
/// <summary>
/// Fetches an endpoint that answers with a temporary (302) redirect and checks
/// that the returned document is the redirect target.
/// </summary>
public void followsTempRedirect()
{
    // The endpoint redirects to http://jsoup.org
    const string redirectingUrl = "http://direct.infohound.net/tools/302.pl";

    Document landingPage = NSoupClient.Connect(redirectingUrl).Get();

    bool titleMatches = landingPage.Title.Contains("jsoup");
    Assert.IsTrue(titleMatches);
}
/// <summary>
/// Downloads the story page at <c>strStoryUrl</c> and extracts its title, author,
/// description and cover image URL.
/// </summary>
/// <returns>A <see cref="Stories"/> instance populated from the page.</returns>
/// <remarks>
/// Exceptions raised while fetching or parsing propagate to the caller unchanged.
/// The former <c>catch (Exception objEx) { throw objEx; }</c> wrapper was removed
/// because it only reset the stack trace.
/// NOTE(review): if a selector matches nothing, <c>First</c> presumably yields null and
/// the following member access throws; confirm whether that is the desired contract.
/// </remarks>
public Stories GetInfo()
{
    Document objDoc = NSoupClient.Connect(strStoryUrl)
        .UserAgent(ConstValue.USER_AGENT_CHROME)
        .Get();

    // Make the document render its HTML as UTF-8 before any markup is read back out.
    OutputSettings settings = new OutputSettings();
    settings.SetEncoding(Encoding.UTF8);
    objDoc.OutputSettings(settings);

    string strTitle = objDoc.Title;

    Element objEleAuthor = objDoc.Select(".info > div > a[itemprop=author]").First;
    string strAuthor = objEleAuthor.Text();

    // The description is kept as markup, with HTML entities decoded.
    Element objEleDesc = objDoc.Select("div[itemprop=description]").First;
    string strDesc = System.Web.HttpUtility.HtmlDecode(objEleDesc.Html());

    Element objEleCover = objDoc.Select("img[itemprop=image]").First;
    string strCover = objEleCover.Attr("src");

    return new Stories()
    {
        Author = strAuthor,
        Cover = strCover,
        Desc = strDesc,
        Title = strTitle
    };
}
/// <summary>
/// With redirect-following switched off, the raw 302 response must be returned
/// instead of the redirect target.
/// </summary>
public void doesntRedirectIfSoConfigured()
{
    IResponse res = NSoupClient.Connect("http://direct.infohound.net/tools/302.pl")
        .FollowRedirects(false)
        .IgnoreContentType(true)
        .Execute();

    bool gotRedirectStatus = res.StatusCode() == (System.Net.HttpStatusCode) 302;
    Assert.IsTrue(gotRedirectStatus);
}
/// <summary>
/// Fetches an http endpoint that redirects to an https site and checks that the
/// redirect target is what gets parsed.
/// </summary>
public void followsRedirectToHttps()
{
    // The endpoint redirects to https://www.google.com
    const string redirectingUrl = "http://direct.infohound.net/tools/302-secure.pl";

    Document landingPage = NSoupClient.Connect(redirectingUrl)
        .Data("id", "5")
        .Get();

    bool titleMatches = landingPage.Title.Contains("Google");
    Assert.IsTrue(titleMatches);
}
/// <summary>
/// With HTTP errors ignored, a 404 response must be returned and be parseable
/// rather than raising an exception.
/// </summary>
public void ignoresExceptionIfSoConfigured()
{
    IResponse res = NSoupClient.Connect("http://direct.infohound.net/tools/404")
        .IgnoreHttpErrors(true)
        .Execute();
    Document errorPage = res.Parse();

    Assert.AreEqual(System.Net.HttpStatusCode.NotFound, res.StatusCode());

    string heading = errorPage.Select("h1").First.Text();
    Assert.AreEqual("404 Not Found", heading);
}
/// <summary>
/// Checks that a cookie set on the redirecting response is both visible on the
/// final response and sent along with the redirected request.
/// </summary>
public void redirectsResponseCookieToNextResponse()
{
    IResponse res = NSoupClient.Connect("http://direct.infohound.net/tools/302-cookie.pl").Execute();

    // Cookies set on the first hit are presented in the final result.
    string token = res.Cookie("token");
    Assert.AreEqual("asdfg123", token);

    // The redirected hit saw the cookie, as echoed back in the page.
    Document echoed = res.Parse();
    string cookieHeader = ihVal("HTTP_COOKIE", echoed);
    Assert.AreEqual("token=asdfg123; uid=jhy", cookieHeader);
}
/// <summary>
/// A POST that is answered with a redirect must fetch the redirect target
/// using GET.
/// </summary>
public void postRedirectsFetchWithGet()
{
    IConnection request = NSoupClient.Connect("http://direct.infohound.net/tools/302.pl");
    request = request.Data("Argument", "Riposte");
    request = request.Method(NSoup.Method.Post);

    IResponse res = request.Execute();

    string finalUrl = res.Url().ToString();
    Assert.AreEqual("http://jsoup.org/", finalUrl);
    Assert.AreEqual(NSoup.Method.Get, res.Method());
}
/// <summary>
/// Connects to <paramref name="URL"/> using the configured <c>Timeout</c> and
/// returns the document fetched with a GET request.
/// </summary>
/// <param name="URL">Address of the page to fetch.</param>
/// <returns>The fetched document.</returns>
private static Document GetDocumentFromUrl(string URL)
{
    // Connect, apply the timeout and fetch in a single fluent chain.
    return NSoupClient.Connect(URL)
        .Timeout(Timeout)
        .Get();
}
/// <summary>
/// Runs a Google image search for <paramref name="question"/> and collects the
/// original image URLs found in the result page metadata.
/// </summary>
/// <param name="question">Search text; commas are stripped and the rest is URL-encoded.</param>
/// <param name="userAgent">User-Agent header value sent with every request.</param>
/// <returns>
/// The image URLs found so far. On any failure the exception is stored in
/// <c>Error</c> and the (possibly partial or empty) list is still returned.
/// </returns>
public List<String> FindImages(String question, String userAgent)
{
    List<String> imagesList = new List<String>();
    try
    {
        // Encode the query so characters such as '&', '#' or spaces cannot break the URL.
        String googleUrl = "https://www.google.com/search?tbm=isch&q=" + Uri.EscapeDataString(question.Replace(",", ""));
        NSoup.Nodes.Document htmlDoc = NSoupClient.Connect(googleUrl).UserAgent(userAgent).Timeout(10 * 1000).Get();

        // Follow meta-refresh / script based redirects before looking for results.
        checkForRedirectsOnHTMLDocument(ref htmlDoc, userAgent);

        // Everything between "ou":" and the next double quote. The quotes inside the
        // pattern must be escaped for the literal to be valid C#.
        Regex originalUrlPattern = new Regex("\"ou\":\"(.*?)\"");

        // div with class="y yi" containing div with class="rg_di rg_bx rg_el ivg-i"
        NSoup.Select.Elements div_with_images = htmlDoc.Select("div.y.yi div.rg_di.rg_bx.rg_el.ivg-i");
        foreach (NSoup.Nodes.Element div_with_image in div_with_images)
        {
            NSoup.Nodes.Element rg_meta_div = div_with_image.Select("div.rg_meta").First();
            String text_where_the_img_is = rg_meta_div.ToString();

            var match = originalUrlPattern.Match(text_where_the_img_is);
            if (match.Success && match.Groups[1].Value != String.Empty)
            {
                imagesList.Add(match.Groups[1].Value);
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: remember the failure and hand back whatever was collected.
        this.Error = ex;
    }
    return imagesList;
}
/// <summary>
/// Downloads the page at <c>url</c> into <c>doc</c> and refreshes the statistics
/// fields (update time and the confirmed / suspected / dead / cured counts).
/// </summary>
/// <remarks>
/// NOTE(review): <c>regularMatchStr</c> is defined elsewhere; it presumably searches the
/// freshly fetched <c>doc</c>, so the fetch has to stay first. Confirm.
/// </remarks>
public static void GetData()
{
    // Obtain the Document object directly from the url.
    doc = NSoupClient.Connect(url).Get();

    // Every value is extracted from the same named section.
    const string section = "getStatisticsService";

    string modifyTime = regularMatchStr(section, "modifyTime\":(.+?),");
    updateTime = ConvertStringToDateTime(modifyTime).ToString();

    confirmedCount = regularMatchStr(section, "confirmedCount\":(.+?),");
    suspectedCount = regularMatchStr(section, "suspectedCount\":(.+?),");
    deadCount = regularMatchStr(section, "deadCount\":(.+?),");
    curedCount = regularMatchStr(section, "curedCount\":(.+?),");
}
/// <summary>
/// When the server reports the misspelled charset "UFT8", it must be treated as
/// unsupported and parsing must fall back to the default charset.
/// </summary>
public void handlesDodgyCharset()
{
    IResponse res = NSoupClient.Connect("http://direct.infohound.net/tools/bad-charset.pl").Execute();

    // Straight from the header.
    string contentType = res.Header("Content-Type");
    Assert.AreEqual("text/html; charset=UFT8", contentType);

    // The header charset is not supported, so nothing is reported before parsing.
    Assert.AreEqual(null, res.Charset());

    // Parsing would throw if the unsupported charset were actually used.
    Document parsed = res.Parse();
    bool hasGreeting = parsed.Text().Contains("Hello!");
    Assert.IsTrue(hasGreeting);

    // After parsing, the charset is the default.
    Assert.AreEqual("UTF-8", res.Charset());
}
/// <summary>
/// Posts form data (including a repeated key and a non-ASCII pair) plus a cookie
/// to the echo endpoint and checks what the server reports back.
/// </summary>
public void doesPost()
{
    // Non-ASCII key/value pair, reused verbatim for both sending and checking.
    const string unicodeKey = "ח™¾";
    const string unicodeValue = "ו÷¦ה¸€ה¸‹";

    IConnection request = NSoupClient.Connect(echoURL)
        .Data("uname", "Jsoup", "uname", "Jonathan", unicodeKey, unicodeValue)
        .Cookie("auth", "token");
    Document echoed = request.Post();

    Assert.AreEqual("POST", ihVal("REQUEST_METHOD", echoed));
    // The HTTP_ACCEPT_ENCODING == "gzip" check stays disabled: the current proxy removes gzip on post.
    Assert.AreEqual("auth=token", ihVal("HTTP_COOKIE", echoed));
    Assert.AreEqual(unicodeValue, ihVal(unicodeKey, echoed));
    Assert.AreEqual("Jsoup, Jonathan", ihVal("uname", echoed));
}
/// <summary>
/// Walks the listing pages from 779 down to 1, extracts every app (name, tags,
/// minimum Android version) into <c>programms</c> and keeps <c>td</c> in sync.
/// After ten failed pages the work stops and the data gathered so far is
/// written to ./TrashDUMP.json.
/// </summary>
/// <remarks>
/// The method contained no <c>await</c>, so the misleading <c>async</c> modifier was
/// dropped; it runs synchronously, exactly as before. UI updates go through
/// <c>Invoke</c>.
/// </remarks>
void lookup()
{
    const int maxErrors = 10;
    int exceptions = 0;

    for (int i = 779; i > 0; i--)
    {
        int page = i; // stable copy for the lambdas below
        Invoke(new MethodInvoker(() => printer("Добываем страницу номер " + page + "...")));
        try
        {
            Document d = NSoupClient.Connect("https://trashbox.ru/public/progs/tags/os_android/page_topics/" + page.ToString()).Get();

            // Re-parse only the listing container so the selectors below stay inside it.
            d = NSoupClient.Parse(d.Select("div.div_content_cat_topics").Html());

            Elements apps = d.Select("div.div_topic_cat_content");
            foreach (Element app in apps)
            {
                string name = app.Select("span.div_topic_tcapt_content").First().Text();

                // Reduce e.g. "Android 4.0 и выше" to the bare version number.
                string version = app.Select("span.div_topic_cat_tag_os_android").First().Text();
                version = version.Replace("Android ", "").Replace(" и выше", "");

                List<string> tags = new List<string>();
                foreach (Element tag in app.Select("div.div_topic_cat_tags a"))
                {
                    tags.Add(tag.Text());
                }

                programms.Add(new AndroidApp(name, tags, version));
            }
        }
        catch (Exception ex)
        {
            Invoke(new MethodInvoker(() => printer("Ошибка при добыче страницы " + page + "!")));
            Invoke(new MethodInvoker(() => printer(ex.Message)));

            // Count first, then compare: the old check only fired on the 11th error.
            exceptions++;
            if (exceptions >= maxErrors)
            {
                Invoke(new MethodInvoker(() => printer("Случилось 10 ошибок! Завершаем работу...")));
                td = new TrashboxDump(programms);
                // WriteAllText creates or overwrites the file itself; no need to pre-create it.
                File.WriteAllText("./TrashDUMP.json", JsonConvert.SerializeObject(td));
                break;
            }
        }
        finally
        {
            // Runs for every page, including the one that triggers the abort.
            Invoke(new MethodInvoker(() => pb.Value++));
            td = new TrashboxDump(programms);
        }
    }

    td = new TrashboxDump(programms);
}
/// <summary>
/// Fetches the same XML resource once with the XML parser and once with the
/// default (HTML) parser and checks that the resulting trees differ.
/// </summary>
public void testSupplyParserToConnection()
{
    const string xmlUrl = "http://direct.infohound.net/tools/jsoup-xml-test.xml";

    // Parse with both the xml and the html parser.
    IConnection xmlRequest = NSoupClient.Connect(xmlUrl).Parser(NSoup.Parse.Parser.XmlParser());
    Document xmlDoc = xmlRequest.Get();
    Document htmlDoc = NSoupClient.Connect(xmlUrl).Get();

    string xmlMarkup = TextUtil.StripNewLines(xmlDoc.Html());
    Assert.AreEqual("<doc><val>One<val>Two</val>Three</val></doc>", xmlMarkup);
    Assert.AreNotSame(htmlDoc, xmlDoc);

    // The html parser normalises the document (adds a head); the xml parser does not.
    Assert.AreEqual(1, htmlDoc.Select("head").Count);
    Assert.AreEqual(0, xmlDoc.Select("head").Count);
}
/// <summary>
/// Sends a GET with a user agent, a referrer and an extra query parameter to the
/// echo endpoint and checks what the server saw.
/// </summary>
public void doesGet()
{
    Document echoed = NSoupClient.Connect(echoURL + "?what=the")
        .UserAgent("Mozilla")
        .Referrer("http://example.com")
        .Data("what", "about & me?")
        .Get();

    // The hex digits come back lower-case ("%3f", not "%3F") because of how
    // HttpUtility.UrlEncode() behaves. The difference is acceptable.
    string queryString = ihVal("QUERY_STRING", echoed);
    Assert.AreEqual("what=the&what=about+%26+me%3f", queryString);

    Assert.AreEqual("the, about & me?", ihVal("what", echoed));
    Assert.AreEqual("Mozilla", ihVal("HTTP_USER_AGENT", echoed));
    Assert.AreEqual("http://example.com", ihVal("HTTP_REFERER", echoed));
}
/// <summary>
/// Fetching an endpoint that redirects in a loop must fail with an IOException
/// whose message mentions "Too many redirects".
/// </summary>
public void maximumRedirects()
{
    bool threw = false;

    try
    {
        // The result is irrelevant; only the failure matters.
        NSoupClient.Connect("http://direct.infohound.net/tools/loop.pl").Get();
    }
    catch (System.IO.IOException redirectError)
    {
        bool mentionsLimit = redirectError.Message.Contains("Too many redirects");
        Assert.IsTrue(mentionsLimit);
        threw = true;
    }

    Assert.IsTrue(threw);
}
/// <summary>
/// Downloads the line list page and adds one entry per line heading to
/// <c>tmhatlar</c>, formatted as "counter*lineNumber*lineName".
/// </summary>
/// <remarks>
/// The heading text is split at its first space: the part before is the line
/// number, the rest is the line name. A heading without any space is stored as a
/// line number with an empty name instead of throwing from <c>Substring</c>.
/// </remarks>
private void hatlar()
{
    IConnection connection = NSoupClient.Connect("http://www.iett.istanbul/tr/main/hatlar").UserAgent("Mozilla");
    connection.Timeout(30000);
    Document document = connection.Get();

    foreach (Element Hat in document.Select("h4.DetailLi_name"))
    {
        // Read the text once instead of re-evaluating it for every use.
        string text = Hat.Text();
        int index = text.IndexOf(' ');

        string hatNo;
        string hatAdi;
        if (index < 0)
        {
            // No separator: Substring(0, -1) would throw, so keep the whole text as the number.
            hatNo = text.Trim();
            hatAdi = string.Empty;
        }
        else
        {
            hatNo = text.Substring(0, index).Trim();
            hatAdi = text.Substring(index).Trim();
        }

        tmhatlar.Items.Add(i + "*" + hatNo + "*" + hatAdi);
        i++;
    }
}
/// <summary>
/// Collects the cookies set by a redirecting endpoint and checks that passing
/// them as a map to a second request sends all of them.
/// </summary>
public void multiCookieSet()
{
    IResponse res = NSoupClient.Connect("http://direct.infohound.net/tools/302-cookie.pl").Execute();

    // Cookies set by the redirect.
    IDictionary<string, string> cookies = res.Cookies();
    Assert.AreEqual("asdfg123", cookies["token"]);
    Assert.AreEqual("jhy", cookies["uid"]);

    // Send those cookies to the echo URL as a map.
    IConnection echoRequest = NSoupClient.Connect(echoURL).Cookies(cookies);
    Document echoed = echoRequest.Get();
    string cookieHeader = ihVal("HTTP_COOKIE", echoed);
    Assert.AreEqual("token=asdfg123; uid=jhy", cookieHeader);
}
/// <summary>
/// Downloads the listing page for <c>currentpage</c> and produces one
/// <see cref="Post"/> per image found in each post container.
/// </summary>
/// <returns>
/// All posts found on the page; a container with several images yields several
/// posts sharing the same link, author and tag list.
/// </returns>
public List<Post> LoadPosts()
{
    var page = NSoupClient.Connect($"http://pornreactor.cc/{currentpage}").Timeout(10000).Get();

    var result = new List<Post>();
    foreach (var container in page.Select("div.postContainer"))
    {
        // Data shared by every image of this post.
        var relativeLink = container.Select("span.link_wr a.link").First().Attr("href");
        var link = "http://pornreactor.cc" + relativeLink;
        var author = container.Select(".uhead_nick a").First().Text();
        var tags = container.Select("h2.taglist b a").Select(tag => tag.Text()).ToList();

        // One post per image in the post body.
        result.AddRange(
            container.Select(".post_content img")
                     .Select(img => new Post(link, author, img.Attr("src"), tags)));
    }

    return result;
}
/// <summary>
/// Connecting to a non-http(s) URL must be rejected with an
/// InvalidOperationException carrying the expected message.
/// </summary>
public void exceptOnUnsupportedProtocol()
{
    const string unsupportedUrl = "file://etc/passwd";
    bool threw = false;

    try
    {
        // The result is irrelevant; only the failure matters.
        NSoupClient.Connect(unsupportedUrl).Get();
    }
    catch (InvalidOperationException rejected)
    {
        threw = true;
        string message = rejected.Message.ToString();
        Assert.AreEqual("Only http & https protocols supported", message);
    }
    catch (IOException)
    {
        // Left unhandled on purpose: 'threw' stays false and the final assert fails.
    }

    Assert.IsTrue(threw);
}
/// <summary>
/// Returns the job postings for the fixed search ("scala" within 25 of
/// "Baltimore, MD"), each with the text of its "#job_summary" element.
/// </summary>
/// <returns>
/// The postings, read from the JSON cache file when it exists; otherwise queried,
/// scraped one by one and then written to that cache file.
/// </returns>
public static List<FullJob> GetJobs()
{
    string key = Keys.GetIndeedKey();
    string searchTerms = "scala";
    string location = "Baltimore, MD";
    int radius = 25;

    var cachedResults = $"{searchTerms}_{location}_{radius}.json";

    // Fast path: reuse the results of an earlier run.
    if (File.Exists(cachedResults))
    {
        string cachedJson = File.ReadAllText(cachedResults);
        return JsonConvert.DeserializeObject<List<FullJob>>(cachedJson);
    }

    var queryUrl = IndeedQueryUtil.BuildBatchQueryFormatUrl(key, searchTerms, location, radius);
    var jobQueryResults = IndeedQueryUtil.GetAllJobs(queryUrl, key);

    var scraped = new List<FullJob>();
    foreach (var job in jobQueryResults)
    {
        // Wait one second before each page request.
        Thread.Sleep(1000);

        var posting = NSoupClient.Connect(job.Url).Timeout(5000).Get();
        var fullText = posting.Select("#job_summary").Text;

        scraped.Add(new FullJob { JobQueryResult = job, FullText = fullText });
    }

    // Persist for the next run.
    File.WriteAllText(cachedResults, JsonConvert.SerializeObject(scraped, Formatting.Indented));
    return scraped;
}
/// <summary>
/// Downloads the menu page of <paramref name="novelModel"/>, retrying after a
/// three second pause whenever the download fails.
/// </summary>
/// <param name="novelModel">Novel whose <c>MenuUrl</c> is fetched.</param>
/// <param name="retryCount">
/// Number of retries already consumed. Once a failure occurs with this value above 5,
/// no further attempt is made.
/// </param>
/// <param name="document">The fetched document, or null when every attempt failed.</param>
/// <remarks>
/// The retry budget is the same as in the former recursive version. What changed:
/// every failure is reported through the "show_message" script as soon as it
/// happens. Previously the messages appeared only after the nested retries had
/// finished, in reverse order, and the last failure was never shown.
/// </remarks>
private void Download_PageContent(NovelModel novelModel, int retryCount, out Document document)
{
    document = null;

    for (int attempt = retryCount; ; attempt++)
    {
        try
        {
            var connect = NSoupClient.Connect(novelModel.MenuUrl);
            document = connect.Get();
            return;
        }
        catch (Exception ex)
        {
            // Tell the user right away, before any waiting.
            InvokeScriptFunction(() => { Document.InvokeScript("show_message", new[] { ex.Message, "danger" }); });

            if (attempt > 5)
            {
                // Retry budget exhausted: give up and report "no document".
                document = null;
                return;
            }
        }

        // Pause before the next attempt.
        Thread.Sleep(3 * 1000);
    }
}
/// <summary>
/// After a random pause of 1 to 9 seconds, downloads the chapter page, extracts the
/// markup of the element with id "content", strips its line-break tags and stores
/// the result as the chapter content.
/// </summary>
/// <param name="chapterModel">Chapter whose <c>Url</c> is fetched and whose <c>Id</c> is updated.</param>
/// <remarks>
/// Nothing is stored when the cleaned text is empty. Any exception is shown to the
/// user through the "show_message" script rather than rethrown.
/// </remarks>
public void Parse_Chapter(ChapterModel chapterModel)
{
    // Random delay before the request.
    int pauseSeconds = new Random().Next(1, 10);
    Thread.Sleep(pauseSeconds * 1000);

    try
    {
        var page = NSoupClient.Connect(chapterModel.Url).Get();
        var rawHtml = page.GetElementById("content").Html();

        var cleaned = Regex.Replace(rawHtml, "<br.*?/>", "");
        // NOTE(review): pattern and replacement look identical here; the pattern may be a
        // non-breaking space (or a lost "&nbsp;" entity). Confirm against the real file.
        cleaned = Regex.Replace(cleaned, " ", " ");

        if (cleaned.Length <= 0)
        {
            return;
        }

        using (var unitOfWork = new NovelUnitOfWork())
        {
            var chapters = new ChapterDomainService(unitOfWork);
            chapters.Update(chapterModel.Id, chapter =>
            {
                chapter.Content = cleaned;
                chapter.LastUpdatedTime = DateTime.Now;
            });
        }
    }
    catch (Exception failure)
    {
        InvokeScriptFunction(() => { Document.InvokeScript("show_message", new[] { failure.Message, "danger" }); });
    }
}
/// <summary>
/// Walks the paginated chapter list of the story at <c>strStoryUrl</c> and returns
/// one <see cref="Chapters"/> entry (URL and name) per chapter link found.
/// </summary>
/// <param name="lngStoryId">Story id stamped on every returned chapter.</param>
/// <returns>All chapters found, in page order, each with <c>Status</c> 0.</returns>
/// <remarks>
/// Exceptions from fetching propagate unchanged; the old <c>throw objEx;</c> wrapper
/// was removed because it reset the stack trace. A page without the pagination
/// element is treated as the last page, and list children without a link are skipped.
/// NOTE(review): this assumes <c>First</c> yields null when a selector matches
/// nothing; confirm against the NSoup version in use.
/// </remarks>
public Chapters[] GetChapters(long lngStoryId)
{
    List<Chapters> objChapterList = new List<Chapters>();
    int intPage = 1;
    bool blnIsLastPage;

    do
    {
        Document objDocChapter = NSoupClient.Connect($"{strStoryUrl}/trang-{intPage}/#list-chapter")
            .UserAgent(ConstValue.USER_AGENT_CHROME)
            .Get();

        Elements arrChapters = objDocChapter.Select(".list-chapter");
        foreach (var objList in arrChapters)
        {
            foreach (var item in objList.Children)
            {
                Element objChapterElement = item.Select("a").First;
                if (objChapterElement == null)
                {
                    // Not a chapter entry (no link inside); nothing to record.
                    continue;
                }

                objChapterList.Add(new Chapters()
                {
                    StoryId = lngStoryId,
                    Status = 0,
                    Url = objChapterElement.Attr("href"),
                    Name = objChapterElement.Attr("title"),
                });
            }
        }

        // Check whether the last page has been reached: the element just before the
        // page-nav dropdown carries the "active" class on the final page.
        Element objEleLi = objDocChapter.Select(".dropup.page-nav").First;
        Element objEleLiPrevious = objEleLi?.PreviousElementSibling;
        blnIsLastPage = objEleLiPrevious == null || objEleLiPrevious.HasClass("active");

        intPage++;
    } while (!blnIsLastPage);

    return objChapterList.ToArray();
}
/// <summary>
/// Follows client-side redirects found in <paramref name="htmlDoc"/>: a
/// <c>meta http-equiv=refresh</c> tag, or a script assigning
/// <c>window.google.gbvu</c>. The target is fetched, <paramref name="htmlDoc"/> is
/// replaced with it, and the check is repeated on the new document.
/// </summary>
/// <param name="htmlDoc">Document to inspect; replaced by the final redirect target.</param>
/// <param name="userAgent">User-Agent header value used for the follow-up requests.</param>
/// <remarks>
/// Once a redirect has been followed the method returns, because the recursive call
/// has already processed the new document. The old code kept iterating over elements
/// of the replaced document. The "url=" match is now case-insensitive.
/// NOTE(review): the target is appended to <c>getBaseURI</c>, so it is assumed to be
/// relative; there is also no limit on the number of redirects followed.
/// </remarks>
private static void checkForRedirectsOnHTMLDocument(ref NSoup.Nodes.Document htmlDoc, String userAgent)
{
    // Meta-refresh based redirection.
    Regex refreshTarget = new Regex("url=(.*)", RegexOptions.IgnoreCase);
    foreach (NSoup.Nodes.Element refresh in htmlDoc.Select("html head meta[http-equiv=refresh]"))
    {
        var match = refreshTarget.Match(refresh.Attr("content"));
        if (match.Success && match.Groups[1].Value != String.Empty)
        {
            // The base URI is needed to build the full address.
            String baseURI = getBaseURI(htmlDoc);
            String urlToFetch = baseURI + match.Groups[1].Value;
            htmlDoc = NSoupClient.Connect(urlToFetch).UserAgent(userAgent).Timeout(10 * 1000).Get();
            checkForRedirectsOnHTMLDocument(ref htmlDoc, userAgent);
            return;
        }
    }

    // Javascript based redirection.
    Regex scriptTarget = new Regex("window.google.gbvu='(.*?)'");
    foreach (NSoup.Nodes.Element javascript in htmlDoc.Select("html head script[type=text/javascript]"))
    {
        var match = scriptTarget.Match(javascript.ToString());
        if (match.Success && match.Groups[1].Value != String.Empty)
        {
            // Convert the escaped forms of '=' and '&' back to the characters themselves.
            String target = match.Groups[1].Value
                .Replace("\\75", "=").Replace("\\075", "=").Replace("\\x3d", "=")
                .Replace("\\46", "&").Replace("\\046", "&").Replace("\\x26", "&");

            String baseURI = getBaseURI(htmlDoc);
            String urlToFetch = baseURI + target;
            htmlDoc = NSoupClient.Connect(urlToFetch).UserAgent(userAgent).Timeout(10 * 1000).Get();
            checkForRedirectsOnHTMLDocument(ref htmlDoc, userAgent);
            return;
        }
    }
}
/// <summary>
/// Fetching a URL that answers 404 must raise an HttpStatusException exposing the
/// status code and the URL.
/// </summary>
public void throwsExceptionOnError()
{
    string url = "http://direct.infohound.net/tools/404";
    bool threw = false;

    try
    {
        // The result is irrelevant; only the failure matters.
        NSoupClient.Connect(url).Get();
    }
    catch (HttpStatusException statusError)
    {
        threw = true;

        string described = string.Format("{0}. Status={1}, URL={2}", statusError.Message.ToString(), statusError.StatusCode, statusError.Url);
        Assert.AreEqual("HTTP error fetching URL. Status=404, URL=http://direct.infohound.net/tools/404", described);
        Assert.AreEqual(url, statusError.Url);
        Assert.AreEqual(404, statusError.StatusCode);
    }
    catch (System.IO.IOException)
    {
        // Left unhandled on purpose: 'threw' stays false and the final assert fails.
    }

    Assert.IsTrue(threw);
}
/// <summary>
/// Fetches the front page and returns the number shown in the pagination entry
/// marked as current.
/// </summary>
/// <returns>The text of the ".pagination_expanded .current" element parsed as an integer.</returns>
public int GetLastPageNum()
{
    var frontPage = NSoupClient.Connect("http://pornreactor.cc/").Timeout(10000).Get();
    var currentMarker = frontPage.Select(".pagination_expanded .current").First();
    string pageText = currentMarker.Text();

    return int.Parse(pageText);
}
/// <summary>
/// For every entry in <c>tmhatlar</c>, downloads that line's page and writes its
/// stops, grouped by direction, to "Duraklar.xml". A message box is shown when
/// the file is complete.
/// </summary>
/// <remarks>
/// Entries are expected in the form "counter*lineNumber*lineName"; the line number
/// (second field) selects the page. The writer sits in a <c>using</c> block so the
/// file is closed even when a download throws part-way through.
/// </remarks>
private void duraklar()
{
    using (XmlTextWriter yaz = new XmlTextWriter("Duraklar.xml", System.Text.UTF8Encoding.UTF8))
    {
        yaz.Formatting = Formatting.Indented;
        yaz.WriteStartDocument();
        yaz.WriteStartElement("hepsi");

        foreach (string item in tmhatlar.Items)
        {
            string[] ayir = item.Split('*');

            IConnection connection = NSoupClient.Connect("http://www.iett.istanbul/tr/main/hatlar/" + ayir[1]).UserAgent("Mozilla");
            connection.Timeout(600000);
            Document document = connection.Get();

            yaz.WriteStartElement("hatid");
            yaz.WriteAttributeString("h", ayir[1]);

            foreach (Element yon in document.Select("div.LineMapList ol"))
            {
                // Query the stops once; an empty list produces no direction element at all.
                var duraklarListesi = yon.Select("li");
                if (duraklarListesi.Count == 0)
                {
                    continue;
                }

                // Direction label: "Gidiş" for "Going", otherwise "Geliş".
                string yn = "Geliş";
                if (yon.Attr("data-station-direction") == "Going")
                {
                    yn = "Gidiş";
                }

                yaz.WriteStartElement("yon");
                yaz.WriteAttributeString("y", yn);

                // Stops are numbered from 1 within each direction.
                int i = 1;
                foreach (Element Durak in duraklarListesi)
                {
                    yaz.WriteStartElement("durak");
                    yaz.WriteAttributeString("durakno", i.ToString());
                    yaz.WriteAttributeString("lat", Durak.Attr("data-station-lat"));
                    yaz.WriteAttributeString("lng", Durak.Attr("data-station-lng"));
                    yaz.WriteAttributeString("isim", Durak.Attr("data-station-name"));
                    yaz.WriteEndElement();
                    i++;
                }

                yaz.WriteEndElement(); // yon
            }

            yaz.WriteEndElement(); // hatid
            durum++;
        }

        yaz.WriteEndElement(); // hepsi
    }

    MessageBox.Show("Duraklar XML kayit edildi.");
}