/// <summary>
/// Demo: downloads a blog page and exercises the common NSoup node-selection APIs
/// (by tag, by id, by class, by attribute name/value, and jQuery-style selector).
/// </summary>
static void Main(string[] args)
{
    // BUG FIX: WebClient is IDisposable and was leaked — dispose deterministically.
    using (WebClient client = new WebClient())
    {
        client.Encoding = System.Text.Encoding.UTF8;
        var html = client.DownloadString("https://blog.csdn.net/czjnoe/article/details/106600070");
        NSoup.Nodes.Document doc = NSoup.NSoupClient.Parse(html);

        // 根据标签名获取节点 (select nodes by tag name)
        NSoup.Select.Elements metaElements = doc.GetElementsByTag("meta");
        foreach (var item in metaElements) { }

        // 根据id获取节点 (select a node by id)
        NSoup.Nodes.Element headClassElements = doc.GetElementById("head");

        // 根据class获取节点 (select nodes by CSS class)
        var headIdElements = doc.GetElementsByClass("fm").ToList();
        foreach (var item in headIdElements) { }

        // 根据属性名称获取节点 (select nodes that carry a given attribute)
        List<Element> attributeNameElements = doc.GetElementsByAttribute("class").ToList();

        // 根据属性值获取节点 (select nodes by attribute value)
        List<Element> attributeValueElements = doc.GetElementsByAttributeValue("id", "su").ToList();

        // 根据jQuery选择器获取节点 (jQuery-style selector)
        var selectElments = doc.Select("#head").ToList();
    }
}
/// <summary>
/// Determines whether the input document is valid against the whitelist: it is valid
/// if every tag and attribute in the input HTML is allowed.
/// </summary>
/// <remarks>
/// Useful as a validator for user-input forms. An invalid document can still be cleaned
/// successfully using the <see cref="Clean(Document)"/> method. When used as a validator,
/// it is still recommended to clean the document so enforced attributes are set correctly
/// and the output is tidied.
/// </remarks>
/// <param name="dirtyDocument">document to test</param>
/// <returns>true if no tags or attributes need to be removed; false if they do</returns>
public bool IsValid(Document dirtyDocument)
{
    if (dirtyDocument == null)
    {
        throw new ArgumentNullException("dirtyDocument");
    }

    // Copy into a fresh shell and count what had to be dropped; zero drops == valid.
    Document shell = Document.CreateShell(dirtyDocument.BaseUri);
    return CopySafeNodes(dirtyDocument.Body, shell.Body) == 0;
}
protected Document() { } // Used for Node.Clone().

/// <summary>
/// Create a valid, empty shell of a document, suitable for adding more elements to.
/// </summary>
/// <param name="baseUri">baseUri of document</param>
/// <returns>document with html, head, and body elements.</returns>
/// <exception cref="ArgumentNullException">if <paramref name="baseUri"/> is null.</exception>
public static Document CreateShell(string baseUri) // fixed modifier order: "static public" -> "public static"
{
    if (baseUri == null)
    {
        throw new ArgumentNullException(nameof(baseUri));
    }

    Document doc = new Document(baseUri);
    Element html = doc.AppendElement("html");
    html.AppendElement("head");
    html.AppendElement("body");
    return doc;
}
/// <summary>
/// Creates a new, clean document from the original dirty document, containing only
/// elements allowed by the whitelist. The original document is not modified; only
/// elements from the dirty document's <code>body</code> are used.
/// </summary>
/// <param name="dirtyDocument">Untrusted base document to clean.</param>
/// <returns>cleaned document.</returns>
public Document Clean(Document dirtyDocument)
{
    if (dirtyDocument == null)
    {
        throw new ArgumentNullException("dirtyDocument");
    }

    Document cleanDoc = Document.CreateShell(dirtyDocument.BaseUri);

    // Frameset documents have no body; in that case the clean doc keeps its empty body.
    if (dirtyDocument.Body != null)
    {
        CopySafeNodes(dirtyDocument.Body, cleanDoc.Body);
    }

    return cleanDoc;
}
/// <summary>
/// 获得地址代码 — downloads the MCA administrative-division page and builds a dictionary
/// keyed by each row's second table cell with the third cell as value
/// (presumably code -> name; confirm against the live page layout).
/// </summary>
/// <returns>dictionary parsed from rows matching "tr[height=19]".</returns>
private IDictionary<string, string> GetAddressCode()
{
    string url = "http://www.mca.gov.cn/article/sj/xzqh/2020/2020/202003301019.html";
    HttpClientHelper httpClient = new HttpClientHelper();

    // NOTE(review): sync-over-async block; prefer making this method async
    // if the calling interface can change.
    string html = httpClient.GetAsync(url).GetAwaiter().GetResult();

    NSoup.Nodes.Document doc = NSoup.NSoupClient.Parse(html);
    IDictionary<string, string> hashtable = new Dictionary<string, string>();

    var trs = doc.Select("tr[height=19]");
    foreach (Element tr in trs)
    {
        var tb = tr.Select("td");

        // BUG FIX: skip malformed rows (original indexed tb[2] unconditionally) and
        // use the indexer so a duplicate key overwrites instead of Add() throwing
        // ArgumentException and aborting the whole scrape.
        if (tb.Count > 2)
        {
            hashtable[tb[1].Text()] = tb[2].Text();
        }
    }

    return hashtable;
}
protected ParseErrorList _errors; // null when not tracking errors

/// <summary>
/// Sets up parser state (document, character reader, tokeniser, open-element stack)
/// for a parse run.
/// </summary>
/// <param name="input">HTML to parse; must not be null.</param>
/// <param name="baseUri">base URI used to resolve relative links; must not be null.</param>
/// <param name="errors">error list to accumulate parse errors into.</param>
/// <exception cref="ArgumentNullException">if input or baseUri is null.</exception>
protected virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
{
    // BUG FIX: the single-string ArgumentNullException ctor takes the *parameter name*,
    // not a message. The original passed the message as the name; use the
    // (paramName, message) overload instead.
    if (input == null)
    {
        throw new ArgumentNullException("input", "String input must not be null");
    }
    if (baseUri == null)
    {
        throw new ArgumentNullException("baseUri", "BaseURI must not be null");
    }

    _doc = new Document(baseUri);
    _reader = new CharacterReader(input);
    _errors = errors;
    _tokeniser = new Tokeniser(_reader, errors);
    _stack = new DescendableLinkedList<Element>();
    this._baseUri = baseUri;
}
/// <summary>
/// 从 http://www.ip138.com/ 读取IP地址归属地 (looks up an IP's location via ip138.com).
/// </summary>
/// <param name="Ip">IP address to look up; null or whitespace returns null.</param>
/// <returns>the location string, or "未知" when the lookup or parse fails.</returns>
public string GetIpAddress(string Ip)
{
    // BUG FIX: the original called Ip.Trim() before any null check and would throw
    // NullReferenceException for a null argument. IsNullOrWhiteSpace covers both
    // the null case and the original "trim then empty" semantics.
    if (string.IsNullOrWhiteSpace(Ip))
    {
        return null;
    }

    try
    {
        HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(
            "http://www.ip138.com/ips138.asp?ip=" + Ip + "&action=2");
        myReq.Timeout = 3000;

        // Dispose the response/stream/reader deterministically (the original leaked them).
        using (HttpWebResponse HttpWResp = (HttpWebResponse)myReq.GetResponse())
        using (Stream myStream = HttpWResp.GetResponseStream())
        using (StreamReader sr = new StreamReader(myStream, Encoding.UTF8))
        {
            string strResult = sr.ReadToEnd();
            NSoup.Nodes.Document doc = NSoup.NSoupClient.Parse(strResult);

            // Navigate to the first <li> of the first <ul> in the result table; the
            // location follows a fixed 6-character prefix — TODO confirm against the
            // live page, which may have changed layout.
            Element element = doc.Body.GetElementsByTag("table")[0];
            strResult = element.GetElementsByTag("tr")[2]
                               .GetElementsByTag("td")[0]
                               .GetElementsByTag("ul")[0]
                               .GetElementsByTag("li")[0].Text();
            return strResult.Substring(6);
        }
    }
    catch (Exception)
    {
        // Best-effort lookup: any network or parse failure degrades to "unknown".
        return "未知";
    }
}
/// <summary>
/// Reads the text of the first td immediately following a th whose text contains
/// <paramref name="key"/> (i.e. a value cell in a header/value table).
/// </summary>
private static string ihVal(string key, Document doc)
{
    var selector = "th:contains(" + key + ") + td";
    var cell = doc.Select(selector).First;
    return cell.Text();
}
/// <summary>
/// Scrapes a ting56.com listing page: reads the series title, walks every episode link
/// inside the #vlink_1 element, decodes the obfuscated download URL embedded in each
/// episode page (FonHen_JieMa: '*'-separated character codes), and downloads each file.
/// </summary>
private void download2()
{
    // 下载地址 — the listing page URL comes from the UI.
    string url = textBox1.Text;
    string html = getHtml(url);
    NSoup.Nodes.Document d = NSoup.NSoupClient.Parse(html);

    // 获取标题; 如果没有标题就用毫秒数 — fall back to a tick count so the
    // destination folder name is still unique.
    String title = d.GetElementsByClass("tit").First.GetElementsByTag("h1").Text;
    title = string.IsNullOrEmpty(title) ? DateTime.Now.ToUniversalTime().Ticks + "" : title;

    NSoup.Nodes.Element el = d.GetElementById("vlink_1");
    NSoup.Select.Elements es = el.GetElementsByTag("li");
    richTextBox1.Text = "";

    foreach (var e in es)
    {
        string subHtml = getHtml("http://www.ting56.com" + e.GetElementsByTag("a").Attr("href"));
        // (removed an unused NSoup parse of subHtml — the page is only regex-scanned)

        // 获取加密url — the real URL is embedded as '*'-separated character codes
        // inside a FonHen_JieMa('...') call; decode it back into a string.
        Match mc = Regex.Match(subHtml, "FonHen_JieMa\\('([0-9,*]*)'\\)");
        string miwen = mc.Groups[1].Value;
        string[] tArr = Regex.Split(miwen, "\\*", RegexOptions.IgnoreCase);
        string s = "";
        for (int i = 0; i < tArr.Length; i++)
        {
            if (!tArr[i].Equals(""))
            {
                s += (char)int.Parse(tArr[i]);
            }
        }

        // Decoded payload is '&'-separated: [0] = download URL, [2] = file extension.
        // (split once instead of twice as in the original)
        string[] parts = Regex.Split(s, "\\&", RegexOptions.IgnoreCase);
        string downUrl = parts[0];
        string downFileExt = parts[2];

        string path = textBox2.Text + "\\" + title + "\\";
        string fileName = e.Text();
        string localFile = path + fileName + downFileExt;

        richTextBox1.AppendText("开始下载 " + fileName + "\n\r" + downUrl + "\n\r");

        // Synchronous download, as in the original; consider a worker thread so the
        // UI stays responsive during long downloads.
        HttpDownload(downUrl, localFile);
    }
}
/// <summary>
/// Sets the document's output syntax and returns this settings object so calls
/// can be chained fluently.
/// </summary>
public OutputSettings SetSyntax(Document.Syntax syntax)
{
    _syntax = syntax;
    return this;
}
/// <summary>
/// Starts the STA UI thread, fetches URL over HTTP, parses the response body with NSoup,
/// records the page title and HTTP status, and collects every non-empty link's absolute
/// href into LinkNameAndLink (keyed "index_linkText"). Always returns null.
/// </summary>
private PageTitleFixture GoToWebSite()
{
    var thread = new Thread(new ThreadStart(StartWindows));
    thread.SetApartmentState(ApartmentState.STA);
    thread.Start();
    thread.Join();

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
    request.MaximumAutomaticRedirections = 4;
    request.MaximumResponseHeadersLength = 4;
    // Set credentials to use for this request.
    request.Credentials = CredentialCache.DefaultCredentials;

    string responseValue;
    HttpWebResponse response = (HttpWebResponse)request.GetResponse();
    try
    {
        Console.WriteLine("Content length is {0}", response.ContentLength);
        Console.WriteLine("Content type is {0}", response.ContentType);

        // BUG FIX: read the status code while the response is still open; the
        // original accessed response.StatusCode after Close().
        HttpStatusCode = response.StatusCode.GetHashCode().ToString();
        Console.WriteLine(response.StatusCode);

        // Dispose the stream/reader deterministically (original leaked on exceptions).
        using (Stream receiveStream = response.GetResponseStream())
        using (StreamReader readStream = new StreamReader(receiveStream, Encoding.UTF8))
        {
            Console.WriteLine("Response stream received.");
            responseValue = readStream.ReadToEnd();
        }
    }
    finally
    {
        response.Close();
    }

    doc = NSoup.NSoupClient.ParseBodyFragment(responseValue);
    Elements element = doc.GetElementsByTag("Title");
    PageTitle = element.Text;

    Elements links = doc.GetElementsByTag("a");
    int count = 0;
    foreach (var _link in links)
    {
        // NOTE(review): TagName("a href") *renames* the element's tag rather than
        // filtering by it — preserved as-is, but probably not what was intended.
        Element linkByLink = _link.TagName("a href");
        if (!linkByLink.Text().ToString().Equals(""))
        {
            String absHref = linkByLink.Attr("abs:href");
            Console.WriteLine("Link ={0}", absHref);
            LinkNameAndLink.Add(count + "_" + linkByLink.Text(), absHref);
            count++;
        }
    }
    _noOfLinks = count.ToString();

    return null;
}