/// <summary>
/// Collects the elements below this element that satisfy the given selector patterns.
/// </summary>
/// <param name="patts">Selector patterns each candidate element is evaluated against.</param>
/// <returns>A newly created list holding every element that was selected; empty when nothing matched.</returns>
public HtmlNodeList QuerySelectAll(QuerySelectorPatterns patts)
{
    HtmlNodeList matches = new HtmlNodeList();

    // The static overload walks the subtree and appends into the list passed to it.
    QuerySelectAll(this, patts, matches);

    return matches;
}
/// <summary>
/// Binds this wrapper to an underlying XML node and its owning HTML document,
/// and (re)creates the attribute and child-node collections.
/// </summary>
/// <param name="xmlNode">The XML node this instance wraps.</param>
/// <param name="htmlDoc">The HTML document that owns this node.</param>
protected void InitXmlNode(XmlNode xmlNode, HtmlDocument htmlDoc)
{
    // Debug-only sanity checks: the node must exist and must belong to the
    // same XML document as htmlDoc (or htmlDoc's node must itself be that document).
    Debug.Assert(xmlNode != null);
    Debug.Assert(
        xmlNode.OwnerDocument == htmlDoc.m_XmlNode.OwnerDocument ||
        xmlNode.OwnerDocument == htmlDoc.m_XmlNode);

    // Plain state first ...
    m_Doc = htmlDoc;
    m_UpperName = null;
    m_XmlNode = xmlNode;

    // ... then the collections, which are built from the state assigned above.
    m_Attributes = new HtmlAttributeCollection(this);
    m_ChildNodeList = new HtmlNodeList(m_XmlNode.ChildNodes);
}
/// <summary>
/// Recursively visits the child elements of <paramref name="elem"/> and appends every
/// element accepted by <paramref name="patts"/> to <paramref name="nodelist"/>.
/// </summary>
/// <remarks>
/// <paramref name="elem"/> itself is never evaluated, only the nodes below it. A matching
/// element is added before its own children are visited, and children are visited in the
/// order produced by <c>GetChildNodeIterForward</c>.
/// </remarks>
/// <param name="elem">Element whose subtree is searched.</param>
/// <param name="patts">Selector patterns used to test each element.</param>
/// <param name="nodelist">Receives the selected elements.</param>
static void QuerySelectAll(HtmlElement elem, QuerySelectorPatterns patts, HtmlNodeList nodelist)
{
    if (elem.ChildrenCount >= 1)
    {
        foreach (DomNode child in elem.GetChildNodeIterForward())
        {
            // Only open elements can match or contain further candidates.
            if (child.NodeKind != HtmlNodeKind.OpenElement)
            {
                continue;
            }

            HtmlElement childElement = (HtmlElement)child;
            if (patts.Evaluate(childElement))
            {
                nodelist.AddSelectedItem(childElement);
            }

            // Descend regardless of whether the element itself matched.
            QuerySelectAll(childElement, patts, nodelist);
        }
    }
}
/// <summary>
/// Initializes the node that represents the document itself.
/// </summary>
/// <param name="nodeType">Must be <see cref="XmlNodeType.Document"/>.</param>
/// <param name="parsedPosition">Position information forwarded to the base constructor overload.</param>
/// <exception cref="HtmlParserException">
/// <paramref name="nodeType"/> is not <see cref="XmlNodeType.Document"/>, or the instance being
/// constructed is not an <c>HtmlDocument</c>.
/// </exception>
internal HtmlNode(XmlNodeType nodeType, NodePosition parsedPosition)
    : this(parsedPosition)
{
    SetNodeId();

    Debug.Assert(nodeType == XmlNodeType.Document);
    if (nodeType != XmlNodeType.Document)
    {
        throw new HtmlParserException("This constructor can only be called for XmlNodeType.Document node type.");
    }

    // Cast once and verify the result. Previously the unchecked `as` cast was dereferenced
    // directly, so a non-document subclass failed with a bare NullReferenceException
    // after m_Doc had already been set to null.
    HtmlDocument document = this as HtmlDocument;
    Debug.Assert(document != null);
    if (document == null)
    {
        throw new HtmlParserException("This constructor can only be called when constructing an HtmlDocument.");
    }

    // The document is its own owner and wraps the underlying XML document.
    m_Doc = document;
    m_XmlNode = document.m_XmlDoc;
    m_UpperName = null;
    m_Attributes = new HtmlAttributeCollection(this);
    m_ChildNodeList = new HtmlNodeList(m_XmlNode.ChildNodes);
}
public string Script; // Currently unused.

/// <summary>
/// Runs the feature filter and stores the extracted values in <paramref name="pDic"/>.
/// </summary>
/// <remarks>
/// The nodes to process come from <c>Path.Exe</c>; nothing happens when it returns null.
/// With <c>ResultMode == 0</c> (key/value mode) each node's children are split by
/// <c>Spliter</c>, and every group with non-blank text yields one entry: the key is the first
/// separator-delimited part of the first text node, the value is the remaining parts followed
/// by the trimmed text of the remaining text nodes. When <c>FName</c> is non-empty the key is
/// prefixed with <c>FName + "_"</c>.
/// With <c>ResultMode == 1</c> the node's trimmed text, if not empty, is stored under <c>FName</c>.
/// Any other <c>ResultMode</c> value is ignored. Existing keys are overwritten.
/// </remarks>
/// <param name="pDic">Dictionary that receives the extracted entries.</param>
/// <param name="pTree">Tree passed through to <c>Path.Exe</c>.</param>
/// <param name="pHtmlNodeList">Node list passed through to <c>Path.Exe</c>.</param>
public void Exe(Dictionary<string, string> pDic, HtmlTree pTree, HtmlNodeList pHtmlNodeList)
{
    List<HtmlNode> matched = Path.Exe(pTree, pHtmlNodeList);
    if (matched == null)
    {
        return;
    }

    for (int nodeIndex = 0; nodeIndex < matched.Count; nodeIndex++)
    {
        HtmlNode current = matched[nodeIndex];

        if (ResultMode == 0)
        {
            // Key/value pair mode.
            List<HtmlNodeList> groups = current.Nodes.Split(Spliter);
            for (int groupIndex = 0; groupIndex < groups.Count; groupIndex++)
            {
                HtmlNodeList texts = groups[groupIndex].GetTextNodes(false);
                if (texts.Count <= 0)
                {
                    continue;
                }

                // Drop whitespace-only text nodes; walk backwards so removals do not shift
                // the indices that are still to be visited.
                for (int t = texts.Count - 1; t >= 0; t--)
                {
                    if (texts[t].TextDecoded.Trim().Length == 0)
                    {
                        texts.RemoveAt(t);
                    }
                }

                if (texts.Count <= 0)
                {
                    continue;
                }

                // NOTE(review): the two separator characters look identical here; one of them
                // may originally have been a full-width colon - confirm the file encoding.
                string head = texts[0].TextDecoded.Trim().Replace(" ", "");
                string[] parts = head.Split(new char[] { ':', ':' }, StringSplitOptions.RemoveEmptyEntries);
                if (parts.Length == 0)
                {
                    continue;
                }

                string name = CommonService.ClearStr(parts[0]);

                StringBuilder value = new StringBuilder();
                for (int p = 1; p < parts.Length; p++)
                {
                    value.Append(parts[p]);
                }
                for (int t = 1; t < texts.Count; t++)
                {
                    value.Append(texts[t].TextDecoded.Trim());
                }

                string key = string.IsNullOrEmpty(FName) ? name : FName + "_" + name;
                pDic[key] = value.ToString();
            }
        }
        else if (ResultMode == 1)
        {
            string text = current.TextDecoded.Trim();
            if (text.Length > 0)
            {
                pDic[FName] = text;
            }
        }
    }
}
/// <summary>
/// Rebuilds the three-level region list by scraping weather.yahoo.co.jp, writes it to
/// <c>regionlist1.csv</c>, <c>regionlist2.csv</c> and <c>regionlist3.csv</c> (Shift_JIS),
/// and refreshes the combo boxes.
/// </summary>
/// <remarks>
/// The scrape is collected into local lists and copied into the shared
/// <c>name_array*</c>/<c>url_array*</c> lists only after every download succeeded, so a
/// network failure leaves the previously loaded data untouched. The button is always
/// re-enabled on exit, including when an exception propagates.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12;

    button3.Enabled = false;
    button3.Text = "データ取得中...";
    button3.Refresh();

    // Remember the current selections so they can be restored once the lists are rebuilt.
    int previousLargeIndex = comboBox1.SelectedIndex;
    int previousMiddleIndex = comboBox2.SelectedIndex;
    int previousSmallIndex = comboBox3.SelectedIndex;

    // Scrape results; each list is copied into the field named on the right on success.
    List<string> regionNames = new List<string>();                             // name_array1
    List<List<string>> prefectureNames = new List<List<string>>();             // name_array2
    List<List<string>> prefectureUrls = new List<List<string>>();              // url_array2
    List<List<List<string>>> cityNames = new List<List<List<string>>>();       // name_array3
    List<List<List<string>>> cityUrls = new List<List<List<string>>>();        // url_array3

    try
    {
        try
        {
            // Level 1: the top page links to one page per region.
            button3.Text = "大区分...";
            button3.Refresh();

            CreteLib.HtmlDocument topDoc = DownloadHtmlDocument("https://weather.yahoo.co.jp/weather/");
            List<string> regionLinks = new List<string>();
            CollectMatchingLinks(topDoc, @"^//weather.yahoo.co.jp/weather/jp/\d[\dabcd]*/", regionLinks, null);

            // Level 2: each region page lists its prefectures.
            foreach (string regionLink in regionLinks)
            {
                // The links on the top page are protocol-relative ("//host/...").
                CreteLib.HtmlDocument regionDoc = DownloadHtmlDocument("http:" + regionLink);

                // Throws if the page has no <h1>; the finally block still restores the button.
                regionNames.Add(regionDoc.GetNodesByTagName("h1")[0].InnerText.Replace("の天気", ""));

                List<string> urls = new List<string>();
                CollectMatchingLinks(regionDoc, @"^https://weather.yahoo.co.jp/weather/jp/\d[\dabcd]*/\d+", urls, null);
                prefectureUrls.Add(urls);

                button3.Text = "中区分..." + regionNames.Count;
                button3.Refresh();
            }

            // Level 3: each prefecture page lists its municipalities.
            foreach (List<string> urlsInRegion in prefectureUrls)
            {
                List<string> namesInRegion = new List<string>();
                List<List<string>> cityNamesInRegion = new List<List<string>>();
                List<List<string>> cityUrlsInRegion = new List<List<string>>();

                foreach (string prefectureUrl in urlsInRegion)
                {
                    CreteLib.HtmlDocument prefectureDoc = DownloadHtmlDocument(prefectureUrl);
                    namesInRegion.Add(prefectureDoc.GetNodesByTagName("title")[0].InnerText.Replace("の天気 - Yahoo!天気・災害", ""));

                    List<string> names = new List<string>();
                    List<string> urls = new List<string>();
                    CollectMatchingLinks(prefectureDoc, @"^https://weather.yahoo.co.jp/weather/jp/\d[\dabcd]*/\d+/\d+", urls, names);
                    cityNamesInRegion.Add(names);
                    cityUrlsInRegion.Add(urls);

                    button3.Text = "小区分..." + cityUrls.Count + "-" + cityUrlsInRegion.Count;
                    button3.Refresh();
                }

                cityNames.Add(cityNamesInRegion);
                cityUrls.Add(cityUrlsInRegion);
                prefectureNames.Add(namesInRegion);
            }
        }
        catch (System.Net.WebException)
        {
            // Any of the downloads failed; nothing has been committed yet.
            MessageBox.Show(
                "ネットワークに正しく接続されているか確認してください\nまた、接続先サーバが正常通り稼働しているか確認してください",
                "エラー");
            return;
        }

        // Commit the complete result to the shared lists in one go.
        name_array1.Clear();
        name_array1.AddRange(regionNames);
        name_array2.Clear();
        name_array2.AddRange(prefectureNames);
        url_array2.Clear();
        url_array2.AddRange(prefectureUrls);
        name_array3.Clear();
        name_array3.AddRange(cityNames);
        url_array3.Clear();
        url_array3.AddRange(cityUrls);

        WriteRegionCsvFiles();

        // Repopulate the first combo box and restore the selections. Each Items.Count is
        // read only after the preceding SelectedIndex assignment has completed.
        comboBox1.Items.Clear();
        comboBox1.Items.AddRange(name_array1.ToArray());
        comboBox1.SelectedIndex = ClampSelectedIndex(previousLargeIndex, comboBox1.Items.Count);
        comboBox2.SelectedIndex = ClampSelectedIndex(previousMiddleIndex, comboBox2.Items.Count);
        comboBox3.SelectedIndex = ClampSelectedIndex(previousSmallIndex, comboBox3.Items.Count);

        // NOTE: a disabled (fully commented-out) scraper for tenki.jp weather warnings that
        // wrote regionlist2_2.csv used to follow here. It was dead code and has been removed;
        // recover it from version control if it is ever needed again.

        form1.DeleteHistories();
    }
    finally
    {
        button3.Enabled = true;
        button3.Text = "データ取得";
    }
}

/// <summary>Downloads <paramref name="url"/> as UTF-8 text and parses it into an HTML document.</summary>
private static CreteLib.HtmlDocument DownloadHtmlDocument(string url)
{
    string source;
    using (WebClient client = new WebClient())
    {
        client.Encoding = Encoding.GetEncoding("utf-8");
        source = client.DownloadString(url);
    }

    CreteLib.HtmlDocument doc = new CreteLib.HtmlDocument();
    doc.LoadHtml(source);
    return doc;
}

/// <summary>
/// Appends the href of every &lt;a&gt; node whose href matches <paramref name="hrefPattern"/> to
/// <paramref name="urls"/>, and, when <paramref name="names"/> is not null, the node's inner text
/// to <paramref name="names"/> at the same index.
/// </summary>
private static void CollectMatchingLinks(CreteLib.HtmlDocument doc, string hrefPattern, List<string> urls, List<string> names)
{
    foreach (HtmlNode node in doc.GetNodesByTagName("a"))
    {
        try
        {
            string href = node["href"];
            if (!Regex.IsMatch(href, hrefPattern))
            {
                continue;
            }

            if (names != null)
            {
                // Read the text before touching either list so both stay aligned if it throws.
                string text = node.InnerText;
                urls.Add(href);
                names.Add(text);
            }
            else
            {
                urls.Add(href);
            }
        }
        catch (Exception)
        {
            // Deliberate best effort: some <a> nodes have no href attribute and are skipped.
            // NOTE(review): the exact exception raised for a missing href is not visible
            // here, hence the broad catch - narrow it once confirmed.
        }
    }
}

/// <summary>Writes the three region lists held in the shared lists to CSV files encoded as Shift_JIS.</summary>
private void WriteRegionCsvFiles()
{
    System.Text.Encoding shiftJis = System.Text.Encoding.GetEncoding("shift_jis");

    // Existing files are overwritten (append = false).
    button3.Text = "大区分出力中...";
    button3.Refresh();
    using (System.IO.StreamWriter writer = new System.IO.StreamWriter(@"./regionlist1.csv", false, shiftJis))
    {
        foreach (string name in name_array1)
        {
            writer.Write(name + "\r\n");
        }
    }

    button3.Text = "中区分出力中...";
    button3.Refresh();
    using (System.IO.StreamWriter writer = new System.IO.StreamWriter(@"./regionlist2.csv", false, shiftJis))
    {
        for (int l = 0; l < name_array2.Count; l++)
        {
            for (int m = 0; m < name_array2[l].Count; m++)
            {
                writer.Write(l + "," + m + "," + name_array2[l][m] + "\r\n");
            }
        }
    }

    button3.Text = "小区分出力中...";
    button3.Refresh();
    using (System.IO.StreamWriter writer = new System.IO.StreamWriter(@"./regionlist3.csv", false, shiftJis))
    {
        for (int l = 0; l < name_array3.Count; l++)
        {
            for (int m = 0; m < name_array3[l].Count; m++)
            {
                for (int n = 0; n < name_array3[l][m].Count; n++)
                {
                    writer.Write(l + "," + m + "," + n + "," + name_array3[l][m][n] + "," + url_array3[l][m][n] + "\r\n");
                }
            }
        }
    }
}

/// <summary>
/// Returns the index to select after a list was rebuilt: the previous index when it is still
/// in range, otherwise 0, or -1 when the list is empty (selecting 0 would throw).
/// </summary>
private static int ClampSelectedIndex(int previousIndex, int itemCount)
{
    if (itemCount == 0)
    {
        return -1;
    }

    return previousIndex < itemCount ? previousIndex : 0;
}
/// <summary>
/// Executes the feature filter and writes the values it extracts into <paramref name="pDic"/>.
/// </summary>
/// <remarks>
/// Candidate nodes are obtained from <c>Path.Exe</c>; a null result means there is nothing to do.
/// <c>ResultMode == 0</c> is key/value mode: the node's children are split by <c>Spliter</c>
/// and each group containing non-blank text produces one dictionary entry (optionally prefixed
/// with <c>FName + "_"</c>). <c>ResultMode == 1</c> stores the node's trimmed, non-empty text
/// under <c>FName</c>. Other values of <c>ResultMode</c> do nothing. Existing keys are overwritten.
/// </remarks>
/// <param name="pDic">Receives the extracted key/value pairs.</param>
/// <param name="pTree">Forwarded to <c>Path.Exe</c>.</param>
/// <param name="pHtmlNodeList">Forwarded to <c>Path.Exe</c>.</param>
public void Exe(Dictionary<string, string> pDic, HtmlTree pTree, HtmlNodeList pHtmlNodeList)
{
    List<HtmlNode> candidates = Path.Exe(pTree, pHtmlNodeList);
    if (candidates == null)
    {
        return;
    }

    for (int i = 0; i < candidates.Count; i++)
    {
        HtmlNode node = candidates[i];

        if (ResultMode == 0)
        {
            // Key/value pair mode.
            List<HtmlNodeList> segments = node.Nodes.Split(Spliter);
            for (int j = 0; j < segments.Count; j++)
            {
                HtmlNodeList textNodes = segments[j].GetTextNodes(false);
                if (textNodes.Count <= 0)
                {
                    continue;
                }

                // Remove text nodes that are blank after trimming, scanning from the end so
                // that a removal never skips an unvisited element.
                int cursor = textNodes.Count - 1;
                while (cursor >= 0)
                {
                    if (textNodes[cursor].TextDecoded.Trim().Length == 0)
                    {
                        textNodes.RemoveAt(cursor);
                    }
                    cursor--;
                }

                if (textNodes.Count <= 0)
                {
                    continue;
                }

                // The first text node carries "name<separator>value..."; spaces are stripped
                // before splitting.
                // NOTE(review): both separator characters appear identical here; one may have
                // been a full-width colon originally - confirm the file encoding.
                string[] nameParts = textNodes[0].TextDecoded
                    .Trim()
                    .Replace(" ", "")
                    .Split(new char[] { ':', ':' }, StringSplitOptions.RemoveEmptyEntries);
                if (nameParts.Length == 0)
                {
                    continue;
                }

                string entryName = CommonService.ClearStr(nameParts[0]);

                // Value = remaining parts of the first node + trimmed text of the other nodes.
                StringBuilder buffer = new StringBuilder();
                for (int k = 1; k < nameParts.Length; k++)
                {
                    buffer.Append(nameParts[k]);
                }
                for (int k = 1; k < textNodes.Count; k++)
                {
                    buffer.Append(textNodes[k].TextDecoded.Trim());
                }

                string entryValue = buffer.ToString();
                if (FName == null || FName.Length == 0)
                {
                    pDic[entryName] = entryValue;
                }
                else
                {
                    pDic[FName + "_" + entryName] = entryValue;
                }
            }
        }
        else if (ResultMode == 1)
        {
            string trimmed = node.TextDecoded.Trim();
            if (trimmed.Length > 0)
            {
                pDic[FName] = trimmed;
            }
        }
    }
}