예제 #1
0
        /// <summary>
        /// Creates an empty <see cref="HtmlNodeList"/>, passes it together with this node and
        /// <paramref name="patts"/> to the three-argument <c>QuerySelectAll</c> overload, and returns it.
        /// </summary>
        /// <param name="patts">Selector patterns forwarded to the three-argument overload.</param>
        /// <returns>The list handed to the three-argument overload.</returns>
        public HtmlNodeList QuerySelectAll(QuerySelectorPatterns patts)
        {
            HtmlNodeList selected = new HtmlNodeList();
            QuerySelectAll(this, patts, selected);

            return selected;
        }
예제 #2
0
        /// <summary>
        /// Binds this node to <paramref name="xmlNode"/> and <paramref name="htmlDoc"/>, clears the
        /// cached upper-case name, and rebuilds the attribute collection and the child node list.
        /// </summary>
        /// <param name="xmlNode">XML node to wrap; asserted to be non-null in debug builds.</param>
        /// <param name="htmlDoc">Document stored as this node's owning document.</param>
        protected void InitXmlNode(XmlNode xmlNode, HtmlDocument htmlDoc)
        {
            Debug.Assert(xmlNode != null);

            // Debug-only sanity check: the node's owner document must be either the owner of the
            // document's XML node or that XML node itself. The expressions stay inside the assert
            // so that release builds do not evaluate them.
            Debug.Assert(xmlNode.OwnerDocument == htmlDoc.m_XmlNode.OwnerDocument || xmlNode.OwnerDocument == htmlDoc.m_XmlNode);

            this.m_Doc       = htmlDoc;
            this.m_UpperName = null;
            this.m_XmlNode   = xmlNode;

            // Both collections are built only after the fields above are in place.
            this.m_Attributes    = new HtmlAttributeCollection(this);
            this.m_ChildNodeList = new HtmlNodeList(this.m_XmlNode.ChildNodes);
        }
예제 #3
0
 /// <summary>
 /// Walks the children of <paramref name="elem"/> in forward order and, for every child whose
 /// node kind is <c>HtmlNodeKind.OpenElement</c>, adds it to <paramref name="nodelist"/> when
 /// <paramref name="patts"/> evaluates to true for it, then recurses into that child.
 /// </summary>
 /// <param name="elem">Element whose children are visited.</param>
 /// <param name="patts">Patterns evaluated against each child element.</param>
 /// <param name="nodelist">List that receives the matching elements.</param>
 static void QuerySelectAll(HtmlElement elem, QuerySelectorPatterns patts, HtmlNodeList nodelist)
 {
     if (elem.ChildrenCount >= 1)
     {
         foreach (DomNode child in elem.GetChildNodeIterForward())
         {
             // Only open elements can match or have children worth visiting.
             if (child.NodeKind != HtmlNodeKind.OpenElement)
             {
                 continue;
             }

             HtmlElement childElement = (HtmlElement)child;
             bool isMatch = patts.Evaluate(childElement);
             if (isMatch)
             {
                 nodelist.AddSelectedItem(childElement);
             }

             // Recurse whether or not the child itself matched.
             QuerySelectAll(childElement, patts, nodelist);
         }
     }
 }
예제 #4
0
        /// <summary>
        /// Initializes the node that represents the document itself. This constructor is only
        /// valid for <c>XmlNodeType.Document</c> and only when the instance being constructed
        /// is an <see cref="HtmlDocument"/>.
        /// </summary>
        /// <param name="nodeType">Must be <c>XmlNodeType.Document</c>.</param>
        /// <param name="parsedPosition">Forwarded to the single-argument constructor.</param>
        /// <exception cref="HtmlParserException">
        /// <paramref name="nodeType"/> is not <c>XmlNodeType.Document</c>, or the instance is not
        /// an <see cref="HtmlDocument"/>.
        /// </exception>
        internal HtmlNode(XmlNodeType nodeType, NodePosition parsedPosition)
            : this(parsedPosition)
        {
            SetNodeId();

            Debug.Assert(nodeType == XmlNodeType.Document);
            if (nodeType != XmlNodeType.Document)
            {
                throw new HtmlParserException("This constructor can only be called for XmlNodeType.Document node type.");
            }

            // Cast once and fail explicitly, before any field is assigned, instead of
            // dereferencing the result of an unchecked "as" cast.
            HtmlDocument document = this as HtmlDocument;
            Debug.Assert(document != null);
            if (document == null)
            {
                throw new HtmlParserException("This constructor can only be called for HtmlDocument instances.");
            }

            m_Doc       = document;
            m_XmlNode   = document.m_XmlDoc;
            m_UpperName = null;

            m_Attributes    = new HtmlAttributeCollection(this);
            m_ChildNodeList = new HtmlNodeList(m_XmlNode.ChildNodes);
        }
예제 #5
0
        public string Script;     // Not used for now.

        /// <summary>
        /// Runs the feature filter and writes the extracted values into <paramref name="pDic"/>.
        /// </summary>
        /// <remarks>
        /// The nodes to process come from <c>Path.Exe</c>; nothing happens when it returns null.
        /// With <c>ResultMode == 0</c> (key/value mode) each node's children are split by
        /// <c>Spliter</c> and every group yields one entry; with <c>ResultMode == 1</c> the node's
        /// trimmed decoded text is stored under <c>FName</c>. Existing keys are overwritten.
        /// </remarks>
        /// <param name="pDic">Dictionary that receives the extracted entries.</param>
        /// <param name="pTree">Forwarded to <c>Path.Exe</c>.</param>
        /// <param name="pHtmlNodeList">Forwarded to <c>Path.Exe</c>.</param>
        public void Exe(Dictionary<string, string> pDic, HtmlTree pTree, HtmlNodeList pHtmlNodeList)
        {
            List<HtmlNode> hits = Path.Exe(pTree, pHtmlNodeList);
            if (hits == null)
            {
                return;
            }

            foreach (HtmlNode hit in hits)
            {
                if (ResultMode == 0)
                {
                    // Key/value mode: one "name: value" entry per group of child nodes.
                    foreach (HtmlNodeList group in hit.Nodes.Split(Spliter))
                    {
                        HtmlNodeList texts = group.GetTextNodes(false);
                        if (texts.Count <= 0)
                        {
                            continue;
                        }

                        // Drop whitespace-only text nodes; walking backwards keeps the
                        // not-yet-visited indexes valid while removing.
                        for (int idx = texts.Count - 1; idx >= 0; idx--)
                        {
                            if (texts[idx].TextDecoded.Trim().Length == 0)
                            {
                                texts.RemoveAt(idx);
                            }
                        }
                        if (texts.Count <= 0)
                        {
                            continue;
                        }

                        // The first text node carries the name, optionally followed by value parts.
                        // NOTE(review): both separator characters read as the same ':' here; the
                        // second may originally have been a full-width colon - confirm.
                        string firstText = texts[0].TextDecoded.Trim().Replace(" ", "");
                        string[] parts = firstText.Split(new char[] { ':', ':' }, StringSplitOptions.RemoveEmptyEntries);
                        if (parts.Length <= 0)
                        {
                            continue;
                        }

                        string entryName = CommonService.ClearStr(parts[0]);

                        // Value = remaining parts of the first node + all following text nodes.
                        StringBuilder entryValue = new StringBuilder();
                        for (int p = 1; p < parts.Length; p++)
                        {
                            entryValue.Append(parts[p]);
                        }
                        for (int t = 1; t < texts.Count; t++)
                        {
                            entryValue.Append(texts[t].TextDecoded.Trim());
                        }

                        // Prefix the key with FName when one is configured.
                        string key = (FName != null && FName.Length > 0) ? FName + "_" + entryName : entryName;
                        pDic[key] = entryValue.ToString();
                    }
                }
                else if (ResultMode == 1)
                {
                    // Single-value mode: store the node's non-empty text under FName.
                    string text = hit.TextDecoded.Trim();
                    if (text.Length > 0)
                    {
                        pDic[FName] = text;
                    }
                }
            }
        }
예제 #6
0
        /// <summary>
        /// Rebuilds the three-level region lists from the Yahoo! Japan weather pages, writes them to
        /// ./regionlist1.csv, ./regionlist2.csv and ./regionlist3.csv (Shift_JIS), and refreshes the
        /// combo boxes.
        /// </summary>
        /// <remarks>
        /// The button is disabled and used as a progress label while the handler runs, and is always
        /// restored when it finishes, including on failure. A <see cref="System.Net.WebException"/>
        /// from any download shows the connection error dialog and aborts; in that case the
        /// name/URL lists may be left partially populated and no file is written. Other
        /// exceptions propagate to the caller.
        /// A tenki.jp warning/advisory scrape that wrote ./regionlist2_2.csv used to follow the
        /// combo box update but was already fully commented out; it has been dropped from here.
        /// </remarks>
        private void button3_Click(object sender, EventArgs e)
        {
            ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12;

            button3.Enabled = false;
            button3.Text    = "データ取得中...";
            button3.Refresh();

            // Remember the current selections so they can be restored after the reload.
            int l_old = comboBox1.SelectedIndex;
            int m_old = comboBox2.SelectedIndex;
            int n_old = comboBox3.SelectedIndex;

            try
            {
                // ---- Level 1: top page, collect the region page URLs ----
                button3.Text = "大区分...";
                button3.Refresh();

                CreteLib.HtmlDocument topDoc = LoadWeatherPage("https://weather.yahoo.co.jp/weather/");

                List<string> regionUrls = new List<string>();

                name_array1.Clear();
                name_array2.Clear();
                url_array2.Clear();
                name_array3.Clear();
                url_array3.Clear();

                // The links on the top page are protocol-relative ("//host/..."); the page itself
                // was fetched over https, so they are resolved against https.
                CollectRegionLinks(
                    topDoc.GetNodesByTagName("a"),
                    @"^//weather.yahoo.co.jp/weather/jp/\d[\dabcd]*/",
                    "https:",
                    regionUrls,
                    null);

                // ---- Level 2: each region page, collect the prefecture page URLs ----
                foreach (string regionUrl in regionUrls)
                {
                    CreteLib.HtmlDocument regionDoc = LoadWeatherPage(regionUrl);

                    // NOTE(review): assumes every region page contains at least one h1 element.
                    HtmlNodeList headings = regionDoc.GetNodesByTagName("h1");
                    name_array1.Add(headings[0].InnerText.Replace("の天気", ""));

                    List<string> prefectureUrls = new List<string>();
                    CollectRegionLinks(
                        regionDoc.GetNodesByTagName("a"),
                        @"^https://weather.yahoo.co.jp/weather/jp/\d[\dabcd]*/\d+",
                        "",
                        prefectureUrls,
                        null);
                    url_array2.Add(prefectureUrls);

                    button3.Text = "中区分..." + name_array1.Count;
                    button3.Refresh();
                }

                // ---- Level 3: each prefecture page, collect the municipality names and URLs ----
                foreach (List<string> prefectureUrls in url_array2)
                {
                    List<List<string>> areaNamesByPrefecture = new List<List<string>>();
                    List<List<string>> areaUrlsByPrefecture  = new List<List<string>>();
                    List<string>       prefectureNames       = new List<string>();

                    foreach (string prefectureUrl in prefectureUrls)
                    {
                        CreteLib.HtmlDocument prefectureDoc = LoadWeatherPage(prefectureUrl);

                        // NOTE(review): assumes every prefecture page contains a title element.
                        HtmlNodeList titles = prefectureDoc.GetNodesByTagName("title");
                        prefectureNames.Add(titles[0].InnerText.Replace("の天気 - Yahoo!天気・災害", ""));

                        List<string> areaNames = new List<string>();
                        List<string> areaUrls  = new List<string>();
                        CollectRegionLinks(
                            prefectureDoc.GetNodesByTagName("a"),
                            @"^https://weather.yahoo.co.jp/weather/jp/\d[\dabcd]*/\d+/\d+",
                            "",
                            areaUrls,
                            areaNames);
                        areaNamesByPrefecture.Add(areaNames);
                        areaUrlsByPrefecture.Add(areaUrls);

                        button3.Text = "小区分..." + url_array3.Count + "-" + areaUrlsByPrefecture.Count;
                        button3.Refresh();
                    }

                    name_array3.Add(areaNamesByPrefecture);
                    url_array3.Add(areaUrlsByPrefecture);
                    name_array2.Add(prefectureNames);
                }

                // ---- Output: level 1 (one region name per line) ----
                button3.Text = "大区分出力中...";
                button3.Refresh();

                using (System.IO.StreamWriter writer = CreateShiftJisWriter(@"./regionlist1.csv"))
                {
                    foreach (string regionName in name_array1)
                    {
                        writer.Write(regionName + "\r\n");
                    }
                }

                // ---- Output: level 2 (region index, prefecture index, name) ----
                button3.Text = "中区分出力中...";
                button3.Refresh();

                using (System.IO.StreamWriter writer = CreateShiftJisWriter(@"./regionlist2.csv"))
                {
                    int l = 0;
                    foreach (List<string> prefectureNames in name_array2)
                    {
                        int m = 0;
                        foreach (string prefectureName in prefectureNames)
                        {
                            writer.Write(l.ToString() + "," + m.ToString() + "," + prefectureName + "\r\n");
                            m++;
                        }
                        l++;
                    }
                }

                // ---- Output: level 3 (region, prefecture, area index, name, URL) ----
                button3.Text = "小区分出力中...";
                button3.Refresh();

                using (System.IO.StreamWriter writer = CreateShiftJisWriter(@"./regionlist3.csv"))
                {
                    int l = 0;
                    foreach (List<List<string>> areaNamesByPrefecture in name_array3)
                    {
                        int m = 0;
                        foreach (List<string> areaNames in areaNamesByPrefecture)
                        {
                            int n = 0;
                            foreach (string areaName in areaNames)
                            {
                                writer.Write(l.ToString() + "," + m.ToString() + "," + n.ToString() + "," + areaName + "," + url_array3[l][m][n] + "\r\n");
                                n++;
                            }
                            m++;
                        }
                        l++;
                    }
                }

                // Refill the first combo box in one go, then restore the previous selections.
                // The three assignments stay in this order and each count is read only after
                // the preceding assignment has run.
                comboBox1.Items.Clear();
                comboBox1.Items.AddRange(name_array1.ToArray());

                comboBox1.SelectedIndex = ClampSelection(l_old, comboBox1.Items.Count);
                comboBox2.SelectedIndex = ClampSelection(m_old, comboBox2.Items.Count);
                comboBox3.SelectedIndex = ClampSelection(n_old, comboBox3.Items.Count);

                form1.DeleteHistories();
            }
            catch (System.Net.WebException)
            {
                MessageBox.Show(
                    "ネットワークに正しく接続されているか確認してください\nまた、接続先サーバが正常通り稼働しているか確認してください",
                    "エラー");
            }
            finally
            {
                // Always hand the button back to the user, whatever happened above.
                button3.Enabled = true;
                button3.Text    = "データ取得";
            }
        }

        /// <summary>Downloads <paramref name="url"/> as UTF-8 text and parses it into an HTML document.</summary>
        private static CreteLib.HtmlDocument LoadWeatherPage(string url)
        {
            string html;
            using (WebClient client = new WebClient())
            {
                client.Encoding = Encoding.UTF8;
                html = client.DownloadString(url);
            }

            CreteLib.HtmlDocument document = new CreteLib.HtmlDocument();
            document.LoadHtml(html);
            return document;
        }

        /// <summary>
        /// Appends to <paramref name="urls"/> the href (prefixed with <paramref name="urlPrefix"/>) of every
        /// anchor whose href matches <paramref name="hrefPattern"/>; when <paramref name="names"/> is not
        /// null the anchor's inner text is appended to it at the same index.
        /// </summary>
        private static void CollectRegionLinks(HtmlNodeList anchors, string hrefPattern, string urlPrefix, List<string> urls, List<string> names)
        {
            foreach (HtmlNode anchor in anchors)
            {
                string href;
                string text = null;
                try
                {
                    href = anchor["href"];
                    if (!Regex.IsMatch(href, hrefPattern))
                    {
                        continue;
                    }
                    if (names != null)
                    {
                        text = anchor.InnerText;
                    }
                }
                catch (Exception)
                {
                    // Deliberate best-effort: some anchors have no href attribute, and reading
                    // or matching it then fails. Such anchors are simply skipped.
                    continue;
                }

                // Both values are read before either list is touched so the lists stay aligned.
                urls.Add(urlPrefix + href);
                if (names != null)
                {
                    names.Add(text);
                }
            }
        }

        /// <summary>Opens <paramref name="path"/> for writing in Shift_JIS, overwriting an existing file.</summary>
        private static System.IO.StreamWriter CreateShiftJisWriter(string path)
        {
            return new System.IO.StreamWriter(path, false, System.Text.Encoding.GetEncoding("shift_jis"));
        }

        /// <summary>
        /// Returns the index to select: -1 for an empty list, the previous index when it is below
        /// <paramref name="itemCount"/>, otherwise 0.
        /// </summary>
        private static int ClampSelection(int previousIndex, int itemCount)
        {
            if (itemCount == 0)
            {
                return -1;
            }
            return previousIndex < itemCount ? previousIndex : 0;
        }
예제 #7
0
 /// <summary>
 /// Runs the feature filter and writes the extracted values into <paramref name="pDic"/>.
 /// </summary>
 /// <remarks>
 /// The nodes to process come from <c>Path.Exe</c>; nothing happens when it returns null.
 /// With <c>ResultMode == 0</c> (key/value mode) each node's children are split by
 /// <c>Spliter</c> and every group yields one entry; with <c>ResultMode == 1</c> the node's
 /// trimmed decoded text is stored under <c>FName</c>. Existing keys are overwritten.
 /// </remarks>
 /// <param name="pDic">Dictionary that receives the extracted entries.</param>
 /// <param name="pTree">Forwarded to <c>Path.Exe</c>.</param>
 /// <param name="pHtmlNodeList">Forwarded to <c>Path.Exe</c>.</param>
 public void Exe(Dictionary<string, string> pDic, HtmlTree pTree, HtmlNodeList pHtmlNodeList)
 {
     List<HtmlNode> hits = Path.Exe(pTree, pHtmlNodeList);
     if (hits == null)
     {
         return;
     }

     foreach (HtmlNode hit in hits)
     {
         if (ResultMode == 0)
         {
             // Key/value mode: one "name: value" entry per group of child nodes.
             foreach (HtmlNodeList group in hit.Nodes.Split(Spliter))
             {
                 HtmlNodeList texts = group.GetTextNodes(false);
                 if (texts.Count <= 0)
                 {
                     continue;
                 }

                 // Drop whitespace-only text nodes; walking backwards keeps the
                 // not-yet-visited indexes valid while removing.
                 for (int idx = texts.Count - 1; idx >= 0; idx--)
                 {
                     if (texts[idx].TextDecoded.Trim().Length == 0)
                     {
                         texts.RemoveAt(idx);
                     }
                 }
                 if (texts.Count <= 0)
                 {
                     continue;
                 }

                 // The first text node carries the name, optionally followed by value parts.
                 // NOTE(review): both separator characters read as the same ':' here; the
                 // second may originally have been a full-width colon - confirm.
                 string firstText = texts[0].TextDecoded.Trim().Replace(" ", "");
                 string[] parts = firstText.Split(new char[] { ':', ':' }, StringSplitOptions.RemoveEmptyEntries);
                 if (parts.Length <= 0)
                 {
                     continue;
                 }

                 string entryName = CommonService.ClearStr(parts[0]);

                 // Value = remaining parts of the first node + all following text nodes.
                 StringBuilder entryValue = new StringBuilder();
                 for (int p = 1; p < parts.Length; p++)
                 {
                     entryValue.Append(parts[p]);
                 }
                 for (int t = 1; t < texts.Count; t++)
                 {
                     entryValue.Append(texts[t].TextDecoded.Trim());
                 }

                 // Prefix the key with FName when one is configured.
                 string key = (FName != null && FName.Length > 0) ? FName + "_" + entryName : entryName;
                 pDic[key] = entryValue.ToString();
             }
         }
         else if (ResultMode == 1)
         {
             // Single-value mode: store the node's non-empty text under FName.
             string text = hit.TextDecoded.Trim();
             if (text.Length > 0)
             {
                 pDic[FName] = text;
             }
         }
     }
 }