示例#1
0
        /// <summary>
        /// Downloads one page for every character in the inclusive range
        /// <c>txtAlphaBegin</c>..<c>txtAlphaEnd</c>. Each character replaces the
        /// <c>{A}</c> placeholder in the URL template read from <c>cboUrl</c>, and the
        /// resulting URL is handed to <c>WebLib.HttpPost</c> together with the path
        /// returned by <c>PathSelect()</c> and the character's numeric code.
        /// The outcome of every request is appended to <c>txtComment</c>.
        /// </summary>
        private void ScanAlpha()
        {
            string beginText = txtAlphaBegin.Text;
            string endText   = txtAlphaEnd.Text;

            // Convert.ToChar(string) throws unless the string is exactly one character
            // long, so reject anything else up front and report it instead of crashing.
            if (beginText == null || beginText.Length != 1 ||
                endText == null || endText.Length != 1)
            {
                txtComment.AppendText("Ký tự bắt đầu và ký tự kết thúc phải là đúng một ký tự!");
                txtComment.AppendText(Environment.NewLine);
                return;
            }

            int begin = beginText[0];
            int end   = endText[0];

            string urlTemplate = cboUrl.Text;
            string path        = PathSelect();

            // When begin > end the loop body never runs, exactly as before.
            for (int i = begin; i <= end; i++)
            {
                // Build the URL from the untouched template on every pass so the
                // placeholder is always available for substitution.
                string tempUrl = urlTemplate.Replace("{A}", ((char)i).ToString());

                if (WebLib.HttpPost(tempUrl, path, i))
                {
                    txtComment.AppendText("Lấy xong dữ liệu từ " + tempUrl + "!");
                }
                else
                {
                    // The original message was missing the space before the URL.
                    txtComment.AppendText("Không lấy được dữ liệu từ " + tempUrl + " !");
                }
                txtComment.AppendText(Environment.NewLine);
            }
        }
示例#2
0
        /// <summary>
        /// Runs a Google search for the text in <c>txtGoolgeSearch</c>, restricted to the
        /// site given in <c>cboUrl</c>, and walks the result pages. Every result link of
        /// the form <c>url?q=&lt;site&gt;....html</c> found in the returned content is
        /// passed to <c>WebLib.HttpPost</c> together with the path from
        /// <c>PathSelect()</c> and a running counter. The outcome of each download is
        /// appended to <c>txtComment</c>.
        /// Paging stops when <c>RequestContent</c> returns no content or the page
        /// counter reaches its limit.
        /// </summary>
        private void ScanGoolge()
        {
            const string linkPrefix     = "url?q=";
            const string linkSuffix     = ".html";
            const int    resultsPerPage = 10;
            // The loop exits once the page counter reaches this value (i.e. after 99 requests).
            const int    pageLimit      = 100;

            string insite = cboUrl.Text;
            // NOTE(review): the search text and site are concatenated unescaped; if callers
            // do not pre-encode them, consider Uri.EscapeDataString — confirm before changing.
            string baseUrl = "https://www.google.com/search?q=" + txtGoolgeSearch.Text + "+site:" + insite + "&ie=utf-8&oe=utf-8";
            string search  = linkPrefix + insite;
            string path    = PathSelect();

            string url  = baseUrl;
            int    page = 1;
            int    i    = 1;
            string content;

            // IsNullOrEmpty also covers a null response, which would otherwise
            // fail on the first IndexOf call below.
            while (!string.IsNullOrEmpty(content = RequestContent(url)))
            {
                // Scan position inside the current page; avoids re-allocating the
                // remaining text after every match.
                int cursor = 0;

                while (true)
                {
                    // Test the IndexOf result BEFORE adding the prefix length: adding
                    // first turns the "not found" value -1 into 5, which never equals -1.
                    int hit = content.IndexOf(search, cursor, StringComparison.Ordinal);
                    if (hit == -1)
                    {
                        break;
                    }

                    // Start of the target address, just past "url?q=".
                    int pos1 = hit + linkPrefix.Length;
                    int pos2 = content.IndexOf(linkSuffix, pos1, StringComparison.Ordinal);
                    if (pos2 == -1)
                    {
                        break;
                    }

                    string tempUrl = content.Substring(pos1, pos2 - pos1 + linkSuffix.Length);
                    // Resume from the ".html" that closed this link.
                    cursor = pos2;

                    // Guards the case where ".html" occurs inside the site text itself,
                    // which would cut the extracted address short.
                    if (tempUrl.StartsWith(insite, StringComparison.Ordinal))
                    {
                        if (WebLib.HttpPost(tempUrl, path, i))
                        {
                            txtComment.AppendText("Lấy xong dữ liệu từ " + tempUrl + "!");
                        }
                        else
                        {
                            // The original message was missing the space before the URL.
                            txtComment.AppendText("Không lấy được dữ liệu từ " + tempUrl + " !");
                        }
                        txtComment.AppendText(Environment.NewLine);
                        i++;
                    }
                }

                // Next result page: Google's "start" parameter is the result offset.
                url = baseUrl + "&start=" + (resultsPerPage * page);
                page++;
                if (page == pageLimit)
                {
                    break;
                }
            }
        }