/// <summary>
/// Walks every character code from the one entered in <c>txtAlphaBegin</c> to the one
/// entered in <c>txtAlphaEnd</c> (inclusive), substitutes each character for the
/// <c>{A}</c> placeholder in the URL template taken from <c>cboUrl</c>, and hands the
/// resulting URL to <c>WebLib.HttpPost</c>. The outcome of every call is appended to
/// <c>txtComment</c> on its own line.
/// </summary>
/// <remarks>
/// <c>Convert.ToChar(string)</c> requires exactly one character, so either text box
/// holding anything else makes this method throw before any request is issued.
/// If the begin character sorts after the end character, nothing is scanned.
/// </remarks>
private void ScanAlpha()
{
    // char converts implicitly to int, giving the UTF-16 code of each bound.
    int firstCode = Convert.ToChar(txtAlphaBegin.Text);
    int lastCode = Convert.ToChar(txtAlphaEnd.Text);

    string urlTemplate = cboUrl.Text;
    string path = PathSelect();

    for (int code = firstCode; code <= lastCode; code++)
    {
        // Always start from the untouched template so the placeholder is present
        // again for every letter.
        string letter = ((char)code).ToString();
        string targetUrl = urlTemplate.Replace("{A}", letter);

        // Original note: "Get Term / get info from hsx.vn".
        // NOTE(review): HttpPost is defined elsewhere; presumably it fetches the URL and
        // stores the result under 'path', keyed by the character code - confirm.
        bool succeeded = WebLib.HttpPost(targetUrl, path, code);

        string message = succeeded
            ? "Lấy xong dữ liệu từ " + targetUrl + "!"
            : "Không lấy được dữ liệu từ" + targetUrl + " !";

        txtComment.AppendText(message);
        txtComment.AppendText(Environment.NewLine);
    }
}
/// <summary>
/// Runs a Google search for the text in <c>txtGoolgeSearch</c>, restricted to the site
/// entered in <c>cboUrl</c>, and walks the result pages. Every link of the form
/// <c>url?q=&lt;site&gt;....html</c> found in a page is passed to <c>WebLib.HttpPost</c>,
/// and the outcome of each call is appended to <c>txtComment</c> on its own line.
/// </summary>
/// <remarks>
/// Paging stops when <c>RequestContent</c> returns null or an empty string, or once the
/// page counter reaches the page limit (at most 99 result pages are requested).
/// The search text and site are inserted into the query string as typed; they are not
/// URL-escaped here.
/// </remarks>
private void ScanGoolge()
{
    const string RedirectPrefix = "url?q=";  // marker that precedes a result link
    const string LinkSuffix = ".html";       // only links ending in .html are collected
    const int ResultsPerPage = 10;           // step of the "start" query parameter
    const int PageLimit = 100;               // paging stops when the counter reaches this

    string insite = cboUrl.Text;
    string baseUrl = "https://www.google.com/search?q=" + txtGoolgeSearch.Text
        + "+site:" + insite + "&ie=utf-8&oe=utf-8";
    string path = PathSelect();
    string search = RedirectPrefix + insite;

    string url = baseUrl;
    int linkNumber = 1;

    for (int page = 1; page < PageLimit; page++)
    {
        string content = RequestContent(url);
        if (string.IsNullOrEmpty(content))
        {
            break;
        }

        // Scan the page with a moving cursor instead of re-allocating the remainder
        // of the page after every link.
        int cursor = 0;
        while (true)
        {
            // Test the raw IndexOf result BEFORE adding the prefix length: once the
            // prefix length is added, a "not found" (-1) looks like a valid offset.
            int marker = content.IndexOf(search, cursor, StringComparison.Ordinal);
            if (marker == -1)
            {
                break;
            }

            int linkStart = marker + RedirectPrefix.Length;
            int linkEnd = content.IndexOf(LinkSuffix, linkStart, StringComparison.Ordinal);
            if (linkEnd == -1)
            {
                break;
            }

            string tempUrl = content.Substring(linkStart, linkEnd - linkStart + LinkSuffix.Length);

            // Resume at the ".html" just consumed, as the original scan did.
            cursor = linkEnd;

            // Guards against a ".html" that was found inside the site text itself,
            // which would yield a truncated link.
            if (!tempUrl.StartsWith(insite, StringComparison.Ordinal))
            {
                continue;
            }

            // NOTE(review): HttpPost is defined elsewhere; presumably it fetches the URL
            // and stores the result under 'path', keyed by the running number - confirm.
            if (!WebLib.HttpPost(tempUrl, path, linkNumber))
            {
                txtComment.AppendText("Không lấy được dữ liệu từ" + tempUrl + " !");
                txtComment.AppendText(Environment.NewLine);
            }
            else
            {
                txtComment.AppendText("Lấy xong dữ liệu từ " + tempUrl + "!");
                txtComment.AppendText(Environment.NewLine);
            }

            linkNumber++;
        }

        // Next result page: Google's "start" parameter is the zero-based result offset.
        url = baseUrl + "&start=" + (ResultsPerPage * page);
    }
}