/// <summary>
/// Scans the page source obtained for <paramref name="source"/> for e-mail addresses, stores every
/// validated address that is not yet in <c>dal.CVMails</c>, and appends it to <paramref name="writer"/>.
/// </summary>
/// <param name="source">Value handed to <c>getPageSource</c> to obtain the text to scan.</param>
/// <param name="writer">Receives each newly stored address followed by ", ".</param>
/// <param name="lastID">Counter object; <c>LastID1 + 1</c> is written to the new record and
/// <c>LastID1</c> is incremented for every insert attempt.</param>
/// <returns>The number of addresses stored by this call; 0 when no page source was obtained.</returns>
private int GetMails(string source, StreamWriter writer, LastID lastID)
{
    if (this.Session["list"] != null)
    {
        this.list = (List<string>)this.Session["list"];
    }

    if (this.list == null)
    {
        // Nothing cached yet: start with an empty list so Contains/Add below cannot hit null.
        this.list = new List<string>();
    }

    string g = this.getPageSource(source);
    if (string.IsNullOrEmpty(g))
    {
        return 0;
    }

    int count = 0;
    Regex t = new Regex(@"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?");
    foreach (Match m in t.Matches(g))
    {
        if (this.list.Contains(m.Value) || !ValidateMail(m.Value))
        {
            continue;
        }

        string mail = ClearEmail(m.Value);

        // The list is filled with the cleaned address, so it has to be probed with the cleaned
        // address too; otherwise the cache never matches when ClearEmail changes the text.
        if (this.list.Contains(mail))
        {
            continue;
        }

        CVMail f = dal.CVMails.SingleOrDefault(b => b.Mail == mail);
        if (f != null)
        {
            continue;
        }

        try
        {
            long result = (long)lastID.LastID1;
            dal.CVMails.InsertOnSubmit(new CVMail { Mail = mail, asdws = (result + 1) });
            lastID.LastID1++;
            dal.SubmitChanges();
            this.list.Add(mail);
            writer.Write(mail + ", ");
            count++;

            // NOTE(review): i is not declared in this method - presumably a class-level counter; confirm.
            i++;
        }
        catch (Exception ex)
        {
            // Still best-effort (one bad address must not stop the scan), but no longer silent.
            System.Diagnostics.Trace.TraceError("GetMails: could not store '" + mail + "': " + ex);
        }
    }

    this.Session["list"] = this.list;
    return count;
}
/// <summary>
/// Scans the page source obtained for <paramref name="source"/> for e-mail addresses and stores every
/// validated address that is not yet in <c>dal.CVMails</c>, also adding it to <c>MailsListSession</c>.
/// </summary>
/// <param name="source">Value handed to <c>getPageSource</c> to obtain the text to scan.</param>
/// <returns>The number of addresses stored by this call; 0 when no page source was obtained.</returns>
private int GetMails(string source)
{
    string g = getPageSource(source);
    if (string.IsNullOrEmpty(g))
    {
        return 0;
    }

    int count = 0;
    Regex t = new Regex(@"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?");
    foreach (Match m in t.Matches(g))
    {
        if (!ValidateMail(m.Value))
        {
            continue;
        }

        string mail = ClearEmail(m.Value);

        // The existence check and the insert are done under the same lock; checking outside the
        // lock would let two callers both see "missing" and both insert the same address.
        // NOTE(review): the lock object is this instance, so only callers sharing the instance are
        // serialized - confirm that is the intended scope.
        lock (this)
        {
            CVMail f = dal.CVMails.SingleOrDefault(b => b.Mail == mail);
            if (f != null)
            {
                continue;
            }

            try
            {
                // lastID is not declared in this method - it is read from the enclosing scope.
                long result = (long)lastID.LastID1;
                dal.CVMails.InsertOnSubmit(new CVMail { asdws = (result + 1), Date = DateTime.Now, Mail = mail });
                lastID.LastID1++;
                dal.SubmitChanges();
                MailsListSession.Add(mail);
                count++;
            }
            catch (Exception ex)
            {
                // Still best-effort (one bad address must not stop the scan), but no longer silent.
                System.Diagnostics.Trace.TraceError("GetMails: could not store '" + mail + "': " + ex);
            }
        }
    }

    return count;
}
/// <summary>
/// Partial method declaration without a body; calls to it only do something when an implementing
/// declaration is supplied elsewhere.
/// NOTE(review): the name suggests a hook for deleting a CVMail - confirm against the calling code.
/// </summary>
/// <param name="instance">The CVMail object the call concerns.</param>
partial void DeleteCVMail(CVMail instance);
/// <summary>
/// Partial method declaration without a body; calls to it only do something when an implementing
/// declaration is supplied elsewhere.
/// NOTE(review): the name suggests a hook for updating a CVMail - confirm against the calling code.
/// </summary>
/// <param name="instance">The CVMail object the call concerns.</param>
partial void UpdateCVMail(CVMail instance);
/// <summary>
/// Partial method declaration without a body; calls to it only do something when an implementing
/// declaration is supplied elsewhere.
/// NOTE(review): the name suggests a hook for inserting a CVMail - confirm against the calling code.
/// </summary>
/// <param name="instance">The CVMail object the call concerns.</param>
partial void InsertCVMail(CVMail instance);
/// <summary>
/// Click handler that downloads board pages 1 to 69, stores every e-mail address that is not yet in
/// <c>cvList.CVMails</c>, appends each stored address to the spider text file, and updates the
/// session totals and the two result labels.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
protected void runJob_Click(object sender, EventArgs e)
{
    if (this.Session["list"] != null)
    {
        this.list = (List<string>)this.Session["list"];
    }

    if (this.list == null)
    {
        // Nothing cached yet: start with an empty list so Add below cannot hit null after the
        // record has already been submitted.
        this.list = new List<string>();
    }

    int count = 0;
    int totalCount = 0;
    if (this.Session["total"] != null)
    {
        totalCount = (int)this.Session["total"];
    }

    try
    {
        // Second argument true = append to the existing file. The using block closes the writer
        // on every exit path, including the early return below.
        using (StreamWriter writer = new StreamWriter(@"C:\Works\CV\spider.txt", true))
        {
            // Both the counter lookup and the pattern are the same for every page, so they are
            // prepared once instead of once per page.
            LastID lastID = this.cvList.LastIDs.SingleOrDefault(y => y.sdfsdgdf == "1");
            if (lastID == null)
            {
                return;
            }

            Regex t = new Regex(@"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?");

            for (int i = 1; i < 70; i++)
            {
                string g = this.getPageSource("http://www.shohamnet.co.il/BoardView.asp?page=" + i + "&numType=11");
                if (string.IsNullOrEmpty(g))
                {
                    // A page that yields no source is skipped; passing null to Matches would throw
                    // and abort the remaining pages together with the totals update.
                    continue;
                }

                foreach (Match m in t.Matches(g))
                {
                    string address = m.Value;
                    CVMail f = cvList.CVMails.SingleOrDefault(b => b.Mail == address);
                    if (f != null)
                    {
                        continue;
                    }

                    try
                    {
                        long result = lastID.LastID1;
                        cvList.CVMails.InsertOnSubmit(new CVMail { Mail = address, asdws = result });
                        lastID.LastID1++;
                        cvList.SubmitChanges();
                        this.list.Add(address);
                        writer.Write(address + ", ");
                        count++;
                    }
                    catch (Exception ex)
                    {
                        // Still best-effort per address, but the failure is now recorded.
                        System.Diagnostics.Trace.TraceError("runJob_Click: could not store '" + address + "': " + ex);
                    }
                }
            }
        }

        totalCount += count;
        this.Session["total"] = totalCount;
        this.doneLabel.Text = count.ToString();
        this.totalLabel.Text = totalCount.ToString();
        this.Session["list"] = this.list;
    }
    catch (Exception p)
    {
        // The handler still does not rethrow, but the failure is recorded instead of discarded.
        System.Diagnostics.Trace.TraceError("runJob_Click: job aborted: " + p);
    }

    this.getAddress.Text = "";
}
/// <summary>
/// Extracts e-mail addresses from <paramref name="g"/> and stores each one that has no record with
/// the same <c>MailValue</c> in <c>mainDb.CVMails</c>.
/// </summary>
/// <param name="g">Text to scan; null or empty text is ignored.</param>
public void FindPositionUrl(string g)
{
    if (string.IsNullOrEmpty(g))
    {
        // Regex.Matches throws on null input; there is nothing to scan either way.
        return;
    }

    Regex t = new Regex(@"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?");
    foreach (Match m in t.Matches(g))
    {
        string val = m.Value;

        // Matches that are really image or upload paths are not addresses.
        if (val.Contains("ic=/upload/infocenter/info_images/") || val.Contains(".gif") || val.Contains(".jpg"))
        {
            continue;
        }

        // The pattern also accepts '?', '=', '/' and '%', so URL fragments can end up glued to the
        // address; strip the known ones. Replace returns the input unchanged when nothing matches.
        val = val.Replace("wanted_result.aspx?", "")
                 .Replace("email=", "")
                 .Replace("//", "")
                 .Replace("%20", "");

        CVMail f = mainDb.CVMails.SingleOrDefault(b => b.MailValue == val);
        if (f != null)
        {
            continue;
        }

        // Highest existing MailID plus one, or 1 for an empty table: the nullable projection makes
        // Max yield null instead of throwing when there are no rows, so one query covers both cases.
        long nextId = (mainDb.CVMails.Max(y => (long?)y.MailID) ?? 0) + 1;
        mainDb.CVMails.InsertOnSubmit(new CVMail { MailID = nextId, MailValue = val, MailSent = false, MailDate = DateTime.Now });
        mainDb.SubmitChanges();
    }
}
/// <summary>
/// Finds URLs in <paramref name="html"/>, downloads each eligible one through <c>getPageSource</c>,
/// and stores every e-mail address found there that has no record with the same <c>MailValue</c>
/// in <c>mainDb.CVMails</c>.
/// </summary>
/// <remarks>
/// A URL is eligible when it does not contain "google" and does contain a '+'; it is cut off at
/// the first '+' before being downloaded. A failure while handling one URL skips that URL only.
/// </remarks>
/// <param name="html">Text to search for URLs; null or empty text is ignored.</param>
public void FindPosition(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        // Regex.Matches throws on null input; there is nothing to scan either way.
        return;
    }

    string lookup = @"((https?|http|ftp|gopher|telnet|file|notes|ms-help):((//)|(\\\\))+[\w\d:#@%/;$()~_?\+-=\\\.&]*)";
    MatchCollection matches = Regex.Matches(html, lookup);

    // The address pattern is the same for every URL, so it is built once rather than per URL.
    Regex t = new Regex(@"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?");

    for (int i = 0; i < matches.Count; i++)
    {
        string match = matches[i].Value;
        if (match.IndexOf("google") != -1)
        {
            continue;
        }

        int plusAt = match.IndexOf('+');
        if (plusAt == -1)
        {
            continue;
        }

        match = match.Substring(0, plusAt);

        try
        {
            string g = getPageSource(match);
            if (string.IsNullOrEmpty(g))
            {
                // Skip explicitly instead of relying on Matches throwing for a null page.
                continue;
            }

            foreach (Match m in t.Matches(g))
            {
                string val = m.Value;

                // Matches that are really image or upload paths are not addresses.
                if (val.Contains("ic=/upload/infocenter/info_images/") || val.Contains(".gif") || val.Contains(".jpg"))
                {
                    continue;
                }

                // Strip URL fragments the address pattern may have captured with the address.
                // Replace returns the input unchanged when nothing matches.
                val = val.Replace("wanted_result.aspx?", "")
                         .Replace("email=", "")
                         .Replace("//", "")
                         .Replace("%20", "");

                CVMail f = mainDb.CVMails.SingleOrDefault(b => b.MailValue == val);
                if (f != null)
                {
                    continue;
                }

                // Highest existing MailID plus one, or 1 for an empty table: the nullable projection
                // makes Max yield null instead of throwing when there are no rows.
                long nextId = (mainDb.CVMails.Max(y => (long?)y.MailID) ?? 0) + 1;
                mainDb.CVMails.InsertOnSubmit(new CVMail { MailID = nextId, MailValue = val, MailSent = false, MailDate = DateTime.Now });
                mainDb.SubmitChanges();
            }
        }
        catch (Exception ex)
        {
            // Still best-effort per URL, but the failure is now recorded before moving on.
            System.Diagnostics.Trace.TraceError("FindPosition: could not process '" + match + "': " + ex);
            continue;
        }
    }
}
/// <summary>
/// Scans the page source obtained for <paramref name="source"/> for e-mail addresses and stores every
/// validated address that is not yet in <c>dal.CVMails</c>, also adding it to <c>MailsListSession</c>.
/// </summary>
/// <param name="source">Value handed to <c>getPageSource</c> to obtain the text to scan.</param>
/// <returns>The number of addresses stored by this call; 0 when no page source was obtained.</returns>
private int GetMails(string source)
{
    string pageText = getPageSource(source);
    if (string.IsNullOrEmpty(pageText))
    {
        return 0;
    }

    int count = 0;
    Regex addressPattern = new Regex(@"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?");
    foreach (Match m in addressPattern.Matches(pageText))
    {
        if (!ValidateMail(m.Value))
        {
            continue;
        }

        string mail = ClearEmail(m.Value);

        // Look-up and insert share one lock so two callers cannot both decide the address is
        // missing and insert it twice.
        // NOTE(review): the lock object is this instance, so only callers sharing the instance are
        // serialized - confirm that is the intended scope.
        lock (this)
        {
            CVMail existing = dal.CVMails.SingleOrDefault(b => b.Mail == mail);
            if (existing != null)
            {
                continue;
            }

            try
            {
                // lastID is not declared in this method - it is read from the enclosing scope.
                long result = (long)lastID.LastID1;
                dal.CVMails.InsertOnSubmit(new CVMail { asdws = (result + 1), Date = DateTime.Now, Mail = mail });
                lastID.LastID1++;
                dal.SubmitChanges();
                MailsListSession.Add(mail);
                count++;
            }
            catch (Exception ex)
            {
                // Still best-effort (one bad address must not stop the scan), but no longer silent.
                System.Diagnostics.Trace.TraceError("GetMails: could not store '" + mail + "': " + ex);
            }
        }
    }

    return count;
}