/// <summary>
/// Processes one chunk of <c>tbNewDomains</c> rows: splits each domain name with the
/// shared <c>result</c> splitter and bulk-inserts one <c>tbSplit</c> row per domain.
/// </summary>
/// <param name="chunkNum">
/// Zero-based chunk index. The chunk covers rows whose Id lies in
/// [chunkNum * 10000, (chunkNum + 1) * 10000).
/// </param>
/// <returns>
/// <c>true</c> when the chunk was inserted and saved; <c>false</c> when the insert or
/// save threw (the failure is reported on the console instead of being silently dropped).
/// </returns>
private static bool SplitNames(int chunkNum)
{
    const int chunkSize = 10000;

    using (var db = new DomainsEntities())
    {
        // Nothing is tracked through the change tracker here, so skip its overhead.
        db.Configuration.AutoDetectChangesEnabled = false;
        db.Configuration.ValidateOnSaveEnabled = false;

        // Monotonic timer: unaffected by wall-clock adjustments, unlike DateTime.Now.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // Compute the Id window once instead of inside the query expression.
        var firstId = chunkNum * chunkSize;
        var endId = (chunkNum + 1) * chunkSize;
        var domains = db.tbNewDomains.Where(i => i.Id >= firstId && i.Id < endId);

        var splits = new List<tbSplit>();
        foreach (var domain in domains)
        {
            // `result` is declared outside this method; the call order below mirrors the
            // original because the later calls read state set up by FindAndSplit
            // (presumably -- verify against the splitter's implementation).
            result.FindAndSplit(domain.Name);

            var split = new tbSplit();
            split.DomID = domain.Id;
            split.NameShown = result.FindBestIncSeparators();
            split.NameWords = result.SplitKeywords(split.NameShown);
            split.WordCount = result.BestItemCount;
            splits.Add(split);
        }

        try
        {
            db.BulkInsert(splits);
            db.SaveChanges();
        }
        catch (Exception ex)
        {
            // Previously swallowed: the chunk was reported as processed even though
            // nothing had been written.
            Console.WriteLine("{0}-th chunk failed: {1}", (chunkNum + 1).ToString(), ex.Message);
            return false;
        }

        var elapsed = Math.Round((decimal)stopwatch.Elapsed.TotalSeconds).ToString();
        Console.WriteLine("{0}-th chunk is processed ({1})", (chunkNum + 1).ToString(), elapsed);
    }

    return true;
}
/// <summary>
/// Downloads one result page, parses the rows of the HTML table with class
/// <c>base1</c>, and stores each row through <c>WriteToDB</c> until a row whose
/// second cell is below <paramref name="minPR"/> is reached.
/// </summary>
/// <param name="db">Context passed to <c>WriteToDB</c> and saved once per page.</param>
/// <param name="urlStr">
/// Composite format string for the page URL; <c>{0}</c> receives the row offset
/// <c>(n - 1) * 25</c>.
/// </param>
/// <param name="n">One-based page number.</param>
/// <param name="minPR">
/// Threshold compared against the integer parsed from each row's second cell.
/// </param>
/// <returns>
/// <c>true</c> when every parsed row was at or above <paramref name="minPR"/>;
/// <c>false</c> when a row below the threshold was reached (rows before it are still
/// written and saved), when the table is missing, or when any exception occurred.
/// </returns>
public static bool GetPage(DomainsEntities db, string urlStr, int n, int minPR)
{
    try
    {
        // Monotonic timer: unaffected by wall-clock adjustments, unlike DateTime.Now.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        var offset = Convert.ToString((n - 1) * 25);
        var url = string.Format(urlStr, offset);

        // WebClient is IDisposable; release it as soon as the page text is in hand.
        string page;
        using (var webClient = new WebClient())
        {
            page = webClient.DownloadString(url);
        }

        var doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(page);

        var tableNode = doc.DocumentNode.SelectSingleNode("//table[@class='base1']"); // responsive
        if (tableNode == null)
        {
            // Report the real cause instead of a generic null-reference message.
            Console.WriteLine("{0}: table with class 'base1' was not found", url);
            return false;
        }

        // Skip the first row (presumably the header -- confirm against the page layout)
        // and keep only rows with more than one cell.
        // NOTE(review): a <td> without child nodes makes FirstChild null; the resulting
        // exception is handled by the catch below and fails the whole page.
        List<List<string>> table = tableNode
            .Descendants("tr")
            .Skip(1)
            .Where(tr => tr.Elements("td").Count() > 1)
            .Select(tr => tr.Elements("td").Select(td => td.FirstChild.InnerText.Trim()).ToList())
            .ToList();

        var allAboveMin = true;
        foreach (var item in table)
        {
            if (int.Parse(item[1]) < minPR)
            {
                allAboveMin = false;
                break;
            }

            // Invariant lowering: culture-sensitive ToLower() can alter ASCII letters
            // under some cultures (e.g. 'I' under Turkish).
            WriteToDB(db, item[0].ToLowerInvariant(), item[1], item[2], item[4], item[6]);
        }

        db.SaveChanges();

        Console.WriteLine("{0} is processed in {1} ms", url, stopwatch.ElapsedMilliseconds.ToString());
        return allAboveMin;
    }
    catch (Exception ex)
    {
        // Callers rely on a boolean result rather than exceptions; report and fail.
        Console.Write(ex.Message + '\n');
        return false;
    }
}