/// <summary>
/// Fetches batches of serials from <c>GetSerials</c> and processes each batch in parallel,
/// storing one <c>Company</c> row per serial, until the serial sequence reports no elements.
/// </summary>
/// <remarks>
/// The degree of parallelism is read from the <c>parallelism</c> app setting and defaults to 10
/// when the setting is missing, is not an integer, or is less than 1.
/// </remarks>
public void Load()
{
    // Raise the default limit on concurrent connections per ServicePoint to its maximum.
    ServicePointManager.DefaultConnectionLimit = int.MaxValue;

    var parallelism = 10;
    var parallelismSetting = ConfigurationManager.AppSettings["parallelism"];
    if (!string.IsNullOrEmpty(parallelismSetting))
    {
        // int.TryParse writes 0 to its out argument when parsing fails, so parse into a
        // scratch variable; otherwise a malformed setting would replace the default with 0
        // and WithDegreeOfParallelism would throw ArgumentOutOfRangeException.
        int configured;
        if (int.TryParse(parallelismSetting, out configured) && configured >= 1)
        {
            parallelism = configured;
        }
    }

    Console.WriteLine("Parellelism: {0}", parallelism);

    IEnumerable<int> serials;
    do
    {
        serials = GetSerials();
        serials.AsParallel()
            .WithDegreeOfParallelism(parallelism)
            .ForAll(ProcessSerial);

        // NOTE(review): Any() enumerates the sequence again; whether that re-runs a query
        // depends on what GetSerials returns, which is not visible here. If GetSerials keeps
        // returning serials whose processing always fails, this loop will not end - confirm.
    } while (serials.Any());
}

/// <summary>
/// Builds a <c>Company</c> for one serial, fills it from the fetched document when the serial
/// maps to a CVR number, and saves it. Failures are logged to stderr and otherwise ignored.
/// </summary>
/// <param name="serial">The serial to process.</param>
private static void ProcessSerial(int serial)
{
    try
    {
        var company = new Company() { Serial = serial, };

        // CvrHelper.ToCvr returns -1 when it has no CVR number for the serial; in that case
        // the company is still saved below, with only its serial set.
        var cvrNumber = CvrHelper.ToCvr(serial);
        if (cvrNumber != -1)
        {
            company.Cvr = cvrNumber;

            var contentHelper = new ContentHelper(Encoding.GetEncoding("ISO-8859-1"));
            var document = contentHelper.GetContent(cvrNumber);

            // The literal below is the Danish message the page shows for an unknown company
            // ("The company name or CVR/SE number is not on the tax lists for companies 2011").
            if (!document.DocumentNode.OuterHtml.Contains("Virksomhedsnavnet eller CVR/SE-nummeret findes ikke på skattelisterne for selskaber 2011"))
            {
                var extractor = new DataExtractor();
                extractor.Extract(company, document);
                Console.WriteLine("{0} Extracted {1} - {2}", company.Serial, company.Cvr, company.Name);
            }
            else
            {
                Console.WriteLine("{0} Disregarded {1}", company.Serial, company.Cvr);
            }
        }

        using (var context = new Context())
        {
            context.Companies.Add(company);
            context.SaveChanges();
        }
    }
    catch (Exception ex)
    {
        // Best effort: leave this serial for later, but record why it failed instead of
        // discarding the error silently.
        Console.Error.WriteLine("{0} Failed: {1}", serial, ex.Message);
    }
}
/// <summary>
/// Deletes surplus companies that share a <c>Cvr</c> value: for every <c>Cvr</c> that occurs
/// more than once, the first company of the group is kept and the remaining ones are removed.
/// All removals are committed with a single <c>SaveChanges</c> call.
/// </summary>
/// <remarks>
/// No ordering is applied inside a group, so which company counts as "first" is whatever order
/// the query yields.
/// </remarks>
public void RemoveDuplicates()
{
    using (var context = new Context())
    {
        // Groups of companies whose Cvr value appears on more than one row.
        var duplicatedGroups = context.Companies
            .GroupBy(company => company.Cvr)
            .Where(sameCvr => sameCvr.Count() > 1);

        // AsEnumerable keeps Skip(1) on the client side, applied per materialised group.
        var surplus = duplicatedGroups
            .AsEnumerable()
            .SelectMany(sameCvr => sameCvr.Skip(1));

        foreach (var extra in surplus)
        {
            context.Companies.Remove(extra);
        }

        context.SaveChanges();
    }
}