/// <summary>
/// Repeatedly fetches batches of serials from <c>GetSerials</c> and processes them in
/// parallel until a batch is empty. For every serial a <c>Company</c> is created; when
/// <c>CvrHelper.ToCvr</c> yields a value other than -1, the page for that CVR number is
/// downloaded and, unless it contains the "not found" text, run through
/// <c>DataExtractor</c>. The company is then added to a <c>Context</c> and saved,
/// whether or not a CVR number was resolved.
/// </summary>
/// <remarks>
/// The degree of parallelism is read from the "parallelism" app setting and defaults to 10
/// when the setting is missing, not a valid integer, or not positive.
/// Failures while processing a single serial are reported on standard error and do not
/// stop the run.
/// NOTE(review): the outer loop only terminates when <c>serials.Any()</c> is false;
/// presumably GetSerials stops returning serials once they have been saved, so a serial
/// that fails permanently may keep the loop running - confirm against GetSerials.
/// </remarks>
public void Load()
{
    const int defaultParallelism = 10;

    ServicePointManager.DefaultConnectionLimit = int.MaxValue;

    var parallelism = defaultParallelism;
    var parallelismSetting = ConfigurationManager.AppSettings["parallelism"];
    if (!string.IsNullOrEmpty(parallelismSetting))
    {
        // int.TryParse writes 0 to its out argument on failure, so parse into a temporary
        // and only accept a positive value; WithDegreeOfParallelism rejects values below 1.
        int configured;
        if (int.TryParse(parallelismSetting, out configured) && configured > 0)
        {
            parallelism = configured;
        }
    }

    Console.WriteLine("Parellelism: {0}", parallelism);

    IEnumerable<int> serials;
    do
    {
        serials = GetSerials();
        serials.AsParallel().WithDegreeOfParallelism(parallelism).ForAll(x =>
        {
            try
            {
                var company = new Company()
                {
                    Serial = x,
                };

                var cvrNumber = CvrHelper.ToCvr(x);
                if (cvrNumber != -1)
                {
                    company.Cvr = cvrNumber;

                    var contentHelper = new ContentHelper(Encoding.GetEncoding("ISO-8859-1"));
                    var document = contentHelper.GetContent(cvrNumber);

                    if (!document.DocumentNode.OuterHtml.Contains("Virksomhedsnavnet eller CVR/SE-nummeret findes ikke på skattelisterne for selskaber 2011"))
                    {
                        var extractor = new DataExtractor();
                        extractor.Extract(company, document);
                        Console.WriteLine("{0} Extracted {1} - {2}", company.Serial, company.Cvr, company.Name);
                    }
                    else
                    {
                        Console.WriteLine("{0} Disregarded {1}", company.Serial, company.Cvr);
                    }
                }

                // Saved even when no CVR number was resolved or the page was disregarded.
                using (var context = new Context())
                {
                    context.Companies.Add(company);
                    context.SaveChanges();
                }
            }
            catch (Exception ex)
            {
                // Best effort: leave this serial for later, but make the failure visible
                // instead of swallowing it silently.
                Console.Error.WriteLine("{0} Failed: {1}", x, ex.Message);
            }
        });
    } while (serials.Any());
}
/// <summary>
/// Retrieves the document for <paramref name="cvrNumber"/> through <c>ContentHelper</c>
/// (ISO-8859-1 encoding), runs <c>DataExtractor.Extract</c> on a fresh <c>Company</c>
/// and asserts that the extracted fields equal the expected values.
/// </summary>
/// <param name="cvrNumber">CVR number passed to <c>ContentHelper.GetContent</c>.</param>
/// <param name="name">Expected <c>Company.Name</c>.</param>
/// <param name="tax">Expected <c>Company.TaxPaid</c>.</param>
/// <param name="profit">Expected <c>Company.Revenue</c>.</param>
/// <param name="losses">Expected <c>Company.Losses</c>.</param>
/// <param name="fossilTax">Expected <c>Company.FossilTaxPaid</c>.</param>
/// <param name="fossilProfit">Expected <c>Company.FossilProfit</c>.</param>
/// <param name="fossilLosses">Expected <c>Company.FossilLosses</c>.</param>
public void Test(int cvrNumber, string name, long? tax, long? profit, long? losses, long? fossilTax, long? fossilProfit, long? fossilLosses)
{
    // Arrange: fetch the page and prepare the expected values as nullable decimals.
    var page = new ContentHelper(Encoding.GetEncoding("ISO-8859-1")).GetContent(cvrNumber);
    var actual = new Company();

    decimal? expectedTax = tax;
    decimal? expectedRevenue = profit;
    decimal? expectedLosses = losses;
    decimal? expectedFossilTax = fossilTax;
    decimal? expectedFossilProfit = fossilProfit;
    decimal? expectedFossilLosses = fossilLosses;

    // Act
    new DataExtractor().Extract(actual, page);

    // Assert
    Assert.Equal(name, actual.Name);
    Assert.Equal(expectedTax, actual.TaxPaid);
    Assert.Equal(expectedRevenue, actual.Revenue);
    Assert.Equal(expectedLosses, actual.Losses);
    Assert.Equal(expectedFossilTax, actual.FossilTaxPaid);
    Assert.Equal(expectedFossilProfit, actual.FossilProfit);
    Assert.Equal(expectedFossilLosses, actual.FossilLosses);
}
/// <summary>
/// Populates <paramref name="company"/> from <paramref name="document"/>.
/// Name, type and legislation are always read. When the document contains the
/// joint-taxation marker text, the company is flagged as a subsidiary and nothing else
/// is read; otherwise the tax, income and loss figures (regular and fossil) and the
/// subsidiary CVR numbers are filled in.
/// </summary>
/// <param name="company">The company instance whose properties are assigned.</param>
/// <param name="document">The HTML document the values are looked up in.</param>
public void Extract(Company company, HtmlDocument document)
{
    // Descriptive fields are set regardless of the taxation arrangement.
    company.Name = GetStringValue(document, "Virksomhedsnavn");
    company.Type = GetStringValue(document, "Selskabstype");
    company.Legislation = GetStringValue(document, "Den skattelov");

    var html = document.DocumentNode.OuterHtml;
    var hasJointTaxationMarker = html.Contains("Selskabet bliver sambeskattet med nedenstående administrationsselskab");

    if (!hasJointTaxationMarker)
    {
        company.TaxPaid = GetDecimalValue(document, "Selskabsskatten");
        company.Revenue = GetDecimalValue(document, "Skattepligtig indkomst");
        company.Losses = GetDecimalValue(document, "Underskud, der er trukket fra indkomsten");
        company.FossilTaxPaid = GetDecimalValue(document, "Kulbrinteskatten");
        company.FossilProfit = GetDecimalValue(document, "Skattepligtig kulbrinteindkomst");
        company.FossilLosses = GetDecimalValue(document, "Underskud, der er trukket fra kulbrinteindkomsten");
        company.Subsidiaries = GetSubsidiaryCvrNumbers(document);
        return;
    }

    // Marker present: only the subsidiary flag is set; the figures are left untouched.
    company.IsSubsidiary = true;
}