Example #1
0
        /// <summary>
        /// Fetches batches of serials via <c>GetSerials</c> and processes each batch in parallel,
        /// repeating until a fetched batch is empty.
        /// </summary>
        /// <remarks>
        /// The degree of parallelism is read from the <c>parallelism</c> app setting and falls back
        /// to 10 when the setting is missing, not an integer, or less than 1.
        /// A <c>Company</c> row is saved for every serial that is processed without an exception,
        /// including serials for which no CVR number was produced or whose page was disregarded.
        /// Failures for individual serials are reported on standard error and otherwise ignored.
        /// </remarks>
        public void Load()
        {
            ServicePointManager.DefaultConnectionLimit = int.MaxValue;

            const int defaultParallelism = 10;
            var parallelism = defaultParallelism;
            var parallelismSetting = ConfigurationManager.AppSettings["parallelism"];

            // int.TryParse writes 0 to its out argument on failure, so parse into a scratch
            // variable and only accept values that WithDegreeOfParallelism will not reject as too small.
            // NOTE(review): PLINQ also rejects values above its supported maximum; an oversized
            // setting would still throw - confirm whether clamping is wanted.
            int configuredParallelism;
            if (!string.IsNullOrEmpty(parallelismSetting)
                && int.TryParse(parallelismSetting, out configuredParallelism)
                && configuredParallelism >= 1)
            {
                parallelism = configuredParallelism;
            }

            Console.WriteLine("Parellelism: {0}", parallelism);

            List<int> serials;
            do
            {
                // Materialize the batch once so the emptiness check below does not enumerate
                // the source a second time (GetSerials may return a lazily evaluated sequence).
                serials = new List<int>(GetSerials());

                serials.AsParallel().WithDegreeOfParallelism(parallelism).ForAll(serial =>
                {
                    try
                    {
                        var company = new Company()
                        {
                            Serial = serial,
                        };

                        // -1 is treated as "no CVR number for this serial": the lookup is skipped
                        // but the company is still stored below.
                        var cvrNumber = CvrHelper.ToCvr(serial);
                        if (cvrNumber != -1)
                        {
                            company.Cvr = cvrNumber;

                            var contentHelper = new ContentHelper(Encoding.GetEncoding("ISO-8859-1"));
                            var document = contentHelper.GetContent(cvrNumber);

                            // The page contains this message when the company is not on the tax lists.
                            // NOTE(review): "pÄ" looks like a mis-encoded "på" - confirm the file
                            // encoding, otherwise this marker may never match.
                            if (!document.DocumentNode.OuterHtml.Contains("Virksomhedsnavnet eller CVR/SE-nummeret findes ikke pÄ skattelisterne for selskaber 2011"))
                            {
                                var extractor = new DataExtractor();
                                extractor.Extract(company, document);
                                Console.WriteLine("{0} Extracted {1} - {2}", company.Serial, company.Cvr, company.Name);
                            }
                            else
                            {
                                Console.WriteLine("{0} Disregarded {1}", company.Serial, company.Cvr);
                            }
                        }

                        using (var context = new Context())
                        {
                            context.Companies.Add(company);
                            context.SaveChanges();
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: leave this serial for later, but make the failure visible
                        // instead of swallowing it silently.
                        // NOTE(review): presumably an unsaved serial is returned again by GetSerials;
                        // a serial that always fails would then keep this loop running - confirm.
                        Console.Error.WriteLine("{0} Failed: {1}", serial, ex.Message);
                    }
                });
            }
            while (serials.Count > 0);
        }
Example #2
0
        // Fetches the page for the given CVR number, runs it through DataExtractor and checks
        // that the extracted company carries the expected name and figures.
        // A null expected figure means the corresponding company property is expected to be null.
        public void Test(int cvrNumber, string name, long? tax, long? profit, long? losses, long? fossilTax, long? fossilProfit, long? fossilLosses)
        {
            // Arrange: download the page using the ISO-8859-1 encoding.
            var latin1 = Encoding.GetEncoding("ISO-8859-1");
            var page = new ContentHelper(latin1).GetContent(cvrNumber);
            var actual = new Company();

            // Act
            new DataExtractor().Extract(actual, page);

            // Assert: expected figures are widened to decimal? to match the company properties.
            decimal? expectedTax = tax;
            decimal? expectedProfit = profit;
            decimal? expectedLosses = losses;
            decimal? expectedFossilTax = fossilTax;
            decimal? expectedFossilProfit = fossilProfit;
            decimal? expectedFossilLosses = fossilLosses;

            Assert.Equal(name, actual.Name);

            // Regular corporate tax figures; the expected profit is compared against Revenue.
            Assert.Equal(expectedTax, actual.TaxPaid);
            Assert.Equal(expectedProfit, actual.Revenue);
            Assert.Equal(expectedLosses, actual.Losses);

            // Fossil (hydrocarbon) tax figures.
            Assert.Equal(expectedFossilTax, actual.FossilTaxPaid);
            Assert.Equal(expectedFossilProfit, actual.FossilProfit);
            Assert.Equal(expectedFossilLosses, actual.FossilLosses);
        }
Example #3
0
        /// <summary>
        /// Populates <paramref name="company"/> with values read from <paramref name="document"/>.
        /// </summary>
        /// <remarks>
        /// Name, type and legislation are always read. When the page states that the company is
        /// jointly taxed with an administration company, the company is only flagged as a
        /// subsidiary; otherwise the tax figures and the subsidiary CVR numbers are read as well.
        /// </remarks>
        /// <param name="company">The company to fill in.</param>
        /// <param name="document">The parsed HTML page to read from.</param>
        public void Extract(Company company, HtmlDocument document)
        {
            // Descriptive fields, looked up by their label text on the page.
            company.Name = GetStringValue(document, "Virksomhedsnavn");
            company.Type = GetStringValue(document, "Selskabstype");
            company.Legislation = GetStringValue(document, "Den skattelov");

            // NOTE(review): "nedenstÄende" looks like a mis-encoded "nedenstående" - confirm the
            // file encoding, otherwise this marker may never match.
            var pageHtml = document.DocumentNode.OuterHtml;
            var isJointlyTaxed = pageHtml.Contains("Selskabet bliver sambeskattet med nedenstÄende administrationsselskab");

            if (isJointlyTaxed)
            {
                // Jointly taxed companies get no figures of their own here.
                company.IsSubsidiary = true;
            }
            else
            {
                // Regular corporate tax figures.
                company.TaxPaid = GetDecimalValue(document, "Selskabsskatten");
                company.Revenue = GetDecimalValue(document, "Skattepligtig indkomst");
                company.Losses = GetDecimalValue(document, "Underskud, der er trukket fra indkomsten");

                // Fossil (hydrocarbon) tax figures.
                company.FossilTaxPaid = GetDecimalValue(document, "Kulbrinteskatten");
                company.FossilProfit = GetDecimalValue(document, "Skattepligtig kulbrinteindkomst");
                company.FossilLosses = GetDecimalValue(document, "Underskud, der er trukket fra kulbrinteindkomsten");

                company.Subsidiaries = GetSubsidiaryCvrNumbers(document);
            }
        }