Example #1
0
        /// <summary>
        /// Fetches batches of serial numbers from <c>GetSerials</c> and processes each batch in
        /// parallel, repeating until an empty batch is returned. For every serial a
        /// <c>Company</c> is created, enriched from the downloaded document when the serial maps
        /// to a CVR number and the document is not the "not found" page, and then saved.
        /// </summary>
        /// <remarks>
        /// The degree of parallelism defaults to 10 and can be overridden with the
        /// "parallelism" app setting. Settings that are missing, non-numeric, or not positive
        /// are ignored in favour of the default.
        /// </remarks>
        public void Load()
        {
            ServicePointManager.DefaultConnectionLimit = int.MaxValue;

            var parallelism = 10;
            var parallelismSetting = ConfigurationManager.AppSettings["parallelism"];

            // Parse into a temporary: int.TryParse writes 0 to its out argument on failure,
            // and WithDegreeOfParallelism rejects values below 1, so a bad setting must not
            // overwrite the default.
            int configuredParallelism;
            if (int.TryParse(parallelismSetting, out configuredParallelism) && configuredParallelism > 0)
            {
                parallelism = configuredParallelism;
            }

            Console.WriteLine("Parellelism: {0}", parallelism);

            List<int> serials;
            do
            {
                // Materialize the batch once so the source is not enumerated a second time
                // by the loop condition.
                serials = GetSerials().ToList();
                serials.AsParallel().WithDegreeOfParallelism(parallelism).ForAll(x =>
                {
                    try
                    {
                        var company = new Company()
                        {
                            Serial = x,
                        };

                        var cvrNumber = CvrHelper.ToCvr(x);
                        if (cvrNumber != -1)
                        {
                            company.Cvr = cvrNumber;

                            var contentHelper = new ContentHelper(Encoding.GetEncoding("ISO-8859-1"));
                            var document = contentHelper.GetContent(cvrNumber);

                            if (!document.DocumentNode.OuterHtml.Contains("Virksomhedsnavnet eller CVR/SE-nummeret findes ikke på skattelisterne for selskaber 2011"))
                            {
                                var extractor = new DataExtractor();
                                extractor.Extract(company, document);
                                Console.WriteLine("{0} Extracted {1} - {2}", company.Serial, company.Cvr, company.Name);
                            }
                            else
                            {
                                Console.WriteLine("{0} Disregarded {1}", company.Serial, company.Cvr);
                            }
                        }

                        // The company is saved even when no CVR number or data was found.
                        using (var context = new Context())
                        {
                            context.Companies.Add(company);
                            context.SaveChanges();
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: leave this serial for later, but report the failure
                        // instead of discarding it silently.
                        // NOTE(review): presumably GetSerials returns unsaved serials again on
                        // a later pass - confirm, since a permanently failing serial would
                        // otherwise keep this loop running.
                        Console.Error.WriteLine("{0} Failed: {1}", x, ex.Message);
                    }
                });
            }
            while (serials.Count > 0);
        }
Example #2
0
        /// <summary>
        /// Removes companies that share a <c>Cvr</c> value with another company, keeping one
        /// entry per value and saving all removals in a single <c>SaveChanges</c> call.
        /// </summary>
        /// <remarks>
        /// No ordering is applied within a group, so the entry that is kept is simply the
        /// first one the grouping yields.
        /// NOTE(review): companies whose <c>Cvr</c> was never assigned would share one value
        /// and be collapsed too - confirm that this is intended.
        /// </remarks>
        public void RemoveDuplicates()
        {
            using (var db = new Context())
            {
                // Only CVR values that occur more than once are of interest.
                var duplicatedCvrGroups = db.Companies
                    .GroupBy(c => c.Cvr)
                    .Where(sameCvr => sameCvr.Count() > 1);

                foreach (var sameCvr in duplicatedCvrGroups)
                {
                    // Keep the first company of the group and mark the rest for removal.
                    var surplus = sameCvr.Skip(1);
                    foreach (var duplicate in surplus)
                    {
                        db.Companies.Remove(duplicate);
                    }
                }

                db.SaveChanges();
            }
        }