Exemple #1
0
        /// <summary>
        /// Calculates income and GDP dispersion figures (plain dispersion and Theil T index,
        /// see https://en.wikipedia.org/wiki/Theil_index) for every country found in the data and for a
        /// fixed set of multi-country / multi-region groupings, producing one result per grouping per year.
        /// </summary>
        /// <remarks>
        /// Side effect: a copy of every row that fed a calculation is written to
        /// "PreparedDataForDispersions.csv" in the current working directory.
        /// </remarks>
        /// <param name="CombinedData">Per-region, per-year rows. Only rows with NUTSLevel == 2 are used,
        /// plus the row whose RegionCode is exactly "UKI" where the London fix applies.</param>
        /// <returns>One <see cref="DispersionOutput"/> per (grouping, year). The four metrics stay NaN
        /// when fewer than two regions have a positive value for that measure.</returns>
        static List<DispersionOutput> CalculateDispersions(List<CombinedData> CombinedData)
        {
            // The first two characters of a region code identify the country.
            List<string> UniqueCountries = CombinedData.Select(x => x.RegionCode.Substring(0, 2)).Distinct().ToList();

            // Codes starting with "EU" are not treated as a country; the EU groupings below are built
            // from their member countries instead.
            UniqueCountries.Remove("EU");

            // Groupings are appended after the real countries, in this fixed order, so the output order is stable.
            // CURRENTLY DOES NOT INCLUDE CROATIA (HR). THIS MAY NEED FIXING SOON!
            Dictionary<string, List<string>> groupMembers = new Dictionary<string, List<string>>();
            RegisterGrouping(UniqueCountries, groupMembers, "EA12",
                             "AT", "BE", "DE", "EL", "ES", "FI", "FR", "IE", "IT", "LU", "NL", "PT");
            RegisterGrouping(UniqueCountries, groupMembers, "EU15",
                             "AT", "BE", "DE", "DK", "EL", "ES", "FI", "FR", "IE", "IT", "LU", "NL", "PT", "SE", "UK");
            RegisterGrouping(UniqueCountries, groupMembers, "EU19",
                             "AT", "BE", "DE", "DK", "EL", "ES", "FI", "FR", "IE", "IT", "LU", "NL", "PT", "SE", "UK", "SK", "PL", "HU", "CZ");
            RegisterGrouping(UniqueCountries, groupMembers, "EU27",
                             "AT", "BE", "BG", "CY", "CZ", "DE", "DK", "EE", "EL", "ES", "FI", "FR", "HU", "IE", "IT", "LT", "LU", "LV", "MT", "NL", "PL", "PT", "RO", "SE", "SI", "SK", "UK");
            // "UK*" is the UK with the London fix applied; the plain "UK" entry from the data is left unfixed.
            // (Other approaches considered were MegaLondon and PartialMegaLondon; the plain London fix
            // was judged the best mix of fair and simple.)
            RegisterGrouping(UniqueCountries, groupMembers, "UK*", "UK");
            RegisterGrouping(UniqueCountries, groupMembers, "Nordics", "SE", "FI", "DK");
            RegisterGrouping(UniqueCountries, groupMembers, "North England", "UKE", "UKD", "UKC");
            RegisterGrouping(UniqueCountries, groupMembers, "Low Countries", "NL", "BE");

            // Ireland is removed from the multi-country groupings because its GDP data is useless.
            // (It still gets its own per-country result if it is present in the data.)
            foreach (string groupName in new[] { "EA12", "EU15", "EU19", "EU27" })
            {
                groupMembers[groupName].Remove("IE");
            }

            List<int> UniqueYears = CombinedData.Select(x => x.Year).Distinct().OrderBy(x => x).ToList();
            List<DispersionOutput> DispersionOutputs = new List<DispersionOutput>();
            List<CombinedData>     AllRawData        = new List<CombinedData>();

            foreach (string country in UniqueCountries)
            {
                List<CombinedData> NUTS2RegionsInTheCountry;
                List<string>       members;

                if (groupMembers.TryGetValue(country, out members))
                {
                    NUTS2RegionsInTheCountry = SelectNuts2Regions(CombinedData, members);

                    // The London fix only makes sense when the whole UK is part of the grouping.
                    // (Previously it was also applied to EA12, which wrongly added London to the euro area.)
                    if (members.Contains("UK"))
                    {
                        ApplyLondonFix(NUTS2RegionsInTheCountry, CombinedData);
                    }
                }
                else
                {
                    NUTS2RegionsInTheCountry = SelectNuts2Regions(CombinedData, new[] { country });
                }

                foreach (int year in UniqueYears)
                {
                    List<CombinedData> NUTS2RegionsInTheCountryThisYear = NUTS2RegionsInTheCountry.Where(x => x.Year == year).ToList();

                    DispersionOutput dispersionOutput = new DispersionOutput()
                    {
                        Year             = year,
                        Country          = country,
                        IncomeDispersion = double.NaN,
                        IncomeTheilT     = double.NaN,
                        GDPDispersion    = double.NaN,
                        GDPTheilT        = double.NaN
                    };

                    // Keep a copy of every input row, tagged with the grouping it was used for.
                    foreach (CombinedData cd in NUTS2RegionsInTheCountryThisYear)
                    {
                        CombinedData newcd = new CombinedData()
                        {
                            CountryOrOtherGrouping = country,
                            Year          = cd.Year,
                            GDPPerHead    = cd.GDPPerHead,
                            GDPTotal      = cd.GDPTotal,
                            IncomePerHead = cd.IncomePerHead,
                            IncomeTotal   = cd.IncomeTotal,
                            Population    = cd.Population,
                            RegionCode    = cd.RegionCode,
                            RegionName    = cd.RegionName,
                            NUTSLevel     = cd.NUTSLevel,
                        };
                        AllRawData.Add(newcd);
                    }

                    // A dispersion needs at least two regions with data.
                    int regionsWithIncome = NUTS2RegionsInTheCountryThisYear.Count(x => x.IncomePerHead > 0);
                    if (regionsWithIncome > 1)
                    {
                        dispersionOutput.IncomeDispersion = calculateIncomeDispersion(NUTS2RegionsInTheCountryThisYear);
                        dispersionOutput.IncomeTheilT     = calculateIncomeTheilTIndex(NUTS2RegionsInTheCountryThisYear);
                        // NOTE(review): this stores the *income* region count in the GDP counter and is
                        // overwritten below whenever GDP data exists. Looks like a copy/paste slip, but the
                        // intended target property is not visible from here - confirm before changing.
                        dispersionOutput.RegionsConsideredForGDP = regionsWithIncome;
                    }

                    int regionsWithGdp = NUTS2RegionsInTheCountryThisYear.Count(x => x.GDPPerHead > 0);
                    if (regionsWithGdp > 1)
                    {
                        dispersionOutput.GDPDispersion           = calculateGDPDispersion(NUTS2RegionsInTheCountryThisYear);
                        dispersionOutput.GDPTheilT               = calculateGDPTheilTIndex(NUTS2RegionsInTheCountryThisYear);
                        dispersionOutput.RegionsConsideredForGDP = regionsWithGdp;
                    }

                    DispersionOutputs.Add(dispersionOutput);
                }
            }

            // Output the prepared raw data for further analysis.
            // The CsvWriter is disposed explicitly so that anything it has buffered reaches the file.
            using (TextWriter writer = new StreamWriter("PreparedDataForDispersions.csv", false, System.Text.Encoding.UTF8))
            using (var csv = new CsvWriter(writer, CultureInfo.InvariantCulture))
            {
                csv.WriteRecords(AllRawData);
            }

            return DispersionOutputs;
        }

        /// <summary>Adds a named grouping to the ordered name list and records its member code prefixes.</summary>
        private static void RegisterGrouping(List<string> names, Dictionary<string, List<string>> membersByName, string name, params string[] members)
        {
            names.Add(name);
            membersByName[name] = members.ToList();
        }

        /// <summary>Returns the NUTS level 2 rows whose region code starts with any of the given prefixes, prefix by prefix.</summary>
        private static List<CombinedData> SelectNuts2Regions(List<CombinedData> allData, IEnumerable<string> regionCodePrefixes)
        {
            List<CombinedData> selected = new List<CombinedData>();
            foreach (string prefix in regionCodePrefixes)
            {
                selected.AddRange(allData.Where(x => x.RegionCode.StartsWith(prefix, StringComparison.Ordinal) && x.NUTSLevel == 2));
            }
            return selected;
        }

        /// <summary>
        /// Swaps the London NUTS2 rows (codes starting with "UKI") for the single London row whose
        /// code is exactly "UKI", so that London is treated as one region.
        /// </summary>
        private static void ApplyLondonFix(List<CombinedData> regions, List<CombinedData> allData)
        {
            regions.RemoveAll(x => x.RegionCode.StartsWith("UKI", StringComparison.Ordinal));
            regions.AddRange(allData.Where(x => x.RegionCode == "UKI"));
        }
Exemple #2
0
        /// <summary>
        /// Program entry point. Loads the regional GDP-per-head, household income, population and
        /// historic GDP-per-head CSV files from the Assets folder, merges them into one row per
        /// (year, region), calculates dispersions via CalculateDispersions and writes the result to
        /// "CalculatedDispersions_LondonFixed.csv" in the current working directory.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Forward slashes are used for every asset path so the program also runs outside Windows.
            Console.WriteLine("Loading GDP per person at PPS for small regions of Europe.");
            List<GDPPerHeadRaw> RawGDPPerHead = LoadCsvRecords<GDPPerHeadRaw>("Assets/nama_10r_2gdp_1_Data.csv", true);

            // The per-head income file (nama_10r_2hhinc_1_Data.csv) is no longer loaded; income per head
            // is derived below from total income and population instead.
            Console.WriteLine("Loading total household income at PPS for small regions of Europe.");
            List<IncomeRaw> RawIncomes = LoadCsvRecords<IncomeRaw>("Assets/nama_10r_2hhinc_1_Data_Abs.csv", false);

            Console.WriteLine("Loading populations for small regions of Europe.");
            List<Population> Populations = LoadCsvRecords<Population>("Assets/demo_r_d2jan_1_Data.csv", true);

            Console.WriteLine("Loading historic GDP/head (at PPS) for small regions of Europe. This lets us create complete time series for countries such as France and The Netherlands which have changed their geographies since 2000.");
            List<HistoricGDPPerHeadPPS> HistoricGDPs = LoadCsvRecords<HistoricGDPPerHeadPPS>("Assets/euregionsabsolutepps_unpivotted.csv", false);

            // Remove 2015 and 2016 French data (we have new data that's better)
            HistoricGDPs.RemoveAll(x => x.Year >= 2015 && x.NUTScode.StartsWith("FR", StringComparison.Ordinal));

            // Every year / region code that appears in any of the sources, in order of first appearance.
            List<int> DistinctYears = RawGDPPerHead.Select(x => x.TIME)
                                                   .Concat(RawIncomes.Select(x => x.TIME))
                                                   .Distinct()
                                                   .ToList();
            List<string> DistinctRegionCodes = RawGDPPerHead.Select(x => x.GEO)
                                                            .Concat(RawIncomes.Select(x => x.GEO))
                                                            .Concat(HistoricGDPs.Select(x => x.NUTScode))
                                                            .Distinct()
                                                            .ToList();

            // Group each source by region once, instead of rescanning every list for every (year, region) pair.
            // Lookups keep the source order within a region, so "first match" picks the same row as a full scan.
            var historicByRegion   = HistoricGDPs.ToLookup(x => x.NUTScode);
            var gdpByRegion        = RawGDPPerHead.ToLookup(x => x.GEO);
            var incomeByRegion     = RawIncomes.ToLookup(x => x.GEO);
            var populationByRegion = Populations.ToLookup(x => x.GEO);

            List<CombinedData> CombinedCurrentData = new List<CombinedData>();

            foreach (int year in DistinctYears)
            {
                foreach (string regioncode in DistinctRegionCodes)
                {
                    HistoricGDPPerHeadPPS historicRow = historicByRegion[regioncode].FirstOrDefault(x => x.Year == year);
                    GDPPerHeadRaw         gdpRow      = gdpByRegion[regioncode].FirstOrDefault(x => x.TIME == year);
                    IncomeRaw             incomeRow   = incomeByRegion[regioncode].FirstOrDefault(x => x.TIME == year);

                    // A value of ":" marks a missing observation in the source files; treat it as "no row".
                    if (incomeRow != null && incomeRow.Value == ":")
                    {
                        incomeRow = null;
                    }
                    if (gdpRow != null && gdpRow.Value == ":")
                    {
                        gdpRow = null;
                    }

                    if (historicRow == null && gdpRow == null && incomeRow == null)
                    {
                        continue;
                    }

                    CombinedData combinedData = new CombinedData()
                    {
                        Year       = year,
                        RegionCode = regioncode
                    };

                    // Current GDP data is preferred over the historic series when both exist.
                    if (gdpRow != null)
                    {
                        combinedData.RegionName = gdpRow.GEO_LABEL;

                        // Values use "," as a thousands separator. Parse with AllowThousands so that
                        // values below 1000 (which have no comma) are no longer silently dropped.
                        int parsedGdpPerHead;
                        if (int.TryParse(gdpRow.Value, NumberStyles.Integer | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out parsedGdpPerHead))
                        {
                            combinedData.GDPPerHead = parsedGdpPerHead;
                        }
                        else if (!string.IsNullOrWhiteSpace(gdpRow.Value))
                        {
                            Console.WriteLine($"Could not parse GDP per head value '{gdpRow.Value}' for {regioncode} in {year}.");
                        }

                        // For codes that do not start with "EU", the NUTS level is the code length minus the two-letter prefix.
                        if (!gdpRow.GEO.StartsWith("EU", StringComparison.Ordinal))
                        {
                            combinedData.NUTSLevel = gdpRow.GEO.Length - 2;
                        }
                    }
                    else if (historicRow != null)
                    {
                        combinedData.RegionName = historicRow.regionName;
                        combinedData.NUTSLevel  = historicRow.NUTSlevel;
                        combinedData.GDPPerHead = historicRow.Value;
                    }

                    // Population: prefer the figure for this exact year, otherwise fall back to the
                    // first figure available for the region.
                    var regionPopulations = populationByRegion[regioncode];
                    Population populationRow = regionPopulations.FirstOrDefault(x => x.TIME == year)
                                               ?? regionPopulations.FirstOrDefault();
                    if (populationRow != null)
                    {
                        combinedData.Population = populationRow.Value;
                    }
                    else
                    {
                        Console.WriteLine($"No population data found for {combinedData.RegionCode} in {combinedData.Year}.");
                    }

                    // GDP (PPP) total
                    combinedData.GDPTotal = combinedData.Population * combinedData.GDPPerHead;

                    // Income (PPP) total: the source value is multiplied by one million.
                    if (incomeRow != null)
                    {
                        combinedData.RegionName  = incomeRow.GEO_LABEL;
                        // Invariant culture: the file uses "." as the decimal point regardless of the machine's locale.
                        combinedData.IncomeTotal = 1000000 * double.Parse(incomeRow.Value.Replace(",", ""), CultureInfo.InvariantCulture);
                        combinedData.NUTSLevel   = incomeRow.GEO.Length - 2;
                    }

                    // Income per head, only when a population is known.
                    if (combinedData.Population > 0)
                    {
                        combinedData.IncomePerHead = (int)Math.Round(combinedData.IncomeTotal / combinedData.Population, 0);
                    }

                    CombinedCurrentData.Add(combinedData);
                }
            }

            // Dispersions are calculated from the combined dataset only. An earlier design calculated a
            // separate historic-only result and merged the two (preferring the one built from more
            // regions); that path is disabled because the historic GDP rows are already merged in above.
            List<DispersionOutput> DispersionOutputs = CalculateDispersions(CombinedCurrentData);

            // Output the dispersion calculation results.
            // The CsvWriter is disposed explicitly so that anything it has buffered reaches the file.
            using (TextWriter writer = new StreamWriter("CalculatedDispersions_LondonFixed.csv", false, System.Text.Encoding.UTF8))
            using (var csv = new CsvWriter(writer, CultureInfo.InvariantCulture))
            {
                csv.WriteRecords(DispersionOutputs);
            }
        }

        /// <summary>Reads every record of type <typeparamref name="T"/> from a CSV file using the invariant culture.</summary>
        /// <param name="path">Path of the CSV file to read.</param>
        /// <param name="ignoreHeaderMismatches">When true, header validation and missing-field handling
        /// are switched off so files whose columns do not exactly match <typeparamref name="T"/> still load.</param>
        /// <returns>All records, fully materialized before the reader is closed.</returns>
        private static List<T> LoadCsvRecords<T>(string path, bool ignoreHeaderMismatches)
        {
            using (TextReader textReader = File.OpenText(path))
            using (CsvReader csvReader = new CsvReader(textReader, CultureInfo.InvariantCulture))
            {
                if (ignoreHeaderMismatches)
                {
                    csvReader.Configuration.HeaderValidated   = null;
                    csvReader.Configuration.MissingFieldFound = null;
                }

                // GetRecords is lazy, so the list must be built while the reader is still open.
                return csvReader.GetRecords<T>().ToList();
            }
        }