示例#1
0
        /// <summary>
        /// Reads a tab-separated geoname file and writes the rows whose feature key is listed in
        /// <paramref name="filters"/> to <paramref name="output"/> as one flat JSON array.
        /// </summary>
        /// <param name="input">Path of the tab-separated file to read. If it does not exist a message is printed and nothing is written.</param>
        /// <param name="output">Path of the JSON file to create. An existing file is replaced.</param>
        /// <param name="features">Lookup keyed by "class.code" (columns 6 and 7); the first list entry becomes the record's feature.</param>
        /// <param name="filters">Feature keys ("class.code") to keep, matched case-insensitively. Must not be null.</param>
        /// <param name="countries">Lookup keyed by the country code in column 8; list entries 0, 3, 7, 8, 9, 10, 14 and 16 are copied to the record.</param>
        /// <param name="admin1codes">Lookup keyed by "country.admin1"; the first list entry becomes <c>admin1</c>.</param>
        /// <param name="admin2codes">Lookup keyed by "country.admin1.admin2"; the first list entry becomes <c>admin2</c>.</param>
        /// <remarks>
        /// Rows with fewer than 19 columns (including blank lines) are skipped. Numbers and dates are parsed
        /// with the invariant culture so the result does not depend on the machine's regional settings.
        /// <see cref="IOException"/>s raised while reading or writing are printed to the console, not rethrown.
        /// </remarks>
        private static void GenerateDefaultOutput(
            string input,
            string output,
            Dictionary <string, List <string> > features,
            List <string> filters,
            Dictionary <string, List <string> > countries,
            Dictionary <string, List <string> > admin1codes,
            Dictionary <string, List <string> > admin2codes)
        {
            // The highest column index read below is 18, so a usable row has at least 19 fields.
            const int requiredFieldCount = 19;

            // check input file if it exists
            if (!File.Exists(input))
            {
                Console.WriteLine("Input file not found");
                return;
            }

            // Build the filter lookup once instead of scanning the list for every row.
            var wantedFeatures = new HashSet <string>(filters, StringComparer.OrdinalIgnoreCase);
            var invariant      = System.Globalization.CultureInfo.InvariantCulture;

            // check output file if it exists
            if (File.Exists(output))
            {
                File.Delete(output);
            }

            // read input file
            try
            {
                using StreamReader sr = new StreamReader(input);
                using StreamWriter sw = new StreamWriter(output);

                sw.Write("[");
                bool isfirst = true;
                var  options = new JsonSerializerOptions()
                {
                    IgnoreNullValues = true
                };
                string inputLine;
                while ((inputLine = sr.ReadLine()) != null)
                {
                    var s = inputLine.Split('\t');
                    if (s.Length < requiredFieldCount)
                    {
                        // Blank or truncated row: indexing into it would throw and abort the whole export.
                        continue;
                    }

                    // check if feature is included in filters
                    var featurecode = $"{s[6]}.{s[7]}";
                    if (!wantedFeatures.Contains(featurecode))
                    {
                        continue;
                    }

                    var geo = new geonameFlat();
                    if (long.TryParse(s[0], System.Globalization.NumberStyles.Integer, invariant, out long geoid))
                    {
                        geo.geonameId = geoid;
                    }
                    geo.name      = s[1].Trim();
                    geo.asciiName = s[2].Trim();
                    var altNames = s[3].Split(',');
                    geo.alternateNames = string.IsNullOrWhiteSpace(altNames[0]) ? null : altNames;

                    // Coordinates use '.' as the decimal separator; parsing them with the current culture
                    // silently yields wrong values on machines where '.' is a group separator.
                    if (decimal.TryParse(s[4], System.Globalization.NumberStyles.Float, invariant, out decimal lat))
                    {
                        geo.latitude = lat;
                    }
                    if (decimal.TryParse(s[5], System.Globalization.NumberStyles.Float, invariant, out decimal lng))
                    {
                        geo.longitude = lng;
                    }

                    if (features.TryGetValue(featurecode, out List <string> feats))
                    {
                        geo.feature = feats[0];
                    }

                    geo.countryCode = s[8];
                    if (countries.TryGetValue(geo.countryCode, out List <string> ctry))
                    {
                        geo.countryCodeISO3 = ctry[0];
                        geo.country         = ctry[3];
                        geo.continent       = ctry[7];
                        geo.tld             = ctry[8];
                        geo.currencyCode    = ctry[9];
                        geo.currency        = ctry[10];
                        var langs = ctry[14].Split(',');
                        geo.languages = string.IsNullOrWhiteSpace(langs[0]) ? null : langs;
                        var neighbors = ctry[16].Split(',');
                        geo.neighborCountries = string.IsNullOrWhiteSpace(neighbors[0]) ? null : neighbors;
                    }

                    var admin1code = $"{geo.countryCode}.{s[10]}";
                    if (admin1codes.TryGetValue(admin1code, out List <string> a1))
                    {
                        geo.admin1 = a1[0].Trim();
                    }
                    var admin2code = $"{admin1code}.{s[11]}";
                    if (admin2codes.TryGetValue(admin2code, out List <string> a2))
                    {
                        geo.admin2 = a2[0].Trim();
                    }

                    geo.timezone = s[17];
                    // Use the Try variant: a zone id without a Windows mapping leaves windowsTimezone
                    // unset instead of throwing and terminating the export.
                    if (!string.IsNullOrWhiteSpace(geo.timezone) &&
                        TZConvert.TryIanaToWindows(geo.timezone, out string windowsZone))
                    {
                        geo.windowsTimezone = windowsZone;
                    }

                    // Same policy as the numeric columns: an unparsable date leaves the default value.
                    if (DateTime.TryParse(s[18], invariant, System.Globalization.DateTimeStyles.None, out DateTime modified))
                    {
                        geo.dateModified = modified;
                    }

                    // Separate array elements with a comma, but not before the first one.
                    if (!isfirst)
                    {
                        sw.Write(',');
                    }
                    isfirst = false;
                    sw.Write(JsonSerializer.Serialize(geo, options));
                }
                sw.Write("]");
                // No explicit Flush: disposing the writer at the end of this scope flushes it.
            }
            catch (IOException e)
            {
                Console.WriteLine(e.Message);
            }
        }
示例#2
0
        /// <summary>
        /// Reads a tab-separated geoname file and writes the rows whose feature key is listed in
        /// <paramref name="filters"/> to <paramref name="output"/> as one nested JSON document grouped by
        /// country, then first-level admin area, then second-level admin area.
        /// </summary>
        /// <param name="input">Path of the tab-separated file to read. If it does not exist a message is printed and nothing is written.</param>
        /// <param name="output">Path of the JSON file to create. An existing file is replaced.</param>
        /// <param name="features">Not read by this method; the record's feature is the raw "class.code" key.</param>
        /// <param name="filters">Feature keys ("class.code") to keep, matched case-insensitively. Must not be null.</param>
        /// <param name="countries">Lookup keyed by the country code in column 8; list entries 0, 3, 7, 8, 9, 10, 14 and 16 are copied to the group.</param>
        /// <param name="admin1codes">Lookup keyed by "country.admin1"; the first list entry becomes the group's <c>admin1</c>.</param>
        /// <param name="admin2codes">Lookup keyed by "country.admin1.admin2"; the first list entry becomes the group's <c>admin2</c>.</param>
        /// <remarks>
        /// Each row is processed on a <see cref="Task"/>; at most 1000 rows are queued or running at once.
        /// Records of one country/admin1/admin2 combination are appended to that combination's temporary
        /// file as a JSON array. When all tasks have finished, the temporary files are stitched into
        /// <paramref name="output"/> and deleted, also when stitching fails.
        /// Rows with fewer than 19 columns are skipped. <see cref="IOException"/>s are printed, not rethrown;
        /// a failure inside a task surfaces as an <see cref="AggregateException"/> from <c>Task.WaitAll</c>.
        /// </remarks>
        private static void GenerateFullStructuredOutput(
            string input,
            string output,
            Dictionary <string, List <string> > features,
            List <string> filters,
            Dictionary <string, List <string> > countries,
            Dictionary <string, List <string> > admin1codes,
            Dictionary <string, List <string> > admin2codes)
        {
            // The highest column index read below is 18, so a usable row has at least 19 fields.
            const int requiredFieldCount = 19;
            // Upper bound on rows that are queued or being processed at the same time.
            const int maxPendingRows = 1000;

            Console.WriteLine("Start GenerateFullStructuredOutput");
            // check input file if it exists
            if (!File.Exists(input))
            {
                Console.WriteLine("Input file not found");
                return;
            }

            // Build the filter lookup once; it is only read afterwards, so tasks can share it.
            var wantedFeatures = new HashSet <string>(filters, StringComparer.OrdinalIgnoreCase);
            var invariant      = System.Globalization.CultureInfo.InvariantCulture;

            // check output file if it exists
            if (File.Exists(output))
            {
                File.Delete(output);
            }

            // read input file
            try
            {
                using StreamReader sr = new StreamReader(input);
                using StreamWriter sw = new StreamWriter(output);
                // Disposed explicitly in the finally block below, after every task has completed,
                // so that no task can release an already-disposed semaphore.
                var    semaphore   = new Semaphore(maxPendingRows, maxPendingRows);
                object trackerLock = new object();

                var options = new JsonSerializerOptions()
                {
                    IgnoreNullValues = true
                };
                var tracker = new Dictionary <string, geonameStructured>();
                var tasks   = new List <Task>();

                string inputLine;
                while ((inputLine = sr.ReadLine()) != null)
                {
                    var s = inputLine.Split('\t');
                    if (s.Length < requiredFieldCount)
                    {
                        // Blank or truncated row: indexing into it inside the task would fault the task
                        // and fail the whole export at Task.WaitAll.
                        continue;
                    }

                    // Take a slot before queueing; the task gives it back when it finishes, which is
                    // what actually limits the number of rows in flight.
                    semaphore.WaitOne();
                    var task = Task.Run(action: () =>
                    {
                        try
                        {
                            Console.WriteLine("Processing GeonameID: " + s[0]);
                            var geo = new geonameFlat();

                            // check if feature is included in filters
                            geo.feature = $"{s[6]}.{s[7]}";
                            if (!wantedFeatures.Contains(geo.feature))
                            {
                                return;
                            }

                            geo.countryCode = s[8];
                            var admin1code  = $"{geo.countryCode}.{s[10]}";
                            var admin2code  = $"{admin1code}.{s[11]}";

                            geonameStructured geostruct = null;
                            var key = $"{geo.countryCode}-{admin1code}-{admin2code}";
                            // Creating a group and its temp file must be atomic with the lookup.
                            lock (trackerLock)
                            {
                                if (!tracker.TryGetValue(key, out geostruct))
                                {
                                    geostruct = new geonameStructured()
                                    {
                                        countryCode   = geo.countryCode,
                                        admin1Code    = admin1code,
                                        admin2Code    = admin2code,
                                        tempFilename  = Path.GetTempFileName(),
                                        isNewTempFile = true
                                    };
                                    if (countries.TryGetValue(geo.countryCode, out List <string> ctry))
                                    {
                                        geostruct.countryCodeISO3 = ctry[0];
                                        geostruct.country         = ctry[3];
                                        geostruct.continent       = ctry[7];
                                        geostruct.tld             = ctry[8];
                                        geostruct.currencyCode    = ctry[9];
                                        geostruct.currency        = ctry[10];
                                        var langs                   = ctry[14].Split(',');
                                        geostruct.languages         = string.IsNullOrWhiteSpace(langs[0]) ? null : langs;
                                        var neighbors               = ctry[16].Split(',');
                                        geostruct.neighborCountries = string.IsNullOrWhiteSpace(neighbors[0]) ? null : neighbors;
                                    }
                                    if (admin1codes.TryGetValue(admin1code, out List <string> a1))
                                    {
                                        geostruct.admin1 = a1[0] ?? "None";
                                    }

                                    if (admin2codes.TryGetValue(admin2code, out List <string> a2))
                                    {
                                        geostruct.admin2 = a2[0] ?? "None";
                                    }
                                    // Store under the same key the lookup above uses, so the two can never disagree.
                                    tracker.Add(key, geostruct);
                                    File.WriteAllText(geostruct.tempFilename, "[");
                                }
                            }

                            if (long.TryParse(s[0], System.Globalization.NumberStyles.Integer, invariant, out long geoid))
                            {
                                geo.geonameId = geoid;
                            }
                            geo.name           = s[1].Trim();
                            geo.asciiName      = s[2].Trim();
                            var altNames       = s[3].Split(',');
                            geo.alternateNames = string.IsNullOrWhiteSpace(altNames[0]) ? null : altNames;

                            // Coordinates use '.' as the decimal separator; parsing them with the current
                            // culture silently yields wrong values where '.' is a group separator.
                            if (decimal.TryParse(s[4], System.Globalization.NumberStyles.Float, invariant, out decimal lat))
                            {
                                geo.latitude = lat;
                            }
                            if (decimal.TryParse(s[5], System.Globalization.NumberStyles.Float, invariant, out decimal lng))
                            {
                                geo.longitude = lng;
                            }

                            // Country and admin details live once on the shared group (geostruct) and are
                            // deliberately not repeated on every record.

                            geo.timezone = s[17];
                            // Use the Try variant: a zone id without a Windows mapping leaves
                            // windowsTimezone unset instead of faulting the task.
                            if (!string.IsNullOrWhiteSpace(geo.timezone) &&
                                TZConvert.TryIanaToWindows(geo.timezone, out string windowsZone))
                            {
                                geo.windowsTimezone = windowsZone;
                            }

                            // Same policy as the numeric columns: an unparsable date leaves the default value.
                            if (DateTime.TryParse(s[18], invariant, System.Globalization.DateTimeStyles.None, out DateTime modified))
                            {
                                geo.dateModified = modified;
                            }

                            // Serialize outside the lock; only the file append needs to be exclusive.
                            var json = JsonSerializer.Serialize(geo, options);
                            lock (geostruct.filelock)
                            {
                                // Every record after the first in this file is preceded by a comma.
                                var prefix = geostruct.isNewTempFile ? string.Empty : ",";
                                geostruct.isNewTempFile = false;
                                File.AppendAllText(geostruct.tempFilename, prefix + json);
                            }
                        }
                        finally
                        {
                            semaphore.Release();
                        }
                    });
                    tasks.Add(task);
                    // Logged once the row has been queued; the task itself may still be running.
                    Console.WriteLine("--- Done GeonameID: " + s[0]);
                }

                Console.WriteLine("Waiting Tasks");
                try
                {
                    // WaitAll returns (or throws) only after every task has completed, so the finally
                    // block below never races with a task that is still writing.
                    Task.WaitAll(tasks.ToArray());

                    // close array
                    Console.WriteLine("Closing Array in Files");
                    foreach (var entry in tracker.Values)
                    {
                        File.AppendAllText(entry.tempFilename, "]");
                    }

                    Console.WriteLine("Flatten Tracker");
                    // flatten tracker
                    List <geonameStructured> flattracker = tracker.Values.ToList();

                    // Write output
                    sw.Write("[{");
                    var  ctrys          = flattracker.GroupBy(r => r.countryCode).Select(r => r.First());
                    bool isFirstCountry = true;
                    foreach (var ctry in ctrys)
                    {
                        if (!isFirstCountry)
                        {
                            sw.Write(",");
                        }
                        isFirstCountry = false;
                        sw.Write(JsonString(ctry.country ?? "None") + ":{");
                        sw.Write($"\"CountryCode\":{JsonString(ctry.countryCode)},");
                        sw.Write($"\"CountryISO\":{JsonString(ctry.countryCodeISO3)},");
                        sw.Write($"\"Continent\":{JsonString(ctry.continent)},");
                        sw.Write($"\"Currency\":{JsonString(ctry.currency)},");
                        sw.Write($"\"CurrencyCode\":{JsonString(ctry.currencyCode)},");
                        sw.Write($"\"Languages\":{JsonSerializer.Serialize(ctry.languages, options)},");
                        sw.Write($"\"Neighbors\":{JsonSerializer.Serialize(ctry.neighborCountries, options)},");
                        sw.Write($"\"TLD\":{JsonString(ctry.tld)},");
                        sw.Write("\"Regions\":[{");
                        var  admin1s       = flattracker.Where(r => r.countryCode.Equals(ctry.countryCode, StringComparison.OrdinalIgnoreCase)).GroupBy(r => r.admin1Code).Select(r => r.First());
                        bool isFirstAdmin1 = true;
                        foreach (var a1 in admin1s)
                        {
                            if (!isFirstAdmin1)
                            {
                                sw.Write(",");
                            }
                            isFirstAdmin1 = false;
                            sw.Write(JsonString(a1.admin1 ?? "None") + ":{");
                            var  admin2s       = flattracker.Where(r => r.admin1Code.Equals(a1.admin1Code, StringComparison.OrdinalIgnoreCase)).GroupBy(r => r.admin2Code).Select(r => r.First());
                            bool isFirstAdmin2 = true;
                            foreach (var a2 in admin2s)
                            {
                                if (!isFirstAdmin2)
                                {
                                    sw.Write(",");
                                }
                                isFirstAdmin2 = false;
                                sw.Write(JsonString(a2.admin2 ?? "None") + ":");
                                var  files       = flattracker.Where(r => r.admin2Code.Equals(a2.admin2Code, StringComparison.OrdinalIgnoreCase));
                                bool isFirstFile = true;
                                foreach (var f in files)
                                {
                                    // Each temp file already holds a complete JSON array.
                                    var content = File.ReadAllText(f.tempFilename);
                                    if (!isFirstFile)
                                    {
                                        sw.Write(",");
                                    }
                                    isFirstFile = false;
                                    sw.Write(content);
                                }
                            }
                            sw.Write("}");
                        }
                        sw.Write("}]}");
                    }
                    sw.Write("}]");
                    sw.Flush();
                    Console.WriteLine("Output Done");
                }
                finally
                {
                    semaphore.Dispose();
                    // delete tracker temp files, also when stitching the output failed
                    foreach (var entry in tracker.Values)
                    {
                        if (File.Exists(entry.tempFilename))
                        {
                            File.Delete(entry.tempFilename);
                        }
                    }
                    Console.WriteLine("Delete Temp Files");
                }
            }
            catch (IOException e)
            {
                Console.WriteLine(e.Message);
            }

            // Renders a value as a quoted, escaped JSON string; null is written as "".
            // Names taken from the lookup tables may contain quotes or backslashes that would
            // otherwise break the hand-built document.
            static string JsonString(string value) => JsonSerializer.Serialize(value ?? string.Empty);
        }