/// <summary>
/// Reads the admin2 codes test file, writes the records back out to a second file and
/// verifies via <c>FileUtil.EnsureFilesAreFunctionallyEqual</c> that both files match.
/// </summary>
public void Admin2CodesComposer_ComposesFileCorrectly()
{
    const string inputPath = @"testdata\test_admin2Codes.txt";
    const string outputPath = @"testdata\test_admin2Codes.out.txt";

    // Round-trip: parse the source file and compose it again.
    var records = GeoFileReader.ReadAdmin2Codes(inputPath);
    GeoFileWriter.WriteAdmin2Codes(outputPath, records);

    // Tab-separated, UTF-8; the numeric and boolean arguments are passed through
    // unchanged (presumably field count / skipped lines / strictness - confirm in FileUtil).
    FileUtil.EnsureFilesAreFunctionallyEqual(inputPath, outputPath, 4, 0, new[] { '\t' }, Encoding.UTF8, false);
}
/// <summary>
/// Parses the admin2 codes test file and checks the record count as well as every
/// field (Code, Name, NameASCII, GeoNameId) of the two expected records.
/// </summary>
public void Admin2CodesParser_ParsesFileCorrectly()
{
    var parsed = GeoFileReader.ReadAdmin2Codes(@"testdata\test_admin2Codes.txt").ToArray();
    Assert.AreEqual(2, parsed.Length);

    // First record: name contains non-ASCII characters, ASCII variant is transliterated.
    var first = parsed[0];
    Assert.AreEqual("AF.01.7052666", first.Code);
    Assert.AreEqual("Darwāz-e Bālā", first.Name);
    Assert.AreEqual("Darwaz-e Bala", first.NameASCII);
    Assert.AreEqual(7052666, first.GeoNameId);

    // Second record: expected to carry a GeoNameId of 0.
    var second = parsed[1];
    Assert.AreEqual("CA.10.11", second.Code);
    Assert.AreEqual("Gaspésie-Îles-de-la-Madeleine", second.Name);
    Assert.AreEqual("Gaspesie-Iles-de-la-Madeleine", second.NameASCII);
    Assert.AreEqual(0, second.GeoNameId);
}
/// <summary>
/// Creates "_asciilies.log" inside <paramref name="logpath"/> and lists every entry whose
/// NameASCII value contains at least one character outside the \x20-\x7F range.
/// The extended geoname dumps, admin1CodesASCII.txt and admin2Codes.txt found in
/// Dump_DownloadDirectory are inspected.
/// </summary>
/// <param name="logpath">Directory in which the log file is created.</param>
private static void DumpASCIILies(string logpath)
{
    using (var lw = File.CreateText(Path.Combine(logpath, "_asciilies.log")))
    {
        //Test for fields that claim to contain ASCII only but contain non-ASCII data anyways
        // Deliberately NOT case-insensitive: case folding can treat some non-ASCII characters
        // (e.g. U+212A KELVIN SIGN) as equivalent to ASCII letters, which would hide them.
        var nonasciifilter = new Regex("[^\x20-\x7F]", RegexOptions.Compiled | RegexOptions.CultureInvariant);
        // Two-letter country dumps such as "NL.txt"; the dot is escaped so it only matches a literal '.'.
        var geofilefilter = new Regex(@"^[A-Z]{2}\.txt$", RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase);

        lw.WriteLine("The following files contain entries that claim to contain ASCII only but contain non-ASCII data anyways:");

        // Well-known extended geoname dumps plus every per-country dump in the download directory.
        var extgeofiles = new[] { "allCountries", "cities1000", "cities5000", "cities15000", "no-country" }
            .Select(f => Path.Combine(Dump_DownloadDirectory, f + ".txt"))
            .Union(Directory.GetFiles(Dump_DownloadDirectory, "*.txt")
                .Where(f => geofilefilter.IsMatch(Path.GetFileName(f)))
            );

        // Collect offenders from the extended geoname files (processed in parallel)
        // and from both admin code files.
        var lies = extgeofiles.AsParallel()
            .SelectMany(f => GeoFileReader.ReadExtendedGeoNames(f)
                .Where(e => nonasciifilter.IsMatch(e.NameASCII))
                .Select(i => new NonASCIIEntry { FileName = f, Id = i.Id, Value = i.NameASCII })
            ).Union(
                GeoFileReader.ReadAdmin1Codes(Path.Combine(Dump_DownloadDirectory, "admin1CodesASCII.txt")).AsParallel()
                    .Where(c => nonasciifilter.IsMatch(c.NameASCII))
                    .Select(i => new NonASCIIEntry { FileName = "admin1CodesASCII.txt", Id = i.GeoNameId, Value = i.NameASCII })
            ).Union(
                GeoFileReader.ReadAdmin2Codes(Path.Combine(Dump_DownloadDirectory, "admin2Codes.txt")).AsParallel()
                    .Where(c => nonasciifilter.IsMatch(c.NameASCII))
                    .Select(i => new NonASCIIEntry { FileName = "admin2Codes.txt", Id = i.GeoNameId, Value = i.NameASCII })
            );

        // One tab-separated line per offender: file name, id, offending value.
        foreach (var l in lies.OrderBy(l => l.FileName).ThenBy(l => l.Value))
        {
            lw.WriteLine(string.Join("\t", Path.GetFileName(l.FileName), l.Id, l.Value));
        }
    }
}
/// <summary>
/// Smoke test for the stream-based overload of <c>GeoFileReader.ReadAdmin2Codes</c>:
/// opens the test file as a stream and counts the records read from it.
/// </summary>
public void FileReader_Admin2Codes_StreamOverload()
{
    using (var stream = File.OpenRead(@"testdata\test_admin2Codes.txt"))
    {
        // The count itself is discarded; the call only has to complete without throwing.
        GeoFileReader.ReadAdmin2Codes(stream).Count();
    }
}
/// <summary>
/// Builds the list of dump files to process. Each entry pairs a file name with a test
/// delegate that hands a record-counting reader call to <c>ExecuteTest</c>. The fixed
/// list is unioned with the entries returned by <c>GetCountryDumps</c>.
/// </summary>
/// <param name="downloader">Downloader that is passed on to <c>GetCountryDumps</c>.</param>
/// <returns>The fixed dumps followed by the country dumps, as an array.</returns>
private static GeoFile[] GetDumps(GeoFileDownloader downloader)
{
    var fixedDumps = new[]
    {
        new GeoFile
        {
            Filename = "admin1CodesASCII.txt",
            Test = f => ExecuteTest(f, fn => GeoFileReader.ReadAdmin1Codes(fn).Count())
        },
        new GeoFile
        {
            Filename = "admin2Codes.txt",
            Test = f => ExecuteTest(f, fn => GeoFileReader.ReadAdmin2Codes(fn).Count())
        },
        new GeoFile
        {
            Filename = "allCountries.zip",
            Test = f => ExecuteTest(f, fn => GeoFileReader.ReadExtendedGeoNames(fn).Count())
        },
        new GeoFile
        {
            Filename = "alternateNames.zip",
            Test = f => ExecuteTest(f, fn => GeoFileReader.ReadAlternateNames(fn).Count())
        },
        new GeoFile
        {
            Filename = "cities1000.zip",
            Test = f => ExecuteTest(f, fn => GeoFileReader.ReadExtendedGeoNames(fn).Count())
        },
        new GeoFile
        {
            Filename = "cities15000.zip",
            Test = f => ExecuteTest(f, fn => GeoFileReader.ReadExtendedGeoNames(fn).Count())
        },
        new GeoFile
        {
            Filename = "cities5000.zip",
            Test = f => ExecuteTest(f, fn => GeoFileReader.ReadExtendedGeoNames(fn).Count())
        },
        new GeoFile
        {
            Filename = "countryInfo.txt",
            Test = f => ExecuteTest(f, fn => GeoFileReader.ReadCountryInfo(fn).Count())
        },
        //Featurecodes are downloaded by GetCountryDumps()
        new GeoFile
        {
            Filename = "hierarchy.zip",
            Test = f => ExecuteTest(f, fn => GeoFileReader.ReadHierarchy(fn).Count())
        },
        new GeoFile
        {
            Filename = "iso-languagecodes.txt",
            Test = f => ExecuteTest(f, fn => GeoFileReader.ReadISOLanguageCodes(fn).Count())
        },
        new GeoFile
        {
            Filename = "no-country.zip",
            Test = f => ExecuteTest(f, fn => GeoFileReader.ReadExtendedGeoNames(fn).Count())
        },
        new GeoFile
        {
            Filename = "timeZones.txt",
            Test = f => ExecuteTest(f, fn => GeoFileReader.ReadTimeZones(fn).Count())
        },
        new GeoFile
        {
            Filename = "userTags.zip",
            Test = f => ExecuteTest(f, fn => GeoFileReader.ReadUserTags(fn).Count())
        },
    };

    // Append the per-country dumps (Union, so duplicates per default equality are dropped).
    return fixedDumps.Union(GetCountryDumps(downloader)).ToArray();
}