Ejemplo n.º 1
0
    /// <summary>
    /// Gathers language identifiers from several sources (CLDR supplemental data, CLDR "main" file names,
    /// a hand-maintained text file, <c>Langs.meta</c> and the .NET culture list), merges them into one
    /// de-duplicated list and pairs every identifier with its most likely subtags.
    /// </summary>
    /// <remarks>
    /// All input locations are hard-coded absolute paths.
    /// NOTE(review): the statement that builds <c>res</c> continues beyond the visible part of this method,
    /// so what is finally done with the result is not documented here.
    /// </remarks>
    public static void exportForWikibulary()
    {
        var metaData     = XElement.Load(@"D:\rewise\design\langsDesign\cldr\supplementalData.xml");
        // "type" attribute of every <languagePopulation> element, with '_' normalized to '-'.
        // NOTE(review): throws NullReferenceException if an element has no "type" attribute.
        var fromMetadata = metaData.Descendants("languagePopulation").Select(e => e.Attribute("type").Value.Replace('_', '-')).ToArray();
        // File names (without extension) of the *.xml files directly in the "main" directory, '_' normalized to '-'.
        var fromMain     = Directory.GetFiles(@"d:\wikibulary\data\cldr\common\main", "*.xml", SearchOption.TopDirectoryOnly)
                           .Select(fn => Path.GetFileNameWithoutExtension(fn).Replace('_', '-'))
                           .ToArray();
        // Additional identifiers, one per line of the text file (lines are used as-is, not trimmed).
        var missing = File.ReadAllLines(@"d:\rewise\design\langsDesign\cldr\langs-for-export.txt");
        //.Select(l => {
        //  if (!LocaleIdentifier.TryParse(l, out var li)) return null;
        //  return new { lang = l, likely = li.MostLikelySubtags().ToString() };
        //});
        // Ids of all entries in Langs.meta (lazy sequence; enumerated when `all` is built).
        var fromMeta = Langs.meta.Select(m => m.Id);
        //var fromMeta = Langs.meta.Select(m => new { lang = m.Id, likely = LocaleIdentifier.Parse(m.Id).MostLikelySubtags().ToString() });
        // Names of all cultures reported by .NET for CultureTypes.AllCultures.
        var fromCulture = CultureInfo.GetCultures(CultureTypes.AllCultures)//.Where(c => !wrong.Contains(c.Name))
                          .Select(c => c.Name).ToArray();
        //.Select(c => {
        //  if (!LocaleIdentifier.TryParse(c.Name, out var li)) return null;
        //  return new { lang = c.Name, likely = li.MostLikelySubtags().ToString() };
        //});

        // Identifiers that are dropped from the merged list.
        var wrong = new HashSet <string> {
            "no", "", "root"
        };
        // Union of all sources: nulls and the excluded identifiers removed, duplicates collapsed (case-sensitive).
        var all = fromMetadata.Concat(fromMain).Concat(fromMeta).Concat(fromCulture).Concat(missing)
                  .Where(l => l != null && !wrong.Contains(l)).Distinct().ToArray();

        // Pair every identifier with its most-likely-subtags form; `likely` is null when the identifier cannot be parsed.
        var res = all
                  .Select(l => new {
            lang   = l,
            likely = LocaleIdentifier.TryParse(l, out var li) ? li.MostLikelySubtags().ToString() : null
        })
Ejemplo n.º 2
0
    /// <summary>
    /// Cross-checks the language prefixes of the wiki dumps returned by
    /// <c>Corpus.DownloadWikies.getUrls()</c> against the language metadata held in <c>Langs</c>.
    /// </summary>
    /// <remarks>
    /// The method returns nothing and writes nothing; every intermediate result lives only in a local
    /// variable. NOTE(review): presumably the locals are meant to be inspected in a debugger - confirm.
    /// </remarks>
    public static void Build()
    {
        // Language prefix of a dump name = first non-empty segment obtained by splitting the name on "wi".
        // A name that yields no segment at all is skipped instead of throwing IndexOutOfRangeException.
        var langs = Corpus.DownloadWikies.getUrls()
                    .Where(u => u.size > /*1000000*/ 0)
                    .Select(u => u.name.Split(new string[] { "wi" }, StringSplitOptions.RemoveEmptyEntries).FirstOrDefault())
                    .Where(l => l != null)
                    .Distinct()
                    .ToArray();

        // Languages known to the metadata, and those of them that have no wiki dump.
        var lmLangs   = Langs.meta.Select(l => l.Lang).Distinct().ToArray();
        var notInWiki = lmLangs.Except(langs).ToArray();

        // Parse every prefix exactly once; prefixes that are not valid locale identifiers are dropped.
        var parsed = langs
                     .Select(l => LocaleIdentifier.TryParse(l, out LocaleIdentifier li) ? new { lang = l, locale = li } : null)
                     .Where(p => p != null)
                     .ToArray();
        var validLangs = parsed.Select(p => p.lang).ToArray();

        // Expand each valid prefix to its most-likely-subtags string form.
        var wikiLocs = parsed.Select(p => p.locale.MostLikelySubtags().ToString()).ToArray();

        // Wiki locales that have metadata (oks) and those that do not (wrongs).
        var oks = wikiLocs
                  .Select(loc => Langs.fullNameToMeta.TryGetValue(loc, out Langs.CldrLang cl) ? cl : null)
                  .NotNulls()
                  .ToArray();
        var wrongs = wikiLocs
                     .Where(loc => !Langs.fullNameToMeta.TryGetValue(loc, out Langs.CldrLang _))
                     .ToArray();

        //ALPHAs
        // from clibs\utils\unicode\unicodeBlocks.json
        //Armi (http://zuga.net/articles/unicode/script/imperial-aramaic/) and Goth (? https://en.wikipedia.org/wiki/Gothic_alphabet) missing
        var alphas = new HashSet <String> {
            "Latn", "Zyyy", "Grek", "Copt", "Cyrl", "Armn", "Hebr", "Arab", "Syrc", "Thaa", "Nkoo", "Samr", "Mand", "Deva", "Beng", "Guru", "Gujr", "Orya", "Taml", "Telu", "Knda", "Mlym", "Sinh", "Thai", "Laoo", "Tibt", "Mymr", "Geor", "Hang", "Ethi", "Cher", "Cans", "Ogam", "Runr", "Tglg", "Hano", "Buhd", "Tagb", "Khmr", "Mong", "Limb", "Tale", "Talu", "Bugi", "Lana", "Bali", "Sund", "Batk", "Lepc", "Olck", "Glag", "Tfng", "Hira", "Kana", "Bopo", "Hani", "Yiii", "Lisu", "Vaii", "Bamu", "Sylo", "Phag", "Saur", "Kali", "Rjng", "Java", "Cham", "Tavt", "Mtei"
        };

        // Unmatched wiki locales whose script is not in the list above.
        var wrongAlphas = wrongs.Where(l => !alphas.Contains(LocaleIdentifier.Parse(l).Script)).ToArray();
    }