コード例 #1
0
ファイル: wikt-dumps.cs プロジェクト: reactxx/rewise
    /// <summary>
    /// Writes "dump-objects-count.txt" into the logs directory. For every <c>database</c>
    /// slot holding more than one item it emits a <c>lang=classUri=count</c> line, and for
    /// every class URI a <c>**=classUri=total</c> line; all lines are sorted with the
    /// default string ordering.
    /// </summary>
    public static void dumpObjectCount()
    {
        // Slot index (the "low byte" that decodeLowByte understands) -> number of items in the slot.
        var countBySlot = database
            .Select((list, low) => new { list, low })
            .Where(slot => slot.list.Count() > 1)
            .ToDictionary(slot => slot.low, slot => slot.list.Length);

        // Turn each slot index back into its language and class URI.
        var decoded = countBySlot.Select(pair =>
        {
            decodeLowByte(pair.Key, out string lang, out string classUri);
            return new { lang, classUri, pair.Value };
        });

        var lines = decoded
            .GroupBy(item => item.classUri)
            .SelectMany(byClass =>
            {
                var perLang = byClass.Select(item => $"{item.lang}={item.classUri}={item.Value}");
                // "**" stands in for the language on the per-class total line.
                var total = Linq.Items($"**={byClass.Key}={byClass.Sum(item => item.Value)}");
                return perLang.Concat(total);
            })
            .OrderBy(line => line);

        File.WriteAllLines(LowUtilsDirs.logs + "dump-objects-count.txt", lines);
    }
コード例 #2
0
ファイル: wikt-dumps.cs プロジェクト: reactxx/rewise
    /// <summary>
    /// Counts how often each page/entry/form shape occurs, both per language and overall
    /// (under the "**" prefix), and writes the sorted <c>key count</c> lines to
    /// "dump-page-parts.txt" in the logs directory.
    /// </summary>
    public static void dumpPageParts()
    {
        // Yields the key paths a single page contributes: the page itself, its entry
        // bucket and, for every entry, that entry's form bucket.
        IEnumerable<string[]> pageDump(Page p)
        {
            yield return new[] { "p" };

            if (p.entries == null)
            {
                yield return new[] { "p", "a noentry" };
                yield break;
            }

            var ens = p.entries.Length == 1 ? "b entry" : "c entries";

            yield return new[] { "p", ens };

            foreach (var en in p.entries)
            {
                if (en.otherForm == null)
                {
                    yield return new[] { "p", ens, "a noform" };
                    // Only this entry lacks forms: move on to the next entry. Ending the
                    // iterator here would leave the page's remaining entries uncounted.
                    continue;
                }

                var fms = en.otherForm.Length == 1 ? "b form" : "c forms";
                yield return new[] { "p", ens, fms };
            }
        }

        var pageParts = new Dictionary<string, int>();

        // Every key path is counted twice: once under "**" (all languages) and once under its language.
        var keys = AllLangs.SelectMany(lang => getObjsStr<Page>(lang)
            .SelectMany(p => pageDump(p))
            .Select(arr => string.Join("=", arr))
            .SelectMany(l => Linq.Items("**=" + l, lang + "=" + l)));

        foreach (var s in keys)
        {
            pageParts[s] = pageParts.TryGetValue(s, out int c) ? c + 1 : 1;
        }

        File.WriteAllLines(
            LowUtilsDirs.logs + "dump-page-parts.txt",
            pageParts.OrderBy(kv => kv.Key).Select(kv => $"{kv.Key} {kv.Value}"));
    }
コード例 #3
0
    /// <summary>
    /// Reads a matrix from semicolon-separated lines supplied by <paramref name="rdr"/>.
    /// The reader is closed before the constructor returns, whether or not reading succeeds.
    /// </summary>
    /// <remarks>
    /// The first cell of the first line is split on '/'. When that yields two parts the
    /// lines are handed to <c>readRaw</c> unchanged. Otherwise the input is treated as the
    /// "RJ import format": the cell grid is transposed, tabs become spaces, "@@s" becomes
    /// ";" and every row's language is passed through <c>Langs.oldToNew</c>.
    /// </remarks>
    /// <param name="rdr">Source of the lines; always closed by this constructor.</param>
    /// <exception cref="FormatException">
    /// An RJ-format line has a different number of cells than the first line.
    /// </exception>
    public LangMatrix(StreamReader rdr) : this()
    {
        try {
            var lines  = rdr.ReadAllLines().ToArray();
            var cell00 = lines[0].Split(new char[] { ';' }, 2)[0].Split('/');
            LangMatrixRow[] rawLines;
            if (cell00.Length != 2) // => RJ import format: change COLS and ROWS
            {
                var matxOld = lines.Select(l => l.Split(';')).ToArray();
                var len     = matxOld[0].Length;
                for (var i = 0; i < matxOld.Length; i++)
                {
                    if (matxOld[i].Length != len)
                    {
                        throw new FormatException(
                            $"RJ import format: line {i + 1} has {matxOld[i].Length} cells, expected {len}.");
                    }
                }

                // Transpose: matxNew[j][i] receives the cleaned-up matxOld[i][j].
                var matxNew = new List<string[]>(len);
                for (var j = 0; j < len; j++)
                {
                    matxNew.Add(new string[matxOld.Length]);
                }
                for (var i = 0; i < matxOld.Length; i++)
                {
                    for (var j = 0; j < len; j++)
                    {
                        matxNew[j][i] = matxOld[i][j].Replace('\t', ' ').Replace("@@s", ";");
                    }
                }

                rawLines = readRaw(matxNew);
                for (var i = 0; i < rawLines.Length; i++)
                {
                    rawLines[i].lang = Langs.oldToNew(rawLines[i].lang);
                }

                // Prepend a null placeholder so the Skip(1) below keeps every imported row;
                // the empty second header cell means no column names are taken from it.
                rawLines = new LangMatrixRow[] { null }.Concat(rawLines).ToArray();
                cell00   = new string[] { "lang", "" };
            }
            else
            {
                rawLines = readRaw(lines);
            }

            var groupTheSameRows = cell00[0] == "langs"; // group by rows
            if (cell00[1] == "colNames")
            {
                colNames = rawLines[0].row; // save column names
            }

            var langs = new List<string>();
            var data  = new List<string[]>();
            foreach (var l in rawLines.Skip(1))
            {
                // In "langs" mode one row carries a comma-separated language list and is
                // repeated (same row array) for each language in it.
                foreach (var lang in groupTheSameRows ? l.lang.Split(',') : Linq.Items(l.lang))
                {
                    langs.Add(lang);
                    data.Add(l.row);
                }
            }

            this.langs = langs.ToArray();
            this.data  = data.ToArray();
        } finally {
            rdr.Close();
        }
    }
コード例 #4
0
    /// <summary>
    /// Builds the CLDR-derived matrices (language, script and region display names, locale
    /// display patterns, and the exemplar-character alphabets) and writes each of them as a
    /// CSV file and as a serialized ".msg" file.
    /// </summary>
    public static void Build()
    {
        var cldr = Json.Deserialize<Langs.CldrLang[]>(LangsDirs.dirCldrTexts);

        // One locale identifier per language/script/region combination.
        var localeIds = cldr
            .SelectMany(c => c.Regions.Select(r => LocaleIdentifier.Parse($"{c.Lang}-{c.ScriptId}-{r}")))
            .ToArray();
        var locs = localeIds.Select(id => new Locale(id)).ToArray();

        // ALPHABETS
        var bracesAndSpaces = new Regex("[{} ]");
        var alphas = locs
            .Select(loc =>
            {
                var charactersNode = loc.Find("//characters");
                var data = charactersNode
                    .SelectChildren(XPathNodeType.Element)
                    .OfType<XPathNavigator>()
                    .Where(node => node.Name == "exemplarCharacters")
                    .Select(node =>
                    {
                        var value = CldrUtils.decodeUnicodeLiteral(node.Value.Normalize().Trim('[', ']'));
                        value = bracesAndSpaces.Replace(value, "");
                        // An exemplarCharacters element without a type attribute is filed under "root".
                        var typeAttr = node.SelectSingleNode("./@type");
                        var key = typeAttr == null ? "root" : typeAttr.Value;
                        return new { key, value };
                    })
                    .ToArray();
                return new { lang = loc.Id.ToString(), data };
            })
            .Where(entry => entry.data.Length > 0)
            .OrderBy(entry => entry.lang)
            .ToArray();

        // Rows (one per locale) for the alphabet of the given exemplar type.
        IEnumerable<LangMatrixRow> alphabetRows(string key)
        {
            return alphas.SelectMany(a => a.data
                .Where(d => d.key == key)
                .Select(d => new LangMatrixRow
                {
                    lang = a.lang,
                    columnNames = Linq.Items(d.key).ToArray(),
                    row = Linq.Items(d.value).ToArray()
                }));
        }

        LangMatrix alphabetMatrix(string key)
        {
            return new LangMatrix(alphabetRows(key), null, true);
        }

        var alphaRoot        = alphabetMatrix("root");
        var alphaAuxlity     = alphabetMatrix("auxiliary");
        var alphaIndex       = alphabetMatrix("index");
        var alphaNumbers     = alphabetMatrix("numbers");
        var alphaPunctuation = alphabetMatrix("punctuation");

        var patternRows = locs.Select(loc =>
        {
            var localePattern   = loc.FindOrDefault("//localeDisplayNames/localeDisplayPattern/localePattern").ToString();
            var localeSeparator = loc.FindOrDefault("//localeDisplayNames/localeDisplayPattern/localeSeparator").ToString();
            return new LangMatrixRow
            {
                lang = loc.Id.ToString(),
                row = new string[] { localePattern, localeSeparator },
                columnNames = new string[] { "pattern", "separator" },
            };
        });
        var patterns = new LangMatrix(patternRows, null, true);

        // Each display-name matrix is built with its own protocol dictionary passed to the constructor.
        var langsProtocol = new Dictionary<string, Dictionary<string, string>>();
        var langs = new LangMatrix(
            locs.Select(loc => fromCldr(loc, "//localeDisplayNames/languages")),
            langsProtocol,
            true);

        var scriptsProtocol = new Dictionary<string, Dictionary<string, string>>();
        var scripts = new LangMatrix(
            locs.Select(loc => fromCldr(loc, "//localeDisplayNames/scripts")),
            scriptsProtocol,
            true);

        var regionsProtocol = new Dictionary<string, Dictionary<string, string>>();
        var regions = new LangMatrix(
            locs.Select(loc => fromCldr(loc, "//localeDisplayNames/territories")),
            regionsProtocol,
            true);

        void saveCsv(LangMatrix matrix, string fileName)
        {
            matrix.save(LangsDesignDirs.cldr + fileName, true);
        }

        void saveMsg(LangMatrix matrix, string fileName)
        {
            File.WriteAllBytes(LangsDesignDirs.data + @"langsDesign\" + fileName, Protobuf.ToBytes(matrixToDart(matrix)));
        }

        saveCsv(langs, "cldrNameLangs.csv");
        saveCsv(scripts, "cldrNameScripts.csv");
        saveCsv(regions, "cldrNameRegions.csv");
        saveCsv(patterns, "cldrNamePatterns.csv");
        saveCsv(alphaRoot, "alphaRoot.csv");
        saveCsv(alphaAuxlity, "alphaAuxlity.csv");
        saveCsv(alphaIndex, "alphaIndex.csv");
        saveCsv(alphaNumbers, "alphaNumbers.csv");
        saveCsv(alphaPunctuation, "alphaPunctuation.csv");

        // save to DART messages
        saveMsg(langs, "cldrNameLangs.msg");
        saveMsg(scripts, "cldrNameScripts.msg");
        saveMsg(regions, "cldrNameRegions.msg");
        saveMsg(patterns, "cldrNamePatterns.msg");
        saveMsg(alphaRoot, "alphaRoot.msg");
        saveMsg(alphaAuxlity, "alphaAuxlity.msg");
        saveMsg(alphaIndex, "alphaIndex.msg");
        saveMsg(alphaNumbers, "alphaNumbers.msg");
        saveMsg(alphaPunctuation, "alphaPunctuation.msg");
    }
コード例 #5
0
 /// <summary>
 /// Single-string convenience overload: wraps <paramref name="str"/> with
 /// <c>Linq.Items</c> and forwards it to the other <c>getBlockNames</c> overload.
 /// </summary>
 /// <param name="str">The text to inspect.</param>
 /// <returns>Whatever the forwarded overload returns for the wrapped string.</returns>
 public static Dictionary<string, HashSet<char>> getBlockNames(string str)
     => getBlockNames(Linq.Items(str));
コード例 #6
0
 /// <summary>
 /// Single-string convenience overload: wraps <paramref name="str"/> with
 /// <c>Linq.Items</c> and forwards it, together with <paramref name="script"/>, to the
 /// other <c>checkBlockNames</c> overload.
 /// </summary>
 /// <param name="str">The text to check; may be null.</param>
 /// <param name="script">Passed through unchanged to the forwarded overload.</param>
 /// <returns>
 /// <c>null</c> when <paramref name="str"/> is null; otherwise the forwarded overload's result.
 /// </returns>
 public static Dictionary<string, string> checkBlockNames(string str, string script)
 {
     if (str == null)
     {
         return null;
     }

     return checkBlockNames(Linq.Items(str), script);
 }