Beispiel #1
0
 /// <summary>
 /// Derives a language identifier from a file name or path.
 /// </summary>
 /// <param name="fn">File name or path whose name starts with the language code.</param>
 /// <returns>
 /// The result of <c>Langs.oldToNew</c> applied to the part of the file name
 /// (directory and last extension removed) that precedes the first '.'.
 /// </returns>
 public static string fileToLang(string fn)
 {
     // Strip directory and the last extension, e.g. "dir/xx.yy.csv" -> "xx.yy".
     var baseName = Path.GetFileNameWithoutExtension(fn);

     // Keep only the text before the first remaining dot, e.g. "xx.yy" -> "xx".
     var leadingSegment = baseName.Split('.')[0];

     return Langs.oldToNew(leadingSegment);
 }
Beispiel #2
0
    /// <summary>
    /// Builds the matrix from the ';'-separated lines read from <paramref name="rdr"/>.
    /// </summary>
    /// <remarks>
    /// The first cell of the first line is split on '/'. If that yields exactly two parts, the lines
    /// are passed to <c>readRaw</c> as they are; the first part equal to "langs" switches on splitting
    /// of comma-separated language lists, the second part equal to "colNames" makes the first raw row
    /// the column names. Otherwise the input is treated as the "RJ import format": the table is
    /// transposed, tabs are replaced by spaces, "@@s" is replaced by ';' and every language code is
    /// mapped through <c>Langs.oldToNew</c>.
    /// The constructor takes ownership of the reader and always closes it, even on failure.
    /// </remarks>
    /// <param name="rdr">Reader supplying the text; closed by this constructor.</param>
    /// <exception cref="InvalidDataException">
    /// The input is in RJ import format and its lines do not all contain the same number of cells.
    /// </exception>
    public LangMatrix(StreamReader rdr) : this()
    {
        try {
            var lines  = rdr.ReadAllLines().ToArray();
            var cell00 = lines[0].Split(new char[] { ';' }, 2)[0].Split('/');
            LangMatrixRow[] rawLines;
            if (cell00.Length != 2) // => RJ import format: swap columns and rows
            {
                var matxOld = lines.Select(l => l.Split(';')).ToArray();
                var len     = matxOld[0].Length;
                if (!matxOld.All(r => r.Length == len))
                {
                    // A ragged table cannot be transposed.
                    throw new InvalidDataException(
                        "RJ import format requires every line to have the same number of ';'-separated cells (first line has " + len + ").");
                }

                // Allocate the transposed table: one new row per old column.
                var matxNew = new List<string[]>();
                for (var i = 0; i < len; i++)
                {
                    matxNew.Add(new string[matxOld.Length]);
                }

                // Transpose while unescaping: tab -> space, "@@s" -> ';'.
                for (var i = 0; i < matxOld.Length; i++)
                {
                    for (var j = 0; j < len; j++)
                    {
                        matxNew[j][i] = matxOld[i][j].Replace('\t', ' ').Replace("@@s", ";");
                    }
                }

                rawLines = readRaw(matxNew);
                for (var i = 0; i < rawLines.Length; i++)
                {
                    rawLines[i].lang = Langs.oldToNew(rawLines[i].lang);
                }

                // Prepend a null placeholder so the common code below can skip index 0 in both formats.
                // It is never dereferenced: cell00[1] is "" here, so the "colNames" branch is not taken.
                rawLines = new LangMatrixRow[] { null }.Concat(rawLines).ToArray();
                cell00   = new string[] { "lang", "" };
            }
            else
            {
                rawLines = readRaw(lines);
            }

            var langList         = new List<string>();
            var rows             = new List<string[]>();
            var groupTheSameRows = cell00[0] == "langs"; // one raw row may carry several comma-separated languages
            if (cell00[1] == "colNames")
            {
                colNames = rawLines[0].row; // save column names
            }

            // Index 0 is the header (or the placeholder); every language of a raw row shares that row's data.
            foreach (var l in rawLines.Skip(1))
            {
                foreach (var lang in groupTheSameRows ? l.lang.Split(',') : Linq.Items(l.lang))
                {
                    langList.Add(lang);
                    rows.Add(l.row);
                }
            }

            this.langs = langList.ToArray();
            this.data  = rows.ToArray();
        } finally {
            rdr.Close();
        }
    }
Beispiel #3
0
    /// <summary>
    /// Merges the values stored in "oldVersionInfo.csv" into <c>Langs.meta</c>, propagates
    /// breaker/stemmer classes to entries that have none, rebuilds every entry's alphabet from
    /// "alphaRoot.csv" and finally calls <c>Langs.save()</c>.
    /// </summary>
    public static void MergeOldToCldr()
    {
        var olds = new LangMatrix(LangsDesignDirs.otherappdata + "oldVersionInfo.csv");

        // Copy the per-language columns of the old file into the matching meta entry.
        olds.langs.ForEach((l, idx) => {
            var cldr          = Langs.nameToMeta[Langs.oldToNew(l)];
            var old           = olds.data[idx];
            cldr.BreakerClass = old[0];
            cldr.StemmerClass = old[1];
            // Columns 2-4 used to feed the IsEuroTalk / IsLingea / IsGoethe flags and are no longer read;
            // column 5 is not read either.
            int.TryParse(old[6], out cldr.LCID); // leaves 0 when the cell is not a valid integer
            cldr.GoogleTransId      = old[7];
            // Unlike LCID above, a non-null cell that is not a valid integer throws here.
            cldr.WordSpellCheckLCID = old[8] != null ? int.Parse(old[8]) : 0;
            cldr.Name = old[9];
        });

        // prepare langGuids: Lang -> classes, only for Langs that occur exactly once among the candidates
        var langCandidates = Langs.meta.
                             Where(m => (m.BreakerClass != null) && m.Lang != "zh" && m.Id != "en-US" && m.Lang != "pt" && m.Lang != "sr");
        var duplLangs = new HashSet<string>(langCandidates.GroupBy(m => m.Lang).Where(g => g.Count() > 1).Select(g => g.Key));
        var langGuids = langCandidates.Where(m => !duplLangs.Contains(m.Lang)).ToDictionary(m => m.Lang, m => new { m.StemmerClass, m.BreakerClass });

        // prepare scriptGuids: ScriptId -> BreakerClass, only for scripts that occur exactly once
        // (same candidates as above, additionally without "ur-PK")
        var scriptCandidates = Langs.meta.
                               Where(m => (m.BreakerClass != null) && m.Lang != "zh" && m.Id != "en-US" && m.Lang != "pt" && m.Lang != "sr" && m.Id != "ur-PK");
        var duplScripts = new HashSet<string>(scriptCandidates.GroupBy(m => m.ScriptId).Where(g => g.Count() > 1).Select(g => g.Key));
        var scriptGuids = scriptCandidates.Where(m => !duplScripts.Contains(m.ScriptId)).ToDictionary(m => m.ScriptId, m => m.BreakerClass);

        // expand breaking and stemming GUID to other langs
        // (both dictionaries were materialized above, so the assignments below do not affect them)
        foreach (var m in Langs.meta.Where(m => m.StemmerClass == null && m.BreakerClass == null && langGuids.ContainsKey(m.Lang)))
        {
            var lg = langGuids[m.Lang];
            m.StemmerClass = lg.StemmerClass;
            m.BreakerClass = lg.BreakerClass;
        }

        // entries still without a breaker inherit the one that is unique for their script
        foreach (var m in Langs.meta.Where(m => m.BreakerClass == null && scriptGuids.ContainsKey(m.ScriptId)))
        {
            m.BreakerClass = scriptGuids[m.ScriptId];
        }

        // alphabets
        var alphs     = new LangMatrix(LangsDesignDirs.cldr + "alphaRoot.csv");
        var ignAlphas = new HashSet<string>()
        {
            "Hant", "Hans", "Jpan", "Kore"
        };

        Langs.meta.ForEach(m => {
            m.Alphabet      = "";
            m.AlphabetUpper = null;
            if (!ignAlphas.Contains(m.ScriptId))
            {
                // Concatenate the alphaRoot.csv entry (index 0) of every name returned for this entry.
                Langs.getFullNames(m).ForEach(n => m.Alphabet += alphs[n.ToString(), 0]);
            }
            finishAlphabet(m); // defined elsewhere; runs before the emptiness check below
            if (m.Alphabet.Length == 0)
            {
                m.Alphabet = null;
            }
            else
            {
                // Store distinct characters in ascending char order.
                m.Alphabet      = new String(m.Alphabet.Distinct().OrderBy(ch => ch).ToArray());
                // NOTE(review): ToUpper() uses the current culture, so the result may depend on the
                // machine running this tool - confirm whether that is intended.
                m.AlphabetUpper = m.Alphabet.ToUpper();
            }
        });

        Langs.save();
    }