/// <summary>
/// Derives a language identifier from a file name.
/// </summary>
/// <param name="fn">File name or path. The directory part and the last extension are stripped first.</param>
/// <returns>
/// The result of <c>Langs.oldToNew</c> applied to the part of the bare file name
/// that precedes its first '.' character.
/// </returns>
public static string fileToLang(string fn) {
    // "dir/cs.something.csv" -> "cs.something" -> "cs"
    var bareName = Path.GetFileNameWithoutExtension(fn);
    var leadingSegment = bareName.Split('.')[0];
    return Langs.oldToNew(leadingSegment);
}
/// <summary>
/// Builds the matrix from semicolon-separated text read from <paramref name="rdr"/>.
/// The reader is closed before the constructor returns, whether or not parsing succeeds.
/// </summary>
/// <remarks>
/// The text before the first ';' of the first line (the "0,0 cell") selects the input format:
/// <list type="bullet">
/// <item>
/// If it splits on '/' into exactly two parts, the lines are handed to <c>readRaw</c> unchanged.
/// A first part equal to "langs" means the language cell of each row is a comma-separated list and
/// the row is repeated once per listed language. A second part equal to "colNames" means the
/// first row's cells are stored in <c>colNames</c>. The first row is never emitted as data.
/// </item>
/// <item>
/// Otherwise the input is treated as the "RJ import format": the table is transposed
/// (columns become rows), tabs are replaced by spaces, the "@@s" escape is replaced by ';',
/// and every row's language is mapped through <c>Langs.oldToNew</c>. All rows are emitted as data.
/// </item>
/// </list>
/// </remarks>
/// <param name="rdr">Source reader; it is closed by this constructor.</param>
/// <exception cref="InvalidDataException">
/// The input contains no lines, or (RJ import format) the lines do not all have the same number of cells.
/// </exception>
public LangMatrix(StreamReader rdr) : this() {
    try {
        var lines = rdr.ReadAllLines().ToArray();
        if (lines.Length == 0) {
            // Previously this surfaced as an IndexOutOfRangeException on lines[0].
            throw new InvalidDataException("LangMatrix input is empty: at least one line is required.");
        }

        // Only the first cell of the first line is inspected to detect the format.
        var cell00 = lines[0].Split(new char[] { ';' }, 2)[0].Split('/');
        LangMatrixRow[] rawLines;

        if (cell00.Length != 2) {
            // => RJ import format: change COLS and ROWS
            var matxOld = lines.Select(l => l.Split(';')).ToArray();
            var len = matxOld[0].Length;
            if (!matxOld.All(r => r.Length == len)) {
                throw new InvalidDataException(
                    "LangMatrix input (RJ import format) is not rectangular: every line must have " + len + " cells.");
            }

            // Transpose: matxNew[j][i] receives the cleaned-up matxOld[i][j].
            var matxNew = new List<string[]>();
            for (var i = 0; i < len; i++) {
                matxNew.Add(new string[matxOld.Length]);
            }
            for (var i = 0; i < matxOld.Length; i++) {
                for (var j = 0; j < len; j++) {
                    matxNew[j][i] = matxOld[i][j].Replace('\t', ' ').Replace("@@s", ";");
                }
            }

            rawLines = readRaw(matxNew);
            for (var i = 0; i < rawLines.Length; i++) {
                rawLines[i].lang = Langs.oldToNew(rawLines[i].lang);
            }

            // The shared code below always skips the first row. Prepend a placeholder so that
            // no real row is lost, and use a synthetic 0,0 cell that enables neither
            // language grouping nor column names.
            rawLines = new LangMatrixRow[] { null }.Concat(rawLines).ToArray();
            cell00 = new string[] { "lang", "" };
        } else {
            rawLines = readRaw(lines);
        }

        var groupTheSameRows = cell00[0] == "langs"; // group by rows
        if (cell00[1] == "colNames") {
            colNames = rawLines[0].row; // save column names
        }

        // One (lang, row) pair per language; a grouped row is repeated for each of its languages.
        var langList = new List<string>();
        var rowList = new List<string[]>();
        foreach (var l in rawLines.Skip(1)) {
            foreach (var lang in groupTheSameRows ? l.lang.Split(',') : Linq.Items(l.lang)) {
                langList.Add(lang);
                rowList.Add(l.row);
            }
        }

        this.langs = langList.ToArray();
        this.data = rowList.ToArray();
    } finally {
        rdr.Close();
    }
}
/// <summary>
/// Merges per-language data from "oldVersionInfo.csv" into the entries of <c>Langs.meta</c>,
/// propagates breaker/stemmer classes to entries that have none, rebuilds the
/// <c>Alphabet</c> / <c>AlphabetUpper</c> of every entry from "alphaRoot.csv", and finally
/// calls <c>Langs.save()</c>.
/// </summary>
/// <remarks>
/// Steps, in order:
/// <list type="number">
/// <item>Copy columns 0, 1, 6, 7, 8 and 9 of each old row into the matching meta entry.</item>
/// <item>Build a lang -> (stemmer, breaker) map and a script -> breaker map from entries that
/// already have a breaker class, ignoring keys shared by more than one such entry.</item>
/// <item>Fill in missing classes from those maps (by language first, then by script).</item>
/// <item>Recompute alphabets.</item>
/// </list>
/// Both maps are fully built before any class is propagated, so propagated values never
/// feed back into the maps.
/// </remarks>
public static void MergeOldToCldr() {
    var olds = new LangMatrix(LangsDesignDirs.otherappdata + "oldVersionInfo.csv");
    olds.langs.ForEach((l, idx) => {
        var cldr = Langs.nameToMeta[Langs.oldToNew(l)];
        var old = olds.data[idx];
        cldr.BreakerClass = old[0];
        cldr.StemmerClass = old[1];
        // Columns 2-5 are not read here. Columns 2-4 were once mapped to
        // IsEuroTalk / IsLingea / IsGoethe; that mapping is disabled.
        int.TryParse(old[6], out cldr.LCID); // result ignored: LCID becomes 0 when the cell is not a number
        cldr.GoogleTransId = old[7];
        cldr.WordSpellCheckLCID = old[8] != null ? int.Parse(old[8]) : 0;
        cldr.Name = old[9];
    });

    // prepare langGuids
    // Lazy filter over Langs.meta; it is re-evaluated on each enumeration below, all of which
    // happen before any BreakerClass is modified.
    var withGuid = Langs.meta.Where(m =>
        (m.BreakerClass != null) && m.Lang != "zh" && m.Id != "en-US" && m.Lang != "pt" && m.Lang != "sr");
    var duplLangs = withGuid.GroupBy(m => m.Lang).Where(g => g.Count() > 1).Select(g => g.Key).ToArray();
    var langGuids = withGuid
        .Where(m => !duplLangs.Contains(m.Lang))
        .ToDictionary(m => m.Lang, m => new { m.StemmerClass, m.BreakerClass });

    // prepare scriptGuids: same filter as above, additionally excluding "ur-PK"
    var withScriptGuid = withGuid.Where(m => m.Id != "ur-PK");
    var duplScripts = withScriptGuid.GroupBy(m => m.ScriptId).Where(g => g.Count() > 1).Select(g => g.Key).ToArray();
    var scriptGuids = withScriptGuid
        .Where(m => !duplScripts.Contains(m.ScriptId))
        .ToDictionary(m => m.ScriptId, m => m.BreakerClass);

    // expand breaking and stemming GUID to other langs
    foreach (var m in Langs.meta.Where(m => m.StemmerClass == null && m.BreakerClass == null && langGuids.ContainsKey(m.Lang))) {
        var lg = langGuids[m.Lang];
        m.StemmerClass = lg.StemmerClass;
        m.BreakerClass = lg.BreakerClass;
    }
    foreach (var m in Langs.meta.Where(m => m.BreakerClass == null && scriptGuids.ContainsKey(m.ScriptId))) {
        m.BreakerClass = scriptGuids[m.ScriptId];
    }

    // alphabets
    var alphs = new LangMatrix(LangsDesignDirs.cldr + "alphaRoot.csv");
    var ignAlphas = new HashSet<string>() { "Hant", "Hans", "Jpan", "Kore" };
    Langs.meta.ForEach(m => {
        m.Alphabet = "";
        m.AlphabetUpper = null;
        if (!ignAlphas.Contains(m.ScriptId)) {
            // Concatenate column 0 of alphaRoot.csv for every name returned by getFullNames.
            Langs.getFullNames(m).ForEach(n => m.Alphabet += alphs[n.ToString(), 0]);
        }
        finishAlphabet(m);
        if (m.Alphabet.Length == 0) {
            m.Alphabet = null;
        } else {
            // Deduplicate and sort the characters.
            m.Alphabet = new String(m.Alphabet.Distinct().OrderBy(ch => ch).ToArray());
            // NOTE(review): ToUpper() uses the current culture, so the stored value can differ
            // between machines (e.g. under a Turkish culture) - confirm this is intended.
            m.AlphabetUpper = m.Alphabet.ToUpper();
        }
    });

    Langs.save();
}