/// <summary>
/// Appends the fan-out variants of the document's canonical and alternative
/// hanzi to <c>d.HanziMatchable</c>.
/// </summary>
/// <param name="d">Document whose <c>HanziCanonical</c> and <c>HanziAlternatives</c>
/// are read and whose <c>HanziMatchable</c> collection is extended in place.</param>
private void AddFanoutHanzi(CikLingDto d)
{
    // Canonical form always comes first, followed by the alternatives in their stored order.
    string canonicalText = HanziToString(d.HanziCanonical);
    IEnumerable<string> alternativeTexts = d.HanziAlternatives.Select(alt => HanziToString(alt));

    string[] seedForms = new[] { canonicalText }
        .Concat(alternativeTexts)
        .ToArray();

    var variants = _hanziVariantsUtil.GetFanoutVariants(seedForms);
    d.HanziMatchable.AddRange(variants);
}
/// <summary>
/// Builds one <see cref="CikLingDto"/> per row of the new CikLing CSV, enriching each
/// document with alternative hanzi taken from the older CikLin CSV and with the
/// Zingzeu id from the mapping CSV, then writes a debug dump of every document.
/// </summary>
/// <remarks>
/// Rows whose final cannot be resolved by <c>GetFinal</c> are reported on the console
/// and skipped. As a side effect the string form of every emitted document is written
/// to <c>ciklin_index_debug.txt</c> inside the configured output folder.
/// </remarks>
/// <returns>The documents that were built, in the order of the new CSV's rows.</returns>
public IEnumerable<CikLingDto> Run()
{
    var jsonOutput = new List<string>();
    var documents = new List<CikLingDto>();
    IEnumerable<CikLinRow> cikLinRows;
    IDictionary<string, string> ciklingMapping;

    // Load only3km's CikLinBekin.
    using (var reader = new StreamReader(_cikLinCsvFile))
    using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
    {
        cikLinRows = csv.GetRecords<CikLinRow>().ToList();
    }

    // Index the old rows by id once so the per-row lookup below does not rescan the
    // whole list. A lookup (rather than a dictionary) tolerates repeated ids here;
    // SingleOrDefault on the matching group still throws only when a requested id
    // is actually duplicated.
    var oldRowsById = cikLinRows.ToLookup(row => row.Id);

    using (var reader = new StreamReader(_cikLingMappingFile))
    using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
    {
        ciklingMapping = csv.GetRecords<CikLingMappingRow>()
            .ToDictionary(r => r.CikLingId, r => r.ZingzeuId);
    }

    // Load new cikling.csv.
    using (var reader = new StreamReader(_newCikLingCsvFile))
    using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
    {
        foreach (var r in csv.GetRecords<NewCikLingRow>())
        {
            var final = GetFinal(r);
            if (!final.HasValue)
            {
                Console.WriteLine($"Skipping {r.Id}, unknown Final: {r.FinalCik}{r.FinalLing}");
                continue;
            }

            var document = new CikLingDto
            {
                HanziCanonical = StringToHanziProto(r.Hanzi),
                Initial = GetInitial(r),
                Final = final.Value,
                Tone = StringToTone[r.Tone],
                CikLinSourceInfo = new CikLinSourceInfo()
                {
                    ExplanationCik = CleanExplanation(r.ExplanationCik),
                    ExplanationLing = CleanExplanation(r.ExplanationLing)
                }
            };

            var oldRow = oldRowsById[r.Id].SingleOrDefault();
            document.Buc = ToBucString(document.Initial, document.Final, document.Tone);

            // The old CSV may supply up to two extra hanzi forms for the same entry.
            if (!string.IsNullOrEmpty(oldRow?.HanziEquiv))
            {
                document.HanziAlternatives.Add(StringToHanziProto(oldRow.HanziEquiv));
            }

            if (!string.IsNullOrEmpty(oldRow?.HanziAlt))
            {
                document.HanziAlternatives.Add(StringToHanziProto(oldRow.HanziAlt));
            }

            // Must run after the alternatives are attached: fan-out reads them.
            AddFanoutHanzi(document);

            string zingzeuId;
            if (ciklingMapping.TryGetValue(r.Id, out zingzeuId))
            {
                document.ZingzeuId = zingzeuId;
            }

            documents.Add(document);
            jsonOutput.Add(document.ToString());
        }
    }

    File.WriteAllLines(Path.Combine(_outputFolder, "ciklin_index_debug.txt"), jsonOutput);
    return documents;
}