/// <summary>
/// Appends to <c>d.HanziMatchable</c> the fan-out variants computed from the document's
/// canonical hanzi and all of its alternative hanzi.
/// </summary>
/// <param name="d">Document whose <c>HanziCanonical</c> and <c>HanziAlternatives</c> are read
/// and whose <c>HanziMatchable</c> collection is extended in place.</param>
private void AddFanoutHanzi(DfdDto d)
{
    // Canonical form first, then every alternative, each converted with HanziToString.
    var sourceHanzi = new List<string> { HanziToString(d.HanziCanonical) };
    foreach (var alternative in d.HanziAlternatives)
    {
        sourceHanzi.Add(HanziToString(alternative));
    }

    // The variants helper takes an array; whatever it returns is appended as-is.
    d.HanziMatchable.AddRange(_hanziVariantsUtil.GetFanoutVariants(sourceHanzi.ToArray()));
}
/// <summary>
/// Reads the CSV file at <c>_dfdCharactersFile</c> as <c>DFDRow</c> records, converts each row
/// into a <c>DfdDto</c>, writes a debug dump of the converted documents, and returns them.
/// </summary>
/// <remarks>
/// A row whose conversion throws is reported on the console (with the failure reason) and
/// skipped; processing continues with the next row. The string form of every converted
/// document is written, one per line, to <c>dfd_index_debug.txt</c> inside <c>_outputFolder</c>.
/// </remarks>
/// <returns>The documents built from the rows that were converted successfully.</returns>
public IEnumerable<DfdDto> Run()
{
    var debugLines = new List<string>();
    var documents = new List<DfdDto>();

    using (var reader = new StreamReader(_dfdCharactersFile))
    using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
    {
        foreach (var r in csv.GetRecords<DFDRow>())
        {
            try
            {
                var (sInitial, sFinal, sTone) = Parse(r.Buc);

                var document = new DfdDto
                {
                    HanziCanonical = StringToHanziProto(r.Hanzi),
                    Initial = sInitial,
                    Final = sFinal,
                    Tone = sTone,
                    Buc = r.Buc,
                    DFDSourceInfo = new DFDSourceInfo()
                    {
                        PageNumber = r.PageNumber,
                        ColumnNumber = r.ColumnNumber,
                        LineNumber = r.LineNumber,
                        RadicalId = r.RadicalId
                    }
                };

                // Null-safe check: a missing alternative must not throw and cause an
                // otherwise valid row to be skipped.
                if (r.HanziAlt?.Length > 0)
                {
                    document.HanziAlternatives.Add(StringToHanziProto(r.HanziAlt));
                }

                AddFanoutHanzi(document);

                // Build the debug line before touching either list so that a failure here
                // cannot leave a row that is reported as skipped in the returned documents.
                var debugLine = document.ToString();
                documents.Add(document);
                debugLines.Add(debugLine);
            }
            catch (Exception ex)
            {
                // Best-effort import: keep going, but say why the row was dropped.
                Console.WriteLine($"Skipping {r.Id} {r.Hanzi}: {ex.Message}");
            }
        }
    }

    File.WriteAllLines(Path.Combine(_outputFolder, "dfd_index_debug.txt"), debugLines);
    return documents;
}