Ejemplo n.º 1
0
        /// <summary>
        /// Appends to <c>d.HanziMatchable</c> the variants that
        /// <c>_hanziVariantsUtil.GetFanoutVariants</c> returns for the document's
        /// canonical hanzi followed by each of its alternatives.
        /// </summary>
        /// <param name="d">Document whose <c>HanziMatchable</c> collection is extended in place.</param>
        private void AddFanoutHanzi(CikLingDto d)
        {
            // Canonical form goes first, then the alternatives in their stored order.
            var sourceHanzi = new List<string> { HanziToString(d.HanziCanonical) };
            foreach (var alternative in d.HanziAlternatives)
            {
                sourceHanzi.Add(HanziToString(alternative));
            }

            var variants = _hanziVariantsUtil.GetFanoutVariants(sourceHanzi.ToArray());
            d.HanziMatchable.AddRange(variants);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds one <see cref="CikLingDto"/> per row of the new CikLing CSV, enriching each
        /// with alternative hanzi taken from the old CikLin CSV (matched by row Id) and with a
        /// Zingzeu id taken from the mapping CSV (when the row Id has a mapping).
        /// Rows for which <c>GetFinal</c> yields no value are reported on the console and skipped.
        /// </summary>
        /// <remarks>
        /// As a side effect, writes the <c>ToString()</c> of every produced document, one per line,
        /// to <c>ciklin_index_debug.txt</c> inside <c>_outputFolder</c>.
        /// If more than one old row shares the Id of a processed new row, the lookup throws
        /// (<c>SingleOrDefault</c> semantics).
        /// </remarks>
        /// <returns>The documents built from the rows that were not skipped, in CSV order.</returns>
        public IEnumerable<CikLingDto> Run()
        {
            var debugLines = new List<string>();
            var documents = new List<CikLingDto>();

            List<CikLinRow> cikLinRows;
            IDictionary<string, string> ciklingMapping;

            // Load only3km's CikLinBekin (the old rows).
            using (var reader = new StreamReader(_cikLinCsvFile))
            using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
            {
                cikLinRows = csv.GetRecords<CikLinRow>().ToList();
            }

            // Index the old rows by Id once, so the per-row match below is a hash lookup
            // instead of a full scan of the list. A lookup (rather than a dictionary) keeps
            // duplicate Ids from failing here; they only fail when such an Id is actually queried.
            var oldRowsById = cikLinRows.ToLookup(row => row.Id);

            // Load the CikLing id -> Zingzeu id mapping.
            using (var reader = new StreamReader(_cikLingMappingFile))
            using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
            {
                ciklingMapping = csv.GetRecords<CikLingMappingRow>()
                                    .ToDictionary(r => r.CikLingId, r => r.ZingzeuId);
            }

            // Load new cikling.csv and build the documents.
            using (var reader = new StreamReader(_newCikLingCsvFile))
            using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
            {
                foreach (var r in csv.GetRecords<NewCikLingRow>())
                {
                    var final = GetFinal(r);
                    if (!final.HasValue)
                    {
                        Console.WriteLine($"Skipping {r.Id}, unknown Final: {r.FinalCik}{r.FinalLing}");
                        continue;
                    }

                    var document = new CikLingDto
                    {
                        HanziCanonical   = StringToHanziProto(r.Hanzi),
                        Initial          = GetInitial(r),
                        Final            = final.Value,
                        Tone             = StringToTone[r.Tone],
                        CikLinSourceInfo = new CikLinSourceInfo()
                        {
                            ExplanationCik  = CleanExplanation(r.ExplanationCik),
                            ExplanationLing = CleanExplanation(r.ExplanationLing)
                        }
                    };

                    // Null when no old row has this Id; throws when several do.
                    var oldRow = oldRowsById[r.Id].SingleOrDefault();
                    document.Buc = ToBucString(document.Initial, document.Final, document.Tone);

                    if (!string.IsNullOrEmpty(oldRow?.HanziEquiv))
                    {
                        document.HanziAlternatives.Add(StringToHanziProto(oldRow.HanziEquiv));
                    }
                    if (!string.IsNullOrEmpty(oldRow?.HanziAlt))
                    {
                        document.HanziAlternatives.Add(StringToHanziProto(oldRow.HanziAlt));
                    }

                    // Must run after the alternatives are added: the fan-out reads them.
                    AddFanoutHanzi(document);

                    // Single dictionary probe instead of ContainsKey + indexer.
                    string zingzeuId;
                    if (ciklingMapping.TryGetValue(r.Id, out zingzeuId))
                    {
                        document.ZingzeuId = zingzeuId;
                    }

                    documents.Add(document);
                    debugLines.Add(document.ToString());
                }
            }

            File.WriteAllLines(Path.Combine(_outputFolder, "ciklin_index_debug.txt"), debugLines);
            return documents;
        }