/// <summary>
/// Builds a new instance from the given radical dictionary.
/// Stores the dictionary, obtains a logger for the runtime type, then invokes
/// CreateJlptDictionary, CreateFrequencyRankDictionary and CreateWkDictionary.
/// </summary>
/// <param name="radicalDictionary">Radical dictionary kept by this instance.</param>
public KanjiEtl(RadicalDictionary radicalDictionary)
{
    // The parameterless base constructor is invoked implicitly.
    _radicalDictionary = radicalDictionary;
    _log = log4net.LogManager.GetLogger(GetType());

    // Build the lookup dictionaries, in the same order as before.
    CreateJlptDictionary();
    CreateFrequencyRankDictionary();
    CreateWkDictionary();
}
/// <summary>
/// Reads both krad files and returns a dictionary associating each kanji
/// with the radicals listed for it.
/// </summary>
/// <remarks>
/// Comment lines and lines that do not contain the kanji separator (for example
/// blank lines) are skipped. When a kanji is listed more than once, only its
/// first occurrence is kept.
/// </remarks>
/// <returns>Parsed kanji radicals compositions.</returns>
private RadicalDictionary ParseKradFiles()
{
    RadicalDictionary composition = new RadicalDictionary();

    // Both files share the same codepage: resolve the encoding only once.
    Encoding encoding = Encoding.GetEncoding(KradFileCodepage);
    char[] radicalSeparators = new char[] { KradFileRadicalSeparator };

    // Browse the lines of the first file followed by the lines of the second one.
    // Concat is enough here: repeated kanji are dropped by the key check below,
    // so there is no need to build a set of all lines as Union does.
    var lines = File.ReadAllLines(PathHelper.KradFilePath, encoding)
        .Concat(File.ReadAllLines(PathHelper.KradFile2Path, encoding));

    foreach (string line in lines)
    {
        // Test for a comment line.
        if (line.StartsWith(KradFileCommentStarter))
        {
            // Comment. Go to the next line.
            continue;
        }

        // Not a comment. Separate the kanji part and the radicals part.
        string[] split = line.Split(KradFileKanjiSeparator);
        if (split.Length < 2)
        {
            // No separator on this line (blank or malformed): there is no
            // radicals part to read, so skip it instead of indexing out of range.
            continue;
        }

        string kanjiCharacter = split[0].Trim();

        // Drop characters already added (there are some errors (?) in the files).
        if (composition.ContainsKey(kanjiCharacter))
        {
            continue;
        }

        // Get the list of radicals by splitting the radicals part.
        string[] radicals = split[1].Split(radicalSeparators, StringSplitOptions.RemoveEmptyEntries);

        // Add the composition to the resulting dictionary.
        composition.Add(
            kanjiCharacter,
            radicals.Select(r => new RadicalValue() { Character = r }).ToArray());
    }

    // Return the final dictionary.
    return composition;
}
/// <summary>
/// Reads both krad files and returns a dictionary associating each kanji
/// with the radicals listed for it.
/// </summary>
/// <remarks>
/// Comment lines and lines that do not contain the kanji separator (for example
/// blank lines) are skipped. When a kanji is listed more than once, only its
/// first occurrence is kept.
/// </remarks>
/// <returns>Parsed kanji radicals compositions.</returns>
private RadicalDictionary ParseKradFiles()
{
    RadicalDictionary composition = new RadicalDictionary();

    // Both files share the same codepage: resolve the encoding only once.
    Encoding encoding = Encoding.GetEncoding(KradFileCodepage);
    char[] radicalSeparators = new char[] { KradFileRadicalSeparator };

    // Browse the lines of the first file followed by the lines of the second one.
    // Concat is enough here: repeated kanji are dropped by the key check below,
    // so there is no need to build a set of all lines as Union does.
    var lines = File.ReadAllLines(PathHelper.KradFilePath, encoding)
        .Concat(File.ReadAllLines(PathHelper.KradFile2Path, encoding));

    foreach (string line in lines)
    {
        // Test for a comment line.
        if (line.StartsWith(KradFileCommentStarter))
        {
            // Comment. Go to the next line.
            continue;
        }

        // Not a comment. Separate the kanji part and the radicals part.
        string[] split = line.Split(KradFileKanjiSeparator);
        if (split.Length < 2)
        {
            // No separator on this line (blank or malformed): there is no
            // radicals part to read, so skip it instead of indexing out of range.
            continue;
        }

        string kanjiCharacter = split[0].Trim();

        // Drop characters already added (there are some errors (?) in the files).
        if (composition.ContainsKey(kanjiCharacter))
        {
            continue;
        }

        // Get the list of radicals by splitting the radicals part.
        string[] radicals = split[1].Split(radicalSeparators, StringSplitOptions.RemoveEmptyEntries);

        // Add the composition to the resulting dictionary.
        composition.Add(
            kanjiCharacter,
            radicals.Select(r => new RadicalValue() { Character = r }).ToArray());
    }

    // Return the final dictionary.
    return composition;
}