/// <summary>
/// Reads both krad files and parses their content into kanji radical compositions.
/// </summary>
/// <remarks>
/// Comment lines and lines that do not contain the kanji separator (blank lines for
/// instance) are skipped. When a kanji appears more than once, only the first
/// composition encountered is kept.
/// </remarks>
/// <returns>Parsed kanji radicals compositions, keyed by kanji character.</returns>
private RadicalDictionary ParseKradFiles()
{
    RadicalDictionary composition = new RadicalDictionary();

    // Both files are read with the same codepage: resolve the encoding only once.
    Encoding encoding = Encoding.GetEncoding(KradFileCodepage);

    // Loop-invariant separator array for the radicals part.
    char[] radicalSeparators = new char[] { KradFileRadicalSeparator };

    // Browse the lines of the first file, then the lines of the second one.
    // Concat is sufficient: comment lines are ignored and repeated kanji are dropped
    // below, so the duplicate-line removal done by Union would change nothing.
    foreach (string line in File.ReadAllLines(PathHelper.KradFilePath, encoding)
        .Concat(File.ReadAllLines(PathHelper.KradFile2Path, encoding)))
    {
        // Test for a comment line.
        if (line.StartsWith(KradFileCommentStarter))
        {
            continue;
        }

        // Separate the kanji part and the radicals part.
        string[] split = line.Split(KradFileKanjiSeparator);
        if (split.Length < 2)
        {
            // No separator on this line (e.g. a blank line): there is no radicals
            // part to read, so skip it instead of indexing out of range.
            continue;
        }

        string kanjiCharacter = split[0].Trim();

        // Drop characters already added (some kanji are listed more than once).
        if (composition.ContainsKey(kanjiCharacter))
        {
            continue;
        }

        // Get the list of radicals by splitting the radicals part.
        string[] radicals = split[1].Split(radicalSeparators, StringSplitOptions.RemoveEmptyEntries);

        // Add the composition to the resulting dictionary.
        composition.Add(kanjiCharacter,
            radicals.Select(r => new RadicalValue() { Character = r }).ToArray());
    }

    return composition;
}
/// <summary>
/// Reads both krad files and parses their content into kanji radical compositions.
/// </summary>
/// <remarks>
/// Comment lines and lines that do not contain the kanji separator (blank lines for
/// instance) are skipped. When a kanji appears more than once, only the first
/// composition encountered is kept.
/// </remarks>
/// <returns>Parsed kanji radicals compositions, keyed by kanji character.</returns>
private RadicalDictionary ParseKradFiles()
{
    RadicalDictionary composition = new RadicalDictionary();

    // Both files are read with the same codepage: resolve the encoding only once.
    Encoding encoding = Encoding.GetEncoding(KradFileCodepage);

    // Loop-invariant separator array for the radicals part.
    char[] radicalSeparators = new char[] { KradFileRadicalSeparator };

    // Browse the lines of the first file, then the lines of the second one.
    // Concat is sufficient: comment lines are ignored and repeated kanji are dropped
    // below, so the duplicate-line removal done by Union would change nothing.
    foreach (string line in File.ReadAllLines(PathHelper.KradFilePath, encoding)
        .Concat(File.ReadAllLines(PathHelper.KradFile2Path, encoding)))
    {
        // Test for a comment line.
        if (line.StartsWith(KradFileCommentStarter))
        {
            continue;
        }

        // Separate the kanji part and the radicals part.
        string[] split = line.Split(KradFileKanjiSeparator);
        if (split.Length < 2)
        {
            // No separator on this line (e.g. a blank line): there is no radicals
            // part to read, so skip it instead of indexing out of range.
            continue;
        }

        string kanjiCharacter = split[0].Trim();

        // Drop characters already added (some kanji are listed more than once).
        if (composition.ContainsKey(kanjiCharacter))
        {
            continue;
        }

        // Get the list of radicals by splitting the radicals part.
        string[] radicals = split[1].Split(radicalSeparators, StringSplitOptions.RemoveEmptyEntries);

        // Add the composition to the resulting dictionary.
        composition.Add(kanjiCharacter,
            radicals.Select(r => new RadicalValue() { Character = r }).ToArray());
    }

    return composition;
}
/// <summary>
/// Inserts every kanji returned by <c>ReadKanjiDic2</c> into the database, then
/// inserts the strokes, meanings and kanji-radical join rows collected along the way.
/// </summary>
public override void Execute()
{
    // Dependent rows are buffered during the kanji pass and written afterwards.
    var pendingStrokes = new List<KanjiStrokes>();
    var pendingMeanings = new List<KanjiMeaning>();
    var pendingJoins = new List<KanjiRadicalJoinEntity>();

    using (var kanjiWriter = new SQLiteBulkInsert<KanjiEntity>(KanjiMaxCommit))
    {
        foreach (KanjiEntity kanji in ReadKanjiDic2())
        {
            // Composition of this kanji; empty when the dictionary has no entry for it.
            RadicalValue[] parts = _radicalDictionary.ContainsKey(kanji.Character)
                ? _radicalDictionary[kanji.Character]
                : new RadicalValue[0];

            // Attach each radical to the kanji and build the text used in the log line.
            string radicalsForLog = string.Empty;
            foreach (RadicalValue part in parts)
            {
                kanji.Radicals.Add(part.Radical);
                radicalsForLog += part.Character + " ";
            }

            // Look up the strokes (SVG) for the kanji before it is inserted.
            pendingStrokes.Add(RetrieveSvg(kanji));

            // Insert the kanji and keep the value returned by the insert as its ID.
            kanji.ID = kanjiWriter.Insert(kanji);

            // Buffer the meanings of the kanji.
            pendingMeanings.AddRange(kanji.Meanings);

            // Buffer one join row per radical; done after the insert so that
            // kanji.ID holds the value assigned above.
            foreach (RadicalEntity attached in kanji.Radicals)
            {
                pendingJoins.Add(new KanjiRadicalJoinEntity
                {
                    KanjiId = kanji.ID,
                    RadicalId = attached.ID
                });
            }

            KanjiCount++;

            _log.InfoFormat("Inserted kanji {0} ({1}) with radicals {2}",
                kanji.Character, kanji.ID, radicalsForLog);
        }
    }

    CloseZipArchive();

    // Write the buffered strokes.
    using (var strokesWriter = new SQLiteBulkInsert<KanjiStrokes>(KanjiMaxCommit))
    {
        foreach (KanjiStrokes item in pendingStrokes)
        {
            strokesWriter.Insert(item);
        }
    }

    // Write the buffered kanji meanings.
    KanjiMeaningCount = pendingMeanings.Count;
    _log.InfoFormat("Inserting {0} kanji meaning entities", KanjiMeaningCount);
    using (var meaningWriter = new SQLiteBulkInsert<KanjiMeaning>(int.MaxValue))
    {
        foreach (KanjiMeaning item in pendingMeanings)
        {
            meaningWriter.Insert(item);
        }
    }

    // Write the buffered kanji-radical join rows.
    KanjiRadicalCount = pendingJoins.Count;
    _log.InfoFormat("Inserting {0} kanji-radical join entities", KanjiRadicalCount);
    using (var joinWriter = new SQLiteBulkInsert<KanjiRadicalJoinEntity>(int.MaxValue))
    {
        foreach (KanjiRadicalJoinEntity item in pendingJoins)
        {
            joinWriter.Insert(item);
        }
    }
}