/// <summary>
/// Handles a single line of CEDICT-format input: parses it, then indexes and stores the entry.
/// !! Performs no duplicate check, so it cannot be used to update existing entries.
/// </summary>
/// <remarks>
/// Returns before anything is indexed or stored when the line does not parse to an entry, or when
/// <c>checkRestrictions</c> throws for it. On every 3000th line the current transaction is
/// committed and a new one is started.
/// </remarks>
/// <param name="line">One line of CEDICT input.</param>
public void AddEntry(string line)
{
    lineNum++;

    // Every 3000th line: commit the running transaction and open a fresh one
    bool batchComplete = (lineNum % 3000) == 0;
    if (batchComplete)
    {
        tr.Commit();
        tr.Dispose();
        tr = null;
        tr = conn.BeginTransaction();
    }

    // Turn the raw CEDICT line into an entry; nothing to do if that yields no entry
    CedictEntry parsed = CedictCompiler.ParseEntry(line, lineNum, swLog, swDropped);
    if (parsed == null)
    {
        return;
    }

    string headword;
    string target;
    parsed.GetCedict(out headword, out target);

    // Restriction check: any exception thrown here means the entry is dropped
    try
    {
        checkRestrictions(parsed.ChSimpl, target);
    }
    catch
    {
        return;
    }

    // Serialize + index first, then fill the entries table using the resulting binary ID
    int binaryId = indexEntry(parsed);
    int newEntryId = storeEntry(parsed.ChSimpl, headword, target, binaryId);

    if (!foldHistory)
    {
        // Verbose (per-entry) history: record a dedicated change for this entry...
        cmdInsModifNew.Parameters["@timestamp"].Value = DateTime.UtcNow;
        cmdInsModifNew.Parameters["@user_id"].Value = userId;
        cmdInsModifNew.Parameters["@note"].Value = note;
        cmdInsModifNew.Parameters["@entry_id"].Value = newEntryId;
        cmdInsModifNew.ExecuteNonQuery();
        int newModifId = (int)cmdInsModifNew.LastInsertedId;

        // ...and point the entry at that change as its last modification
        cmdUpdLastModif.Parameters["@entry_id"].Value = newEntryId;
        cmdUpdLastModif.Parameters["@last_modif_id"].Value = newModifId;
        cmdUpdLastModif.ExecuteNonQuery();
        return;
    }

    // Folded history: mark the new entry as affected by this bulk operation
    cmdInsBulkModif.Parameters["@modif_id"].Value = modifId;
    cmdInsBulkModif.Parameters["@entry_id"].Value = newEntryId;
    cmdInsBulkModif.ExecuteNonQuery();
}
/// <summary>
/// Parses one dictionary line; if it yields a usable entry, serializes it to <paramref name="bw"/>
/// and remembers the entry's file position under the hash of its simplified headword.
/// </summary>
/// <remarks>
/// An entry is treated as unusable when parsing returns null, or when the simplified length differs
/// from the traditional length or from the pinyin count. Unusable lines only bump the matching
/// dropped counter.
/// </remarks>
/// <param name="line">The raw line to parse.</param>
/// <param name="cedict">
/// True to use the CEDICT dropped counter and hash table (<c>cedictDropped</c>, <c>cedictHashPoss</c>);
/// false to use the <c>hddDropped</c> / <c>hddHashPoss</c> pair.
/// </param>
/// <param name="bw">Writer that receives the serialized entry.</param>
public void DictLine(string line, bool cedict, BinWriter bw)
{
    // Parse entry
    CedictEntry entry = CedictCompiler.ParseEntry(line);

    // Simplified, traditional and pinyin must all be of equal length; otherwise treat as unparsed
    if (entry != null
        && (entry.ChSimpl.Length != entry.ChTrad.Length || entry.ChSimpl.Length != entry.PinyinCount))
    {
        entry = null;
    }

    // Just count if there is nothing usable
    if (entry == null)
    {
        if (cedict)
        {
            ++cedictDropped;
        }
        else
        {
            ++hddDropped;
        }
        return;
    }

    // Remember where this record starts before anything is written
    int fpos = bw.Position;
    // First: hash chain slot (next entry in file with same hash). Placeholder, filled in later.
    bw.WriteInt(0);
    // Then the entry itself
    entry.Serialize(bw);

    // Hash simplified headword and remember the file position under that hash
    int hash = CedictEntry.Hash(entry.ChSimpl);
    Dictionary<int, List<int>> hashPoss = cedict ? cedictHashPoss : hddHashPoss;
    List<int> poss;
    // Single lookup instead of ContainsKey + indexer
    if (!hashPoss.TryGetValue(hash, out poss))
    {
        poss = new List<int>();
        hashPoss[hash] = poss;
    }
    poss.Add(fpos);
}
/// <summary>
/// Command-line entry point: compiles a CEDICT file and a MakeMeAHanzi file into a dictionary file,
/// then tries to open the compiled file as a sanity check.
/// </summary>
/// <param name="args">
/// Exactly five arguments: CEDICT input file, MakeMeAHanzi input file, compiled dictionary file,
/// CEDICT release date (YYYY-MM-DD), and the folder for diagnostics/log/kept/dropped data.
/// </param>
/// <returns>
/// 0 on success; -1 when the argument count is wrong or an exception is caught while compiling
/// or while opening the compiled dictionary.
/// </returns>
static int Main(string[] args)
{
    if (args.Length != 5)
    {
        string[] usage =
        {
            "Takes these arguments:",
            "1: CEDICT input file",
            "2: MakeMeAHanzi input file",
            "3: Compiled dictionary file",
            "4: Date of CEDICT release in YYYY-MM-DD format",
            "5: Folder for diagnostics/log/kept/dropped data",
        };
        foreach (string usageLine in usage)
        {
            Console.WriteLine(usageLine);
        }
        waitForEnterIfDebuggerAttached();
        return -1;
    }

    string compiledPath = args[2];
    string diagFolder = args[4];

    StreamReader cedictReader = null;
    StreamReader hanziReader = null;
    StreamWriter logWriter = null;
    StreamWriter keptWriter = null;
    StreamWriter droppedWriter = null;
    CedictCompiler compiler = null;
    try
    {
        cedictReader = new StreamReader(args[0]);
        hanziReader = new StreamReader(args[1]);
        DateTime releaseDate = parseDate(args[3]);
        logWriter = new StreamWriter(Path.Combine(diagFolder, "ccomp.log"));
        string keptPath = Path.Combine(diagFolder, "cc-kept.txt");
        string droppedPath = Path.Combine(diagFolder, "cc-drop.txt");
        keptWriter = new StreamWriter(keptPath, false, Encoding.UTF8);
        droppedWriter = new StreamWriter(droppedPath, false, Encoding.UTF8);
        compiler = new CedictCompiler();

        // Compile dictionary proper
        string current;
        while ((current = cedictReader.ReadLine()) != null)
        {
            compiler.ProcessLine(current, logWriter, keptWriter, droppedWriter);
        }
        // Compile MakeMeAHanzi
        while ((current = hanziReader.ReadLine()) != null)
        {
            compiler.ProcessHanziLine(current, logWriter);
        }
        compiler.WriteResults(releaseDate, compiledPath, diagFolder);
    }
    catch (Exception ex)
    {
        // The pause happens here, i.e. before the finally block releases anything
        Console.WriteLine(ex.ToString());
        waitForEnterIfDebuggerAttached();
        return -1;
    }
    finally
    {
        // Compiler first, then readers and writers in their declaration order
        if (compiler != null)
        {
            compiler.Dispose();
        }
        IDisposable[] streams = { cedictReader, hanziReader, logWriter, keptWriter, droppedWriter };
        foreach (IDisposable stream in streams)
        {
            if (stream != null)
            {
                stream.Dispose();
            }
        }
    }

    // Double-check result: does the compiled dictionary open?
    int exitCode = 0;
    try
    {
        DictEngine probe = new DictEngine(compiledPath, new FontCoverageFull());
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
        exitCode = -1;
    }
    waitForEnterIfDebuggerAttached();
    return exitCode;
}

/// <summary>
/// When a debugger is attached, prompts with "Press Enter..." and blocks until a line is read
/// from the console; otherwise does nothing.
/// </summary>
private static void waitForEnterIfDebuggerAttached()
{
    if (!Debugger.IsAttached)
    {
        return;
    }
    Console.WriteLine("Press Enter...");
    Console.ReadLine();
}