Exemple #1
0
            /// <summary>
            /// Handles a single line of CEDICT-format input: parses it, indexes the resulting
            /// entry, stores it in the entries table and records the change in the history.
            /// !! Performs no duplicate check, so it cannot be used to update existing entries.
            /// </summary>
            /// <param name="line">One raw line in the CEDICT format.</param>
            public void AddEntry(string line)
            {
                ++lineNum;

                // Every 3000 lines, commit the running transaction and start a new one
                bool batchBoundary = (lineNum % 3000) == 0;
                if (batchBoundary)
                {
                    tr.Commit();
                    tr.Dispose();
                    tr = null;
                    tr = conn.BeginTransaction();
                }

                // Nothing more to do when parsing yields no entry
                CedictEntry parsed = CedictCompiler.ParseEntry(line, lineNum, swLog, swDropped);
                if (parsed == null)
                {
                    return;
                }

                string headword;
                string target;
                parsed.GetCedict(out headword, out target);

                // Any exception thrown by the restriction check means the entry is dropped
                try
                {
                    checkRestrictions(parsed.ChSimpl, target);
                }
                catch
                {
                    return;
                }

                // Index first; the entries table row refers to the ID returned by indexing
                int binaryId = indexEntry(parsed);
                int storedId = storeEntry(parsed.ChSimpl, headword, target, binaryId);

                if (!foldHistory)
                {
                    // Verbose (per-entry) history: insert a dedicated modification record...
                    cmdInsModifNew.Parameters["@timestamp"].Value = DateTime.UtcNow;
                    cmdInsModifNew.Parameters["@user_id"].Value = userId;
                    cmdInsModifNew.Parameters["@note"].Value = note;
                    cmdInsModifNew.Parameters["@entry_id"].Value = storedId;
                    cmdInsModifNew.ExecuteNonQuery();
                    int newModifId = (int)cmdInsModifNew.LastInsertedId;

                    // ...and point the entry at it as its last modification
                    cmdUpdLastModif.Parameters["@entry_id"].Value = storedId;
                    cmdUpdLastModif.Parameters["@last_modif_id"].Value = newModifId;
                    cmdUpdLastModif.ExecuteNonQuery();
                    return;
                }

                // Folded history: attach the new entry to the existing bulk modification
                cmdInsBulkModif.Parameters["@modif_id"].Value = modifId;
                cmdInsBulkModif.Parameters["@entry_id"].Value = storedId;
                cmdInsBulkModif.ExecuteNonQuery();
            }
Exemple #2
0
        /// <summary>
        /// Parses one dictionary line, serializes the resulting entry through <paramref name="bw"/>,
        /// and remembers the entry's start position under the hash of its simplified headword.
        /// Lines that fail to parse, or whose simplified/traditional/pinyin lengths disagree,
        /// are only counted as dropped.
        /// </summary>
        /// <param name="line">Raw dictionary line to parse.</param>
        /// <param name="cedict">
        /// True to use the cedict drop counter and hash map; false to use the hdd ones.
        /// </param>
        /// <param name="bw">Writer that receives the serialized entry.</param>
        public void DictLine(string line, bool cedict, BinWriter bw)
        {
            // Parse entry
            CedictEntry entry = CedictCompiler.ParseEntry(line);

            // Treat an entry whose simp, trad and pinyin lengths disagree like a failed parse
            if (entry != null &&
                (entry.ChSimpl.Length != entry.ChTrad.Length || entry.ChSimpl.Length != entry.PinyinCount))
            {
                entry = null;
            }
            // Just count if failed to parse
            if (entry == null)
            {
                if (cedict)
                {
                    ++cedictDropped;
                }
                else
                {
                    ++hddDropped;
                }
                return;
            }
            // Remember where this record starts before anything is written
            int fpos = bw.Position;

            // First: hash chain: next entry in file with same hash. Will fill later.
            bw.WriteInt(0);
            // Then, entry itself
            entry.Serialize(bw);

            // Hash simplified and remember file position
            int hash = CedictEntry.Hash(entry.ChSimpl);
            Dictionary<int, List<int>> hashPoss = cedict ? cedictHashPoss : hddHashPoss;

            // Single lookup: fetch the position list for this hash, creating it on first use
            List<int> poss;
            if (!hashPoss.TryGetValue(hash, out poss))
            {
                poss = new List<int>();
                hashPoss[hash] = poss;
            }
            poss.Add(fpos);
        }
Exemple #3
0
        /// <summary>
        /// Entry point: feeds the CEDICT and MakeMeAHanzi input files through a
        /// <c>CedictCompiler</c>, writes the results, then tries to open the compiled
        /// dictionary as a sanity check.
        /// </summary>
        /// <param name="args">Exactly five arguments, as listed in the usage text below.</param>
        /// <returns>0 on success; -1 on a wrong argument count or any exception.</returns>
        static int Main(string[] args)
        {
            if (args.Length != 5)
            {
                Console.WriteLine("Takes these arguments:");
                Console.WriteLine("1: CEDICT input file");
                Console.WriteLine("2: MakeMeAHanzi input file");
                Console.WriteLine("3: Compiled dictionary file");
                Console.WriteLine("4: Date of CEDICT release in YYYY-MM-DD format");
                Console.WriteLine("5: Folder for diagnostics/log/kept/dropped data");
                pauseIfDebuggerAttached();
                return -1;
            }

            // Give the positional arguments readable names
            string cedictPath = args[0];
            string hanziPath = args[1];
            string dictPath = args[2];
            string releaseDate = args[3];
            string diagFolder = args[4];

            StreamReader cedictReader = null;
            StreamReader hanziReader = null;
            StreamWriter log = null;
            StreamWriter keptWriter = null;
            StreamWriter droppedWriter = null;
            CedictCompiler compiler = null;

            try
            {
                cedictReader = new StreamReader(cedictPath);
                hanziReader = new StreamReader(hanziPath);
                DateTime date = parseDate(releaseDate);
                log = new StreamWriter(Path.Combine(diagFolder, "ccomp.log"));
                keptWriter = new StreamWriter(Path.Combine(diagFolder, "cc-kept.txt"), false, Encoding.UTF8);
                droppedWriter = new StreamWriter(Path.Combine(diagFolder, "cc-drop.txt"), false, Encoding.UTF8);
                compiler = new CedictCompiler();

                // Compile dictionary proper
                string current = cedictReader.ReadLine();
                while (current != null)
                {
                    compiler.ProcessLine(current, log, keptWriter, droppedWriter);
                    current = cedictReader.ReadLine();
                }
                // Compile MakeMeAHanzi
                current = hanziReader.ReadLine();
                while (current != null)
                {
                    compiler.ProcessHanziLine(current, log);
                    current = hanziReader.ReadLine();
                }
                compiler.WriteResults(date, dictPath, diagFolder);
            }
            catch (Exception ex)
            {
                // Report and pause here, i.e. before the finally block releases anything
                Console.WriteLine(ex.ToString());
                pauseIfDebuggerAttached();
                return -1;
            }
            finally
            {
                // Compiler goes first, then readers and writers in the order they were opened
                if (compiler != null)
                {
                    compiler.Dispose();
                }
                if (cedictReader != null)
                {
                    cedictReader.Dispose();
                }
                if (hanziReader != null)
                {
                    hanziReader.Dispose();
                }
                if (log != null)
                {
                    log.Dispose();
                }
                if (keptWriter != null)
                {
                    keptWriter.Dispose();
                }
                if (droppedWriter != null)
                {
                    droppedWriter.Dispose();
                }
            }

            // Double-check result: does the freshly written dictionary open?
            try
            {
                DictEngine probe = new DictEngine(dictPath, new FontCoverageFull());
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
                pauseIfDebuggerAttached();
                return -1;
            }

            pauseIfDebuggerAttached();
            return 0;
        }

        /// <summary>
        /// When a debugger is attached, prompts for Enter and waits for a line of input.
        /// </summary>
        static void pauseIfDebuggerAttached()
        {
            if (Debugger.IsAttached)
            {
                Console.WriteLine("Press Enter...");
                Console.ReadLine();
            }
        }