예제 #1
0
        /// <summary>
        /// Reads "handedict.u8" line by line and appends one <c>ResItem</c> to <c>items</c> for every
        /// line that parses to an entry whose simplified headword is at most 16 characters long.
        /// Each item gets a unique ID drawn from a fixed-seed random generator, so repeated runs over
        /// the same input produce the same IDs.
        /// </summary>
        public void Work()
        {
            var idSource    = new Random(0);
            var entryParser = new CedictParser();
            var usedIds     = new HashSet<int>();
            var text        = new StringBuilder();

            using (FileStream input = new FileStream("handedict.u8", FileMode.Open, FileAccess.Read))
            using (StreamReader reader = new StreamReader(input))
            {
                for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
                {
                    // Lines starting with '#' are skipped without parsing.
                    if (row.StartsWith("#"))
                    {
                        continue;
                    }

                    // Skip lines the parser rejects, and entries with an overlong simplified headword.
                    CedictEntry parsed = entryParser.ParseEntry(row, 0, null);
                    if (parsed == null || parsed.ChSimpl.Length > 16)
                    {
                        continue;
                    }

                    // Draw until we hit an ID not handed out before; Add() reports whether it was new.
                    int freshId;
                    do
                    {
                        freshId = idSource.Next();
                    }
                    while (!usedIds.Add(freshId));

                    string idText   = EntryId.IdToString(freshId);
                    string statusTag = isVerified(parsed) ? "Stat-Verif" : "Stat-New";

                    // Three lines per item: ID line, first-version metainfo line, the entry itself.
                    text.Length = 0;
                    text.Append("# ID-").AppendLine(idText);
                    text.Append("# Ver 2011-05-28T01:27:49Z HanDeDict ")
                        .Append(statusTag)
                        .AppendLine(" 001>Originalversion HanDeDict-Datei");
                    text.AppendLine(CedictWriter.Write(parsed));

                    ResItem produced = new ResItem { ID = freshId, Lines = text.ToString() };
                    items.Add(produced);
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Reads the dictionary from <c>srDict</c>, parses each non-comment line, and writes every
        /// line whose re-serialized form differs from the input to <c>swTrip</c> (original first,
        /// re-serialized second). Afterwards the contents of <c>tags</c> are written to
        /// "hdd-tags.txt", ordered by descending count.
        /// </summary>
        public void Work()
        {
            for (string raw = srDict.ReadLine(); raw != null; raw = srDict.ReadLine())
            {
                // Comment lines still count towards the line number passed to the parser.
                ++lineNum;
                if (raw.StartsWith("#"))
                {
                    continue;
                }

                CedictEntry parsed = parser.ParseEntry(raw, lineNum, swDiag);
                if (parsed == null)
                {
                    continue;
                }

                // Record lines that do not survive a parse/write round trip unchanged.
                string rewritten = CedictWriter.Write(parsed);
                if (rewritten != raw)
                {
                    swTrip.WriteLine(raw);
                    swTrip.WriteLine(rewritten);
                }
                // NOTE(review): presumably updates the counts in `tags` — confirm in countTags.
                countTags(raw);
            }

            // Flatten the tag table into a list so it can be sorted by count.
            List<TC> byCount = new List<TC>();
            foreach (var pair in tags)
            {
                TC tally = new TC { Tag = pair.Key, Count = pair.Value };
                byCount.Add(tally);
            }
            // Highest count first.
            byCount.Sort((left, right) => right.Count.CompareTo(left.Count));

            using (FileStream tagFile = new FileStream("hdd-tags.txt", FileMode.Create, FileAccess.ReadWrite))
            using (StreamWriter tagWriter = new StreamWriter(tagFile))
            {
                foreach (var tally in byCount)
                {
                    tagWriter.WriteLine(tally.Count + "\t" + tally.Tag);
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Appends the HTML for one history item (a single change) to <paramref name="sb"/>.
        /// The item consists of a head (icon, summary line, note) followed by the affected entry;
        /// for edits, old and new versions of the changed parts are rendered side by side.
        /// </summary>
        /// <param name="sb">Builder that receives the markup.</param>
        /// <param name="ci">The change to render.</param>
        /// <param name="trailingSeparator">If true, a <c>historySep</c> div is appended after the item.</param>
        /// <param name="lang">Language code used for localized strings, link URLs and entry rendering.</param>
        /// <param name="parser">Parser used to rebuild entries from the stored head and body text.</param>
        /// <param name="extraItemClass">Optional additional CSS class for the item's outer div.</param>
        private static void histRenderChange(StringBuilder sb, ChangeItem ci, bool trailingSeparator, string lang,
                                             CedictParser parser, string extraItemClass = "")
        {
            var tprov = TextProvider.Instance;

            sb.AppendLine();
            string itemClass = "historyItem";

            if (!string.IsNullOrEmpty(extraItemClass))
            {
                itemClass += " " + extraItemClass;
            }
            // Outer container; the entry ID is only attached when the change has one (non-negative).
            if (ci.EntryId >= 0)
            {
                sb.AppendLine("<div class='" + itemClass + "' data-entry-id='" + EntryId.IdToString(ci.EntryId) + "'>");
            }
            else
            {
                sb.AppendLine("<div class='" + itemClass + "'>");
            }
            sb.AppendLine("<div class='changeHead'>");

            // Pick the icon for the change type; status changes are further split by the resulting status.
            string iconClass = "";

            if (ci.ChangeType == ChangeType.New)
            {
                iconClass = "fa fa-lightbulb-o ctNew";
            }
            else if (ci.ChangeType == ChangeType.Edit)
            {
                iconClass = "fa fa-pencil-square-o ctEdit";
            }
            else if (ci.ChangeType == ChangeType.Note)
            {
                iconClass = "fa fa-commenting-o ctNote";
            }
            else if (ci.ChangeType == ChangeType.BulkImport)
            {
                iconClass = "fa fa-newspaper-o ctBulk";
            }
            else if (ci.ChangeType == ChangeType.StatusChange)
            {
                if (ci.EntryStatus == EntryStatus.Approved)
                {
                    iconClass = "fa fa-check-square-o ctApprove";
                }
                else if (ci.EntryStatus == EntryStatus.Flagged)
                {
                    iconClass = "fa fa-flag-o ctFlag";
                }
                else
                {
                    iconClass = "fa fa-flag-o ctUnflag";
                }
            }

            // <i> is not a void element: "<i ... />" leaves the tag open in an HTML parser,
            // so it is closed explicitly.
            sb.Append("<i class='" + iconClass + "'></i>");
            sb.Append("<div class='changeSummary'>");

            string changeMsg = getChangeTypeStr(ci.ChangeType, ci.CountB, ci.EntryStatus, lang);
            string changeCls = "changeType";

            sb.Append("<span class='" + changeCls + "'>");
            sb.Append(HtmlEncoder.Default.Encode(changeMsg));
            sb.Append(" &bull; </span>");

            sb.Append("<span class='changeUser'>");
            sb.Append(HtmlEncoder.Default.Encode(ci.User));
            sb.Append("</span>");

            sb.Append("<span class='changeTime'>");
            sb.Append(HtmlEncoder.Default.Encode(getTimeStr(ci.When, lang)));
            sb.Append("</span>");

            // For non-bulk changes a positive CountA is shown as a "+N" marker.
            // NOTE(review): presumably the number of earlier changes that can be revealed — confirm.
            if (ci.ChangeType != ChangeType.BulkImport && ci.CountA > 0)
            {
                sb.Append("<span class='revealPast'>+" + ci.CountA.ToString() + "</span>");
            }

            sb.Append("</div>"); // <div class='changeSummary'>

            sb.AppendLine("<div class='changeNote'>");
            if (ci.ChangeType == ChangeType.BulkImport)
            {
                // For bulk imports CountA/CountB feed the "new words" / "changed words" messages.
                string newCount = null;
                string chgCount = null;
                if (ci.CountA > 0)
                {
                    newCount = tprov.GetString(lang, "history.bulkNewWords");
                    newCount = string.Format(newCount, ci.CountA);
                }
                if (ci.CountB > 0)
                {
                    chgCount = tprov.GetString(lang, "history.bulkChangedWords");
                    chgCount = string.Format(chgCount, ci.CountB);
                }
                if (chgCount != null || newCount != null)
                {
                    sb.Append("<p>");
                    if (newCount != null)
                    {
                        sb.Append(HtmlEncoder.Default.Encode(newCount));
                    }
                    if (newCount != null && chgCount != null)
                    {
                        sb.Append(" &bull; ");
                    }
                    if (chgCount != null)
                    {
                        sb.Append(HtmlEncoder.Default.Encode(chgCount));
                    }
                    sb.Append("</p>");
                }
                sb.Append("<span class='bulkLink'>[");
                sb.Append("<a href='/" + lang + "/read/details/change-" + ci.BulkRef.ToString("000") + "' target='_blank'>");
                sb.Append(tprov.GetString(lang, "history.bulkLink") + "</a>");
                sb.Append("]</span> ");
            }
            sb.Append("<span class='changeNoteText'>");
            string note = HtmlEncoder.Default.Encode(ci.Note);

            // Line feeds come out of the encoder as "&#xA;"; turn them into visible line breaks.
            note = note.Replace("&#xA;", "<br/>");
            sb.Append(note);
            sb.Append("</span>");
            sb.Append("</div>"); // <div class='changeNote'>

            sb.Append("</div>"); // <div class='changeHead'>

            sb.AppendLine("<div class='changeEntry'>");
            // Bulk imports have no single entry to show; everything below is per-entry.
            if (ci.ChangeType != ChangeType.BulkImport)
            {
                // Per-entry operations: edit link, comment and flag icons.
                sb.AppendLine("<div class='histEntryOps'>");
                sb.Append("<a class='ajax' href='/" + lang + "/edit/existing/" + EntryId.IdToString(ci.EntryId) + "'>");
                sb.Append("<i class='opHistEdit fa fa-pencil'></i></a>");
                sb.Append("<i class='opHistComment fa fa-commenting-o'></i>");
                sb.Append("<i class='opHistFlag fa fa-flag-o'></i>");
                sb.Append("</div>"); // <div class='histEntryOps'>

                // NOT edited
                if (ci.BodyBefore == null && ci.HeadBefore == null)
                {
                    CedictEntry entry = parser.ParseEntry(ci.EntryHead + " " + ci.EntryBody, 0, null);
                    entry.Status = ci.EntryStatus;
                    EntryRenderer er = new EntryRenderer(lang, entry, true);
                    er.OneLineHanziLimit = 12;
                    er.Render(sb, null);
                }
                // Entry edited: show "diff" in head and/or body
                else
                {
                    // Current, and comparison base
                    CedictEntry eCurr = parser.ParseEntry(ci.EntryHead + " " + ci.EntryBody, 0, null);
                    eCurr.Status = ci.EntryStatus;
                    // Whichever part has no "before" value did not change; reuse the current one.
                    string      headOld       = ci.HeadBefore == null ? ci.EntryHead : ci.HeadBefore;
                    string      bodyOld       = ci.BodyBefore == null ? ci.EntryBody : ci.BodyBefore;
                    CedictEntry eOld          = parser.ParseEntry(headOld + " " + bodyOld, 0, null);
                    bool        simpChanged   = eCurr.ChSimpl != eOld.ChSimpl;
                    bool        tradChanged   = eCurr.ChTrad != eOld.ChTrad;
                    bool        pinyinChanged = CedictWriter.WritePinyin(eCurr) != CedictWriter.WritePinyin(eOld);
                    bool        bodyChanged   = ci.BodyBefore != null;
                    // Render in parts
                    sb.AppendLine("<div class='entry'>");
                    // Let's not dim identical chars if anything changed in HW
                    EntryRenderer rCurr = new EntryRenderer(lang, eCurr, !simpChanged && !tradChanged);
                    EntryRenderer rOld  = new EntryRenderer(lang, eOld, !simpChanged && !tradChanged);
                    rCurr.OneLineHanziLimit = rOld.OneLineHanziLimit = 12;
                    rCurr.XRenderStatus(sb);
                    // Each part: current version always, old version only if that part changed.
                    rCurr.XRenderHanzi(sb, simpChanged ? "new" : "", tradChanged ? "new" : "");
                    if (simpChanged || tradChanged)
                    {
                        rOld.XRenderHanzi(sb, simpChanged ? "old" : "", tradChanged ? "old" : "");
                    }
                    rCurr.XRenderPinyin(sb, pinyinChanged ? "new" : "");
                    if (pinyinChanged)
                    {
                        rOld.XRenderPinyin(sb, "old");
                    }
                    rCurr.XRenderSenses(sb, bodyChanged ? "new" : "");
                    if (bodyChanged)
                    {
                        rOld.XRenderSenses(sb, "old");
                    }
                    sb.AppendLine("</div>"); // <div class='entry'>
                }
            }
            sb.Append("</div>"); // <div class='changeEntry'>

            // changeHead was already closed above, so only the outer item remains open here.
            sb.Append("</div>"); // <div class='historyItem'>
            sb.AppendLine();

            if (trailingSeparator)
            {
                sb.AppendLine("<div class='historySep'></div>");
            }
        }
예제 #4
0
        /// <summary>
        /// Appends the HTML for one past change of an entry to <paramref name="sb"/> and rolls the
        /// tracked entry state back by that change: after the call, <paramref name="headNow"/>,
        /// <paramref name="trgNow"/> and <paramref name="statusNow"/> hold the values from before
        /// the change wherever the change recorded one.
        /// </summary>
        /// <param name="sb">Builder that receives the markup.</param>
        /// <param name="parser">Parser used to build entries for rendering.</param>
        /// <param name="headNow">Headword text after this change; replaced by <c>ci.HeadBefore</c> if set.</param>
        /// <param name="trgNow">Body text after this change; replaced by <c>ci.BodyBefore</c> if set.</param>
        /// <param name="statusNow">Status after this change; replaced by <c>ci.StatusBefore</c> unless that is 99.</param>
        /// <param name="ci">The change to render.</param>
        /// <param name="lang">Language code used for localized strings, link URLs and entry rendering.</param>
        private static void renderPastChange(StringBuilder sb, CedictParser parser,
                                             ref string headNow, ref string trgNow, ref EntryStatus statusNow,
                                             ChangeItem ci, string lang)
        {
            HtmlEncoder encoder = HtmlEncoder.Default;

            sb.AppendLine("<div class='pastItem'>");

            // Summary line: change type, user, time.
            sb.Append("<div class='changeSummary'>").Append("<span class='changeType'>");
            string typeText = getChangeTypeStr(ci.ChangeType, ci.CountB, statusNow, lang);
            sb.Append(encoder.Encode(typeText)).Append(" &bull; </span>");
            sb.Append("<span class='changeUser'>").Append(encoder.Encode(ci.User)).Append("</span>");
            sb.Append("<span class='changeTime'>");
            string timeText = getTimeStr(ci.When, lang);
            sb.Append(encoder.Encode(timeText)).Append("</span>");
            sb.AppendLine("</div>"); // <div class='changeSummary'>

            // Note, preceded by a link to the bulk change when the item references one.
            sb.AppendLine("<div class='changeNote'>");
            sb.Append("<span class='changeNoteText'>");
            if (ci.BulkRef != -1)
            {
                sb.Append("<span class='bulkLink'>[");
                sb.Append("<a href='/").Append(lang).Append("/read/details/change-")
                  .Append(ci.BulkRef.ToString("000")).Append("' target='_blank'>");
                sb.Append(TextProvider.Instance.GetString(lang, "history.bulkLink")).Append("</a>");
                sb.Append("]</span> ");
            }
            // Encoded line feeds ("&#xA;") become visible line breaks.
            sb.Append(encoder.Encode(ci.Note).Replace("&#xA;", "<br/>"));
            sb.Append("</span>");
            sb.AppendLine("</div>"); // <div class='changeNote'>

            // Headword changed: show new and old versions of whichever parts differ.
            if (ci.HeadBefore != null)
            {
                // NOTE(review): "/x/" looks like a placeholder body so the headword alone parses — confirm.
                CedictEntry headCurrent  = parser.ParseEntry(headNow + " /x/", -1, null);
                CedictEntry headPrevious = parser.ParseEntry(ci.HeadBefore + " /x/", -1, null);

                bool simpDiffers   = headCurrent.ChSimpl != headPrevious.ChSimpl;
                bool tradDiffers   = headCurrent.ChTrad != headPrevious.ChTrad;
                bool pinyinDiffers = CedictWriter.WritePinyin(headCurrent) != CedictWriter.WritePinyin(headPrevious);
                bool hanziDiffers  = simpDiffers || tradDiffers;

                sb.AppendLine("<div class='entry'>");
                // The third argument is true only when neither Hanzi form changed.
                EntryRenderer currentRenderer  = new EntryRenderer(lang, headCurrent, !hanziDiffers);
                EntryRenderer previousRenderer = new EntryRenderer(lang, headPrevious, !hanziDiffers);
                previousRenderer.OneLineHanziLimit = 12;
                currentRenderer.OneLineHanziLimit  = 12;

                if (hanziDiffers)
                {
                    string simpNew = simpDiffers ? "new" : "";
                    string tradNew = tradDiffers ? "new" : "";
                    currentRenderer.XRenderHanzi(sb, simpNew, tradNew);
                    string simpOld = simpDiffers ? "old" : "";
                    string tradOld = tradDiffers ? "old" : "";
                    previousRenderer.XRenderHanzi(sb, simpOld, tradOld);
                }
                if (pinyinDiffers)
                {
                    currentRenderer.XRenderPinyin(sb, "new");
                    previousRenderer.XRenderPinyin(sb, "old");
                }
                sb.AppendLine("</div>"); // <div class='entry'>

                // Propagate change
                headNow = ci.HeadBefore;
            }

            // Body changed: render senses after and before the change.
            if (ci.BodyBefore != null)
            {
                // NOTE(review): the fixed headword appears to be a placeholder so the body alone parses — confirm.
                const string placeholderHead = "的 的 [de5] ";

                CedictEntry bodyCurrent = parser.ParseEntry(placeholderHead + trgNow, -1, null);
                new EntryRenderer(lang, bodyCurrent, true).XRenderSenses(sb, "new");

                CedictEntry bodyPrevious = parser.ParseEntry(placeholderHead + ci.BodyBefore, -1, null);
                new EntryRenderer(lang, bodyPrevious, true).XRenderSenses(sb, "old");

                // Propagate change
                trgNow = ci.BodyBefore;
            }

            // NOTE(review): 99 appears to be a sentinel for "status not changed" — confirm.
            if (ci.StatusBefore != 99)
            {
                statusNow = (EntryStatus)ci.StatusBefore;
            }

            sb.AppendLine("</div>"); // <div class='pastItem'>
        }
예제 #5
0
        /// <summary>
        /// Reads "chdict.u8", runs the per-entry checks and counters on every line that parses,
        /// and writes three files: diagnostics to "chd-diag.txt", lines that do not survive a
        /// parse/write round trip to "chd-trip.txt", and summary statistics to "chd-stats.txt".
        /// </summary>
        public void Work()
        {
            using (var dictStream = new FileStream("chdict.u8", FileMode.Open, FileAccess.Read))
            using (var dictReader = new StreamReader(dictStream))
            using (var diagStream = new FileStream("chd-diag.txt", FileMode.Create, FileAccess.ReadWrite))
            using (var diagWriter = new StreamWriter(diagStream))
            using (var tripStream = new FileStream("chd-trip.txt", FileMode.Create, FileAccess.ReadWrite))
            using (var tripWriter = new StreamWriter(tripStream))
            {
                string current;
                while ((current = dictReader.ReadLine()) != null)
                {
                    // Comment lines still count towards the line number passed to the parser.
                    ++lineNum;
                    if (current.StartsWith("#"))
                    {
                        continue;
                    }

                    CedictEntry parsed = parser.ParseEntry(current, lineNum, diagWriter);
                    if (parsed == null)
                    {
                        continue;
                    }

                    // Record the original and re-serialized form when they differ.
                    string roundTripped = CedictWriter.Write(parsed);
                    if (roundTripped != current)
                    {
                        tripWriter.WriteLine(current);
                        tripWriter.WriteLine(roundTripped);
                    }

                    // Per-entry checks and counters.
                    fileHead(parsed);
                    countTags(parsed);
                    checkCommas(parsed, lineNum, diagWriter);
                    countPrefixes(parsed, diagWriter);
                    ++entryCount;
                    senseCount += parsed.SenseCount;
                    countMeasureWords(parsed);
                }

                writeHeadIssues(diagWriter);
                writePrefixes();

                // Tag counts, highest first.
                List<TC> tagTallies = new List<TC>();
                foreach (var pair in tags)
                {
                    TC tally = new TC { Tag = pair.Key, Count = pair.Value };
                    tagTallies.Add(tally);
                }
                tagTallies.Sort((left, right) => right.Count.CompareTo(left.Count));

                using (FileStream statsStream = new FileStream("chd-stats.txt", FileMode.Create, FileAccess.ReadWrite))
                using (StreamWriter stats = new StreamWriter(statsStream))
                {
                    stats.WriteLine("ZH entries: " + entryCount);
                    stats.WriteLine("HU senses: " + senseCount);
                    stats.WriteLine("Entries with CL: " + entriesWithMW);
                    stats.WriteLine();

                    foreach (var tally in tagTallies)
                    {
                        stats.WriteLine(tally.Count + "\t" + tally.Tag);
                    }
                    stats.WriteLine();

                    // Keys of simpMWCounts, ordered by their count, highest first.
                    List<string> measureWords = new List<string>();
                    foreach (var pair in simpMWCounts)
                    {
                        measureWords.Add(pair.Key);
                    }
                    measureWords.Sort((left, right) => simpMWCounts[right].CompareTo(simpMWCounts[left]));
                    foreach (string word in measureWords)
                    {
                        stats.WriteLine(simpMWCounts[word] + "\t" + word);
                    }
                }
            }
        }
예제 #6
0
        /// <summary>
        /// Reads "chdict.u8" line by line and appends one <c>ResItem</c> to <c>items</c> for every
        /// line that parses to an entry whose simplified headword is at most 16 characters long.
        /// Before parsing, the bracketed part of the line (between " [" and "] /") is lower-cased.
        /// Each item gets a unique ID drawn from a fixed-seed random generator, so repeated runs
        /// over the same input produce the same IDs.
        /// </summary>
        public void Work()
        {
            Random        rnd    = new Random(0);
            CedictParser  parser = new CedictParser();
            HashSet <int> idSet  = new HashSet <int>();
            StringBuilder sb     = new StringBuilder();

            using (FileStream fsIn = new FileStream("chdict.u8", FileMode.Open, FileAccess.Read))
            using (StreamReader sr = new StreamReader(fsIn))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    if (line.StartsWith("#"))
                    {
                        continue;
                    }

                    // Lower-case the bracketed section. Ordinal search guarantees the match has exactly
                    // the pattern's length, which the offset arithmetic below relies on.
                    int ix1 = line.IndexOf(" [", StringComparison.Ordinal);
                    int ix2 = ix1 < 0 ? -1 : line.IndexOf("] /", ix1 + 2, StringComparison.Ordinal);
                    // Only rewrite when both markers are present in the expected order; a malformed
                    // line is passed on untouched so the parser can reject it instead of Substring throwing.
                    if (ix2 > ix1)
                    {
                        // The middle slice runs from just after '[' through "] "; invariant casing keeps
                        // the result independent of the machine's culture.
                        line = line.Substring(0, ix1) + " [" + line.Substring(ix1 + 2, ix2 - ix1).ToLowerInvariant() + line.Substring(ix2 + 2);
                    }

                    CedictEntry entry = parser.ParseEntry(line, 0, null);
                    if (entry == null)
                    {
                        continue;
                    }
                    if (entry.ChSimpl.Length > 16)
                    {
                        continue;
                    }

                    // Draw until we hit an ID not handed out before.
                    int id = rnd.Next();
                    while (idSet.Contains(id))
                    {
                        id = rnd.Next();
                    }
                    idSet.Add(id);
                    string strId = EntryId.IdToString(id);

                    sb.Clear();
                    // Line with ID
                    sb.AppendLine("# ID-" + strId);
                    // First version metainfo
                    string statStr = "Stat-Verif";
                    sb.AppendLine("# Ver 2017-05-02T22:41:05Z gabor " + statStr + " 001>CHDICT törzsanyag");
                    // The entry itself
                    sb.AppendLine(CedictWriter.Write(entry));

                    items.Add(new ResItem {
                        ID = id, Lines = sb.ToString()
                    });
                }
            }
        }