/// <summary>
/// Reads "handedict.u8" line by line and, for every line that parses into an entry whose
/// simplified headword is at most 16 characters long, appends a <c>ResItem</c> to <c>items</c>.
/// Each item carries a random, unique ID and a three-line text block: ID line, first-version
/// metainfo line, and the entry as written by <c>CedictWriter.Write</c>.
/// </summary>
public void Work()
{
    // Fixed seed: the same input file yields the same sequence of IDs on every run.
    var random = new Random(0);
    var cedictParser = new CedictParser();
    var usedIds = new HashSet<int>();
    var buffer = new StringBuilder();

    using (var input = new FileStream("handedict.u8", FileMode.Open, FileAccess.Read))
    using (var reader = new StreamReader(input))
    {
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            // Lines starting with '#' are skipped without parsing
            if (line.StartsWith("#"))
            {
                continue;
            }

            CedictEntry entry = cedictParser.ParseEntry(line, 0, null);
            if (entry == null || entry.ChSimpl.Length > 16)
            {
                continue;
            }

            // Draw random numbers until one is not yet taken; Add returns false for duplicates
            int id;
            do
            {
                id = random.Next();
            }
            while (!usedIds.Add(id));

            string idText = EntryId.IdToString(id);
            bool verified = isVerified(entry);

            string status;
            if (verified)
            {
                status = "Stat-Verif";
            }
            else
            {
                status = "Stat-New";
            }

            buffer.Clear();
            // Line with ID
            buffer.AppendLine("# ID-" + idText);
            // First version metainfo
            buffer.AppendLine("# Ver 2011-05-28T01:27:49Z HanDeDict " + status + " 001>Originalversion HanDeDict-Datei");
            // The entry itself
            buffer.AppendLine(CedictWriter.Write(entry));

            items.Add(new ResItem { ID = id, Lines = buffer.ToString() });
        }
    }
}
/// <summary>
/// Consumes <c>srDict</c> line by line: parses every line that does not start with '#',
/// records lines that do not survive a parse/write round trip in <c>swTrip</c> (original
/// followed by the re-written form), and passes each parsed line to <c>countTags</c>.
/// Afterwards writes the contents of <c>tags</c> to "hdd-tags.txt", highest count first,
/// as "count TAB tag" lines.
/// </summary>
public void Work()
{
    for (string raw = srDict.ReadLine(); raw != null; raw = srDict.ReadLine())
    {
        // Every physical line is counted, including the skipped ones
        ++lineNum;
        if (raw.StartsWith("#"))
        {
            continue;
        }

        CedictEntry parsed = parser.ParseEntry(raw, lineNum, swDiag);
        if (parsed == null)
        {
            continue;
        }

        // Round trip: log both forms when writing the entry back changes the text
        string rewritten = CedictWriter.Write(parsed);
        if (rewritten != raw)
        {
            swTrip.WriteLine(raw);
            swTrip.WriteLine(rewritten);
        }
        countTags(raw);
    }

    // Flatten the tag counts into a list so they can be ordered by frequency
    var tagCounts = new List<TC>();
    foreach (var pair in tags)
    {
        var item = new TC { Tag = pair.Key, Count = pair.Value };
        tagCounts.Add(item);
    }
    tagCounts.Sort((a, b) => b.Count.CompareTo(a.Count));

    using (var tagFile = new FileStream("hdd-tags.txt", FileMode.Create, FileAccess.ReadWrite))
    using (var tagWriter = new StreamWriter(tagFile))
    {
        foreach (var item in tagCounts)
        {
            tagWriter.WriteLine(item.Count + "\t" + item.Tag);
        }
    }
}
/// <summary>
/// Appends the HTML of one change-history item to <paramref name="sb"/>: a head section
/// (icon, change type, user, time, note) followed by the affected entry, which is rendered
/// either as-is or as an old/new comparison when the change edited the head or the body.
/// </summary>
/// <param name="sb">Builder that receives the markup.</param>
/// <param name="ci">The change to render.</param>
/// <param name="trailingSeparator">If true, a <c>historySep</c> div is appended after the item.</param>
/// <param name="lang">Language code used for localized strings and for building links.</param>
/// <param name="parser">Parser used to turn the stored head and body text into entries.</param>
/// <param name="extraItemClass">Optional extra CSS class added next to <c>historyItem</c>.</param>
private static void histRenderChange(StringBuilder sb, ChangeItem ci, bool trailingSeparator, string lang,
    CedictParser parser, string extraItemClass = "")
{
    var tprov = TextProvider.Instance;
    sb.AppendLine();

    // Outer item; the entry ID is only exposed as a data attribute when it is non-negative
    string itemClass = "historyItem";
    if (!string.IsNullOrEmpty(extraItemClass)) { itemClass += " " + extraItemClass; }
    if (ci.EntryId >= 0)
    {
        sb.AppendLine("<div class='" + itemClass + "' data-entry-id='" + EntryId.IdToString(ci.EntryId) + "'>");
    }
    else
    {
        sb.AppendLine("<div class='" + itemClass + "'>");
    }
    sb.AppendLine("<div class='changeHead'>");

    // Icon depends on the change type; for status changes, on the resulting status
    string iconClass = "";
    if (ci.ChangeType == ChangeType.New) { iconClass = "fa fa-lightbulb-o ctNew"; }
    else if (ci.ChangeType == ChangeType.Edit) { iconClass = "fa fa-pencil-square-o ctEdit"; }
    else if (ci.ChangeType == ChangeType.Note) { iconClass = "fa fa-commenting-o ctNote"; }
    else if (ci.ChangeType == ChangeType.BulkImport) { iconClass = "fa fa-newspaper-o ctBulk"; }
    else if (ci.ChangeType == ChangeType.StatusChange)
    {
        if (ci.EntryStatus == EntryStatus.Approved) { iconClass = "fa fa-check-square-o ctApprove"; }
        else if (ci.EntryStatus == EntryStatus.Flagged) { iconClass = "fa fa-flag-o ctFlag"; }
        else { iconClass = "fa fa-flag-o ctUnflag"; }
    }
    sb.Append("<i class='" + iconClass + "' />");

    // Summary: change type, user, time, and (except for bulk imports) a "+N" marker when CountA > 0
    sb.Append("<div class='changeSummary'>");
    string changeMsg = getChangeTypeStr(ci.ChangeType, ci.CountB, ci.EntryStatus, lang);
    string changeCls = "changeType";
    sb.Append("<span class='" + changeCls + "'>");
    sb.Append(HtmlEncoder.Default.Encode(changeMsg));
    sb.Append(" • </span>");
    sb.Append("<span class='changeUser'>");
    sb.Append(HtmlEncoder.Default.Encode(ci.User));
    sb.Append("</span>");
    sb.Append("<span class='changeTime'>");
    sb.Append(HtmlEncoder.Default.Encode(getTimeStr(ci.When, lang)));
    sb.Append("</span>");
    if (ci.ChangeType != ChangeType.BulkImport && ci.CountA > 0)
    {
        sb.Append("<span class='revealPast'>+" + ci.CountA.ToString() + "</span>");
    }
    sb.Append("</div>"); // <div class='changeSummary'>

    sb.AppendLine("<div class='changeNote'>");
    if (ci.ChangeType == ChangeType.BulkImport)
    {
        // For bulk imports CountA and CountB are formatted into the "new words" and
        // "changed words" messages; either part is omitted when its count is zero.
        string newCount = null;
        string chgCount = null;
        if (ci.CountA > 0)
        {
            newCount = tprov.GetString(lang, "history.bulkNewWords");
            newCount = string.Format(newCount, ci.CountA);
        }
        if (ci.CountB > 0)
        {
            chgCount = tprov.GetString(lang, "history.bulkChangedWords");
            chgCount = string.Format(chgCount, ci.CountB);
        }
        if (chgCount != null || newCount != null)
        {
            sb.Append("<p>");
            if (newCount != null) { sb.Append(HtmlEncoder.Default.Encode(newCount)); }
            if (newCount != null && chgCount != null) { sb.Append(" • "); }
            if (chgCount != null) { sb.Append(HtmlEncoder.Default.Encode(chgCount)); }
            sb.Append("</p>");
        }
        sb.Append("<span class='bulkLink'>[");
        sb.Append("<a href='/" + lang + "/read/details/change-" + ci.BulkRef.ToString("000") + "' target='_blank'>");
        sb.Append(tprov.GetString(lang, "history.bulkLink") + "</a>");
        sb.Append("]</span> ");
    }

    // Note text. Line breaks must become <br/>, but the encoder escapes line feeds, so looking
    // for them in the encoded text finds nothing. Split the raw note first, then encode each
    // line separately. A missing note is rendered as empty text.
    sb.Append("<span class='changeNoteText'>");
    string[] noteLines = (ci.Note ?? "").Split('\n');
    for (int i = 0; i < noteLines.Length; ++i)
    {
        if (i > 0) { sb.Append("<br/>"); }
        sb.Append(HtmlEncoder.Default.Encode(noteLines[i]));
    }
    sb.Append("</span>");
    sb.Append("</div>"); // <div class='changeNote'>
    sb.Append("</div>"); // <div class='changeHead'>

    sb.AppendLine("<div class='changeEntry'>");
    if (ci.ChangeType != ChangeType.BulkImport)
    {
        // Per-entry operations: edit link, comment and flag icons
        sb.AppendLine("<div class='histEntryOps'>");
        sb.Append("<a class='ajax' href='/" + lang + "/edit/existing/" + EntryId.IdToString(ci.EntryId) + "'>");
        sb.Append("<i class='opHistEdit fa fa-pencil'></i></a>");
        sb.Append("<i class='opHistComment fa fa-commenting-o' />");
        sb.Append("<i class='opHistFlag fa fa-flag-o' />");
        sb.Append("</div>"); // <div class='histEntryOps'>

        // NOT edited: render the entry in one go
        if (ci.BodyBefore == null && ci.HeadBefore == null)
        {
            CedictEntry entry = parser.ParseEntry(ci.EntryHead + " " + ci.EntryBody, 0, null);
            entry.Status = ci.EntryStatus;
            EntryRenderer er = new EntryRenderer(lang, entry, true);
            er.OneLineHanziLimit = 12;
            er.Render(sb, null);
        }
        // Entry edited: show "diff" in head and/or body
        else
        {
            // Current, and comparison base. Whichever of head/body has no "before" value
            // is taken from the current entry.
            CedictEntry eCurr = parser.ParseEntry(ci.EntryHead + " " + ci.EntryBody, 0, null);
            eCurr.Status = ci.EntryStatus;
            string headOld = ci.HeadBefore == null ? ci.EntryHead : ci.HeadBefore;
            string bodyOld = ci.BodyBefore == null ? ci.EntryBody : ci.BodyBefore;
            CedictEntry eOld = parser.ParseEntry(headOld + " " + bodyOld, 0, null);
            bool simpChanged = eCurr.ChSimpl != eOld.ChSimpl;
            bool tradChanged = eCurr.ChTrad != eOld.ChTrad;
            bool pinyinChanged = CedictWriter.WritePinyin(eCurr) != CedictWriter.WritePinyin(eOld);
            bool bodyChanged = ci.BodyBefore != null;

            // Render in parts: each changed part is emitted twice, "new" followed by "old"
            sb.AppendLine("<div class='entry'>");
            // Let's not dim identical chars if anything changed in HW
            EntryRenderer rCurr = new EntryRenderer(lang, eCurr, !simpChanged && !tradChanged);
            EntryRenderer rOld = new EntryRenderer(lang, eOld, !simpChanged && !tradChanged);
            rCurr.OneLineHanziLimit = rOld.OneLineHanziLimit = 12;
            rCurr.XRenderStatus(sb);
            rCurr.XRenderHanzi(sb, simpChanged ? "new" : "", tradChanged ? "new" : "");
            if (simpChanged || tradChanged)
            {
                rOld.XRenderHanzi(sb, simpChanged ? "old" : "", tradChanged ? "old" : "");
            }
            rCurr.XRenderPinyin(sb, pinyinChanged ? "new" : "");
            if (pinyinChanged) { rOld.XRenderPinyin(sb, "old"); }
            rCurr.XRenderSenses(sb, bodyChanged ? "new" : "");
            if (bodyChanged) { rOld.XRenderSenses(sb, "old"); }
            sb.AppendLine("</div>"); // <div class='entry'>
        }
    }
    sb.Append("</div>"); // <div class='changeEntry'>
    // NOTE(review): changeHead was already closed above, so this closing tag has no matching
    // opening tag inside this method (clearly so on the bulk-import path, which renders no
    // entry). Kept so the emitted markup stays unchanged; confirm whether callers or
    // EntryRenderer rely on it before removing.
    sb.Append("</div>");
    sb.Append("</div>"); // <div class='historyItem'>
    sb.AppendLine();
    if (trailingSeparator) { sb.AppendLine("<div class='historySep'></div>"); }
}
/// <summary>
/// Appends the HTML of one earlier change of an entry to <paramref name="sb"/>, rendered
/// relative to the state passed in through the <c>ref</c> parameters, and then moves that
/// state back to what it was before this change.
/// </summary>
/// <param name="sb">Builder that receives the markup.</param>
/// <param name="parser">Parser used to build entries from head and body text.</param>
/// <param name="headNow">Head text after this change; set to <c>ci.HeadBefore</c> when that is not null.</param>
/// <param name="trgNow">Body text after this change; set to <c>ci.BodyBefore</c> when that is not null.</param>
/// <param name="statusNow">Status after this change; set to <c>ci.StatusBefore</c> unless that is 99.</param>
/// <param name="ci">The change to render.</param>
/// <param name="lang">Language code used for localized strings and for building links.</param>
private static void renderPastChange(StringBuilder sb, CedictParser parser, ref string headNow, ref string trgNow,
    ref EntryStatus statusNow, ChangeItem ci, string lang)
{
    sb.AppendLine("<div class='pastItem'>");

    // Summary: change type (described using the status that was in effect after this change),
    // user and time
    sb.Append("<div class='changeSummary'>");
    sb.Append("<span class='changeType'>");
    sb.Append(HtmlEncoder.Default.Encode(getChangeTypeStr(ci.ChangeType, ci.CountB, statusNow, lang)));
    sb.Append(" • </span>");
    sb.Append("<span class='changeUser'>");
    sb.Append(HtmlEncoder.Default.Encode(ci.User));
    sb.Append("</span>");
    sb.Append("<span class='changeTime'>");
    sb.Append(HtmlEncoder.Default.Encode(getTimeStr(ci.When, lang)));
    sb.Append("</span>");
    sb.AppendLine("</div>"); // <div class='changeSummary'>

    sb.AppendLine("<div class='changeNote'>");
    sb.Append("<span class='changeNoteText'>");
    // A BulkRef other than -1 adds a link to the bulk change's details page
    if (ci.BulkRef != -1)
    {
        sb.Append("<span class='bulkLink'>[");
        sb.Append("<a href='/" + lang + "/read/details/change-" + ci.BulkRef.ToString("000") + "' target='_blank'>");
        sb.Append(TextProvider.Instance.GetString(lang, "history.bulkLink") + "</a>");
        sb.Append("]</span> ");
    }
    // Line breaks must become <br/>, but the encoder escapes line feeds, so looking for them
    // in the encoded text finds nothing. Split the raw note first, then encode each line
    // separately. A missing note is rendered as empty text.
    string[] noteLines = (ci.Note ?? "").Split('\n');
    for (int i = 0; i < noteLines.Length; ++i)
    {
        if (i > 0) { sb.Append("<br/>"); }
        sb.Append(HtmlEncoder.Default.Encode(noteLines[i]));
    }
    sb.Append("</span>");
    sb.AppendLine("</div>"); // <div class='changeNote'>

    if (ci.HeadBefore != null)
    {
        // Only the head is compared here; " /x/" is a placeholder body so the text parses as an entry
        CedictEntry eCurr = parser.ParseEntry(headNow + " /x/", -1, null);
        CedictEntry eOld = parser.ParseEntry(ci.HeadBefore + " /x/", -1, null);
        bool simpChanged = eCurr.ChSimpl != eOld.ChSimpl;
        bool tradChanged = eCurr.ChTrad != eOld.ChTrad;
        bool pinyinChanged = CedictWriter.WritePinyin(eCurr) != CedictWriter.WritePinyin(eOld);

        // Render in parts
        sb.AppendLine("<div class='entry'>");
        // Let's not dim identical chars if anything changed in HW
        EntryRenderer rCurr = new EntryRenderer(lang, eCurr, !simpChanged && !tradChanged);
        EntryRenderer rOld = new EntryRenderer(lang, eOld, !simpChanged && !tradChanged);
        rCurr.OneLineHanziLimit = rOld.OneLineHanziLimit = 12;
        if (simpChanged || tradChanged)
        {
            rCurr.XRenderHanzi(sb, simpChanged ? "new" : "", tradChanged ? "new" : "");
            rOld.XRenderHanzi(sb, simpChanged ? "old" : "", tradChanged ? "old" : "");
        }
        if (pinyinChanged)
        {
            rCurr.XRenderPinyin(sb, "new");
            rOld.XRenderPinyin(sb, "old");
        }
        sb.AppendLine("</div>"); // <div class='entry'>

        // Propagate change
        headNow = ci.HeadBefore;
    }

    if (ci.BodyBefore != null)
    {
        // Only the senses are rendered here; "的 的 [de5] " is a placeholder head so the text parses as an entry
        CedictEntry entryNew = parser.ParseEntry("的 的 [de5] " + trgNow, -1, null);
        EntryRenderer er = new EntryRenderer(lang, entryNew, true);
        er.XRenderSenses(sb, "new");
        CedictEntry entryOld = parser.ParseEntry("的 的 [de5] " + ci.BodyBefore, -1, null);
        er = new EntryRenderer(lang, entryOld, true);
        er.XRenderSenses(sb, "old");

        // Propagate change
        trgNow = ci.BodyBefore;
    }

    // NOTE(review): 99 presumably marks "status not changed by this item" — confirm against
    // where StatusBefore is populated.
    if (ci.StatusBefore != 99) { statusNow = (EntryStatus)ci.StatusBefore; }

    sb.AppendLine("</div>"); // <div class='pastItem'>
}
/// <summary>
/// Processes "chdict.u8": parses every line that does not start with '#', logs round-trip
/// differences to "chd-trip.txt", runs the per-entry checks and counters (diagnostics go to
/// "chd-diag.txt"), and finally writes entry/sense totals, tag counts and measure-word
/// counts to "chd-stats.txt".
/// </summary>
public void Work()
{
    using (var dictStream = new FileStream("chdict.u8", FileMode.Open, FileAccess.Read))
    using (var dictReader = new StreamReader(dictStream))
    using (var diagStream = new FileStream("chd-diag.txt", FileMode.Create, FileAccess.ReadWrite))
    using (var diagWriter = new StreamWriter(diagStream))
    using (var tripStream = new FileStream("chd-trip.txt", FileMode.Create, FileAccess.ReadWrite))
    using (var tripWriter = new StreamWriter(tripStream))
    {
        for (string raw = dictReader.ReadLine(); raw != null; raw = dictReader.ReadLine())
        {
            // Every physical line is counted, including the skipped ones
            ++lineNum;
            if (raw.StartsWith("#"))
            {
                continue;
            }

            CedictEntry parsed = parser.ParseEntry(raw, lineNum, diagWriter);
            if (parsed == null)
            {
                continue;
            }

            // Round trip: log both forms when writing the entry back changes the text
            string rewritten = CedictWriter.Write(parsed);
            if (rewritten != raw)
            {
                tripWriter.WriteLine(raw);
                tripWriter.WriteLine(rewritten);
            }

            // Per-entry checks and statistics
            fileHead(parsed);
            countTags(parsed);
            checkCommas(parsed, lineNum, diagWriter);
            countPrefixes(parsed, diagWriter);
            ++entryCount;
            senseCount += parsed.SenseCount;
            countMeasureWords(parsed);
        }

        writeHeadIssues(diagWriter);
        writePrefixes();

        // Tag counts, most frequent first
        var tagCounts = new List<TC>();
        foreach (var pair in tags)
        {
            var item = new TC { Tag = pair.Key, Count = pair.Value };
            tagCounts.Add(item);
        }
        tagCounts.Sort((a, b) => b.Count.CompareTo(a.Count));

        using (var statsStream = new FileStream("chd-stats.txt", FileMode.Create, FileAccess.ReadWrite))
        using (var statsWriter = new StreamWriter(statsStream))
        {
            statsWriter.WriteLine("ZH entries: " + entryCount);
            statsWriter.WriteLine("HU senses: " + senseCount);
            statsWriter.WriteLine("Entries with CL: " + entriesWithMW);
            statsWriter.WriteLine();

            foreach (var item in tagCounts)
            {
                statsWriter.WriteLine(item.Count + "\t" + item.Tag);
            }
            statsWriter.WriteLine();

            // Measure words, most frequent first
            var measureWords = new List<string>();
            foreach (var pair in simpMWCounts)
            {
                measureWords.Add(pair.Key);
            }
            measureWords.Sort((a, b) => simpMWCounts[b].CompareTo(simpMWCounts[a]));
            foreach (string word in measureWords)
            {
                statsWriter.WriteLine(simpMWCounts[word] + "\t" + word);
            }
        }
    }
}
/// <summary>
/// Reads "chdict.u8" line by line, lower-cases the bracketed pinyin part of each line, and,
/// for every line that parses into an entry whose simplified headword is at most 16
/// characters long, appends a <c>ResItem</c> to <c>items</c>. Each item carries a random,
/// unique ID and a three-line text block: ID line, first-version metainfo line, and the
/// entry as written by <c>CedictWriter.Write</c>.
/// </summary>
public void Work()
{
    // Fixed seed: the same input file yields the same sequence of IDs on every run.
    Random rnd = new Random(0);
    CedictParser parser = new CedictParser();
    HashSet<int> idSet = new HashSet<int>();
    StringBuilder sb = new StringBuilder();
    using (FileStream fsIn = new FileStream("chdict.u8", FileMode.Open, FileAccess.Read))
    using (StreamReader sr = new StreamReader(fsIn))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            if (line.StartsWith("#")) { continue; }

            // Lower-case the text between " [" and "] /". Lines that lack either marker
            // (a blank line, for instance) are handed to the parser untouched instead of
            // failing in Substring; the null check below then decides whether to skip them.
            int ix1 = line.IndexOf(" [");
            int ix2 = line.IndexOf("] /");
            if (ix1 >= 0 && ix2 > ix1)
            {
                // The middle slice runs through "] ", so the tail starts at the '/'.
                // Invariant casing keeps the result independent of the machine's culture.
                line = line.Substring(0, ix1) + " ["
                    + line.Substring(ix1 + 2, ix2 - ix1).ToLowerInvariant()
                    + line.Substring(ix2 + 2);
            }

            CedictEntry entry = parser.ParseEntry(line, 0, null);
            if (entry == null) { continue; }
            if (entry.ChSimpl.Length > 16) { continue; }

            // Draw random numbers until one is not yet taken
            int id = rnd.Next();
            while (idSet.Contains(id)) { id = rnd.Next(); }
            idSet.Add(id);
            string strId = EntryId.IdToString(id);

            sb.Clear();
            // Line with ID
            sb.AppendLine("# ID-" + strId);
            // First version metainfo
            string statStr = "Stat-Verif";
            sb.AppendLine("# Ver 2017-05-02T22:41:05Z gabor " + statStr + " 001>CHDICT törzsanyag");
            // The entry itself
            sb.AppendLine(CedictWriter.Write(entry));

            items.Add(new ResItem { ID = id, Lines = sb.ToString() });
        }
    }
}