/// <summary>
/// Reads "handedict.u8" line by line and appends one <c>ResItem</c> to <c>items</c>
/// for every entry that is kept.
/// </summary>
/// <remarks>
/// A line is dropped when it starts with "#", when the parser returns null for it, or when
/// the entry's <c>ChSimpl</c> is longer than 16 characters. IDs come from a generator seeded
/// with 0 and are re-drawn until they are unique within this run.
/// </remarks>
public void Work()
{
    var idSource = new Random(0);
    var cedictParser = new CedictParser();
    var usedIds = new HashSet<int>();
    var text = new StringBuilder();

    using (var input = new FileStream("handedict.u8", FileMode.Open, FileAccess.Read))
    using (var reader = new StreamReader(input))
    {
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            if (line.StartsWith("#"))
            {
                continue;
            }

            CedictEntry entry = cedictParser.ParseEntry(line, 0, null);
            if (entry == null || entry.ChSimpl.Length > 16)
            {
                continue;
            }

            // Keep drawing until Add() reports that the value was not in the set yet.
            int id;
            do
            {
                id = idSource.Next();
            }
            while (!usedIds.Add(id));

            string idText = EntryId.IdToString(id);

            string status;
            if (isVerified(entry))
            {
                status = "Stat-Verif";
            }
            else
            {
                status = "Stat-New";
            }

            text.Clear();
            // ID line
            text.Append("# ID-").AppendLine(idText);
            // Metainfo line of the first version
            text.Append("# Ver 2011-05-28T01:27:49Z HanDeDict ")
                .Append(status)
                .AppendLine(" 001>Originalversion HanDeDict-Datei");
            // The serialized entry
            text.AppendLine(CedictWriter.Write(entry));

            items.Add(new ResItem { ID = id, Lines = text.ToString() });
        }
    }
}
/// <summary>
/// Reads every line from <c>srDict</c>, parses it, and records lines that do not survive a
/// parse/write round trip; then writes the collected tag counts to "hdd-tags.txt".
/// </summary>
/// <remarks>
/// <c>lineNum</c> is incremented for every line read, including lines starting with "#",
/// which are otherwise skipped. When the re-serialized entry differs from the input line,
/// both are written to <c>swTrip</c> (input first). <c>countTags</c> is called with the raw
/// line of every entry that parsed. The tag file holds one "count TAB tag" line per item in
/// <c>tags</c>, ordered by descending count.
/// </remarks>
public void Work()
{
    for (string line = srDict.ReadLine(); line != null; line = srDict.ReadLine())
    {
        ++lineNum;
        if (line.StartsWith("#"))
        {
            continue;
        }

        CedictEntry entry = parser.ParseEntry(line, lineNum, swDiag);
        if (entry == null)
        {
            continue;
        }

        string roundTripped = CedictWriter.Write(entry);
        if (roundTripped != line)
        {
            swTrip.WriteLine(line);
            swTrip.WriteLine(roundTripped);
        }
        countTags(line);
    }

    // Flatten the tag counters into a list so they can be ordered by frequency.
    var tagCounts = new List<TC>();
    foreach (var pair in tags)
    {
        tagCounts.Add(new TC { Tag = pair.Key, Count = pair.Value });
    }
    tagCounts.Sort((a, b) => b.Count.CompareTo(a.Count));

    using (var tagFile = new FileStream("hdd-tags.txt", FileMode.Create, FileAccess.ReadWrite))
    using (var tagWriter = new StreamWriter(tagFile))
    {
        foreach (var item in tagCounts)
        {
            tagWriter.WriteLine(item.Count + "\t" + item.Tag);
        }
    }
}
/// <summary>
/// Reads "chdict.u8", runs the per-entry checks and counters on every parsed entry, and writes
/// diagnostics to "chd-diag.txt", round-trip mismatches to "chd-trip.txt" and statistics to
/// "chd-stats.txt".
/// </summary>
/// <remarks>
/// <c>lineNum</c> is incremented for every line read; lines starting with "#" and lines the
/// parser returns null for are skipped. The statistics file lists the entry count, the sense
/// count and <c>entriesWithMW</c>, then the items of <c>tags</c> by descending count, then the
/// keys of <c>simpMWCounts</c> by descending value.
/// </remarks>
public void Work()
{
    using (var dictFile = new FileStream("chdict.u8", FileMode.Open, FileAccess.Read))
    using (var dictReader = new StreamReader(dictFile))
    using (var diagFile = new FileStream("chd-diag.txt", FileMode.Create, FileAccess.ReadWrite))
    using (var diagWriter = new StreamWriter(diagFile))
    using (var tripFile = new FileStream("chd-trip.txt", FileMode.Create, FileAccess.ReadWrite))
    using (var tripWriter = new StreamWriter(tripFile))
    {
        for (string line = dictReader.ReadLine(); line != null; line = dictReader.ReadLine())
        {
            ++lineNum;
            if (line.StartsWith("#"))
            {
                continue;
            }

            CedictEntry entry = parser.ParseEntry(line, lineNum, diagWriter);
            if (entry == null)
            {
                continue;
            }

            // Record lines that change when parsed and written back.
            string roundTripped = CedictWriter.Write(entry);
            if (roundTripped != line)
            {
                tripWriter.WriteLine(line);
                tripWriter.WriteLine(roundTripped);
            }

            fileHead(entry);
            countTags(entry);
            checkCommas(entry, lineNum, diagWriter);
            countPrefixes(entry, diagWriter);
            ++entryCount;
            senseCount += entry.SenseCount;
            countMeasureWords(entry);
        }

        writeHeadIssues(diagWriter);
        writePrefixes();

        // Flatten the tag counters into a list so they can be ordered by frequency.
        var tagCounts = new List<TC>();
        foreach (var pair in tags)
        {
            tagCounts.Add(new TC { Tag = pair.Key, Count = pair.Value });
        }
        tagCounts.Sort((a, b) => b.Count.CompareTo(a.Count));

        using (var statsFile = new FileStream("chd-stats.txt", FileMode.Create, FileAccess.ReadWrite))
        using (var statsWriter = new StreamWriter(statsFile))
        {
            statsWriter.WriteLine("ZH entries: " + entryCount);
            statsWriter.WriteLine("HU senses: " + senseCount);
            statsWriter.WriteLine("Entries with CL: " + entriesWithMW);
            statsWriter.WriteLine();

            foreach (var item in tagCounts)
            {
                statsWriter.WriteLine(item.Count + "\t" + item.Tag);
            }
            statsWriter.WriteLine();

            // Keys of simpMWCounts, ordered by their value, largest first.
            var measureWords = new List<string>();
            foreach (var pair in simpMWCounts)
            {
                measureWords.Add(pair.Key);
            }
            measureWords.Sort((a, b) => simpMWCounts[b].CompareTo(simpMWCounts[a]));

            foreach (string measureWord in measureWords)
            {
                statsWriter.WriteLine(simpMWCounts[measureWord] + "\t" + measureWord);
            }
        }
    }
}
/// <summary>
/// Reads "chdict.u8" line by line and appends one <c>ResItem</c> to <c>items</c>
/// for every entry that is kept.
/// </summary>
/// <remarks>
/// Before parsing, the text between the first " [" and the following "] /" is lower-cased.
/// A line is dropped when it starts with "#", when the parser returns null for it, or when
/// the entry's <c>ChSimpl</c> is longer than 16 characters. IDs come from a generator seeded
/// with 0 and are re-drawn until they are unique within this run.
/// </remarks>
public void Work()
{
    Random rnd = new Random(0);
    CedictParser parser = new CedictParser();
    HashSet<int> idSet = new HashSet<int>();
    StringBuilder sb = new StringBuilder();
    using (FileStream fsIn = new FileStream("chdict.u8", FileMode.Open, FileAccess.Read))
    using (StreamReader sr = new StreamReader(fsIn))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            if (line.StartsWith("#", StringComparison.Ordinal))
            {
                continue;
            }

            // Lower-case the bracketed section. Ordinal searches keep the index arithmetic
            // below exact. The closing marker is looked for only after the opening one, so
            // the computed length can never be negative.
            int ix1 = line.IndexOf(" [", StringComparison.Ordinal);
            int ix2 = ix1 < 0 ? -1 : line.IndexOf("] /", ix1 + 2, StringComparison.Ordinal);
            if (ix2 >= 0)
            {
                // Invariant casing: the result must not depend on the machine's culture.
                string bracketed = line.Substring(ix1 + 2, ix2 - ix1 - 2).ToLowerInvariant();
                line = line.Substring(0, ix1 + 2) + bracketed + line.Substring(ix2);
            }
            // A line without both markers (a blank line, for instance) is passed to the
            // parser unchanged instead of failing in Substring.

            CedictEntry entry = parser.ParseEntry(line, 0, null);
            if (entry == null)
            {
                continue;
            }
            if (entry.ChSimpl.Length > 16)
            {
                continue;
            }

            int id = rnd.Next();
            while (idSet.Contains(id))
            {
                id = rnd.Next();
            }
            idSet.Add(id);
            string strId = EntryId.IdToString(id);

            sb.Clear();
            // Line with ID
            sb.AppendLine("# ID-" + strId);
            // First version metainfo
            string statStr = "Stat-Verif";
            sb.AppendLine("# Ver 2017-05-02T22:41:05Z gabor " + statStr + " 001>CHDICT törzsanyag");
            // The entry itself
            sb.AppendLine(CedictWriter.Write(entry));

            items.Add(new ResItem { ID = id, Lines = sb.ToString() });
        }
    }
}