/// <summary>
/// Recursively converts a dictionary-based <c>HeapPrefNode</c> subtree into
/// an array-based <c>PrefNode</c> subtree.
/// </summary>
/// <param name="hn">Source node whose <c>Word</c> flag and <c>Nexts</c> children are copied.</param>
/// <param name="c">Character stored in the resulting node's <c>C</c> member.</param>
/// <returns>
/// A new <c>PrefNode</c>. When <paramref name="hn"/> has children, its
/// <c>Nexts</c> array holds the converted children ordered by their key
/// character; when it has none, <c>Nexts</c> is not assigned here.
/// </returns>
static PrefNode wvaBuildTree(HeapPrefNode hn, char c)
{
    PrefNode node = new PrefNode { C = c, Word = hn.Word };

    int childCount = hn.Nexts.Count;
    if (childCount != 0)
    {
        node.Nexts = new PrefNode[childCount];

        // Children are emitted in ascending character order.
        List<char> keys = new List<char>(hn.Nexts.Keys);
        keys.Sort();

        int slot = 0;
        foreach (char key in keys)
        {
            node.Nexts[slot] = wvaBuildTree(hn.Nexts[key], key);
            ++slot;
        }
    }

    return node;
}
/// <summary>
/// Reads "cedict_ts.u8" (opened through <c>ropen</c>), builds a prefix tree
/// from the headword captured by group 2 of <c>reEntry</c> (presumably the
/// simplified form), stores the compacted tree in <c>prefRoot</c>, and prints
/// a histogram of sense counts to the console.
/// </summary>
/// <remarks>
/// Lines that are empty, start with '#', or do not match <c>reEntry</c> are
/// skipped. Only the first line seen for each distinct headword contributes
/// to the tree and to the histogram.
/// </remarks>
static void wvaReadCedict()
{
    Console.WriteLine("Building simplified ZH prefix tree");

    Dictionary<int, int> senseCountToEntryCount = new Dictionary<int, int>();
    HeapPrefNode hroot = new HeapPrefNode();
    HashSet<string> simps = new HashSet<string>();

    using (var sr = ropen("cedict_ts.u8"))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            if (line.Length == 0 || line.StartsWith("#", StringComparison.Ordinal))
            {
                continue;
            }

            Match m = reEntry.Match(line);
            // A failed match yields empty groups; without this guard the empty
            // headword would flag the tree root itself as a word.
            if (!m.Success)
            {
                continue;
            }

            string simp = m.Groups[2].Value;
            // HashSet.Add returns false when the headword was already seen.
            if (!simps.Add(simp))
            {
                continue;
            }

            // Walk/extend the tree one character at a time.
            HeapPrefNode n = hroot;
            foreach (char c in simp)
            {
                HeapPrefNode child;
                if (!n.Nexts.TryGetValue(c, out child))
                {
                    child = new HeapPrefNode();
                    n.Nexts[c] = child;
                }
                n = child;
            }
            n.Word = true;

            // How many senses?
            // Starts at -1 so that a line with N+1 slashes is counted as N senses.
            int senseCount = -1;
            foreach (char c in line)
            {
                if (c == '/')
                {
                    ++senseCount;
                }
            }

            // TryGetValue leaves entryCount at 0 when the key is absent.
            int entryCount;
            senseCountToEntryCount.TryGetValue(senseCount, out entryCount);
            senseCountToEntryCount[senseCount] = entryCount + 1;
        }
    }

    prefRoot = wvaBuildTree(hroot, (char)0);

    // Report sense counts ordered by how many entries have them, most common first.
    List<int> scList = new List<int>(senseCountToEntryCount.Keys);
    scList.Sort((x, y) =>
    {
        int byEntries = senseCountToEntryCount[y].CompareTo(senseCountToEntryCount[x]);
        // List.Sort is unstable; break ties on the sense count so the
        // printed order is deterministic.
        return byEntries != 0 ? byEntries : x.CompareTo(y);
    });

    Console.WriteLine("Sense #\tEntry #");
    foreach (int sc in scList)
    {
        Console.WriteLine(sc + "\t" + senseCountToEntryCount[sc]);
    }
}