Beispiel #1
0
        /// <summary>
        /// Reads "handedict.u8" line by line and appends one <c>ResItem</c> to <c>items</c> for every
        /// line that parses into an entry whose simplified headword is at most 16 characters long.
        /// </summary>
        /// <remarks>
        /// Lines starting with '#' are skipped. Each kept entry gets a unique ID drawn from a
        /// <see cref="Random"/> seeded with the constant 0, and a three-line text block:
        /// the ID line, a version/status line, and the entry as written by <c>CedictWriter</c>.
        /// </remarks>
        public void Work()
        {
            var idSource     = new Random(0);
            var cedictParser = new CedictParser();
            var usedIds      = new HashSet<int>();
            var block        = new StringBuilder();

            using (var input = new FileStream("handedict.u8", FileMode.Open, FileAccess.Read))
            using (var reader = new StreamReader(input))
            {
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    // Comment lines carry no entry.
                    if (line.StartsWith("#"))
                    {
                        continue;
                    }

                    CedictEntry entry = cedictParser.ParseEntry(line, 0, null);
                    // Drop lines the parser rejected and entries with an over-long headword.
                    if (entry == null || entry.ChSimpl.Length > 16)
                    {
                        continue;
                    }

                    // Keep drawing until the set accepts the value, i.e. it has not been used yet.
                    int id;
                    do
                    {
                        id = idSource.Next();
                    }
                    while (!usedIds.Add(id));

                    string idText = EntryId.IdToString(id);

                    string status;
                    if (isVerified(entry))
                    {
                        status = "Stat-Verif";
                    }
                    else
                    {
                        status = "Stat-New";
                    }

                    block.Length = 0;
                    // Line with ID
                    block.AppendLine("# ID-" + idText);
                    // First version metainfo
                    block.AppendLine("# Ver 2011-05-28T01:27:49Z HanDeDict " + status + " 001>Originalversion HanDeDict-Datei");
                    // The entry itself
                    block.AppendLine(CedictWriter.Write(entry));

                    var item = new ResItem();
                    item.ID    = id;
                    item.Lines = block.ToString();
                    items.Add(item);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reads every line from <c>srDict</c>, round-trips each parsed entry through
        /// <c>CedictWriter</c>, and writes the collected tag counts to "hdd-tags.txt".
        /// </summary>
        /// <remarks>
        /// Whenever the re-written text differs from the input line, both the original and the
        /// re-written line are written to <c>swTrip</c>. The tag file holds one "count TAB tag"
        /// line per item in <c>tags</c>, ordered by descending count.
        /// </remarks>
        public void Work()
        {
            for (string raw = srDict.ReadLine(); raw != null; raw = srDict.ReadLine())
            {
                // Comment lines still advance the line counter.
                ++lineNum;
                if (raw.StartsWith("#"))
                {
                    continue;
                }

                CedictEntry parsed = parser.ParseEntry(raw, lineNum, swDiag);
                if (parsed == null)
                {
                    continue;
                }

                string rewritten = CedictWriter.Write(parsed);
                if (!string.Equals(rewritten, raw))
                {
                    // Record the input line and what the writer produced for it.
                    swTrip.WriteLine(raw);
                    swTrip.WriteLine(rewritten);
                }
                countTags(raw);
            }

            var tagCounts = new List<TC>();
            foreach (var pair in tags)
            {
                var tc = new TC();
                tc.Tag   = pair.Key;
                tc.Count = pair.Value;
                tagCounts.Add(tc);
            }
            // Highest count first.
            tagCounts.Sort((a, b) => b.Count.CompareTo(a.Count));

            using (var tagStream = new FileStream("hdd-tags.txt", FileMode.Create, FileAccess.ReadWrite))
            using (var tagWriter = new StreamWriter(tagStream))
            {
                foreach (var tc in tagCounts)
                {
                    tagWriter.WriteLine(tc.Count + "\t" + tc.Tag);
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Processes "chdict.u8": round-trips every parsed entry through <c>CedictWriter</c>, runs the
        /// per-entry checks and counters, and writes the totals to "chd-stats.txt".
        /// </summary>
        /// <remarks>
        /// Diagnostics go to "chd-diag.txt"; lines whose re-written form differs from the input go to
        /// "chd-trip.txt" (original line followed by the re-written one). The stats file lists the
        /// entry, sense and measure-word totals, then the tag counts and the items of
        /// <c>simpMWCounts</c>, each ordered by descending count.
        /// </remarks>
        public void Work()
        {
            using (var dictStream = new FileStream("chdict.u8", FileMode.Open, FileAccess.Read))
            using (var dictReader = new StreamReader(dictStream))
            using (var diagStream = new FileStream("chd-diag.txt", FileMode.Create, FileAccess.ReadWrite))
            using (var diagWriter = new StreamWriter(diagStream))
            using (var tripStream = new FileStream("chd-trip.txt", FileMode.Create, FileAccess.ReadWrite))
            using (var tripWriter = new StreamWriter(tripStream))
            {
                for (string raw = dictReader.ReadLine(); raw != null; raw = dictReader.ReadLine())
                {
                    // Comment lines still advance the line counter.
                    ++lineNum;
                    if (raw.StartsWith("#"))
                    {
                        continue;
                    }

                    CedictEntry parsed = parser.ParseEntry(raw, lineNum, diagWriter);
                    if (parsed == null)
                    {
                        continue;
                    }

                    string rewritten = CedictWriter.Write(parsed);
                    if (!string.Equals(rewritten, raw))
                    {
                        // Record the input line and what the writer produced for it.
                        tripWriter.WriteLine(raw);
                        tripWriter.WriteLine(rewritten);
                    }

                    // Per-entry checks and counters.
                    fileHead(parsed);
                    countTags(parsed);
                    checkCommas(parsed, lineNum, diagWriter);
                    countPrefixes(parsed, diagWriter);
                    ++entryCount;
                    senseCount += parsed.SenseCount;
                    countMeasureWords(parsed);
                }

                writeHeadIssues(diagWriter);
                writePrefixes();

                var tagCounts = new List<TC>();
                foreach (var pair in tags)
                {
                    var tc = new TC();
                    tc.Tag   = pair.Key;
                    tc.Count = pair.Value;
                    tagCounts.Add(tc);
                }
                // Highest count first.
                tagCounts.Sort((a, b) => b.Count.CompareTo(a.Count));

                using (var statsStream = new FileStream("chd-stats.txt", FileMode.Create, FileAccess.ReadWrite))
                using (var statsWriter = new StreamWriter(statsStream))
                {
                    // Overall totals.
                    statsWriter.WriteLine("ZH entries: " + entryCount);
                    statsWriter.WriteLine("HU senses: " + senseCount);
                    statsWriter.WriteLine("Entries with CL: " + entriesWithMW);
                    statsWriter.WriteLine();

                    // Tag counts.
                    foreach (var tc in tagCounts)
                    {
                        statsWriter.WriteLine(tc.Count + "\t" + tc.Tag);
                    }
                    statsWriter.WriteLine();

                    // Keys of simpMWCounts, highest count first.
                    var measureWords = new List<string>();
                    foreach (var pair in simpMWCounts)
                    {
                        measureWords.Add(pair.Key);
                    }
                    measureWords.Sort((a, b) => simpMWCounts[b].CompareTo(simpMWCounts[a]));
                    foreach (string word in measureWords)
                    {
                        statsWriter.WriteLine(simpMWCounts[word] + "\t" + word);
                    }
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Reads "chdict.u8" line by line and appends one <c>ResItem</c> to <c>items</c> for every
        /// line that parses into an entry whose simplified headword is at most 16 characters long.
        /// </summary>
        /// <remarks>
        /// Lines starting with '#' are skipped. Before parsing, the text between " [" and "] /" is
        /// lower-cased with the invariant culture, so the result does not depend on the machine's
        /// locale. Each kept entry gets a unique ID drawn from a <see cref="Random"/> seeded with the
        /// constant 0, and a three-line text block: the ID line, a version/status line, and the entry
        /// as written by <c>CedictWriter</c>.
        /// </remarks>
        public void Work()
        {
            Random        rnd    = new Random(0);
            CedictParser  parser = new CedictParser();
            HashSet <int> idSet  = new HashSet <int>();
            StringBuilder sb     = new StringBuilder();

            using (FileStream fsIn = new FileStream("chdict.u8", FileMode.Open, FileAccess.Read))
                using (StreamReader sr = new StreamReader(fsIn))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        // "#" is a format marker, not linguistic text: compare ordinally.
                        if (line.StartsWith("#", StringComparison.Ordinal))
                        {
                            continue;
                        }

                        // Locate the bracketed section: " [" opens it, "] /" closes it.
                        int ix1 = line.IndexOf(" [", StringComparison.Ordinal);
                        int ix2 = ix1 < 0 ? -1 : line.IndexOf("] /", ix1 + 2, StringComparison.Ordinal);
                        if (ix2 >= 0)
                        {
                            // Lower-case only the text inside the brackets. ToLowerInvariant avoids
                            // culture-specific mappings (e.g. Turkish "I" -> dotless "ı").
                            string head    = line.Substring(0, ix1 + 2);
                            string inside  = line.Substring(ix1 + 2, ix2 - (ix1 + 2));
                            string tail    = line.Substring(ix2);
                            line = head + inside.ToLowerInvariant() + tail;
                        }
                        // When the markers are missing, Substring would throw on the -1 index;
                        // hand the line to the parser unchanged and let it accept or reject it.

                        CedictEntry entry = parser.ParseEntry(line, 0, null);
                        if (entry == null)
                        {
                            continue;
                        }
                        if (entry.ChSimpl.Length > 16)
                        {
                            continue;
                        }

                        // Draw until the set accepts the value, i.e. the ID is not in use yet.
                        int id;
                        do
                        {
                            id = rnd.Next();
                        }
                        while (!idSet.Add(id));
                        string strId = EntryId.IdToString(id);

                        sb.Clear();
                        // Line with ID
                        sb.AppendLine("# ID-" + strId);
                        // First version metainfo
                        const string statStr = "Stat-Verif";
                        sb.AppendLine("# Ver 2017-05-02T22:41:05Z gabor " + statStr + " 001>CHDICT törzsanyag");
                        // The entry itself
                        sb.AppendLine(CedictWriter.Write(entry));

                        items.Add(new ResItem {
                            ID = id, Lines = sb.ToString()
                        });
                    }
                }
        }