Beispiel #1
0
        private WordDictionaryFile LoadFromTextFile(String fileName)
        {
            WordDictionaryFile dictFile = new WordDictionaryFile();
            dictFile.Dicts = new List<WordAttribute>();

            using (StreamReader sr = new StreamReader(fileName, Encoding.UTF8))
            {
                while (!sr.EndOfStream)
                {
                    string line = sr.ReadLine();

                    string[] strs = line.Split(new char[] { '|' });

                    if (strs.Length == 3)
                    {
                        string word = strs[0].Trim();

                        POS pos = (POS)int.Parse(strs[1].Substring(2, strs[1].Length - 2), System.Globalization.NumberStyles.HexNumber);
                        double frequency = double.Parse(strs[2]);
                        WordAttribute dict = new WordAttribute(word, pos, frequency);

                        dictFile.Dicts.Add(dict);
                    }
                }
            }

            return dictFile;
        }
Beispiel #2
0
        private WordDictionaryFile LoadFromBinFile(String fileName, out string verNumStr)
        {
            WordDictionaryFile dictFile = new WordDictionaryFile();
            dictFile.Dicts = new List<WordAttribute>();

            FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read);

            byte[] version = new byte[32];
            fs.Read(version, 0, version.Length);

            String ver = Encoding.UTF8.GetString(version, 0, version.Length);

            int zeroPosition = ver.IndexOf('\0');
            if (zeroPosition >= 0)
            {
                ver = ver.Substring(0, zeroPosition);
            }

            verNumStr = Framework.Regex.GetMatch(ver, "Pan Gu Segment V(.+)", true);

            while (fs.Position < fs.Length)
            {
                byte[] buf = new byte[sizeof(int)];
                fs.Read(buf, 0, buf.Length);
                int length = BitConverter.ToInt32(buf, 0);

                buf = new byte[length];

                fs.Read(buf, 0, buf.Length);

                string word = Encoding.UTF8.GetString(buf, 0, length - sizeof(int) - sizeof(double));
                POS pos = (POS)BitConverter.ToInt32(buf, length - sizeof(int) - sizeof(double));
                double frequency = BitConverter.ToDouble(buf, length - sizeof(double));

                WordAttribute dict = new WordAttribute(word, pos, frequency);
                string.Intern(dict.Word);

                dictFile.Dicts.Add(dict);
            }

            fs.Close();

            return dictFile;
        }