Esempio n. 1
0
        /// <summary>
        /// Loads the main dictionary, the unknown-word statistics dictionary and the
        /// name prefix/suffix statistics table from the files under <c>_DictPath</c>.
        /// </summary>
        /// <param name="clear">
        /// When true, the frequency of every loaded word is reset to 0 and the
        /// name statistics are cleared after loading.
        /// </param>
        public virtual void LoadDict(bool clear)
        {
            // Main dictionary.
            string mainDictFile = _DictPath + "Dict.dct";
            _Dict         = Dict.LoadFromBinFileEx(mainDictFile);
            _DictMgr.Dict = _Dict;

            // Register every entry with the word extractor and the part-of-speech table.
            foreach (T_DictStruct entry in _Dict.Dicts)
            {
                if (clear)
                {
                    entry.Frequency = 0;
                }

                _ExtractWords.InsertWordToDfa(entry.Word, entry);
                _POS.AddWordPos(entry.Word, entry.Pos);
            }

            // Unknown-word statistics dictionary: start from an empty one when the file is absent.
            string unknownWordsFile = _DictPath + "UnknownWords.dct";
            _UnknownWordsDict = File.Exists(unknownWordsFile)
                ? Dict.LoadFromBinFileEx(unknownWordsFile)
                : new T_DictFile();
            _UnknownWordsDictMgr.Dict = _UnknownWordsDict;

            // Name prefix/suffix statistics table.
            _MatchNameRule.LoadNameTraffic(_DictPath + "Name.dct");
            if (clear)
            {
                _MatchNameRule.ClearNameTraffic();
            }

            _MatchNameRule.TrafficUnknownWordHandle = TrafficUnknownWord;
        }
Esempio n. 2
0
        /// <summary>
        /// Writes a dictionary to a binary file in the "KTDictSeg Dict V1.3" format:
        /// a 32-byte zero-padded UTF-8 version header followed by one record per entry.
        /// Each record is a 4-byte length, the UTF-8 word bytes, the part-of-speech
        /// value and the frequency value, where the length counts everything after
        /// the length field itself.
        /// </summary>
        /// <param name="fileName">Path of the file to create; an existing file is overwritten.</param>
        /// <param name="dictFile">Dictionary to save. Nothing is written when its entry list is null.</param>
        static public void SaveToBinFileEx(String fileName, T_DictFile dictFile)
        {
            // Check the input before FileMode.Create truncates the target, so a bad
            // argument cannot destroy an existing dictionary. The null-list behaviour
            // mirrors SaveToTextFile.
            if (dictFile.Dicts == null)
            {
                return;
            }

            // Fixed-size header: the version signature, zero-padded to 32 bytes.
            byte[] version   = new byte[32];
            byte[] signature = System.Text.Encoding.UTF8.GetBytes("KTDictSeg Dict V1.3");
            Array.Copy(signature, version, signature.Length);

            // The using block closes the file handle even if a write or encoding call throws.
            using (FileStream fs = new FileStream(fileName, FileMode.Create))
            {
                fs.Write(version, 0, version.Length);

                foreach (T_DictStruct dict in dictFile.Dicts)
                {
                    byte[] word      = System.Text.Encoding.UTF8.GetBytes(dict.Word);
                    byte[] pos       = System.BitConverter.GetBytes(dict.Pos);
                    byte[] frequency = System.BitConverter.GetBytes(dict.Frequency);
                    byte[] length    = System.BitConverter.GetBytes(word.Length + frequency.Length + pos.Length);

                    fs.Write(length, 0, length.Length);
                    fs.Write(word, 0, word.Length);
                    fs.Write(pos, 0, pos.Length);
                    fs.Write(frequency, 0, frequency.Length);
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Serializes the dictionary with <c>CSerialization.SerializeBinary</c> and
        /// writes the resulting stream to the given file.
        /// </summary>
        /// <param name="fileName">Path of the file to write.</param>
        /// <param name="dictFile">Dictionary to serialize.</param>
        static public void SaveToBinFile(String fileName, T_DictFile dictFile)
        {
            MemoryStream serialized = (MemoryStream)CSerialization.SerializeBinary(dictFile);

            // Rewind so the writer starts from the first serialized byte.
            serialized.Position = 0;
            CFile.WriteStream(fileName, serialized);
        }
Esempio n. 4
0
        /// <summary>
        /// Reads a dictionary from a UTF-8 text file holding one "word|pos" entry per
        /// line, where pos is an integer part-of-speech value.
        /// </summary>
        /// <param name="fileName">Path of the text dictionary to read.</param>
        /// <returns>
        /// A dictionary containing every well-formed entry. Lines that do not split
        /// into exactly two fields, or whose second field is not an integer, are skipped.
        /// </returns>
        static public T_DictFile LoadFromTextDict(String fileName)
        {
            // Chinese numeral characters; a word containing any of them also gets the POS_A_M flag.
            char[] numeralChars = { '一', '二', '三', '四', '五', '六', '七', '八', '九', '十' };

            T_DictFile dictFile = new T_DictFile();

            String dictStr = CFile.ReadFileToString(fileName, "utf-8");

            String[] words = CRegex.Split(dictStr, "\r\n");

            foreach (String word in words)
            {
                String[] wp = CRegex.Split(word, @"\|");

                if (wp == null || wp.Length != 2)
                {
                    continue;
                }

                // TryParse rejects malformed numbers without using exceptions for
                // control flow and without swallowing unrelated exceptions.
                int pos;

                if (!int.TryParse(wp[1], out pos))
                {
                    continue;
                }

                T_DictStruct dict = new T_DictStruct();
                dict.Word = wp[0];
                dict.Pos  = pos;

                if (dict.Word.IndexOfAny(numeralChars) >= 0)
                {
                    dict.Pos |= (int)T_POS.POS_A_M;
                }

                // Special case: this one word always gets POS_D_N, replacing any value read from the file.
                if (dict.Word == "字典")
                {
                    dict.Pos = (int)T_POS.POS_D_N;
                }

                dictFile.Dicts.Add(dict);
            }

            return(dictFile);
        }
Esempio n. 5
0
        /// <summary>
        /// Writes the dictionary to a UTF-8 text file, one "word|pos" line per entry,
        /// each terminated by CR LF. Does nothing when the entry list is null.
        /// </summary>
        /// <param name="fileNmae">Path of the text file to write.</param>
        /// <param name="dictFile">Dictionary whose entries are written.</param>
        static public void SaveToTextFile(String fileNmae, T_DictFile dictFile)
        {
            if (dictFile.Dicts != null)
            {
                StringBuilder text = new StringBuilder();

                // One line per entry: the word, a pipe, then the part-of-speech value.
                foreach (T_DictStruct entry in dictFile.Dicts)
                {
                    text.AppendFormat("{0}|{1}\r\n", entry.Word, entry.Pos);
                }

                String content = text.ToString();
                CFile.WriteString(fileNmae, content, "utf-8");
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Loads a dictionary from a binary file. Files that start with a
        /// "KTDictSeg Dict V..." header are read as a sequence of length-prefixed
        /// records (word bytes, 4-byte part of speech, 8-byte frequency); files
        /// without that header are handed to <c>LoadFromBinFile</c>.
        /// </summary>
        /// <param name="fileName">Path of the dictionary file. Its attributes are reset to Normal before it is opened.</param>
        /// <returns>The loaded dictionary.</returns>
        /// <exception cref="EndOfStreamException">A record is cut short by the end of the file.</exception>
        /// <exception cref="InvalidDataException">A record length is too small to hold the fixed-size fields.</exception>
        static public T_DictFile LoadFromBinFileEx(string fileName)
        {
            // Every record ends with a 4-byte part of speech and an 8-byte frequency.
            const int trailerSize = sizeof(int) + sizeof(double);

            T_DictFile dictFile = new T_DictFile();

            dictFile.Dicts = new List <T_DictStruct>();

            // NOTE(review): presumably clears read-only and similar flags so the file can be opened — confirm.
            File.SetAttributes(fileName, FileAttributes.Normal);

            bool legacyFormat;

            // The using block releases the file handle even when reading or decoding throws.
            using (FileStream fs = new FileStream(fileName, FileMode.Open))
            {
                byte[] version = new byte[32];
                ReadBlock(fs, version, version.Length);
                String ver = Encoding.UTF8.GetString(version, 0, version.Length);

                String verNumStr = CRegex.GetMatch(ver, "KTDictSeg Dict V(.+)", true);

                // No version header: the file predates format 1.3.
                legacyFormat = String.IsNullOrEmpty(verNumStr);

                if (!legacyFormat)
                {
                    byte[] lengthBuf = new byte[sizeof(int)];

                    while (fs.Position < fs.Length)
                    {
                        if (ReadBlock(fs, lengthBuf, lengthBuf.Length) != lengthBuf.Length)
                        {
                            throw new EndOfStreamException("Truncated record length in dictionary file: " + fileName);
                        }

                        int length = BitConverter.ToInt32(lengthBuf, 0);

                        if (length < trailerSize)
                        {
                            throw new InvalidDataException("Invalid record length " + length + " in dictionary file: " + fileName);
                        }

                        // Reject lengths that run past the end of the file before allocating the buffer.
                        if (length > fs.Length - fs.Position)
                        {
                            throw new EndOfStreamException("Truncated record in dictionary file: " + fileName);
                        }

                        byte[] buf = new byte[length];

                        if (ReadBlock(fs, buf, buf.Length) != buf.Length)
                        {
                            throw new EndOfStreamException("Truncated record in dictionary file: " + fileName);
                        }

                        T_DictStruct dict = new T_DictStruct();

                        dict.Word      = Encoding.UTF8.GetString(buf, 0, length - trailerSize);
                        dict.Pos       = BitConverter.ToInt32(buf, length - trailerSize);
                        dict.Frequency = BitConverter.ToDouble(buf, length - sizeof(double));
                        dictFile.Dicts.Add(dict);
                    }
                }
            }

            if (legacyFormat)
            {
                // The stream is already closed here, so the legacy loader can open the file itself.
                return(LoadFromBinFile(fileName));
            }

            return(dictFile);
        }

        /// <summary>
        /// Reads from the stream until <paramref name="count"/> bytes have been stored
        /// at the start of the buffer or the end of the stream is reached.
        /// </summary>
        /// <returns>The number of bytes actually read.</returns>
        static private int ReadBlock(Stream stream, byte[] buffer, int count)
        {
            int total = 0;

            while (total < count)
            {
                // Read may return fewer bytes than requested; 0 means end of stream.
                int read = stream.Read(buffer, total, count - total);

                if (read == 0)
                {
                    break;
                }

                total += read;
            }

            return(total);
        }