コード例 #1
0
        /// <summary>
        /// Rebuilds the text in <paramref name="chars"/>, substituting the trie value of the longest
        /// match recorded at each start offset. Characters at offsets with no recorded value are
        /// copied through unchanged.
        /// </summary>
        /// <param name="chars">Input characters to scan.</param>
        /// <param name="trie">Trie whose <c>Match</c> reports hits as (begin, end, value).</param>
        /// <returns>The rewritten text.</returns>
        public static string Seg4Longest(char[] chars, ACDoubleArrayTrie<string> trie)
        {
            // Indexed by start offset: the value of the longest hit seen so far and that hit's length.
            var replacements = new string[chars.Length];
            var matchLengths = new int[chars.Length];

            Action<int, int, string> keepLongest = (begin, end, value) =>
            {
                // NOTE(review): end - begin is treated as the number of chars covered by the hit,
                // i.e. end is assumed to be exclusive - confirm against ACDoubleArrayTrie.Match.
                var span = end - begin;
                if (matchLengths[begin] >= span)
                {
                    return;     // an equally long or longer hit is already stored for this offset
                }
                replacements[begin] = value;
                matchLengths[begin] = span;
            };

            trie.Match(chars, keepLongest);

            var output = new StringBuilder(chars.Length);
            int pos = 0;

            while (pos < replacements.Length)
            {
                string hit = replacements[pos];
                if (hit != null)
                {
                    // Emit the replacement and jump over every char the hit covered.
                    output.Append(hit);
                    pos += matchLengths[pos];
                }
                else
                {
                    output.Append(chars[pos]);
                    pos++;
                }
            }

            return output.ToString();
        }
コード例 #2
0
        /// <summary>
        /// Writes the pinyin values of <paramref name="dict"/>, followed by the serialized trie, to
        /// the file <c>path + Predefine.BIN_EXT</c>.
        /// </summary>
        /// <remarks>
        /// Data written by this method, in order: the entry count (4 bytes), then for each entry the
        /// number of pinyins (4 bytes) followed by 4 bytes of <c>Index</c> per pinyin. The remainder
        /// of the file is whatever <c>trie.Save</c> writes.
        /// </remarks>
        /// <param name="path">Target path without the binary extension.</param>
        /// <param name="trie">Trie to serialize after the value table.</param>
        /// <param name="dict">Entries whose values are written, in the dictionary's iteration order.</param>
        /// <returns>The result of <c>trie.Save</c>, or false if any exception occurred.</returns>
        private static bool SaveDat(string path, ACDoubleArrayTrie<Pinyin[]> trie, SortedDictionary<string, Pinyin[]> dict)
        {
            try
            {
                // The stream is opened inside the try so that a failure to create the file
                // (missing directory, no write permission, ...) is reported as false like every
                // other failure here, instead of escaping to the caller as an exception.
                using (var fs = new FileStream(path + Predefine.BIN_EXT, FileMode.Create, FileAccess.Write))
                {
                    fs.Write(BitConverter.GetBytes(dict.Count), 0, 4);

                    foreach (var p in dict)
                    {
                        var values = p.Value;
                        fs.Write(BitConverter.GetBytes(values.Length), 0, 4);

                        foreach (var py in values)
                        {
                            fs.Write(BitConverter.GetBytes(py.Index), 0, 4);
                        }
                    }

                    return trie.Save(fs);
                }
            }
            catch (Exception)
            {
                // log warning "save data file error"
                return false;
            }
        }
コード例 #3
0
        /// <summary>
        /// Type initializer: loads the person-name dictionary and its transform matrix, then builds
        /// the pattern trie from the names of the <c>NRPattern</c> values 0 through <c>NRPattern.XD</c>.
        /// </summary>
        static ChsPersonNameDict()
        {
            dictionary = new NRDictionary();

            bool dictionaryLoaded = dictionary.Load(Config.Person_Dict_Path);
            if (!dictionaryLoaded)
            {
                // log: loading error
                // Returns before transformMatrixDictionary and _trie are assigned below.
                return;
            }

            transformMatrixDictionary = new TransformMatrixDictionary<NR>(typeof(NR));
            transformMatrixDictionary.Load(Config.Person_TR_Dict_Path);

            _trie = new ACDoubleArrayTrie<NRPattern>();
            var patternsByName = new SortedDictionary<string, NRPattern>(StrComparer.Default);

            // Register every enum value in [0, XD] under its own name.
            int patternCount = (int)NRPattern.XD + 1;
            int code = 0;
            while (code < patternCount)
            {
                NRPattern pattern = (NRPattern)code;
                patternsByName.Add(pattern.ToString(), pattern);
                code++;
            }

            _trie.Build(patternsByName);
        }
コード例 #4
0
        /// <summary>
        /// Type initializer: loads the place-name dictionary and its transform matrix, then builds
        /// the trie over the four fixed tag patterns "CH", "CDH", "CDEH" and "GH".
        /// </summary>
        static PlaceDictionary()
        {
            dict = new NSDictionary();
            dict.Load(Config.Place_Dict_Path);

            trans_tr_dict = new TransformMatrixDictionary<NS>(typeof(NS));
            trans_tr_dict.Load(Config.Place_TR_Dict_Path);

            trie = new ACDoubleArrayTrie<string>();

            // Only the keys matter here; every pattern is registered with a null value.
            var patterns = new SortedDictionary<string, string>(StrComparer.Default)
            {
                { "CH", null },
                { "CDH", null },
                { "CDEH", null },
                { "GH", null },
            };

            trie.Build(patterns);
        }
コード例 #5
0
        /// <summary>
        /// Type initializer: loads the organization dictionary and its transform matrix, then builds
        /// the pattern trie from the names of the <c>NTPattern</c> values 0 through
        /// <c>NTPattern.WWIWWCWD</c>, each mapped to itself.
        /// </summary>
        static OrgDictionary()
        {
            dictionary = new NTDictionary();
            dictionary.Load(Config.Org_Dict_Path);

            transformMatrixDictionary = new TransformMatrixDictionary<NT>(typeof(NT));
            transformMatrixDictionary.Load(Config.Org_TR_Dict_Path);

            _trie = new ACDoubleArrayTrie<string>();

            var patterns = new SortedDictionary<string, string>(StrComparer.Default);
            int code = 0;

            // Key and value are both the enum member's name.
            while (code <= (int)NTPattern.WWIWWCWD)
            {
                string patternName = ((NTPattern)code).ToString();
                patterns.Add(patternName, patternName);
                code++;
            }

            _trie.Build(patterns);
        }
コード例 #6
0
        /// <summary>
        /// Loads a trie from the binary data file <c>path + Predefine.BIN_EXT</c>.
        /// The file is read as a value count, then that many strings, and the rest is handed to
        /// <c>trie.Load</c> together with the value array.
        /// </summary>
        /// <param name="path">Path of the data file without the binary extension.</param>
        /// <param name="trie">Trie instance to populate.</param>
        /// <returns>
        /// False if <c>ByteArray.Create</c> returns null or the stored value count is negative;
        /// otherwise true.
        /// </returns>
        public static bool LoadDat(string path, ACDoubleArrayTrie<string> trie)
        {
            var ba = ByteArray.Create(path + Predefine.BIN_EXT);

            if (ba == null)
            {
                return false;
            }

            var size = ba.NextInt();

            // A negative count cannot describe a value table, and new string[size] would throw
            // OverflowException for it. Report the file as unusable instead of crashing.
            if (size < 0)
            {
                return false;
            }

            var strs = new string[size];

            for (int i = 0; i < size; i++)
            {
                strs[i] = ba.NextString();
            }

            trie.Load(ba, strs);
            return true;
        }
コード例 #7
0
        /// <summary>
        /// Converts <paramref name="chars"/> to pinyin using longest-match lookup in the trie.
        /// </summary>
        /// <param name="chars">Characters to convert.</param>
        /// <param name="trie">Trie mapping words to their pinyin arrays.</param>
        /// <param name="remainNone">
        /// When true, a position with no match contributes <c>PYName.none5</c> to the result;
        /// when false, such positions are skipped.
        /// </param>
        /// <returns>The pinyin sequence collected from the matches.</returns>
        private static List<Pinyin> Seg4Longest(char[] chars, ACDoubleArrayTrie<Pinyin[]> trie, bool remainNone)
        {
            // wordNet[i] holds the pinyins of the longest match starting at chars[i], or null if none.
            var wordNet = new Pinyin[chars.Length][];
            Action<int, int, Pinyin[]> action = (begin, end, value) =>
            {
                var len = end - begin;      // length of the substring matched by this hit

                // Store the hit if nothing is stored at this start position yet, or if it is longer
                // than the array already stored there.
                if (wordNet[begin] == null || len > wordNet[begin].Length)
                {
                    // A single-character match keeps only the first pinyin of the value.
                    wordNet[begin] = len == 1 ? new[] { value[0] } : value;
                }
            };

            trie.Match(chars, action);

            var list = new List<Pinyin>();

            for (int i = 0; i < wordNet.Length;)
            {
                if (wordNet[i] == null)         // no match starts at position i
                {
                    if (remainNone)             // emit the "none" placeholder for the unmatched char
                    {
                        list.Add(Pinyin.PinyinTable[(int)PYName.none5]);
                    }

                    i++;
                    continue;
                }

                for (var j = 0; j < wordNet[i].Length; j++)
                {
                    list.Add(wordNet[i][j]);    // append the pinyins of the match in order
                }

                // NOTE(review): advancing by the array length assumes a stored value holds exactly
                // one pinyin per matched character - confirm against the dictionary data.
                i += wordNet[i].Length;
            }

            return list;
        }
コード例 #8
0
        /// <summary>
        /// Populates <paramref name="trie"/> from the dictionary at <paramref name="path"/>.
        /// The binary data file is tried first; if that fails, the text dictionary is loaded, the
        /// trie is built from it, and a binary data file is written.
        /// </summary>
        /// <param name="path">Path of the dictionary file.</param>
        /// <param name="trie">Trie to populate.</param>
        /// <param name="reverse">
        /// Passed through to the text loader; when true, <c>Predefine.REVERSE_EXT</c> is also
        /// appended to the binary file path.
        /// </param>
        /// <returns>True if the trie was loaded from either source; otherwise false.</returns>
        public static bool Load(string path, ACDoubleArrayTrie<string> trie, bool reverse)
        {
            string binaryPath = reverse ? path + Predefine.REVERSE_EXT : path;

            // First choice: the binary data file.
            if (LoadDat(binaryPath, trie))
            {
                return true;
            }

            // Fallback: read the text dictionary into a sorted map.
            var entries = new SortedDictionary<string, string>(StrComparer.Default);
            bool textLoaded = Load(entries, reverse, path);

            if (textLoaded)
            {
                trie.Build(entries);
                // The result of SaveDat is not checked; a failed write does not fail the load.
                SaveDat(binaryPath, trie, entries);
            }

            return textLoaded;
        }
コード例 #9
0
        /// <summary>
        /// Saves the binary data file <c>path + Predefine.BIN_EXT</c>: the values of
        /// <paramref name="entries"/> are written first, then the trie via <c>trie.Save</c>.
        /// </summary>
        /// <remarks>
        /// Data written by this method, in order: the entry count (4 bytes), then for each entry the
        /// value's character count (4 bytes) followed by 2 bytes per character.
        /// </remarks>
        /// <param name="path">Target path without the binary extension.</param>
        /// <param name="trie">Trie to serialize after the value table.</param>
        /// <param name="entries">Entries whose values are written, in the dictionary's iteration order.</param>
        /// <returns>
        /// False if <c>trie.Size</c> differs from <c>entries.Count</c> or if any exception occurred;
        /// otherwise the result of <c>trie.Save</c>.
        /// </returns>
        public static bool SaveDat(string path, ACDoubleArrayTrie<string> trie, SortedDictionary<string, string> entries)
        {
            if (trie.Size != entries.Count)
            {
                // log warning "key value pair is unmatched"
                return false;
            }

            try
            {
                // The stream is opened inside the try so that a failure to create the file
                // (missing directory, no write permission, ...) is reported as false like every
                // other failure here, instead of escaping to the caller as an exception.
                using (var fs = new FileStream(path + Predefine.BIN_EXT, FileMode.Create, FileAccess.Write))
                {
                    fs.Write(BitConverter.GetBytes(entries.Count), 0, 4);

                    foreach (var entry in entries)
                    {
                        fs.Write(BitConverter.GetBytes(entry.Value.Length), 0, 4);

                        foreach (var c in entry.Value)
                        {
                            fs.Write(BitConverter.GetBytes(c), 0, 2);
                        }
                    }

                    return trie.Save(fs);
                }
            }
            catch (Exception)
            {
                return false;
            }
        }