Example #1
0
        /// <summary>
        /// Populates <c>_trie</c> for the dictionary at <paramref name="path"/>.
        /// First asks <c>LoadDat</c> to load <c>path + ".bi" + Predefine.BIN_EXT</c>
        /// (presumably a previously saved binary form - confirm against LoadDat);
        /// if that fails, parses the text file and builds the trie from it.
        /// </summary>
        /// <param name="path">Path of the text dictionary; each line is split on single spaces.</param>
        /// <returns>
        /// <c>true</c> if the trie was loaded or built; <c>false</c> if the text file
        /// produced no usable entries.
        /// </returns>
        private static bool Load(string path)
        {
            _trie = new DoubleArrayTrie <string>();
            if (LoadDat(path + ".bi" + Predefine.BIN_EXT))
            {
                return(true);
            }

            var map = new SortedDictionary <string, string>(StrComparer.Default);

            foreach (var line in File.ReadLines(path))
            {
                var param = line.Split(' ');

                // A usable entry needs a key and a value. Blank or truncated lines
                // used to throw IndexOutOfRangeException on param[1]; skip them instead.
                if (param.Length < 2)
                {
                    continue;
                }

                // Keys ending in '@' are excluded. Ordinal comparison keeps the check
                // independent of the current culture.
                if (param[0].EndsWith("@", System.StringComparison.Ordinal))
                {
                    continue;
                }

                map[param[0]] = param[1];
            }

            if (map.Count == 0)
            {
                return(false);
            }
            _trie.Build(map);
            if (!SaveDat(path, map))
            {
                // Saving failed; the in-memory trie is still usable, so this is not fatal.
                // log error
            }
            return(true);
        }
Example #2
0
        /// <summary>
        /// Loads the dictionary at <paramref name="path"/> into <c>_trie</c>.
        /// Values come from <c>OnLoadValue</c>; the trie itself is loaded from
        /// <c>path + ".trie.dat"</c> when possible, otherwise rebuilt from the keys
        /// in the text file and written back to that file.
        /// </summary>
        /// <param name="path">Path of the text dictionary; the first field of each non-blank line is the key.</param>
        /// <returns>
        /// <c>true</c> when the trie was loaded or rebuilt; <c>false</c> when
        /// <c>OnLoadValue</c> returns null or the text file cannot be read.
        /// </returns>
        public bool Load(string path)
        {
            _trie = new DoubleArrayTrie <V>();
            var valueArr = OnLoadValue(path);

            if (valueArr == null)
            {
                // log info ""
                return(false);
            }
            if (LoadDat(path + ".trie.dat", valueArr))
            {
                // log info ""
                return(true);
            }

            var keys = new List <string>(valueArr.Length);

            try
            {
                foreach (var line in File.ReadLines(path))
                {
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }
                    var segs = line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
                    keys.Add(segs[0]);
                }
            }
            catch (Exception)
            {
                // The key list is missing or incomplete. Building (and then saving) a trie
                // from it would persist a corrupt .trie.dat, so report failure instead of
                // silently continuing.
                return(false);
            }

            var error = _trie.Build(keys, valueArr);

            if (error != 0)              // build from the key list failed
            {
                // Retry through a sorted map, then write the values back in the map's
                // iteration order so they line up with the rebuilt trie.
                // NOTE(review): assumes keys has at least valueArr.Length entries - confirm.
                var map = new SortedDictionary <string, V>(StrComparer.Default);
                for (int i = 0; i < valueArr.Length; i++)
                {
                    map[keys[i]] = valueArr[i];
                }
                _trie = new DoubleArrayTrie <V>();
                _trie.Build(map);
                int j = 0;
                foreach (var v in map.Values)
                {
                    valueArr[j++] = v;
                }
            }

            // using guarantees the handle is released even when Save throws.
            using (var fs = new FileStream(path + ".trie.dat", FileMode.Create, FileAccess.Write))
            {
                _trie.Save(fs);
            }
            OnSaveValue(valueArr, path);
            return(true);
        }
Example #3
0
        /// <summary>
        /// Populates <c>_trie</c> with translated person names. Tries <c>LoadDat</c>
        /// first; otherwise reads the text file at
        /// <c>Config.Translated_Person_Dict_Path</c>, one name per line.
        /// </summary>
        /// <returns><c>true</c> on success; <c>false</c> if any exception occurs while reading or building.</returns>
        private static bool Load()
        {
            _trie = new DoubleArrayTrie <bool>();
            if (LoadDat())
            {
                return true;
            }

            // Fall back to the raw text dictionary.
            try
            {
                // Every known name maps to true.
                var names = new SortedDictionary <string, bool>(StrComparer.Default);
                // How often each character occurs across all names.
                var occurrences = new SortedDictionary <char, int>();

                foreach (var entry in File.ReadLines(Config.Translated_Person_Dict_Path))
                {
                    names[entry] = true;

                    for (int i = 0; i < entry.Length; i++)
                    {
                        char ch = entry[i];
                        // These characters are excluded from the frequency count.
                        if ("不赞".IndexOf(ch) < 0)
                        {
                            int seen;
                            occurrences.TryGetValue(ch, out seen);   // seen stays 0 for a new character
                            occurrences[ch] = seen + 1;
                        }
                    }
                }

                names["·"] = true;

                // A character seen at least 10 times is also accepted as a name on its own.
                foreach (var pair in occurrences)
                {
                    if (pair.Value >= 10)
                    {
                        names[pair.Key.ToString()] = true;
                    }
                }

                _trie.Build(names);
                return true;
            }
            catch (Exception)
            {
                return false;
            }
        }
Example #4
0
        /// <summary>
        /// Adds every string in <paramref name="surfaces"/> to a new <c>Trie.Trie</c>
        /// and builds a <c>DoubleArrayTrie</c> from it.
        /// </summary>
        /// <param name="surfaces">Strings to add to the trie, in list order.</param>
        /// <param name="compact">Forwarded unchanged to the <c>DoubleArrayTrie</c> constructor.</param>
        /// <returns>The newly built double-array trie.</returns>
        public static DoubleArrayTrie Build(List <string> surfaces, bool compact)
        {
            var source = new Trie.Trie();
            surfaces.ForEach(s => source.Add(s));

            var result = new DoubleArrayTrie(compact);
            result.Build(source);
            return result;
        }
Example #5
0
        /// <summary>
        /// Builds a trie from the mock data, expects the build to report zero errors,
        /// and saves the result to the test file under <c>Config.DataRootPath</c>.
        /// </summary>
        public void BuildTest()
        {
            var path = Path.Combine(Config.DataRootPath, _testFile);

            // Start from a clean slate so the Save below always writes a fresh file.
            if (File.Exists(path))
            {
                File.Delete(path);
            }

            var trie = new DoubleArrayTrie <string>();

            var errorCount = trie.Build(_mockData.Keys.ToList(), _mockData.Values.ToList());

            // Expected value goes first so a failure message reads correctly.
            Assert.Equal(0, errorCount);

            trie.Save(path);
        }
Example #6
0
        /// <summary>
        /// Populates <c>dat</c> from the custom dictionaries listed in
        /// <c>Config.Custom_Dict_Path</c>. Tries <c>LoadDat</c> on the first entry;
        /// otherwise loads every listed file into one sorted map, builds the trie
        /// and calls <c>SaveDat</c> with the first entry's path.
        /// </summary>
        /// <returns><c>true</c> on success; <c>false</c> if any exception occurs while loading or building.</returns>
        private static bool Load()
        {
            if (LoadDat(Config.Custom_Dict_Path[0]))
            {
                return true;
            }

            dat = new DoubleArrayTrie <WordAttr>();
            var entries = new SortedDictionary <string, WordAttr>(StrComparer.Default);

            try
            {
                foreach (var configured in Config.Custom_Dict_Path)
                {
                    // An entry is either "file" or "file nature"; the part after the
                    // first space, when present, overrides the default nature.
                    var filePath      = configured;
                    var defaultNature = Nature.n;
                    int separator     = configured.IndexOf(' ');
                    if (separator > 0)
                    {
                        defaultNature = NatureHelper.GetOrCreate(configured.Substring(separator + 1));
                        filePath      = configured.Substring(0, separator);
                    }

                    // Any result of this call is not inspected; a file that fails to
                    // load without throwing is skipped silently.
                    Load(filePath, defaultNature, entries);
                }

                if (entries.Count == 0)
                {
                    // Nothing was loaded: insert a single placeholder entry.
                    entries[Constants.TAG_OTHER] = null;
                }

                dat.Build(entries);
                SaveDat(Config.Custom_Dict_Path[0], entries);
                return true;
            }
            catch (Exception)
            {
                return false;
            }
        }
        /// <summary>
        /// Populates <c>_trie</c> from the core dictionary. Tries <c>LoadDat</c> on
        /// <c>Config.Core_Dict_Path</c>; otherwise parses that text file, builds the
        /// trie and calls <c>SaveDat</c>.
        /// </summary>
        /// <remarks>
        /// Each line is <c>word nature1 freq1 nature2 freq2 ...</c>, separated by
        /// spaces or tabs. Parse errors in a nature or frequency field are not caught
        /// here and propagate to the caller.
        /// </remarks>
        /// <returns><c>true</c> on success; <c>false</c> when the file is missing or cannot be read.</returns>
        private static bool Load()
        {
            if (LoadDat(Config.Core_Dict_Path))
            {
                return(true);
            }

            var dict = new SortedDictionary <string, WordAttr>(StrComparer.Default);

            try
            {
                foreach (var line in File.ReadLines(Config.Core_Dict_Path))
                {
                    // A blank line yields no fields, and segs[0] below would throw
                    // IndexOutOfRangeException; skip it.
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    var segs     = line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
                    var natCount = (segs.Length - 1) / 2;   // fields after the word come in (nature, freq) pairs
                    var attr     = new WordAttr(natCount);
                    for (int i = 0; i < natCount; i++)
                    {
                        // Pair i sits at indexes 2i+1 (nature) and 2i+2 (frequency).
                        attr.natures[i] = (Nature)Enum.Parse(typeof(Nature), segs[2 * i + 1]);
                        attr.freqs[i]   = int.Parse(segs[2 * i + 2]);
                        attr.totalFreq += attr.freqs[i];
                    }
                    dict[segs[0]] = attr;
                }
                _trie.Build(dict);

                SaveDat(Config.Core_Dict_Path, dict);
                return(true);
            }
            catch (FileNotFoundException)
            {
                // log warning "core dictionary file does not exist"
                return(false);
            }
            catch (IOException)
            {
                // log warning "core dictionary file read error"
                return(false);
            }
        }
Example #8
0
        /// <summary>
        /// Builds a double-array trie from <c>MakeTrie()</c>, round-trips it through
        /// an in-memory stream with <c>Write</c>/<c>Read</c>, and checks lookups on
        /// the instance that was read back.
        /// </summary>
        /// <param name="compact">Forwarded unchanged to the <c>DoubleArrayTrie</c> constructor.</param>
        void TestSimpleTrie(bool compact)
        {
            var source = MakeTrie();
            var built  = new DoubleArrayTrie(compact);
            built.Build(source);

            // Serialize, rewind, and deserialize so the assertions exercise the persisted form.
            DoubleArrayTrie restored;
            using (var buffer = new MemoryStream())
            {
                built.Write(buffer);
                buffer.Seek(0, SeekOrigin.Begin);
                restored = DoubleArrayTrie.Read(buffer);
            }

            restored.Lookup("a").Is(0);
            (restored.Lookup("abc") > 0).IsTrue();
            (restored.Lookup("あいう") > 0).IsTrue();
            (restored.Lookup("xyz") < 0).IsTrue();
        }
        /// <summary>
        /// Populates <c>_trie</c> with string-to-int entries for the dictionary at
        /// <paramref name="path"/>. Uses <c>path + ".value.dat"</c> and
        /// <c>path + ".trie.dat"</c> when both load; otherwise parses the text file,
        /// rebuilds the trie and writes both files again.
        /// </summary>
        /// <param name="path">Path of the text dictionary; each non-blank line is "key value" separated by spaces or tabs.</param>
        public static void Load(string path)
        {
            _trie = new DoubleArrayTrie <int>();
            var valueArr = LoadDat(path + ".value.dat");

            if (valueArr != null)
            {
                if (_trie.Load(path + ".trie.dat", valueArr))
                {
                    return;
                }
            }
            var map = new SortedDictionary <string, int>(StrComparer.Default);

            foreach (var line in File.ReadLines(path))
            {
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }
                var segs = line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
                map[segs[0]] = int.Parse(segs[1]);
            }

            // Start from a fresh instance rather than reusing the one a failed Load was called on.
            _trie = new DoubleArrayTrie <int>();
            _trie.Build(map);

            // Values are stored in the map's iteration order.
            valueArr = new int[map.Count];
            int m = 0;

            foreach (var v in map.Values)
            {
                valueArr[m++] = v;
            }

            // using guarantees the handle is released even when Save throws.
            using (var fs = new FileStream(path + ".trie.dat", FileMode.Create, FileAccess.Write))
            {
                _trie.Save(fs);
            }
            SaveDat(path + ".value.dat", valueArr);
        }
        /// <summary>
        /// Populates <c>_trie</c> with area information for the dictionary at
        /// <paramref name="path"/>. Uses <c>path + ".value.dat"</c> and
        /// <c>path + ".trie.dat"</c> when both load; otherwise parses the text file,
        /// registers each area name together with several shortened aliases via
        /// <c>AddInMap</c>, rebuilds the trie and writes both files again.
        /// </summary>
        /// <param name="path">
        /// Path of the text file; on each non-blank line the first field is the area
        /// code and every following field is a name for that code.
        /// </param>
        /// <returns><c>true</c> on success; <c>false</c> if any exception occurs.</returns>
        public static bool Load(string path)
        {
            try
            {
                _trie = new DoubleArrayTrie <AreaInfo>();
                var valueArr = LoadDat(path + ".value.dat");
                if (valueArr != null)
                {
                    if (_trie.Load(path + ".trie.dat", valueArr))
                    {
                        return(true);
                    }
                }
                // Read the text file
                var map = new SortedDictionary <string, AreaInfo>(StrComparer.Default);
                foreach (var line in File.ReadLines(path))
                {
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }
                    var segs = line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
                    var code = segs[0];
                    for (int i = 1; i < segs.Length; i++)
                    {
                        var name = segs[i];
                        if (Invalids.Contains(name))
                        {
                            continue;                            // skip invalid area names
                        }
                        if (name.Length == 2)
                        {
                            // Two-character names are registered as-is with an empty suffix.
                            AddInMap(name, "", code, map);
                        }
                        else
                        {
                            // NOTE(review): a one-character name reaches this branch and makes
                            // Substring(2) below throw, which the outer catch turns into
                            // "return false" for the whole load - confirm that is intended.
                            var lastChar = name[name.Length - 1];
                            if ("市省县区州旗盟".Contains(lastChar))
                            {
                                // Register the name without its trailing administrative-unit character.
                                AddInMap(name.Substring(0, name.Length - 1), lastChar.ToString(), code, map);
                            }
                            else if (name.Length < 9)
                            {
                                AddInMap(name, "", code, map);
                            }
                            // NOTE(review): despite its name, lastTwo is everything from index 2
                            // onward, not the last two characters, while prevs drops the last two
                            // characters - the two can overlap. Behavior kept as-is; confirm intent.
                            var lastTwo = name.Substring(2);
                            var prevs   = name.Substring(0, name.Length - 2);
                            if (Invalids.Contains(lastTwo))
                            {
                                AddInMap(prevs, lastTwo, code, map);
                                if (prevs.Length == 3 && "市省".Contains(prevs[2]))
                                {
                                    AddInMap(name.Substring(0, 2), lastTwo, code, map);
                                }
                            }
                            if (lastChar == '旗')
                            {
                                // NOTE(review): sublast is the character at index 2, not the
                                // second-to-last character - behavior kept as-is; confirm intent.
                                var sublast = name[2];
                                if ("前后左中右特".Contains(sublast))
                                {
                                    AddInMap(prevs, "旗", code, map);
                                }
                            }
                            // The two characters immediately before the final character.
                            var subLastTwo = name.Substring(name.Length - 3, 2);
                            if (subLastTwo == "自治")
                            {
                                // Split "<prefix>自治<unit>" into prefix and the three-character ending.
                                prevs = name.Substring(0, name.Length - 3);
                                var ends = name.Substring(name.Length - 3);
                                AddInMap(prevs, ends, code, map);
                                if (prevs.Length >= 4)
                                {
                                    // Scan from index 2 for the first position where a 4-, 3- or
                                    // 2-character entry of Nationalities starts (longest tried first);
                                    // the text before it is registered as a shorter alias.
                                    for (int k = 2; k < prevs.Length - 1; k++)
                                    {
                                        if (k < prevs.Length - 3)
                                        {
                                            if (Nationalities.Contains(prevs.Substring(k, 4)))
                                            {
                                                AddInMap(prevs.Substring(0, k), ends, code, map);
                                                AddInMap(prevs.Substring(0, k) + "自治", lastChar.ToString(), code, map);
                                                break;
                                            }
                                        }
                                        if (k < prevs.Length - 2)
                                        {
                                            if (Nationalities.Contains(prevs.Substring(k, 3)))
                                            {
                                                AddInMap(prevs.Substring(0, k), ends, code, map);
                                                AddInMap(prevs.Substring(0, k) + "自治", lastChar.ToString(), code, map);
                                                break;
                                            }
                                        }
                                        if (Nationalities.Contains(prevs.Substring(k, 2)))
                                        {
                                            AddInMap(prevs.Substring(0, k), ends, code, map);
                                            AddInMap(prevs.Substring(0, k) + "自治", lastChar.ToString(), code, map);
                                            break;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }

                // Start from a fresh instance rather than reusing the one a failed Load was called on.
                _trie = new DoubleArrayTrie <AreaInfo>();
                _trie.Build(map);

                // Values are stored in the map's iteration order.
                valueArr = new AreaInfo[map.Count];
                int m = 0;
                foreach (var v in map.Values)
                {
                    valueArr[m++] = v;
                }

                // using guarantees the handle is released even when Save throws; the outer
                // catch would otherwise leave the file locked until finalization.
                using (var fs = new FileStream(path + ".trie.dat", FileMode.Create, FileAccess.Write))
                {
                    _trie.Save(fs);
                }
                SaveDat(path + ".value.dat", valueArr);
                return(true);
            }
            catch (Exception)
            {
                return(false);
            }
        }
Example #11
0
 /// <summary>
 /// Creates an encoder by building <c>_dat</c> from the tree's <c>table</c>
 /// and <c>_reDat</c> from its <c>reTable</c>.
 /// </summary>
 /// <param name="tree">Huffman tree that supplies both tables.</param>
 public HuffmanEncoder(HuffmanTree2 tree)
 {
     var table = tree.table;
     _dat.Build(table);

     var reTable = tree.reTable;
     _reDat.Build(reTable);
 }