/// <summary>
/// Populates the static trie for the dictionary at <paramref name="path"/>.
/// First tries <c>LoadDat</c> on <c>path + ".bi" + Predefine.BIN_EXT</c>; if that fails,
/// the text file is parsed line by line, the trie is built from the parsed entries,
/// and <c>SaveDat</c> is called with the result.
/// </summary>
/// <param name="path">Path of the space-separated text dictionary.</param>
/// <returns>
/// <c>true</c> when <c>LoadDat</c> succeeds or at least one entry is parsed from the text file;
/// <c>false</c> when the text file yields no entries.
/// </returns>
private static bool Load(string path)
{
    _trie = new DoubleArrayTrie<string>();
    if (LoadDat(path + ".bi" + Predefine.BIN_EXT))
    {
        return true;
    }

    var map = new SortedDictionary<string, string>(StrComparer.Default);
    foreach (var line in File.ReadLines(path))
    {
        var param = line.Split(' ');

        // A blank line or a line without a second field has no value column.
        // Skip it rather than failing with IndexOutOfRangeException on param[1].
        if (param.Length < 2)
        {
            continue;
        }

        // Keys ending with '@' are deliberately left out of the map.
        if (param[0].EndsWith("@"))
        {
            continue;
        }

        map[param[0]] = param[1];
    }

    if (map.Count == 0)
    {
        return false;
    }

    _trie.Build(map);
    if (!SaveDat(path, map))
    {
        // log error
    }

    return true;
}
/// <summary>
/// Loads the trie for the dictionary at <paramref name="path"/>.
/// Values come from <c>OnLoadValue</c>; the trie itself is read via <c>LoadDat</c> from
/// <c>path + ".trie.dat"</c> when possible, otherwise it is rebuilt from the first
/// field of every non-blank line of the text file and written back to disk.
/// </summary>
/// <param name="path">Path of the text dictionary; also the prefix of the <c>.trie.dat</c> file.</param>
/// <returns>
/// <c>false</c> when <c>OnLoadValue</c> returns <c>null</c> or the text file cannot be read;
/// <c>true</c> otherwise.
/// </returns>
public bool Load(string path)
{
    _trie = new DoubleArrayTrie<V>();
    var valueArr = OnLoadValue(path);
    if (valueArr == null)
    {
        // log info ""
        return false;
    }

    if (LoadDat(path + ".trie.dat", valueArr))
    {
        // log info ""
        return true;
    }

    var keys = new List<string>(valueArr.Length);
    try
    {
        foreach (var line in File.ReadLines(path))
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var segs = line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
            keys.Add(segs[0]);
        }
    }
    catch (Exception)
    {
        // A failed read leaves only a partial key list; building from it would
        // pair keys with the wrong values, so report failure instead of continuing.
        return false;
    }

    var error = _trie.Build(keys, valueArr);
    if (error != 0)
    {
        // The direct build reported an error: rebuild from a sorted map instead.
        var map = new SortedDictionary<string, V>(StrComparer.Default);
        for (int i = 0; i < valueArr.Length; i++)
        {
            map[keys[i]] = valueArr[i];
        }

        _trie = new DoubleArrayTrie<V>();
        _trie.Build(map);

        // Rewrite the value array in the map's enumeration order.
        // NOTE(review): if the map holds fewer entries than valueArr (duplicate keys),
        // the tail of valueArr keeps its previous contents — confirm this is intended.
        int j = 0;
        foreach (var v in map.Values)
        {
            valueArr[j++] = v;
        }
    }

    // Dispose the stream even if Save throws, so the file handle is not leaked.
    using (var fs = new FileStream(path + ".trie.dat", FileMode.Create, FileAccess.Write))
    {
        _trie.Save(fs);
    }

    OnSaveValue(valueArr, path);
    return true;
}
/// <summary>
/// Populates the static trie of translated person names.
/// Tries <c>LoadDat</c> first; otherwise reads <c>Config.Translated_Person_Dict_Path</c>,
/// registers every line as a name, and additionally registers each single character
/// that occurs at least 10 times across all lines.
/// </summary>
/// <returns><c>true</c> on success; <c>false</c> if any exception is thrown while reading or building.</returns>
private static bool Load()
{
    _trie = new DoubleArrayTrie<bool>();
    if (LoadDat())
    {
        return true;
    }

    // Fall back to the raw text dictionary.
    try
    {
        // Every known translated name maps to true.
        var names = new SortedDictionary<string, bool>(StrComparer.Default);
        // Occurrence count of each character across all names.
        var charCounts = new SortedDictionary<char, int>();

        foreach (var name in File.ReadLines(Config.Translated_Person_Dict_Path))
        {
            names[name] = true;
            foreach (var ch in name)
            {
                // Characters in the exclusion list are never counted.
                if ("不赞".IndexOf(ch) < 0)
                {
                    // TryGetValue leaves 'seen' at 0 for a character not counted yet.
                    charCounts.TryGetValue(ch, out int seen);
                    charCounts[ch] = seen + 1;
                }
            }
        }

        names["·"] = true;

        foreach (var entry in charCounts)
        {
            // A character seen at least 10 times is accepted as a name on its own.
            if (entry.Value >= 10)
            {
                names[entry.Key.ToString()] = true;
            }
        }

        _trie.Build(names);
        return true;
    }
    catch (Exception)
    {
        return false;
    }
}
/// <summary>
/// Builds a <c>DoubleArrayTrie</c> from a list of surface strings by first adding
/// each one to an intermediate <c>Trie.Trie</c>.
/// </summary>
/// <param name="surfaces">Strings to add, in list order.</param>
/// <param name="compact">Passed through to the <c>DoubleArrayTrie</c> constructor.</param>
/// <returns>The double-array trie built from the intermediate trie.</returns>
public static DoubleArrayTrie Build(List<string> surfaces, bool compact)
{
    var source = new Trie.Trie();
    for (int i = 0; i < surfaces.Count; i++)
    {
        source.Add(surfaces[i]);
    }

    var result = new DoubleArrayTrie(compact);
    result.Build(source);
    return result;
}
/// <summary>
/// Builds a trie from the mock data, asserts that the build reports zero errors,
/// and saves the result to the test file under <c>Config.DataRootPath</c>.
/// Any existing test file is deleted first.
/// </summary>
public void BuildTest()
{
    var path = Path.Combine(Config.DataRootPath, _testFile);
    if (File.Exists(path))
    {
        File.Delete(path);
    }

    var trie = new DoubleArrayTrie<string>();
    var errorCount = trie.Build(_mockData.Keys.ToList(), _mockData.Values.ToList());

    // Assert.Equal takes (expected, actual); with the arguments swapped a failure
    // message would report the two values the wrong way round.
    Assert.Equal(0, errorCount);

    // Reuse the path computed above instead of combining it a second time.
    trie.Save(path);
}
// Loads the custom dictionaries into the static trie 'dat'.
// Tries LoadDat on the first configured path; otherwise every entry of
// Config.Custom_Dict_Path is loaded into one sorted map, the trie is built from it,
// and SaveDat is called with the first configured path.
// Returns true on success, false if any exception is thrown while loading or building.
private static bool Load()
{
    if (LoadDat(Config.Custom_Dict_Path[0])) { return(true); }
    dat = new DoubleArrayTrie <WordAttr>();
    var dict = new SortedDictionary <string, WordAttr>(StrComparer.Default);
    try
    {
        for (var i = 0; i < Config.Custom_Dict_Path.Length; i++)
        {
            var p = Config.Custom_Dict_Path[i]; // path of the current custom dictionary file
            var defNat = Nature.n;
            int spaceIdx = p.IndexOf(' ');
            if (spaceIdx > 0)
            {
                // the entry carries a default part-of-speech after the space
                var nat = p.Substring(spaceIdx + 1); // text after the space is the part-of-speech
                // NOTE(review): the original text does not show whether the defNat assignment
                // below is live code or commented out; it is kept exactly as written — confirm.
                p = p.Substring(0, spaceIdx); // defNat = NatureHelper.GetOrCreate(nat);
            }
            Load(p, defNat, dict);
            //bool success =
            //if(!success)
            // log warning "loading file failed: " + p
        }
        if (dict.Count == 0)
        {
            // log warning "no items loaded"
            dict[Constants.TAG_OTHER] = null; // used as an empty placeholder entry
        }
        dat.Build(dict);
        SaveDat(Config.Custom_Dict_Path[0], dict);
        return(true);
    }
    catch (Exception e)
    {
        return(false);
    }
}
/// <summary>
/// Loads the core dictionary into the static trie.
/// Tries <c>LoadDat</c> on <c>Config.Core_Dict_Path</c>; otherwise parses the text file,
/// where each line is a word followed by alternating nature / frequency fields,
/// builds the trie and calls <c>SaveDat</c>.
/// </summary>
/// <returns>
/// <c>true</c> on success; <c>false</c> when the file is missing or an I/O error occurs.
/// Parse errors (unknown nature name, non-numeric frequency) are not caught here.
/// </returns>
private static bool Load()
{
    if (LoadDat(Config.Core_Dict_Path))
    {
        return true;
    }

    var dict = new SortedDictionary<string, WordAttr>(StrComparer.Default);
    try
    {
        foreach (var line in File.ReadLines(Config.Core_Dict_Path))
        {
            // A blank line splits into zero fields; indexing segs[0] would then throw
            // an IndexOutOfRangeException that neither catch clause below handles.
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var segs = line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);

            // Fields after the word come in (nature, frequency) pairs.
            var natCount = (segs.Length - 1) / 2;
            var attr = new WordAttr(natCount);
            for (int i = 0; i < natCount; i++)
            {
                attr.natures[i] = (Nature)Enum.Parse(typeof(Nature), segs[1 + (i << 1)]);
                attr.freqs[i] = int.Parse(segs[(i + 1) << 1]);
                attr.totalFreq += attr.freqs[i];
            }

            dict[segs[0]] = attr;
        }

        _trie.Build(dict);
        SaveDat(Config.Core_Dict_Path, dict);
        return true;
    }
    catch (FileNotFoundException)
    {
        // log warning "core dictionary file does not exist"
        return false;
    }
    catch (IOException)
    {
        // log warning "core dictionary file read error"
        return false;
    }
}
/// <summary>
/// Builds a double-array trie from <c>MakeTrie()</c>, round-trips it through an
/// in-memory stream with <c>Write</c> / <c>Read</c>, and checks lookups on the restored trie.
/// </summary>
/// <param name="compact">Passed through to the <c>DoubleArrayTrie</c> constructor.</param>
void TestSimpleTrie(bool compact)
{
    var source = MakeTrie();
    var built = new DoubleArrayTrie(compact);
    built.Build(source);

    DoubleArrayTrie restored;
    using (var buffer = new MemoryStream())
    {
        built.Write(buffer);
        // Rewind so that Read starts from the first written byte.
        buffer.Position = 0;
        restored = DoubleArrayTrie.Read(buffer);
    }

    restored.Lookup("a").Is(0);

    var abcFound = restored.Lookup("abc") > 0;
    abcFound.IsTrue();

    var kanaFound = restored.Lookup("あいう") > 0;
    kanaFound.IsTrue();

    var xyzMissing = restored.Lookup("xyz") < 0;
    xyzMissing.IsTrue();
}
/// <summary>
/// Loads the static int-valued trie for the dictionary at <paramref name="path"/>.
/// Tries the <c>.value.dat</c> / <c>.trie.dat</c> pair first; otherwise parses the text file
/// (first field is the key, second field an integer value), builds the trie and writes both files.
/// </summary>
/// <param name="path">Path of the text dictionary; also the prefix of the two <c>.dat</c> files.</param>
public static void Load(string path)
{
    _trie = new DoubleArrayTrie<int>();
    var valueArr = LoadDat(path + ".value.dat");
    if (valueArr != null && _trie.Load(path + ".trie.dat", valueArr))
    {
        return;
    }

    var map = new SortedDictionary<string, int>(StrComparer.Default);
    foreach (var line in File.ReadLines(path))
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        var segs = line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
        map[segs[0]] = int.Parse(segs[1]);
    }

    // Start from a fresh trie: the Load attempt above may have run against this instance.
    _trie = new DoubleArrayTrie<int>();
    _trie.Build(map);

    // Values in the map's enumeration order.
    valueArr = new int[map.Count];
    map.Values.CopyTo(valueArr, 0);

    // Dispose the stream even if Save throws, so the file handle is not leaked.
    using (var fs = new FileStream(path + ".trie.dat", FileMode.Create, FileAccess.Write))
    {
        _trie.Save(fs);
    }

    SaveDat(path + ".value.dat", valueArr);
}
/// <summary>
/// Loads the static area trie for the dictionary at <paramref name="path"/>.
/// Tries the <c>.value.dat</c> / <c>.trie.dat</c> pair first; otherwise parses the text file,
/// where each line is a code followed by one or more area names, registers several
/// split forms of every name through <c>AddInMap</c>, builds the trie and writes both files.
/// </summary>
/// <param name="path">Path of the text dictionary; also the prefix of the two <c>.dat</c> files.</param>
/// <returns><c>true</c> on success; <c>false</c> if any exception is thrown.</returns>
public static bool Load(string path)
{
    try
    {
        _trie = new DoubleArrayTrie<AreaInfo>();
        var valueArr = LoadDat(path + ".value.dat");
        if (valueArr != null)
        {
            if (_trie.Load(path + ".trie.dat", valueArr))
            {
                return true;
            }
        }

        // Read the text file.
        var map = new SortedDictionary<string, AreaInfo>(StrComparer.Default);
        foreach (var line in File.ReadLines(path))
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var segs = line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
            var code = segs[0];
            for (int i = 1; i < segs.Length; i++)
            {
                var name = segs[i];
                if (Invalids.Contains(name))
                {
                    continue; // skip invalid area names
                }

                if (name.Length == 2)
                {
                    AddInMap(name, "", code, map);
                }
                else
                {
                    // Register the name with its one-character suffix split off,
                    // or whole when it has no listed suffix and is shorter than 9 characters.
                    var lastChar = name[name.Length - 1];
                    if ("市省县区州旗盟".Contains(lastChar))
                    {
                        AddInMap(name.Substring(0, name.Length - 1), lastChar.ToString(), code, map);
                    }
                    else if (name.Length < 9)
                    {
                        AddInMap(name, "", code, map);
                    }

                    // NOTE(review): 'lastTwo' is everything from index 2 onward, which is the
                    // last two characters only for 4-character names — confirm this is intended.
                    var lastTwo = name.Substring(2);
                    var prevs = name.Substring(0, name.Length - 2);
                    if (Invalids.Contains(lastTwo))
                    {
                        AddInMap(prevs, lastTwo, code, map);
                        if (prevs.Length == 3 && "市省".Contains(prevs[2]))
                        {
                            AddInMap(name.Substring(0, 2), lastTwo, code, map);
                        }
                    }

                    if (lastChar == '旗')
                    {
                        // NOTE(review): this reads the character at fixed index 2, not the one
                        // just before the final character — confirm against the data.
                        var sublast = name[2];
                        if ("前后左中右特".Contains(sublast))
                        {
                            AddInMap(prevs, "旗", code, map);
                        }
                    }

                    // Names whose last three characters start with "自治".
                    var subLastTwo = name.Substring(name.Length - 3, 2);
                    if (subLastTwo == "自治")
                    {
                        prevs = name.Substring(0, name.Length - 3);
                        var ends = name.Substring(name.Length - 3);
                        AddInMap(prevs, ends, code, map);
                        if (prevs.Length >= 4)
                        {
                            // Look for a nationality name starting at position k, trying
                            // lengths 4, 3 and then 2; on the first hit, register the part
                            // before it and stop scanning.
                            for (int k = 2; k < prevs.Length - 1; k++)
                            {
                                if (k < prevs.Length - 3)
                                {
                                    if (Nationalities.Contains(prevs.Substring(k, 4)))
                                    {
                                        AddInMap(prevs.Substring(0, k), ends, code, map);
                                        AddInMap(prevs.Substring(0, k) + "自治", lastChar.ToString(), code, map);
                                        break;
                                    }
                                }

                                if (k < prevs.Length - 2)
                                {
                                    if (Nationalities.Contains(prevs.Substring(k, 3)))
                                    {
                                        AddInMap(prevs.Substring(0, k), ends, code, map);
                                        AddInMap(prevs.Substring(0, k) + "自治", lastChar.ToString(), code, map);
                                        break;
                                    }
                                }

                                if (Nationalities.Contains(prevs.Substring(k, 2)))
                                {
                                    AddInMap(prevs.Substring(0, k), ends, code, map);
                                    AddInMap(prevs.Substring(0, k) + "自治", lastChar.ToString(), code, map);
                                    break;
                                }
                            }
                        }
                    }
                }
            }
        }

        // Start from a fresh trie: the Load attempt above may have run against this instance.
        _trie = new DoubleArrayTrie<AreaInfo>();
        _trie.Build(map);

        // Values in the map's enumeration order.
        valueArr = new AreaInfo[map.Count];
        int m = 0;
        foreach (var v in map.Values)
        {
            valueArr[m++] = v;
        }

        // Dispose the stream even if Save throws, so the file handle is not leaked.
        using (var fs = new FileStream(path + ".trie.dat", FileMode.Create, FileAccess.Write))
        {
            _trie.Save(fs);
        }

        SaveDat(path + ".value.dat", valueArr);
        return true;
    }
    catch (Exception)
    {
        return false;
    }
}
/// <summary>
/// Creates an encoder from a Huffman tree by building <c>_dat</c> from the tree's
/// <c>table</c> and <c>_reDat</c> from its <c>reTable</c>.
/// </summary>
/// <param name="tree">Tree supplying the two tables.</param>
public HuffmanEncoder(HuffmanTree2 tree)
{
    var forwardTable = tree.table;
    _dat.Build(forwardTable);

    var reverseTable = tree.reTable;
    _reDat.Build(reverseTable);
}