// Rebuilds the feature-set double-array trie from the stream
// supplied by GetFeatureSetStream().
private void LoadFeatureSet()
{
    var stream = GetFeatureSetStream();
    var search = new DoubleArrayTrieSearch();
    search.Load(stream);
    da = search;
}
// Rebuilds the feature-set double-array trie from "<filename>.feature".
// NOTE(review): the method name has a typo ("LoadFeaturSet"); it is kept
// as-is because it is part of the public API and renaming would break callers.
public void LoadFeaturSet(string filename)
{
    var featurePath = filename + ".feature";
    var search = new DoubleArrayTrieSearch();
    search.Load(featurePath);
    da = search;
}
// Loads a language model from the given file-name prefix:
//   <strFileName>.prob : binary stream of (float prob, float backoff) pairs
//   <strFileName>.da   : double-array trie index for n-gram lookup
public void LoadLM(string strFileName)
{
    // Load prob & back-off values. The "using" guarantees the reader is
    // closed even if allocation or a read throws — the original only closed
    // it inside a catch(EndOfStreamException), leaking it on any other path.
    lm_prob = new VarBigArrayNoCMP<NGram>(1024000);
    using (var br = new BinaryReader(File.OpenRead(strFileName + ".prob")))
    {
        // Read (prob, bow) pairs while at least 8 bytes (two floats) remain,
        // instead of using EndOfStreamException for loop control.
        long length = br.BaseStream.Length;
        long index = 0;
        while (br.BaseStream.Position + 8 <= length)
        {
            NGram ngram = new NGram();
            ngram.prob = br.ReadSingle();
            ngram.bow = br.ReadSingle();
            lm_prob[index] = ngram;
            index++;
        }
    }
    daSearch.Load(strFileName + ".da");
}
// Verifies that a trie persisted to disk can be loaded back and queried.
// Builds the test fixture file first if it does not exist yet.
public void LoadTest2()
{
    var path = Path.Combine(Config.DataRootPath, _testFile2);
    if (!File.Exists(path))
    {
        BuildTest2();
    }
    // (removed an unused DoubleArrayTrieBuilder<string> local the original
    // constructed and never used)
    var search = new DoubleArrayTrieSearch<string>();
    search.Load(path, _mockData.Values.ToList());
    var res = search.Get("测试key3");
    // xUnit convention: expected value first, actual second — the original
    // had them swapped, which produces misleading failure messages.
    Assert.Equal("测试value3", res);
}
// Verify whether the double-array trie-tree is correct.
// strTextFileName: raw text file used to build the DA trie-tree.
//   Text file format: key \t value
//     key   : string
//     value : non-negative integer
// strDAFileName: double-array trie-tree binary file built from strTextFileName.
private static void Verify(string strTextFileName, string strDAFileName)
{
    DoubleArrayTrieSearch das = new DoubleArrayTrieSearch();
    das.Load(strDAFileName);
    // "using" ensures the reader is disposed even if parsing throws —
    // the original never closed it (resource leak).
    using (StreamReader sr = new StreamReader(strTextFileName))
    {
        while (sr.EndOfStream == false)
        {
            string strLine = sr.ReadLine();
            if (strLine.Length == 0)
            {
                continue;
            }
            string[] items = strLine.Split('\t');
            int val = int.Parse(items[1]);
            string fea = items[0];
            int rval = das.SearchByPerfectMatch(fea);
            if (rval != val)
            {
                Console.WriteLine("Values in raw text file and double array trie-tree is different");
                Console.WriteLine("Key-Value in text file: {0}", strLine);
                Console.WriteLine("Value in DA trie: {0}", rval);
            }
        }
    }
    // Test SearchAsKeyPrefix function.
    // TestSearchPrefix_case0, TestSearchPrefix_case01, TestSearchPrefix_case012
    // should all be in the result list.
    List<int> resultList = new List<int>();
    int rlistCnt = das.SearchAsKeyPrefix("TestSearchPrefix_case012", resultList);
    // Test SearchByPrefix.
    resultList = new List<int>();
    rlistCnt = das.SearchByPrefix("TestSearchPrefix_case0", resultList);
    rlistCnt = das.SearchByPrefix("U04:京", resultList);
    Console.WriteLine("Done!");
}
// Verify whether the double-array trie-tree is correct.
// strTextFileName: raw text file used to build the DA trie-tree.
//   Text file format: key \t value
//     key   : string
//     value : non-negative integer
// strDAFileName: double-array trie-tree binary file built from strTextFileName.
private static void Verify(string strTextFileName, string strDAFileName)
{
    DoubleArrayTrieSearch das = new DoubleArrayTrieSearch();
    das.Load(strDAFileName);
    // Dispose the reader deterministically — the original leaked it.
    using (StreamReader sr = new StreamReader(strTextFileName))
    {
        while (sr.EndOfStream == false)
        {
            string strLine = sr.ReadLine();
            if (strLine.Length == 0)
            {
                continue;
            }
            string[] items = strLine.Split('\t');
            int val = int.Parse(items[1]);
            string fea = items[0];
            int rval = das.SearchByPerfectMatch(fea);
            if (rval != val)
            {
                Console.WriteLine("Values in raw text file and double array trie-tree is different");
                Console.WriteLine("Key-Value in text file: {0}", strLine);
                Console.WriteLine("Value in DA trie: {0}", rval);
            }
        }
    }
    // Test SearchAsKeyPrefix function.
    // TestSearchPrefix_case0, TestSearchPrefix_case01, TestSearchPrefix_case012
    // should all be in the result list.
    List<int> resultList = new List<int>();
    int rlistCnt = das.SearchAsKeyPrefix("TestSearchPrefix_case012", resultList);
    // Test SearchByPrefix.
    resultList = new List<int>();
    rlistCnt = das.SearchByPrefix("TestSearchPrefix_case0", resultList);
    rlistCnt = das.SearchByPrefix("U04:京", resultList);
    Console.WriteLine("Done!");
}
// Load the model file.
// NOTE(review): the legacy comment said "return <0 on error, =0 on success",
// but this method actually returns bool (true = success, false = unsupported
// model version) — the old comment predates the signature.
// Reads a text header from <filename>, then the feature trie from
// "<filename>.feature" and the feature weights from "<filename>.alpha".
public bool LoadModel(string filename)
{
    StreamReader sr = new StreamReader(filename);
    string strLine;
    // Read the version number (header lines are "name: value").
    strLine = sr.ReadLine();
    version = uint.Parse(strLine.Split(':')[1].Trim());
    // Read cost_factor.
    strLine = sr.ReadLine();
    cost_factor_ = double.Parse(strLine.Split(':')[1].Trim());
    // Read maxid (number of feature weights).
    strLine = sr.ReadLine();
    maxid_ = long.Parse(strLine.Split(':')[1].Trim());
    // Read xsize.
    strLine = sr.ReadLine();
    xsize_ = uint.Parse(strLine.Split(':')[1].Trim());
    // Skip the blank separator line.
    strLine = sr.ReadLine();
    // Read the output tag labels, one per line, terminated by a blank line.
    y_ = new List <string>();
    while (true)
    {
        strLine = sr.ReadLine();
        if (strLine.Length == 0)
        {
            break;
        }
        y_.Add(strLine);
    }
    // Read unigram and bigram feature templates ('U'/'B' prefix) until a
    // blank line or end of stream.
    unigram_templs_ = new List <string>();
    bigram_templs_ = new List <string>();
    while (sr.EndOfStream == false)
    {
        strLine = sr.ReadLine();
        if (strLine.Length == 0)
        {
            break;
        }
        if (strLine[0] == 'U')
        {
            unigram_templs_.Add(strLine);
        }
        if (strLine[0] == 'B')
        {
            bigram_templs_.Add(strLine);
        }
    }
    sr.Close();
    //Load all feature set data
    string filename_feature = filename + ".feature";
    da = new DoubleArrayTrieSearch();
    da.Load(filename_feature);
    //Load all features alpha data
    string filename_alpha = filename + ".alpha";
    StreamReader sr_alpha = new StreamReader(filename_alpha);
    BinaryReader br_alpha = new BinaryReader(sr_alpha.BaseStream);
    if (version == Utils.MODEL_TYPE_NORM)
    {
        // Feature weight array: maxid_ single-precision weights, widened to double.
        alpha_two_tuples = null;
        alpha_ = new double[maxid_ + 1];
        for (long i = 0; i < maxid_; i++)
        {
            alpha_[i] = br_alpha.ReadSingle();
        }
    }
    else if (version == Utils.MODEL_TYPE_SHRINKED)
    {
        // Sparse format: (long key, float weight) pairs.
        alpha_ = null;
        alpha_two_tuples = new BTreeDictionary <long, double>();
        for (long i = 0; i < maxid_; i++)
        {
            long key = br_alpha.ReadInt64();
            double weight = br_alpha.ReadSingle();
            alpha_two_tuples.Add(key, weight);
        }
    }
    else
    {
        Console.WriteLine("This model is not supported.");
        return(false);
    }
    br_alpha.Close();
    return(true);
}
// Loads a model from <filename> (text header with version, cost factor,
// maxid, xsize, tag labels, and U/B feature templates), plus the companion
// files "<filename>.feature" (double-array trie) and "<filename>.alpha"
// (feature weights). Returns true on success, false when the model version
// is unsupported.
public bool LoadModel(string filename)
{
    // "using" guarantees the reader is closed even if parsing throws —
    // the original leaked it on every exception path.
    using (var sr = new StreamReader(filename))
    {
        string strLine;
        // Header lines are "name: value".
        strLine = sr.ReadLine();
        version = uint.Parse(strLine.Split(':')[1].Trim());
        strLine = sr.ReadLine();
        cost_factor_ = double.Parse(strLine.Split(':')[1].Trim());
        strLine = sr.ReadLine();
        maxid_ = long.Parse(strLine.Split(':')[1].Trim());
        strLine = sr.ReadLine();
        xsize_ = uint.Parse(strLine.Split(':')[1].Trim());
        // Skip the blank separator line.
        sr.ReadLine();
        // Output tag labels, one per line, terminated by a blank line.
        y_ = new List<string>();
        while (true)
        {
            strLine = sr.ReadLine();
            if (strLine.Length == 0)
            {
                break;
            }
            y_.Add(strLine);
        }
        // Unigram/bigram feature templates ('U'/'B' prefix) until a blank
        // line or end of stream.
        unigram_templs_ = new List<string>();
        bigram_templs_ = new List<string>();
        while (sr.EndOfStream == false)
        {
            strLine = sr.ReadLine();
            if (strLine.Length == 0)
            {
                break;
            }
            if (strLine[0] == 'U')
            {
                unigram_templs_.Add(strLine);
            }
            if (strLine[0] == 'B')
            {
                bigram_templs_.Add(strLine);
            }
        }
    }
    // Load all feature set data.
    var filename_feature = filename + ".feature";
    da = new DoubleArrayTrieSearch();
    da.Load(filename_feature);
    // Load all feature weight (alpha) data; dispose deterministically.
    var filename_alpha = filename + ".alpha";
    using (var br_alpha = new BinaryReader(File.OpenRead(filename_alpha)))
    {
        if (version == Utils.MODEL_TYPE_NORM)
        {
            // Dense format: maxid_ single-precision weights, widened to double.
            alpha_two_tuples = null;
            alpha_ = new double[maxid_ + 1];
            for (long i = 0; i < maxid_; i++)
            {
                alpha_[i] = br_alpha.ReadSingle();
            }
        }
        else if (version == Utils.MODEL_TYPE_SHRINKED)
        {
            // Sparse format: (long key, float weight) pairs.
            alpha_ = null;
            alpha_two_tuples = new BTreeDictionary<long, double>();
            for (long i = 0; i < maxid_; i++)
            {
                var key = br_alpha.ReadInt64();
                double weight = br_alpha.ReadSingle();
                alpha_two_tuples.Add(key, weight);
            }
        }
        else
        {
            Console.WriteLine("This model is not supported.");
            return false;
        }
    }
    return true;
}
// Rebuilds the double-array trie search index from the given file.
private void LoadFeatureFromFile(string strFileName)
{
    var search = new DoubleArrayTrieSearch();
    search.Load(strFileName);
    daSearch = search;
}