Ejemplo n.º 1
0
        // Replaces the trie held in da with a new DoubleArrayTrieSearch loaded
        // from the stream returned by GetFeatureSetStream().
        private void LoadFeatureSet()
        {
            // The stream is fetched before da is reassigned, so da is untouched
            // if GetFeatureSetStream() throws.
            Stream source = GetFeatureSetStream();

            var trie = new DoubleArrayTrieSearch();
            da = trie;
            trie.Load(source);
        }
Ejemplo n.º 2
0
        // Loads the feature-set trie from the file named filename + ".feature"
        // and stores it in da.
        // filename: base name; the ".feature" suffix is appended here.
        public void LoadFeaturSet(string filename)
        {
            string featurePath = string.Concat(filename, ".feature");

            var trie = new DoubleArrayTrieSearch();
            da = trie;
            trie.Load(featurePath);
        }
Ejemplo n.º 3
0
        //Load language model from the files that share the base name strFileName:
        //  strFileName + ".prob": consecutive (prob, bow) pairs of singles, stored into lm_prob
        //  strFileName + ".da":   passed to daSearch.Load
        public void LoadLM(string strFileName)
        {
            //One record is two singles: probability followed by back-off weight
            const int recordSize = 2 * sizeof(float);

            //The using blocks release the file handle even when a read or an
            //array write throws
            using (FileStream fs = File.OpenRead(strFileName + ".prob"))
            using (BinaryReader br = new BinaryReader(fs))
            {
                lm_prob = new VarBigArrayNoCMP<NGram>(1024000);

                //Only complete records are loaded; trailing bytes that do not
                //form a whole record are ignored
                long recordCount = fs.Length / recordSize;
                for (long index = 0; index < recordCount; index++)
                {
                    NGram ngram = new NGram();
                    ngram.prob = br.ReadSingle();
                    ngram.bow = br.ReadSingle();
                    lm_prob[index] = ngram;
                }
            }

            daSearch.Load(strFileName + ".da");
        }
Ejemplo n.º 4
0
        // Loads the trie file _testFile2 under Config.DataRootPath (calling BuildTest2()
        // first when the file does not exist) and checks that the key "测试key3"
        // resolves to "测试value3".
        public void LoadTest2()
        {
            var path = Path.Combine(Config.DataRootPath, _testFile2);

            if (!File.Exists(path))
            {
                BuildTest2();
            }

            var search = new DoubleArrayTrieSearch<string>();

            search.Load(path, _mockData.Values.ToList());

            var res = search.Get("测试key3");

            // Expected value goes first so a failure message labels both values correctly.
            Assert.Equal("测试value3", res);
        }
Ejemplo n.º 5
0
        //Verify that a double array trie-tree agrees with the raw text file it was built from.
        //Every mismatching key is reported on the console.
        //strTextFileName: raw text file name used to build DA trie-tree
        //  text file format: key \t value
        //  key as string type
        //  value as non-negative integer
        //strDAFileName: double array trie-tree binary file name built from strTextFileName
        private static void Verify(string strTextFileName, string strDAFileName)
        {
            DoubleArrayTrieSearch das = new DoubleArrayTrieSearch();

            //The using block closes the text file even when a line fails to parse
            using (StreamReader sr = new StreamReader(strTextFileName))
            {
                das.Load(strDAFileName);

                string strLine;
                while ((strLine = sr.ReadLine()) != null)
                {
                    //Empty lines carry no key-value pair
                    if (strLine.Length == 0)
                    {
                        continue;
                    }

                    string[] items = strLine.Split('\t');
                    string fea = items[0];
                    int val = int.Parse(items[1]);

                    int rval = das.SearchByPerfectMatch(fea);
                    if (rval != val)
                    {
                        Console.WriteLine("Values in raw text file and double array trie-tree is different");
                        Console.WriteLine("Key-Value in text file: {0}", strLine);
                        Console.WriteLine("Value in DA trie: {0}", rval);
                    }
                }
            }

            //Test SearchAsKeyPrefix function.
            //TestSearchPrefix_case0, TestSearchPrefix_case01, TestSearchPrefix_case012 should be in result list
            //The results of the prefix searches below are not inspected here.
            List<int> resultList = new List<int>();
            das.SearchAsKeyPrefix("TestSearchPrefix_case012", resultList);

            //Test SearchByPrefix
            resultList = new List<int>();
            das.SearchByPrefix("TestSearchPrefix_case0", resultList);
            //The second query is given the same list without clearing it first
            das.SearchByPrefix("U04:京", resultList);

            Console.WriteLine("Done!");
        }
Ejemplo n.º 6
0
        //Verify that a double array trie-tree agrees with the raw text file it was built from.
        //Each key whose stored value differs from the text file is printed to the console.
        //strTextFileName: raw text file name used to build DA trie-tree
        //  text file format: key \t value
        //  key as string type
        //  value as non-negative integer
        //strDAFileName: double array trie-tree binary file name built from strTextFileName
        private static void Verify(string strTextFileName, string strDAFileName)
        {
            DoubleArrayTrieSearch das = new DoubleArrayTrieSearch();

            //Dispose the reader on every exit path, including parse failures
            using (StreamReader sr = new StreamReader(strTextFileName))
            {
                das.Load(strDAFileName);

                for (string strLine = sr.ReadLine(); strLine != null; strLine = sr.ReadLine())
                {
                    //Skip empty lines
                    if (strLine.Length == 0)
                    {
                        continue;
                    }

                    string[] items = strLine.Split('\t');
                    string   fea   = items[0];
                    int      val   = int.Parse(items[1]);

                    int rval = das.SearchByPerfectMatch(fea);
                    if (rval != val)
                    {
                        Console.WriteLine("Values in raw text file and double array trie-tree is different");
                        Console.WriteLine("Key-Value in text file: {0}", strLine);
                        Console.WriteLine("Value in DA trie: {0}", rval);
                    }
                }
            }

            //Test SearchAsKeyPrefix function.
            //TestSearchPrefix_case0, TestSearchPrefix_case01, TestSearchPrefix_case012 should be in result list
            //The prefix-search results are collected but not checked here.
            List <int> resultList = new List <int>();
            das.SearchAsKeyPrefix("TestSearchPrefix_case012", resultList);

            //Test SearchByPrefix
            resultList = new List <int>();
            das.SearchByPrefix("TestSearchPrefix_case0", resultList);
            //Same list instance is passed again without being cleared
            das.SearchByPrefix("U04:京", resultList);

            Console.WriteLine("Done!");
        }
Ejemplo n.º 7
0
        //Load a model from three files:
        //  filename:              text header (version, cost_factor, maxid, xsize, labels, templates)
        //  filename + ".feature": feature set, loaded into da
        //  filename + ".alpha":   binary feature weights
        //Returns true on success, false when the model version is not supported.
        public bool LoadModel(string filename)
        {
            //The using block closes the header file even when a line fails to parse
            using (StreamReader sr = new StreamReader(filename))
            {
                string strLine;

                //Each of the first four lines is read as "name: value"; only the value is used

                //Read version
                strLine = sr.ReadLine();
                version = uint.Parse(strLine.Split(':')[1].Trim());

                //Read cost_factor
                strLine      = sr.ReadLine();
                cost_factor_ = double.Parse(strLine.Split(':')[1].Trim());

                //Read maxid
                strLine = sr.ReadLine();
                maxid_  = long.Parse(strLine.Split(':')[1].Trim());

                //Read xsize
                strLine = sr.ReadLine();
                xsize_  = uint.Parse(strLine.Split(':')[1].Trim());

                //Skip the blank line that follows the header values
                sr.ReadLine();

                //Read the output labels; the list ends at an empty line or at end of file
                y_ = new List <string>();
                while (true)
                {
                    strLine = sr.ReadLine();
                    if (string.IsNullOrEmpty(strLine))
                    {
                        break;
                    }
                    y_.Add(strLine);
                }

                //Read unigram and bigram templates, told apart by their first character
                unigram_templs_ = new List <string>();
                bigram_templs_  = new List <string>();
                while (sr.EndOfStream == false)
                {
                    strLine = sr.ReadLine();
                    if (strLine.Length == 0)
                    {
                        break;
                    }
                    if (strLine[0] == 'U')
                    {
                        unigram_templs_.Add(strLine);
                    }
                    else if (strLine[0] == 'B')
                    {
                        bigram_templs_.Add(strLine);
                    }
                }
            }

            //Load all feature set data
            string filename_feature = filename + ".feature";

            da = new DoubleArrayTrieSearch();
            da.Load(filename_feature);

            //Load all features alpha data
            //The using blocks close the weight file on every exit path,
            //including the "not supported" return below
            string filename_alpha = filename + ".alpha";
            using (FileStream fs_alpha = File.OpenRead(filename_alpha))
            using (BinaryReader br_alpha = new BinaryReader(fs_alpha))
            {
                if (version == Utils.MODEL_TYPE_NORM)
                {
                    //feature weight array: maxid_ + 1 slots are allocated and the
                    //first maxid_ are filled from single-precision values
                    alpha_two_tuples = null;
                    alpha_           = new double[maxid_ + 1];
                    for (long i = 0; i < maxid_; i++)
                    {
                        alpha_[i] = br_alpha.ReadSingle();
                    }
                }
                else if (version == Utils.MODEL_TYPE_SHRINKED)
                {
                    //maxid_ records, each an Int64 key followed by a single-precision weight
                    alpha_           = null;
                    alpha_two_tuples = new BTreeDictionary <long, double>();
                    for (long i = 0; i < maxid_; i++)
                    {
                        long   key    = br_alpha.ReadInt64();
                        double weight = br_alpha.ReadSingle();
                        alpha_two_tuples.Add(key, weight);
                    }
                }
                else
                {
                    Console.WriteLine("This model is not supported.");
                    return(false);
                }
            }
            return(true);
        }
Ejemplo n.º 8
0
        //Load a model from three files:
        //  filename:              text header (version, cost_factor, maxid, xsize, labels, templates)
        //  filename + ".feature": feature set, loaded into da
        //  filename + ".alpha":   binary feature weights
        //Returns true on success, false when the model version is not supported.
        public bool LoadModel(string filename)
        {
            //Dispose the header reader on every exit path, including parse failures
            using (var sr = new StreamReader(filename))
            {
                string strLine;

                //The first four lines are each read as "name: value"; only the value is kept

                //Read version
                strLine = sr.ReadLine();
                version = uint.Parse(strLine.Split(':')[1].Trim());

                //Read cost_factor
                strLine = sr.ReadLine();
                cost_factor_ = double.Parse(strLine.Split(':')[1].Trim());

                //Read maxid
                strLine = sr.ReadLine();
                maxid_ = long.Parse(strLine.Split(':')[1].Trim());

                //Read xsize
                strLine = sr.ReadLine();
                xsize_ = uint.Parse(strLine.Split(':')[1].Trim());

                //Skip the blank line after the header values
                sr.ReadLine();

                //Read the output labels up to the next empty line (or end of file)
                y_ = new List<string>();
                while (true)
                {
                    strLine = sr.ReadLine();
                    if (string.IsNullOrEmpty(strLine))
                    {
                        break;
                    }
                    y_.Add(strLine);
                }

                //Read unigram ('U') and bigram ('B') templates
                unigram_templs_ = new List<string>();
                bigram_templs_ = new List<string>();
                while (sr.EndOfStream == false)
                {
                    strLine = sr.ReadLine();
                    if (strLine.Length == 0)
                    {
                        break;
                    }
                    if (strLine[0] == 'U')
                    {
                        unigram_templs_.Add(strLine);
                    }
                    else if (strLine[0] == 'B')
                    {
                        bigram_templs_.Add(strLine);
                    }
                }
            }

            //Load all feature set data
            var filename_feature = filename + ".feature";
            da = new DoubleArrayTrieSearch();
            da.Load(filename_feature);

            //Load all features alpha data
            //Both using blocks guarantee the weight file is closed, also when
            //the version is rejected below
            var filename_alpha = filename + ".alpha";
            using (var fs_alpha = File.OpenRead(filename_alpha))
            using (var br_alpha = new BinaryReader(fs_alpha))
            {
                if (version == Utils.MODEL_TYPE_NORM)
                {
                    //feature weight array: allocated with maxid_ + 1 slots, of which
                    //the first maxid_ are read as single-precision values
                    alpha_two_tuples = null;
                    alpha_ = new double[maxid_ + 1];
                    for (long i = 0; i < maxid_; i++)
                    {
                        alpha_[i] = br_alpha.ReadSingle();
                    }
                }
                else if (version == Utils.MODEL_TYPE_SHRINKED)
                {
                    //maxid_ records of (Int64 key, single-precision weight)
                    alpha_ = null;
                    alpha_two_tuples = new BTreeDictionary<long, double>();
                    for (long i = 0; i < maxid_; i++)
                    {
                        var key = br_alpha.ReadInt64();
                        double weight = br_alpha.ReadSingle();
                        alpha_two_tuples.Add(key, weight);
                    }
                }
                else
                {
                    Console.WriteLine("This model is not supported.");
                    return false;
                }
            }
            return true;
        }
Ejemplo n.º 9
0
 // Stores a new DoubleArrayTrieSearch in daSearch and loads it from strFileName.
 private void LoadFeatureFromFile(string strFileName)
 {
     var trie = new DoubleArrayTrieSearch();

     // Assigned before loading, so daSearch refers to the new instance even if Load throws.
     daSearch = trie;
     trie.Load(strFileName);
 }
Ejemplo n.º 10
0
 // Replaces daSearch with a new DoubleArrayTrieSearch and loads the file
 // strFileName into it.
 private void LoadFeatureFromFile(string strFileName)
 {
     var loaded = new DoubleArrayTrieSearch();

     // The field is set first; Load runs on the instance it now refers to.
     daSearch = loaded;
     loaded.Load(strFileName);
 }