示例#1
0
        //Initialize the tagger with a trained model. This must be called before add() and parse().
        //Returns 0 (Utils.ERROR_SUCCESS) on success; a negative value indicates an error.
        public int init_by_model(ModelReader model_p)
        {
            featureIndex = model_p;
            ysize_       = (short)model_p.ysize();

            if (nbest_ > 1)
            {
                //Only allocate heap when nbest is more than 1
                heap_queue = Utils.heap_init((int)(crf_max_word_num * ysize_ * ysize_));
            }

            //Initialize feature set cache according to unigram and bigram templates
            InitializeFeatureCache();

            //Size the node/result buffers with crf_max_word_num so they stay consistent
            //with the heap allocated above. (The original mixed crf_max_word_num with
            //Utils.DEFAULT_CRF_MAX_WORD_NUM, which breaks whenever the two values differ.)
            node_   = new Node[crf_max_word_num, ysize_];
            result_ = new short[crf_max_word_num];

            //Create node cache: one node per (word position, tag) pair
            for (short cur = 0; cur < crf_max_word_num; cur++)
            {
                for (short i = 0; i < ysize_; i++)
                {
                    var n = new Node();
                    node_[cur, i] = n;

                    n.lpathList = new List<Path>();
                    n.rpathList = new List<Path>();
                    n.x         = cur;
                    n.y         = i;
                }
            }

            //Create path cache: connect every tag at position cur-1 to every tag at position cur
            for (var cur = 1; cur < crf_max_word_num; cur++)
            {
                for (var j = 0; j < ysize_; ++j)
                {
                    for (var i = 0; i < ysize_; ++i)
                    {
                        var p = new CRFSharp.Path();
                        p.add(node_[cur - 1, j], node_[cur, i]);
                    }
                }
            }

            return Utils.ERROR_SUCCESS;
        }
示例#2
0
        //Build the collected feature set into an indexed double-array trie and save it to disk.
        //filename: base path; the index is written to "<filename>.feature"
        //  (plus "<filename>.feature.raw_text" when debugLevel > 0).
        //max_slot_usage_rate_threshold: slot usage rate limit passed to the trie builder.
        //strRetrainModelFileName: optional existing model file whose weights seed alpha_ for re-training;
        //  null or empty means train from scratch.
        //Returns false when the trie build fails, true otherwise.
        public bool BuildFeatureSetIntoIndex(string filename, double max_slot_usage_rate_threshold, int debugLevel, string strRetrainModelFileName)
        {
            Console.WriteLine("Building {0} features into index...", featureLexicalDict.Size);

            IList<string> keyList;
            IList<int> valList;
            featureLexicalDict.GenerateLexicalIdList(out keyList, out valList);

            if (debugLevel > 0)
            {
                Console.Write("Debug: Writing raw feature set into file...");
                var filename_featureset_raw_format = filename + ".feature.raw_text";
                //using guarantees the stream is flushed and closed even if a write throws
                //(the original leaked the handle on exception)
                using (var sw = new StreamWriter(filename_featureset_raw_format))
                {
                    // save feature and its id into lists in raw format
                    for (var i = 0; i < keyList.Count; i++)
                    {
                        sw.WriteLine("{0}\t{1}", keyList[i], valList[i]);
                    }
                }
                Console.WriteLine("Done.");
            }

            //Build feature index
            var filename_featureset = filename + ".feature";
            var da = new DoubleArrayTrieBuilder(thread_num_);
            if (da.build(keyList, valList, max_slot_usage_rate_threshold) == false)
            {
                Console.WriteLine("Build lexical dictionary failed.");
                return false;
            }
            //Save indexed feature set into file
            da.save(filename_featureset);

            if (string.IsNullOrEmpty(strRetrainModelFileName))
            {
                //Clean up all data BEFORE allocating the weight matrix to reduce peak memory
                featureLexicalDict.Clear();
                featureLexicalDict = null;
                keyList = null;
                valList = null;

                //Deliberate collection: the lexical dictionary can be very large and the
                //weight matrix allocated next is large too
                GC.Collect();

                //Create weight matrix
                alpha_ = new double[feature_size() + 1];
            }
            else
            {
                Console.WriteLine();
                Console.WriteLine("Loading the existed model for re-training...");
                //Create weight matrix
                alpha_ = new double[feature_size() + 1];

                var modelReader = new ModelReader();
                modelReader.LoadModel(strRetrainModelFileName);

                //Weights can only be transferred when both models use the same tag set
                if (modelReader.y_.Count == y_.Count)
                {
                    for (var i = 0; i < keyList.Count; i++)
                    {
                        var index = modelReader.get_id(keyList[i]);
                        if (index < 0)
                        {
                            //Feature not present in the old model; leave its weights at zero
                            continue;
                        }
                        //Unigram ('U') features span y_.Count weights, bigram features y_.Count^2
                        var size = (keyList[i][0] == 'U' ? y_.Count : y_.Count * y_.Count);
                        for (var j = 0; j < size; j++)
                        {
                            alpha_[valList[i] + j + 1] = modelReader.GetAlpha(index + j);
                        }
                    }
                }
                else
                {
                    Console.WriteLine("The number of tags isn't equal between two models, it cannot be re-trained.");
                }

                //Clean up all data
                featureLexicalDict.Clear();
                featureLexicalDict = null;
                keyList = null;
                valList = null;

                GC.Collect();
            }

            return true;
        }
示例#3
0
 //Load an encoded model from the given file; returns true on success.
 public bool LoadModel(string strModelFileName)
 {
     return (modelReader = new ModelReader()).LoadModel(strModelFileName);
 }
示例#4
0
 //Load an encoded model from the specified file (true = loaded successfully).
 public bool LoadModel(string strModelFileName)
 {
     modelReader = new ModelReader();
     var loaded = modelReader.LoadModel(strModelFileName);
     return loaded;
 }
示例#5
0
        //Initialize the tagger from a trained model. Must be called before add() and parse().
        //Returns 0 (Utils.ERROR_SUCCESS) on success; a negative value indicates an error.
        public int init_by_model(ModelReader model_p)
        {
            featureIndex = model_p;
            ysize_ = (short)model_p.ysize();

            //The n-best heap is only needed when more than one result is requested
            if (nbest_ > 1)
            {
                heap_queue = Utils.heap_init((int)(crf_max_word_num * ysize_ * ysize_));
            }

            //Initialize feature set cache according to unigram and bigram templates
            InitializeFeatureCache();

            node_ = new Node[crf_max_word_num, ysize_];
            result_ = new short[crf_max_word_num];

            //Build the node lattice: one node for every (word position, tag) pair
            for (short pos = 0; pos < crf_max_word_num; pos++)
            {
                for (short tag = 0; tag < ysize_; tag++)
                {
                    var node = new Node
                    {
                        lpathList = new List<Path>(),
                        rpathList = new List<Path>(),
                        x = pos,
                        y = tag
                    };
                    node_[pos, tag] = node;
                }
            }

            //Build the path lattice: connect every tag at position pos-1 to every tag at pos
            for (var pos = 1; pos < crf_max_word_num; pos++)
            {
                for (var prevTag = 0; prevTag < ysize_; ++prevTag)
                {
                    for (var curTag = 0; curTag < ysize_; ++curTag)
                    {
                        new CRFSharp.Path().add(node_[pos - 1, prevTag], node_[pos, curTag]);
                    }
                }
            }

            return Utils.ERROR_SUCCESS;
        }