Example 1
        public ModelWritter(int thread_num, double cost_factor, uint hugeLexShrinkMemLoad)
        {
            cost_factor_ = cost_factor;
            maxid_       = 0;
            thread_num_  = thread_num;
            parallelOption.MaxDegreeOfParallelism = thread_num;

            if (hugeLexShrinkMemLoad > 0)
            {
                featureLexicalDict = new HugeFeatureLexicalDict(thread_num_, hugeLexShrinkMemLoad);
            }
            else
            {
                featureLexicalDict = new DefaultFeatureLexicalDict(thread_num_);
            }
        }
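
The following is a minimal usage sketch for this constructor; the thread count, cost factor, and shrink threshold are illustrative assumptions, not values taken from the example above.

            // Illustrative only: 4 worker threads, a cost factor of 1.0, and a zero
            // threshold, which selects DefaultFeatureLexicalDict in the branch above.
            var writer = new ModelWritter(4, 1.0, 0);

            // A positive threshold would select HugeFeatureLexicalDict instead, which
            // can shrink its lexicon when memory load exceeds that threshold.
            // var hugeWriter = new ModelWritter(4, 1.0, 80);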
Example 2
        public ModelWritter(int thread_num, double cost_factor, uint hugeLexShrinkMemLoad)
        {
            cost_factor_ = cost_factor;
            maxid_ = 0;
            thread_num_ = thread_num;

#if NO_SUPPORT_PARALLEL_LIB
#else
            parallelOption.MaxDegreeOfParallelism = thread_num;
#endif

            if (hugeLexShrinkMemLoad > 0)
            {
                featureLexicalDict = new HugeFeatureLexicalDict(thread_num_, hugeLexShrinkMemLoad);
            }
            else
            {
                featureLexicalDict = new DefaultFeatureLexicalDict(thread_num_);
            }
        }
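
This variant guards the ParallelOptions assignment with the NO_SUPPORT_PARALLEL_LIB symbol so the class still compiles on runtimes without the Task Parallel Library. Below is a hedged sketch of the same guard pattern applied at a call site; items and Process are hypothetical placeholders, not members of the class above.

#if NO_SUPPORT_PARALLEL_LIB
            // No Parallel library available: fall back to a plain sequential loop.
            for (var i = 0; i < items.Count; i++)
            {
                Process(items[i]);
            }
#else
            // Parallel library available: honor the configured degree of parallelism.
            Parallel.For(0, items.Count, parallelOption, i => Process(items[i]));
#endif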
Example 3
        //Build feature set into indexed data
        public bool BuildFeatureSetIntoIndex(string filename, double max_slot_usage_rate_threshold, int debugLevel)
        {
            IList<string> keyList;
            IList<int> valList;

            featureLexicalDict.GenerateLexicalIdList(out keyList, out valList);

            if (debugLevel > 0)
            {
                var filename_featureset_raw_format = filename + ".feature.raw_text";
                var sw = new StreamWriter(filename_featureset_raw_format);
                // save feature and its id into lists in raw format
                for (var i = 0; i < keyList.Count; i++)
                {
                    sw.WriteLine("{0}\t{1}", keyList[i], valList[i]);
                }
                sw.Close();
            }

            //Build feature index
            var filename_featureset = filename + ".feature";
            var da = new CRFLite.Utils.DoubleArrayTrieBuilder(thread_num_);

            if (da.build(keyList, valList, max_slot_usage_rate_threshold) == false)
            {
                return false;
            }
            //Save indexed feature set into file
            da.save(filename_featureset);

            if (string.IsNullOrWhiteSpace(modelFileName))
            {
                //Clean up all data
                featureLexicalDict.Clear();
                featureLexicalDict = null;
                keyList            = null;
                valList            = null;

                GC.Collect();

                //Create weight matrix
                alpha_ = new double[feature_size() + 1];
            }
            else
            {
                //Create weight matrix
                alpha_ = new double[feature_size() + 1];
                var modelReader = new ModelReader(this.modelFileName);
                modelReader.LoadModel();

                if (modelReader.y_.Count == y_.Count)
                {
                    for (var i = 0; i < keyList.Count; i++)
                    {
                        var index = modelReader.get_id(keyList[i]);
                        if (index < 0)
                        {
                            continue;
                        }
                        var size = (keyList[i][0] == 'U' ? y_.Count : y_.Count * y_.Count);
                        for (var j = 0; j < size; j++)
                        {
                            alpha_[valList[i] + j + 1] = modelReader.GetAlpha(index + j);
                        }
                    }
                }
                else
                {
                    //The tag sets of the two models differ, so the existing weights
                    //cannot be reused; alpha_ keeps its newly allocated zero values.
                }

                //Clean up all data
                featureLexicalDict.Clear();
                featureLexicalDict = null;
                keyList            = null;
                valList            = null;

                GC.Collect();
            }

            return true;
        }
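
A usage sketch for this overload follows; it assumes writer is a ModelWritter whose feature dictionary has already been populated from the training corpus, and the file prefix, slot-usage threshold, and debug level are illustrative values.

            // Illustrative only: writes ner_model.feature (and, because debugLevel > 0,
            // ner_model.feature.raw_text) using an assumed model file prefix.
            if (!writer.BuildFeatureSetIntoIndex("ner_model", 0.95, 1))
            {
                Console.WriteLine("Building the feature index failed.");
            }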
Example 4
        //Build feature set into indexed data
        public bool BuildFeatureSetIntoIndex(string filename, double max_slot_usage_rate_threshold, int debugLevel, string strRetrainModelFileName)
        {
            Console.WriteLine("Building {0} features into index...", featureLexicalDict.Size);

            IList<string> keyList;
            IList<int> valList;
            featureLexicalDict.GenerateLexicalIdList(out keyList, out valList);

            if (debugLevel > 0)
            {
                Console.Write("Debug: Writing raw feature set into file...");
                var filename_featureset_raw_format = filename + ".feature.raw_text";
                var sw = new StreamWriter(filename_featureset_raw_format);
                // save feature and its id into lists in raw format
                for (var i = 0; i < keyList.Count; i++)
                {
                    sw.WriteLine("{0}\t{1}", keyList[i], valList[i]);
                }
                sw.Close();
                Console.WriteLine("Done.");
            }

            //Build feature index
            var filename_featureset = filename + ".feature";
            var da = new DoubleArrayTrieBuilder(thread_num_);
            if (da.build(keyList, valList, max_slot_usage_rate_threshold) == false)
            {
                Console.WriteLine("Build lexical dictionary failed.");
                return false;
            }
            //Save indexed feature set into file
            da.save(filename_featureset);

            if (string.IsNullOrEmpty(strRetrainModelFileName))
            {
                //Clean up all data
                featureLexicalDict.Clear();
                featureLexicalDict = null;
                keyList = null;
                valList = null;

                GC.Collect();

                //Create weight matrix
                alpha_ = new double[feature_size() + 1];
            }
            else
            {
                Console.WriteLine();
                Console.WriteLine("Loading the existed model for re-training...");
                //Create weight matrix
                alpha_ = new double[feature_size() + 1];

                var modelReader = new ModelReader();
                modelReader.LoadModel(strRetrainModelFileName);

                if (modelReader.y_.Count == y_.Count)
                {
                    for (var i = 0; i < keyList.Count; i++)
                    {
                        var index = modelReader.get_id(keyList[i]);
                        if (index < 0)
                        {
                            continue;
                        }
                        var size = (keyList[i][0] == 'U' ? y_.Count : y_.Count * y_.Count);
                        for (var j = 0; j < size; j++)
                        {
                            alpha_[valList[i] + j + 1] = modelReader.GetAlpha(index + j);
                        }
                    }
                }
                else
                {
                    Console.WriteLine("The number of tags isn't equal between two models, it cannot be re-trained.");
                }

                //Clean up all data
                featureLexicalDict.Clear();
                featureLexicalDict = null;
                keyList = null;
                valList = null;

                GC.Collect();
            }

            return true;
        }
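
This overload additionally takes the path of an existing model whose weights are copied into alpha_ so training can resume from them. A sketch under the same assumptions as above; both file names are illustrative.

            // Illustrative only: seed the new weight matrix from an existing model file;
            // passing null or an empty string falls back to zero-initialized weights,
            // as in the previous overload.
            var ok = writer.BuildFeatureSetIntoIndex("ner_model", 0.95, 1, "ner_model.old");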