예제 #1
0
        /// <summary>
        /// Builds the feature set into a double-array trie index, saves it to disk,
        /// and allocates the weight array <c>alpha_</c>. When <c>modelFileName</c> is set,
        /// weights of features that also exist in that model are copied into <c>alpha_</c>.
        /// </summary>
        /// <param name="filename">
        /// Base output path. The index is saved to <c>filename + ".feature"</c>; when
        /// <paramref name="debugLevel"/> is positive, a tab-separated feature/id dump is
        /// also written to <c>filename + ".feature.raw_text"</c>.
        /// </param>
        /// <param name="max_slot_usage_rate_threshold">Passed unchanged to the trie builder's <c>build</c> call.</param>
        /// <param name="debugLevel">Any value greater than zero enables the raw-text feature dump.</param>
        /// <returns><c>false</c> if building the trie fails; otherwise <c>true</c>.</returns>
        public bool BuildFeatureSetIntoIndex(string filename, double max_slot_usage_rate_threshold, int debugLevel)
        {
            IList<string> keyList;
            IList<int> valList;

            featureLexicalDict.GenerateLexicalIdList(out keyList, out valList);

            if (debugLevel > 0)
            {
                // Save each feature and its id in raw text format.
                // The using statement guarantees the file is closed even if a write throws.
                var filename_featureset_raw_format = filename + ".feature.raw_text";
                using (var sw = new StreamWriter(filename_featureset_raw_format))
                {
                    for (var i = 0; i < keyList.Count; i++)
                    {
                        sw.WriteLine("{0}\t{1}", keyList[i], valList[i]);
                    }
                }
            }

            // Build the feature index.
            var filename_featureset = filename + ".feature";
            var da = new CRFLite.Utils.DoubleArrayTrieBuilder(thread_num_);

            if (da.build(keyList, valList, max_slot_usage_rate_threshold) == false)
            {
                return false;
            }

            // Save the indexed feature set into a file.
            da.save(filename_featureset);

            var hasInitialModel = !string.IsNullOrWhiteSpace(modelFileName);
            if (hasInitialModel)
            {
                // Create the weight matrix, then seed it from the existing model.
                alpha_ = new double[feature_size() + 1];
                var modelReader = new ModelReader(this.modelFileName);
                modelReader.LoadModel();

                // Weights are only copied when both models have the same number of tags;
                // otherwise alpha_ keeps its zero-initialized values.
                if (modelReader.y_.Count == y_.Count)
                {
                    for (var i = 0; i < keyList.Count; i++)
                    {
                        var index = modelReader.get_id(keyList[i]);
                        if (index < 0)
                        {
                            // Feature is not present in the existing model.
                            continue;
                        }

                        // Keys starting with 'U' occupy one slot per tag; all other keys
                        // occupy one slot per tag pair.
                        var size = (keyList[i][0] == 'U' ? y_.Count : y_.Count * y_.Count);
                        for (var j = 0; j < size; j++)
                        {
                            alpha_[valList[i] + j + 1] = modelReader.GetAlpha(index + j);
                        }
                    }
                }
            }

            // Clean up all intermediate data before (possibly) allocating the weight matrix.
            featureLexicalDict.Clear();
            featureLexicalDict = null;
            keyList = null;
            valList = null;

            GC.Collect();

            if (!hasInitialModel)
            {
                // Create the weight matrix only after the intermediate data was released,
                // matching the original allocation order for this path.
                alpha_ = new double[feature_size() + 1];
            }

            return true;
        }