Exemple #1
0
        // Sets n.cost to the scaled sum of the weights of the node's cached features.
        // The cached feature list for n.fid is walked from the start and the walk
        // stops at the first -1 entry or at the end of the array, whichever comes first.
        public void calcCost(Node n)
        {
            long[] features = feature_cache_[n.fid];
            double total = 0;

            int pos = 0;
            while (pos < features.Length)
            {
                long featureId = features[pos];
                if (featureId == -1)
                {
                    // -1 ends the list early; remaining slots are ignored.
                    break;
                }

                // The weight looked up is the one at (feature id + n.y).
                total += featureIndex.GetAlpha(featureId + n.y);
                pos++;
            }

            n.cost = featureIndex.cost_factor_ * total;
        }
Exemple #2
0
        // Sets n.cost to featureIndex.cost_factor_ times the sum of the weights
        // of the features cached for n.fid. Entries are read in order until the
        // array ends or an entry equal to -1 (after conversion to int) is found.
        public void calcCost(Node n)
        {
            var cached = feature_cache_[n.fid];
            double sum = 0;

            int idx = 0;
            while (idx < cached.Length)
            {
                // NOTE(review): the cast narrows if the cache stores values wider than int - confirm element type.
                int featureId = (int)cached[idx];
                if (featureId == -1)
                {
                    break;
                }

                sum += featureIndex.GetAlpha(featureId + n.y);
                ++idx;
            }

            n.cost = featureIndex.cost_factor_ * sum;
        }
Exemple #3
0
        //Build feature set into indexed data
        // Builds a double-array trie from the feature lexicon, saves it to
        // "<filename>.feature", allocates the weight array alpha_, and releases the lexicon.
        //
        // filename: base path; ".feature" is appended for the index and
        //           ".feature.raw_text" for the optional debug dump.
        // max_slot_usage_rate_threshold: passed through unchanged to DoubleArrayTrieBuilder.build.
        // debugLevel: when greater than 0, the key/id pairs are also written as tab-separated text.
        // strRetrainModelFileName: path of an existing model whose weights seed alpha_;
        //           null or empty means alpha_ starts as all zeros.
        //
        // Returns false only when the trie build fails. A tag-count mismatch with the
        // retrain model is reported on the console but the method still returns true,
        // leaving alpha_ zero-initialized.
        public bool BuildFeatureSetIntoIndex(string filename, double max_slot_usage_rate_threshold, int debugLevel, string strRetrainModelFileName)
        {
            Console.WriteLine("Building {0} features into index...", featureLexicalDict.Size);

            IList<string> keyList;
            IList<int> valList;
            featureLexicalDict.GenerateLexicalIdList(out keyList, out valList);

            if (debugLevel > 0)
            {
                Console.Write("Debug: Writing raw feature set into file...");
                var filename_featureset_raw_format = filename + ".feature.raw_text";
                // The using block releases the file handle even if a write throws part-way through.
                using (var sw = new StreamWriter(filename_featureset_raw_format))
                {
                    // Save each feature and its id in raw format, one pair per line.
                    for (var i = 0; i < keyList.Count; i++)
                    {
                        sw.WriteLine("{0}\t{1}", keyList[i], valList[i]);
                    }
                }
                Console.WriteLine("Done.");
            }

            //Build feature index
            var filename_featureset = filename + ".feature";
            var da = new DoubleArrayTrieBuilder(thread_num_);
            if (!da.build(keyList, valList, max_slot_usage_rate_threshold))
            {
                Console.WriteLine("Build lexical dictionary failed.");
                return false;
            }
            //Save indexed feature set into file
            da.save(filename_featureset);

            if (string.IsNullOrEmpty(strRetrainModelFileName))
            {
                // Fresh training: drop the lexicon first so its memory can be
                // reclaimed before the weight matrix is allocated.
                featureLexicalDict.Clear();
                featureLexicalDict = null;
                keyList = null;
                valList = null;

                GC.Collect();

                //Create weight matrix
                alpha_ = new double[feature_size() + 1];
                return true;
            }

            Console.WriteLine();
            Console.WriteLine("Loading the existed model for re-training...");
            //Create weight matrix
            alpha_ = new double[feature_size() + 1];

            var modelReader = new ModelReader();
            modelReader.LoadModel(strRetrainModelFileName);

            if (modelReader.y_.Count == y_.Count)
            {
                for (var i = 0; i < keyList.Count; i++)
                {
                    var index = modelReader.get_id(keyList[i]);
                    if (index < 0)
                    {
                        // A negative id is treated as "feature absent from the old model"; its weights stay zero.
                        continue;
                    }

                    // Keys starting with 'U' own y_.Count weights; every other key owns y_.Count squared.
                    // NOTE(review): presumably unigram vs. bigram templates - confirm.
                    var size = (keyList[i][0] == 'U' ? y_.Count : y_.Count * y_.Count);
                    for (var j = 0; j < size; j++)
                    {
                        // Destination is shifted by one relative to the feature id.
                        // NOTE(review): presumably alpha_ slot 0 is reserved - confirm.
                        alpha_[valList[i] + j + 1] = modelReader.GetAlpha(index + j);
                    }
                }
            }
            else
            {
                Console.WriteLine("The number of tags isn't equal between two models, it cannot be re-trained.");
            }

            //Clean up all data
            featureLexicalDict.Clear();
            featureLexicalDict = null;
            keyList = null;
            valList = null;

            GC.Collect();

            return true;
        }