Ejemplo n.º 1
0
        /// <summary>
        /// Populates <c>feature_cache_</c> with feature ids for every position in
        /// <c>0 .. word_num - 1</c>: first one row per position for the unigram templates,
        /// then one row per position for the bigram templates. Ids equal to -1 are skipped
        /// and every row is closed with a -1 entry.
        /// </summary>
        /// <returns>
        /// <c>Utils.ERROR_INVALIDATED_PARAMETER</c> when <c>word_num</c> is not positive,
        /// <c>Utils.ERROR_EMPTY_FEATURE</c> when <c>apply_rule</c> returns null for a template,
        /// otherwise <c>Utils.ERROR_SUCCESS</c>.
        /// </returns>
        public int buildFeatures()
        {
            if (word_num <= 0)
            {
                return Utils.ERROR_INVALIDATED_PARAMETER;
            }

            using (var pooled = _buildersPool.GetOrCreate())
            {
                var scratch  = pooled.Item;
                var rowIndex = 0; // next row of feature_cache_ to fill; shared by both passes

                // Pass 1: unigram templates, one cache row per position.
                for (var position = 0; position < word_num; position++)
                {
                    var column = 0;
                    for (var t = 0; t < featureIndex.unigram_templs_.Count; t++)
                    {
                        var expanded = featureIndex.apply_rule(featureIndex.unigram_templs_[t], position, scratch, this);
                        if (expanded == null)
                        {
                            return Utils.ERROR_EMPTY_FEATURE;
                        }

                        int featureId = featureIndex.get_id(expanded.ToString());
                        if (featureId == -1)
                        {
                            // get_id reported no id for this feature: leave it out of the row.
                            continue;
                        }
                        feature_cache_[rowIndex][column++] = featureId;
                    }
                    // Close the row with the -1 marker and move on to the next row.
                    feature_cache_[rowIndex++][column] = -1;
                }

                // Pass 2: bigram templates, appended after the unigram rows.
                for (var position = 0; position < word_num; position++)
                {
                    var column = 0;
                    for (var t = 0; t < featureIndex.bigram_templs_.Count; t++)
                    {
                        var expanded = featureIndex.apply_rule(featureIndex.bigram_templs_[t], position, scratch, this);
                        if (expanded == null)
                        {
                            return Utils.ERROR_EMPTY_FEATURE;
                        }

                        int featureId = featureIndex.get_id(expanded.ToString());
                        if (featureId == -1)
                        {
                            continue;
                        }
                        feature_cache_[rowIndex][column++] = featureId;
                    }
                    feature_cache_[rowIndex++][column] = -1;
                }

                return Utils.ERROR_SUCCESS;
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Expands every unigram template, then every bigram template, at each position in
        /// <c>0 .. word_num - 1</c> and stores the resulting feature ids in <c>feature_cache_</c>.
        /// One row is written per position and pass; features whose id is -1 are omitted and
        /// each row ends with a -1 entry.
        /// </summary>
        /// <returns>
        /// <c>Utils.ERROR_INVALIDATED_PARAMETER</c> when <c>word_num</c> is not positive,
        /// <c>Utils.ERROR_EMPTY_FEATURE</c> when <c>apply_rule</c> yields an empty string,
        /// otherwise <c>Utils.ERROR_SUCCESS</c>.
        /// </returns>
        public int buildFeatures()
        {
            if (word_num <= 0)
            {
                return Utils.ERROR_INVALIDATED_PARAMETER;
            }

            // Row cursor into feature_cache_; keeps advancing across both passes.
            var cacheRow = 0;

            // Unigram pass.
            for (var wordPos = 0; wordPos < word_num; wordPos++)
            {
                var slot = 0;
                for (var k = 0; k < featureIndex.unigram_templs_.Count; k++)
                {
                    var featureText = featureIndex.apply_rule(featureIndex.unigram_templs_[k], wordPos, this);
                    if (featureText == "")
                    {
                        return Utils.ERROR_EMPTY_FEATURE;
                    }

                    int featureId = featureIndex.get_id(featureText);
                    if (featureId == -1)
                    {
                        // No id for this feature: skip it.
                        continue;
                    }
                    feature_cache_[cacheRow][slot++] = featureId;
                }
                // Terminate the row and step to the next one.
                feature_cache_[cacheRow++][slot] = -1;
            }

            // Bigram pass; its rows follow the unigram rows.
            for (var wordPos = 0; wordPos < word_num; wordPos++)
            {
                var slot = 0;
                for (var k = 0; k < featureIndex.bigram_templs_.Count; k++)
                {
                    var featureText = featureIndex.apply_rule(featureIndex.bigram_templs_[k], wordPos, this);
                    if (featureText == "")
                    {
                        return Utils.ERROR_EMPTY_FEATURE;
                    }

                    int featureId = featureIndex.get_id(featureText);
                    if (featureId == -1)
                    {
                        continue;
                    }
                    feature_cache_[cacheRow][slot++] = featureId;
                }
                feature_cache_[cacheRow++][slot] = -1;
            }

            return Utils.ERROR_SUCCESS;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds the per-position feature id rows in <c>feature_cache_</c>. The first
        /// <c>word_num</c> rows come from the unigram templates and the next <c>word_num</c>
        /// rows from the bigram templates. A feature whose id is -1 is not stored, and a -1
        /// entry is written after the last stored id of each row.
        /// </summary>
        /// <returns>
        /// <c>Utils.ERROR_INVALIDATED_PARAMETER</c> when <c>word_num</c> is not positive,
        /// <c>Utils.ERROR_EMPTY_FEATURE</c> when <c>apply_rule</c> yields an empty string,
        /// otherwise <c>Utils.ERROR_SUCCESS</c>.
        /// </returns>
        public int buildFeatures()
        {
            if (word_num <= 0)
            {
                return Utils.ERROR_INVALIDATED_PARAMETER;
            }

            // Index of the feature_cache_ row currently being filled (continues across passes).
            int outRow = 0;

            // First pass: unigram templates.
            for (int pos = 0; pos < word_num; pos++)
            {
                int outCol = 0;
                foreach (string pattern in featureIndex.unigram_templs_)
                {
                    string feature = featureIndex.apply_rule(pattern, pos, this);
                    if (feature == "")
                    {
                        return Utils.ERROR_EMPTY_FEATURE;
                    }

                    int featureId = featureIndex.get_id(feature);
                    if (featureId == -1)
                    {
                        // Feature has no id: nothing to cache for it.
                        continue;
                    }
                    feature_cache_[outRow][outCol++] = featureId;
                }
                // Mark the end of the row, then advance to the next row.
                feature_cache_[outRow++][outCol] = -1;
            }

            // Second pass: bigram templates.
            for (int pos = 0; pos < word_num; pos++)
            {
                int outCol = 0;
                foreach (string pattern in featureIndex.bigram_templs_)
                {
                    string feature = featureIndex.apply_rule(pattern, pos, this);
                    if (feature == "")
                    {
                        return Utils.ERROR_EMPTY_FEATURE;
                    }

                    int featureId = featureIndex.get_id(feature);
                    if (featureId == -1)
                    {
                        continue;
                    }
                    feature_cache_[outRow][outCol++] = featureId;
                }
                feature_cache_[outRow++][outCol] = -1;
            }

            return Utils.ERROR_SUCCESS;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds the collected feature set into a double-array trie index, saves it to
        /// <paramref name="filename"/> + ".feature" and allocates the weight array <c>alpha_</c>.
        /// When a model file is supplied for re-training, the weights of features that also
        /// exist in that model are copied into <c>alpha_</c>.
        /// </summary>
        /// <param name="filename">
        /// Base output path. The index is saved to filename + ".feature"; when
        /// <paramref name="debugLevel"/> is positive the raw key/id pairs are also written to
        /// filename + ".feature.raw_text".
        /// </param>
        /// <param name="max_slot_usage_rate_threshold">Passed unchanged to <c>DoubleArrayTrieBuilder.build</c>.</param>
        /// <param name="debugLevel">Any value greater than 0 enables the raw-text feature dump.</param>
        /// <param name="strRetrainModelFileName">
        /// Path of an existing model to re-train from; null or empty means no existing model is loaded.
        /// </param>
        /// <returns>
        /// <c>false</c> when building the trie fails; otherwise <c>true</c>. A tag-count mismatch
        /// with the re-train model only prints a message and still returns <c>true</c>.
        /// </returns>
        public bool BuildFeatureSetIntoIndex(string filename, double max_slot_usage_rate_threshold, int debugLevel, string strRetrainModelFileName)
        {
            Console.WriteLine("Building {0} features into index...", featureLexicalDict.Size);

            IList<string> keyList;
            IList<int> valList;
            featureLexicalDict.GenerateLexicalIdList(out keyList, out valList);

            if (debugLevel > 0)
            {
                Console.Write("Debug: Writing raw feature set into file...");
                var filename_featureset_raw_format = filename + ".feature.raw_text";
                // 'using' guarantees the writer is flushed and the file handle released even
                // when a write throws; a manual Close() would be skipped in that case.
                using (var sw = new StreamWriter(filename_featureset_raw_format))
                {
                    // Save each feature and its id, tab separated, one pair per line.
                    for (var i = 0; i < keyList.Count; i++)
                    {
                        sw.WriteLine("{0}\t{1}", keyList[i], valList[i]);
                    }
                }
                Console.WriteLine("Done.");
            }

            //Build feature index
            var filename_featureset = filename + ".feature";
            var da = new DoubleArrayTrieBuilder(thread_num_);
            if (da.build(keyList, valList, max_slot_usage_rate_threshold) == false)
            {
                Console.WriteLine("Build lexical dictionary failed.");
                return false;
            }
            //Save indexed feature set into file
            da.save(filename_featureset);

            if (string.IsNullOrEmpty(strRetrainModelFileName))
            {
                // No existing model: drop the lexical data first, then allocate the weights.
                featureLexicalDict.Clear();
                featureLexicalDict = null;
                keyList = null;
                valList = null;

                GC.Collect();

                //Create weight matrix
                alpha_ = new double[feature_size() + 1];
            }
            else
            {
                Console.WriteLine();
                Console.WriteLine("Loading the existed model for re-training...");
                //Create weight matrix
                alpha_ = new double[feature_size() + 1];

                var modelReader = new ModelReader();
                modelReader.LoadModel(strRetrainModelFileName);

                if (modelReader.y_.Count == y_.Count)
                {
                    for (var i = 0; i < keyList.Count; i++)
                    {
                        var index = modelReader.get_id(keyList[i]);
                        if (index < 0)
                        {
                            // Feature is not present in the existing model; keep its default weights.
                            continue;
                        }
                        // Keys starting with 'U' own y_.Count weights, all other keys y_.Count squared.
                        var size = (keyList[i][0] == 'U' ? y_.Count : y_.Count * y_.Count);
                        for (var j = 0; j < size; j++)
                        {
                            alpha_[valList[i] + j + 1] = modelReader.GetAlpha(index + j);
                        }
                    }
                }
                else
                {
                    Console.WriteLine("The number of tags isn't equal between two models, it cannot be re-trained.");
                }

                // The key/id lists were needed for the copy above, so cleanup happens last here.
                featureLexicalDict.Clear();
                featureLexicalDict = null;
                keyList = null;
                valList = null;

                GC.Collect();
            }

            return true;
        }