/// <summary>
/// Fills feature_cache_ with the feature ids produced by the unigram and bigram templates.
/// Rows [0, word_num) hold the unigram ids for each position, rows
/// [word_num, 2 * word_num) hold the bigram ids; every row is terminated by -1.
/// </summary>
/// <returns>
/// Utils.ERROR_INVALIDATED_PARAMETER when word_num is not positive,
/// Utils.ERROR_EMPTY_FEATURE as soon as apply_rule returns null for a template,
/// otherwise Utils.ERROR_SUCCESS.
/// </returns>
public int buildFeatures()
{
    if (word_num <= 0)
    {
        return Utils.ERROR_INVALIDATED_PARAMETER;
    }

    using (var pooled = _buildersPool.GetOrCreate())
    {
        var builder = pooled.Item;

        // Next row of feature_cache_ to fill; keeps counting across both passes.
        var row = 0;

        // Pass 1: unigram templates, one cache row per position.
        for (var pos = 0; pos < word_num; pos++)
        {
            var col = 0;
            for (var t = 0; t < featureIndex.unigram_templs_.Count; t++)
            {
                var feature = featureIndex.apply_rule(featureIndex.unigram_templs_[t], pos, builder, this);
                if (feature == null)
                {
                    return Utils.ERROR_EMPTY_FEATURE;
                }

                int featureId = featureIndex.get_id(feature.ToString());
                if (featureId == -1)
                {
                    // get_id reported no id for this feature string; nothing is stored for it.
                    continue;
                }

                feature_cache_[row][col++] = featureId;
            }

            // Close the row with the -1 sentinel.
            feature_cache_[row][col] = -1;
            row++;
        }

        // Pass 2: bigram templates, appended after the unigram rows.
        for (var pos = 0; pos < word_num; pos++)
        {
            var col = 0;
            for (var t = 0; t < featureIndex.bigram_templs_.Count; t++)
            {
                var feature = featureIndex.apply_rule(featureIndex.bigram_templs_[t], pos, builder, this);
                if (feature == null)
                {
                    return Utils.ERROR_EMPTY_FEATURE;
                }

                int featureId = featureIndex.get_id(feature.ToString());
                if (featureId == -1)
                {
                    continue;
                }

                feature_cache_[row][col++] = featureId;
            }

            feature_cache_[row][col] = -1;
            row++;
        }

        return Utils.ERROR_SUCCESS;
    }
}
/// <summary>
/// Populates feature_cache_ from the unigram templates and then the bigram templates.
/// One row is written per position and template family (unigram rows first,
/// bigram rows after them); each row ends with a -1 marker.
/// </summary>
/// <returns>
/// Utils.ERROR_INVALIDATED_PARAMETER if word_num is zero or negative,
/// Utils.ERROR_EMPTY_FEATURE if apply_rule yields an empty string for any template,
/// Utils.ERROR_SUCCESS otherwise.
/// </returns>
public int buildFeatures()
{
    if (word_num <= 0)
    {
        return Utils.ERROR_INVALIDATED_PARAMETER;
    }

    // Index of the cache row currently being filled; shared by both passes.
    var rowIndex = 0;

    // Unigram pass.
    for (var position = 0; position < word_num; position++)
    {
        var columnIndex = 0;
        for (var k = 0; k < featureIndex.unigram_templs_.Count; k++)
        {
            var featureText = featureIndex.apply_rule(featureIndex.unigram_templs_[k], position, this);
            if (featureText == "")
            {
                return Utils.ERROR_EMPTY_FEATURE;
            }

            int featureId = featureIndex.get_id(featureText);
            if (featureId == -1)
            {
                // No id returned for this feature string, so it is left out of the row.
                continue;
            }

            feature_cache_[rowIndex][columnIndex++] = featureId;
        }

        // Terminate the row.
        feature_cache_[rowIndex][columnIndex] = -1;
        rowIndex++;
    }

    // Bigram pass; rows continue directly after the unigram rows.
    for (var position = 0; position < word_num; position++)
    {
        var columnIndex = 0;
        for (var k = 0; k < featureIndex.bigram_templs_.Count; k++)
        {
            var featureText = featureIndex.apply_rule(featureIndex.bigram_templs_[k], position, this);
            if (featureText == "")
            {
                return Utils.ERROR_EMPTY_FEATURE;
            }

            int featureId = featureIndex.get_id(featureText);
            if (featureId == -1)
            {
                continue;
            }

            feature_cache_[rowIndex][columnIndex++] = featureId;
        }

        feature_cache_[rowIndex][columnIndex] = -1;
        rowIndex++;
    }

    return Utils.ERROR_SUCCESS;
}
/// <summary>
/// Writes the feature ids for every position into feature_cache_: first one row per
/// position for the unigram templates, then one row per position for the bigram
/// templates. A -1 entry marks the end of each row.
/// </summary>
/// <returns>
/// Utils.ERROR_INVALIDATED_PARAMETER when word_num is not greater than zero,
/// Utils.ERROR_EMPTY_FEATURE when apply_rule produces "" for a template,
/// and Utils.ERROR_SUCCESS once both passes complete.
/// </returns>
public int buildFeatures()
{
    if (word_num <= 0)
    {
        return Utils.ERROR_INVALIDATED_PARAMETER;
    }

    // Running row counter; the bigram pass continues where the unigram pass stopped.
    int cacheRow = 0;

    // Unigram templates.
    for (int wordPos = 0; wordPos < word_num; wordPos++)
    {
        int cacheCol = 0;
        foreach (string template in featureIndex.unigram_templs_)
        {
            string feature = featureIndex.apply_rule(template, wordPos, this);
            if (feature == "")
            {
                return Utils.ERROR_EMPTY_FEATURE;
            }

            int featureId = featureIndex.get_id(feature);
            if (featureId == -1)
            {
                // get_id gave no id for this feature; skip it.
                continue;
            }

            feature_cache_[cacheRow][cacheCol++] = featureId;
        }

        // End-of-row marker.
        feature_cache_[cacheRow][cacheCol] = -1;
        cacheRow++;
    }

    // Bigram templates.
    for (int wordPos = 0; wordPos < word_num; wordPos++)
    {
        int cacheCol = 0;
        foreach (string template in featureIndex.bigram_templs_)
        {
            string feature = featureIndex.apply_rule(template, wordPos, this);
            if (feature == "")
            {
                return Utils.ERROR_EMPTY_FEATURE;
            }

            int featureId = featureIndex.get_id(feature);
            if (featureId == -1)
            {
                continue;
            }

            feature_cache_[cacheRow][cacheCol++] = featureId;
        }

        feature_cache_[cacheRow][cacheCol] = -1;
        cacheRow++;
    }

    return Utils.ERROR_SUCCESS;
}
/// <summary>
/// Builds the collected feature set into a double-array trie index, saves it to
/// filename + ".feature", and allocates the weight array alpha_.
/// </summary>
/// <param name="filename">Base path; ".feature" (and ".feature.raw_text" for the debug dump) is appended to it.</param>
/// <param name="max_slot_usage_rate_threshold">Passed through unchanged to DoubleArrayTrieBuilder.build.</param>
/// <param name="debugLevel">When greater than 0, the feature/id pairs are also written as tab-separated text.</param>
/// <param name="strRetrainModelFileName">
/// Path of an existing model whose weights are copied into alpha_ for matching features;
/// null or empty leaves alpha_ zero-initialised.
/// </param>
/// <returns>
/// false when the trie build fails; true otherwise. A tag-count mismatch with the
/// retrain model only prints a message and still returns true.
/// </returns>
public bool BuildFeatureSetIntoIndex(string filename, double max_slot_usage_rate_threshold, int debugLevel, string strRetrainModelFileName)
{
    Console.WriteLine("Building {0} features into index...", featureLexicalDict.Size);

    IList<string> keyList;
    IList<int> valList;
    featureLexicalDict.GenerateLexicalIdList(out keyList, out valList);

    if (debugLevel > 0)
    {
        Console.Write("Debug: Writing raw feature set into file...");
        var filename_featureset_raw_format = filename + ".feature.raw_text";

        // 'using' flushes and closes the writer even when a write throws, so the
        // file handle is never leaked and buffered lines are not silently dropped.
        using (var sw = new StreamWriter(filename_featureset_raw_format))
        {
            // Save each feature and its id in raw "key<TAB>id" format.
            for (var i = 0; i < keyList.Count; i++)
            {
                sw.WriteLine("{0}\t{1}", keyList[i], valList[i]);
            }
        }

        Console.WriteLine("Done.");
    }

    // Build feature index
    var filename_featureset = filename + ".feature";
    var da = new DoubleArrayTrieBuilder(thread_num_);
    if (da.build(keyList, valList, max_slot_usage_rate_threshold) == false)
    {
        Console.WriteLine("Build lexical dictionary failed.");
        return false;
    }

    // Save indexed feature set into file
    da.save(filename_featureset);

    if (string.IsNullOrEmpty(strRetrainModelFileName))
    {
        // Fresh training: release the lexical dictionary before allocating the weights.
        // NOTE(review): the forced GC.Collect is presumably here to lower peak memory - confirm.
        featureLexicalDict.Clear();
        featureLexicalDict = null;
        keyList = null;
        valList = null;
        GC.Collect();

        // Create weight matrix
        alpha_ = new double[feature_size() + 1];
    }
    else
    {
        Console.WriteLine();
        Console.WriteLine("Loading the existed model for re-training...");

        // Create weight matrix
        alpha_ = new double[feature_size() + 1];

        var modelReader = new ModelReader();
        modelReader.LoadModel(strRetrainModelFileName);

        if (modelReader.y_.Count == y_.Count)
        {
            for (var i = 0; i < keyList.Count; i++)
            {
                var index = modelReader.get_id(keyList[i]);
                if (index < 0)
                {
                    // Negative id from the old model: keep the zero-initialised weights.
                    continue;
                }

                // Keys starting with 'U' span y_.Count weights, all others y_.Count squared.
                var size = (keyList[i][0] == 'U' ? y_.Count : y_.Count * y_.Count);
                for (var j = 0; j < size; j++)
                {
                    alpha_[valList[i] + j + 1] = modelReader.GetAlpha(index + j);
                }
            }
        }
        else
        {
            Console.WriteLine("The number of tags isn't equal between two models, it cannot be re-trained.");
        }

        // Clean up all data (the key/value lists were still needed by the copy loop above)
        featureLexicalDict.Clear();
        featureLexicalDict = null;
        keyList = null;
        valList = null;
        GC.Collect();
    }

    return true;
}