/// <summary>
/// Builds the feature set held in <c>featureLexicalDict</c> into an index,
/// saves it to <c>filename + ".feature"</c>, and allocates the weight array
/// <c>alpha_</c>. When the <c>modelFileName</c> field is set, weights of
/// features that also exist in that model are copied into <c>alpha_</c>.
/// </summary>
/// <param name="filename">Base path; the index is written to <c>filename + ".feature"</c>.</param>
/// <param name="max_slot_usage_rate_threshold">Passed through unchanged to <c>DoubleArrayTrieBuilder.build</c>.</param>
/// <param name="debugLevel">When greater than zero, the key/id pairs are also dumped as tab-separated text to <c>filename + ".feature.raw_text"</c>.</param>
/// <returns>
/// <c>false</c> when the index builder reports failure (nothing is saved and
/// <c>featureLexicalDict</c> is left untouched); otherwise <c>true</c>.
/// </returns>
/// <remarks>
/// On success <c>featureLexicalDict</c> is cleared and set to null, so this
/// method cannot be run a second time on the same instance.
/// </remarks>
public bool BuildFeatureSetIntoIndex(string filename, double max_slot_usage_rate_threshold, int debugLevel)
{
    IList<string> keyList;
    IList<int> valList;
    featureLexicalDict.GenerateLexicalIdList(out keyList, out valList);

    if (debugLevel > 0)
    {
        var filename_featureset_raw_format = filename + ".feature.raw_text";

        // 'using' guarantees the file handle is released even if a write throws.
        using (var sw = new StreamWriter(filename_featureset_raw_format))
        {
            // Save each feature and its id in raw text format.
            for (var i = 0; i < keyList.Count; i++)
            {
                sw.WriteLine("{0}\t{1}", keyList[i], valList[i]);
            }
        }
    }

    // Build feature index
    var filename_featureset = filename + ".feature";
    var da = new CRFLite.Utils.DoubleArrayTrieBuilder(thread_num_);
    if (!da.build(keyList, valList, max_slot_usage_rate_threshold))
    {
        return false;
    }

    // Save indexed feature set into file
    da.save(filename_featureset);

    if (string.IsNullOrWhiteSpace(modelFileName))
    {
        // No existing model: release the dictionary data first, then allocate
        // the weight array (presumably to keep peak memory lower).
        featureLexicalDict.Clear();
        featureLexicalDict = null;
        keyList = null;
        valList = null;
        GC.Collect();

        // Create weight matrix
        alpha_ = new double[feature_size() + 1];
    }
    else
    {
        // Create weight matrix
        alpha_ = new double[feature_size() + 1];

        var modelReader = new ModelReader(this.modelFileName);
        modelReader.LoadModel();

        // Weights are only copied when both models have the same number of
        // tags; otherwise alpha_ silently keeps its zero-initialized values.
        if (modelReader.y_.Count == y_.Count)
        {
            for (var i = 0; i < keyList.Count; i++)
            {
                var index = modelReader.get_id(keyList[i]);
                if (index < 0)
                {
                    // Feature is not present in the existing model.
                    continue;
                }

                // Keys starting with 'U' own y_.Count weights, every other key
                // owns y_.Count * y_.Count (presumably unigram vs. bigram
                // templates - confirm against the feature generator).
                var size = (keyList[i][0] == 'U' ? y_.Count : y_.Count * y_.Count);
                for (var j = 0; j < size; j++)
                {
                    // alpha_ is addressed with a +1 offset relative to the feature id.
                    alpha_[valList[i] + j + 1] = modelReader.GetAlpha(index + j);
                }
            }
        }

        // Clean up all data
        featureLexicalDict.Clear();
        featureLexicalDict = null;
        keyList = null;
        valList = null;
        GC.Collect();
    }

    return true;
}
/// <summary>
/// Builds the feature set held in <c>featureLexicalDict</c> into an index,
/// saves it to <c>filename + ".feature"</c>, and allocates the weight array
/// <c>alpha_</c>. When <paramref name="strRetrainModelFileName"/> is given,
/// weights of features that also exist in that model are copied into
/// <c>alpha_</c>. Progress and failure messages are written to the console.
/// </summary>
/// <param name="filename">Base path; the index is written to <c>filename + ".feature"</c>.</param>
/// <param name="max_slot_usage_rate_threshold">Passed through unchanged to <c>DoubleArrayTrieBuilder.build</c>.</param>
/// <param name="debugLevel">When greater than zero, the key/id pairs are also dumped as tab-separated text to <c>filename + ".feature.raw_text"</c>.</param>
/// <param name="strRetrainModelFileName">Path of an existing model to take initial weights from; null or empty means no existing model is loaded.</param>
/// <returns>
/// <c>false</c> when the index builder reports failure (nothing is saved and
/// <c>featureLexicalDict</c> is left untouched); otherwise <c>true</c>, even
/// when the existing model's tag count does not match and no weights are copied.
/// </returns>
/// <remarks>
/// On success <c>featureLexicalDict</c> is cleared and set to null, so this
/// method cannot be run a second time on the same instance.
/// </remarks>
public bool BuildFeatureSetIntoIndex(string filename, double max_slot_usage_rate_threshold, int debugLevel, string strRetrainModelFileName)
{
    Console.WriteLine("Building {0} features into index...", featureLexicalDict.Size);

    IList<string> keyList;
    IList<int> valList;
    featureLexicalDict.GenerateLexicalIdList(out keyList, out valList);

    if (debugLevel > 0)
    {
        Console.Write("Debug: Writing raw feature set into file...");
        var filename_featureset_raw_format = filename + ".feature.raw_text";

        // 'using' guarantees the file handle is released even if a write throws.
        using (var sw = new StreamWriter(filename_featureset_raw_format))
        {
            // Save each feature and its id in raw text format.
            for (var i = 0; i < keyList.Count; i++)
            {
                sw.WriteLine("{0}\t{1}", keyList[i], valList[i]);
            }
        }

        Console.WriteLine("Done.");
    }

    // Build feature index
    var filename_featureset = filename + ".feature";
    var da = new DoubleArrayTrieBuilder(thread_num_);
    if (!da.build(keyList, valList, max_slot_usage_rate_threshold))
    {
        Console.WriteLine("Build lexical dictionary failed.");
        return false;
    }

    // Save indexed feature set into file
    da.save(filename_featureset);

    if (string.IsNullOrEmpty(strRetrainModelFileName))
    {
        // No existing model: release the dictionary data first, then allocate
        // the weight array (presumably to keep peak memory lower).
        featureLexicalDict.Clear();
        featureLexicalDict = null;
        keyList = null;
        valList = null;
        GC.Collect();

        // Create weight matrix
        alpha_ = new double[feature_size() + 1];
    }
    else
    {
        Console.WriteLine();
        Console.WriteLine("Loading the existed model for re-training...");

        // Create weight matrix
        alpha_ = new double[feature_size() + 1];

        var modelReader = new ModelReader();
        modelReader.LoadModel(strRetrainModelFileName);

        // Weights can only be carried over when both models have the same
        // number of tags.
        if (modelReader.y_.Count == y_.Count)
        {
            for (var i = 0; i < keyList.Count; i++)
            {
                var index = modelReader.get_id(keyList[i]);
                if (index < 0)
                {
                    // Feature is not present in the existing model.
                    continue;
                }

                // Keys starting with 'U' own y_.Count weights, every other key
                // owns y_.Count * y_.Count (presumably unigram vs. bigram
                // templates - confirm against the feature generator).
                var size = (keyList[i][0] == 'U' ? y_.Count : y_.Count * y_.Count);
                for (var j = 0; j < size; j++)
                {
                    // alpha_ is addressed with a +1 offset relative to the feature id.
                    alpha_[valList[i] + j + 1] = modelReader.GetAlpha(index + j);
                }
            }
        }
        else
        {
            // alpha_ keeps its zero-initialized values in this case.
            Console.WriteLine("The number of tags isn't equal between two models, it cannot be re-trained.");
        }

        // Clean up all data
        featureLexicalDict.Clear();
        featureLexicalDict = null;
        keyList = null;
        valList = null;
        GC.Collect();
    }

    return true;
}