/// <summary>
/// Regenerates feature ids after shrinking low-frequency features, then rewrites
/// the cached feature ids of every tagger so they use the new ids.
/// </summary>
/// <param name="xList">Taggers whose <c>feature_cache_</c> entries are remapped in place.</param>
/// <param name="freq">Frequency threshold forwarded to <c>featureLexicalDict.Shrink</c>.</param>
public void Shrink(EncoderTagger[] xList, int freq)
{
    // Filled by RegenerateFeatureId: old feature id -> new feature id.
    var idMapping = new CRFLite.Utils.BTreeDictionary<long, long>();

    featureLexicalDict.Shrink(freq);
    maxid_ = featureLexicalDict.RegenerateFeatureId(idMapping, y_.Count);

    //Update feature ids
    Parallel.For(0, xList.Length, parallelOption, taggerIndex =>
    {
        var cache = xList[taggerIndex].feature_cache_;
        for (var slot = 0; slot < cache.Count; slot++)
        {
            var remapped = new List<long>();
            long newId = 0;
            foreach (var oldId in cache[slot])
            {
                // Ids without an entry in the mapping are dropped from the cache.
                if (idMapping.TryGetValue(oldId, out newId))
                {
                    remapped.Add(newId);
                }
            }

            cache[slot] = remapped.ToArray();
        }
    });
}
/// <summary>
/// Assigns a new id to every entry of <c>arrayFeatureFreq</c> in parallel and
/// records the mapping from each feature's existing id to its new id.
/// </summary>
/// <param name="old2new">
/// Receives one entry per feature: key is <c>GetId(strFeature)</c>, value is the new id.
/// </param>
/// <param name="ysize">
/// Id range reserved for a feature whose string starts with 'U'; every other
/// feature reserves <c>ysize * ysize</c> ids (presumably unigram vs. bigram
/// templates - confirm against the template parser).
/// </param>
/// <returns>The sum of all reserved id ranges, i.e. the first id past the last feature.</returns>
/// <remarks>
/// Because iterations run concurrently, which feature receives which id range
/// depends on scheduling and is not stable across runs.
/// </remarks>
public long RegenerateFeatureId(CRFLite.Utils.BTreeDictionary<long, long> old2new, long ysize)
{
    long maxid_ = 0;
    Parallel.For(0, arrayFeatureFreqSize, parallelOption, i =>
    {
        //Generate new feature id
        var addValue = (arrayFeatureFreq[i].strFeature[0] == 'U' ? ysize : ysize * ysize);

        // Interlocked.Add reserves the range atomically and returns the updated
        // total, so the start of this feature's range is that total minus its size.
        var oldValue = System.Threading.Interlocked.Add(ref maxid_, addValue) - addValue;

        //Create existed and new feature ids mapping
        // old2new is shared across iterations, so its Add call is serialized.
        lock (thisLock)
        {
            old2new.Add(GetId(arrayFeatureFreq[i].strFeature), oldValue);
        }

        arrayFeatureFreq[i].value = oldValue;
    });

    return maxid_;
}
/// <summary>
/// Creates an empty feature dictionary keyed by ordinal string comparison and
/// configures the degree of parallelism used by its parallel operations.
/// </summary>
/// <param name="thread_num">Value assigned to <c>ParallelOptions.MaxDegreeOfParallelism</c>.</param>
public DefaultFeatureLexicalDict(int thread_num)
{
    maxid_ = 0;
    featureset_dict_ = new CRFLite.Utils.BTreeDictionary<string, FeatureIdPair>(StringComparer.Ordinal, 128);
    parallelOption = new ParallelOptions
    {
        MaxDegreeOfParallelism = thread_num
    };
}
/// <summary>
/// Walks <c>featureset_dict_</c> in enumeration order, gives every feature a new
/// id, and records the mapping from its previous id to the new one.
/// </summary>
/// <param name="old2new">Receives one entry per feature: previous id -> new id.</param>
/// <param name="ysize">
/// Id range reserved for a feature whose key starts with 'U'; every other
/// feature reserves <c>ysize * ysize</c> ids.
/// </param>
/// <returns>The sum of all reserved id ranges, i.e. the first id past the last feature.</returns>
public long RegenerateFeatureId(CRFLite.Utils.BTreeDictionary<long, long> old2new, long ysize)
{
    long nextId = 0;

    foreach (var entry in featureset_dict_)
    {
        var idPair = entry.Value;

        // Record the mapping before the stored id is overwritten.
        old2new.Add(idPair.Key, nextId);
        idPair.Key = nextId;

        // Advance past the id range this feature occupies.
        if (entry.Key[0] == 'U')
        {
            nextId += ysize;
        }
        else
        {
            nextId += ysize * ysize;
        }
    }

    return nextId;
}
/// <summary>
/// Empties the feature dictionary and drops the reference to it.
/// Safe to call more than once: later calls are no-ops.
/// </summary>
public void Clear()
{
    // The field is set to null below, so a repeated call must not dereference it.
    if (featureset_dict_ == null)
    {
        return;
    }

    featureset_dict_.Clear();
    featureset_dict_ = null;
}