Beispiel #1
0
        /// <summary>
        /// Shrinks the feature lexicon by frequency, regenerates all feature ids and
        /// rewrites the cached feature ids of every tagger to the new numbering.
        /// </summary>
        /// <param name="xList">Taggers whose <c>feature_cache_</c> entries are remapped in place.</param>
        /// <param name="freq">Threshold forwarded to <c>featureLexicalDict.Shrink</c>
        /// (presumably the minimum frequency a feature needs to be kept - confirm against that method).</param>
        public void Shrink(EncoderTagger[] xList, int freq)
        {
            var idMap = new CRFLite.Utils.BTreeDictionary <long, long>();

            featureLexicalDict.Shrink(freq);
            maxid_ = featureLexicalDict.RegenerateFeatureId(idMap, y_.Count);

            // Remap every tagger's cached ids; each iteration only writes to its own tagger.
            Parallel.For(0, xList.Length, parallelOption, taggerIndex =>
            {
                var cache = xList[taggerIndex].feature_cache_;
                for (var slot = 0; slot < cache.Count; slot++)
                {
                    var remapped = new List <long>();
                    long newId   = 0;
                    foreach (var oldId in cache[slot])
                    {
                        // Ids without a mapping are dropped from the cache.
                        if (idMap.TryGetValue(oldId, out newId))
                        {
                            remapped.Add(newId);
                        }
                    }
                    cache[slot] = remapped.ToArray();
                }
            });
        }
Beispiel #2
0
        /// <summary>
        /// Assigns a new id to every entry of <c>arrayFeatureFreq</c> in parallel and
        /// records the mapping from each feature's existing id to its new id.
        /// </summary>
        /// <param name="old2new">Receives one entry per feature: key is <c>GetId(strFeature)</c>,
        /// value is the newly assigned id.</param>
        /// <param name="ysize">Size unit of an id block: a feature whose text starts with 'U'
        /// reserves <paramref name="ysize"/> ids, any other feature reserves
        /// <paramref name="ysize"/> * <paramref name="ysize"/> ids
        /// (presumably unigram vs. bigram templates - confirm).</param>
        /// <returns>The total number of ids reserved, i.e. the sum of all block sizes.</returns>
        /// <remarks>
        /// Iterations run concurrently, so which feature receives which id block depends on
        /// the order in which iterations reach the reservation step.
        /// </remarks>
        public long RegenerateFeatureId(CRFLite.Utils.BTreeDictionary <long, long> old2new, long ysize)
        {
            long nextId = 0;

            Parallel.For(0, arrayFeatureFreqSize, parallelOption, i =>
            {
                //Size of the id block this feature occupies
                var blockSize = (arrayFeatureFreq[i].strFeature[0] == 'U' ? ysize : ysize * ysize);

                //Atomically reserve [firstId, firstId + blockSize). Interlocked.Add returns the
                //value after the addition, so subtracting blockSize yields the start of the block.
                //This replaces a manual compare-and-swap retry loop with one atomic operation.
                var firstId = System.Threading.Interlocked.Add(ref nextId, blockSize) - blockSize;

                //Create existing and new feature ids mapping; the lock serializes the Add calls
                lock (thisLock)
                {
                    old2new.Add(
                        GetId(arrayFeatureFreq[i].strFeature),
                        firstId);
                }

                arrayFeatureFreq[i].value = firstId;
            });
            return(nextId);
        }
 /// <summary>
 /// Creates an empty feature dictionary keyed by ordinal string comparison and
 /// configures the parallel options used by this instance.
 /// </summary>
 /// <param name="thread_num">Value assigned to <c>ParallelOptions.MaxDegreeOfParallelism</c>.</param>
 public DefaultFeatureLexicalDict(int thread_num)
 {
     featureset_dict_ = new CRFLite.Utils.BTreeDictionary <string, FeatureIdPair>(StringComparer.Ordinal, 128);
     maxid_           = 0;
     parallelOption   = new ParallelOptions
     {
         MaxDegreeOfParallelism = thread_num
     };
 }
        /// <summary>
        /// Walks <c>featureset_dict_</c> in enumeration order, gives every feature a new
        /// consecutive id block and records the mapping from its previous id to the new one.
        /// </summary>
        /// <param name="old2new">Receives one entry per feature: previous id to new id.</param>
        /// <param name="ysize">Size unit of an id block: a feature whose key starts with 'U'
        /// advances the id counter by <paramref name="ysize"/>, any other feature by
        /// <paramref name="ysize"/> * <paramref name="ysize"/>.</param>
        /// <returns>The id counter after the last feature, i.e. the sum of all block sizes.</returns>
        public long RegenerateFeatureId(CRFLite.Utils.BTreeDictionary <long, long> old2new, long ysize)
        {
            long nextId = 0;

            foreach (var entry in featureset_dict_)
            {
                var idPair = entry.Value;

                // Remember the previous id before overwriting it with the new one.
                old2new.Add(idPair.Key, nextId);
                idPair.Key = nextId;

                // Advance the counter by the size of the block this feature occupies.
                if (entry.Key[0] == 'U')
                {
                    nextId += ysize;
                }
                else
                {
                    nextId += ysize * ysize;
                }
            }

            return(nextId);
        }
 /// <summary>
 /// Empties the feature dictionary and releases the reference to it.
 /// </summary>
 /// <remarks>
 /// Safe to call more than once: after the first call the field is null, so the
 /// dictionary is only cleared when it is still present.
 /// </remarks>
 public void Clear()
 {
     if (featureset_dict_ != null)
     {
         featureset_dict_.Clear();
         featureset_dict_ = null;
     }
 }