// Load all records and generate their features
public EncoderTagger[] ReadAllRecords()
{
    var arrayEncoderTagger = new EncoderTagger[trainCorpusList.Count];
    var arrayEncoderTaggerSize = 0;

    //Generate features for each record in parallel
    Parallel.For(0, trainCorpusList.Count, parallelOption, i =>
    {
        var _x = new EncoderTagger(this);
        if (_x.GenerateFeature(trainCorpusList[i]))
        {
            //Atomically reserve the next free slot and store the tagger there.
            //If GenerateFeature fails for some records, the tail of the array stays null.
            var oldValue = Interlocked.Increment(ref arrayEncoderTaggerSize) - 1;
            arrayEncoderTagger[oldValue] = _x;

            if (oldValue % 10000 == 0)
            {
                //Show current progress on console
                Console.Write("{0}...", oldValue);
            }
        }
    });

    //Release the raw corpus; only the generated taggers are needed from here on
    trainCorpusList.Clear();
    trainCorpusList = null;

    Console.WriteLine();
    return arrayEncoderTagger;
}
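Because GenerateFeature can fail for individual records, the returned array is allocated with trainCorpusList.Count elements but only arrayEncoderTaggerSize of them are actually filled, so it can end with null slots. If downstream code does not already skip null entries, a trimming step along the following lines could be inserted just before the return statement (a sketch only, reusing the local variables from the method above; the original may well rely on null checks in the caller instead):

//Sketch: shrink the result so it contains only the successfully built taggers
if (arrayEncoderTaggerSize < arrayEncoderTagger.Length)
{
    Array.Resize(ref arrayEncoderTagger, arrayEncoderTaggerSize);
}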
//Get the feature id from the feature set by its feature string.
//If the feature string does not exist in the set yet, a new id is generated and returned.
public bool BuildFeatures(EncoderTagger tagger)
{
    var feature = new List<long>();
    using (var v = _buildersPool.GetOrCreate())
    {
        var localBuilder = v.Item;
        //tagger.feature_id_ = tagger.feature_cache_.Count;

        //Unigram features: one feature id list per token
        for (var cur = 0; cur < tagger.word_num; ++cur)
        {
            for (int index = 0; index < unigram_templs_.Count; index++)
            {
                var it = unigram_templs_[index];
                var strFeature = apply_rule(it, cur, localBuilder, tagger);
                if (strFeature != null)
                {
                    var id = featureLexicalDict.GetOrAddId(strFeature.ToString());
                    feature.Add(id);
                }
            }
            tagger.feature_cache_.Add(feature.ToArray());
            feature.Clear();
        }

        //Bigram features: one feature id list per transition, starting from the second token
        for (var cur = 1; cur < tagger.word_num; ++cur)
        {
            for (int index = 0; index < bigram_templs_.Count; index++)
            {
                var it = bigram_templs_[index];
                var strFeature = apply_rule(it, cur, localBuilder, tagger);
                if (strFeature != null)
                {
                    var id = featureLexicalDict.GetOrAddId(strFeature.ToString());
                    feature.Add(id);
                }
            }
            tagger.feature_cache_.Add(feature.ToArray());
            feature.Clear();
        }
    }
    return true;
}
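featureLexicalDict.GetOrAddId is the piece that turns a feature string into a stable integer id, as the comment above describes. Its real implementation is not shown here; purely as an illustration of that contract (an assumption, not the actual featureLexicalDict code), a minimal thread-safe version could look like this:

using System.Collections.Concurrent;
using System.Threading;

//Hypothetical sketch of the feature-id contract: each distinct feature string maps to one stable id.
//This is NOT the actual featureLexicalDict implementation, only an illustration.
public class SimpleFeatureLexicalDict
{
    private readonly ConcurrentDictionary<string, long> _featureToId =
        new ConcurrentDictionary<string, long>();
    private long _nextId = -1;

    //Return the existing id for the feature string, or assign and return a new one.
    //The value factory may run more than once under contention, so ids can be skipped,
    //but every feature string still receives exactly one stable id (first write wins).
    public long GetOrAddId(string strFeature)
    {
        return _featureToId.GetOrAdd(strFeature, _ => Interlocked.Increment(ref _nextId));
    }
}

If feature generation is driven from parallel loops like the Parallel.For in ReadAllRecords, id assignment has to be safe for concurrent callers, which is why the sketch pairs a ConcurrentDictionary with an atomic counter.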