Beispiel #1
0
        // Load all training records and generate the features of each one in parallel.
        //
        // Returns an array holding one EncoderTagger per record whose feature
        // generation succeeded. Records for which GenerateFeature returns false are
        // skipped, and the array is trimmed to the number of successful records so
        // that it never contains null slots.
        //
        // Side effects: writes progress to the console, and clears and nulls
        // trainCorpusList once every record has been processed.
        public EncoderTagger[] ReadAllRecords()
        {
            var arrayEncoderTagger     = new EncoderTagger[trainCorpusList.Count];
            var arrayEncoderTaggerSize = 0;

            //Generate each record features
            Parallel.For(0, trainCorpusList.Count, parallelOption, i =>
            {
                var _x = new EncoderTagger(this);
                if (_x.GenerateFeature(trainCorpusList[i]) == false)
                {
                    // Feature generation failed: this record gets no slot in the result.
                    return;
                }

                // Iterations run concurrently, so reserve the next free slot atomically.
                var oldValue = Interlocked.Increment(ref arrayEncoderTaggerSize) - 1;
                arrayEncoderTagger[oldValue] = _x;

                if (oldValue % 10000 == 0)
                {
                    //Show current progress on console
                    Console.Write("{0}...", oldValue);
                }
            });

            trainCorpusList.Clear();
            trainCorpusList = null;

            // The array was sized for every record; when some were skipped the tail
            // was never written, so cut it off instead of handing nulls to the caller.
            if (arrayEncoderTaggerSize < arrayEncoderTagger.Length)
            {
                Array.Resize(ref arrayEncoderTagger, arrayEncoderTaggerSize);
            }

            Console.WriteLine();
            return(arrayEncoderTagger);
        }
Beispiel #2
0
        // Fill tagger.feature_cache_ with feature-id arrays built from the templates.
        //
        // First pass: for every word position 0 .. word_num-1, each unigram template
        // is expanded with apply_rule; each non-null result is turned into an id via
        // featureLexicalDict.GetOrAddId, and the ids of that position are appended to
        // the cache as one array.
        // Second pass: the same with the bigram templates, for positions
        // 1 .. word_num-1.
        // Templates for which apply_rule returns null contribute no id.
        // Always returns true.
        public bool BuildFeatures(EncoderTagger tagger)
        {
            // Scratch list reused for every position; emptied after each cache entry.
            var idBuffer = new List<long>();

            using (var pooled = _buildersPool.GetOrCreate())
            {
                var builder = pooled.Item;

                // Unigram pass: one cache entry per word position.
                for (var pos = 0; pos < tagger.word_num; ++pos)
                {
                    for (var t = 0; t < unigram_templs_.Count; t++)
                    {
                        var rendered = apply_rule(unigram_templs_[t], pos, builder, tagger);
                        if (rendered == null)
                        {
                            continue;
                        }

                        idBuffer.Add(featureLexicalDict.GetOrAddId(rendered.ToString()));
                    }

                    tagger.feature_cache_.Add(idBuffer.ToArray());
                    idBuffer.Clear();
                }

                // Bigram pass: starts at position 1, one cache entry per position.
                for (var pos = 1; pos < tagger.word_num; ++pos)
                {
                    for (var t = 0; t < bigram_templs_.Count; t++)
                    {
                        var rendered = apply_rule(bigram_templs_[t], pos, builder, tagger);
                        if (rendered == null)
                        {
                            continue;
                        }

                        idBuffer.Add(featureLexicalDict.GetOrAddId(rendered.ToString()));
                    }

                    tagger.feature_cache_.Add(idBuffer.ToArray());
                    idBuffer.Clear();
                }
            }

            return true;
        }