コード例 #1
0
        /// <summary>
        /// Applies the configured feature-count thresholds to <paramref name="data"/> and
        /// reports, via <c>Verbose</c>, how many feature types disappeared as a result.
        /// </summary>
        /// <remarks>
        /// Each per-class threshold (word, char, bigram, conjunction) is appended to the
        /// <c>featureThresholds</c> member when its <c>second</c> component is positive and,
        /// where applicable, the matching switch on <c>featExtractor</c> is on. Entries are
        /// appended on every call; this method never clears the list.
        /// </remarks>
        /// <param name="data">
        /// Dataset to threshold. Its feature-type count is read before and after the
        /// threshold calls to measure how many types were removed.
        /// </param>
        private void ApplyThresholds(WeightedDataset data)
        {
            // Word threshold has no feature-extractor switch; only its value gates it.
            if (wordThreshold.second > 0)
            {
                featureThresholds.Add(wordThreshold);
            }

            if (featExtractor.chars && charThreshold.second > 0)
            {
                featureThresholds.Add(charThreshold);
            }

            if (featExtractor.bigrams && bigramThreshold.second > 0)
            {
                featureThresholds.Add(bigramThreshold);
            }

            // Either flavour of conjunction feature enables the conjunction threshold.
            bool conjunctionsEnabled = featExtractor.conjunctions || featExtractor.mildConjunctions;
            if (conjunctionsEnabled && conjThreshold.second > 0)
            {
                featureThresholds.Add(conjThreshold);
            }

            // Snapshot the feature-type count so the effect of thresholding can be measured.
            int typesBefore = data.NumFeatureTypes();

            // The universal threshold runs first, then the collected per-class thresholds.
            if (universalThreshold > 0)
            {
                data.ApplyFeatureCountThreshold(universalThreshold);
            }

            if (featureThresholds.Count > 0)
            {
                data.ApplyFeatureCountThreshold(featureThresholds);
            }

            int removed = typesBefore - data.NumFeatureTypes();
            if (removed <= 0)
            {
                // Nothing was pruned, so there is nothing to report.
                return;
            }

            Verbose("Thresholding removed " + removed + " features.");
        }
コード例 #2
0
        /// <summary>
        /// Turns the accumulated datum counts into a weighted training set, computes the
        /// Laplace-smoothed tag distribution stored in <c>tagDist</c>, applies the feature
        /// thresholds, and trains the linear classifier stored in <c>scorer</c>.
        /// </summary>
        /// <remarks>
        /// <c>datumCounter</c> is set to <c>null</c> once its entries have been consumed, so
        /// the counts are no longer reachable through that member after this method returns.
        /// </remarks>
        public virtual void FinishTraining()
        {
            IntCounter<string> tagCounts = new IntCounter<string>();
            WeightedDataset trainingSet = new WeightedDataset(datumCounter.Size());

            foreach (TaggedWord entry in datumCounter.KeySet())
            {
                int occurrences = datumCounter.GetIntCount(entry);

                // Skip entries above the count threshold when training on low counts only,
                // and entries whose word is present in functionWordTags.
                bool aboveCountLimit = trainOnLowCount && occurrences > trainCountThreshold;
                if (aboveCountLimit || functionWordTags.Contains(entry.Word()))
                {
                    continue;
                }

                tagCounts.IncrementCount(entry.Tag());

                // When training by type every surviving entry gets weight 1;
                // otherwise it is weighted by its observed count.
                int weight = trainByType ? 1 : occurrences;

                BasicDatum datum = new BasicDatum(featExtractor.MakeFeatures(entry.Word()), entry.Tag());
                trainingSet.Add(datum, weight);
            }

            // The raw counts have been consumed; drop the member reference.
            datumCounter = null;

            tagDist = Distribution.LaplaceSmoothedDistribution(tagCounts, tagCounts.Size(), 0.5);

            // The tag counter is not used past this point.
            tagCounts = null;

            ApplyThresholds(trainingSet);

            Verbose("Making classifier...");

            // Note: a ResultStoringMonitor(5, "weights") argument and a minim.shutUp() call
            // were previously present here only as commented-out code.
            LinearClassifierFactory classifierFactory = new LinearClassifierFactory(new QNMinimizer());
            classifierFactory.SetTol(tol);
            classifierFactory.SetSigma(sigma);

            scorer = classifierFactory.TrainClassifier(trainingSet);

            Verbose("Done training.");
        }