示例#1
0
        /// <summary>
        /// C# port of the Python NLTK snippet quoted in the comments below:
        /// builds a "last_letter" feature for every entry of the names corpus,
        /// trains a classifier via <c>Nltk.NaiveBayesClassifier.Train</c>, then
        /// prints two sample classifications, the accuracy measured on the
        /// held-out samples, and the classifier's most informative features.
        /// </summary>
        public static void TestNames()
        {
            // Number of shuffled samples held out for evaluation
            // (the 500 used in the Python slices below).
            const int testSetSize = 500;

            //def gender_features(word):
            //    return { 'last_letter': word[-1]}
            dynamic GenderFeature(string word)
            {
                var features = BuiltIns.Dict();

                // Null/empty input maps to "" because word.Last() would throw on it.
                features["last_letter"] = string.IsNullOrEmpty(word) ? "" : word.Last().ToString();
                return features;
            }

            //from nltk.corpus import names
            //>>> labeled_names = ([(name, 'male') for name in names.words('male.txt')] +
            //... [(name, 'female') for name in names.words('female.txt')])
            var namesCorpus  = new NltkNet.Nltk.Corpus.Names();
            var maleNames    = namesCorpus.Words("male.txt").AsNet.Select(name => (name, gender: "male"));
            var femaleNames  = namesCorpus.Words("female.txt").AsNet.Select(name => (name, gender: "female"));
            var labeledNames = maleNames.ToList();

            labeledNames.AddRange(femaleNames);

            //>>> import random
            //>>> random.shuffle(labeled_names)
            labeledNames.Shuffle();

            //featuresets = [(gender_features(n), gender) for (n, gender) in labeled_names]
            //train_set, test_set = featuresets[500:], featuresets[:500]
            var featureSets = labeledNames.Select(nameAndGender => (GenderFeature(nameAndGender.name), nameAndGender.gender)).ToList();

            // Mirror the Python slices exactly: the first testSetSize samples are
            // the test set ([:500]) and everything after them is training data ([500:]).
            var trainSet    = featureSets.Skip(testSetSize).ToList();
            var testSet     = featureSets.Take(testSetSize).ToList();

            // nltk.NaiveBayesClassifier.train(train_set)
            dynamic classifier = Nltk.NaiveBayesClassifier.Train(trainSet);

            // classifier.classify(gender_features('Neo'))
            Print("Neo=" + classifier.classify(GenderFeature("Neo")));
            Print("Trinity=" + classifier.classify(GenderFeature("Trinity")));
            Print(Nltk.Classify.Accuracy(classifier, testSet));
            classifier.show_most_informative_features(5);
        }