Esempio n. 1
0
        public static void Main(string[] args)
        {
            // Build the training set: three examples of each working configuration,
            // plus one broken instance where both lights are red.
            IList <IDatum <string, string> > trainingData = new List <IDatum <string, string> >();

            for (int i = 0; i < 3; ++i)
            {
                trainingData.Add(MakeStopLights(Green, Red));
            }
            for (int i = 0; i < 3; ++i)
            {
                trainingData.Add(MakeStopLights(Red, Green));
            }
            trainingData.Add(MakeStopLights(Red, Red));

            // Held-out instances used to probe the trained model.
            IDatum <string, string> workingLights = MakeStopLights(Green, Red);
            IDatum <string, string> brokenLights  = MakeStopLights(Red, Red);

            // Configure a factory: conjugate-gradient optimization, per-iteration
            // convergence output, and a small amount of smoothing (sigma = 10).
            LinearClassifierFactory <string, string> factory = new LinearClassifierFactory <string, string>();
            factory.UseConjugateGradientAscent();
            factory.SetVerbose(true);
            factory.SetSigma(10.0);

            // Fit the model and print the learned weights.
            LinearClassifier <string, string> model = factory.TrainClassifier(trainingData);
            model.Dump();

            // Classify each probe instance and show the supporting evidence.
            System.Console.Out.WriteLine("Working instance got: " + model.ClassOf(workingLights));
            model.JustificationOf(workingLights);
            System.Console.Out.WriteLine("Broken instance got: " + model.ClassOf(brokenLights));
            model.JustificationOf(brokenLights);
        }
Esempio n. 2
0
 /// <summary>Trains the multiclass relation classifier selected by <c>relationExtractorClassifierType</c>.</summary>
 /// <param name="trainSet">Labeled dataset to fit; the trained model is stored in the <c>classifier</c> field.</param>
 /// <exception cref="ArgumentException">If the configured classifier type is neither "linear" nor "svm".</exception>
 public virtual void TrainMulticlass(GeneralDataset <string, string> trainSet)
 {
     if (Sharpen.Runtime.EqualsIgnoreCase(relationExtractorClassifierType, "linear"))
     {
         LinearClassifierFactory <string, string> lcFactory = new LinearClassifierFactory <string, string>(1e-4, false, sigma);
         lcFactory.SetVerbose(false);
         // use in-place SGD instead of QN. this is faster but much worse!
         // lcFactory.useInPlaceStochasticGradientDescent(-1, -1, 1.0);
         // use a hybrid minimizer: start with in-place SGD, continue with QN
         // lcFactory.useHybridMinimizerWithInPlaceSGD(50, -1, sigma);
         classifier = lcFactory.TrainClassifier(trainSet);
     }
     else if (Sharpen.Runtime.EqualsIgnoreCase(relationExtractorClassifierType, "svm"))
     {
         SVMLightClassifierFactory <string, string> svmFactory = new SVMLightClassifierFactory <string, string>();
         // For SVMs the sigma hyper-parameter is used as the C regularization constant.
         svmFactory.SetC(sigma);
         classifier = svmFactory.TrainClassifier(trainSet);
     }
     else
     {
         // Misconfiguration: throw a specific exception type rather than the reserved
         // base System.Exception (CA2201); still caught by any existing catch (Exception).
         throw new ArgumentException("Invalid classifier type: " + relationExtractorClassifierType);
     }
     if (logger.IsLoggable(Level.Fine))
     {
         ReportWeights(classifier, null);
     }
 }
Esempio n. 3
0
        /// <summary>Train a sentiment model from a set of data.</summary>
        /// <param name="data">The data to train the model from.</param>
        /// <param name="modelLocation">
        /// An optional location to save the model.
        /// Note that this stream will be closed in this method,
        /// and should not be written to thereafter.
        /// </param>
        /// <returns>A sentiment classifier, ready to use.</returns>
        public static SimpleSentiment Train(IStream <SimpleSentiment.SentimentDatum> data, Optional <OutputStream> modelLocation)
        {
            // Some useful variables configuring how we train
            bool   useL1 = true;  // NOTE(review): unused in this translation; presumably consumed by a lambda lost in the Java->C# port (the null args below) — confirm against the original source
            double sigma = 1.0;
            int    featureCountThreshold = 5;

            // Featurize the data
            Redwood.Util.ForceTrack("Featurizing");
            RVFDataset <SentimentClass, string> dataset = new RVFDataset <SentimentClass, string>();
            AtomicInteger             datasize          = new AtomicInteger(0);
            ICounter <SentimentClass> distribution      = new ClassicCounter <SentimentClass>();

            // NOTE(review): the null arguments here and in IfPresent/SetMinimizerCreator below are
            // translation artifacts (originally lambdas); as written, dataset/datasize/distribution
            // are never populated — verify against the original Java implementation.
            data.Unordered().Parallel().Map(null).ForEach(null);
            Redwood.Util.EndTrack("Featurizing");
            // Print label distribution
            Redwood.Util.StartTrack("Distribution");
            foreach (SentimentClass label in SentimentClass.Values())
            {
                Redwood.Util.Log(string.Format("%7d", (int)distribution.GetCount(label)) + "   " + label);
            }
            Redwood.Util.EndTrack("Distribution");
            // Train the classifier
            Redwood.Util.ForceTrack("Training");
            if (featureCountThreshold > 1)
            {
                // Drop rare features to reduce noise and model size.
                dataset.ApplyFeatureCountThreshold(featureCountThreshold);
            }
            dataset.Randomize(42L);
            LinearClassifierFactory <SentimentClass, string> factory = new LinearClassifierFactory <SentimentClass, string>();

            factory.SetVerbose(true);
            try
            {
                factory.SetMinimizerCreator(null);
            }
            catch (Exception)
            {
                // Best-effort: if the minimizer cannot be configured, fall back to the factory default.
            }
            factory.SetSigma(sigma);
            LinearClassifier <SentimentClass, string> classifier = factory.TrainClassifier(dataset);

            // Optionally save the model
            modelLocation.IfPresent(null);
            Redwood.Util.EndTrack("Training");
            // Evaluate the model via cross-validation, warm-starting each fold from the full model
            Redwood.Util.ForceTrack("Evaluating");
            factory.SetVerbose(false);
            double sumAccuracy             = 0.0;
            ICounter <SentimentClass> sumP = new ClassicCounter <SentimentClass>();
            ICounter <SentimentClass> sumR = new ClassicCounter <SentimentClass>();
            int numFolds = 4;

            for (int fold = 0; fold < numFolds; ++fold)
            {
                Pair <GeneralDataset <SentimentClass, string>, GeneralDataset <SentimentClass, string> > trainTest = dataset.SplitOutFold(fold, numFolds);
                LinearClassifier <SentimentClass, string> foldClassifier = factory.TrainClassifierWithInitialWeights(trainTest.first, classifier);
                // convex objective, so this should be OK
                sumAccuracy += foldClassifier.EvaluateAccuracy(trainTest.second);
                foreach (SentimentClass label_1 in SentimentClass.Values())
                {
                    Pair <double, double> pr = foldClassifier.EvaluatePrecisionAndRecall(trainTest.second, label_1);
                    sumP.IncrementCount(label_1, pr.first);
                    // BUG FIX: recall was previously accumulated into sumP (copy-paste error),
                    // double-counting precision and leaving sumR at zero, so every reported
                    // (R) was 0 and (F1) NaN. Accumulate recall into sumR.
                    sumR.IncrementCount(label_1, pr.second);
                }
            }
            DecimalFormat df = new DecimalFormat("0.000%");

            log.Info("----------");
            double aveAccuracy = sumAccuracy / ((double)numFolds);

            log.Info(string.Empty + numFolds + "-fold accuracy: " + df.Format(aveAccuracy));
            log.Info(string.Empty);
            foreach (SentimentClass label_2 in SentimentClass.Values())
            {
                // Average P/R over folds; F1 is the harmonic mean of the averaged P and R.
                double p = sumP.GetCount(label_2) / numFolds;
                double r = sumR.GetCount(label_2) / numFolds;
                log.Info(label_2 + " (P)  = " + df.Format(p));
                log.Info(label_2 + " (R)  = " + df.Format(r));
                log.Info(label_2 + " (F1) = " + df.Format(2 * p * r / (p + r)));
                log.Info(string.Empty);
            }
            log.Info("----------");
            Redwood.Util.EndTrack("Evaluating");
            // Return
            return(new SimpleSentiment(classifier));
        }