コード例 #1
0
        /// <summary>
        /// Loads the gesture data and builds the recognizer from its training samples.
        /// </summary>
        /// <remarks>
        /// Reads the dataset from <c>Config.DataPath</c>, creates one sample list per gesture
        /// in <c>Config.GesturesToUse</c>, sorts every training sample of a configured gesture
        /// into its list in <c>ToTrain</c>, and constructs <c>Recognizer</c> from that
        /// collection. Samples of gestures that are not configured are skipped.
        /// </remarks>
        public void Initialize()
        {
            dataset = DataLoader.LoadGestureDataFrom(Config.DataPath);

            // One (initially empty) bucket per configured gesture.
            ToTrain = new Dictionary<GestureType, List<GestureSample>>();
            foreach (GestureType gesture in Config.GesturesToUse)
            {
                ToTrain.Add(gesture, new List<GestureSample>());
            }

            foreach (UserDataSet uData in dataset)
            {
                foreach (GestureSample sample in uData.TrainingSamples)
                {
                    // ToTrain has exactly the configured gestures as keys, so a single
                    // lookup both filters the sample and finds its bucket.
                    List<GestureSample> bucket;
                    if (ToTrain.TryGetValue(sample.Gesture, out bucket))
                    {
                        bucket.Add(sample);
                    }
                }
            }

            Recognizer = new LinearClassifier(ToTrain);
        }
コード例 #2
0
        /// <summary>
        /// Entry point for setting up and running the experiments.
        /// </summary>
        /// <remarks>
        /// Loads the dataset from <c>Config.DataPath</c>, creates an empty sample list per
        /// gesture in <c>Config.GesturesToUse</c> in both <c>ToTrain</c> and
        /// <c>ToRecognize_Training</c>, calls <c>PopulateDataSets</c>, constructs
        /// <c>Recognizer</c> from <c>ToTrain</c>, calls <c>Run</c> and finally writes an
        /// empty line to the console.
        /// </remarks>
        public void RunExperiments()
        {
            dataset = DataLoader.LoadGestureDataFrom(Config.DataPath);

            // One (initially empty) bucket per configured gesture in each collection.
            ToTrain = new Dictionary<GestureType, List<GestureSample>>();
            ToRecognize_Training = new Dictionary<GestureType, List<GestureSample>>();
            foreach (GestureType gesture in Config.GesturesToUse)
            {
                ToTrain.Add(gesture, new List<GestureSample>());
                ToRecognize_Training.Add(gesture, new List<GestureSample>());
            }

            PopulateDataSets();
            Recognizer = new LinearClassifier(ToTrain);

            Run();
            Console.WriteLine();
        }
コード例 #3
0
        /// <summary>
        /// Trains a linear classifier on a handful of stop-light examples, dumps the learned
        /// weights and prints the predicted class for two test instances.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        public static void Main(string[] args)
        {
            // Training set: three (Green, Red), three (Red, Green) and one (Red, Red) example.
            IList<IDatum<string, string>> examples = new List<IDatum<string, string>>
            {
                MakeStopLights(Green, Red),
                MakeStopLights(Green, Red),
                MakeStopLights(Green, Red),
                MakeStopLights(Red, Green),
                MakeStopLights(Red, Green),
                MakeStopLights(Red, Green),
                MakeStopLights(Red, Red)
            };

            // Test set.
            IDatum<string, string> working = MakeStopLights(Green, Red);
            IDatum<string, string> broken = MakeStopLights(Red, Red);

            // Factory configuration: conjugate gradient ascent, per-iteration convergence
            // output, and a small amount of smoothing.
            var trainer = new LinearClassifierFactory<string, string>();
            trainer.UseConjugateGradientAscent();
            trainer.SetVerbose(true);
            trainer.SetSigma(10.0);

            LinearClassifier<string, string> model = trainer.TrainClassifier(examples);

            // Show the learned weights.
            model.Dump();

            // Classify both test instances and print the justification for each.
            string workingClass = model.ClassOf(working);
            System.Console.Out.WriteLine("Working instance got: " + workingClass);
            model.JustificationOf(working);

            string brokenClass = model.ClassOf(broken);
            System.Console.Out.WriteLine("Broken instance got: " + brokenClass);
            model.JustificationOf(broken);
        }
コード例 #4
0
 /// <summary>
 /// Trains <c>classifier</c> on <paramref name="trainSet"/> using the classifier kind named
 /// by <c>relationExtractorClassifierType</c> ("linear" or "svm", compared ignoring case).
 /// </summary>
 /// <param name="trainSet">Dataset handed to the selected factory's <c>TrainClassifier</c>.</param>
 /// <exception cref="Exception">The configured classifier type is neither "linear" nor "svm".</exception>
 public virtual void TrainMulticlass(GeneralDataset<string, string> trainSet)
 {
     if (Sharpen.Runtime.EqualsIgnoreCase(relationExtractorClassifierType, "linear"))
     {
         var linearFactory = new LinearClassifierFactory<string, string>(1e-4, false, sigma);
         linearFactory.SetVerbose(false);
         // Alternatives that were tried and left disabled:
         //   in-place SGD instead of QN (faster but much worse):
         //     linearFactory.useInPlaceStochasticGradientDescent(-1, -1, 1.0);
         //   hybrid minimizer (start with in-place SGD, continue with QN):
         //     linearFactory.useHybridMinimizerWithInPlaceSGD(50, -1, sigma);
         classifier = linearFactory.TrainClassifier(trainSet);
     }
     else if (Sharpen.Runtime.EqualsIgnoreCase(relationExtractorClassifierType, "svm"))
     {
         var svmFactory = new SVMLightClassifierFactory<string, string>();
         // The same sigma field is used as the SVM's C setting.
         svmFactory.SetC(sigma);
         classifier = svmFactory.TrainClassifier(trainSet);
     }
     else
     {
         throw new Exception("Invalid classifier type: " + relationExtractorClassifierType);
     }

     // Only report the weights when fine-level logging is enabled.
     bool fineLogging = logger.IsLoggable(Level.Fine);
     if (fineLogging)
     {
         ReportWeights(classifier, null);
     }
 }
コード例 #5
0
        /// <summary>
        /// Trains a classifier, serializes it to an in-memory byte array, deserializes it
        /// again and prints the predictions of the original and the deserialized classifier
        /// for every line of the test file.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        private static void DemonstrateSerialization()
        {
            System.Console.Out.WriteLine();
            System.Console.Out.WriteLine("Demonstrating working with a serialized classifier");
            ColumnDataClassifier cdc = new ColumnDataClassifier(where + "examples/cheese2007.prop");
            IClassifier<string, string> cl = cdc.MakeClassifier(cdc.ReadTrainingExamples(where + "examples/cheeseDisease.train"));

            // Exhibit serialization and deserialization working. Serialized to bytes in memory for simplicity
            System.Console.Out.WriteLine();
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            ObjectOutputStream oos = new ObjectOutputStream(baos);
            try
            {
                oos.WriteObject(cl);
            }
            finally
            {
                // Close the stream even when writing fails.
                oos.Close();
            }

            byte[] serialized = baos.ToByteArray();
            ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(serialized));
            LinearClassifier<string, string> lc;
            try
            {
                lc = ErasureUtils.UncheckedCast(ois.ReadObject());
            }
            finally
            {
                // Close the stream even when reading fails.
                ois.Close();
            }

            ColumnDataClassifier cdc2 = new ColumnDataClassifier(where + "examples/cheese2007.prop");

            // We compare the output of the deserialized classifier lc versus the original one cl
            // For both we use a ColumnDataClassifier to convert text lines to examples
            System.Console.Out.WriteLine();
            System.Console.Out.WriteLine("Making predictions with both classifiers");
            foreach (string line in ObjectBank.GetLineIterator(where + "examples/cheeseDisease.test", "utf-8"))
            {
                IDatum<string, string> d = cdc.MakeDatumFromLine(line);
                IDatum<string, string> d2 = cdc2.MakeDatumFromLine(line);

                string originalClass = cl.ClassOf(d);
                // The deserialized classifier is evaluated on d2 throughout, so both its
                // label and its score come from the datum built by cdc2.
                string deserializedClass = lc.ClassOf(d2);

                System.Console.Out.Printf("%s  =origi=>  %s (%.4f)%n", line, originalClass, cl.ScoresOf(d).GetCount(originalClass));
                System.Console.Out.Printf("%s  =deser=>  %s (%.4f)%n", line, deserializedClass, lc.ScoresOf(d2).GetCount(deserializedClass));
            }
        }
コード例 #6
0
        /// <summary>Train a multinomial classifier off of the provided dataset.</summary>
        /// <remarks>
        /// Before training, <c>ApplyFeatureCountThreshold</c> and <c>Randomize(42L)</c> are
        /// called on the passed <paramref name="dataset"/>. After training, the accuracy of
        /// the classifier on that same dataset is logged.
        /// </remarks>
        /// <param name="dataset">The dataset to train the classifier off of.</param>
        /// <param name="featureThreshold">Value passed to <c>dataset.ApplyFeatureCountThreshold</c>.</param>
        /// <param name="sigma">Value passed to <c>InitFactory</c> when creating the classifier factory.</param>
        /// <returns>A classifier.</returns>
        public static IClassifier<string, string> TrainMultinomialClassifier(GeneralDataset<string, string> dataset, int featureThreshold, double sigma)
        {
            // Set up the dataset and factory
            log.Info("Applying feature threshold (" + featureThreshold + ")...");
            dataset.ApplyFeatureCountThreshold(featureThreshold);
            log.Info("Randomizing dataset...");
            // Fixed seed; upper-case 'L' suffix so the literal cannot be misread as 421.
            dataset.Randomize(42L);
            log.Info("Creating factory...");
            LinearClassifierFactory<string, string> factory = InitFactory(sigma);

            // Train the final classifier
            log.Info("BEGIN training");
            LinearClassifier<string, string> classifier = factory.TrainClassifier(dataset);
            log.Info("END training");

            // Debug: accuracy on the training data itself
            KBPRelationExtractor.Accuracy trainAccuracy = new KBPRelationExtractor.Accuracy();
            foreach (IDatum<string, string> datum in dataset)
            {
                string guess = classifier.ClassOf(datum);
                trainAccuracy.Predict(Java.Util.Collections.Singleton(guess), Java.Util.Collections.Singleton(datum.Label()));
            }
            log.Info("Training accuracy:");
            log.Info(trainAccuracy.ToString());
            log.Info(string.Empty);

            return classifier;
        }
コード例 #7
0
        /// <summary>
        /// Checks the feature/class/token counts of a three-example <c>Dataset</c> before and
        /// after applying a feature count threshold of 2, then trains a linear classifier on
        /// it and checks the predicted class and class probabilities for one datum.
        /// </summary>
        public static void TestDataset()
        {
            Dataset<string, string> data = new Dataset<string, string>();

            data.Add(new BasicDatum<string, string>(Arrays.AsList(new string[] { "fever", "cough", "congestion" }), "cold"));
            data.Add(new BasicDatum<string, string>(Arrays.AsList(new string[] { "fever", "cough", "nausea" }), "flu"));
            data.Add(new BasicDatum<string, string>(Arrays.AsList(new string[] { "cough", "congestion" }), "cold"));

            // Counts before thresholding.
            NUnit.Framework.Assert.AreEqual(4, data.NumFeatures());
            NUnit.Framework.Assert.AreEqual(4, data.NumFeatureTypes());
            NUnit.Framework.Assert.AreEqual(2, data.NumClasses());
            NUnit.Framework.Assert.AreEqual(8, data.NumFeatureTokens());
            NUnit.Framework.Assert.AreEqual(3, data.Size());

            // Counts after thresholding.
            data.ApplyFeatureCountThreshold(2);
            NUnit.Framework.Assert.AreEqual(3, data.NumFeatures());
            NUnit.Framework.Assert.AreEqual(3, data.NumFeatureTypes());
            NUnit.Framework.Assert.AreEqual(2, data.NumClasses());
            NUnit.Framework.Assert.AreEqual(7, data.NumFeatureTokens());
            NUnit.Framework.Assert.AreEqual(3, data.Size());

            LinearClassifierFactory<string, string> factory = new LinearClassifierFactory<string, string>();
            LinearClassifier<string, string> classifier = factory.TrainClassifier(data);
            IDatum<string, string> d = new BasicDatum<string, string>(Arrays.AsList(new string[] { "cough", "fever" }));

            // NUnit's argument order is (expected, actual, [delta], message); with the
            // message first it would be treated as the expected value.
            NUnit.Framework.Assert.AreEqual("flu", classifier.ClassOf(d), "Classification incorrect");
            ICounter<string> probs = classifier.ProbabilityOf(d);

            NUnit.Framework.Assert.AreEqual(0.4553, probs.GetCount("cold"), 0.0001, "Returned probability incorrect");
            NUnit.Framework.Assert.AreEqual(0.5447, probs.GetCount("flu"), 0.0001, "Returned probability incorrect");
            System.Console.Out.WriteLine();
        }
コード例 #8
0
        /// <summary>
        /// Builds a three-example real-valued-feature dataset, prints its summary statistics,
        /// trains a linear classifier with the quasi-Newton setting and asks it to justify
        /// its decision for an unlabeled datum.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        public static void Main(string[] args)
        {
            var trainingSet = new Edu.Stanford.Nlp.Classify.RVFDataset<string, string>();

            // First "cold" example.
            var coldA = new ClassicCounter<string>();
            coldA.IncrementCount("fever", 3.5);
            coldA.IncrementCount("cough", 1.1);
            coldA.IncrementCount("congestion", 4.2);

            // "flu" example.
            var flu = new ClassicCounter<string>();
            flu.IncrementCount("fever", 1.5);
            flu.IncrementCount("cough", 2.1);
            flu.IncrementCount("nausea", 3.2);

            // Second "cold" example.
            var coldB = new ClassicCounter<string>();
            coldB.IncrementCount("cough", 2.5);
            coldB.IncrementCount("congestion", 3.2);

            trainingSet.Add(new RVFDatum<string, string>(coldA, "cold"));
            trainingSet.Add(new RVFDatum<string, string>(flu, "flu"));
            trainingSet.Add(new RVFDatum<string, string>(coldB, "cold"));
            trainingSet.SummaryStatistics();

            var trainer = new LinearClassifierFactory<string, string>();
            trainer.UseQuasiNewton();
            LinearClassifier<string, string> model = trainer.TrainClassifier(trainingSet);

            // Unlabeled query datum.
            var queryFeatures = new ClassicCounter<string>();
            queryFeatures.IncrementCount("cough", 2.3);
            queryFeatures.IncrementCount("fever", 1.3);
            var query = new RVFDatum<string, string>(queryFeatures);

            // The cast is kept so the same JustificationOf overload is selected.
            model.JustificationOf((IDatum<string, string>)query);
        }
コード例 #9
0
 /// <summary>
 /// Appends <paramref name="art"/> to <c>bezeichnung</c> and the value 1 to
 /// <c>parameter</c>, then calls <c>LinearClassifier.SwitchPosInArr</c> on the last two
 /// positions of each.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably <c>SwitchPosInArr</c> swaps the two positions so the new
 /// entry ends up second to last; confirm against its definition. The result of the
 /// second call is discarded, which is only correct if it swaps in place.
 /// </remarks>
 /// <param name="art">Character appended to <c>bezeichnung</c>.</param>
 public void AddParameterHinten(char art)
 {
     bezeichnung += art;
     var lastLabelIndex = bezeichnung.Length - 1;
     bezeichnung = LinearClassifier.SwitchPosInArr(bezeichnung.ToArray(), lastLabelIndex, lastLabelIndex - 1);

     parameter = parameter.Concat(new double[] { 1 }).ToArray();
     var lastParameterIndex = parameter.Length - 1;
     LinearClassifier.SwitchPosInArr(parameter, lastParameterIndex, lastParameterIndex - 1);
 }
コード例 #10
0
 /// <summary>
 /// Creates the interpolator for the given function and a <c>LinearClassifier</c>
 /// constructed from that function's <c>n</c>.
 /// </summary>
 /// <param name="func">
 /// Function to use; when <c>null</c>, <c>new Polynomial(3)</c> is used instead.
 /// </param>
 public LinearInterpol(IFunction func = null)
 {
     IFunction chosen = func ?? new Polynomial(3);

     this.func  = chosen;
     classifier = new LinearClassifier(chosen.n);
 }
コード例 #11
0
 /// <summary>
 /// Stores the labeled dataset, the unlabeled data and the GE features, prints their
 /// sizes to the console, creates a <c>LinearClassifier</c> over the labeled dataset's
 /// feature and label indices, and calls <c>ComputeEmpiricalStatistics</c>.
 /// </summary>
 /// <param name="labeledDataset">Labeled data; also supplies the feature/class counts and indices.</param>
 /// <param name="unlabeledDataList">Unlabeled data.</param>
 /// <param name="geFeatures">Features passed on to <c>ComputeEmpiricalStatistics</c>.</param>
 public GeneralizedExpectationObjectiveFunction(GeneralDataset<L, F> labeledDataset, IList<IDatum<L, F>> unlabeledDataList, IList<F> geFeatures)
 {
     string exampleCounts = "Number of labeled examples:" + labeledDataset.size + "\nNumber of unlabeled examples:" + unlabeledDataList.Count;
     System.Console.Out.WriteLine(exampleCounts);
     string featureCount = "Number of GE features:" + geFeatures.Count;
     System.Console.Out.WriteLine(featureCount);

     // Dimensions come from the labeled data.
     numFeatures = labeledDataset.NumFeatures();
     numClasses  = labeledDataset.NumClasses();

     // Keep references to the inputs.
     this.labeledDataset    = labeledDataset;
     this.unlabeledDataList = unlabeledDataList;
     this.geFeatures        = geFeatures;

     // The classifier is created with null as its first argument.
     classifier = new LinearClassifier<L, F>(null, labeledDataset.featureIndex, labeledDataset.labelIndex);

     ComputeEmpiricalStatistics(geFeatures);
 }
コード例 #12
0
        /// <summary>
        /// Turns the collected tagged-word counts into a weighted dataset, derives the
        /// smoothed tag distribution, and trains <c>scorer</c> with a
        /// <c>LinearClassifierFactory</c> backed by a <c>QNMinimizer</c>.
        /// </summary>
        /// <remarks>
        /// <c>datumCounter</c> is set to <c>null</c> once the dataset has been built.
        /// </remarks>
        public virtual void FinishTraining()
        {
            var tagCounts = new IntCounter<string>();
            var trainingData = new WeightedDataset(datumCounter.Size());

            foreach (TaggedWord tagged in datumCounter.KeySet())
            {
                int occurrences = datumCounter.GetIntCount(tagged);

                // Skip words above the count threshold (when that filter is on) and
                // words found in functionWordTags.
                bool aboveThreshold = trainOnLowCount && occurrences > trainCountThreshold;
                if (aboveThreshold || functionWordTags.Contains(tagged.Word()))
                {
                    continue;
                }

                tagCounts.IncrementCount(tagged.Tag());

                // When training by type every word counts once, otherwise by its count.
                int weight = trainByType ? 1 : occurrences;
                trainingData.Add(new BasicDatum(featExtractor.MakeFeatures(tagged.Word()), tagged.Tag()), weight);
            }

            datumCounter = null;
            tagDist = Distribution.LaplaceSmoothedDistribution(tagCounts, tagCounts.Size(), 0.5);
            tagCounts = null;

            ApplyThresholds(trainingData);
            Verbose("Making classifier...");

            var trainer = new LinearClassifierFactory(new QNMinimizer());
            trainer.SetTol(tol);
            trainer.SetSigma(sigma);
            scorer = trainer.TrainClassifier(trainingData);

            Verbose("Done training.");
        }
コード例 #13
0
        /// <summary>
        /// Logs, at fine level, one message per label of <paramref name="classifier"/>
        /// listing that label's feature weights.
        /// </summary>
        /// <param name="classifier">Classifier whose weights are read via <c>WeightsAsMapOfCounters</c>.</param>
        /// <param name="classLabel">
        /// When not <c>null</c>, an extra header line naming this label is logged first.
        /// </param>
        protected internal static void ReportWeights(LinearClassifier<string, string> classifier, string classLabel)
        {
            if (classLabel != null)
            {
                logger.Fine("CLASSIFIER WEIGHTS FOR LABEL " + classLabel);
            }

            IDictionary<string, ICounter<string>> weightsByLabel = classifier.WeightsAsMapOfCounters();

            // Labels are reported in sorted order.
            IList<string> orderedLabels = new List<string>(weightsByLabel.Keys);
            orderedLabels.Sort();

            foreach (string label in orderedLabels)
            {
                StringBuilder message = new StringBuilder();
                message.Append("WEIGHTS FOR LABEL ");
                message.Append(label);
                message.Append(':');

                // One " feature:weight" entry per line.
                foreach (Pair<string, double> entry in Counters.ToSortedListWithCounts(weightsByLabel[label]))
                {
                    message.Append(' ');
                    message.Append(entry.First());
                    message.Append(':');
                    message.Append(entry.Second() + "\n");
                }

                logger.Fine(message.ToString());
            }
        }
コード例 #14
0
 /// <summary>Stores the given classifier in the <c>platt</c> field.</summary>
 /// <param name="platt">Classifier to store.</param>
 public virtual void SetPlatt(LinearClassifier<L, L> platt) => this.platt = platt;
コード例 #15
0
 /// <summary>
 /// Creates the classifier from a weight counter and thresholds (both forwarded to the
 /// base constructor) and stores the given <c>platt</c> classifier.
 /// </summary>
 /// <param name="weightCounter">Forwarded to the base constructor.</param>
 /// <param name="thresholds">Forwarded to the base constructor.</param>
 /// <param name="platt">Classifier stored in the <c>platt</c> field.</param>
 public SVMLightClassifier(ClassicCounter<Pair<F, L>> weightCounter, ClassicCounter<L> thresholds, LinearClassifier<L, L> platt)
     : base(weightCounter, thresholds) => this.platt = platt;
コード例 #16
0
        /// <summary>Train a sentiment model from a set of data.</summary>
        /// <remarks>
        /// After training on the full dataset, a 4-fold evaluation is run and the averaged
        /// accuracy plus per-class precision, recall and F1 are logged.
        /// </remarks>
        /// <param name="data">The data to train the model from.</param>
        /// <param name="modelLocation">
        /// An optional location to save the model.
        /// Note that this stream will be closed in this method,
        /// and should not be written to thereafter.
        /// </param>
        /// <returns>A sentiment classifier, ready to use.</returns>
        public static SimpleSentiment Train(IStream<SimpleSentiment.SentimentDatum> data, Optional<OutputStream> modelLocation)
        {
            // Some useful variables configuring how we train
            // NOTE(review): useL1 and datasize are never read in this method, and several calls
            // below receive null where a callback is expected; presumably the callbacks that
            // used them were lost in conversion. Confirm and restore.
            bool   useL1 = true;
            double sigma = 1.0;
            int    featureCountThreshold = 5;

            // Featurize the data
            Redwood.Util.ForceTrack("Featurizing");
            RVFDataset<SentimentClass, string> dataset = new RVFDataset<SentimentClass, string>();
            AtomicInteger             datasize     = new AtomicInteger(0);
            ICounter<SentimentClass>  distribution = new ClassicCounter<SentimentClass>();

            data.Unordered().Parallel().Map(null).ForEach(null);
            Redwood.Util.EndTrack("Featurizing");

            // Print label distribution
            Redwood.Util.StartTrack("Distribution");
            foreach (SentimentClass label in SentimentClass.Values())
            {
                // "{0,7}" right-aligns the count in 7 columns; the former "%7d" is not a .NET
                // format item and was printed literally.
                Redwood.Util.Log(string.Format("{0,7}", (int)distribution.GetCount(label)) + "   " + label);
            }
            Redwood.Util.EndTrack("Distribution");

            // Train the classifier
            Redwood.Util.ForceTrack("Training");
            if (featureCountThreshold > 1)
            {
                dataset.ApplyFeatureCountThreshold(featureCountThreshold);
            }
            dataset.Randomize(42L);
            LinearClassifierFactory<SentimentClass, string> factory = new LinearClassifierFactory<SentimentClass, string>();

            factory.SetVerbose(true);
            try
            {
                factory.SetMinimizerCreator(null);
            }
            catch (Exception)
            {
                // Deliberately best-effort: a failure here is ignored and training continues
                // with whatever minimizer the factory already has.
            }
            factory.SetSigma(sigma);
            LinearClassifier<SentimentClass, string> classifier = factory.TrainClassifier(dataset);

            // Optionally save the model
            modelLocation.IfPresent(null);
            Redwood.Util.EndTrack("Training");

            // Evaluate the model
            Redwood.Util.ForceTrack("Evaluating");
            factory.SetVerbose(false);
            double sumAccuracy            = 0.0;
            ICounter<SentimentClass> sumP = new ClassicCounter<SentimentClass>();
            ICounter<SentimentClass> sumR = new ClassicCounter<SentimentClass>();
            int numFolds = 4;

            for (int fold = 0; fold < numFolds; ++fold)
            {
                Pair<GeneralDataset<SentimentClass, string>, GeneralDataset<SentimentClass, string>> trainTest = dataset.SplitOutFold(fold, numFolds);
                // convex objective, so starting from the full model's weights should be OK
                LinearClassifier<SentimentClass, string> foldClassifier = factory.TrainClassifierWithInitialWeights(trainTest.first, classifier);
                sumAccuracy += foldClassifier.EvaluateAccuracy(trainTest.second);
                foreach (SentimentClass label in SentimentClass.Values())
                {
                    Pair<double, double> pr = foldClassifier.EvaluatePrecisionAndRecall(trainTest.second, label);
                    // Precision and recall are accumulated separately; adding both to sumP
                    // left sumR empty, so recall and F1 were always reported as zero.
                    sumP.IncrementCount(label, pr.first);
                    sumR.IncrementCount(label, pr.second);
                }
            }
            DecimalFormat df = new DecimalFormat("0.000%");

            log.Info("----------");
            double aveAccuracy = sumAccuracy / ((double)numFolds);

            log.Info(string.Empty + numFolds + "-fold accuracy: " + df.Format(aveAccuracy));
            log.Info(string.Empty);
            foreach (SentimentClass label in SentimentClass.Values())
            {
                // Averages over the folds.
                double p = sumP.GetCount(label) / numFolds;
                double r = sumR.GetCount(label) / numFolds;
                log.Info(label + " (P)  = " + df.Format(p));
                log.Info(label + " (R)  = " + df.Format(r));
                log.Info(label + " (F1) = " + df.Format(2 * p * r / (p + r)));
                log.Info(string.Empty);
            }
            log.Info("----------");
            Redwood.Util.EndTrack("Evaluating");

            return new SimpleSentiment(classifier);
        }
コード例 #17
0
        /// <summary>
        /// Interactive console check of <c>LinearClassifier</c>: builds a classifier from
        /// four fixed 2-D points with boolean results, then repeatedly reads two numbers
        /// from the console and prints the result of <c>Classify</c> for them.
        /// </summary>
        /// <remarks>
        /// The loop ends when the console input is exhausted. Any error while reading,
        /// parsing or classifying is reported on the console and the loop continues.
        /// </remarks>
        public static void TestLC()
        {
            double[][] input = new double[][]
            {
                new double[] { 0, 0 },
                new double[] { 0, 1 },
                new double[] { 1, 0 },
                new double[] { 1.1, 1.1 }
            };

            bool[] result = new bool[]
            {
                true,
                false,
                false,
                true
            };

            LinearClassifier LC = new LinearClassifier(input, result);

            while (true)
            {
                try
                {
                    Console.WriteLine("Eingabe: ");
                    double[] point = new double[2];

                    for (int i = 0; i < 2; ++i)
                    {
                        string line = Console.ReadLine();
                        if (line == null)
                        {
                            // End of input: Convert.ToDouble(null) would yield 0 without
                            // throwing, so the loop would otherwise spin forever.
                            return;
                        }

                        point[i] = Convert.ToDouble(line);
                    }

                    Console.WriteLine("Ergebniss: " + LC.Classify(point).ToString());
                }
                catch
                {
                    // Not silent: the failure is reported and the user is prompted again.
                    Console.WriteLine("Fehler bei der Eingabe");
                }
            }
        }