/// <summary>
/// Builds a three-datum symptom dataset, checks its size statistics before and after
/// applying a feature count threshold of 2, then trains a linear classifier on it and
/// checks the predicted class and class probabilities for an unlabeled datum.
/// </summary>
public static void TestDataset()
        {
            Dataset<string, string> data = new Dataset<string, string>();

            data.Add(new BasicDatum<string, string>(Arrays.AsList(new string[] { "fever", "cough", "congestion" }), "cold"));
            data.Add(new BasicDatum<string, string>(Arrays.AsList(new string[] { "fever", "cough", "nausea" }), "flu"));
            data.Add(new BasicDatum<string, string>(Arrays.AsList(new string[] { "cough", "congestion" }), "cold"));

            // Before thresholding: 4 distinct features, 8 feature occurrences (3 + 3 + 2),
            // 2 labels and 3 data items.
            NUnit.Framework.Assert.AreEqual(4, data.NumFeatures());
            NUnit.Framework.Assert.AreEqual(4, data.NumFeatureTypes());
            NUnit.Framework.Assert.AreEqual(2, data.NumClasses());
            NUnit.Framework.Assert.AreEqual(8, data.NumFeatureTokens());
            NUnit.Framework.Assert.AreEqual(3, data.Size());

            // After thresholding the test expects one feature type and one token fewer,
            // which is consistent with the single-occurrence feature "nausea" being removed.
            data.ApplyFeatureCountThreshold(2);
            NUnit.Framework.Assert.AreEqual(3, data.NumFeatures());
            NUnit.Framework.Assert.AreEqual(3, data.NumFeatureTypes());
            NUnit.Framework.Assert.AreEqual(2, data.NumClasses());
            NUnit.Framework.Assert.AreEqual(7, data.NumFeatureTokens());
            NUnit.Framework.Assert.AreEqual(3, data.Size());

            LinearClassifierFactory<string, string> factory = new LinearClassifierFactory<string, string>();
            LinearClassifier<string, string> classifier = factory.TrainClassifier(data);
            IDatum<string, string> d = new BasicDatum<string, string>(Arrays.AsList(new string[] { "cough", "fever" }));

            // NUnit takes (expected, actual[, delta], message); the JUnit-style message-first
            // order would compare the message text against the expected value instead.
            NUnit.Framework.Assert.AreEqual("flu", classifier.ClassOf(d), "Classification incorrect");
            ICounter<string> probs = classifier.ProbabilityOf(d);

            NUnit.Framework.Assert.AreEqual(0.4553, probs.GetCount("cold"), 0.0001, "Returned probability incorrect");
            NUnit.Framework.Assert.AreEqual(0.5447, probs.GetCount("flu"), 0.0001, "Returned probability incorrect");
            System.Console.Out.WriteLine();
        }
Beispiel #2
0
        /// <summary>
        /// Converts the classifier's probability counter for <paramref name="datum"/> into an
        /// array positioned by each label's index in <c>labeledDataset.labelIndex</c>.
        /// </summary>
        /// <param name="datum">Datum handed to <c>classifier.ProbabilityOf</c>.</param>
        /// <returns>
        /// An array of length <c>labeledDataset.NumClasses()</c>; slots whose label is not a
        /// key of the probability counter keep the default value 0.0.
        /// </returns>
        private double[] GetModelProbs(IDatum <L, F> datum)
        {
            double[] distribution = new double[labeledDataset.NumClasses()];
            ICounter <L> labelProbs = classifier.ProbabilityOf(datum);

            foreach (L currentLabel in labelProbs.KeySet())
            {
                // Slot position comes from the dataset's label index, not from iteration order.
                distribution[labeledDataset.labelIndex.IndexOf(currentLabel)] = labelProbs.GetCount(currentLabel);
            }

            return distribution;
        }
Beispiel #3
0
        /// <summary>
        /// Chooses a relation label for <paramref name="datum"/> from the classifier's
        /// probability counter, falling back to <c>RelationMention.Unrelated</c>.
        /// </summary>
        /// <remarks>
        /// Candidates are visited in the order returned by
        /// <c>Counters.ToDescendingMagnitudeSortedListWithCounts</c>. The first candidate that
        /// is the unrelated label, that scores no higher than the unrelated label, or that
        /// passes <c>CompatibleLabel</c> decides the result.
        /// </remarks>
        /// <param name="datum">Datum handed to <c>classifier.ProbabilityOf</c>.</param>
        /// <param name="rel">Extraction object forwarded to <c>CompatibleLabel</c> for each candidate.</param>
        /// <returns>
        /// The first candidate label accepted by <c>CompatibleLabel</c>, or the unrelated
        /// label when it is reached first, ties or beats the current candidate, or no
        /// candidate is accepted.
        /// </returns>
        protected internal virtual string ClassOf(IDatum <string, string> datum, ExtractionObject rel)
        {
            ICounter <string> labelScores = classifier.ProbabilityOf(datum);
            IList <Pair <string, double> > rankedCandidates = Counters.ToDescendingMagnitudeSortedListWithCounts(labelScores);
            double unrelatedScore = labelScores.GetCount(RelationMention.Unrelated);

            foreach (Pair <string, double> candidate in rankedCandidates)
            {
                string candidateLabel = candidate.first;

                if (candidateLabel.Equals(RelationMention.Unrelated))
                {
                    return candidateLabel;
                }

                if (unrelatedScore >= candidate.second)
                {
                    // no prediction, all probs have the same value
                    return RelationMention.Unrelated;
                }

                if (CompatibleLabel(candidateLabel, rel))
                {
                    return candidateLabel;
                }
            }

            return RelationMention.Unrelated;
        }