Ejemplo n.º 1
0
        /// <summary>
        /// Trains one GIS model from the real-valued-weights training file and another from the
        /// repeat-weighting training file, then asserts that both models return the same number of
        /// scores and that each pair of scores agrees to within 0.01.
        /// </summary>
        /// <remarks>
        /// The comparison runs twice: first for a context evaluated without explicit values, then
        /// for a context evaluated with one explicit value per feature.
        /// </remarks>
        public void TestRealValuedWeightsVsRepeatWeighting()
        {
            // The using blocks dispose both streams even when training or an assertion throws.
            using (var rvfes1 = new RealValueFileEventStream(Tests.OpenFile("opennlp/data/maxent/real-valued-weights-training-data.txt")))
            using (var rvfes2 = new FileEventStream(Tests.OpenFile("opennlp/data/maxent/repeat-weighting-training-data.txt")))
            {
                var realModel   = GIS.TrainModel(100, new OnePassRealValueDataIndexer(rvfes1, 1));
                var repeatModel = GIS.TrainModel(100, new OnePassRealValueDataIndexer(rvfes2, 1));

                Assert.NotNull(realModel);
                Assert.NotNull(repeatModel);

                // First pass: context only, no explicit feature values.
                var features2Classify = new[] { "feature2", "feature5" };
                var realResults       = realModel.Eval(features2Classify);
                var repeatResults     = repeatModel.Eval(features2Classify);

                Assert.AreEqual(realResults.Length, repeatResults.Length);

                for (var i = 0; i < realResults.Length; i++)
                {
                    Assert.AreEqual(realResults[i], repeatResults[i], 0.01f);
                }

                // Second pass: every feature carries an explicit value.
                features2Classify = new[] { "feature1", "feature2", "feature3", "feature4", "feature5" };
                realResults       = realModel.Eval(features2Classify, new[] { 5.5f, 6.1f, 9.1f, 4.0f, 1.8f });
                repeatResults     = repeatModel.Eval(features2Classify, new[] { 5.5f, 6.1f, 9.1f, 4.0f, 1.8f });

                Assert.AreEqual(realResults.Length, repeatResults.Length);

                for (var i = 0; i < realResults.Length; i++)
                {
                    Assert.AreEqual(realResults[i], repeatResults[i], 0.01f);
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Evaluates the context of <paramref name="event"/> with the model and writes every outcome
        /// label together with its score to the console on a single line.
        /// </summary>
        /// <param name="event">The event whose context is evaluated. Its outcome is not used.</param>
        /// <param name="real">
        /// When <c>true</c>, values are parsed from the context strings with
        /// <c>RealValueFileEventStream.parseContexts</c> and passed to the model together with the
        /// context; when <c>false</c>, only the context is passed.
        /// </param>
        private void eval(Event @event, bool real)
        {
            string[] context = @event.Context;

            double[] ocs;
            if (real)
            {
                float[] values = RealValueFileEventStream.parseContexts(context);
                ocs = _model.eval(context, values);
            }
            else
            {
                ocs = _model.eval(context);
            }

            int numOutcomes = ocs.Length;

            // Pair each score with its outcome label so the two stay together while sorting.
            DoubleStringPair[] result = new DoubleStringPair[numOutcomes];
            for (int i = 0; i < numOutcomes; i++)
            {
                result[i] = new DoubleStringPair(ocs[i], _model.getOutcome(i));
            }

            Array.Sort(result);

            // Walk the sorted array backwards: most likely outcome first, down to the least likely.
            for (int i = numOutcomes - 1; i >= 0; i--)
            {
                Console.Write(result[i].stringValue + " " + result[i].doubleValue + " ");
            }
            Console.WriteLine();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Trains a model with <c>QNTrainer</c> on the real-valued-weights training data and asserts
        /// that the trainer returns a non-null model.
        /// </summary>
        public void TestTrainModelReturnsAqnModel()
        {
            var trainingFile = Tests.GetFullPath(@"/opennlp/data/maxent/real-valued-weights-training-data.txt");

            using (var events = new RealValueFileEventStream(trainingFile, Encoding.UTF8))
            {
                var indexer = new OnePassRealValueDataIndexer(events, 1);
                var trained = new QNTrainer().TrainModel(Iterations, indexer);

                Assert.NotNull(trained);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Asserts that <c>NegLogLikelihood.Dimension</c> equals the number of predicate labels
        /// multiplied by the number of outcome labels reported by the data indexer.
        /// </summary>
        public void TestDomainDimensionSanity()
        {
            var trainingFile = Tests.GetFullPath(@"/opennlp/data/maxent/real-valued-weights-training-data.txt");

            using (var events = new RealValueFileEventStream(trainingFile, Encoding.UTF8))
            {
                var indexer = new OnePassRealValueDataIndexer(events, 1);
                indexer.Execute();

                var objective = new NegLogLikelihood(indexer);

                // Expected dimension: predicate label count times outcome label count.
                var predicateCount = indexer.GetPredLabels().Length;
                var outcomeCount   = indexer.GetOutcomeLabels().Length;

                Assert.AreEqual(predicateCount * outcomeCount, objective.Dimension);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds an <c>Event</c> from a line of text: everything after the last space becomes the
        /// outcome, and everything before it is split into contexts.
        /// </summary>
        /// <param name="value">The line to parse.</param>
        /// <param name="monitor">Passed through to <c>RealValueFileEventStream.ParseContexts</c>.</param>
        /// <returns>
        /// The parsed event, or <c>null</c> when <paramref name="value"/> contains no space.
        /// </returns>
        private static Event CreateEvent(string value, Monitor monitor)
        {
            var separatorIndex = value.LastIndexOf(' ');

            // Without a space there is nothing separating contexts from the outcome.
            if (separatorIndex < 0)
            {
                return null;
            }

            var contextText = value.Substring(0, separatorIndex);
            var contexts    = contextText.RegExSplit(Expressions.Expression.Space);
            var values      = RealValueFileEventStream.ParseContexts(contexts, monitor);
            var outcome     = value.Substring(separatorIndex + 1);

            return new Event(outcome, contexts, values);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Asserts that the gradient of <c>NegLogLikelihood</c>, evaluated at the point returned by
        /// <c>GetInitialPoint()</c>, matches the expected values within <c>Tolerance1</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): despite "NonInitialPoint" in the method name, the body evaluates the
        /// gradient at the initial point.
        /// </remarks>
        public void TestGradientAtNonInitialPoint()
        {
            var trainingFile = Tests.GetFullPath(@"/opennlp/data/maxent/real-valued-weights-training-data.txt");

            using (var events = new RealValueFileEventStream(trainingFile, Encoding.UTF8))
            {
                var indexer = new OnePassRealValueDataIndexer(events, 1);
                indexer.Execute();

                var objective = new NegLogLikelihood(indexer);

                var expected = new[] { -9.0, -14.0, -17.0, 20.0, 8.5, 9.0, 14.0, 17.0, -20.0, -8.5 };
                var actual   = objective.GradientAt(objective.GetInitialPoint());

                Assert.True(CompareDoubleArray(expected, actual, indexer, Tolerance1));
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds an <c>Event</c> from an observation line: the text after the last space is the
        /// outcome and the text before it is split into contexts.
        /// </summary>
        /// <param name="obs">The observation line to parse.</param>
        /// <returns>
        /// The parsed event, or <c>null</c> when <paramref name="obs"/> contains no space.
        /// </returns>
        private Event createEvent(string obs)
        {
            int splitAt = obs.LastIndexOf(' ');

            if (splitAt == -1)
            {
                return null;
            }

            // NOTE(review): Split(string, bool) is not a BCL overload; presumably a project
            // extension that treats "\\s+" as a regular expression - confirm.
            string[] contexts = obs.Substring(0, splitAt).Split("\\s+", true);
            float[]  values   = RealValueFileEventStream.parseContexts(contexts);
            string   outcome  = obs.Substring(splitAt + 1);

            return new Event(outcome, contexts, values);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Trains two GIS models on the same two events, once with small feature values and once
        /// with those values multiplied by 100, evaluates each model on a correspondingly scaled
        /// test context, and asserts that the two result arrays have the same length and agree
        /// element by element to within 0.01.
        /// </summary>
        /// <remarks>
        /// Both result sets are also written to the console, each formatted by the model that
        /// produced it.
        /// </remarks>
        public void TestScaleResults()
        {
            const string smallValues = "predA=0.1 predB=0.2 A\n" + "predB=0.3 predA=0.1 B\n";
            const string largeValues = "predA=10 predB=20 A\n" + "predB=30 predA=10 B\n";

            const string smallTest = "predA=0.2 predB=0.2";
            const string largeTest = "predA=20 predB=20";

            // Small-scale model.
            var smallReader      = new StringReader(smallValues);
            var smallEventStream = new RealBasicEventStream(new PlainTextByLineStream(smallReader));

            var smallModel = GIS.TrainModel(100, new OnePassRealValueDataIndexer(smallEventStream, 0), false);

            var contexts     = smallTest.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            var values       = RealValueFileEventStream.ParseContexts(contexts);
            var smallResults = smallModel.Eval(contexts, values);

            var smallResultString = smallModel.GetAllOutcomes(smallResults);

            Console.Out.WriteLine("smallResults: " + smallResultString);

            // Large-scale model.
            var largeReader      = new StringReader(largeValues);
            var largeEventStream = new RealBasicEventStream(new PlainTextByLineStream(largeReader));

            var largeModel = GIS.TrainModel(100, new OnePassRealValueDataIndexer(largeEventStream, 0), false);

            contexts = largeTest.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            values   = RealValueFileEventStream.ParseContexts(contexts);
            var largeResults = largeModel.Eval(contexts, values);

            // Format the large results with the model that produced them, not the small model.
            var largeResultString = largeModel.GetAllOutcomes(largeResults);

            Console.Out.WriteLine("largeResults: " + largeResultString);

            Assert.AreEqual(smallResults.Length, largeResults.Length);
            for (var i = 0; i < smallResults.Length; i++)
            {
                Assert.AreEqual(smallResults[i], largeResults[i], 0.01f);
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Asserts that <c>NegLogLikelihood.ValueAt</c>, evaluated at the point returned by
        /// <c>GetInitialPoint()</c>, equals 13.86294361 within <c>Tolerance1</c>.
        /// </summary>
        public void TestValueAtInitialPoint()
        {
            var trainingFile = Tests.GetFullPath(@"/opennlp/data/maxent/real-valued-weights-training-data.txt");

            using (var events = new RealValueFileEventStream(trainingFile, Encoding.UTF8))
            {
                var indexer = new OnePassRealValueDataIndexer(events, 1);
                indexer.Execute();

                var objective     = new NegLogLikelihood(indexer);
                var startingPoint = objective.GetInitialPoint();
                var actual        = objective.ValueAt(startingPoint);

                Assert.AreEqual(13.86294361, actual, Tolerance1);
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Asserts that <c>NegLogLikelihood.GradientAt</c> returns a non-null result when called
        /// with the point returned by <c>GetInitialPoint()</c>.
        /// </summary>
        public void TestGradientSanity()
        {
            var trainingFile = Tests.GetFullPath(@"/opennlp/data/maxent/real-valued-weights-training-data.txt");

            using (var events = new RealValueFileEventStream(trainingFile, Encoding.UTF8))
            {
                var indexer = new OnePassRealValueDataIndexer(events, 1);
                indexer.Execute();

                var objective = new NegLogLikelihood(indexer);
                var gradient  = objective.GradientAt(objective.GetInitialPoint());

                Assert.NotNull(gradient);
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Trains a model with <c>QNTrainer</c>, writes it to an in-memory stream, reads it back,
        /// and asserts that the restored model equals the original and produces the same scores
        /// (within 1e-8) for a sample context.
        /// </summary>
        public void TestSerializationModel()
        {
            var trainingFile = Tests.GetFullPath(@"/opennlp/data/maxent/real-valued-weights-training-data.txt");

            using (var events = new RealValueFileEventStream(trainingFile, Encoding.UTF8))
            {
                var indexer = new OnePassRealValueDataIndexer(events, 1);
                var model   = new QNTrainer().TrainModel(Iterations, indexer);

                Assert.NotNull(model);

                QNModel restored;

                using (var buffer = new MemoryStream())
                {
                    // The writer receives an UnclosableStream wrapper rather than the buffer itself;
                    // the buffer is read again below after the writer has been closed.
                    using (var writer = new GenericModelWriter(model, new UnclosableStream(buffer)))
                    {
                        writer.Persist();
                        writer.Close();
                    }

                    // Rewind so the reader starts at the first written byte.
                    buffer.Flush();
                    buffer.Seek(0, SeekOrigin.Begin);

                    using (var reader = new GenericModelReader(new BinaryFileDataReader(buffer)))
                    {
                        restored = reader.GetModel() as QNModel;
                    }
                }

                Assert.NotNull(restored);
                Assert.True(model.Equals(restored));

                var sampleContext = new[]
                {
                    "feature2", "feature3", "feature3",
                    "feature3", "feature3", "feature3",
                    "feature3", "feature3", "feature3",
                    "feature3", "feature3", "feature3"
                };

                var originalScores = model.Eval(sampleContext);
                var restoredScores = restored.Eval(sampleContext);

                Assert.AreEqual(originalScores.Length, restoredScores.Length);

                for (var index = 0; index < originalScores.Length; index++)
                {
                    Assert.AreEqual(originalScores[index], restoredScores[index], 1e-8);
                }
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Asserts that every component of the point returned by
        /// <c>NegLogLikelihood.GetInitialPoint()</c> is 0 within <c>Tolerance1</c>.
        /// </summary>
        public void TestInitialSanity()
        {
            var trainingFile = Tests.GetFullPath(@"/opennlp/data/maxent/real-valued-weights-training-data.txt");

            using (var events = new RealValueFileEventStream(trainingFile, Encoding.UTF8))
            {
                var indexer = new OnePassRealValueDataIndexer(events, 1);
                indexer.Execute();

                var objective = new NegLogLikelihood(indexer);

                foreach (var component in objective.GetInitialPoint())
                {
                    Assert.AreEqual(0, component, Tolerance1);
                }
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Asserts that <c>NegLogLikelihood.ValueAt</c>, evaluated at a point alternating between
        /// 3 and 2, equals 53.163219721099026 within <c>Tolerance2</c>.
        /// </summary>
        /// <remarks>
        /// The point is passed through <c>DealignDoubleArrayForTestData</c> together with the
        /// indexer's predicate and outcome labels before being handed to the function.
        /// </remarks>
        public void TestValueAtNonInitialPoint02()
        {
            var trainingFile = Tests.GetFullPath(@"/opennlp/data/maxent/real-valued-weights-training-data.txt");

            using (var events = new RealValueFileEventStream(trainingFile, Encoding.UTF8))
            {
                var indexer = new OnePassRealValueDataIndexer(events, 1);
                indexer.Execute();

                var objective = new NegLogLikelihood(indexer);

                var point     = new double[] { 3, 2, 3, 2, 3, 2, 3, 2, 3, 2 };
                var dealigned = DealignDoubleArrayForTestData(point, indexer.GetPredLabels(), indexer.GetOutcomeLabels());
                var actual    = objective.ValueAt(dealigned);

                Assert.AreEqual(53.163219721099026, actual, Tolerance2);
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Asserts that the gradient of <c>NegLogLikelihood</c>, evaluated at a point alternating
        /// between 0.2 and 0.5, matches the expected values within <c>Tolerance1</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): despite "InitialPoint" in the method name, the body evaluates the gradient
        /// at an explicitly constructed point, not at <c>GetInitialPoint()</c>.
        /// </remarks>
        public void TestGradientAtInitialPoint()
        {
            var trainingFile = Tests.GetFullPath(@"/opennlp/data/maxent/real-valued-weights-training-data.txt");

            using (var events = new RealValueFileEventStream(trainingFile, Encoding.UTF8))
            {
                var indexer = new OnePassRealValueDataIndexer(events, 1);
                indexer.Execute();

                var objective = new NegLogLikelihood(indexer);

                var point     = new[] { 0.2, 0.5, 0.2, 0.5, 0.2, 0.5, 0.2, 0.5, 0.2, 0.5 };
                var dealigned = DealignDoubleArrayForTestData(point, indexer.GetPredLabels(), indexer.GetOutcomeLabels());
                var actual    = objective.GradientAt(dealigned);

                var expected = new[]
                {
                    -12.755042847945553, -21.227127506102434,
                    -72.57790706276435, 38.03525795198456,
                    15.348650889354925, 12.755042847945557,
                    21.22712750610244, 72.57790706276438,
                    -38.03525795198456, -15.348650889354925
                };

                Assert.True(CompareDoubleArray(expected, actual, indexer, Tolerance1));
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Trains a model with <c>QNTrainer</c> on the real-valued-weights training data and asserts
        /// that both the model and its evaluation of a sample context are non-null.
        /// </summary>
        public void TestInTinyDevSet()
        {
            var trainingFile = Tests.GetFullPath(@"/opennlp/data/maxent/real-valued-weights-training-data.txt");

            using (var events = new RealValueFileEventStream(trainingFile, Encoding.UTF8))
            {
                var indexer = new OnePassRealValueDataIndexer(events, 1);
                var trained = new QNTrainer().TrainModel(Iterations, indexer);

                Assert.NotNull(trained);

                var sampleContext = new[]
                {
                    "feature2", "feature3", "feature3",
                    "feature3", "feature3", "feature3",
                    "feature3", "feature3", "feature3",
                    "feature3", "feature3", "feature3"
                };

                Assert.NotNull(trained.Eval(sampleContext));
            }
        }