/// <summary>
        /// Trains a chunked <c>TrainModel</c> on up to <paramref name="numOfChunk"/> chunks
        /// drawn from <paramref name="trainFile"/>, then verifies that a non-null weight
        /// entry exists for each of the two classes.
        /// </summary>
        /// <param name="trainFile">Path to the classified training-data file.</param>
        /// <param name="chunkSize">Number of vectors per chunk.</param>
        /// <param name="numOfChunk">Maximum number of chunks to feed into the model.</param>
        public void TrainModelTest(string trainFile, int chunkSize, int numOfChunk)
        {
            var model = new TrainModel(numOfChunk, 2, 64, 0.1);
            var dataset = new ClassifiedDataset(trainFile, 64, 2);

            int chunkIndex = 0;

            // Take(numOfChunk) stops the enumeration at exactly the point where the
            // original counted-break did.
            foreach (var chunk in dataset.GetClassifiedVectorsInChunks(chunkSize).Take(numOfChunk))
            {
                model.Train(chunk, chunkIndex);
                chunkIndex++;
            }

            var weights = model.GetWeights();

            Assert.That(weights.Length, Is.EqualTo(2));

            foreach (var weight in weights)
            {
                Assert.That(weight, Is.Not.Null);
            }
        }
        public void TrainModelAndTestModelTest(string trainFile, string testFile)
        {
            var trainModel = new TrainModel(2, 0.1);
            var trainDataset = new ClassifiedDataset(trainFile, 64, 2);
            trainModel.Train(trainDataset.GetClassifiedVectors());

            var testModel = new TestModel(2, 0.1);
            var testDataset = new UnclassifiedDataset(testFile, 64);
            var testData = testDataset.GetDataVectors().Select(v => v.FeatureVector).ToArray();
            var results = testModel.Test(trainModel.GetInferredPosterier(), testData);

            Assert.That(results.Length, Is.EqualTo(testData.Length));
        }
        public void TrainModelAndTestModelTest(string trainFile, string testFile, int chunkSize, int numOfChunk)
        {
            var trainModel = new TrainModel(numOfChunk, 2, 64, 0.1);
            var trainDataset = new ClassifiedDataset(trainFile, 64, 2);

            var testModel = new TestModel(numOfChunk, 2, 0.1, trainModel.GetWeights());
            var testDataset = new UnclassifiedDataset(testFile, 64);

            int count = 0;

            foreach (var chunk in trainDataset.GetClassifiedVectorsInChunks(chunkSize))
            {
                trainModel.Train(chunk, count);
                if (++count == numOfChunk)
                {
                    break;
                }
            }

            var results = testModel.Test(testDataset.GetDataVectors().Select(v => v.FeatureVector).ToArray(), 0);
        }
        public void GetClassifiedVectorsTest(string filePath, bool skipParseError, bool skipOutOfRangeClass, int[] expectedVectorCounts)
        {
            ClassifiedDataset dataset = new ClassifiedDataset(filePath, 64, 2);

            if (dataset.SkipParsingErrors != skipParseError)
            {
                dataset.SkipParsingErrors = skipParseError;
            }

            if (dataset.SkipClassValueOutOfRange != skipOutOfRangeClass)
            {
                dataset.SkipClassValueOutOfRange = skipOutOfRangeClass;
            }

            IList<Vector>[] classifiedVectors = dataset.GetClassifiedVectors();

            Assert.That(classifiedVectors.Length, Is.EqualTo(2));

            for (int i = 0; i < 2; i++)
            {
                Assert.That(classifiedVectors[i].Count, Is.EqualTo(expectedVectorCounts[i]));
            }
        }
        public void GetClassifiedVectorsInChunksTest(string filePath, int chunkSize, int expectedNumOfChunk, int expectedLastChunkSize)
        {
            ClassifiedDataset dataset = new ClassifiedDataset(filePath, 64, 2);

            int actualNumOfChunk = 0;

            foreach (var chunk in dataset.GetClassifiedVectorsInChunks(chunkSize))
            {
                actualNumOfChunk++;

                Assert.That(chunk.Length, Is.EqualTo(2));

                if (actualNumOfChunk == expectedNumOfChunk)
                {
                    Assert.That(chunk.Sum(c => c.Count), Is.EqualTo(expectedLastChunkSize));
                }
                else
                {
                    Assert.That(chunk.Sum(c => c.Count), Is.EqualTo(chunkSize));
                }
            }

            Assert.That(expectedNumOfChunk, Is.EqualTo(actualNumOfChunk));
        }
        public void ParseSelectedFeaturesTest(string[] input, int numOfFeatures, int[] featureSelection, double[] expected)
        {
            var dataset = new ClassifiedDataset("nosuchfile.txt", numOfFeatures, featureSelection, 2);

            double[] actual = dataset.ParseSelectedFeatures(input);

            VerifyParsedFeatureValues(expected, actual);
        }
 public void CreationWithFeatureSelectionTest(int numOfFeatures, int[] featureSelection)
 {
     ClassifiedDataset dataset = new ClassifiedDataset("nosuchfile.txt", numOfFeatures, featureSelection, 2);
 }
        public void GetDataVectorWithFeatureSelectionTest(string input, int numOfFeatures, int[] featureSelection, int expectedClassId, string expectedQueryId, double[] expectedVectorValues, string expectedDocId)
        {
            var dataset = new ClassifiedDataset("nosuchfile.txt", numOfFeatures, featureSelection, 2);

            DataVector actual = dataset.CreateDataVector(input);

            Assert.That(actual.ClassId, Is.EqualTo(expectedClassId));
            Assert.That(actual.QueryId, Is.EqualTo(expectedQueryId));

            VerifyVectorValues(expectedVectorValues, actual.FeatureVector);

            Assert.That(actual.DocumentId, Is.EqualTo(expectedDocId));
        }
        public void TrainModelIncrementalTrainTest(string trainFile1, string trainFile2)
        {
            var model = new TrainModel(2, 0.1);

            var dataset1 = new ClassifiedDataset(trainFile1, 64, 2);
            model.TrainIncremental(dataset1.GetClassifiedVectors());
            var posteriors1 = model.GetInferredPosterier();

            var dataset2 = new ClassifiedDataset(trainFile2, 64, 2);
            model.TrainIncremental(dataset2.GetClassifiedVectors());
            var posteriors2 = model.GetInferredPosterier();

            Assert.AreEqual(posteriors1.Length, posteriors2.Length);

            Assert.That(posteriors1.Intersect(posteriors2).Count() < posteriors1.Length);
        }
        public void TrainModelWithFeatureSelectionTest(string trainFile, int[] featureSelection)
        {
            var model = new TrainModel(2, 0.1);
            var dataset = new ClassifiedDataset(trainFile, 64, featureSelection, 2);

            model.Train(dataset.GetClassifiedVectors());

            var posteriors = model.GetInferredPosterier();

            Assert.That(posteriors.Length, Is.EqualTo(2));

            foreach (var p in posteriors)
            {
                Assert.That(p.Dimension, Is.EqualTo(featureSelection.Length));
            }
        }