public void TrainModelTest(string trainFile, int chunkSize, int numOfChunk)
{
    // Train a chunked model on at most numOfChunk chunks and verify the
    // weight layout afterwards (one weight entry per class).
    var model = new TrainModel(numOfChunk, 2, 64, 0.1);
    var dataset = new ClassifiedDataset(trainFile, 64, 2);

    int chunkIndex = 0;
    foreach (var chunk in dataset.GetClassifiedVectorsInChunks(chunkSize).Take(numOfChunk))
    {
        model.Train(chunk, chunkIndex);
        chunkIndex++;
    }

    var weights = model.GetWeights();
    Assert.That(weights.Length, Is.EqualTo(2));
    foreach (var weight in weights)
    {
        Assert.That(weight, Is.Not.Null);
    }
}
public void TrainModelAndTestModelTest(string trainFile, string testFile)
{
    // End-to-end: train on the classified file, then score every vector in
    // the unclassified file with the inferred posteriors.
    var trainModel = new TrainModel(2, 0.1);
    var trainDataset = new ClassifiedDataset(trainFile, 64, 2);
    trainModel.Train(trainDataset.GetClassifiedVectors());

    var testDataset = new UnclassifiedDataset(testFile, 64);
    var testVectors = testDataset.GetDataVectors()
        .Select(v => v.FeatureVector)
        .ToArray();

    var testModel = new TestModel(2, 0.1);
    var predictions = testModel.Test(trainModel.GetInferredPosterier(), testVectors);

    // Exactly one prediction per input vector.
    Assert.That(predictions.Length, Is.EqualTo(testVectors.Length));
}
public void TrainModelAndTestModelTest(string trainFile, string testFile, int chunkSize, int numOfChunk)
{
    // Chunked end-to-end test: train on up to numOfChunk chunks, then score
    // the unclassified test file.
    //
    // Fix 1: the original built TestModel from trainModel.GetWeights()
    //        BEFORE any Train() call ran, handing it untrained weights.
    //        Training now happens first. (NOTE(review): if GetWeights() is a
    //        live/shared reference the old order was harmless — confirm.)
    // Fix 2: the original computed `results` and asserted nothing, so the
    //        test passed vacuously; verify one result per test vector.
    var trainModel = new TrainModel(numOfChunk, 2, 64, 0.1);
    var trainDataset = new ClassifiedDataset(trainFile, 64, 2);

    int count = 0;
    foreach (var chunk in trainDataset.GetClassifiedVectorsInChunks(chunkSize))
    {
        trainModel.Train(chunk, count);
        if (++count == numOfChunk)
        {
            break;
        }
    }

    var testModel = new TestModel(numOfChunk, 2, 0.1, trainModel.GetWeights());
    var testDataset = new UnclassifiedDataset(testFile, 64);
    var testData = testDataset.GetDataVectors().Select(v => v.FeatureVector).ToArray();

    var results = testModel.Test(testData, 0);

    Assert.That(results, Is.Not.Null);
    Assert.That(results.Length, Is.EqualTo(testData.Length));
}
public void GetClassifiedVectorsTest(string filePath, bool skipParseError, bool skipOutOfRangeClass, int[] expectedVectorCounts)
{
    var dataset = new ClassifiedDataset(filePath, 64, 2);

    // Only touch the setters when the value actually differs — mirrors the
    // original guard in case the setters have side effects.
    if (dataset.SkipParsingErrors != skipParseError)
    {
        dataset.SkipParsingErrors = skipParseError;
    }

    if (dataset.SkipClassValueOutOfRange != skipOutOfRangeClass)
    {
        dataset.SkipClassValueOutOfRange = skipOutOfRangeClass;
    }

    IList<Vector>[] byClass = dataset.GetClassifiedVectors();

    // Two class buckets, each holding the expected number of vectors.
    Assert.That(byClass.Length, Is.EqualTo(2));
    Assert.That(byClass[0].Count, Is.EqualTo(expectedVectorCounts[0]));
    Assert.That(byClass[1].Count, Is.EqualTo(expectedVectorCounts[1]));
}
public void GetClassifiedVectorsInChunksTest(string filePath, int chunkSize, int expectedNumOfChunk, int expectedLastChunkSize)
{
    // Verifies chunked enumeration: every chunk exposes 2 class buckets, each
    // chunk except the last contains exactly chunkSize vectors in total, and
    // the last chunk contains the remainder.
    ClassifiedDataset dataset = new ClassifiedDataset(filePath, 64, 2);
    int actualNumOfChunk = 0;
    foreach (var chunk in dataset.GetClassifiedVectorsInChunks(chunkSize))
    {
        actualNumOfChunk++;
        Assert.That(chunk.Length, Is.EqualTo(2));
        if (actualNumOfChunk == expectedNumOfChunk)
        {
            Assert.That(chunk.Sum(c => c.Count), Is.EqualTo(expectedLastChunkSize));
        }
        else
        {
            Assert.That(chunk.Sum(c => c.Count), Is.EqualTo(chunkSize));
        }
    }

    // Fix: actual value first, expected inside the constraint — the original
    // had them swapped, which yields misleading failure messages.
    Assert.That(actualNumOfChunk, Is.EqualTo(expectedNumOfChunk));
}
public void ParseSelectedFeaturesTest(string[] input, int numOfFeatures, int[] featureSelection, double[] expected)
{
    // The file path never has to exist: parsing is exercised directly on the
    // supplied raw fields, not on file contents.
    var dataset = new ClassifiedDataset("nosuchfile.txt", numOfFeatures, featureSelection, 2);

    var parsed = dataset.ParseSelectedFeatures(input);

    VerifyParsedFeatureValues(expected, parsed);
}
public void CreationWithFeatureSelectionTest(int numOfFeatures, int[] featureSelection)
{
    // Fix: the original constructed the dataset and asserted nothing, so the
    // test passed vacuously (and trips NUnit's no-assertion analyzer). Make
    // the intent explicit: construction with a feature selection must not
    // throw, and must yield an instance.
    ClassifiedDataset dataset = null;

    Assert.DoesNotThrow(
        () => dataset = new ClassifiedDataset("nosuchfile.txt", numOfFeatures, featureSelection, 2));

    Assert.That(dataset, Is.Not.Null);
}
public void GetDataVectorWithFeatureSelectionTest(string input, int numOfFeatures, int[] featureSelection, int expectedClassId, string expectedQueryId, double[] expectedVectorValues, string expectedDocId)
{
    // Parse a single raw line through a feature-selecting dataset and verify
    // every field of the resulting DataVector.
    var dataset = new ClassifiedDataset("nosuchfile.txt", numOfFeatures, featureSelection, 2);

    var vector = dataset.CreateDataVector(input);

    Assert.That(vector.ClassId, Is.EqualTo(expectedClassId));
    Assert.That(vector.QueryId, Is.EqualTo(expectedQueryId));
    VerifyVectorValues(expectedVectorValues, vector.FeatureVector);
    Assert.That(vector.DocumentId, Is.EqualTo(expectedDocId));
}
public void TrainModelIncrementalTrainTest(string trainFile1, string trainFile2)
{
    // Incremental training on two files in sequence: the posterior count
    // must stay stable while the posteriors themselves move.
    var model = new TrainModel(2, 0.1);

    model.TrainIncremental(new ClassifiedDataset(trainFile1, 64, 2).GetClassifiedVectors());
    var posteriorsAfterFirst = model.GetInferredPosterier();

    model.TrainIncremental(new ClassifiedDataset(trainFile2, 64, 2).GetClassifiedVectors());
    var posteriorsAfterSecond = model.GetInferredPosterier();

    // Same number of posteriors before and after the second pass...
    Assert.AreEqual(posteriorsAfterFirst.Length, posteriorsAfterSecond.Length);
    // ...but at least one posterior changed with the additional data.
    Assert.That(posteriorsAfterFirst.Intersect(posteriorsAfterSecond).Count() < posteriorsAfterFirst.Length);
}
public void TrainModelWithFeatureSelectionTest(string trainFile, int[] featureSelection)
{
    // Training with a feature subset should yield one posterior per class,
    // each dimensioned to the selected features rather than the full vector.
    var model = new TrainModel(2, 0.1);
    var dataset = new ClassifiedDataset(trainFile, 64, featureSelection, 2);
    model.Train(dataset.GetClassifiedVectors());

    var posteriors = model.GetInferredPosterier();

    Assert.That(posteriors.Length, Is.EqualTo(2));
    for (int i = 0; i < posteriors.Length; i++)
    {
        Assert.That(posteriors[i].Dimension, Is.EqualTo(featureSelection.Length));
    }
}