/// <summary>
/// Initialises a Bag-of-Visual-Words model: learns a 10-word codebook with the
/// FREAK (FastRetinaKeypoint) detector from the cached target bitmap, and also
/// caches a half-size copy of the target plus its total codeword-hit count as a
/// baseline for later scale comparisons.
/// </summary>
/// <returns>The trained Bag-of-Visual-Words model.</returns>
public static BagOfVisualWords <Accord.IFeatureDescriptor <double[]>, double[], Accord.MachineLearning.KMeans, Accord.Imaging.FastRetinaKeypointDetector> CreateBowModel()
{
    // NOTE(review): hard-coded development path — consider making this configurable.
    const string targetImagePath = @"C:\Users\antmi\Pictures\bark1.jpg";

    if (Image.targetBitmap == null)
    {
        Image.targetBitmap = LoadScaledBitmap(targetImagePath, new Size(200, 200));
    }

    var bow = BagOfVisualWords.Create(new FastRetinaKeypointDetector(), numberOfWords: 10);

    Bitmap[] images = new Bitmap[] { Image.targetBitmap };
    bow.Learn(images);

    if (Image.targetBitmapScaled == null)
    {
        Image.targetBitmapScaled = LoadScaledBitmap(targetImagePath, new Size(100, 100));

        // Baseline: total number of codeword hits for the scaled target.
        Image.scaledFeatureCount = bow.Transform(new Bitmap[] { Image.targetBitmapScaled })[0].Sum();
    }

    return (bow);
}

/// <summary>
/// Loads an image from disk and returns a copy resized to <paramref name="size"/>.
/// The full-size original is disposed (the previous inline version leaked it).
/// </summary>
/// <param name="path">Image file to load.</param>
/// <param name="size">Target dimensions of the returned bitmap.</param>
private static Bitmap LoadScaledBitmap(string path, Size size)
{
    using (System.Drawing.Image original = System.Drawing.Image.FromFile(path))
    {
        return new Bitmap(original, size);
    }
}
/// <summary>
/// Creates and trains a Bag-of-Visual-Words model over the Images collection
/// with a 10-word codebook, using the defaults (sparse SURF detector, K-Means).
/// </summary>
/// <returns>The learned model as an <c>IBagOfWords&lt;Bitmap&gt;</c>.</returns>
private IBagOfWords <Bitmap> CreateBow()
{
    // The original declared "new BinarySplit(10)" here but never passed it to
    // Create, so the model always used its default K-Means clustering anyway;
    // the unused local has been removed to avoid suggesting otherwise.
    var surfBow = BagOfVisualWords.Create(10);
    return (surfBow.Learn(Images.ToArray()));
}
/// <summary>
/// Menu handler: trains a Bag-of-Visual-Words model with <c>bowSize</c> codewords
/// on the masked training bitmaps and serializes it under the data path.
/// </summary>
private void bowsToolStripMenuItem_Click_1(object sender, EventArgs e)
{
    var model = BagOfVisualWords.Create(numberOfWords: bowSize);
    var trainingImages = trainData.GetBitmaps(mask);
    model.Learn(trainingImages);

    string savePath = dataPath + String.Format(@"\train-{0}.bow", bowSize);
    Accord.IO.Serializer.Save(model, savePath);

    logger.logStr("Done " + bowSize);
    Application.DoEvents(); // let the UI catch up after the long-running training
}
/// <summary>
/// Trains a Bag-of-Visual-Words model restricted to the feature's screen region
/// (masked out of a full 1280x720 frame), saves the model under the feature's
/// folder, shows it, and then runs the downstream classifier training.
/// </summary>
/// <param name="scenefeatureData">Feature definition together with its training data.</param>
/// <returns>The result of the downstream <c>Train</c> step.</returns>
public bool TrainSceneFeature(SceneFeatureData scenefeatureData)
{
    var feature = scenefeatureData.feature;

    // Mask out everything except the feature's rectangle.
    Bitmap mask = Utils.CreateMaskBitmap(new Size(1280, 720), new Rectangle[] { feature.area });

    var bow = BagOfVisualWords.Create(numberOfWords: feature.bowSize);
    bow.Learn(scenefeatureData.trainData.GetBitmaps(mask));

    string savePath = path + @"\" + feature.name + String.Format(@"\train-{0}.bow", feature.bowSize);
    Accord.IO.Serializer.Save(bow, savePath);

    bow.Show();
    return (Train(bow, scenefeatureData));
}
/// <summary>
/// Learns a SURF-based Bag-of-Visual-Words codebook of <paramref name="maxFeatures"/>
/// words over the given images and returns each image's fixed-length feature vector.
/// Seeds Accord's RNG with 0 so results are reproducible.
/// </summary>
/// <param name="images">Images to learn the codebook from and to transform.</param>
/// <param name="maxFeatures">Codebook size; also the length of every output vector.</param>
/// <param name="modelSaveLocation">Optional path; when non-empty the trained model is serialized there.</param>
/// <returns>One feature vector per input image.</returns>
public static double[][] GetSURFFeatures(Bitmap[] images, int maxFeatures, string modelSaveLocation = null)
{
    Accord.Math.Random.Generator.Seed = 0;

    var model = BagOfVisualWords.Create(numberOfWords: maxFeatures);
    model.Learn(images);

    if (!string.IsNullOrEmpty(modelSaveLocation))
    {
        Serializer.Save(obj: model, path: modelSaveLocation);
    }

    return (model.Transform(images));
}
/// <summary>
/// Documentation example / regression test: builds a BoW model from FREAK binary
/// keypoint features clustered with Binary-Split, checks the extracted feature
/// histograms against recorded fixtures, then trains a linear SVM that must
/// separate the two image classes with zero error.
/// </summary>
public void freak_binary_split()
{
    #region doc_feature_freak
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // The Bag-of-Visual-Words model converts images of arbitrary
    // size into fixed-length feature vectors. In this example, we
    // will be setting the codebook size to 10. This means all feature
    // vectors that will be generated will have the same length of 10.

    // By default, the BoW object will use the sparse SURF as the
    // feature extractor and K-means as the clustering algorithm.
    // In this example, we will use the FREAK feature extractor
    // and the Binary-Split clustering algorithm instead.

    // Create a new Bag-of-Visual-Words (BoW) model using FREAK binary features
    var bow = BagOfVisualWords.Create(new FastRetinaKeypointDetector(), new BinarySplit(10));

    // Get some training images
    Bitmap[] images = GetImages();

    // Compute the model
    bow.Learn(images);

    // Single-threaded so the codeword assignment order is deterministic.
    bow.ParallelOptions.MaxDegreeOfParallelism = 1;

    // After this point, we will be able to translate
    // images into double[] feature vectors using
    double[][] features = bow.Transform(images);
    #endregion

    Assert.AreEqual(features.GetLength(), new[] { 6, 10 });

    string str = features.ToCSharp();

    double[][] expected = new double[][]
    {
        new double[] { 135, 69, 55, 131, 62, 64, 20, 29, 47, 68 },
        new double[] { 299, 64, 174, 93, 32, 101, 163, 56, 17, 18 },
        new double[] { 141, 70, 120, 128, 53, 52, 51, 58, 52, 26 },
        new double[] { 150, 13, 200, 55, 4, 36, 58, 20, 0, 3 },
        new double[] { 236, 31, 204, 72, 22, 78, 217, 53, 25, 8 },
        new double[] { 208, 21, 193, 106, 8, 43, 52, 8, 4, 23 }
    };

    // Membership (not positional) check: clustering may permute the codewords.
    for (int i = 0; i < features.Length; i++)
    {
        for (int j = 0; j < features[i].Length; j++)
        {
            Assert.IsTrue(expected[i].Contains(features[i][j]));
        }
    }

    #region doc_classification_feature_freak
    // Now, the features can be used to train any classification
    // algorithm as if they were the images themselves. For example,
    // let's assume the first three images belong to a class and
    // the second three to another class. We can train an SVM using
    int[] labels = { -1, -1, -1, +1, +1, +1 };

    // Create the SMO algorithm to learn a Linear kernel SVM
    var teacher = new SequentialMinimalOptimization <Linear>()
    {
        Complexity = 1000 // make a hard margin SVM
    };

    // Obtain a learned machine
    var svm = teacher.Learn(features, labels);

    // Use the machine to classify the features
    bool[] output = svm.Decide(features);

    // Compute the error between the expected and predicted labels
    double error = new ZeroOneLoss(labels).Loss(output); // should be 0
    #endregion

    Assert.AreEqual(error, 0);
}
/// <summary>
/// Documentation example / regression test: builds a BoW model over a custom
/// (byte[]) feature type — FREAK descriptors clustered with K-Modes under the
/// Hamming distance — checks the feature fixtures, then verifies a linear SVM
/// separates the two classes with zero error.
/// </summary>
public void custom_data_type_test()
{
    #region doc_datatype
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // The Bag-of-Visual-Words model converts images of arbitrary
    // size into fixed-length feature vectors. In this example, we
    // will be setting the codebook size to 10. This means all feature
    // vectors that will be generated will have the same length of 10.

    // By default, the BoW object will use the sparse SURF as the
    // feature extractor and K-means as the clustering algorithm.
    // In this example, we will use the FREAK feature extractor
    // and the K-Modes clustering algorithm instead.

    // Create a new Bag-of-Visual-Words (BoW) model using FREAK binary features
    var bow = BagOfVisualWords.Create <FastRetinaKeypointDetector, KModes <byte>, byte[]>(
        new FastRetinaKeypointDetector(), new KModes <byte>(10, new Hamming()));

    // Get some training images
    Bitmap[] images = GetImages();

    // Compute the model
    bow.Learn(images);

    // After this point, we will be able to translate
    // images into double[] feature vectors using
    double[][] features = bow.Transform(images);
    #endregion

    Assert.AreEqual(features.GetLength(), new[] { 6, 10 });

    string str = features.ToCSharp();

    double[][] expected = new double[][]
    {
        new double[] { 33, 58, 19, 35, 112, 67, 70, 155, 86, 45 },
        new double[] { 130, 91, 74, 114, 200, 90, 136, 37, 53, 92 },
        new double[] { 45, 49, 68, 55, 123, 142, 40, 100, 92, 37 },
        new double[] { 25, 17, 89, 136, 138, 59, 33, 7, 23, 12 },
        new double[] { 186, 78, 86, 133, 198, 60, 65, 25, 38, 77 },
        new double[] { 45, 33, 10, 131, 192, 26, 99, 20, 82, 28 }
    };

    // Membership (not positional) check: clustering may permute the codewords.
    for (int i = 0; i < features.Length; i++)
    {
        for (int j = 0; j < features[i].Length; j++)
        {
            Assert.IsTrue(expected[i].Contains(features[i][j]));
        }
    }

    #region doc_classification_datatype
    // Now, the features can be used to train any classification
    // algorithm as if they were the images themselves. For example,
    // let's assume the first three images belong to a class and
    // the second three to another class. We can train an SVM using
    int[] labels = { -1, -1, -1, +1, +1, +1 };

    // Create the SMO algorithm to learn a Linear kernel SVM
    var teacher = new SequentialMinimalOptimization <Linear>()
    {
        Complexity = 1000 // make a hard margin SVM
    };

    // Obtain a learned machine
    var svm = teacher.Learn(features, labels);

    // Use the machine to classify the features
    bool[] output = svm.Decide(features);

    // Compute the error between the expected and predicted labels
    double error = new ZeroOneLoss(labels).Loss(output); // should be 0
    #endregion

    Assert.AreEqual(error, 0);
}
/// <summary>
/// Documentation example / regression test: builds a 3-word BoW model from
/// Local Binary Pattern features clustered with Binary-Split, checks the
/// feature fixtures, then verifies a Gaussian-kernel SVM separates the two
/// classes with zero error.
/// </summary>
public void custom_feature_test_lbp()
{
    #region doc_feature_lbp
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // The Bag-of-Visual-Words model converts images of arbitrary
    // size into fixed-length feature vectors. In this example, we
    // will be setting the codebook size to 3. This means all feature
    // vectors that will be generated will have the same length of 3.

    // By default, the BoW object will use the sparse SURF as the
    // feature extractor and K-means as the clustering algorithm.
    // In this example, we will use the Local Binary Pattern (LBP)
    // feature extractor and the Binary-Split clustering algorithm.
    // However, this is just an example: the best features and the
    // best clustering algorithm might need to be found through
    // experimentation. Please also try with KMeans first to obtain
    // a baseline value.

    // Create a new Bag-of-Visual-Words (BoW) model using LBP features
    var bow = BagOfVisualWords.Create(new LocalBinaryPattern(), new BinarySplit(3));

    // Since we are using generics, we can setup properties
    // of the binary split clustering algorithm directly:
    bow.Clustering.ComputeCovariances = false;
    bow.Clustering.ComputeProportions = false;
    bow.Clustering.ComputeError = false;

    // Get some training images
    Bitmap[] images = GetImages();

    // Compute the model
    bow.Learn(images);

    // After this point, we will be able to translate
    // images into double[] feature vectors using
    double[][] features = bow.Transform(images);
    #endregion

    Assert.AreEqual(features.GetLength(), new[] { 6, 3 });

    string str = features.ToCSharp();

    double[][] expected = new double[][]
    {
        new double[] { 1608, 374, 370 },
        new double[] { 1508, 337, 507 },
        new double[] { 1215, 343, 794 },
        new double[] { 782, 550, 1020 },
        new double[] { 1480, 360, 512 },
        new double[] { 15, 724, 1613 }
    };

    // Membership (not positional) check: clustering may permute the codewords.
    for (int i = 0; i < features.Length; i++)
    {
        for (int j = 0; j < features[i].Length; j++)
        {
            Assert.IsTrue(expected[i].Contains(features[i][j]));
        }
    }

    #region doc_classification_feature_lbp
    // Now, the features can be used to train any classification
    // algorithm as if they were the images themselves. For example,
    // let's assume the first three images belong to a class and
    // the second three to another class. We can train an SVM using
    int[] labels = { -1, -1, +1, +1, +1, +1 };

    // Create the SMO algorithm to learn a Linear kernel SVM
    var teacher = new SequentialMinimalOptimization <Gaussian>()
    {
        Complexity = 100 // make a hard margin SVM
    };

    // Obtain a learned machine
    var svm = teacher.Learn(features, labels);

    // Use the machine to classify the features
    bool[] output = svm.Decide(features);

    // Compute the error between the expected and predicted labels
    double error = new ZeroOneLoss(labels).Loss(output); // should be 0
    #endregion

    Assert.AreEqual(error, 0);
}
/// <summary>
/// Documentation example / regression test: builds a 3-word BoW model from
/// Haralick texture features (K-Means clustering) over procedurally generated
/// wood and cloud textures, checks the feature fixtures positionally, then
/// verifies a linear SVM separates wood from clouds with zero error.
/// </summary>
public void custom_feature_test_haralick()
{
    #region doc_feature_haralick
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // The Bag-of-Visual-Words model converts images of arbitrary
    // size into fixed-length feature vectors. In this example, we
    // will be setting the codebook size to 3. This means all feature
    // vectors that will be generated will have the same length of 3.

    // By default, the BoW object will use the sparse SURF as the
    // feature extractor and K-means as the clustering algorithm.
    // In this example, we will use the Haralick feature extractor.

    // Create a new Bag-of-Visual-Words (BoW) model using Haralick features
    var bow = BagOfVisualWords.Create(new Haralick()
    {
        CellSize = 256, // divide images in cells of 256x256 pixels
        Mode = HaralickMode.AverageWithRange,
    }, new KMeans(3));

    // Generate some training images. Haralick is best for classifying
    // textures, so we will be generating examples of wood and clouds:
    var woodenGenerator = new WoodTexture();
    var cloudsGenerator = new CloudsTexture();

    Bitmap[] images = new[]
    {
        woodenGenerator.Generate(512, 512).ToBitmap(),
        woodenGenerator.Generate(512, 512).ToBitmap(),
        woodenGenerator.Generate(512, 512).ToBitmap(),

        cloudsGenerator.Generate(512, 512).ToBitmap(),
        cloudsGenerator.Generate(512, 512).ToBitmap(),
        cloudsGenerator.Generate(512, 512).ToBitmap()
    };

    // Compute the model
    bow.Learn(images);

    // Single-threaded so the codeword assignment order is deterministic.
    bow.ParallelOptions.MaxDegreeOfParallelism = 1;

    // After this point, we will be able to translate
    // images into double[] feature vectors using
    double[][] features = bow.Transform(images);
    #endregion

    Assert.AreEqual(features.GetLength(), new[] { 6, 3 });

    string str = features.ToCSharp();

    double[][] expected = new double[][]
    {
        new double[] { 3, 0, 1 },
        new double[] { 3, 0, 1 },
        new double[] { 3, 0, 1 },
        new double[] { 3, 1, 0 },
        new double[] { 3, 1, 0 },
        new double[] { 3, 1, 0 }
    };

    // Exact positional comparison (unlike the other tests, the single-threaded
    // run above makes the codeword order stable).
    for (int i = 0; i < expected.Length; i++)
    {
        for (int j = 0; j < expected[i].Length; j++)
        {
            Assert.IsTrue(expected[i][j] == features[i][j]);
        }
    }

    #region doc_classification_feature_haralick
    // Now, the features can be used to train any classification
    // algorithm as if they were the images themselves. For example,
    // let's assume the first three images belong to a class and
    // the second three to another class. We can train an SVM using
    int[] labels = { -1, -1, -1, +1, +1, +1 };

    // Create the SMO algorithm to learn a Linear kernel SVM
    var teacher = new SequentialMinimalOptimization <Linear>()
    {
        Complexity = 100 // make a hard margin SVM
    };

    // Obtain a learned machine
    var svm = teacher.Learn(features, labels);

    // Use the machine to classify the features
    bool[] output = svm.Decide(features);

    // Compute the error between the expected and predicted labels
    double error = new ZeroOneLoss(labels).Loss(output); // should be 0
    #endregion

    Assert.AreEqual(error, 0);
}
/// <summary>
/// Documentation example / regression test: builds a 10-word BoW model from
/// Histograms-of-Oriented-Gradients features clustered with Binary-Split,
/// checks the feature fixtures, then verifies a linear SVM separates the two
/// image classes with zero error.
/// </summary>
public void custom_feature_test()
{
    #region doc_feature
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // The Bag-of-Visual-Words model converts images of arbitrary
    // size into fixed-length feature vectors. In this example, we
    // will be setting the codebook size to 10. This means all feature
    // vectors that will be generated will have the same length of 10.

    // By default, the BoW object will use the sparse SURF as the
    // feature extractor and K-means as the clustering algorithm.
    // In this example, we will use the HOG feature extractor
    // and the Binary-Split clustering algorithm instead. However,
    // this is just an example: the best features and the best clustering
    // algorithm might need to be found through experimentation. Please
    // also try with KMeans first to obtain a baseline value.

    // Create a new Bag-of-Visual-Words (BoW) model using HOG features
    var bow = BagOfVisualWords.Create(new HistogramsOfOrientedGradients(), new BinarySplit(10));

    // Get some training images
    Bitmap[] images = GetImages();

    // Compute the model
    bow.Learn(images);

    // After this point, we will be able to translate
    // images into double[] feature vectors using
    double[][] features = bow.Transform(images);
    #endregion

    Assert.AreEqual(features.GetLength(), new[] { 6, 10 });

    string str = features.ToCSharp();

    double[][] expected = new double[][]
    {
        new double[] { 53, 285, 317, 292, 389, 264, 127, 250, 283, 92 },
        new double[] { 64, 326, 267, 418, 166, 241, 160, 237, 324, 149 },
        new double[] { 63, 234, 229, 221, 645, 178, 226, 178, 218, 160 },
        new double[] { 87, 322, 324, 295, 180, 276, 219, 218, 247, 184 },
        new double[] { 60, 312, 285, 285, 352, 274, 166, 226, 290, 102 },
        new double[] { 110, 292, 299, 324, 72, 208, 317, 248, 252, 230 }
    };

    // Membership (not positional) check: clustering may permute the codewords.
    for (int i = 0; i < features.Length; i++)
    {
        for (int j = 0; j < features[i].Length; j++)
        {
            Assert.IsTrue(expected[i].Contains(features[i][j]));
        }
    }

    #region doc_classification_feature
    // Now, the features can be used to train any classification
    // algorithm as if they were the images themselves. For example,
    // let's assume the first three images belong to a class and
    // the second three to another class. We can train an SVM using
    int[] labels = { -1, -1, -1, +1, +1, +1 };

    // Create the SMO algorithm to learn a Linear kernel SVM
    var teacher = new SequentialMinimalOptimization <Linear>()
    {
        Complexity = 100 // make a hard margin SVM
    };

    // Obtain a learned machine
    var svm = teacher.Learn(features, labels);

    // Use the machine to classify the features
    bool[] output = svm.Decide(features);

    // Compute the error between the expected and predicted labels
    double error = new ZeroOneLoss(labels).Loss(output); // should be 0
    #endregion

    Assert.AreEqual(error, 0);
}
/// <summary>
/// Documentation example / regression test: builds a 10-word BoW model with the
/// default SURF detector but a Binary-Split clustering algorithm, checks the
/// model dimensions, cluster proportions, and feature fixtures, then verifies a
/// linear SVM separates the two image classes with zero error.
/// </summary>
public void custom_clustering_test()
{
    #region doc_clustering
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // The Bag-of-Visual-Words model converts images of arbitrary
    // size into fixed-length feature vectors. In this example, we
    // will be setting the codebook size to 10. This means all feature
    // vectors that will be generated will have the same length of 10.

    // By default, the BoW object will use the sparse SURF as the
    // feature extractor and K-means as the clustering algorithm.
    // In this example, we will use the Binary-Split clustering
    // algorithm instead.

    // Create a new Bag-of-Visual-Words (BoW) model
    var bow = BagOfVisualWords.Create(new BinarySplit(10));

    // Since we are using generics, we can setup properties
    // of the binary split clustering algorithm directly:
    bow.Clustering.ComputeProportions = true;
    bow.Clustering.ComputeCovariances = false;

    // Get some training images
    Bitmap[] images = GetImages();

    // Compute the model
    bow.Learn(images);

    // After this point, we will be able to translate
    // images into double[] feature vectors using
    double[][] features = bow.Transform(images);
    #endregion

    // Model shape: input length is open-ended (-1); 10 words = 10 outputs.
    Assert.AreEqual(-1, bow.NumberOfInputs);
    Assert.AreEqual(10, bow.NumberOfOutputs);
    Assert.AreEqual(10, bow.NumberOfWords);

    // SURF descriptors are 64-dimensional, hence 64 cluster inputs.
    Assert.AreEqual(64, bow.Clustering.Clusters.NumberOfInputs);
    Assert.AreEqual(10, bow.Clustering.Clusters.NumberOfOutputs);
    Assert.AreEqual(10, bow.Clustering.Clusters.NumberOfClasses);

    BinarySplit binarySplit = bow.Clustering;

    string str = binarySplit.Clusters.Proportions.ToCSharp();

    double[] expectedProportions = new double[]
    {
        0.158034849951597, 0.11810261374637, 0.0871248789932236,
        0.116408518877057, 0.103581800580833, 0.192642787996128,
        0.0365440464666021, 0.0716360116166505, 0.0575992255566312,
        0.058325266214908
    };
    Assert.IsTrue(binarySplit.Clusters.Proportions.IsEqual(expectedProportions, 1e-10));

    // Covariances were disabled above, so none should have been computed.
    Assert.IsTrue(binarySplit.Clusters.Covariances.All(x => x == null));

    Assert.AreEqual(features.GetLength(), new[] { 6, 10 });

    str = features.ToCSharp();

    double[][] expected = new double[][]
    {
        new double[] { 73, 36, 41, 50, 7, 106, 23, 22, 22, 29 },
        new double[] { 76, 93, 25, 128, 86, 114, 20, 91, 22, 72 },
        new double[] { 106, 47, 67, 57, 37, 131, 33, 31, 22, 21 },
        new double[] { 84, 41, 49, 59, 33, 73, 32, 50, 6, 33 },
        new double[] { 169, 105, 92, 47, 95, 67, 16, 25, 83, 20 },
        new double[] { 145, 166, 86, 140, 170, 305, 27, 77, 83, 66 }
    };

    // Membership (not positional) check: clustering may permute the codewords.
    for (int i = 0; i < features.Length; i++)
    {
        for (int j = 0; j < features[i].Length; j++)
        {
            Assert.IsTrue(expected[i].Contains(features[i][j]));
        }
    }

    #region doc_classification_clustering
    // Now, the features can be used to train any classification
    // algorithm as if they were the images themselves. For example,
    // let's assume the first three images belong to a class and
    // the second three to another class. We can train an SVM using
    int[] labels = { -1, -1, -1, +1, +1, +1 };

    // Create the SMO algorithm to learn a Linear kernel SVM
    var teacher = new SequentialMinimalOptimization <Linear>()
    {
        Complexity = 10000 // make a hard margin SVM
    };

    // Obtain a learned machine
    var svm = teacher.Learn(features, labels);

    // Use the machine to classify the features
    bool[] output = svm.Decide(features);

    // Compute the error between the expected and predicted labels
    double error = new ZeroOneLoss(labels).Loss(output); // should be 0
    #endregion

    Assert.AreEqual(error, 0);
}
/// <summary>
/// Documentation example / regression test (single-threaded variant): builds a
/// 3-word BoW model from Local Binary Pattern features clustered with
/// Binary-Split, checks the feature fixtures, then verifies a linear SVM
/// separates the two image classes with zero error.
/// </summary>
public void custom_feature_test_lbp()
{
    #region doc_feature_lbp
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // The Bag-of-Visual-Words model converts images of arbitrary
    // size into fixed-length feature vectors. In this example, we
    // will be setting the codebook size to 3. This means all feature
    // vectors that will be generated will have the same length of 3.

    // By default, the BoW object will use the sparse SURF as the
    // feature extractor and K-means as the clustering algorithm.
    // In this example, we will use the Local Binary Pattern (LBP)
    // feature extractor and the Binary-Split clustering algorithm.

    // Create a new Bag-of-Visual-Words (BoW) model using LBP features
    var bow = BagOfVisualWords.Create(new LocalBinaryPattern(), new BinarySplit(3));

    // Single-threaded so the results are deterministic.
    bow.ParallelOptions.MaxDegreeOfParallelism = 1;

    // Get some training images
    Bitmap[] images = GetImages();

    // Compute the model
    bow.Learn(images);

    // After this point, we will be able to translate
    // images into double[] feature vectors using
    double[][] features = bow.Transform(images);
    #endregion

    Assert.AreEqual(features.GetLength(), new[] { 6, 3 });

    string str = features.ToCSharp();

    double[][] expected = new double[][]
    {
        new double[] { 1686, 359, 307 },
        new double[] { 1689, 356, 307 },
        new double[] { 1686, 372, 294 },
        new double[] { 1676, 372, 304 },
        new double[] { 1700, 356, 296 },
        new double[] { 1670, 378, 304 }
    };

    // Membership (not positional) check: clustering may permute the codewords.
    for (int i = 0; i < features.Length; i++)
    {
        for (int j = 0; j < features[i].Length; j++)
        {
            Assert.IsTrue(expected[i].Contains(features[i][j]));
        }
    }

    #region doc_classification_feature_lbp
    // Now, the features can be used to train any classification
    // algorithm as if they were the images themselves. For example,
    // let's assume the first three images belong to a class and
    // the second three to another class. We can train an SVM using
    int[] labels = { -1, -1, +1, +1, +1, +1 };

    // Create the SMO algorithm to learn a Linear kernel SVM
    var teacher = new SequentialMinimalOptimization <Linear>()
    {
        Complexity = 10 // make a hard margin SVM
    };

    // Obtain a learned machine
    var svm = teacher.Learn(features, labels);

    // Use the machine to classify the features
    bool[] output = svm.Decide(features);

    // Compute the error between the expected and predicted labels
    double error = new ZeroOneLoss(labels).Loss(output); // should be 0
    #endregion

    Assert.AreEqual(error, 0);
}
//http://accord-framework.net/docs/html/T_Accord_Imaging_BagOfVisualWords.htm
/// <summary>
/// Learns a 6-word Bag-of-Visual-Words codebook (SURF + Binary-Split) and a
/// multi-class linear SVM from the images in the "numbers" folder, then
/// evaluates the same model on the images in the "test" folder. Labels are
/// the trailing "_N.png" number of each filename, zero-based.
/// </summary>
private void btnLearn_Click(object sender, RoutedEventArgs e)
{
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    var bow = BagOfVisualWords.Create(new BinarySplit(6));

    // Since we are using generics, we can setup properties
    // of the binary split clustering algorithm directly:
    bow.Clustering.ComputeProportions = true;
    bow.Clustering.ComputeCovariances = true;

    List <string> lstfiles = Directory.GetFiles("numbers").ToList();
    lstfiles.Sort();
    Bitmap[] images = LoadBitmaps(lstfiles);

    bow.Learn(images);
    double[][] features = bow.Transform(images);
    int[] labels = ParseLabels(lstfiles);

    var teacher = new MulticlassSupportVectorLearning <Linear>()
    {
        // using LIBLINEAR's L2-loss SVC dual for each SVM
        Learner = (p) => new LinearDualCoordinateDescent()
        {
            Loss = Loss.L2
        }
    };
    teacher.ParallelOptions.MaxDegreeOfParallelism = 1; // (Remove, comment, or change this line to enable full parallelism)

    // Learn a machine
    var machine = teacher.Learn(features, labels);

    // Obtain class predictions for each sample
    int[] predicted = machine.Decide(features);

    // Compute classification error
    double error = new ZeroOneLoss(labels).Loss(predicted);

    // Test unseen data. BUG FIX: the original code learned a *second* codebook
    // ("bow33") on the test images — producing features incompatible with the
    // machine trained above — set the clustering properties on the wrong object,
    // and then still classified the *training* features. Test images must be
    // transformed with the already-trained codebook, and Decide must be given
    // the test features.
    lstfiles = Directory.GetFiles("test").ToList();
    lstfiles.Sort();
    images = LoadBitmaps(lstfiles);

    double[][] features_test = bow.Transform(images);
    labels = ParseLabels(lstfiles);

    // Obtain class predictions for each sample
    predicted = machine.Decide(features_test);

    // Compute classification error
    error = new ZeroOneLoss(labels).Loss(predicted);
}

/// <summary>Loads each file path into a Bitmap, preserving list order.</summary>
private static Bitmap[] LoadBitmaps(List <string> files)
{
    Bitmap[] images = new Bitmap[files.Count];
    for (int i = 0; i < files.Count; i++)
    {
        images[i] = new Bitmap(files[i]);
    }
    return images;
}

/// <summary>Parses the zero-based class label from a "..._N.png" filename.</summary>
private static int[] ParseLabels(List <string> files)
{
    int[] labels = new int[files.Count];
    for (int i = 0; i < files.Count; i++)
    {
        labels[i] = Convert.ToInt32(files[i].Split('_').Last().Replace(".png", "")) - 1;
    }
    return labels;
}
/// <summary>
/// Documentation example / regression test: builds a BoW model from Haralick
/// texture features clustered with a Gaussian Mixture Model, checks the
/// feature fixtures, then verifies a linear SVM separates the two image
/// classes with zero error.
/// </summary>
public void custom_feature_test_haralick()
{
    #region doc_feature_haralick
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // The Bag-of-Visual-Words model converts images of arbitrary
    // size into fixed-length feature vectors. In this example, we
    // will be setting the codebook size to 3.
    // NOTE(review): the assertions below expect feature vectors of
    // length 10, not 3 — the mixture size and the resulting codebook
    // length appear inconsistent here; verify against the framework's
    // GMM-backed BoW behavior.

    // By default, the BoW object will use the sparse SURF as the
    // feature extractor and K-means as the clustering algorithm.
    // In this example, we will use the Haralick feature extractor
    // and the GMM clustering algorithm instead.

    // Create a new Bag-of-Visual-Words (BoW) model using Haralick features
    var bow = BagOfVisualWords.Create(new Haralick(), new GaussianMixtureModel(3));

    // Get some training images
    Bitmap[] images = GetImages();

    // Compute the model
    bow.Learn(images);

    // After this point, we will be able to translate
    // images into double[] feature vectors using
    double[][] features = bow.Transform(images);
    #endregion

    Assert.AreEqual(features.GetLength(), new[] { 6, 10 });

    string str = features.ToCSharp();

    double[][] expected = new double[][]
    {
        new double[] { 141, 332, 240, 88, 363, 238, 282, 322, 114, 232 },
        new double[] { 103, 452, 195, 140, 158, 260, 283, 368, 163, 230 },
        new double[] { 88, 231, 185, 172, 631, 189, 219, 241, 237, 159 },
        new double[] { 106, 318, 262, 212, 165, 276, 264, 275, 244, 230 },
        new double[] { 143, 302, 231, 113, 332, 241, 273, 320, 157, 240 },
        new double[] { 87, 347, 248, 249, 63, 227, 292, 288, 339, 212 }
    };

    // Membership (not positional) check: clustering may permute the codewords.
    for (int i = 0; i < features.Length; i++)
    {
        for (int j = 0; j < features[i].Length; j++)
        {
            Assert.IsTrue(expected[i].Contains(features[i][j]));
        }
    }

    #region doc_classification_feature_haralick
    // Now, the features can be used to train any classification
    // algorithm as if they were the images themselves. For example,
    // let's assume the first three images belong to a class and
    // the second three to another class. We can train an SVM using
    int[] labels = { -1, -1, -1, +1, +1, +1 };

    // Create the SMO algorithm to learn a Linear kernel SVM
    var teacher = new SequentialMinimalOptimization <Linear>()
    {
        Complexity = 100 // make a hard margin SVM
    };

    // Obtain a learned machine
    var svm = teacher.Learn(features, labels);

    // Use the machine to classify the features
    bool[] output = svm.Decide(features);

    // Compute the error between the expected and predicted labels
    double error = new ZeroOneLoss(labels).Loss(output); // should be 0
    #endregion

    Assert.AreEqual(error, 0);
}
/// <summary>
/// Trains a binary SURF Bag-of-Visual-Words + SVM pipeline driven by an XML
/// configuration file (path in args["xml"]). The XML supplies: "positive" and
/// "negative" image directories, "number" (codebook size, default 500),
/// "complexity" (SVM C, default 10000), and output paths "bow_output" /
/// "machine_output" for the serialized model and machine. Any top-level
/// failure is logged via logIt rather than rethrown.
/// </summary>
/// <param name="args">Parsed command-line switches; only "xml" is read here.</param>
static void train(System.Collections.Specialized.StringDictionary args)
{
    try
    {
        XmlDocument doc = loadXml(args["xml"]);
        if (doc.DocumentElement != null)
        {
            // Collect (image, label) pairs: +1 for positives, -1 for negatives.
            List <Tuple <Bitmap, int> > data = new List <Tuple <Bitmap, int> >();

            string s = doc.DocumentElement["positive"].InnerText;
            foreach (string ss in System.IO.Directory.GetFiles(s))
            {
                // Best-effort: files that are not loadable images are skipped.
                try
                {
                    Bitmap b = new Bitmap(ss);
                    data.Add(new Tuple <Bitmap, int>(b, 1));
                }
                catch (Exception) { }
            }

            s = doc.DocumentElement["negative"].InnerText;
            foreach (string ss in System.IO.Directory.GetFiles(s))
            {
                // Best-effort: files that are not loadable images are skipped.
                try
                {
                    Bitmap b = new Bitmap(ss);
                    data.Add(new Tuple <Bitmap, int>(b, -1));
                }
                catch (Exception) { }
            }

            // create bag-of-word
            s = doc.DocumentElement["number"].InnerText;
            int i = 500;
            if (!Int32.TryParse(s, out i)) { i = 500; } // default codebook size
            var surfBow = BagOfVisualWords.Create(numberOfWords: i);

            // NOTE: from here on "i" is reused as a plain loop index.
            Bitmap[] bmps = new Bitmap[data.Count];
            for (i = 0; i < data.Count; i++) { bmps[i] = data[i].Item1; }

            //IBagOfWords<Bitmap> bow = surfBow.Learn(bmps);
            //double[][] features = (bow as ITransform<Bitmap, double[]>).Transform(bmps);
            surfBow.Learn(bmps);
            double[][] features = surfBow.Transform(bmps);

            int[] labels = new int[data.Count];
            for (i = 0; i < labels.Length; i++) { labels[i] = data[i].Item2; }

            s = doc.DocumentElement["complexity"].InnerText;
            if (!Int32.TryParse(s, out i)) { i = 10000; } // default SVM complexity
            var teacher = new SequentialMinimalOptimization <Linear>()
            {
                Complexity = i // make a hard margin SVM
            };
            var svm = teacher.Learn(features, labels);

            // Persist both the codebook and the trained machine.
            s = doc.DocumentElement["bow_output"].InnerText;
            Serializer.Save(obj: surfBow, path: s);
            s = doc.DocumentElement["machine_output"].InnerText;
            Serializer.Save(obj: svm, path: s);
        }
    }
    catch (Exception ex)
    {
        logIt(ex.Message);
        logIt(ex.StackTrace);
    }
}
/// <summary>
/// Documentation example / regression test: learns a BoW model directly from
/// image file paths (loaded on demand) using descriptor subsampling to bound
/// memory, checks the model's descriptor statistics, cluster proportions and
/// feature fixtures, then verifies a linear SVM separates the two classes.
/// </summary>
public void learn_from_disk()
{
    string basePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Resources", "SURF");

    #region doc_learn_disk
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // Depending on the problem we are trying to tackle, learning a BoW might require
    // large amounts of available memory. In those cases, we can alleviate the amount
    // of memory required by using only a subsample of the training datasete to learn
    // the model. Likewise, we can also load images from the disk on-demand instead of
    // having to load all of them right at the beginning.

    // Create a new Bag-of-Visual-Words (BoW) model
    var bow = BagOfVisualWords.Create(numberOfWords: 10);

    // We will learn the codebooks from only 1000 descriptors, which
    // will be randomly selected from the multiple training images
    bow.NumberOfDescriptors = 1000; // Note: in the real world, use >10,000 samples

    // We will load at most 200 descriptors from each image. This means
    // that we will only keep 200 descriptors per image at maximum in
    // memory at a given time.
    bow.MaxDescriptorsPerInstance = 200; // Note: In the real world, use >1,000 samples

    // Get some training images. Here, instead of loading Bitmaps as in
    // the other examples, we will just specify their paths in the disk:
    string[] filenames =
    {
        Path.Combine(basePath, "flower01.jpg"),
        Path.Combine(basePath, "flower02.jpg"),
        Path.Combine(basePath, "flower03.jpg"),
        Path.Combine(basePath, "flower04.jpg"),
        Path.Combine(basePath, "flower05.jpg"),
        Path.Combine(basePath, "flower06.jpg"),
    };

    // Compute the model
    bow.Learn(filenames);

    // After this point, we will be able to translate
    // images into double[] feature vectors using
    double[][] features = bow.Transform(filenames);

    // We can also check some statistics about the dataset:
    int numberOfImages = bow.Statistics.TotalNumberOfInstances; // 6

    // Statistics about all the descriptors that have been extracted:
    int totalDescriptors = bow.Statistics.TotalNumberOfDescriptors; // 4132
    double totalMean = bow.Statistics.TotalNumberOfDescriptorsPerInstance.Mean; // 688.66666666666663
    double totalVar = bow.Statistics.TotalNumberOfDescriptorsPerInstance.Variance; // 96745.866666666669
    IntRange totalRange = bow.Statistics.TotalNumberOfDescriptorsPerInstanceRange; // [409, 1265]

    // Statistics only about the descriptors that have been actually used:
    int takenDescriptors = bow.Statistics.NumberOfDescriptorsTaken; // 1000
    double takenMean = bow.Statistics.NumberOfDescriptorsTakenPerInstance.Mean; // 200
    double takenVar = bow.Statistics.NumberOfDescriptorsTakenPerInstance.Variance; // 0
    IntRange takenRange = bow.Statistics.NumberOfDescriptorsTakenPerInstanceRange; // [200, 200]
    #endregion

    Assert.AreEqual(6, numberOfImages);
    Assert.AreEqual(4132, totalDescriptors);
    Assert.AreEqual(688.66666666666663, totalMean);
    Assert.AreEqual(96745.866666666669, totalVar);
    Assert.AreEqual(new IntRange(409, 1265), totalRange);
    Assert.AreEqual(1000, takenDescriptors);
    Assert.AreEqual(200, takenMean);
    Assert.AreEqual(0, takenVar);
    Assert.AreEqual(new IntRange(200, 200), takenRange);

    // SURF descriptors are 64-dimensional, hence 64 cluster inputs.
    var kmeans = bow.Clustering as KMeans;
    Assert.AreEqual(64, kmeans.Clusters.NumberOfInputs);
    Assert.AreEqual(10, kmeans.Clusters.NumberOfOutputs);
    Assert.AreEqual(10, kmeans.Clusters.NumberOfClasses);

    string str = kmeans.Clusters.Proportions.ToCSharp();

    double[] expectedProportions = new double[]
    {
        0.029, 0.167, 0.143, 0.129, 0.079, 0.104, 0.068, 0.09, 0.094, 0.097
    };
    Assert.IsTrue(kmeans.Clusters.Proportions.IsEqual(expectedProportions, 1e-10));
    Assert.IsTrue(kmeans.Clusters.Covariances.All(x => x == null));

    Assert.AreEqual(features.GetLength(), new[] { 6, 10 });

    str = features.ToCSharp();

    double[][] expected = new double[][]
    {
        new double[] { 6, 104, 59, 68, 41, 7, 45, 25, 26, 28 },
        new double[] { 13, 102, 61, 39, 51, 114, 69, 108, 115, 55 },
        new double[] { 10, 138, 91, 78, 27, 46, 28, 39, 52, 43 },
        new double[] { 4, 66, 51, 84, 59, 32, 25, 54, 61, 24 },
        new double[] { 88, 85, 161, 94, 5, 119, 13, 35, 22, 97 },
        new double[] { 57, 269, 134, 81, 53, 214, 59, 111, 139, 148 }
    };

    // Membership (not positional) check: clustering may permute the codewords.
    for (int i = 0; i < features.Length; i++)
    {
        for (int j = 0; j < features[i].Length; j++)
        {
            Assert.IsTrue(expected[i].Contains(features[i][j]));
        }
    }

    #region doc_classification_disk
    // Now, the features can be used to train any classification
    // algorithm as if they were the images themselves. For example,
    // let's assume the first three images belong to a class and
    // the second three to another class. We can train an SVM using
    int[] labels = { -1, -1, -1, +1, +1, +1 };

    // Create the SMO algorithm to learn a Linear kernel SVM
    var teacher = new SequentialMinimalOptimization <Linear>()
    {
        Complexity = 10000 // make a hard margin SVM
    };

    // Obtain a learned machine
    var svm = teacher.Learn(features, labels);

    // Use the machine to classify the features
    bool[] output = svm.Decide(features);

    // Compute the error between the expected and predicted labels
    double error = new ZeroOneLoss(labels).Loss(output);
    #endregion

    Assert.IsTrue(new ZeroOneLoss(labels).IsBinary);
    Assert.AreEqual(error, 0);
}
/// <summary>
///   This methods computes the Bag-of-Visual-Words with the training images.
/// </summary>
///
/// <param name="sender">The button that raised the click event.</param>
/// <param name="e">Standard event arguments (unused).</param>
///
/// <remarks>
///   Builds a BoW codebook from the training images using the detector selected in the
///   UI (SURF, FREAK or HOG radio buttons), then transforms every image shown in
///   <c>listView1</c> into a fixed-length feature vector and stores it in the item's Tag.
///   NOTE(review): assumes <c>originalTrainImages</c> and <c>originalImages</c> were
///   populated by an earlier load step — confirm against the form's load handler.
/// </remarks>
private void btnBagOfWords_Click(object sender, EventArgs e)
{
    // Codebook size chosen by the user; every feature vector will have this length.
    int numberOfWords = (int)numWords.Value;

    // Time the codebook learning separately from the feature extraction below.
    Stopwatch sw1 = Stopwatch.StartNew();

    IBagOfWords<Bitmap> bow;

    // Check if we will use SURF or FREAK as the feature detector
    if (rbSurf.Checked)
    {
        // We will use SURF, so we can use a standard clustering
        // algorithm that is based on Euclidean distances. A good
        // algorithm for clustering codewords is the Binary Split
        // variant of the K-Means algorithm.

        // Create a Binary-Split clustering algorithm
        BinarySplit binarySplit = new BinarySplit(numberOfWords);

        // Create bag-of-words (BoW) with the given algorithm
        BagOfVisualWords surfBow = new BagOfVisualWords(binarySplit);

        // Compute the BoW codebook using training images only
        bow = surfBow.Learn(originalTrainImages.Values.ToArray());
    }
    else if (rbFreak.Checked)
    {
        // We will use the FREAK detector. The features generated by FREAK
        // are represented as bytes. While it is possible to transform those
        // to standard double vectors, we will demonstrate how to use a non-
        // Euclidean distance based algorithm to generate codewords for it.

        // Note: Using Binary-Split with normalized FREAK features would
        // possibly work better than the k-modes. This is just an example.

        // Create a k-Modes clustering algorithm (Hamming distance suits
        // the binary byte descriptors produced by FREAK).
        var kmodes = new KModes<byte>(numberOfWords, new Hamming());

        // Create a FREAK detector explicitly (if no detector was passed,
        // the BagOfVisualWords would be using a SURF detector by default).
        var freak = new FastRetinaKeypointDetector();

        // Create bag-of-words (BoW) with the k-modes clustering and FREAK detector
        var freakBow = new BagOfVisualWords<FastRetinaKeypoint, byte[]>(freak, kmodes);

        // Compute the BoW codebook using training images only
        bow = freakBow.Learn(originalTrainImages.Values.ToArray());
    }
    else
    {
        // We will use HOG, so we can use a standard clustering
        // algorithm that is based on Euclidean distances. A good
        // algorithm for clustering codewords is the Binary Split
        // variant of the K-Means algorithm.

        // Create a Binary-Split clustering algorithm
        BinarySplit binarySplit = new BinarySplit(numberOfWords);

        // Create a HOG detector explicitly (if no detector was passed,
        // the BagOfVisualWords would be using a SURF detector by default).
        var hog = new HistogramsOfOrientedGradients();

        // Create bag-of-words (BoW) with the given algorithm
        var hogBow = BagOfVisualWords.Create(hog, binarySplit);

        // Compute the BoW codebook using training images only
        bow = hogBow.Learn(originalTrainImages.Values.ToArray());
    }

    sw1.Stop();

    // Now that we have already created and computed the BoW model, we
    // will use it to extract representations for each of the images in
    // both training and testing sets.

    Stopwatch sw2 = Stopwatch.StartNew();

    // Extract features for all images
    foreach (ListViewItem item in listView1.Items)
    {
        // Get item image
        Bitmap image = originalImages[item.ImageKey] as Bitmap;

        // Get a feature vector representing this image
        double[] featureVector = (bow as ITransform<Bitmap, double[]>).Transform(image);

        // Represent it as a string so we can show it onscreen
        string featureString = featureVector.ToString(DefaultArrayFormatProvider.InvariantCulture);

        // Show it in the visual grid (update the existing sub-item if one
        // was already added by a previous run, otherwise append a new one)
        if (item.SubItems.Count == 2)
            item.SubItems[1].Text = featureString;
        else
            item.SubItems.Add(featureString);

        // Retrieve the class labels, that we had stored in the Tag
        int classLabel = (item.Tag as Tuple<double[], int>).Item2;

        // Now, use the Tag to store the feature vector too
        item.Tag = Tuple.Create(featureVector, classLabel);
    }

    sw2.Stop();

    lbStatus.Text = "BoW constructed in " + sw1.Elapsed + "s. Features extracted in " + sw2.Elapsed + "s.";

    btnSampleRunAnalysis.Enabled = true;
}
/// <summary>
///   Learns a Bag-of-Visual-Words model with the default SURF extractor and
///   K-Means clustering, then checks the extracted statistics, codebook
///   proportions and feature vectors against known-good values, and finally
///   trains a hard-margin linear SVM on the resulting features.
/// </summary>
///
/// <remarks>
///   Fix: several assertions had the expected/actual arguments swapped
///   (NUnit's signature is <c>Assert.AreEqual(expected, actual)</c>); with
///   swapped arguments a failure would report the values backwards. Also
///   restored the comment marker that had been lost before the labels array.
/// </remarks>
public void learn_new()
{
    #region doc_learn
    // Ensure results are reproducible
    Accord.Math.Random.Generator.Seed = 0;

    // The Bag-of-Visual-Words model converts images of arbitrary
    // size into fixed-length feature vectors. In this example, we
    // will be setting the codebook size to 10. This means all feature
    // vectors that will be generated will have the same length of 10.

    // By default, the BoW object will use the sparse SURF as the
    // feature extractor and K-means as the clustering algorithm.

    // Create a new Bag-of-Visual-Words (BoW) model
    var bow = BagOfVisualWords.Create(numberOfWords: 10);
    // Note: a simple BoW model can also be created using
    // var bow = new BagOfVisualWords(numberOfWords: 10);

    // Get some training images
    Bitmap[] images = GetImages();

    // Compute the model
    bow.Learn(images);

    // After this point, we will be able to translate
    // images into double[] feature vectors using
    double[][] features = bow.Transform(images);

    // We can also check some statistics about the dataset:
    int numberOfImages = bow.Statistics.TotalNumberOfInstances; // 6

    // Statistics about all the descriptors that have been extracted:
    int totalDescriptors = bow.Statistics.TotalNumberOfDescriptors; // 4132
    double totalMean = bow.Statistics.TotalNumberOfDescriptorsPerInstance.Mean; // 688.66666666666663
    double totalVar = bow.Statistics.TotalNumberOfDescriptorsPerInstance.Variance; // 96745.866666666669
    IntRange totalRange = bow.Statistics.TotalNumberOfDescriptorsPerInstanceRange; // [409, 1265]

    // Statistics only about the descriptors that have been actually used:
    int takenDescriptors = bow.Statistics.NumberOfDescriptorsTaken; // 4132
    double takenMean = bow.Statistics.NumberOfDescriptorsTakenPerInstance.Mean; // 688.66666666666663
    double takenVar = bow.Statistics.NumberOfDescriptorsTakenPerInstance.Variance; // 96745.866666666669
    IntRange takenRange = bow.Statistics.NumberOfDescriptorsTakenPerInstanceRange; // [409, 1265]
    #endregion

    Assert.AreEqual(6, numberOfImages);
    Assert.AreEqual(4132, totalDescriptors);
    Assert.AreEqual(688.66666666666663, totalMean);
    Assert.AreEqual(96745.866666666669, totalVar);
    Assert.AreEqual(new IntRange(409, 1265), totalRange);
    Assert.AreEqual(4132, takenDescriptors);
    Assert.AreEqual(688.66666666666663, takenMean);
    Assert.AreEqual(96745.866666666669, takenVar);
    Assert.AreEqual(new IntRange(409, 1265), takenRange);

    var kmeans = bow.Clustering as KMeans;
    Assert.AreEqual(64, kmeans.Clusters.NumberOfInputs);
    Assert.AreEqual(10, kmeans.Clusters.NumberOfOutputs);
    Assert.AreEqual(10, kmeans.Clusters.NumberOfClasses);

    string str = kmeans.Clusters.Proportions.ToCSharp();

    double[] expectedProportions = new double[]
    {
        0.0960793804453049, 0.0767182962245886, 0.103823814133591,
        0.0738141335914811, 0.0997095837366893, 0.0815585672797677,
        0.0788964181994192, 0.090513068731849, 0.117376573088093,
        0.181510164569216
    };

    Assert.IsTrue(kmeans.Clusters.Proportions.IsEqual(expectedProportions, 1e-10));
    Assert.IsTrue(kmeans.Clusters.Covariances.All(x => x == null));

    // Fixed: expected value goes first in Assert.AreEqual(expected, actual).
    Assert.AreEqual(new[] { 6, 10 }, features.GetLength());

    str = features.ToCSharp();

    double[][] expected = new double[][]
    {
        new double[] { 47, 44, 42, 4, 23, 22, 28, 53, 50, 96 },
        new double[] { 26, 91, 71, 49, 99, 70, 59, 28, 155, 79 },
        new double[] { 71, 34, 51, 33, 53, 25, 44, 64, 32, 145 },
        new double[] { 49, 41, 31, 24, 54, 19, 41, 63, 66, 72 },
        new double[] { 137, 16, 92, 115, 39, 75, 24, 92, 41, 88 },
        new double[] { 67, 91, 142, 80, 144, 126, 130, 74, 141, 270 }
    };

    // Contains (rather than exact positional equality) is used on purpose:
    // the codeword ordering produced by the clustering is not guaranteed.
    for (int i = 0; i < features.Length; i++)
        for (int j = 0; j < features[i].Length; j++)
            Assert.IsTrue(expected[i].Contains(features[i][j]));

    #region doc_classification

    // Now, the features can be used to train any classification
    // algorithm as if they were the images themselves. For example,
    // let's assume the first three images belong to a class and
    // the second three to another class. We can train an SVM using
    int[] labels = { -1, -1, -1, +1, +1, +1 };

    // Create the SMO algorithm to learn a Linear kernel SVM
    var teacher = new SequentialMinimalOptimization<Linear>()
    {
        Complexity = 10000 // make a hard margin SVM
    };

    // Obtain a learned machine
    var svm = teacher.Learn(features, labels);

    // Use the machine to classify the features
    bool[] output = svm.Decide(features);

    // Compute the error between the expected and predicted labels
    double error = new ZeroOneLoss(labels).Loss(output);
    #endregion

    Assert.IsTrue(new ZeroOneLoss(labels).IsBinary);
    // Fixed: expected value goes first in Assert.AreEqual(expected, actual).
    Assert.AreEqual(0, error);
}