private static void Main(string[] args)
{
    try
    {
        // Make sure the user entered an argument to this program. It should be the
        // filename for an image.
        if (args.Length != 1)
        {
            Console.WriteLine("error, you have to enter a BMP file as an argument to this program.");
            return;
        }

        // Here we declare an image object that can store color rgb_pixels, and load
        // the image file into it. If something is wrong then load_image() will throw
        // an exception. Also, if you linked with libpng and libjpeg then load_image()
        // can load PNG and JPEG files in addition to BMP files.
        using (var img = Dlib.LoadImage<RgbPixel>(args[0]))
        {
            // Now convert the image into a FHOG feature image. The output, hog, is a 2D array
            // of 31 dimensional vectors.
            using (var hog = Dlib.ExtractFHogFeatures<float>(img))
            {
                Console.WriteLine($"hog image has {hog.Rows} rows and {hog.Columns} columns.");

                // Let's see what the image and FHOG features look like.
                using (var win = new ImageWindow(img))
                using (var drawhog = Dlib.DrawFHog(hog))
                using (var winhog = new ImageWindow(drawhog))
                {
                    // Another thing you might want to do is map between the pixels in img and the
                    // cells in the hog image. dlib provides the image_to_fhog() and fhog_to_image()
                    // routines for this. Their use is demonstrated in the following loop which
                    // responds to the user clicking on pixels in the image img.
                    Point p; // A 2D point, used to represent pixel locations.
                    while (win.GetNextDoubleClick(out p))
                    {
                        var hp = Dlib.ImageToFHog(p);
                        Console.WriteLine($"The point {p} in the input image corresponds to {hp} in hog space.");
                        var row = hog[hp.Y];
                        var column = row[hp.X];
                        var t = Dlib.Trans(column);
                        //Console.WriteLine($"FHOG features at this point: {t}");
                    }

                    // Finally, sometimes you want to get a planar representation of the HOG features
                    // rather than the explicit vector (i.e. interlaced) representation used above.
                    var planar_hog = Dlib.ExtractFHogFeaturesArray<float>(img);
                    // Now we have an array of 31 float valued image planes, each representing one of
                    // the dimensions of the HOG feature vector.
                }
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine($"exception thrown: {e}");
    }
}
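// A hedged sketch, not part of the original example: the comment above notes that dlib
// pairs image_to_fhog() with an inverse routine, fhog_to_image(), for mapping a HOG cell
// coordinate back to the pixel region it covers. Assuming DlibDotNet wraps the inverse as
// Dlib.FHogToImage with dlib's default 8-pixel cell size -- an assumed binding name, not a
// confirmed one -- the round trip would look like this:
private static void ShowInverseMappingSketch()
{
    var cell = new Point(3, 5);            // a cell coordinate in hog-space
    var pixel = Dlib.FHogToImage(cell);    // assumed wrapper for dlib's fhog_to_image()
    Console.WriteLine($"hog cell {cell} maps back to pixel {pixel} in the input image.");
}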
public void ExtractFHogFeatures2()
{
    var path = this.GetDataFile($"{LoadTarget}.bmp");

    var tests = new[]
    {
        new { Type = MatrixElementTypes.Float,  ExpectResult = true },
        new { Type = MatrixElementTypes.Double, ExpectResult = true }
    };

    foreach (var output in tests)
    {
        Array2DBase imageObj = null;
        Array2DMatrixBase outputObj = null;

        try
        {
            imageObj = DlibTest.LoadImageHelp(ImageTypes.RgbPixel, path);

            switch (output.Type)
            {
                case MatrixElementTypes.Float:
                    outputObj = Dlib.ExtractFHogFeatures<float>(imageObj);
                    break;
                case MatrixElementTypes.Double:
                    outputObj = Dlib.ExtractFHogFeatures<double>(imageObj);
                    break;
                default:
                    throw new ArgumentOutOfRangeException();
            }

            MatrixBase matrix = Dlib.DrawFHog(outputObj);
            if (this.CanGuiDebug)
            {
                var window = new ImageWindow(matrix);
                window.WaitUntilClosed();
            }
        }
        catch (Exception e)
        {
            Console.WriteLine(e);
            throw;
        }
        finally
        {
            if (imageObj != null)
                this.DisposeAndCheckDisposedState(imageObj);
            if (outputObj != null)
                this.DisposeAndCheckDisposedState(outputObj);
        }
    }
}
public void ExtractFHogFeatures()
{
    const string testName = nameof(this.ExtractFHogFeatures);
    var path = this.GetDataFile($"{LoadTarget}.bmp");

    var tests = new[]
    {
        new { Type = MatrixElementTypes.Float,         ExpectResult = true },
        new { Type = MatrixElementTypes.Double,        ExpectResult = true },
        new { Type = MatrixElementTypes.RgbPixel,      ExpectResult = false },
        new { Type = MatrixElementTypes.RgbAlphaPixel, ExpectResult = false },
        new { Type = MatrixElementTypes.HsiPixel,      ExpectResult = false },
        new { Type = MatrixElementTypes.LabPixel,      ExpectResult = false },
        new { Type = MatrixElementTypes.UInt32,        ExpectResult = false },
        new { Type = MatrixElementTypes.UInt8,         ExpectResult = false },
        new { Type = MatrixElementTypes.UInt16,        ExpectResult = false },
        new { Type = MatrixElementTypes.Int8,          ExpectResult = false },
        new { Type = MatrixElementTypes.Int16,         ExpectResult = false },
        new { Type = MatrixElementTypes.Int32,         ExpectResult = false }
    };

    foreach (ImageTypes inputType in Enum.GetValues(typeof(ImageTypes)))
    {
        foreach (var output in tests)
        {
            if (inputType == ImageTypes.Matrix)
                continue;

            var expectResult = output.ExpectResult;
            var imageObj = DlibTest.LoadImageHelp(inputType, path);
            Array2DMatrixBase outputObj = null;

            var outputImageAction = new Func<bool, Array2DMatrixBase>(expect =>
            {
                switch (output.Type)
                {
                    case MatrixElementTypes.UInt8:
                        outputObj = Dlib.ExtractFHogFeatures<byte>(imageObj);
                        break;
                    case MatrixElementTypes.UInt16:
                        outputObj = Dlib.ExtractFHogFeatures<ushort>(imageObj);
                        break;
                    case MatrixElementTypes.UInt32:
                        outputObj = Dlib.ExtractFHogFeatures<uint>(imageObj);
                        break;
                    case MatrixElementTypes.Int8:
                        outputObj = Dlib.ExtractFHogFeatures<sbyte>(imageObj);
                        break;
                    case MatrixElementTypes.Int16:
                        outputObj = Dlib.ExtractFHogFeatures<short>(imageObj);
                        break;
                    case MatrixElementTypes.Int32:
                        outputObj = Dlib.ExtractFHogFeatures<int>(imageObj);
                        break;
                    case MatrixElementTypes.Float:
                        outputObj = Dlib.ExtractFHogFeatures<float>(imageObj);
                        break;
                    case MatrixElementTypes.Double:
                        outputObj = Dlib.ExtractFHogFeatures<double>(imageObj);
                        break;
                    case MatrixElementTypes.RgbPixel:
                        outputObj = Dlib.ExtractFHogFeatures<RgbPixel>(imageObj);
                        break;
                    case MatrixElementTypes.RgbAlphaPixel:
                        outputObj = Dlib.ExtractFHogFeatures<RgbAlphaPixel>(imageObj);
                        break;
                    case MatrixElementTypes.HsiPixel:
                        outputObj = Dlib.ExtractFHogFeatures<HsiPixel>(imageObj);
                        break;
                    case MatrixElementTypes.LabPixel:
                        outputObj = Dlib.ExtractFHogFeatures<LabPixel>(imageObj);
                        break;
                    default:
                        throw new ArgumentOutOfRangeException();
                }

                return outputObj;
            });

            var successAction = new Action<Array2DMatrixBase>(image =>
            {
                MatrixBase ret = null;
                try
                {
                    ret = Dlib.DrawFHog(image);
                }
                catch (Exception e)
                {
                    Console.WriteLine(e);
                    throw;
                }
                finally
                {
                    if (ret != null)
                        this.DisposeAndCheckDisposedState(ret);
                }
            });

            var failAction = new Action(() =>
            {
                Assert.True(false, $"{testName} should throw exception for InputType: {inputType}, OutputType: {output.Type}.");
            });

            var finallyAction = new Action(() =>
            {
                if (imageObj != null)
                    this.DisposeAndCheckDisposedState(imageObj);
                if (outputObj != null)
                    this.DisposeAndCheckDisposedState(outputObj);
            });

            var exceptionAction = new Action(() =>
            {
                Console.WriteLine($"Failed to execute {testName} for InputType: {inputType}, OutputType: {output.Type}.");
            });

            DoTest(outputImageAction, expectResult, successAction, finallyAction, failAction, exceptionAction);
        }
    }
}
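// A minimal standalone sketch, not part of the test suite: the table-driven test above
// encodes the contract that ExtractFHogFeatures only produces float or double feature
// matrices and rejects pixel and integer element types. The input path below is a
// placeholder, and the concrete exception type thrown by the binding is not assumed,
// so Exception is caught broadly.
private static void CheckFHogElementTypes()
{
    using (var img = Dlib.LoadImage<RgbPixel>("face.bmp")) // placeholder input file
    {
        using (var hog = Dlib.ExtractFHogFeatures<float>(img))
            Console.WriteLine($"float is accepted: {hog.Rows}x{hog.Columns} cell grid");

        try
        {
            using (var bad = Dlib.ExtractFHogFeatures<RgbPixel>(img)) { }
            Console.WriteLine("unexpected: RgbPixel output was accepted");
        }
        catch (Exception e)
        {
            Console.WriteLine($"RgbPixel output rejected as expected: {e.GetType().Name}");
        }
    }
}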
private static void Main(string[] args)
{
    try
    {
        // In this example we are going to train a face detector based on the
        // small faces dataset in the examples/faces directory. So the first
        // thing we do is load that dataset. This means you need to supply the
        // path to this faces folder as a command line argument so we will know
        // where it is.
        if (args.Length != 1)
        {
            Console.WriteLine("Give the path to the examples/faces directory as the argument to this");
            Console.WriteLine("program. For example, if you are in the examples folder then execute ");
            Console.WriteLine("this program by running: ");
            Console.WriteLine("   ./fhog_object_detector_ex faces");
            Console.WriteLine();
            return;
        }

        var facesDirectory = args[0];

        // The faces directory contains a training dataset and a separate
        // testing dataset. The training data consists of 4 images, each
        // annotated with rectangles that bound each human face. The idea is
        // to use this training data to learn to identify human faces in new
        // images.
        //
        // Once you have trained an object detector it is always important to
        // test it on data it wasn't trained on. Therefore, we will also load
        // a separate testing set of 5 images. Once we have a face detector
        // created from the training data we will see how well it works by
        // running it on the testing images.
        //
        // So here we create the variables that will hold our dataset.
        // images_train will hold the 4 training images and face_boxes_train
        // holds the locations of the faces in the training images. So for
        // example, the image images_train[0] has the faces given by the
        // rectangles in face_boxes_train[0].
        IList<Matrix<byte>> tmpImagesTrain;
        IList<Matrix<byte>> tmpImagesTest;
        IList<IList<Rectangle>> tmpFaceBoxesTrain;
        IList<IList<Rectangle>> tmpFaceBoxesTest;

        // Now we load the data. These XML files list the images in each
        // dataset and also contain the positions of the face boxes. Obviously
        // you can use any kind of input format you like so long as you store
        // the data into images_train and face_boxes_train. But for convenience
        // dlib comes with tools for creating and loading XML image dataset
        // files. Here you see how to load the data. To create the XML files
        // you can use the imglab tool which can be found in the tools/imglab
        // folder. It is a simple graphical tool for labeling objects in images
        // with boxes. To see how to use it read the tools/imglab/README.txt
        // file.
        Dlib.LoadImageDataset(Path.Combine(facesDirectory, "training.xml"), out tmpImagesTrain, out tmpFaceBoxesTrain);
        Dlib.LoadImageDataset(Path.Combine(facesDirectory, "testing.xml"), out tmpImagesTest, out tmpFaceBoxesTest);

        // Now we do a little bit of pre-processing. This is optional but for
        // this training data it improves the results. The first thing we do is
        // increase the size of the images by a factor of two. We do this
        // because it will allow us to detect smaller faces than otherwise would
        // be practical (since the faces are all now twice as big). Note that,
        // in addition to resizing the images, these functions also make the
        // appropriate adjustments to the face boxes so that they still fall on
        // top of the faces after the images are resized.
        var imageTrain = new List<Matrix<byte>>(tmpImagesTrain);
        var faceBoxesTrain = new List<IList<Rectangle>>(tmpFaceBoxesTrain);
        Dlib.UpsampleImageDataset(2, imageTrain, faceBoxesTrain);
        var imageTest = new List<Matrix<byte>>(tmpImagesTest);
        var faceBoxesTest = new List<IList<Rectangle>>(tmpFaceBoxesTest);
        Dlib.UpsampleImageDataset(2, imageTest, faceBoxesTest);

        // Since human faces are generally left-right symmetric we can increase
        // our training dataset by adding mirrored versions of each image back
        // into images_train. So this next step doubles the size of our
        // training dataset. Again, this is obviously optional but is useful in
        // many object detection tasks.
        Dlib.AddImageLeftRightFlips(imageTrain, faceBoxesTrain);
        Console.WriteLine($"num training images: {imageTrain.Count}");
        Console.WriteLine($"num testing images:  {imageTest.Count}");

        // Finally we get to the training code. dlib contains a number of
        // object detectors. This typedef tells it that you want to use the one
        // based on Felzenszwalb's version of the Histogram of Oriented
        // Gradients (commonly called HOG) detector. The 6 means that you want
        // it to use an image pyramid that downsamples the image at a ratio of
        // 5/6. Recall that HOG detectors work by creating an image pyramid and
        // then running the detector over each pyramid level in a sliding window
        // fashion.
        using (var scanner = new ScanFHogPyramid<PyramidDown, DefaultFHogFeatureExtractor>(6))
        {
            // The sliding window detector will be 80 pixels wide and 80 pixels tall.
            scanner.SetDetectionWindowSize(80, 80);

            using (var trainer = new StructuralObjectDetectionTrainer<ScanFHogPyramid<PyramidDown, DefaultFHogFeatureExtractor>>(scanner))
            {
                // Set this to the number of processing cores on your machine.
                trainer.SetNumThreads(4);

                // The trainer is a kind of support vector machine and therefore has the usual SVM
                // C parameter. In general, a bigger C encourages it to fit the training data
                // better but might lead to overfitting. You must find the best C value
                // empirically by checking how well the trained detector works on a test set of
                // images you haven't trained on. Don't just leave the value set at 1. Try a few
                // different C values and see what works best for your data.
                trainer.SetC(1);

                // We can tell the trainer to print its progress to the console if we want.
                trainer.BeVerbose();

                // The trainer will run until the "risk gap" is less than 0.01. Smaller values
                // make the trainer solve the SVM optimization problem more accurately but will
                // take longer to train. For most problems a value in the range of 0.1 to 0.01 is
                // plenty accurate. Also, when in verbose mode the risk gap is printed on each
                // iteration so you can see how close it is to finishing the training.
                trainer.SetEpsilon(0.01);

                // Now we run the trainer. For this example, it should take on the order of 10
                // seconds to train.
                var detector = trainer.Train(imageTrain, faceBoxesTrain);

                // Now that we have a face detector we can test it. The first statement tests it
                // on the training data. It will print the precision, recall, and then average precision.
                using (var matrix = Dlib.TestObjectDetectionFunction(detector, imageTrain, faceBoxesTrain))
                    Console.WriteLine($"training results: {matrix}");

                // However, to get an idea if it really worked without overfitting we need to run
                // it on images it wasn't trained on. The next line does this. Happily, we see
                // that the object detector works perfectly on the testing images.
                using (var matrix = Dlib.TestObjectDetectionFunction(detector, imageTest, faceBoxesTest))
                    Console.WriteLine($"testing results: {matrix}");

                // If you have read any papers that use HOG you have probably seen the nice looking
                // "sticks" visualization of a learned HOG detector. This next line creates a
                // window with such a visualization of our detector. It should look somewhat like
                // a face.
                using (var fhog = Dlib.DrawFHog(detector))
                using (var hogwin = new ImageWindow(fhog, "Learned fHOG detector"))
                {
                    // Now for the really fun part. Let's display the testing images on the screen and
                    // show the output of the face detector overlaid on each image. You will see that
                    // it finds all the faces without false alarming on any non-faces.
                    using (var win = new ImageWindow())
                    {
                        for (var i = 0; i < imageTest.Count; ++i)
                        {
                            // Run the detector and get the face detections.
                            var dets = detector.Operator(imageTest[i]);
                            win.ClearOverlay();
                            win.SetImage(imageTest[i]);
                            win.AddOverlay(dets, new RgbPixel(255, 0, 0));
                            Console.WriteLine("Hit enter to process the next image...");
                            Console.ReadKey();
                            Console.WriteLine();
                        }
                    }
                }

                // Like everything in dlib, you can save your detector to disk using the
                // serialize() function.
                detector.Serialize("face_detector.svm");

                // Then you can recall it using the deserialize() function.
                using (var tmp = new ScanFHogPyramid<PyramidDown, DefaultFHogFeatureExtractor>(6))
                using (var detector2 = new ObjectDetector<ScanFHogPyramid<PyramidDown, DefaultFHogFeatureExtractor>>(tmp))
                    detector2.Deserialize("face_detector.svm");

                // Now let's talk about some optional features of this training tool as well as some
                // important points you should understand.
                //
                // The first thing that should be pointed out is that, since this is a sliding
                // window classifier, it can't output an arbitrary rectangle as a detection. In
                // this example our sliding window is 80 by 80 pixels and is run over an image
                // pyramid. This means that it can only output detections that are at least 80 by
                // 80 pixels in size (recall that this is why we upsampled the images after loading
                // them). It also means that the aspect ratio of the outputs is 1. So if,
                // for example, you had a box in your training data that was 200 pixels by 10
                // pixels then it would simply be impossible for the detector to learn to detect
                // it. Similarly, if you had a really small box it would be unable to learn to
                // detect it.
                //
                // So the training code performs an input validation check on the training data and
                // will throw an exception if it detects any boxes that are impossible to detect
                // given your setting of scanning window size and image pyramid resolution. You
                // can use a statement like:
                //   remove_unobtainable_rectangles(trainer, images_train, face_boxes_train)
                // to automatically discard these impossible boxes from your training dataset
                // before running the trainer. This will avoid getting the "impossible box"
                // exception. However, I would recommend you be careful that you are not throwing
                // away truth boxes you really care about. The remove_unobtainable_rectangles()
                // will return the set of removed rectangles so you can visually inspect them and
                // make sure you are OK that they are being removed.
                //
                // Next, note that any location in the images not marked with a truth box is
                // implicitly treated as a negative example. This means that when creating
                // training data it is critical that you label all the objects you want to detect.
                // So for example, if you are making a face detector then you must mark all the
                // faces in each image. However, sometimes there are objects in images you are
                // unsure about or simply don't care if the detector identifies or not. For these
                // objects you can pass in a set of "ignore boxes" as a third argument to the
                // trainer.train() function. The trainer will simply disregard any detections that
                // happen to hit these boxes.
                //
                // Another useful thing you can do is evaluate multiple HOG detectors together. The
                // benefit of this is increased testing speed since it avoids recomputing the HOG
                // features for each run of the detector. You do this by storing your detectors
                // into a std::vector and then invoking evaluate_detectors() like so:
                var myDetectors = new List<ObjectDetector<ScanFHogPyramid<PyramidDown, DefaultFHogFeatureExtractor>>>();
                myDetectors.Add(detector);
                var dets2 = Dlib.EvaluateDetectors(myDetectors, imageTrain[0]);

                // Finally, you can add a nuclear norm regularizer to the SVM trainer. Doing so has
                // two benefits. First, it can cause the learned HOG detector to be composed of
                // separable filters and therefore makes it execute faster when detecting objects.
                // It can also help with generalization since it tends to make the learned HOG
                // filters smoother. To enable this option you call the following function before
                // you create the trainer object:
                //   scanner.set_nuclear_norm_regularization_strength(1.0);
                // The argument determines how important it is to have a small nuclear norm. A
                // bigger regularization strength means it is more important. The smaller the
                // nuclear norm the smoother and faster the learned HOG filters will be, but if the
                // regularization strength value is too large then the SVM will not fit the data
                // well. This is analogous to giving a C value that is too small.
                //
                // You can see how many separable filters are inside your detector like so:
                Console.WriteLine($"num filters: {Dlib.NumSeparableFilters(detector)}");

                // You can also control how many filters there are by explicitly thresholding the
                // singular values of the filters like this:
                using (var newDetector = Dlib.ThresholdFilterSingularValues(detector, 0.1))
                {
                }
                // That removes filter components with singular values less than 0.1. The bigger
                // this number the fewer separable filters you will have and the faster the
                // detector will run. However, a large enough threshold will hurt detection
                // accuracy.
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e);
    }
}
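// A hedged sketch, not part of the original example: because the detector was serialized
// to face_detector.svm above, a separate run can reload it and detect faces without
// retraining. This uses only the types shown in the example plus Dlib.LoadImageAsMatrix;
// "test_face.jpg" is a placeholder path, not a file shipped with the examples. Unlike
// training, no 2x upsampling is applied here, so faces smaller than the 80x80 sliding
// window may be missed.
private static void RunSavedDetectorSketch()
{
    using (var scanner = new ScanFHogPyramid<PyramidDown, DefaultFHogFeatureExtractor>(6))
    using (var detector = new ObjectDetector<ScanFHogPyramid<PyramidDown, DefaultFHogFeatureExtractor>>(scanner))
    {
        detector.Deserialize("face_detector.svm");

        using (var img = Dlib.LoadImageAsMatrix<byte>("test_face.jpg")) // placeholder image
        {
            // Run the detector and report each detected face rectangle.
            var dets = detector.Operator(img);
            foreach (var d in dets)
                Console.WriteLine($"face at {d}");
        }
    }
}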