Example #1
        private static void Main(string[] args)
        {
            try
            {
                // Make sure the user entered an argument to this program.  It should be the
                // filename for an image.
                if (args.Length != 1)
                {
                    Console.WriteLine("error, you have to enter a BMP file as an argument to this program.");
                    return;
                }

                // Here we declare an image object that can store color rgb_pixels and load
                // the image file into it.  If something is wrong then load_image() will
                // throw an exception.  Also, if you linked with libpng and libjpeg then
                // load_image() can load PNG and JPEG files in addition to BMP files.
                using (var img = Dlib.LoadImage <RgbPixel>(args[0]))
                {
                    // Now convert the image into a FHOG feature image.  The output, hog, is a 2D array
                    // of 31 dimensional vectors.
                    using (var hog = Dlib.ExtractFHogFeatures <float>(img))
                    {
                        Console.WriteLine($"hog image has {hog.Rows} rows and {hog.Columns} columns.");

                        // Let's see what the image and FHOG features look like.
                        using (var win = new ImageWindow(img))
                            using (var drawhog = Dlib.DrawFHog(hog))
                                using (var winhog = new ImageWindow(drawhog))
                                {
                                    // Another thing you might want to do is map between the pixels in img and the
                                    // cells in the hog image.  dlib provides the image_to_fhog() and fhog_to_image()
                                    // routines for this.  Their use is demonstrated in the following loop which
                                    // responds to the user clicking on pixels in the image img.
                                    Point p; // A 2D point, used to represent pixel locations.
                                    while (win.GetNextDoubleClick(out p))
                                    {
                                        var hp = Dlib.ImageToFHog(p);
                                        Console.WriteLine($"The point {p} in the input image corresponds to {hp} in hog space.");
                                        var row    = hog[hp.Y];
                                        var column = row[hp.X];
                                        var t      = Dlib.Trans(column);
                                        // Console.WriteLine($"FHOG features at this point: {t}");
                                    }

                                    // Finally, sometimes you want to get a planar representation of the HOG features
                                    // rather than the explicit vector (i.e. interlaced) representation used above.
                                    var planar_hog = Dlib.ExtractFHogFeaturesArray <float>(img);
                                    // Now we have an array of 31 float valued image planes, each representing one of
                                    // the dimensions of the HOG feature vector.
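                                    // The returned wrapper holds native memory like everything else in
                                    // this example; assuming it implements IDisposable (as DlibDotNet
                                    // wrappers generally do), release it once you are done:
                                    planar_hog.Dispose();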
                                }
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine($"exception thrown: {e}");
            }
        }
Example #2
        public void ExtractFHogFeatures2()
        {
            var path = this.GetDataFile($"{LoadTarget}.bmp");

            var tests = new[]
            {
                new { Type = MatrixElementTypes.Float, ExpectResult = true },
                new { Type = MatrixElementTypes.Double, ExpectResult = true }
            };

            foreach (var output in tests)
            {
                Array2DBase       imageObj  = null;
                Array2DMatrixBase outputObj = null;

                try
                {
                    imageObj = DlibTest.LoadImageHelp(ImageTypes.RgbPixel, path);

                    switch (output.Type)
                    {
                    case MatrixElementTypes.Float:
                        outputObj = Dlib.ExtractFHogFeatures <float>(imageObj);
                        break;

                    case MatrixElementTypes.Double:
                        outputObj = Dlib.ExtractFHogFeatures <double>(imageObj);
                        break;

                    default:
                        throw new ArgumentOutOfRangeException();
                    }

                    MatrixBase matrix = Dlib.DrawFHog(outputObj);

                    if (this.CanGuiDebug)
                    {
                        var window = new ImageWindow(matrix);
                        window.WaitUntilClosed();
                    }
                }
                catch (Exception e)
                {
                    Console.WriteLine(e);
                    throw;
                }
                finally
                {
                    if (imageObj != null)
                    {
                        this.DisposeAndCheckDisposedState(imageObj);
                    }
                    if (outputObj != null)
                    {
                        this.DisposeAndCheckDisposedState(outputObj);
                    }
                }
            }
        }
Example #3
        public void ExtractFHogFeatures()
        {
            const string testName = nameof(this.ExtractFHogFeatures);
            var          path     = this.GetDataFile($"{LoadTarget}.bmp");

            var tests = new[]
            {
                new { Type = MatrixElementTypes.Float, ExpectResult = true },
                new { Type = MatrixElementTypes.Double, ExpectResult = true },
                new { Type = MatrixElementTypes.RgbPixel, ExpectResult = false },
                new { Type = MatrixElementTypes.RgbAlphaPixel, ExpectResult = false },
                new { Type = MatrixElementTypes.HsiPixel, ExpectResult = false },
                new { Type = MatrixElementTypes.LabPixel, ExpectResult = false },
                new { Type = MatrixElementTypes.UInt32, ExpectResult = false },
                new { Type = MatrixElementTypes.UInt8, ExpectResult = false },
                new { Type = MatrixElementTypes.UInt16, ExpectResult = false },
                new { Type = MatrixElementTypes.Int8, ExpectResult = false },
                new { Type = MatrixElementTypes.Int16, ExpectResult = false },
                new { Type = MatrixElementTypes.Int32, ExpectResult = false }
            };

            foreach (ImageTypes inputType in Enum.GetValues(typeof(ImageTypes)))
            {
                if (inputType == ImageTypes.Matrix)
                {
                    continue;
                }

                foreach (var output in tests)
                {

                    var expectResult            = output.ExpectResult;
                    var imageObj                = DlibTest.LoadImageHelp(inputType, path);
                    Array2DMatrixBase outputObj = null;

                    var outputImageAction = new Func <bool, Array2DMatrixBase>(expect =>
                    {
                        switch (output.Type)
                        {
                        case MatrixElementTypes.UInt8:
                            outputObj = Dlib.ExtractFHogFeatures <byte>(imageObj);
                            break;

                        case MatrixElementTypes.UInt16:
                            outputObj = Dlib.ExtractFHogFeatures <ushort>(imageObj);
                            break;

                        case MatrixElementTypes.UInt32:
                            outputObj = Dlib.ExtractFHogFeatures <uint>(imageObj);
                            break;

                        case MatrixElementTypes.Int8:
                            outputObj = Dlib.ExtractFHogFeatures <sbyte>(imageObj);
                            break;

                        case MatrixElementTypes.Int16:
                            outputObj = Dlib.ExtractFHogFeatures <short>(imageObj);
                            break;

                        case MatrixElementTypes.Int32:
                            outputObj = Dlib.ExtractFHogFeatures <int>(imageObj);
                            break;

                        case MatrixElementTypes.Float:
                            outputObj = Dlib.ExtractFHogFeatures <float>(imageObj);
                            break;

                        case MatrixElementTypes.Double:
                            outputObj = Dlib.ExtractFHogFeatures <double>(imageObj);
                            break;

                        case MatrixElementTypes.RgbPixel:
                            outputObj = Dlib.ExtractFHogFeatures <RgbPixel>(imageObj);
                            break;

                        case MatrixElementTypes.RgbAlphaPixel:
                            outputObj = Dlib.ExtractFHogFeatures <RgbAlphaPixel>(imageObj);
                            break;

                        case MatrixElementTypes.HsiPixel:
                            outputObj = Dlib.ExtractFHogFeatures <HsiPixel>(imageObj);
                            break;

                        case MatrixElementTypes.LabPixel:
                            outputObj = Dlib.ExtractFHogFeatures <LabPixel>(imageObj);
                            break;

                        default:
                            throw new ArgumentOutOfRangeException();
                        }

                        return outputObj;
                    });

                    var successAction = new Action <Array2DMatrixBase>(image =>
                    {
                        MatrixBase ret = null;

                        try
                        {
                            ret = Dlib.DrawFHog(image);
                        }
                        catch (Exception e)
                        {
                            Console.WriteLine(e);
                            throw;
                        }
                        finally
                        {
                            if (ret != null)
                            {
                                this.DisposeAndCheckDisposedState(ret);
                            }
                        }
                    });

                    var failAction = new Action(() =>
                    {
                        Assert.True(false, $"{testName} should throw exception for InputType: {inputType}, OutputType: {output.Type}.");
                    });

                    var finallyAction = new Action(() =>
                    {
                        if (imageObj != null)
                        {
                            this.DisposeAndCheckDisposedState(imageObj);
                        }
                        if (outputObj != null)
                        {
                            this.DisposeAndCheckDisposedState(outputObj);
                        }
                    });

                    var exceptionAction = new Action(() =>
                    {
                        Console.WriteLine($"Failed to execute {testName} to InputType: {inputType}, OutputType: {output.Type}.");
                    });

                    DoTest(outputImageAction, expectResult, successAction, finallyAction, failAction, exceptionAction);
                }
            }
        }
Example #4
        private static void Main(string[] args)
        {
            try
            {
                // In this example we are going to train a face detector based on the
                // small faces dataset in the examples/faces directory.  So the first
                // thing we do is load that dataset.  This means you need to supply the
                // path to this faces folder as a command line argument so we will know
                // where it is.
                if (args.Length != 1)
                {
                    Console.WriteLine("Give the path to the examples/faces directory as the argument to this");
                    Console.WriteLine("program.  For example, if you are in the examples folder then execute ");
                    Console.WriteLine("this program by running: ");
                    Console.WriteLine("   ./fhog_object_detector_ex faces");
                    Console.WriteLine();
                    return;
                }

                var facesDirectory = args[0];
                // The faces directory contains a training dataset and a separate
                // testing dataset.  The training data consists of 4 images, each
                // annotated with rectangles that bound each human face.  The idea is
                // to use this training data to learn to identify human faces in new
                // images.
                //
                // Once you have trained an object detector it is always important to
                // test it on data it wasn't trained on.  Therefore, we will also load
                // a separate testing set of 5 images.  Once we have a face detector
                // created from the training data we will see how well it works by
                // running it on the testing images.
                //
                // So here we create the variables that will hold our dataset.
                // images_train will hold the 4 training images and face_boxes_train
                // holds the locations of the faces in the training images.  So for
                // example, the image images_train[0] has the faces given by the
                // rectangles in face_boxes_train[0].
                IList <Matrix <byte> >     tmpImagesTrain;
                IList <Matrix <byte> >     tmpImagesTest;
                IList <IList <Rectangle> > tmpFaceBoxesTrain;
                IList <IList <Rectangle> > tmpFaceBoxesTest;

                // Now we load the data.  These XML files list the images in each
                // dataset and also contain the positions of the face boxes.  Obviously
                // you can use any kind of input format you like so long as you store
                // the data into images_train and face_boxes_train.  But for convenience
                // dlib comes with tools for creating and loading XML image dataset
                // files.  Here you see how to load the data.  To create the XML files
                // you can use the imglab tool which can be found in the tools/imglab
                // folder.  It is a simple graphical tool for labeling objects in images
                // with boxes.  To see how to use it read the tools/imglab/README.txt
                // file.
                Dlib.LoadImageDataset(Path.Combine(facesDirectory, "training.xml"), out tmpImagesTrain, out tmpFaceBoxesTrain);
                Dlib.LoadImageDataset(Path.Combine(facesDirectory, "testing.xml"), out tmpImagesTest, out tmpFaceBoxesTest);

                // Now we do a little bit of pre-processing.  This is optional but for
                // this training data it improves the results.  The first thing we do is
                // increase the size of the images by a factor of two.  We do this
                // because it will allow us to detect smaller faces than otherwise would
                // be practical (since the faces are all now twice as big).  Note that,
                // in addition to resizing the images, these functions also make the
                // appropriate adjustments to the face boxes so that they still fall on
                // top of the faces after the images are resized.
                var imageTrain     = new List <Matrix <byte> >(tmpImagesTrain);
                var faceBoxesTrain = new List <IList <Rectangle> >(tmpFaceBoxesTrain);
                Dlib.UpsampleImageDataset(2, imageTrain, faceBoxesTrain);
                var imageTest     = new List <Matrix <byte> >(tmpImagesTest);
                var faceBoxesTest = new List <IList <Rectangle> >(tmpFaceBoxesTest);
                Dlib.UpsampleImageDataset(2, imageTest, faceBoxesTest);

                // Since human faces are generally left-right symmetric we can increase
                // our training dataset by adding mirrored versions of each image back
                // into images_train.  So this next step doubles the size of our
                // training dataset.  Again, this is obviously optional but is useful in
                // many object detection tasks.
                Dlib.AddImageLeftRightFlips(imageTrain, faceBoxesTrain);
                Console.WriteLine($"num training images: {imageTrain.Count()}");
                Console.WriteLine($"num testing images:  {imageTest.Count()}");


                // Finally we get to the training code.  dlib contains a number of
                // object detectors.  This typedef tells it that you want to use the one
                // based on Felzenszwalb's version of the Histogram of Oriented
                // Gradients (commonly called HOG) detector.  The 6 means that you want
                // it to use an image pyramid that downsamples the image at a ratio of
                // 5/6.  Recall that HOG detectors work by creating an image pyramid and
                // then running the detector over each pyramid level in a sliding window
                // fashion.
                using (var scanner = new ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor>(6))
                {
                    // The sliding window detector will be 80 pixels wide and 80 pixels tall.
                    scanner.SetDetectionWindowSize(80, 80);

                    using (var trainer = new StructuralObjectDetectionTrainer <ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor> >(scanner))
                    {
                        // Set this to the number of processing cores on your machine.
                        trainer.SetNumThreads(4);
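                        // (If you would rather not hard-code the thread count, the standard
                        // BCL property works here: trainer.SetNumThreads(Environment.ProcessorCount);)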
                        // The trainer is a kind of support vector machine and therefore has the usual SVM
                        // C parameter.  In general, a bigger C encourages it to fit the training data
                        // better but might lead to overfitting.  You must find the best C value
                        // empirically by checking how well the trained detector works on a test set of
                        // images you haven't trained on.  Don't just leave the value set at 1.  Try a few
                        // different C values and see what works best for your data.
                        trainer.SetC(1);
                        // We can tell the trainer to print its progress to the console if we want.
                        trainer.BeVerbose();
                        // The trainer will run until the "risk gap" is less than 0.01.  Smaller values
                        // make the trainer solve the SVM optimization problem more accurately but will
                        // take longer to train.  For most problems a value in the range of 0.1 to 0.01 is
                        // plenty accurate.  Also, when in verbose mode the risk gap is printed on each
                        // iteration so you can see how close it is to finishing the training.
                        trainer.SetEpsilon(0.01);


                        // Now we run the trainer.  For this example, it should take on the order of 10
                        // seconds to train.
                        var detector = trainer.Train(imageTrain, faceBoxesTrain);

                        // Now that we have a face detector we can test it.  The first statement tests it
                        // on the training data.  It will print the precision, recall, and then average precision.
                        using (var matrix = Dlib.TestObjectDetectionFunction(detector, imageTrain, faceBoxesTrain))
                            Console.WriteLine($"training results: {matrix}");
                        // However, to get an idea if it really worked without overfitting we need to run
                        // it on images it wasn't trained on.  The next line does this.  Happily, we see
                        // that the object detector works perfectly on the testing images.
                        using (var matrix = Dlib.TestObjectDetectionFunction(detector, imageTest, faceBoxesTest))
                            Console.WriteLine($"testing results: {matrix}");

                        // If you have read any papers that use HOG you have probably seen the nice looking
                        // "sticks" visualization of a learned HOG detector.  This next line creates a
                        // window with such a visualization of our detector.  It should look somewhat like
                        // a face.
                        using (var fhog = Dlib.DrawFHog(detector))
                            using (var hogwin = new ImageWindow(fhog, "Learned fHOG detector"))
                            {
                                // Now for the really fun part.  Let's display the testing images on the screen and
                                // show the output of the face detector overlaid on each image.  You will see that
                                // it finds all the faces without false alarming on any non-faces.
                                using (var win = new ImageWindow())
                                    for (var i = 0; i < imageTest.Count; ++i)
                                    {
                                        // Run the detector and get the face detections.
                                        var dets = detector.Operator(imageTest[i]);
                                        win.ClearOverlay();
                                        win.SetImage(imageTest[i]);
                                        win.AddOverlay(dets, new RgbPixel(255, 0, 0));
                                        Console.WriteLine("Hit enter to process the next image...");
                                        Console.ReadKey();
                                        Console.WriteLine("");
                                    }
                            }


                        // Like everything in dlib, you can save your detector to disk using the
                        // serialize() function.
                        detector.Serialize("face_detector.svm");

                        // Then you can recall it using the deserialize() function.
                        using (var tmp = new ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor>(6))
                            using (var detector2 = new ObjectDetector <ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor> >(tmp))
                                detector2.Deserialize("face_detector.svm");



                        // Now let's talk about some optional features of this training tool as well as some
                        // important points you should understand.
                        //
                        // The first thing that should be pointed out is that, since this is a sliding
                        // window classifier, it can't output an arbitrary rectangle as a detection.  In
                        // this example our sliding window is 80 by 80 pixels and is run over an image
                        // pyramid.  This means that it can only output detections that are at least 80 by
                        // 80 pixels in size (recall that this is why we upsampled the images after loading
                        // them).  It also means that the aspect ratio of the outputs is 1.  So if,
                        // for example, you had a box in your training data that was 200 pixels by 10
                        // pixels then it would simply be impossible for the detector to learn to detect
                        // it.  Similarly, if you had a really small box it would be unable to learn to
                        // detect it.
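                        //
                        // For example, with the 80 by 80 window and the 5/6 pyramid ratio used
                        // here, the detectable box sizes are roughly 80*(6/5)^k pixels on a
                        // side for k = 0, 1, 2, ... (80, 96, 115, 138, ...), always with an
                        // aspect ratio of 1.  A 200 by 10 truth box can never match any of these.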
                        //
                        // So the training code performs an input validation check on the training data and
                        // will throw an exception if it detects any boxes that are impossible to detect
                        // given your setting of scanning window size and image pyramid resolution.  You
                        // can use a statement like:
                        //   remove_unobtainable_rectangles(trainer, images_train, face_boxes_train)
                        // to automatically discard these impossible boxes from your training dataset
                        // before running the trainer.  This will avoid getting the "impossible box"
                        // exception.  However, I would recommend you be careful that you are not throwing
                        // away truth boxes you really care about.  The remove_unobtainable_rectangles()
                        // will return the set of removed rectangles so you can visually inspect them and
                        // make sure you are OK that they are being removed.
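                        //
                        // A hedged C# sketch of that call -- the managed name is assumed to
                        // mirror dlib's remove_unobtainable_rectangles(), so verify it exists
                        // in your DlibDotNet version:
                        //   var removed = Dlib.RemoveUnobtainableRectangles(trainer, imageTrain, faceBoxesTrain);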
                        //
                        // Next, note that any location in the images not marked with a truth box is
                        // implicitly treated as a negative example.  This means that when creating
                        // training data it is critical that you label all the objects you want to detect.
                        // So for example, if you are making a face detector then you must mark all the
                        // faces in each image.  However, sometimes there are objects in images you are
                        // unsure about or simply don't care if the detector identifies or not.  For these
                        // objects you can pass in a set of "ignore boxes" as a third argument to the
                        // trainer.train() function.  The trainer will simply disregard any detections that
                        // happen to hit these boxes.
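                        //
                        // A hedged sketch of that overload (signature assumed from the C++
                        // trainer.train(images, boxes, ignore) -- verify it in your DlibDotNet
                        // version):
                        //   var ignore = imageTrain.Select(_ => (IList<Rectangle>)new List<Rectangle>()).ToList();
                        //   var detectorWithIgnore = trainer.Train(imageTrain, faceBoxesTrain, ignore);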
                        //
                        // Another useful thing you can do is evaluate multiple HOG detectors together. The
                        // benefit of this is increased testing speed since it avoids recomputing the HOG
                        // features for each run of the detector.  You do this by storing your detectors
                        // into a std::vector and then invoking evaluate_detectors() like so:
                        var myDetectors = new List <ObjectDetector <ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor> > >();
                        myDetectors.Add(detector);
                        var dect2 = Dlib.EvaluateDetectors(myDetectors, imageTrain[0]);
                        //
                        //
                        // Finally, you can add a nuclear norm regularizer to the SVM trainer.  Doing so has
                        // two benefits.  First, it can cause the learned HOG detector to be composed of
                        // separable filters and therefore makes it execute faster when detecting objects.
                        // It can also help with generalization since it tends to make the learned HOG
                        // filters smoother.  To enable this option you call the following function before
                        // you create the trainer object:
                        //    scanner.set_nuclear_norm_regularization_strength(1.0);
                        // The argument determines how important it is to have a small nuclear norm.  A
                        // bigger regularization strength means it is more important.  The smaller the
                        // nuclear norm the smoother and faster the learned HOG filters will be, but if the
                        // regularization strength value is too large then the SVM will not fit the data
                        // well.  This is analogous to giving a C value that is too small.
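                        //
                        // In C# that would look something like the following (method name
                        // assumed from the C++ API above -- check your DlibDotNet version),
                        // called before the trainer object is created:
                        //   scanner.SetNuclearNormRegularizationStrength(1.0);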
                        //
                        // You can see how many separable filters are inside your detector like so:
                        Console.WriteLine($"num filters: {Dlib.NumSeparableFilters(detector)}");
                        // You can also control how many filters there are by explicitly thresholding the
                        // singular values of the filters like this:
                        using (var newDetector = Dlib.ThresholdFilterSingularValues(detector, 0.1))
                        {
                        }
                        // That removes filter components with singular values less than 0.1.  The bigger
                        // this number the fewer separable filters you will have and the faster the
                        // detector will run.  However, a large enough threshold will hurt detection
                        // accuracy.
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }