Example #1
        // Builds one augmented training sample in `crop`: a 227 x 227 chip cut from a random
        // rectangle of inputImage plus the matching chip of labelImage, optionally flipped via
        // Dlib.FlipLR, with Dlib.ApplyRandomColorOffset applied to the input chip at the end.
        // The crop rectangle comes from MakeRandomCroppingRectResNet (defined elsewhere).
        // NOTE(review): block delimiters are not present in this excerpt; nesting is conveyed
        // by indentation only.
        private static void RandomlyCropImage(Matrix <RgbPixel> inputImage, Matrix <ushort> labelImage, TrainingSample crop, Rand rnd)
            var rect = MakeRandomCroppingRectResNet(inputImage, rnd);

            // Both chips below are extracted with the same chipDetails, so image and labels
            // describe the same region.
            using (var chipDims = new ChipDims(227, 227))
                using (var chipDetails = new ChipDetails(rect, chipDims))
                    // Crop the input image.
                    crop.InputImage = Dlib.ExtractImageChip <RgbPixel>(inputImage, chipDetails, InterpolationTypes.Bilinear);

                    // Crop the labels correspondingly. However, note that here bilinear
                    // interpolation would make absolutely no sense - you wouldn't say that
                    // a bicycle is half-way between an aeroplane and a bird, would you?
                    crop.LabelImage = Dlib.ExtractImageChip <ushort>(labelImage, chipDetails, InterpolationTypes.NearestNeighbor);

                    // Also randomly flip the input image and the labels.
                    // Image and labels are flipped together, under the same random draw.
                    // NOTE(review): the chips assigned above are replaced here without a visible
                    // Dispose; other snippets in this file wrap ExtractImageChip results in
                    // `using`, so the replaced instances may need disposing - confirm.
                    if (rnd.GetRandomDouble() > 0.5)
                        var tmpInput = Dlib.FlipLR(crop.InputImage);
                        var tmpLabel = Dlib.FlipLR(crop.LabelImage);
                        crop.InputImage = tmpInput;
                        crop.LabelImage = tmpLabel;

                    // And then randomly adjust the colors.
                    // Only the input image is touched; the label image is left as is.
                    Dlib.ApplyRandomColorOffset(crop.InputImage, rnd);
Example #2
        // Rough memory-leak check for Dlib.ExtractImageChip<RgbPixel>: extracts the same chip
        // `loop` times, disposing each result, and asserts that the growth reported by
        // GetCurrentMemory (defined elsewhere) stays below 10240 KB.
        // NOTE(review): block delimiters are not present in this excerpt; nesting is conveyed
        // by indentation only.
        public void ExtractImageChip()
            var path = this.GetDataFile($"{LoadTarget}.bmp");

            const int loop      = 1000;
            // One reading per iteration is stored here; nothing in this method reads it back.
            var       sizeArray = new long[loop];
            // Baseline reading; `start` and `end` are both expressed relative to it.
            var       first     = GetCurrentMemory();

            var start = GetCurrentMemory() - first;

            using (var image = DlibTest.LoadImage(ImageTypes.RgbPixel, path))
                using (var dims = new ChipDims(227, 227))
                    using (var chip = new ChipDetails(new Rectangle(0, 0, 100, 100), dims))
                        for (var count = 0; count < loop; count++)
                            // The extracted chip is disposed at the end of each iteration.
                            using (Dlib.ExtractImageChip <RgbPixel>(image, chip))
                                sizeArray[count] = GetCurrentMemory();

            // Important!!
            // Forces a blocking collection of all generations before the final reading is taken.
            GC.Collect(2, GCCollectionMode.Forced, true);

            var end = GetCurrentMemory() - first;

            Console.WriteLine("        Start Total Memory = {0} KB", start / 1024);
            Console.WriteLine("          End Total Memory = {0} KB", end / 1024);
            Console.WriteLine("Delta (End - Start) Memory = {0} KB", (end - start) / 1024);

            // Rough check for a memory leak: the growth must stay below 10240 KB.
            Assert.IsTrue((end - start) / 1024 < 10240);
Example #3
        // Entry point of the semantic-segmentation inference example: loads the trained net from
        // "semantic_segmentation_voc2012net.dnn", runs it on every .jpeg/.jpg/.png file found
        // directly in the directory args[0], and prints the most prominent class per image.
        // NOTE(review): block delimiters and some statements (e.g. the `try` matching the trailing
        // `catch`, and any early exit after the usage text) are not present in this excerpt;
        // nesting is conveyed by indentation only.
        private static void Main(string[] args)
            // Exactly one argument (the image directory) is expected.
            // NOTE(review): the usage line names the *_train_ex program although the following
            // lines describe needing an already trained file - possibly a copy-paste; confirm.
            if (args.Length != 1)
                Console.WriteLine("You call this program like this: ");
                Console.WriteLine("./dnn_semantic_segmentation_train_ex /path/to/images");
                Console.WriteLine("You will also need a trained 'semantic_segmentation_voc2012net.dnn' file.");
                Console.WriteLine("You can either train it yourself (see example program");
                Console.WriteLine("dnn_semantic_segmentation_train_ex), or download a");
                Console.WriteLine("copy from here: http://dlib.net/files/semantic_segmentation_voc2012net.dnn");

                // Read the file containing the trained network from the working directory.
                using (var net = LossMulticlassLogPerPixel.Deserialize("semantic_segmentation_voc2012net.dnn"))
                    // Show inference results in a window.
                    // NOTE(review): no call on `win` is visible in this excerpt.
                    using (var win = new ImageWindow())
                        // Find supported image files.
                        // EndsWith(string) is case-sensitive, so upper-case extensions such as
                        // ".JPG" are not matched by this filter.
                        var files = Directory.GetFiles(args[0])
                                    .Where(s => s.EndsWith(".jpeg") || s.EndsWith(".jpg") || s.EndsWith(".png")).ToArray();
                        Console.WriteLine($"Found {files.Length} images, processing...");
                        foreach (var file in files)
                            // Load the input image.
                            using (var inputImage = Dlib.LoadImageAsMatrix <RgbPixel>(file))
                                // Create predictions for each pixel. At this point, the type of each prediction
                                // is an index (a value between 0 and 20). Note that the net may return an image
                                // that is not exactly the same size as the input.
                                using (var output = net.Operator(inputImage))
                                    // Only the first element of the output is used.
                                    using (var temp = output.First())
                                        // Crop the returned image to be exactly the same size as the input.
                                        // The rectangle is centered on the middle of the net output and has
                                        // the input image's width and height.
                                        var rect = Rectangle.CenteredRect((int)(temp.Columns / 2d), (int)(temp.Rows / 2d), (uint)inputImage.Columns, (uint)inputImage.Rows);
                                        using (var dims = new ChipDims((uint)inputImage.Rows, (uint)inputImage.Columns))
                                            using (var chipDetails = new ChipDetails(rect, dims))
                                                using (var indexLabelImage = Dlib.ExtractImageChip <ushort>(temp, chipDetails, InterpolationTypes.NearestNeighbor))
                                                    // Convert the indexes to RGB values.
                                                    using (var rgbLabelImage = IndexLabelImageToRgbLabelImage(indexLabelImage))
                                                        // Show the input image on the left, and the predicted RGB labels on the right.
                                                        // NOTE(review): joinedRow is created but no statement that displays
                                                        // it is visible in this excerpt.
                                                        using (var joinedRow = Dlib.JoinRows(inputImage, rgbLabelImage))

                                                            // Find the most prominent class label from amongst the per-pixel predictions.
                                                            var classLabel = GetMostProminentNonBackgroundClassLabel(indexLabelImage);

                                                            // NOTE(review): the message asks for Enter, but no read from
                                                            // the console is visible in this excerpt.
                                                            Console.WriteLine($"{file} : {classLabel} - hit enter to process the next image");
            // NOTE(review): the handler body is not visible in this excerpt.
            catch (Exception e)
Example #4
        // Calculate the per-pixel accuracy on a dataset whose file names are supplied as a parameter.
        // For every entry the input image and its RGB ground-truth labels are loaded, the net is run
        // on the input, and predictions are compared with the ground truth pixel by pixel, skipping
        // pixels whose ground truth equals LossMulticlassLogPerPixel.LabelToIgnore.
        // Returns numRight / (numRight + numWrong) as a double; with both counters at zero the
        // division is 0 / 0.0 and the result is NaN.
        // NOTE(review): block delimiters are not present in this excerpt (nesting is conveyed by
        // indentation only), and no statement that changes numRight or numWrong is visible.
        private static double CalculateAccuracy(LossMulticlassLogPerPixel anet, IEnumerable <ImageInfo> dataset)
            var numRight = 0;
            var numWrong = 0;

            foreach (var imageInfo in dataset)
                // Load the input image.
                using (var inputImage = Dlib.LoadImageAsMatrix <RgbPixel>(imageInfo.ImageFilename))
                    // Load the ground-truth (RGB) labels.
                    using (var rgbLabelImage = Dlib.LoadImageAsMatrix <RgbPixel>(imageInfo.ClassLabelFilename))
                        // Create predictions for each pixel. At this point, the type of each prediction
                        // is an index (a value between 0 and 20). Note that the net may return an image
                        // that is not exactly the same size as the input.
                        using (var output = anet.Operator(inputImage))
                            using (var temp = output.First())
                                // Convert the ground-truth RGB labels to class indexes, written into indexLabelImage.
                                using (var indexLabelImage = new Matrix <ushort>())
                                    PascalVOC2012.RgbLabelImageToIndexLabelImage(rgbLabelImage, indexLabelImage);

                                    // Crop the net output to be exactly the same size as the input.
                                    // NOTE(review): the ChipDetails construction below is cut off mid-expression
                                    // in this excerpt; its remaining arguments are not visible.
                                    using (var chipDims = new ChipDims((uint)inputImage.Rows, (uint)inputImage.Columns))
                                        using (var chipDetails = new ChipDetails(Dlib.CenteredRect(temp.Columns / 2, temp.Rows / 2,
                                            using (var netOutput = Dlib.ExtractImageChip <ushort>(temp, chipDetails, InterpolationTypes.NearestNeighbor))
                                                // The loops run over the ground-truth dimensions and index netOutput
                                                // with the same (r, c).
                                                var nr = indexLabelImage.Rows;
                                                var nc = indexLabelImage.Columns;

                                                // Compare the predicted values to the ground-truth values.
                                                for (var r = 0; r < nr; ++r)
                                                    for (var c = 0; c < nc; ++c)
                                                        var truth = indexLabelImage[r, c];
                                                        // Pixels marked LabelToIgnore are left out of the comparison.
                                                        if (truth != LossMulticlassLogPerPixel.LabelToIgnore)
                                                            var prediction = netOutput[r, c];
                                                            // NOTE(review): the statements for the match / mismatch cases
                                                            // are not visible in this excerpt.
                                                            if (prediction == truth)

            // Return the accuracy estimate.
            return(numRight / (double)(numRight + numWrong));
Example #5
        // Entry point of the instance-segmentation inference example: reads a detection net and a
        // dictionary of segmentation nets from InstanceSegmentationNetFilename, then for every
        // .jpeg/.jpg/.png file found directly in the directory args[0] runs the detector, segments
        // each detected instance and paints the resulting masks into an RGB label image.
        // NOTE(review): block delimiters and some statements (e.g. the `try` matching the trailing
        // `catch`, any early exit after the usage text, and the `else` branches) are not present in
        // this excerpt; nesting is conveyed by indentation only.
        private static void Main(string[] args)
            // Exactly one argument (the image directory) is expected.
            // NOTE(review): the usage line names the *_train_ex program although the following
            // lines describe needing an already trained file - possibly a copy-paste; confirm.
            if (args.Length != 1)
                Console.WriteLine("You call this program like this: ");
                Console.WriteLine("./dnn_instance_segmentation_train_ex /path/to/images");
                Console.WriteLine($"You will also need a trained '{InstanceSegmentationNetFilename}' file.");
                Console.WriteLine("You can either train it yourself (see example program");
                Console.WriteLine("dnn_instance_segmentation_train_ex), or download a");
                Console.WriteLine($"copy from here: http://dlib.net/files/{InstanceSegmentationNetFilename}");

                // Read the file containing the trained network from the working directory.
                // The detection net and the segmentation nets are read through the same
                // ProxyDeserialize instance, detection net first.
                using (var deserialize = new ProxyDeserialize(InstanceSegmentationNetFilename))
                    using (var detNet = LossMmod.Deserialize(deserialize, 4))
                        // Maps a class label to the segmentation net used for that class.
                        // NOTE(review): Deserialize on the dictionary is not a BCL member; presumably
                        // an extension method defined elsewhere - confirm.
                        var segNetsByClass = new Dictionary <string, LossMulticlassLogPerPixel>();
                        segNetsByClass.Deserialize(deserialize, 4);

                        // Show inference results in a window.
                        // NOTE(review): no call on `win` is visible in this excerpt.
                        using (var win = new ImageWindow())
                            // Find supported image files.
                            // EndsWith(string) is case-sensitive, so upper-case extensions such as
                            // ".JPG" are not matched by this filter.
                            var files = Directory.GetFiles(args[0])
                                        .Where(s => s.EndsWith(".jpeg") || s.EndsWith(".jpg") || s.EndsWith(".png")).ToArray();

                            // NOTE(review): no use of `rnd` is visible in this excerpt; presumably it feeds
                            // the truncated randomColor construction further down - confirm.
                            using (var rnd = new Rand())
                                Console.WriteLine($"Found {files.Length} images, processing...");
                                foreach (var file in files.Select(s => new FileInfo(s)))
                                    // Load the input image.
                                    using (var inputImage = Dlib.LoadImageAsMatrix <RgbPixel>(file.FullName))
                                        // Run the detection net on the image. Each element of the first output
                                        // exposes a Rect and a Label, both used below.
                                        using (var output = detNet.Operator(inputImage))
                                            // Order the instances by ascending rectangle area.
                                            // NOTE(review): the comparer casts both areas to int and subtracts them,
                                            // which can overflow for very large areas; a CompareTo-based comparer
                                            // would avoid that - confirm whether it matters here.
                                            var instances = output.First().ToList();
                                            instances.Sort((lhs, rhs) => (int)lhs.Rect.Area - (int)rhs.Rect.Area);

                                            using (var rgbLabelImage = new Matrix <RgbPixel>())
                                                // Same size as the input, with every pixel set to RgbPixel(0, 0, 0).
                                                rgbLabelImage.SetSize(inputImage.Rows, inputImage.Columns);
                                                rgbLabelImage.Assign(Enumerable.Range(0, rgbLabelImage.Size).Select(i => new RgbPixel(0, 0, 0)).ToArray());

                                                var foundSomething = false;
                                                foreach (var instance in instances)
                                                    // NOTE(review): the three statements below share one indentation
                                                    // level; an `else` in front of the ", " separator appears to be
                                                    // missing from this excerpt - confirm.
                                                    if (!foundSomething)
                                                        Console.Write("Found ");
                                                        foundSomething = true;
                                                        Console.Write(", ");


                                                    // Cut a SegDim x SegDim chip around the instance as input for the
                                                    // segmentation net. GetCroppingRect is defined elsewhere.
                                                    var croppingRect = GetCroppingRect(instance.Rect);
                                                    using (var dims = new ChipDims(SegDim, SegDim))
                                                        using (var chipDetails = new ChipDetails(croppingRect, dims))
                                                            using (var inputChip = Dlib.ExtractImageChip <RgbPixel>(inputImage, chipDetails, InterpolationTypes.Bilinear))
                                                                if (!segNetsByClass.TryGetValue(instance.Label, out var i))
                                                                    // per-class segmentation net not found, so we must be using the same net for all classes
                                                                    // (see bool separate_seg_net_for_each_class in dnn_instance_segmentation_train_ex.cpp)
                                                                    // NOTE(review): as written, the checks below throw when the dictionary
                                                                    // holds exactly one net and when the first key is null or empty, which
                                                                    // is the very situation the comment above describes as expected. Both
                                                                    // conditions look inverted (expected `!= 1` and a negated
                                                                    // IsNullOrEmpty) - confirm against dnn_instance_segmentation_ex.cpp.
                                                                    if (segNetsByClass.Count == 1)
                                                                        throw new ApplicationException();
                                                                    if (string.IsNullOrEmpty(segNetsByClass.First().Key))
                                                                        throw new ApplicationException();

                                                                var segNet = i != null
                                                               ? i                             // use the segmentation net trained for this class
                                                               : segNetsByClass.First().Value; // use the same segmentation net for all classes

                                                                // NOTE(review): the randomColor construction below is cut off
                                                                // mid-expression in this excerpt; its arguments are not visible.
                                                                using (var mask = segNet.Operator(inputChip))
                                                                    var randomColor = new RgbPixel(

                                                                    // Scale the first mask to the chip rectangle's height and width.
                                                                    using (var resizedMask = new Matrix <ushort>((int)chipDetails.Rect.Height, (int)chipDetails.Rect.Width))
                                                                        Dlib.ResizeImage(mask.First(), resizedMask);

                                                                        // Paint every non-zero mask pixel into rgbLabelImage at the
                                                                        // chip rectangle's offset, skipping coordinates outside the image.
                                                                        for (int r = 0, nr = resizedMask.Rows; r < nr; ++r)
                                                                            for (int c = 0, nc = resizedMask.Columns; c < nc; ++c)
                                                                                if (resizedMask[r, c] != 0)
                                                                                    var y = (int)(chipDetails.Rect.Top + r);
                                                                                    var x = (int)(chipDetails.Rect.Left + c);
                                                                                    if (y >= 0 && y < rgbLabelImage.Rows && x >= 0 && x < rgbLabelImage.Columns)
                                                                                        rgbLabelImage[y, x] = randomColor;

                                                                    // Draw the instance rectangle using the RgbLabel of its VOC2012 class.
                                                                    var voc2012Class = PascalVOC2012.FindVoc2012Class(instance.Label);
                                                                    Dlib.DrawRectangle(rgbLabelImage, instance.Rect, voc2012Class.RgbLabel, 1u);


                                                // NOTE(review): tmp is created but no statement that displays it is
                                                // visible in this excerpt.
                                                using (var tmp = Dlib.JoinRows(inputImage, rgbLabelImage))
                                                    // Show the input image on the left, and the predicted RGB labels on the right.

                                                    // Completes the "Found ..." line started in the instance loop.
                                                    if (instances.Any())
                                                        Console.Write($" in {file.Name} - hit enter to process the next image");

                        // NOTE(review): the loop body is not visible in this excerpt; presumably it
                        // releases the segmentation nets - confirm.
                        foreach (var kvp in segNetsByClass)
            // NOTE(review): the handler body is not visible in this excerpt.
            catch (Exception e)
        // Runs Dlib.ExtractImageChip<T> once per image type listed in `tests`, using a 227 x 227
        // chip taken from Rectangle(0, 0, 100, 100) of the test bitmap. For each type the
        // callbacks built below are handed to DoTest (defined elsewhere) together with the
        // expected outcome.
        // NOTE(review): block delimiters are not present in this excerpt; nesting is conveyed
        // by indentation only.
        public void ExtractImageChip()
            const string testName = nameof(ExtractImageChip);
            var          path     = this.GetDataFile($"{LoadTarget}.bmp");

            // Expected outcome per input pixel type; RgbAlphaPixel is the only entry expected to fail.
            var tests = new[]
                new { Type = ImageTypes.RgbPixel, ExpectResult = true },
                new { Type = ImageTypes.RgbAlphaPixel, ExpectResult = false },
                new { Type = ImageTypes.UInt8, ExpectResult = true },
                new { Type = ImageTypes.UInt16, ExpectResult = true },
                new { Type = ImageTypes.UInt32, ExpectResult = true },
                new { Type = ImageTypes.Int8, ExpectResult = true },
                new { Type = ImageTypes.Int16, ExpectResult = true },
                new { Type = ImageTypes.Int32, ExpectResult = true },
                new { Type = ImageTypes.HsiPixel, ExpectResult = true },
                new { Type = ImageTypes.Float, ExpectResult = true },
                new { Type = ImageTypes.Double, ExpectResult = true }

            // Name of the concrete test class; used for the output directory below.
            var type = this.GetType().Name;

            using (var dims = new ChipDims(227, 227))
                using (var chip = new ChipDetails(new Rectangle(0, 0, 100, 100), dims))
                    foreach (var input in tests)
                        var expectResult = input.ExpectResult;
                        var imageObj     = DlibTest.LoadImage(input.Type, path);

                        // Dispatches to the ExtractImageChip<T> instantiation matching the input type.
                        // The `expect` parameter is not referenced in the visible lambda body.
                        var outputImageAction = new Func <bool, Array2DBase>(expect =>
                            switch (input.Type)
                            case ImageTypes.RgbPixel:
                                return(Dlib.ExtractImageChip <RgbPixel>(imageObj, chip));

                            case ImageTypes.RgbAlphaPixel:
                                return(Dlib.ExtractImageChip <RgbAlphaPixel>(imageObj, chip));

                            case ImageTypes.UInt8:
                                return(Dlib.ExtractImageChip <byte>(imageObj, chip));

                            case ImageTypes.UInt16:
                                return(Dlib.ExtractImageChip <ushort>(imageObj, chip));

                            case ImageTypes.UInt32:
                                return(Dlib.ExtractImageChip <uint>(imageObj, chip));

                            case ImageTypes.Int8:
                                return(Dlib.ExtractImageChip <sbyte>(imageObj, chip));

                            case ImageTypes.Int16:
                                return(Dlib.ExtractImageChip <short>(imageObj, chip));

                            case ImageTypes.Int32:
                                return(Dlib.ExtractImageChip <int>(imageObj, chip));

                            case ImageTypes.HsiPixel:
                                return(Dlib.ExtractImageChip <HsiPixel>(imageObj, chip));

                            case ImageTypes.Float:
                                return(Dlib.ExtractImageChip <float>(imageObj, chip));

                            case ImageTypes.Double:
                                return(Dlib.ExtractImageChip <double>(imageObj, chip));

                                // NOTE(review): the label introducing this branch (presumably `default:`)
                                // is not visible in this excerpt.
                                throw new ArgumentOutOfRangeException();

                        // Saves the produced chip as "<LoadTarget>_<Type>.bmp" under GetOutDir(type, testName).
                        var successAction = new Action <Array2DBase>(image =>
                            Dlib.SaveBmp(image, $"{Path.Combine(this.GetOutDir(type, testName), $"{LoadTarget}_{input.Type}.bmp")}");

                        // Fails the test with a message saying an exception was expected for this input type.
                        var failAction = new Action(() =>
                            Assert.Fail($"{testName} should throw exception for InputType: {input.Type}.");

                        // NOTE(review): the statement guarded by this null check is not visible in this
                        // excerpt; presumably it disposes imageObj - confirm.
                        var finallyAction = new Action(() =>
                            if (imageObj != null)

                        // The message prints input.Type twice, as "InputType" and as "Type".
                        var exceptionAction = new Action(() =>
                            Console.WriteLine($"Failed to execute {testName} to InputType: {input.Type}, Type: {input.Type}.");

                        DoTest(outputImageAction, expectResult, successAction, finallyAction, failAction, exceptionAction);
Example #7
        // Entry point of the MMOD face-detector training example: loads the training and testing
        // datasets from the directory args[0], trains a LossMmod network on random crops until the
        // trainer's learning rate falls below 1e-4, saves it to "mmod_network.dat" and prints the
        // detection results on both datasets.
        // NOTE(review): block delimiters and some statements (e.g. the `try` matching the trailing
        // `catch`) are not present in this excerpt; nesting is conveyed by indentation only.
        private static void Main(string[] args)
                // In this example we are going to train a face detector based on the
                // small faces dataset in the examples/faces directory.  So the first
                // thing we do is load that dataset.  This means you need to supply the
                // path to this faces folder as a command line argument so we will know
                // where it is.
                // NOTE(review): no early exit is visible after the usage text in this excerpt; as
                // shown, args[0] below would be read even when no argument was given - confirm
                // against the full source.
                if (args.Length != 1)
                    Console.WriteLine("Give the path to the examples/faces directory as the argument to this");
                    Console.WriteLine("program.  For example, if you are in the examples folder then execute ");
                    Console.WriteLine("this program by running: ");
                    Console.WriteLine("   ./dnn_mmod_ex faces");

                var facesDirectory = args[0];

                // The faces directory contains a training dataset and a separate
                // testing dataset.  The training data consists of 4 images, each
                // annotated with rectangles that bound each human face.  The idea is
                // to use this training data to learn to identify human faces in new
                // images.
                // Once you have trained an object detector it is always important to
                // test it on data it wasn't trained on.  Therefore, we will also load
                // a separate testing set of 5 images.  Once we have a face detector
                // created from the training data we will see how well it works by
                // running it on the testing images.
                // So here we create the variables that will hold our dataset.
                // images_train will hold the 4 training images and face_boxes_train
                // holds the locations of the faces in the training images.  So for
                // example, the image images_train[0] has the faces given by the
                // rectangles in face_boxes_train[0].
                IList <Matrix <RgbPixel> > imagesTrain;
                IList <Matrix <RgbPixel> > imagesTest;
                IList <IList <MModRect> >  faceBoxesTrain;
                IList <IList <MModRect> >  faceBoxesTest;

                // Now we load the data.  These XML files list the images in each dataset
                // and also contain the positions of the face boxes.  Obviously you can use
                // any kind of input format you like so long as you store the data into
                // images_train and face_boxes_train.  But for convenience dlib comes with
                // tools for creating and loading XML image datasets.  Here you see how to
                // load the data.  To create the XML files you can use the imglab tool which
                // can be found in the tools/imglab folder.  It is a simple graphical tool
                // for labeling objects in images with boxes.  To see how to use it read the
                // tools/imglab/README.txt file.
                Dlib.LoadImageDataset(facesDirectory + "/training.xml", out imagesTrain, out faceBoxesTrain);
                Dlib.LoadImageDataset(facesDirectory + "/testing.xml", out imagesTest, out faceBoxesTest);

                Console.WriteLine($"num training images: {imagesTrain.Count()}");
                Console.WriteLine($"num testing images:  {imagesTest.Count()}");

                // The MMOD algorithm has some options you can set to control its behavior.  However,
                // you can also call the constructor with your training annotations and a "target
                // object size" and it will automatically configure itself in a reasonable way for your
                // problem.  Here we are saying that faces are still recognizably faces when they are
                // 40x40 pixels in size.  You should generally pick the smallest size where this is
                // true.  Based on this information the mmod_options constructor will automatically
                // pick a good sliding window width and height.  It will also automatically set the
                // non-max-suppression parameters to something reasonable.  For further details see the
                // mmod_options documentation.
                using (var options = new MModOptions(faceBoxesTrain, 40, 40))
                    // The detector will automatically decide to use multiple sliding windows if needed.
                    // For the face data, only one is needed however.
                    var detectorWindows = options.DetectorWindows.ToArray();
                    Console.WriteLine($"num detector windows: {detectorWindows.Length}");
                    foreach (var w in detectorWindows)
                        Console.WriteLine($"detector window width by height: {w.Width} x {w.Height}");

                    Console.WriteLine($"overlap NMS IOU thresh:             {options.OverlapsNms.GetIouThresh()}");
                    Console.WriteLine($"overlap NMS percent covered thresh: {options.OverlapsNms.GetPercentCoveredThresh()}");

                    // Now we are ready to create our network and trainer.
                    using (var net = new LossMmod(options, 2))
                        // The MMOD loss requires that the number of filters in the final network layer equal
                        // options.detector_windows.size().  So we set that here as well.
                        // NOTE(review): subnet and details are acquired below, but no statement that
                        // uses them (such as setting the number of filters) is visible in this excerpt.
                        using (var subnet = net.GetSubnet())
                            using (var details = subnet.GetLayerDetails())
                                using (var trainer = new DnnTrainer <LossMmod>(net))
                                    // NOTE(review): the second argument is presumably an interval in
                                    // seconds (5 minutes) - confirm.
                                    trainer.SetSynchronizationFile("mmod_sync", 5 * 60);

                                    // Now let's train the network.  We are going to use mini-batches of 150
                                    // images.   The images are random crops from our training set (see
                                    // random_cropper_ex.cpp for a discussion of the random_cropper).
                                    IEnumerable <Matrix <RgbPixel> > miniBatchSamples;
                                    //IEnumerable<IEnumerable<RgbPixel>> mini_batch_labels;
                                    IEnumerable <IEnumerable <MModRect> > miniBatchLabels;

                                    using (var cropper = new RandomCropper())
                                        using (var chipDims = new ChipDims(200, 200))
                                            cropper.ChipDims = chipDims;
                                            // Usually you want to give the cropper whatever min sizes you passed to the
                                            // mmod_options constructor, which is what we do here.
                                            cropper.SetMinObjectSize(40, 40);

                                            using (var rnd = new Rand())
                                                // Run the trainer until the learning rate gets small.  This will probably take several
                                                // hours.
                                                while (trainer.GetLearningRate() >= 1e-4)
                                                    // Each iteration requests 150 fresh crops with their labels.
                                                    cropper.Operator(150, imagesTrain, faceBoxesTrain, out miniBatchSamples, out miniBatchLabels);
                                                    // We can also randomly jitter the colors and that often helps a detector
                                                    // generalize better to new images.
                                                    foreach (var img in miniBatchSamples)
                                                        Dlib.DisturbColors(img, rnd);

                                                    LossMmod.TrainOneStep(trainer, miniBatchSamples, miniBatchLabels);

                                                // wait for training threads to stop
                                                // NOTE(review): no statement that waits on the trainer is visible in this excerpt.
                                                Console.WriteLine("done training");

                                                // Save the network to disk
                                                LossMmod.Serialize(net, "mmod_network.dat");

                                                // Now that we have a face detector we can test it.  The first statement tests it
                                                // on the training data.  It will print the precision, recall, and then average precision.
                                                // This statement should indicate that the network works perfectly on the
                                                // training data.
                                                using (var matrix = Dlib.TestObjectDetectionFunction(net, imagesTrain, faceBoxesTrain))
                                                    Console.WriteLine($"training results: {matrix}");
                                                // However, to get an idea if it really worked without overfitting we need to run
                                                // it on images it wasn't trained on.  The next line does this.   Happily,
                                                // this statement indicates that the detector finds most of the faces in the
                                                // testing data.
                                                using (var matrix = Dlib.TestObjectDetectionFunction(net, imagesTest, faceBoxesTest))
                                                    Console.WriteLine($"testing results:  {matrix}");

                                                // If you are running many experiments, it's also useful to log the settings used
                                                // during the training experiment.  This statement will print the settings we used to
                                                // the screen.
                                                // NOTE(review): the statement this comment refers to is not visible in this excerpt.

                                                // Now let's run the detector on the testing images and look at the outputs.
                                                // NOTE(review): the bodies of the detection loops below, and any call on
                                                // `win`, are not visible in this excerpt.
                                                using (var win = new ImageWindow())
                                                    foreach (var img in imagesTest)
                                                        var dets = net.Operator(img);
                                                        foreach (var d in dets[0])


                                                        foreach (var det in dets)
                                                            foreach (var d in det)

                                                // Now that you finished this example, you should read dnn_mmod_train_find_cars_ex.cpp,
                                                // which is a more advanced example.  It discusses many issues surrounding properly
                                                // setting the MMOD parameters and creating a good training dataset.

            // NOTE(review): the handler body is not visible in this excerpt.
            catch (Exception e)